diff --git a/Cargo.lock b/Cargo.lock index c55ea60d34..55f99d14b5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -12,15 +12,6 @@ dependencies = [ "regex", ] -[[package]] -name = "addr2line" -version = "0.25.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b5d307320b3181d6d7954e663bd7c774a838b8220fe0593c86d9fb09f498b4b" -dependencies = [ - "gimli", -] - [[package]] name = "adler2" version = "2.0.1" @@ -46,7 +37,7 @@ checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" dependencies = [ "cfg-if", "const-random", - "getrandom 0.3.3", + "getrandom 0.3.4", "once_cell", "serde", "version_check", @@ -89,6 +80,12 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + [[package]] name = "android_system_properties" version = "0.1.5" @@ -106,9 +103,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstream" -version = "0.6.20" +version = "0.6.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ae563653d1938f79b1ab1b5e668c87c76a9930414574a6583a7b7e11a8e6192" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" dependencies = [ "anstyle", "anstyle-parse", @@ -333,7 +330,7 @@ dependencies = [ "arrow-schema", "chrono", "half", - "indexmap 2.11.4", + "indexmap 2.12.0", "lexical-core", "memchr", "num", @@ -438,6 +435,18 @@ version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71938f30533e4d95a6d17aa530939da3842c2ab6f4f84b9dae68447e4129f74a" +[[package]] +name = "async-channel" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"924ed96dd52d1b75e9c1a3e6275715fd320f5f9439fb5a4a11fa51f4221158d2" +dependencies = [ + "concurrent-queue", + "event-listener-strategy", + "futures-core", + "pin-project-lite", +] + [[package]] name = "async-compression" version = "0.4.19" @@ -484,8 +493,8 @@ dependencies = [ "futures-timer", "futures-util", "handlebars", - "http", - "indexmap 2.11.4", + "http 1.3.1", + "indexmap 2.12.0", "mime", "multer", "num-traits", @@ -507,12 +516,12 @@ checksum = "fd45deb3dbe5da5cdb8d6a670a7736d735ba65b455328440f236dfb113727a3d" dependencies = [ "Inflector", "async-graphql-parser", - "darling", + "darling 0.20.11", "proc-macro-crate", "proc-macro2", "quote", - "strum", - "syn 2.0.106", + "strum 0.26.3", + "syn 2.0.108", "thiserror 1.0.69", ] @@ -536,7 +545,7 @@ checksum = "4dcb6b3a79ee6cecec0ffbef55add2be12ca362540b775b0cb6c66a47d61c3ae" dependencies = [ "async-graphql", "futures-util", - "http", + "http 1.3.1", "mime", "poem", "serde_json", @@ -552,7 +561,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34ecdaff7c9cffa3614a9f9999bf9ee4c3078fe3ce4d6a6e161736b56febf2de" dependencies = [ "bytes", - "indexmap 2.11.4", + "indexmap 2.12.0", "serde", "serde_json", ] @@ -594,6 +603,17 @@ dependencies = [ "tracing", ] +[[package]] +name = "async-recursion" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.108", +] + [[package]] name = "async-stream" version = "0.3.6" @@ -613,7 +633,7 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -624,7 +644,16 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", +] + +[[package]] +name = "async_cell" +version = "0.2.3" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "447ab28afbb345f5408b120702a44e5529ebf90b1796ec76e9528df8e288e6c2" +dependencies = [ + "loom", ] [[package]] @@ -649,763 +678,1365 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] -name = "axum" -version = "0.7.9" +name = "aws-config" +version = "1.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" +checksum = "37cf2b6af2a95a20e266782b4f76f1a5e12bf412a9db2de9c1e9123b9d8c0ad8" dependencies = [ - "async-trait", - "axum-core", + "aws-credential-types", + "aws-runtime", + "aws-sdk-sso", + "aws-sdk-ssooidc", + "aws-sdk-sts", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", "bytes", - "futures-util", - "http", - "http-body", - "http-body-util", - "itoa", - "matchit", - "memchr", - "mime", - "percent-encoding", - "pin-project-lite", - "rustversion", - "serde", - "sync_wrapper", - "tower 0.5.2", - "tower-layer", - "tower-service", + "fastrand", + "hex", + "http 1.3.1", + "ring", + "time", + "tokio", + "tracing", + "url", + "zeroize", ] [[package]] -name = "axum-core" -version = "0.4.5" +name = "aws-credential-types" +version = "1.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" +checksum = "faf26925f4a5b59eb76722b63c2892b1d70d06fa053c72e4a100ec308c1d47bc" dependencies = [ - "async-trait", - "bytes", - "futures-util", - "http", - "http-body", - "http-body-util", - "mime", - "pin-project-lite", - "rustversion", - "sync_wrapper", - "tower-layer", - "tower-service", + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "zeroize", ] [[package]] -name = "backoff" -version = 
"0.4.0" +name = "aws-lc-rs" +version = "1.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b62ddb9cb1ec0a098ad4bbf9344d0713fa193ae1a80af55febcff2627b6a00c1" +checksum = "879b6c89592deb404ba4dc0ae6b58ffd1795c78991cbb5b8bc441c48a070440d" dependencies = [ - "futures-core", - "getrandom 0.2.16", - "instant", - "pin-project-lite", - "rand 0.8.5", - "tokio", + "aws-lc-sys", + "zeroize", ] [[package]] -name = "backtrace" -version = "0.3.76" +name = "aws-lc-sys" +version = "0.32.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6" +checksum = "107a4e9d9cab9963e04e84bb8dee0e25f2a987f9a8bad5ed054abd439caa8f8c" dependencies = [ - "addr2line", - "cfg-if", - "libc", - "miniz_oxide", - "object", - "rustc-demangle", - "windows-link", + "bindgen", + "cc", + "cmake", + "dunce", + "fs_extra", ] [[package]] -name = "base64" -version = "0.21.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" - -[[package]] -name = "base64" -version = "0.22.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" - -[[package]] -name = "base64-compat" -version = "1.0.0" +name = "aws-runtime" +version = "1.5.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a8d4d2746f89841e49230dd26917df1876050f95abafafbe34f47cb534b88d7" +checksum = "bfa006bb32360ed90ac51203feafb9d02e3d21046e1fd3a450a404b90ea73e5d" dependencies = [ - "byteorder", + "aws-credential-types", + "aws-sigv4", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "http-body 0.4.6", + "percent-encoding", + "pin-project-lite", + "tracing", + "uuid", ] [[package]] -name = 
"bigdecimal" -version = "0.4.8" +name = "aws-sdk-dynamodb" +version = "1.96.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a22f228ab7a1b23027ccc6c350b72868017af7ea8356fbdf19f8d991c690013" +checksum = "a2e1f3871da847c7ff682ddd2c2b802fa10a562db34eaf16dd863fc2d3f72ad6" dependencies = [ - "autocfg", - "libm", - "num-bigint", - "num-integer", - "num-traits", - "serde", + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "regex-lite", + "tracing", ] [[package]] -name = "bincode" -version = "1.3.3" +name = "aws-sdk-sso" +version = "1.86.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +checksum = "4a0abbfab841446cce6e87af853a3ba2cc1bc9afcd3f3550dd556c43d434c86d" dependencies = [ - "serde", + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "regex-lite", + "tracing", ] [[package]] -name = "bit-set" -version = "0.8.0" +name = "aws-sdk-ssooidc" +version = "1.88.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +checksum = "9a68d675582afea0e94d38b6ca9c5aaae4ca14f1d36faa6edb19b42e687e70d7" dependencies = [ - "bit-vec", + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "regex-lite", + "tracing", ] [[package]] -name = "bit-vec" -version = "0.8.0" +name = "aws-sdk-sts" +version = "1.88.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" +checksum = "d30990923f4f675523c51eb1c0dec9b752fb267b36a61e83cbc219c9d86da715" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-query", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-smithy-xml", + "aws-types", + "fastrand", + "http 0.2.12", + "regex-lite", + "tracing", +] [[package]] -name = "bitflags" -version = "2.9.4" +name = "aws-sigv4" +version = "1.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" +checksum = "bffc03068fbb9c8dd5ce1c6fb240678a5cffb86fb2b7b1985c999c4b83c8df68" dependencies = [ - "serde", + "aws-credential-types", + "aws-smithy-http", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "form_urlencoded", + "hex", + "hmac", + "http 0.2.12", + "http 1.3.1", + "percent-encoding", + "sha2", + "time", + "tracing", ] [[package]] -name = "bitpacking" -version = "0.9.2" +name = "aws-smithy-async" +version = "1.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c1d3e2bfd8d06048a179f7b17afc3188effa10385e7b00dc65af6aae732ea92" +checksum = "127fcfad33b7dfc531141fda7e1c402ac65f88aca5511a4d31e2e3d2cd01ce9c" dependencies = [ - "crunchy", + "futures-util", + "pin-project-lite", + "tokio", ] [[package]] -name = "blake2" -version = "0.10.6" +name = "aws-smithy-http" +version = "0.62.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" +checksum = "3feafd437c763db26aa04e0cc7591185d0961e64c61885bece0fb9d50ceac671" dependencies = [ - "digest", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "bytes-utils", + "futures-core", + "http 0.2.12", + "http 1.3.1", + 
"http-body 0.4.6", + "percent-encoding", + "pin-project-lite", + "pin-utils", + "tracing", ] [[package]] -name = "blake3" -version = "1.8.2" +name = "aws-smithy-http-client" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0" -dependencies = [ - "arrayref", - "arrayvec", - "cc", - "cfg-if", - "constant_time_eq", +checksum = "1053b5e587e6fa40ce5a79ea27957b04ba660baa02b28b7436f64850152234f1" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "h2 0.3.27", + "h2 0.4.12", + "http 0.2.12", + "http 1.3.1", + "http-body 0.4.6", + "hyper 0.14.32", + "hyper 1.7.0", + "hyper-rustls 0.24.2", + "hyper-rustls 0.27.7", + "hyper-util", + "pin-project-lite", + "rustls 0.21.12", + "rustls 0.23.34", + "rustls-native-certs 0.8.2", + "rustls-pki-types", + "tokio", + "tokio-rustls 0.26.4", + "tower 0.5.2", + "tracing", ] [[package]] -name = "block-buffer" -version = "0.10.4" +name = "aws-smithy-json" +version = "0.61.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +checksum = "cff418fc8ec5cadf8173b10125f05c2e7e1d46771406187b2c878557d4503390" dependencies = [ - "generic-array", + "aws-smithy-types", ] [[package]] -name = "brotli" -version = "7.0.0" +name = "aws-smithy-observability" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc97b8f16f944bba54f0433f07e30be199b6dc2bd25937444bbad560bcea29bd" +checksum = "2d1881b1ea6d313f9890710d65c158bdab6fb08c91ea825f74c1c8c357baf4cc" dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", - "brotli-decompressor 4.0.3", + "aws-smithy-runtime-api", ] [[package]] -name = "brotli" -version = "8.0.2" +name = "aws-smithy-query" +version = "0.60.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"4bd8b9603c7aa97359dbd97ecf258968c95f3adddd6db2f7e7a5bef101c84560" +checksum = "d28a63441360c477465f80c7abac3b9c4d075ca638f982e605b7dc2a2c7156c9" dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", - "brotli-decompressor 5.0.0", + "aws-smithy-types", + "urlencoding", ] [[package]] -name = "brotli-decompressor" -version = "4.0.3" +name = "aws-smithy-runtime" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a334ef7c9e23abf0ce748e8cd309037da93e606ad52eb372e4ce327a0dcfbdfd" +checksum = "40ab99739082da5347660c556689256438defae3bcefd66c52b095905730e404" dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-http-client", + "aws-smithy-observability", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "fastrand", + "http 0.2.12", + "http 1.3.1", + "http-body 0.4.6", + "http-body 1.0.1", + "pin-project-lite", + "pin-utils", + "tokio", + "tracing", ] [[package]] -name = "brotli-decompressor" -version = "5.0.0" +name = "aws-smithy-runtime-api" +version = "1.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "874bb8112abecc98cbd6d81ea4fa7e94fb9449648c93cc89aa40c81c24d7de03" +checksum = "3683c5b152d2ad753607179ed71988e8cfd52964443b4f74fd8e552d0bbfeb46" dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", + "aws-smithy-async", + "aws-smithy-types", + "bytes", + "http 0.2.12", + "http 1.3.1", + "pin-project-lite", + "tokio", + "tracing", + "zeroize", ] [[package]] -name = "bumpalo" -version = "3.19.0" +name = "aws-smithy-types" +version = "1.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" +checksum = "9f5b3a7486f6690ba25952cabf1e7d75e34d69eaff5081904a47bc79074d6457" +dependencies = [ + "base64-simd", + "bytes", + "bytes-utils", + "futures-core", + "http 0.2.12", + "http 1.3.1", + "http-body 0.4.6", + "http-body 1.0.1", + 
"http-body-util", + "itoa", + "num-integer", + "pin-project-lite", + "pin-utils", + "ryu", + "serde", + "time", + "tokio", + "tokio-util", +] [[package]] -name = "bytemuck" -version = "1.23.2" +name = "aws-smithy-xml" +version = "0.60.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3995eaeebcdf32f91f980d360f78732ddc061097ab4e39991ae7a6ace9194677" +checksum = "e9c34127e8c624bc2999f3b657e749c1393bedc9cd97b92a804db8ced4d2e163" dependencies = [ - "bytemuck_derive", + "xmlparser", ] [[package]] -name = "bytemuck_derive" -version = "1.10.1" +name = "aws-types" +version = "1.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f154e572231cb6ba2bd1176980827e3d5dc04cc183a75dea38109fbdd672d29" +checksum = "e2fd329bf0e901ff3f60425691410c69094dc2a1f34b331f37bfc4e9ac1565a1" dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.106", + "aws-credential-types", + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "rustc_version", + "tracing", ] [[package]] -name = "byteorder" -version = "1.5.0" +name = "axum" +version = "0.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" - -[[package]] -name = "bytes" -version = "1.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" +checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" dependencies = [ + "async-trait", + "axum-core 0.4.5", + "bytes", + "futures-util", + "http 1.3.1", + "http-body 1.0.1", + "http-body-util", + "itoa", + "matchit 0.7.3", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "rustversion", "serde", + "sync_wrapper", + "tower 0.5.2", + "tower-layer", + "tower-service", ] [[package]] -name = "bzip2" -version = "0.4.4" +name = "axum" +version = "0.8.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" +checksum = "8a18ed336352031311f4e0b4dd2ff392d4fbb370777c9d18d7fc9d7359f73871" dependencies = [ - "bzip2-sys", - "libc", + "axum-core 0.5.5", + "bytes", + "form_urlencoded", + "futures-util", + "http 1.3.1", + "http-body 1.0.1", + "http-body-util", + "hyper 1.7.0", + "hyper-util", + "itoa", + "matchit 0.8.4", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "serde_core", + "serde_json", + "serde_path_to_error", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tower 0.5.2", + "tower-layer", + "tower-service", + "tracing", ] [[package]] -name = "bzip2" -version = "0.5.2" +name = "axum-core" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47" +checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" dependencies = [ - "bzip2-sys", + "async-trait", + "bytes", + "futures-util", + "http 1.3.1", + "http-body 1.0.1", + "http-body-util", + "mime", + "pin-project-lite", + "rustversion", + "sync_wrapper", + "tower-layer", + "tower-service", ] [[package]] -name = "bzip2" -version = "0.6.0" +name = "axum-core" +version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bea8dcd42434048e4f7a304411d9273a411f647446c1234a65ce0554923f4cff" +checksum = "59446ce19cd142f8833f856eb31f3eb097812d1479ab224f54d72428ca21ea22" dependencies = [ - "libbz2-rs-sys", + "bytes", + "futures-core", + "http 1.3.1", + "http-body 1.0.1", + "http-body-util", + "mime", + "pin-project-lite", + "sync_wrapper", + "tower-layer", + "tower-service", + "tracing", ] [[package]] -name = "bzip2-sys" -version = "0.1.13+1.0.8" +name = "backoff" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" +checksum = "b62ddb9cb1ec0a098ad4bbf9344d0713fa193ae1a80af55febcff2627b6a00c1" dependencies = [ - "cc", - "pkg-config", + "futures-core", + "getrandom 0.2.16", + "instant", + "pin-project-lite", + "rand 0.8.5", + "tokio", ] [[package]] -name = "cast" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" - -[[package]] -name = "cc" -version = "1.2.39" +name = "backon" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1354349954c6fc9cb0deab020f27f783cf0b604e8bb754dc4658ecf0d29c35f" +checksum = "cffb0e931875b666fc4fcb20fee52e9bbd1ef836fd9e9e04ec21555f9f85f7ef" dependencies = [ - "find-msvc-tools", - "jobserver", - "libc", - "shlex", + "fastrand", + "gloo-timers", + "tokio", ] [[package]] -name = "census" -version = "0.4.2" +name = "base64" +version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f4c707c6a209cbe82d10abd08e1ea8995e9ea937d2550646e02798948992be0" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" [[package]] -name = "cfg-if" -version = "1.0.3" +name = "base64" +version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] -name = "cfg_aliases" -version = "0.2.1" +name = "base64-compat" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +checksum = "5a8d4d2746f89841e49230dd26917df1876050f95abafafbe34f47cb534b88d7" +dependencies = [ + "byteorder", +] [[package]] -name = "chrono" -version = "0.4.42" +name = "base64-simd" +version = "0.8.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" +checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195" dependencies = [ - "iana-time-zone", - "js-sys", - "num-traits", - "serde", - "wasm-bindgen", - "windows-link", + "outref", + "vsimd", ] [[package]] -name = "chrono-tz" -version = "0.8.6" +name = "base64ct" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d59ae0466b83e838b81a54256c39d5d7c20b9d7daa10510a242d9b75abd5936e" +checksum = "55248b47b0caf0546f7988906588779981c43bb1bc9d0c44087278f80cdb44ba" + +[[package]] +name = "bigdecimal" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "560f42649de9fa436b73517378a147ec21f6c997a546581df4b4b31677828934" dependencies = [ - "chrono", - "chrono-tz-build", - "phf 0.11.3", + "autocfg", + "libm", + "num-bigint", + "num-integer", + "num-traits", + "serde", ] [[package]] -name = "chrono-tz" -version = "0.10.4" +name = "bincode" +version = "1.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6139a8597ed92cf816dfb33f5dd6cf0bb93a6adc938f11039f371bc5bcd26c3" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" dependencies = [ - "chrono", - "phf 0.12.1", + "serde", ] [[package]] -name = "chrono-tz-build" -version = "0.2.1" +name = "bindgen" +version = "0.72.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "433e39f13c9a060046954e0592a8d0a4bcb1040125cbf91cb8ee58964cfb350f" +checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" dependencies = [ - "parse-zoneinfo", - "phf 0.11.3", - "phf_codegen", + "bitflags", + "cexpr", + "clang-sys", + "itertools 0.13.0", + "log", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash 2.1.1", + "shlex", + "syn 2.0.108", ] [[package]] -name = "ciborium" -version = 
"0.2.2" +name = "bit-set" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" dependencies = [ - "ciborium-io", - "ciborium-ll", - "serde", + "bit-vec", ] [[package]] -name = "ciborium-io" -version = "0.2.2" +name = "bit-vec" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" [[package]] -name = "ciborium-ll" -version = "0.2.2" +name = "bitflags" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" dependencies = [ - "ciborium-io", - "half", + "serde_core", ] [[package]] -name = "cipher" -version = "0.4.4" +name = "bitpacking" +version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" +checksum = "4c1d3e2bfd8d06048a179f7b17afc3188effa10385e7b00dc65af6aae732ea92" dependencies = [ - "crypto-common", - "inout", + "crunchy", ] [[package]] -name = "clap" -version = "4.5.48" +name = "bitvec" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2134bb3ea021b78629caa971416385309e0131b351b25e01dc16fb54e1b5fae" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" dependencies = [ - "clap_builder", - "clap_derive", + "funty", + "radium", + "tap", + "wyz", ] [[package]] -name = "clap_builder" -version = "4.5.48" +name = "blake2" +version = "0.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c2ba64afa3c0a6df7fa517765e31314e983f51dda798ffba27b988194fb65dc9" +checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" dependencies = [ - "anstream", - "anstyle", - "clap_lex", - "strsim", + "digest", ] [[package]] -name = "clap_derive" -version = "4.5.47" +name = "blake3" +version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbfd7eae0b0f1a6e63d4b13c9c478de77c2eb546fba158ad50b4203dc24b9f9c" +checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0" dependencies = [ - "heck 0.5.0", - "proc-macro2", - "quote", - "syn 2.0.106", + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", ] [[package]] -name = "clap_lex" -version = "0.7.5" +name = "block-buffer" +version = "0.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] [[package]] -name = "colorchoice" -version = "1.0.4" +name = "block-padding" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +checksum = "a8894febbff9f758034a5b8e12d87918f56dfc64a8e1fe757d65e29041538d93" +dependencies = [ + "generic-array", +] [[package]] -name = "comfy-table" -version = "7.1.2" +name = "bon" +version = "3.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0d05af1e006a2407bedef5af410552494ce5be9090444dbbcb57258c1af3d56" +checksum = "ebeb9aaf9329dff6ceb65c689ca3db33dbf15f324909c60e4e5eef5701ce31b1" dependencies = [ - "strum", - "strum_macros", - "unicode-width", + "bon-macros", + "rustversion", ] [[package]] -name = "concurrent-queue" -version = "2.5.0" +name = "bon-macros" +version = "3.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" +checksum = "77e9d642a7e3a318e37c2c9427b5a6a48aa1ad55dcd986f3034ab2239045a645" dependencies = [ - "crossbeam-utils", + "darling 0.21.3", + "ident_case", + "prettyplease", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.108", ] [[package]] -name = "config" -version = "0.14.1" +name = "brotli" +version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68578f196d2a33ff61b27fae256c3164f65e36382648e30666dde05b8cc9dfdf" +checksum = "cc97b8f16f944bba54f0433f07e30be199b6dc2bd25937444bbad560bcea29bd" dependencies = [ - "async-trait", - "convert_case", - "json5", - "nom", - "pathdiff", - "ron", - "rust-ini", - "serde", - "serde_json", - "toml", - "yaml-rust2", + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor 4.0.3", ] [[package]] -name = "const-oid" -version = "0.9.6" +name = "brotli" +version = "8.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" +checksum = "4bd8b9603c7aa97359dbd97ecf258968c95f3adddd6db2f7e7a5bef101c84560" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor 5.0.0", +] [[package]] -name = "const-random" -version = "0.1.18" +name = "brotli-decompressor" +version = "4.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +checksum = "a334ef7c9e23abf0ce748e8cd309037da93e606ad52eb372e4ce327a0dcfbdfd" dependencies = [ - "const-random-macro", + "alloc-no-stdlib", + "alloc-stdlib", ] [[package]] -name = "const-random-macro" -version = "0.1.16" +name = "brotli-decompressor" +version = "5.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +checksum = "874bb8112abecc98cbd6d81ea4fa7e94fb9449648c93cc89aa40c81c24d7de03" dependencies 
= [ - "getrandom 0.2.16", - "once_cell", - "tiny-keccak", + "alloc-no-stdlib", + "alloc-stdlib", ] [[package]] -name = "constant_time_eq" -version = "0.3.1" +name = "bumpalo" +version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" +checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" [[package]] -name = "convert_case" -version = "0.6.0" +name = "bytemuck" +version = "1.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca" +checksum = "5c76a5792e44e4abe34d3abf15636779261d45a7450612059293d1d2cfc63422" dependencies = [ - "unicode-segmentation", + "bytemuck_derive", ] [[package]] -name = "core-foundation" -version = "0.9.4" +name = "bytemuck_derive" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +checksum = "3fa76293b4f7bb636ab88fd78228235b5248b4d05cc589aed610f954af5d7c7a" dependencies = [ - "core-foundation-sys", - "libc", + "proc-macro2", + "quote", + "syn 2.0.108", ] [[package]] -name = "core-foundation" -version = "0.10.1" +name = "byteorder" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" -dependencies = [ - "core-foundation-sys", - "libc", -] +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] -name = "core-foundation-sys" -version = "0.8.7" +name = "bytes" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" +dependencies = [ + "serde", +] [[package]] -name = "cpufeatures" -version = 
"0.2.17" +name = "bytes-utils" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +checksum = "7dafe3a8757b027e2be6e4e5601ed563c55989fcf1546e933c66c8eb3a058d35" dependencies = [ - "libc", + "bytes", + "either", ] [[package]] -name = "crc" -version = "3.3.0" +name = "bzip2" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9710d3b3739c2e349eb44fe848ad0b7c8cb1e42bd87ee49371df2f7acaf3e675" +checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" dependencies = [ - "crc-catalog", + "bzip2-sys", + "libc", ] [[package]] -name = "crc-catalog" -version = "2.4.0" +name = "bzip2" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" +checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47" +dependencies = [ + "bzip2-sys", +] [[package]] -name = "crc32fast" -version = "1.5.0" +name = "bzip2" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +checksum = "f3a53fac24f34a81bc9954b5d6cfce0c21e18ec6959f44f56e8e90e4bb7c346c" dependencies = [ - "cfg-if", + "libbz2-rs-sys", ] [[package]] -name = "criterion" -version = "0.5.1" +name = "bzip2-sys" +version = "0.1.13+1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" dependencies = [ - "anes", - "cast", - "ciborium", - "clap", - "criterion-plot", - "is-terminal", - "itertools 0.10.5", - "num-traits", - "once_cell", - "oorandom", - "plotters", - "rayon", - "regex", - "serde", - "serde_derive", - "serde_json", - "tinytemplate", - "walkdir", + "cc", 
+ "pkg-config", ] [[package]] -name = "criterion-plot" -version = "0.5.0" +name = "cast" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" -dependencies = [ - "cast", - "itertools 0.10.5", -] +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] -name = "crossbeam-channel" -version = "0.5.15" +name = "cbc" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +checksum = "26b52a9543ae338f279b96b0b9fed9c8093744685043739079ce85cd58f289a6" dependencies = [ - "crossbeam-utils", + "cipher", ] [[package]] -name = "crossbeam-deque" -version = "0.8.6" +name = "cc" +version = "1.2.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +checksum = "81bbf3b3619004ad9bd139f62a9ab5cfe467f307455a0d307b0cf58bf070feaa" dependencies = [ - "crossbeam-epoch", - "crossbeam-utils", + "find-msvc-tools", + "jobserver", + "libc", + "shlex", ] [[package]] -name = "crossbeam-epoch" -version = "0.9.18" +name = "census" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" -dependencies = [ - "crossbeam-utils", -] +checksum = "4f4c707c6a209cbe82d10abd08e1ea8995e9ea937d2550646e02798948992be0" [[package]] -name = "crossbeam-queue" -version = "0.3.12" +name = "cexpr" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" dependencies = [ - "crossbeam-utils", + "nom 7.1.3", ] [[package]] -name = "crossbeam-utils" -version = "0.8.21" +name = "cfg-if" +version = 
"1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] -name = "crunchy" -version = "0.2.4" +name = "cfg_aliases" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] -name = "crypto-common" -version = "0.1.6" +name = "chrono" +version = "0.4.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" dependencies = [ - "generic-array", - "typenum", + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-link 0.1.3", ] [[package]] -name = "csv" -version = "1.3.1" +name = "chrono-tz" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf" +checksum = "d59ae0466b83e838b81a54256c39d5d7c20b9d7daa10510a242d9b75abd5936e" dependencies = [ - "csv-core", - "itoa", - "ryu", - "serde", + "chrono", + "chrono-tz-build", + "phf 0.11.3", ] [[package]] -name = "csv-core" -version = "0.1.12" +name = "chrono-tz" +version = "0.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d02f3b0da4c6504f86e9cd789d8dbafab48c2321be74e9987593de5a894d93d" +checksum = "a6139a8597ed92cf816dfb33f5dd6cf0bb93a6adc938f11039f371bc5bcd26c3" dependencies = [ - "memchr", + "chrono", + "phf 0.12.1", ] [[package]] -name = "darling" -version = "0.20.11" +name = "chrono-tz-build" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +checksum = "433e39f13c9a060046954e0592a8d0a4bcb1040125cbf91cb8ee58964cfb350f" dependencies = [ - "darling_core", - "darling_macro", + "parse-zoneinfo", + "phf 0.11.3", + "phf_codegen", ] [[package]] -name = "darling_core" -version = "0.20.11" +name = "ciborium" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" dependencies = [ - "fnv", - "ident_case", - "proc-macro2", - "quote", - "strsim", - "syn 2.0.106", + "ciborium-io", + "ciborium-ll", + "serde", ] [[package]] -name = "darling_macro" -version = "0.20.11" +name = "ciborium-io" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" -dependencies = [ - "darling_core", +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + +[[package]] +name = "cipher" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" +dependencies = [ + "crypto-common", + "inout", +] + +[[package]] +name = "clang-sys" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading", +] + +[[package]] +name = "clap" +version = "4.5.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0c2cfd7bf8a6017ddaa4e32ffe7403d547790db06bd171c1c53926faab501623" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a4c05b9e80c5ccd3a7ef080ad7b6ba7d6fc00a985b8b157197075677c82c7a0" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.49" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.108", +] + +[[package]] +name = "clap_lex" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" + +[[package]] +name = "cmake" +version = "0.1.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7caa3f9de89ddbe2c607f4101924c5abec803763ae9534e4f4d7d8f84aa81f0" +dependencies = [ + "cc", +] + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "comfy-table" +version = "7.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0d05af1e006a2407bedef5af410552494ce5be9090444dbbcb57258c1af3d56" +dependencies = [ + "strum 0.26.3", + "strum_macros 0.26.4", + "unicode-width", +] + +[[package]] +name = "concurrent-queue" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "config" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"68578f196d2a33ff61b27fae256c3164f65e36382648e30666dde05b8cc9dfdf" +dependencies = [ + "async-trait", + "convert_case", + "json5", + "nom 7.1.3", + "pathdiff", + "ron", + "rust-ini 0.20.0", + "serde", + "serde_json", + "toml", + "yaml-rust2", +] + +[[package]] +name = "const-oid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" + +[[package]] +name = "const-random" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom 0.2.16", + "once_cell", + "tiny-keccak", +] + +[[package]] +name = "constant_time_eq" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" + +[[package]] +name = "convert_case" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" 
+version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crc" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9710d3b3739c2e349eb44fe848ad0b7c8cb1e42bd87ee49371df2f7acaf3e675" +dependencies = [ + "crc-catalog", +] + +[[package]] +name = "crc-catalog" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" + +[[package]] +name = "crc32c" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a47af21622d091a8f0fb295b88bc886ac74efcc613efc19f5d0b21de5c89e47" +dependencies = [ + "rustc_version", +] + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools 0.10.5", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ 
+ "cast", + "itertools 0.10.5", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-queue" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "csv" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde_core", +] + +[[package]] +name = 
"csv-core" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782" +dependencies = [ + "memchr", +] + +[[package]] +name = "darling" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +dependencies = [ + "darling_core 0.20.11", + "darling_macro 0.20.11", +] + +[[package]] +name = "darling" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0" +dependencies = [ + "darling_core 0.21.3", + "darling_macro 0.21.3", +] + +[[package]] +name = "darling_core" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.108", +] + +[[package]] +name = "darling_core" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1247195ecd7e3c85f83c8d2a366e4210d588e802133e1e355180a9870b517ea4" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.108", +] + +[[package]] +name = "darling_macro" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +dependencies = [ + "darling_core 0.20.11", + "quote", + "syn 2.0.108", +] + +[[package]] +name = "darling_macro" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" +dependencies = [ + "darling_core 0.21.3", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -1432,16 +2063,16 @@ checksum = 
"2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476" [[package]] name = "datafusion" -version = "50.0.0" +version = "50.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "481d0c1cad7606cee11233abcdff8eec46e43dd25abda007db6d5d26ae8483c4" +checksum = "fc6759cf9ef57c5c469e4027ac4b4cfa746e06a0f5472c2b922b6a403c2a64c4" dependencies = [ "arrow", "arrow-ipc", "arrow-schema", "async-trait", "bytes", - "bzip2 0.6.0", + "bzip2 0.6.1", "chrono", "datafusion-catalog", "datafusion-catalog-listing", @@ -1487,9 +2118,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "50.0.0" +version = "50.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d70327e81ab3a1f5832d8b372d55fa607851d7cea6d1f8e65ff0c98fcc32d222" +checksum = "8a1c48fc7e6d62590d45f7be7c531980b8ff091d1ab113a9ddf465bef41e4093" dependencies = [ "arrow", "async-trait", @@ -1513,9 +2144,9 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "50.0.0" +version = "50.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "268819e6bb20ba70a664abddc20deac604f30d3267f8c91847064542a8c0720c" +checksum = "3db1266da115de3ab0b2669fc027d96cf0ff777deb3216d52c74b528446ccdd6" dependencies = [ "arrow", "async-trait", @@ -1536,9 +2167,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "054873d5563f115f83ef4270b560ac2ce4de713905e825a40cac49d6ff348254" +checksum = "5a83760d9a13122d025fbdb1d5d5aaf93dd9ada5e90ea229add92aa30898b2d1" dependencies = [ "ahash", "arrow", @@ -1547,7 +2178,7 @@ dependencies = [ "chrono", "half", "hashbrown 0.14.5", - "indexmap 2.11.4", + "indexmap 2.12.0", "libc", "log", "object_store", @@ -1561,9 +2192,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "50.0.0" +version = "50.3.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8a1d1bc69aaaadb8008b65329ed890b33e845dc063225c190f77b20328fbe1d" +checksum = "5b6234a6c7173fe5db1c6c35c01a12b2aa0f803a3007feee53483218817f8b1e" dependencies = [ "futures", "log", @@ -1572,15 +2203,15 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "50.0.0" +version = "50.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d855160469020982880fd9bd0962e033d2f4728f56f85a83d8c90785638b6519" +checksum = "904c2e1089b3ccf10786f2dae12bc560fda278e4194a8917c5844d2e8c212818" dependencies = [ "arrow", "async-compression", "async-trait", "bytes", - "bzip2 0.6.0", + "bzip2 0.6.1", "chrono", "datafusion-common", "datafusion-common-runtime", @@ -1609,9 +2240,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "50.0.0" +version = "50.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ec3aa7575378d23aae96b955b5233bea6f9d461648174f6ccc8f3c160f2b7a7" +checksum = "8336a805c42ef4e359daaad142ddc53649f23c7e934c117d8516816afe6b7a3d" dependencies = [ "arrow", "async-trait", @@ -1634,9 +2265,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "50.0.0" +version = "50.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00cfb8f33e2864eeb3188b6818acf5546d56a5a487d423cce9b684a554caabfa" +checksum = "c691b1565e245ea369bc8418b472a75ea84c2ad2deb61b1521cfa38319a9cd47" dependencies = [ "arrow", "async-trait", @@ -1659,9 +2290,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" -version = "50.0.0" +version = "50.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab3bfb48fb4ff42ac1485a12ea56434eaab53f7da8f00b2443b1a3d35a0b6d10" +checksum = "f9f7576ceb5974c5f6874d7f2a5ebfeb58960a920da64017def849e0352fe2d8" dependencies = [ "arrow", "async-trait", @@ -1692,15 +2323,15 @@ dependencies = [ [[package]] name = 
"datafusion-doc" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fbf41013cf55c2369b5229594898e8108c8a1beeb49d97feb5e0cce9933eb8f" +checksum = "99ee6b1d9a80d13f9deb2291f45c07044b8e62fb540dbde2453a18be17a36429" [[package]] name = "datafusion-execution" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26fd0c1ffe3885687758f985ed548184bf63b17b2a7a5ae695de422ad6432118" +checksum = "a4cec0a57653bec7b933fb248d3ffa3fa3ab3bd33bd140dc917f714ac036f531" dependencies = [ "arrow", "async-trait", @@ -1718,9 +2349,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c4fe6411218a9dab656437b1e69b00a470a7a2d7db087867a366c145eb164a7" +checksum = "ef76910bdca909722586389156d0aa4da4020e1631994d50fadd8ad4b1aa05fe" dependencies = [ "arrow", "async-trait", @@ -1731,7 +2362,7 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr-common", - "indexmap 2.11.4", + "indexmap 2.12.0", "paste", "recursive", "serde_json", @@ -1740,22 +2371,22 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a45bee7d2606bfb41ceb1d904ba7cecf69bd5a6f8f3e6c57c3f5a83d84bdd97" +checksum = "6d155ccbda29591ca71a1344dd6bed26c65a4438072b400df9db59447f590bb6" dependencies = [ "arrow", "datafusion-common", - "indexmap 2.11.4", + "indexmap 2.12.0", "itertools 0.14.0", "paste", ] [[package]] name = "datafusion-functions" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c7e1c532ff9d14f291160bca23e55ffd4899800301dd2389786c2f02d76904a" +checksum = 
"7de2782136bd6014670fd84fe3b0ca3b3e4106c96403c3ae05c0598577139977" dependencies = [ "arrow", "arrow-buffer", @@ -1782,9 +2413,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b05d47426645aef1e73b1a034c75ab2401bc504175feb191accbe211ec24a342" +checksum = "07331fc13603a9da97b74fd8a273f4238222943dffdbbed1c4c6f862a30105bf" dependencies = [ "ahash", "arrow", @@ -1803,9 +2434,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05c99f648b2b1743de0c1c19eef07e8cc5a085237f172b2e20bf6934e0a804e4" +checksum = "b5951e572a8610b89968a09b5420515a121fbc305c0258651f318dc07c97ab17" dependencies = [ "ahash", "arrow", @@ -1816,9 +2447,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4227782023f4fb68d3d5c5eb190665212f43c9a0b437553e4b938b379aff6cf6" +checksum = "fdacca9302c3d8fc03f3e94f338767e786a88a33f5ebad6ffc0e7b50364b9ea3" dependencies = [ "arrow", "arrow-ord", @@ -1838,9 +2469,9 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "50.0.0" +version = "50.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d902b1769f69058236e89f04f3bff2cf62f24311adb7bf3c6c3e945c9451076" +checksum = "2fa4a380ca362eb0fbd33093e8ca6b7a31057616c7e6ee999b87a4ad3c7c0b3f" dependencies = [ "arrow", "async-trait", @@ -1854,9 +2485,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b8ee43974c92eb9920fe8e97e0fab48675e93b062abcb48bef4c1d4305b6ee4" +checksum = 
"48e2aea7c79c926cffabb13dc27309d4eaeb130f4a21c8ba91cdd241c813652b" dependencies = [ "arrow", "datafusion-common", @@ -1872,9 +2503,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1e149d36cdd44fb425dc815c5fac55025aa9a592dd65cb3c421881096292c02" +checksum = "0fead257ab5fd2ffc3b40fda64da307e20de0040fe43d49197241d9de82a487f" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1882,20 +2513,20 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07c9faa0cdefb6e6e756482b846397b5c2d84d369e30b009472b9ab9b1430fbd" +checksum = "ec6f637bce95efac05cdfb9b6c19579ed4aa5f6b94d951cfa5bb054b7bb4f730" dependencies = [ "datafusion-expr", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] name = "datafusion-optimizer" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f16a4f7059302ad1de6e97ab0eebb5c34405917b1f80806a30a66e38ad118251" +checksum = "c6583ef666ae000a613a837e69e456681a9faa96347bf3877661e9e89e141d8a" dependencies = [ "arrow", "chrono", @@ -1903,7 +2534,7 @@ dependencies = [ "datafusion-expr", "datafusion-expr-common", "datafusion-physical-expr", - "indexmap 2.11.4", + "indexmap 2.12.0", "itertools 0.14.0", "log", "recursive", @@ -1913,9 +2544,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10bb87a605d8ce9672d5347c0293c12211b0c03923fc12fbdc665fe76e6f9e01" +checksum = "c8668103361a272cbbe3a61f72eca60c9b7c706e87cc3565bcf21e2b277b84f6" dependencies = [ "ahash", "arrow", @@ -1926,19 +2557,19 @@ dependencies = [ "datafusion-physical-expr-common", "half", "hashbrown 0.14.5", - 
"indexmap 2.11.4", + "indexmap 2.12.0", "itertools 0.14.0", "log", "parking_lot", "paste", - "petgraph 0.8.2", + "petgraph 0.8.3", ] [[package]] name = "datafusion-physical-expr-adapter" -version = "50.0.0" +version = "50.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2da3a7429a555dd5ff0bec4d24bd5532ec43876764088da635cad55b2f178dc2" +checksum = "086877d4eca538e9cd1f28b917db0036efe0ad8b4fb7c702f520510672032c8d" dependencies = [ "arrow", "datafusion-common", @@ -1951,9 +2582,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "845eb44ef1e04d2a15c6d955cb146b40a41814a7be4377f0a541857d3e257d6f" +checksum = "6652fe7b5bf87e85ed175f571745305565da2c0b599d98e697bcbedc7baa47c3" dependencies = [ "ahash", "arrow", @@ -1965,9 +2596,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "50.0.0" +version = "50.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32b9b648ee2785722c79eae366528e52e93ece6808aef9297cf8e5521de381da" +checksum = "ab9fb8b3fba2634d444e0177862797dc1231e0e20bc4db291a15d39c0d4136c3" dependencies = [ "arrow", "datafusion-common", @@ -1985,9 +2616,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e6688d17b78104e169d7069749832c20ff50f112be853d2c058afe46c889064" +checksum = "e2f7f778a1a838dec124efb96eae6144237d546945587557c9e6936b3414558c" dependencies = [ "ahash", "arrow", @@ -2006,7 +2637,7 @@ dependencies = [ "futures", "half", "hashbrown 0.14.5", - "indexmap 2.11.4", + "indexmap 2.12.0", "itertools 0.14.0", "log", "parking_lot", @@ -2016,9 +2647,9 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "50.0.0" +version = "50.2.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a893a46c56f5f190085e13949eb8ec163672c7ec2ac33bdb82c84572e71ca73" +checksum = "1f84b866d906118c320459f30385048aeedbe36ac06973d3e4fa0cc5d60d722c" dependencies = [ "arrow", "arrow-schema", @@ -2034,9 +2665,9 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8b62684c7a1db6121a8c83100209cffa1e664a8d9ced87e1a32f8cdc2fff3c2" +checksum = "21ef8e2745583619bd7a49474e8f45fbe98ebb31a133f27802217125a7b3d58d" dependencies = [ "arrow", "async-trait", @@ -2058,15 +2689,15 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "50.0.0" +version = "50.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f09cff94b8242843e1da5d069e9d2cfc53807f1f00b1c0da78c297f47c21456e" +checksum = "89abd9868770386fede29e5a4b14f49c0bf48d652c3b9d7a8a0332329b87d50b" dependencies = [ "arrow", "bigdecimal", "datafusion-common", "datafusion-expr", - "indexmap 2.11.4", + "indexmap 2.12.0", "log", "recursive", "regex", @@ -2092,11 +2723,31 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "092966b41edc516079bdf31ec78a2e0588d1d0c08f78b91d8307215928642b2b" +[[package]] +name = "deepsize" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cdb987ec36f6bf7bfbea3f928b75590b736fc42af8e54d97592481351b2b96c" +dependencies = [ + "deepsize_derive", +] + +[[package]] +name = "deepsize_derive" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990101d41f3bc8c1a45641024377ee284ecc338e5ecf3ea0f0e236d897c72796" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "deflate64" -version = "0.1.9" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"da692b8d1080ea3045efaab14434d40468c3d8657e42abddfffca87b428f4c1b" +checksum = "26bf8fc351c5ed29b5c2f0cbbac1b209b74f60ecd62e675a998df72c49af5204" [[package]] name = "delegate" @@ -2116,6 +2767,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb" dependencies = [ "const-oid", + "pem-rfc7468", + "zeroize", ] [[package]] @@ -2136,7 +2789,7 @@ checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -2154,10 +2807,10 @@ version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" dependencies = [ - "darling", + "darling 0.20.11", "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -2167,7 +2820,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ "derive_builder_core", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -2178,7 +2831,7 @@ checksum = "ccfae181bab5ab6c5478b2ccb69e4c68a02f8c3ec72f6616bfec9dbc599d2ee0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -2200,6 +2853,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer", + "const-oid", "crypto-common", "subtle", ] @@ -2222,7 +2876,7 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys 0.61.1", + "windows-sys 0.61.2", ] [[package]] @@ -2239,7 +2893,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -2263,6 +2917,12 @@ version = "1.2.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2" +[[package]] +name = "downcast-rs" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "117240f60069e65410b3ae1bb213295bd828f707b5bec6596a1afc8793ce0cbc" + [[package]] name = "doxygen-rs" version = "0.4.2" @@ -2272,6 +2932,18 @@ dependencies = [ "phf 0.11.3", ] +[[package]] +name = "dunce" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" + +[[package]] +name = "dyn-clone" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" + [[package]] name = "dynamic-graphql" version = "0.10.1" @@ -2290,11 +2962,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c6027c3698e530bf88b37a618a05fd7a5e761dc2777771d5757ff07103f66189" dependencies = [ "Inflector", - "darling", + "darling 0.20.11", "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", "thiserror 2.0.17", ] @@ -2330,7 +3002,7 @@ checksum = "685adfa4d6f3d765a26bc5dbc936577de9abf756c1feeb3089b01dd395034842" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -2356,9 +3028,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.61.1", + "windows-sys 0.61.2", ] +[[package]] +name = "ethnum" +version = "1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca81e6b4777c89fd810c25a4be2b1bd93ea034fbe58e6a75216a34c6b82c539b" + [[package]] name = "event-listener" version = "5.4.1" @@ -2387,7 +3065,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum 
= "74fef4569247a5f429d9156b9d0a2599914385dd189c539334c625d8099d90ab" dependencies = [ "futures-core", - "nom", + "nom 7.1.3", "pin-project-lite", ] @@ -2402,6 +3080,12 @@ dependencies = [ "rand 0.8.5", ] +[[package]] +name = "fast-float2" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8eb564c5c7423d25c886fb561d1e4ee69f72354d16918afa32c08811f6b6a55" + [[package]] name = "fast_chemail" version = "0.9.6" @@ -2411,6 +3095,18 @@ dependencies = [ "ascii_utils", ] +[[package]] +name = "fastbloom" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18c1ddb9231d8554c2d6bdf4cfaabf0c59251658c68b6c95cd52dd0c513a912a" +dependencies = [ + "getrandom 0.3.4", + "libm", + "rand 0.9.2", + "siphasher", +] + [[package]] name = "fastdivide" version = "0.4.2" @@ -2425,9 +3121,9 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "find-msvc-tools" -version = "0.1.2" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ced73b1dacfc750a6db6c0a0c3a3853c8b41997e2e2c563dc90804ae6867959" +checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127" [[package]] name = "fixedbitset" @@ -2447,9 +3143,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.1.2" +version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d" +checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb" dependencies = [ "crc32fast", "libz-rs-sys", @@ -2487,6 +3183,37 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "fs_extra" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" + +[[package]] +name = "fsst" +version = "0.38.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "480fc4f47567da549ab44bb2f37f6db1570c9eff7200e50334b69fa1daa74339" +dependencies = [ + "arrow-array", + "rand 0.9.2", +] + +[[package]] +name = "fst" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ab85b9b05e3978cc9a9cf8fea7f01b494e1a09ed3037e16ba39edc7a29eb61a" +dependencies = [ + "utf8-ranges", +] + +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + [[package]] name = "futures" version = "0.3.31" @@ -2543,7 +3270,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -2582,11 +3309,25 @@ dependencies = [ "slab", ] +[[package]] +name = "generator" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "605183a538e3e2a9c1038635cc5c2d194e2ee8fd0d1b66b8349fad7dbacce5a2" +dependencies = [ + "cc", + "cfg-if", + "libc", + "log", + "rustversion", + "windows", +] + [[package]] name = "generic-array" -version = "0.14.7" +version = "0.14.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +checksum = "4bb6743198531e02858aeaea5398fcc883e71851fcbcb5a2f773e2fb6cb1edf2" dependencies = [ "typenum", "version_check", @@ -2601,30 +3342,24 @@ dependencies = [ "cfg-if", "js-sys", "libc", - "wasi 0.11.1+wasi-snapshot-preview1", + "wasi", "wasm-bindgen", ] [[package]] name = "getrandom" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", 
"js-sys", "libc", "r-efi", - "wasi 0.14.7+wasi-0.2.4", + "wasip2", "wasm-bindgen", ] -[[package]] -name = "gimli" -version = "0.32.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7" - [[package]] name = "glam" version = "0.29.3" @@ -2637,6 +3372,37 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" +[[package]] +name = "gloo-timers" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbb143cf96099802033e0d4f4963b19fd2e0b728bcf076cd9cf7f6634f092994" +dependencies = [ + "futures-channel", + "futures-core", + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "h2" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http 0.2.12", + "indexmap 2.12.0", + "slab", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "h2" version = "0.4.12" @@ -2648,8 +3414,8 @@ dependencies = [ "fnv", "futures-core", "futures-sink", - "http", - "indexmap 2.11.4", + "http 1.3.1", + "indexmap 2.12.0", "slab", "tokio", "tokio-util", @@ -2658,13 +3424,14 @@ dependencies = [ [[package]] name = "half" -version = "2.6.0" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" dependencies = [ "cfg-if", "crunchy", "num-traits", + "zerocopy", ] [[package]] @@ -2732,7 +3499,7 @@ dependencies = [ "base64 0.22.1", "bytes", "headers-core", - "http", + "http 1.3.1", "httpdate", "mime", "sha1", @@ -2744,7 +3511,7 @@ version = "0.3.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "54b4a22553d4242c49fddb9ba998a99962b5cc6f22cb5a3482bec22522403ce4" dependencies = [ - "http", + "http 1.3.1", ] [[package]] @@ -2818,12 +3585,32 @@ dependencies = [ "digest", ] +[[package]] +name = "home" +version = "0.5.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc627f471c528ff0c4a49e1d5e60450c8f6461dd6d10ba9dcd3a61d3dff7728d" +dependencies = [ + "windows-sys 0.61.2", +] + [[package]] name = "htmlescape" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163" +[[package]] +name = "http" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + [[package]] name = "http" version = "1.3.1" @@ -2835,6 +3622,17 @@ dependencies = [ "itoa", ] +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http 0.2.12", + "pin-project-lite", +] + [[package]] name = "http-body" version = "1.0.1" @@ -2842,7 +3640,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", - "http", + "http 1.3.1", ] [[package]] @@ -2853,8 +3651,8 @@ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" dependencies = [ "bytes", "futures-core", - "http", - "http-body", + "http 1.3.1", + "http-body 1.0.1", "pin-project-lite", ] @@ -2876,6 +3674,30 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" +[[package]] +name = 
"hyper" +version = "0.14.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2 0.3.27", + "http 0.2.12", + "http-body 0.4.6", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2 0.5.10", + "tokio", + "tower-service", + "tracing", + "want", +] + [[package]] name = "hyper" version = "1.7.0" @@ -2886,9 +3708,9 @@ dependencies = [ "bytes", "futures-channel", "futures-core", - "h2", - "http", - "http-body", + "h2 0.4.12", + "http 1.3.1", + "http-body 1.0.1", "httparse", "httpdate", "itoa", @@ -2899,22 +3721,38 @@ dependencies = [ "want", ] +[[package]] +name = "hyper-rustls" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" +dependencies = [ + "futures-util", + "http 0.2.12", + "hyper 0.14.32", + "log", + "rustls 0.21.12", + "rustls-native-certs 0.6.3", + "tokio", + "tokio-rustls 0.24.1", +] + [[package]] name = "hyper-rustls" version = "0.27.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" dependencies = [ - "http", - "hyper", + "http 1.3.1", + "hyper 1.7.0", "hyper-util", - "rustls", - "rustls-native-certs 0.8.1", + "rustls 0.23.34", + "rustls-native-certs 0.8.2", "rustls-pki-types", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.4", "tower-service", - "webpki-roots 1.0.2", + "webpki-roots 1.0.3", ] [[package]] @@ -2923,7 +3761,7 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" dependencies = [ - "hyper", + "hyper 1.7.0", "hyper-util", "pin-project-lite", "tokio", @@ -2941,19 +3779,28 @@ dependencies = [ "futures-channel", "futures-core", 
"futures-util", - "http", - "http-body", - "hyper", + "http 1.3.1", + "http-body 1.0.1", + "hyper 1.7.0", "ipnet", "libc", "percent-encoding", "pin-project-lite", - "socket2 0.6.0", + "socket2 0.6.1", "tokio", "tower-service", "tracing", ] +[[package]] +name = "hyperloglogplus" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "621debdf94dcac33e50475fdd76d34d5ea9c0362a834b9db08c3024696c1fbe3" +dependencies = [ + "serde", +] + [[package]] name = "iana-time-zone" version = "0.1.64" @@ -2966,7 +3813,7 @@ dependencies = [ "js-sys", "log", "wasm-bindgen", - "windows-core", + "windows-core 0.62.2", ] [[package]] @@ -3099,13 +3946,14 @@ checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg", "hashbrown 0.12.3", + "serde", ] [[package]] name = "indexmap" -version = "2.11.4" +version = "2.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b0f83760fb341a774ed326568e19f5a863af4a952def8c39f9ab92fd95b88e5" +checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f" dependencies = [ "equivalent", "hashbrown 0.16.0", @@ -3116,9 +3964,12 @@ dependencies = [ [[package]] name = "indoc" -version = "2.0.6" +version = "2.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" +checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" +dependencies = [ + "rustversion", +] [[package]] name = "inout" @@ -3126,6 +3977,7 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" dependencies = [ + "block-padding", "generic-array", ] @@ -3156,17 +4008,6 @@ dependencies = [ "rustversion", ] -[[package]] -name = "io-uring" -version = "0.7.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"046fa2d4d00aea763528b4950358d0ead425372445dc8ff86312b3c69ff7727b" -dependencies = [ - "bitflags", - "cfg-if", - "libc", -] - [[package]] name = "ipnet" version = "2.11.0" @@ -3185,136 +4026,717 @@ dependencies = [ [[package]] name = "is-terminal" -version = "0.4.16" +version = "0.4.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9" +checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" dependencies = [ "hermit-abi", "libc", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] name = "is_terminal_polyfill" -version = "1.70.1" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "iter-enum" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c52f2d5e063459674b4735f21870dd911e0d96dbfebb984650068195c2df838" +dependencies = [ + "derive_utils", +] + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + 
+[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "jiff" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49" +dependencies = [ + "jiff-static", + "jiff-tzdb-platform", + "log", + "portable-atomic", + "portable-atomic-util", + "serde", + "windows-sys 0.59.0", +] + +[[package]] +name = "jiff-static" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.108", +] + +[[package]] +name = "jiff-tzdb" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1283705eb0a21404d2bfd6eef2a7593d240bc42a0bdb39db0ad6fa2ec026524" + +[[package]] +name = "jiff-tzdb-platform" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "875a5a69ac2bab1a891711cf5eccbec1ce0341ea805560dcd90b7a2e925132e8" +dependencies = [ + "jiff-tzdb", +] + +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.4", + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec48937a97411dcb524a265206ccd4c90bb711fca92b2792c407f268825b9305" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + 
+[[package]] +name = "json5" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96b0db21af676c1ce64250b5f40f3ce2cf27e4e47cb91ed91eb6fe9350b430c1" +dependencies = [ + "pest", + "pest_derive", + "serde", +] + +[[package]] +name = "jsonb" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a452366d21e8d3cbca680c41388e01d6a88739afef7877961946a6da409f9ccd" +dependencies = [ + "byteorder", + "ethnum", + "fast-float2", + "itoa", + "jiff", + "nom 8.0.0", + "num-traits", + "ordered-float 5.1.0", + "rand 0.9.2", + "ryu", + "serde", + "serde_json", +] + +[[package]] +name = "jsonwebtoken" +version = "9.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a87cc7a48537badeae96744432de36f4be2b4a34a05a5ef32e9dd8a1c169dde" +dependencies = [ + "base64 0.22.1", + "js-sys", + "pem", + "ring", + "serde", + "serde_json", + "simple_asn1", +] + +[[package]] +name = "kdam" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5740f66a8d86a086ebcacfb937070e8be6eb2f8fb45e4ae7fa428ca2a98a7b1f" +dependencies = [ + "pyo3", + "terminal_size", + "windows-sys 0.59.0", +] + +[[package]] +name = "lance" +version = "0.38.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e2d2472f58d01894bc5f0a9f9d28dfca4649c9e28faf467c47e87f788ef322b" +dependencies = [ + "arrow", + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-ipc", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "async-recursion", + "async-trait", + "async_cell", + "aws-credential-types", + "aws-sdk-dynamodb", + "byteorder", + "bytes", + "chrono", + "dashmap", + "datafusion", + "datafusion-expr", + "datafusion-functions", + "datafusion-physical-expr", + "datafusion-physical-plan", + "deepsize", + "either", + "futures", + "half", + "humantime", + "itertools 0.13.0", + "lance-arrow", + "lance-core", + "lance-datafusion", 
+ "lance-encoding", + "lance-file", + "lance-index", + "lance-io", + "lance-linalg", + "lance-table", + "log", + "moka", + "object_store", + "permutation", + "pin-project", + "prost", + "prost-types", + "rand 0.9.2", + "roaring", + "serde", + "serde_json", + "snafu", + "tantivy 0.24.2", + "tokio", + "tokio-stream", + "tracing", + "url", + "uuid", +] + +[[package]] +name = "lance-arrow" +version = "0.38.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2abba8770c4217fbdc8b517cdfb7183639b02dc5c2bcad1e7c69ffdcf4fbe1a" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "arrow-select", + "bytes", + "getrandom 0.2.16", + "half", + "jsonb", + "num-traits", + "rand 0.9.2", +] + +[[package]] +name = "lance-bitpacking" +version = "0.38.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +checksum = "efb7af69bff8d8499999684f961b0a4dc6e159065c773041545d19bc158f0814" +dependencies = [ + "arrayref", + "paste", + "seq-macro", +] [[package]] -name = "iter-enum" -version = "1.2.0" +name = "lance-core" +version = "0.38.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c52f2d5e063459674b4735f21870dd911e0d96dbfebb984650068195c2df838" +checksum = "356a5df5f9cd7cb4aedaf78a4e346190ae50ba574b828316caed7d1df3b6dcd8" dependencies = [ - "derive_utils", + "arrow-array", + "arrow-buffer", + "arrow-schema", + "async-trait", + "byteorder", + "bytes", + "chrono", + "datafusion-common", + "datafusion-sql", + "deepsize", + "futures", + "lance-arrow", + "libc", + "log", + "mock_instant", + "moka", + "num_cpus", + "object_store", + "pin-project", + "prost", + "rand 0.9.2", + "roaring", + "serde_json", + "snafu", + "tempfile", + "tokio", + "tokio-stream", + "tokio-util", + "tracing", + "url", ] [[package]] -name = "itertools" -version = "0.10.5" +name = "lance-datafusion" +version = "0.38.2" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +checksum = "b8e8ec07021bdaba6a441563d8fbcb0431350aae6842910ae3622557765f218f" dependencies = [ - "either", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "arrow-select", + "async-trait", + "datafusion", + "datafusion-common", + "datafusion-functions", + "datafusion-physical-expr", + "futures", + "jsonb", + "lance-arrow", + "lance-core", + "lance-datagen", + "log", + "pin-project", + "prost", + "snafu", + "tokio", + "tracing", ] [[package]] -name = "itertools" -version = "0.11.0" +name = "lance-datagen" +version = "0.38.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +checksum = "d4fe98730cd5297dc68b22f6ad7e1e27cf34e2db05586b64d3540ca74a519a61" dependencies = [ - "either", + "arrow", + "arrow-array", + "arrow-cast", + "arrow-schema", + "chrono", + "futures", + "half", + "hex", + "rand 0.9.2", + "rand_xoshiro", + "random_word", ] [[package]] -name = "itertools" -version = "0.12.1" +name = "lance-encoding" +version = "0.38.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +checksum = "ef073d419cc00ef41dd95cb25203b333118b224151ae397145530b1d559769c9" dependencies = [ - "either", + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "arrow-select", + "bytemuck", + "byteorder", + "bytes", + "fsst", + "futures", + "hex", + "hyperloglogplus", + "itertools 0.13.0", + "lance-arrow", + "lance-bitpacking", + "lance-core", + "log", + "lz4", + "num-traits", + "prost", + "prost-build", + "prost-types", + "rand 0.9.2", + "snafu", + "strum 0.25.0", + "tokio", + "tracing", + "xxhash-rust", + "zstd", ] [[package]] -name = "itertools" -version = "0.13.0" +name = 
"lance-file" +version = "0.38.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +checksum = "0e34aba3a41f119188da997730560e4a6915ee5a38b672bbf721fdc99121aa1e" dependencies = [ - "either", + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "async-recursion", + "async-trait", + "byteorder", + "bytes", + "datafusion-common", + "deepsize", + "futures", + "lance-arrow", + "lance-core", + "lance-encoding", + "lance-io", + "log", + "num-traits", + "object_store", + "prost", + "prost-build", + "prost-types", + "roaring", + "snafu", + "tokio", + "tracing", ] [[package]] -name = "itertools" -version = "0.14.0" +name = "lance-index" +version = "0.38.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +checksum = "c5f480f801c8efb41a6dedc48a5cacff6044a10f82c6f9764b8dac7194a7754e" dependencies = [ - "either", + "arrow", + "arrow-arith", + "arrow-array", + "arrow-ord", + "arrow-schema", + "arrow-select", + "async-channel", + "async-recursion", + "async-trait", + "bitpacking", + "bitvec", + "bytes", + "crossbeam-queue", + "datafusion", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-sql", + "deepsize", + "dirs", + "fastbloom", + "fst", + "futures", + "half", + "itertools 0.13.0", + "jsonb", + "lance-arrow", + "lance-core", + "lance-datafusion", + "lance-datagen", + "lance-encoding", + "lance-file", + "lance-io", + "lance-linalg", + "lance-table", + "libm", + "log", + "ndarray", + "num-traits", + "object_store", + "prost", + "prost-build", + "prost-types", + "rand 0.9.2", + "rand_distr 0.5.1", + "rayon", + "roaring", + "serde", + "serde_json", + "snafu", + "tantivy 0.24.2", + "tempfile", + "tokio", + "tracing", + "twox-hash", + "uuid", ] [[package]] -name = "itoa" -version = "1.0.15" +name = 
"lance-io" +version = "0.38.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +checksum = "0708125c74965b2b7e5e0c4fe2d8e6bd8346a7031484f8844cf06c08bfa29a72" +dependencies = [ + "arrow", + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "arrow-select", + "async-recursion", + "async-trait", + "aws-config", + "aws-credential-types", + "byteorder", + "bytes", + "chrono", + "deepsize", + "futures", + "lance-arrow", + "lance-core", + "log", + "object_store", + "object_store_opendal", + "opendal", + "path_abs", + "pin-project", + "prost", + "rand 0.9.2", + "serde", + "shellexpand", + "snafu", + "tokio", + "tracing", + "url", +] [[package]] -name = "jobserver" -version = "0.1.34" +name = "lance-linalg" +version = "0.38.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +checksum = "da9d1c22deed92420a1869e4b89188ccecc7e1aee2ea4e5bca92eae861511d60" dependencies = [ - "getrandom 0.3.3", - "libc", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "bitvec", + "cc", + "deepsize", + "futures", + "half", + "lance-arrow", + "lance-core", + "log", + "num-traits", + "rand 0.9.2", + "rayon", + "tokio", + "tracing", ] [[package]] -name = "js-sys" -version = "0.3.81" +name = "lance-namespace" +version = "0.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec48937a97411dcb524a265206ccd4c90bb711fca92b2792c407f268825b9305" +checksum = "7c0629165b5d85ff305f2de8833dcee507e899b36b098864c59f14f3b8b8e62d" dependencies = [ - "once_cell", - "wasm-bindgen", + "arrow", + "async-trait", + "bytes", + "lance", + "lance-namespace-reqwest-client", + "opendal", + "reqwest", + "serde_json", + "thiserror 1.0.69", + "url", ] [[package]] -name = "json5" -version = "0.4.1" +name = "lance-namespace-reqwest-client" 
+version = "0.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96b0db21af676c1ce64250b5f40f3ce2cf27e4e47cb91ed91eb6fe9350b430c1" +checksum = "3ea349999bcda4eea53fc05d334b3775ec314761e6a706555c777d7a29b18d19" dependencies = [ - "pest", - "pest_derive", + "reqwest", "serde", + "serde_json", + "serde_repr", + "url", ] [[package]] -name = "jsonwebtoken" -version = "9.3.1" +name = "lance-table" +version = "0.38.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a87cc7a48537badeae96744432de36f4be2b4a34a05a5ef32e9dd8a1c169dde" +checksum = "805e6c64efbb3295f74714668c9033121ffdfa6c868f067024e65ade700b8b8b" dependencies = [ - "base64 0.22.1", - "js-sys", - "pem", - "ring", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ipc", + "arrow-schema", + "async-trait", + "aws-credential-types", + "aws-sdk-dynamodb", + "byteorder", + "bytes", + "chrono", + "deepsize", + "futures", + "lance-arrow", + "lance-core", + "lance-file", + "lance-io", + "log", + "object_store", + "prost", + "prost-build", + "prost-types", + "rand 0.9.2", + "rangemap", + "roaring", "serde", "serde_json", - "simple_asn1", + "snafu", + "tokio", + "tracing", + "url", + "uuid", +] + +[[package]] +name = "lance-testing" +version = "0.38.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ac735b5eb153a6ac841ce0206e4c30df941610c812cc89c8ae20006f8d0b018" +dependencies = [ + "arrow-array", + "arrow-schema", + "lance-arrow", + "num-traits", + "rand 0.9.2", ] [[package]] -name = "kdam" -version = "0.6.3" +name = "lancedb" +version = "0.22.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5740f66a8d86a086ebcacfb937070e8be6eb2f8fb45e4ae7fa428ca2a98a7b1f" +checksum = "5c54bd65c4edfefdaf93804d3852445ae169adf7ba850933370bb67b50d76602" dependencies = [ - "pyo3", - "terminal_size", - "windows-sys 0.59.0", + "arrow", + "arrow-array", + "arrow-cast", + "arrow-data", + "arrow-ipc", + "arrow-ord", + 
"arrow-schema", + "async-trait", + "bytemuck_derive", + "bytes", + "chrono", + "crunchy", + "datafusion-catalog", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-plan", + "futures", + "half", + "lance", + "lance-datafusion", + "lance-encoding", + "lance-index", + "lance-io", + "lance-linalg", + "lance-namespace", + "lance-table", + "lance-testing", + "lazy_static", + "log", + "moka", + "num-traits", + "object_store", + "pin-project", + "regex", + "semver", + "serde", + "serde_json", + "serde_with", + "snafu", + "tokio", + "url", ] [[package]] @@ -3322,6 +4744,9 @@ name = "lazy_static" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +dependencies = [ + "spin", +] [[package]] name = "levenshtein_automata" @@ -3394,9 +4819,19 @@ checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" [[package]] name = "libc" -version = "0.2.176" +version = "0.2.177" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" + +[[package]] +name = "libloading" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link 0.2.1", +] [[package]] name = "libm" @@ -3406,9 +4841,9 @@ checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" [[package]] name = "libredox" -version = "0.1.6" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4488594b9328dee448adb906d8b126d9b7deb7cf5c22161ee591610bb1be83c0" +checksum = "416f7e718bdb06000964960ffa43b4335ad4012ae8b99060261aa4a8088d5ccb" dependencies = [ "bitflags", "libc", @@ -3454,11 +4889,10 @@ 
dependencies = [ [[package]] name = "lock_api" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" dependencies = [ - "autocfg", "scopeguard", "serde", ] @@ -3469,6 +4903,19 @@ version = "0.4.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" +[[package]] +name = "loom" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "419e0dc8046cb947daa77eb95ae174acfbddb7673b4151f56d1eed8e93fbfaca" +dependencies = [ + "cfg-if", + "generator", + "scoped-tls", + "tracing", + "tracing-subscriber", +] + [[package]] name = "lru" version = "0.12.5" @@ -3484,6 +4931,25 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" +[[package]] +name = "lz4" +version = "1.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a20b523e860d03443e98350ceaac5e71c6ba89aea7d960769ec3ce37f4de5af4" +dependencies = [ + "lz4-sys", +] + +[[package]] +name = "lz4-sys" +version = "1.11.1+lz4-1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bd8c0d6c6ed0cd30b3652886bb8711dc4bb01d637a68105a3d5158039b418e6" +dependencies = [ + "cc", + "libc", +] + [[package]] name = "lz4_flex" version = "0.11.5" @@ -3529,6 +4995,12 @@ version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" +[[package]] +name = "matchit" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" + [[package]] name = 
"matrixmultiply" version = "0.3.10" @@ -3536,7 +5008,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a06de3016e9fae57a36fd14dba131fccf49f74b40b7fbdb472f96e361ec71a08" dependencies = [ "autocfg", + "num_cpus", + "once_cell", "rawpointer", + "thread-tree", ] [[package]] @@ -3559,6 +5034,15 @@ dependencies = [ "log", ] +[[package]] +name = "measure_time" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51c55d61e72fc3ab704396c5fa16f4c184db37978ae4e94ca8959693a235fc0e" +dependencies = [ + "log", +] + [[package]] name = "memchr" version = "2.7.6" @@ -3567,9 +5051,9 @@ checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" [[package]] name = "memmap2" -version = "0.9.8" +version = "0.9.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843a98750cd611cc2965a8213b53b43e715f13c37a9e096c6408e69990961db7" +checksum = "744133e4a0e0a658e1374cf3bf8e415c4052a15a111acd372764c55b4177d490" dependencies = [ "libc", ] @@ -3632,17 +5116,27 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ "adler2", + "simd-adler32", ] [[package]] name = "mio" -version = "1.0.4" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" +checksum = "69d83b0086dc8ecf3ce9ae2874b2d1290252e2a30720bea58a5c6639b0092873" dependencies = [ "libc", - "wasi 0.11.1+wasi-snapshot-preview1", - "windows-sys 0.59.0", + "wasi", + "windows-sys 0.61.2", +] + +[[package]] +name = "mock_instant" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9366861eb2a2c436c20b12c8dbec5f798cea6b47ad99216be0282942e2c81ea0" +dependencies = [ + "once_cell", ] [[package]] @@ -3675,7 +5169,7 @@ dependencies = [ "bytes", "encoding_rs", 
"futures-util", - "http", + "http 1.3.1", "httparse", "memchr", "mime", @@ -3729,11 +5223,11 @@ dependencies = [ "paste", "pin-project-lite", "rustls-native-certs 0.7.3", - "rustls-pemfile", + "rustls-pemfile 2.2.0", "serde", "thiserror 1.0.69", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.4", "url", "webpki-roots 0.26.11", ] @@ -3745,7 +5239,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53a0d57c55d2d1dc62a2b1d16a0a1079eb78d67c36bdf468d582ab4482ec7002" dependencies = [ "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -3776,13 +5270,22 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "nom" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405" +dependencies = [ + "memchr", +] + [[package]] name = "nu-ansi-term" -version = "0.50.1" +version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4a28e057d01f97e61255210fcff094d74ed0466038633e95017f5beb68e4399" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -3809,6 +5312,23 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-bigint-dig" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc84195820f291c7697304f3cbdadd1cb7199c0efc917ff5eafd71225c136151" +dependencies = [ + "byteorder", + "lazy_static", + "libm", + "num-integer", + "num-iter", + "num-traits", + "rand 0.8.5", + "smallvec", + "zeroize", +] + [[package]] name = "num-complex" version = "0.4.6" @@ -3892,15 +5412,6 @@ dependencies = [ "rustc-hash 2.1.1", ] -[[package]] -name = "object" -version = "0.37.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" -dependencies = [ - "memchr", -] - 
[[package]] name = "object_store" version = "0.12.4" @@ -3908,14 +5419,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c1be0c6c22ec0817cdc77d3842f721a17fd30ab6965001415b5402a74e6b740" dependencies = [ "async-trait", + "base64 0.22.1", "bytes", "chrono", + "form_urlencoded", "futures", - "http", + "http 1.3.1", + "http-body-util", + "httparse", "humantime", + "hyper 1.7.0", "itertools 0.14.0", + "md-5", "parking_lot", "percent-encoding", + "quick-xml 0.38.3", + "rand 0.9.2", + "reqwest", + "ring", + "rustls-pemfile 2.2.0", + "serde", + "serde_json", + "serde_urlencoded", "thiserror 2.0.17", "tokio", "tracing", @@ -3925,6 +5450,21 @@ dependencies = [ "web-time", ] +[[package]] +name = "object_store_opendal" +version = "0.54.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0b88fc0e0c4890c1d99e2b8c519c5db40f7d9b69a0f562ff1ad4967a4c8bbc6" +dependencies = [ + "async-trait", + "bytes", + "futures", + "object_store", + "opendal", + "pin-project", + "tokio", +] + [[package]] name = "once_cell" version = "1.21.3" @@ -3933,9 +5473,9 @@ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "once_cell_polyfill" -version = "1.70.1" +version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" [[package]] name = "oneshot" @@ -3949,6 +5489,35 @@ version = "11.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" +[[package]] +name = "opendal" +version = "0.54.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42afda58fa2cf50914402d132cc1caacff116a85d10c72ab2082bb7c50021754" +dependencies = [ + "anyhow", + "backon", + "base64 0.22.1", + "bytes", + "chrono", + "crc32c", + 
"futures", + "getrandom 0.2.16", + "http 1.3.1", + "http-body 1.0.1", + "log", + "md-5", + "percent-encoding", + "quick-xml 0.38.3", + "reqsign", + "reqwest", + "serde", + "serde_json", + "sha2", + "tokio", + "uuid", +] + [[package]] name = "openssl-probe" version = "0.1.6" @@ -3977,7 +5546,7 @@ checksum = "91cf61a1868dacc576bf2b2a1c3e9ab150af7272909e80085c3173384fe11f76" dependencies = [ "async-trait", "futures-core", - "http", + "http 1.3.1", "opentelemetry", "opentelemetry-proto", "opentelemetry_sdk", @@ -4045,6 +5614,15 @@ dependencies = [ "num-traits", ] +[[package]] +name = "ordered-float" +version = "5.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f4779c6901a562440c3786d08192c6fbda7c1c2060edd10006b05ee35d10f2d" +dependencies = [ + "num-traits", +] + [[package]] name = "ordered-multimap" version = "0.7.3" @@ -4076,9 +5654,15 @@ dependencies = [ "proc-macro2", "proc-macro2-diagnostics", "quote", - "syn 2.0.106", + "syn 2.0.108", ] +[[package]] +name = "outref" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" + [[package]] name = "ownedbytes" version = "0.7.0" @@ -4088,6 +5672,15 @@ dependencies = [ "stable_deref_trait", ] +[[package]] +name = "ownedbytes" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fbd56f7631767e61784dc43f8580f403f4475bd4aaa4da003e6295e1bab4a7e" +dependencies = [ + "stable_deref_trait", +] + [[package]] name = "page_size" version = "0.6.0" @@ -4106,9 +5699,9 @@ checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" [[package]] name = "parking_lot" -version = "0.12.4" +version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" 
dependencies = [ "lock_api", "parking_lot_core", @@ -4116,15 +5709,15 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.11" +version = "0.9.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" dependencies = [ "cfg-if", "libc", "redox_syscall", "smallvec", - "windows-targets 0.52.6", + "windows-link 0.2.1", ] [[package]] @@ -4179,6 +5772,18 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "path_abs" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05ef02f6342ac01d8a93b65f96db53fe68a92a15f41144f97fb00a9e669633c3" +dependencies = [ + "serde", + "serde_derive", + "std_prelude", + "stfu8", +] + [[package]] name = "pathdiff" version = "0.2.3" @@ -4197,12 +5802,21 @@ dependencies = [ [[package]] name = "pem" -version = "3.0.5" +version = "3.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38af38e8470ac9dee3ce1bae1af9c1671fffc44ddfd8bd1d0a3445bf349a8ef3" +checksum = "1d30c53c26bc5b31a98cd02d20f25a7c8567146caf63ed593a9d87b2775291be" dependencies = [ "base64 0.22.1", - "serde", + "serde_core", +] + +[[package]] +name = "pem-rfc7468" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" +dependencies = [ + "base64ct", ] [[package]] @@ -4211,22 +5825,27 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "permutation" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"df202b0b0f5b8e389955afd5f27b007b00fb948162953f1db9c70d2c7e3157d7" + [[package]] name = "pest" -version = "2.8.2" +version = "2.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21e0a3a33733faeaf8651dfee72dd0f388f0c8e5ad496a3478fa5a922f49cfa8" +checksum = "989e7521a040efde50c3ab6bbadafbe15ab6dc042686926be59ac35d74607df4" dependencies = [ "memchr", - "thiserror 2.0.17", "ucd-trie", ] [[package]] name = "pest_derive" -version = "2.8.2" +version = "2.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc58706f770acb1dbd0973e6530a3cff4746fb721207feb3a8a6064cd0b6c663" +checksum = "187da9a3030dbafabbbfb20cb323b976dc7b7ce91fcd84f2f74d6e31d378e2de" dependencies = [ "pest", "pest_generator", @@ -4234,22 +5853,22 @@ dependencies = [ [[package]] name = "pest_generator" -version = "2.8.2" +version = "2.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d4f36811dfe07f7b8573462465d5cb8965fffc2e71ae377a33aecf14c2c9a2f" +checksum = "49b401d98f5757ebe97a26085998d6c0eecec4995cad6ab7fc30ffdf4b052843" dependencies = [ "pest", "pest_meta", "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] name = "pest_meta" -version = "2.8.2" +version = "2.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42919b05089acbd0a5dcd5405fb304d17d1053847b81163d09c4ad18ce8e8420" +checksum = "72f27a2cfee9f9039c4d86faa5af122a0ac3851441a34865b8a043b46be0065a" dependencies = [ "pest", "sha2", @@ -4262,18 +5881,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" dependencies = [ "fixedbitset", - "indexmap 2.11.4", + "indexmap 2.12.0", ] [[package]] name = "petgraph" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54acf3a685220b533e437e264e4d932cfbdc4cc7ec0cd232ed73c08d03b8a7ca" +checksum = 
"8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" dependencies = [ "fixedbitset", "hashbrown 0.15.5", - "indexmap 2.11.4", + "indexmap 2.12.0", "serde", ] @@ -4326,7 +5945,7 @@ dependencies = [ "phf_shared 0.11.3", "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -4364,7 +5983,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -4379,6 +5998,44 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "pkcs1" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f" +dependencies = [ + "der", + "pkcs8", + "spki", +] + +[[package]] +name = "pkcs5" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e847e2c91a18bfa887dd028ec33f2fe6f25db77db3619024764914affe8b69a6" +dependencies = [ + "aes", + "cbc", + "der", + "pbkdf2", + "scrypt", + "sha2", + "spki", +] + +[[package]] +name = "pkcs8" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" +dependencies = [ + "der", + "pkcs5", + "rand_core 0.6.4", + "spki", +] + [[package]] name = "pkg-config" version = "0.3.32" @@ -4425,10 +6082,10 @@ dependencies = [ "futures-util", "headers", "hex", - "http", + "http 1.3.1", "http-body-util", "httpdate", - "hyper", + "hyper 1.7.0", "hyper-util", "mime", "mime_guess", @@ -4462,7 +6119,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -4525,7 +6182,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -4534,14 +6191,14 @@ version = "3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" dependencies = [ - "toml_edit 0.23.6", + "toml_edit 0.23.7", ] [[package]] name = "proc-macro2" -version = "1.0.101" +version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" dependencies = [ "unicode-ident", ] @@ -4554,7 +6211,7 @@ checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", "version_check", "yansi", ] @@ -4587,7 +6244,7 @@ checksum = "4ee1c9ac207483d5e7db4940700de86a9aae46ef90c48b57f99fe7edb8345e49" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -4616,7 +6273,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.106", + "syn 2.0.108", "tempfile", ] @@ -4630,7 +6287,7 @@ dependencies = [ "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -4644,9 +6301,9 @@ dependencies = [ [[package]] name = "psm" -version = "0.1.26" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e944464ec8536cd1beb0bbfd96987eb5e3b72f2ecdafdc5c769a37f1fa2ae1f" +checksum = "e66fcd288453b748497d8fb18bccc83a16b0518e3906d4b8df0a8d42d93dbb1c" dependencies = [ "cc", ] @@ -4659,7 +6316,7 @@ checksum = "8970a78afe0628a3e3430376fc5fd76b6b45c4d43360ffd6cdd40bdde72b682a" dependencies = [ "chrono", "chrono-tz 0.10.4", - "indexmap 2.11.4", + "indexmap 2.12.0", "indoc", "inventory", "libc", @@ -4685,7 +6342,7 @@ dependencies = [ "arrow-schema", 
"arrow-select", "half", - "indexmap 2.11.4", + "indexmap 2.12.0", "numpy", "pyo3", "thiserror 1.0.69", @@ -4720,7 +6377,7 @@ dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -4733,7 +6390,7 @@ dependencies = [ "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -4746,7 +6403,27 @@ checksum = "5a651516ddc9168ebd67b24afd085a718be02f8858fe406591b013d101ce2f40" name = "quick-error" version = "1.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + +[[package]] +name = "quick-xml" +version = "0.37.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "331e97a1af0bf59823e6eadffe373d7b27f485be8748f71471c662c1f269b7fb" +dependencies = [ + "memchr", + "serde", +] + +[[package]] +name = "quick-xml" +version = "0.38.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42a232e7487fc2ef313d96dde7948e7a3c05101870d8985e4fd8d26aedd27b89" +dependencies = [ + "memchr", + "serde", +] [[package]] name = "quickcheck" @@ -4771,8 +6448,8 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash 2.1.1", - "rustls", - "socket2 0.6.0", + "rustls 0.23.34", + "socket2 0.6.1", "thiserror 2.0.17", "tokio", "tracing", @@ -4786,12 +6463,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31" dependencies = [ "bytes", - "getrandom 0.3.3", + "getrandom 0.3.4", "lru-slab", "rand 0.9.2", "ring", "rustc-hash 2.1.1", - "rustls", + "rustls 0.23.34", "rustls-pki-types", "slab", "thiserror 2.0.17", @@ -4809,7 +6486,7 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.6.0", + "socket2 0.6.1", "tracing", "windows-sys 0.60.2", ] @@ -4829,6 +6506,12 @@ version = 
"5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + [[package]] name = "rand" version = "0.8.5" @@ -4885,7 +6568,7 @@ version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" dependencies = [ - "getrandom 0.3.3", + "getrandom 0.3.4", ] [[package]] @@ -4898,6 +6581,16 @@ dependencies = [ "rand 0.8.5", ] +[[package]] +name = "rand_distr" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463" +dependencies = [ + "num-traits", + "rand 0.9.2", +] + [[package]] name = "rand_xorshift" version = "0.4.0" @@ -4907,16 +6600,46 @@ dependencies = [ "rand_core 0.9.3", ] +[[package]] +name = "rand_xoshiro" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f703f4665700daf5512dcca5f43afa6af89f09db47fb56be587f80636bda2d41" +dependencies = [ + "rand_core 0.9.3", +] + +[[package]] +name = "random_word" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e47a395bdb55442b883c89062d6bcff25dc90fa5f8369af81e0ac6d49d78cf81" +dependencies = [ + "ahash", + "brotli 8.0.2", + "paste", + "rand 0.9.2", + "unicase", +] + +[[package]] +name = "rangemap" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f93e7e49bb0bf967717f7bd674458b3d6b0c5f48ec7e3038166026a69fc22223" + [[package]] name = "raphtory" version = "0.16.3" dependencies = [ "ahash", "arrow", + "arrow-array", "arrow-json", "arroy", "async-openai", "async-trait", + "axum 0.8.6", "bigdecimal", "bincode", 
"bytemuck", @@ -4932,11 +6655,12 @@ dependencies = [ "glam", "hashbrown 0.15.5", "heed", - "indexmap 2.11.4", + "indexmap 2.12.0", "indoc", "iter-enum", "itertools 0.13.0", "kdam", + "lancedb", "memmap2", "minijinja", "minijinja-contrib", @@ -4962,7 +6686,7 @@ dependencies = [ "pyo3-arrow", "quad-rand", "rand 0.8.5", - "rand_distr", + "rand_distr 0.4.3", "raphtory", "raphtory-api", "raphtory-core", @@ -4976,7 +6700,7 @@ dependencies = [ "serde_json", "streaming-stats", "strsim", - "tantivy", + "tantivy 0.22.1", "tempfile", "thiserror 2.0.17", "tokio", @@ -5239,34 +6963,54 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] name = "redox_syscall" -version = "0.5.17" +version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ "bitflags", ] [[package]] name = "redox_users" -version = "0.5.0" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd6f9d3d47bdd2ad6945c5015a226ec6155d0bcdfd8f7cd29f86b71f8de99d2b" +checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" dependencies = [ "getrandom 0.2.16", "libredox", "thiserror 2.0.17", ] +[[package]] +name = "ref-cast" +version = "1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f354300ae66f76f1c85c5f84693f0ce81d747e2c3f21a45fef496d89c960bf7d" +dependencies = [ + "ref-cast-impl", +] + +[[package]] +name = "ref-cast-impl" +version = "1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.108", +] + 
[[package]] name = "regex" -version = "1.11.3" +version = "1.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b5288124840bee7b386bc413c487869b360b2b4ec421ea56425128692f2a82c" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" dependencies = [ "aho-corasick", "memchr", @@ -5276,53 +7020,95 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.11" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "833eb9ce86d40ef33cb1306d8accf7bc8ec2bfea4355cbdebb3df68b40925cad" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" dependencies = [ "aho-corasick", "memchr", "regex-syntax", ] +[[package]] +name = "regex-lite" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d942b98df5e658f56f20d592c7f868833fe38115e65c33003d8cd224b0155da" + [[package]] name = "regex-syntax" -version = "0.8.6" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + +[[package]] +name = "reqsign" +version = "0.16.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43451dbf3590a7590684c25fb8d12ecdcc90ed3ac123433e500447c7d77ed701" +dependencies = [ + "anyhow", + "async-trait", + "base64 0.22.1", + "chrono", + "form_urlencoded", + "getrandom 0.2.16", + "hex", + "hmac", + "home", + "http 1.3.1", + "jsonwebtoken", + "log", + "once_cell", + "percent-encoding", + "quick-xml 0.37.5", + "rand 0.8.5", + "reqwest", + "rsa", + "rust-ini 0.21.3", + "serde", + "serde_json", + "sha1", + "sha2", + "tokio", +] [[package]] name = "reqwest" -version = "0.12.23" +version = "0.12.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"d429f34c8092b2d42c7c93cec323bb4adeb7c67698f70839adec842ec10c7ceb" +checksum = "9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f" dependencies = [ + "async-compression", "base64 0.22.1", "bytes", + "encoding_rs", "futures-channel", "futures-core", "futures-util", - "http", - "http-body", + "h2 0.4.12", + "http 1.3.1", + "http-body 1.0.1", "http-body-util", - "hyper", - "hyper-rustls", + "hyper 1.7.0", + "hyper-rustls 0.27.7", "hyper-util", "js-sys", "log", + "mime", "mime_guess", "percent-encoding", "pin-project-lite", "quinn", - "rustls", - "rustls-native-certs 0.8.1", + "rustls 0.23.34", + "rustls-native-certs 0.8.2", "rustls-pki-types", "serde", "serde_json", "serde_urlencoded", "sync_wrapper", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.4", "tokio-util", "tower 0.5.2", "tower-http", @@ -5332,7 +7118,7 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams", "web-sys", - "webpki-roots 1.0.2", + "webpki-roots 1.0.3", ] [[package]] @@ -5345,7 +7131,7 @@ dependencies = [ "futures-core", "futures-timer", "mime", - "nom", + "nom 7.1.3", "pin-project-lite", "reqwest", "thiserror 1.0.69", @@ -5402,11 +7188,32 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "rsa" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78928ac1ed176a5ca1d17e578a1825f3d81ca54cf41053a592584b020cfd691b" +dependencies = [ + "const-oid", + "digest", + "num-bigint-dig", + "num-integer", + "num-traits", + "pkcs1", + "pkcs8", + "rand_core 0.6.4", + "sha2", + "signature", + "spki", + "subtle", + "zeroize", +] + [[package]] name = "rust-embed" -version = "8.7.2" +version = "8.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "025908b8682a26ba8d12f6f2d66b987584a4a87bc024abc5bbc12553a8cd178a" +checksum = "fb44e1917075637ee8c7bcb865cf8830e3a92b5b1189e44e3a0ab5a0d5be314b" dependencies = [ "rust-embed-impl", "rust-embed-utils", @@ -5415,23 +7222,23 @@ dependencies = [ [[package]] name = 
"rust-embed-impl" -version = "8.7.2" +version = "8.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6065f1a4392b71819ec1ea1df1120673418bf386f50de1d6f54204d836d4349c" +checksum = "382499b49db77a7c19abd2a574f85ada7e9dbe125d5d1160fa5cad7c4cf71fc9" dependencies = [ "proc-macro2", "quote", "rust-embed-utils", "shellexpand", - "syn 2.0.106", + "syn 2.0.108", "walkdir", ] [[package]] name = "rust-embed-utils" -version = "8.7.2" +version = "8.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6cc0c81648b20b70c491ff8cce00c1c3b223bb8ed2b5d41f0e54c6c4c0a3594" +checksum = "21fcbee55c2458836bcdbfffb6ec9ba74bbc23ca7aa6816015a3dd2c4d8fc185" dependencies = [ "sha2", "walkdir", @@ -5459,6 +7266,16 @@ dependencies = [ "ordered-multimap", ] +[[package]] +name = "rust-ini" +version = "0.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "796e8d2b6696392a43bea58116b667fb4c29727dc5abd27d6acf338bb4f688c7" +dependencies = [ + "cfg-if", + "ordered-multimap", +] + [[package]] name = "rust-stemmers" version = "1.2.0" @@ -5469,12 +7286,6 @@ dependencies = [ "serde_derive", ] -[[package]] -name = "rustc-demangle" -version = "0.1.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" - [[package]] name = "rustc-hash" version = "1.1.0" @@ -5519,23 +7330,48 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.11.0", - "windows-sys 0.61.1", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustls" +version = "0.21.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" +dependencies = [ + "log", + "ring", + "rustls-webpki 0.101.7", + "sct", ] [[package]] name = "rustls" -version = "0.23.32" +version = "0.23.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"cd3c25631629d034ce7cd9940adc9d45762d46de2b0f57193c4443b92c6d4d40" +checksum = "6a9586e9ee2b4f8fab52a0048ca7334d7024eef48e2cb9407e3497bb7cab7fa7" dependencies = [ + "aws-lc-rs", "once_cell", "ring", "rustls-pki-types", - "rustls-webpki", + "rustls-webpki 0.103.7", "subtle", "zeroize", ] +[[package]] +name = "rustls-native-certs" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" +dependencies = [ + "openssl-probe", + "rustls-pemfile 1.0.4", + "schannel", + "security-framework 2.11.1", +] + [[package]] name = "rustls-native-certs" version = "0.7.3" @@ -5543,7 +7379,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5bfb394eeed242e909609f56089eecfe5fda225042e8b171791b9c95f5931e5" dependencies = [ "openssl-probe", - "rustls-pemfile", + "rustls-pemfile 2.2.0", "rustls-pki-types", "schannel", "security-framework 2.11.1", @@ -5551,9 +7387,9 @@ dependencies = [ [[package]] name = "rustls-native-certs" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcff2dd52b58a8d98a70243663a0d234c4e2b79235637849d15913394a247d3" +checksum = "9980d917ebb0c0536119ba501e90834767bffc3d60641457fd84a1f3fd337923" dependencies = [ "openssl-probe", "rustls-pki-types", @@ -5561,6 +7397,15 @@ dependencies = [ "security-framework 3.5.1", ] +[[package]] +name = "rustls-pemfile" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" +dependencies = [ + "base64 0.21.7", +] + [[package]] name = "rustls-pemfile" version = "2.2.0" @@ -5582,10 +7427,21 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.6" +version = "0.101.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" 
+dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "rustls-webpki" +version = "0.103.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8572f3c2cb9934231157b45499fc41e1f58c589fdfb81a844ba873265e80f8eb" +checksum = "e10b3f4191e8a80e6b43eebabfac91e5dcecebb27a71f04e820c47ec41d314bf" dependencies = [ + "aws-lc-rs", "ring", "rustls-pki-types", "untrusted", @@ -5599,9 +7455,9 @@ checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "rusty-fork" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f" +checksum = "cc6bf79ff24e648f6da1f8d1f011e9cac26491b619e6b9280f2b47f1774e6ee2" dependencies = [ "fnv", "quick-error", @@ -5615,6 +7471,15 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +[[package]] +name = "salsa20" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97a22f5af31f73a954c10289c93e8a50cc23d971e80ee446f1f6f7137a088213" +dependencies = [ + "cipher", +] + [[package]] name = "same-file" version = "1.0.6" @@ -5630,15 +7495,66 @@ version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1" dependencies = [ - "windows-sys 0.61.1", + "windows-sys 0.61.2", +] + +[[package]] +name = "schemars" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd191f9397d57d581cddd31014772520aa448f65ef991055d7f61582c65165f" +dependencies = [ + "dyn-clone", + "ref-cast", + "serde", + "serde_json", +] + +[[package]] +name = "schemars" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"82d20c4491bc164fa2f6c5d44565947a52ad80b9505d8e36f8d54c27c739fcd0" +dependencies = [ + "dyn-clone", + "ref-cast", + "serde", + "serde_json", ] +[[package]] +name = "scoped-tls" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" + [[package]] name = "scopeguard" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "scrypt" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0516a385866c09368f0b5bcd1caff3366aace790fcd46e2bb032697bb172fd1f" +dependencies = [ + "pbkdf2", + "salsa20", + "sha2", +] + +[[package]] +name = "sct" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" +dependencies = [ + "ring", + "untrusted", +] + [[package]] name = "secrecy" version = "0.8.0" @@ -5724,7 +7640,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -5740,6 +7656,28 @@ dependencies = [ "serde_core", ] +[[package]] +name = "serde_path_to_error" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457" +dependencies = [ + "itoa", + "serde", + "serde_core", +] + +[[package]] +name = "serde_repr" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.108", +] + [[package]] name = "serde_spanned" version = "0.6.9" @@ -5761,6 +7699,37 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_with" 
+version = "3.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa66c845eee442168b2c8134fec70ac50dc20e760769c8ba0ad1319ca1959b04" +dependencies = [ + "base64 0.22.1", + "chrono", + "hex", + "indexmap 1.9.3", + "indexmap 2.12.0", + "schemars 0.9.0", + "schemars 1.0.4", + "serde_core", + "serde_json", + "serde_with_macros", + "time", +] + +[[package]] +name = "serde_with_macros" +version = "3.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b91a903660542fced4e99881aa481bdbaec1634568ee02e0b8bd57c64cb38955" +dependencies = [ + "darling 0.21.3", + "proc-macro2", + "quote", + "syn 2.0.108", +] + [[package]] name = "sha1" version = "0.10.6" @@ -5816,6 +7785,16 @@ dependencies = [ "libc", ] +[[package]] +name = "signature" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" +dependencies = [ + "digest", + "rand_core 0.6.4", +] + [[package]] name = "simd-adler32" version = "0.3.7" @@ -5855,6 +7834,15 @@ dependencies = [ "serde", ] +[[package]] +name = "sketches-ddsketch" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1e9a774a6c28142ac54bb25d25562e6bcf957493a184f15ad4eebccb23e410a" +dependencies = [ + "serde", +] + [[package]] name = "slab" version = "0.4.11" @@ -5867,6 +7855,27 @@ version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +[[package]] +name = "snafu" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e84b3f4eacbf3a1ce05eac6763b4d629d60cbc94d632e4092c54ade71f1e1a2" +dependencies = [ + "snafu-derive", +] + +[[package]] +name = "snafu-derive" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c1c97747dbf44bb1ca44a561ece23508e99cb592e862f22222dcf42f51d1e451" +dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.108", +] + [[package]] name = "snap" version = "1.1.1" @@ -5885,12 +7894,12 @@ dependencies = [ [[package]] name = "socket2" -version = "0.6.0" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807" +checksum = "17129e116933cf371d018bb80ae557e889637989d8638274fb25622827b03881" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -5915,6 +7924,7 @@ version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" dependencies = [ + "base64ct", "der", ] @@ -5937,20 +7947,20 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] name = "stable_deref_trait" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" [[package]] name = "stacker" -version = "0.1.21" +version = "0.1.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cddb07e32ddb770749da91081d8d0ac3a16f1a569a18b20348cd371f5dead06b" +checksum = "e1f8b29fb42aafcea4edeeb6b2f2d7ecd0d969c48b4cf0d2e64aafc471dd6e59" dependencies = [ "cc", "cfg-if", @@ -5971,6 +7981,18 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7beae5182595e9a8b683fa98c4317f956c9a2dec3b9716990d20023cc60c766" +[[package]] +name = "std_prelude" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8207e78455ffdf55661170876f88daf85356e4edd54e0a3dbc79586ca1e50cbe" + +[[package]] +name = "stfu8" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51f1e89f093f99e7432c491c382b88a6860a5adbe6bf02574bf0a08efff1978" + [[package]] name = "streaming-stats" version = "0.2.3" @@ -5986,13 +8008,35 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "strum" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" +dependencies = [ + "strum_macros 0.25.3", +] + [[package]] name = "strum" version = "0.26.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" dependencies = [ - "strum_macros", + "strum_macros 0.26.4", +] + +[[package]] +name = "strum_macros" +version = "0.25.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" +dependencies = [ + "heck 0.4.1", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.108", ] [[package]] @@ -6005,7 +8049,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -6027,9 +8071,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.106" +version = "2.0.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" +checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917" dependencies = [ "proc-macro2", "quote", @@ -6062,7 +8106,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ 
-6085,7 +8129,7 @@ dependencies = [ "census", "crc32fast", "crossbeam-channel", - "downcast-rs", + "downcast-rs 1.2.1", "fastdivide", "fnv", "fs4", @@ -6095,7 +8139,7 @@ dependencies = [ "log", "lru", "lz4_flex", - "measure_time", + "measure_time 0.8.3", "memmap2", "num_cpus", "once_cell", @@ -6106,15 +8150,15 @@ dependencies = [ "rustc-hash 1.1.0", "serde", "serde_json", - "sketches-ddsketch", + "sketches-ddsketch 0.2.2", "smallvec", - "tantivy-bitpacker", - "tantivy-columnar", - "tantivy-common", + "tantivy-bitpacker 0.6.0", + "tantivy-columnar 0.3.0", + "tantivy-common 0.7.0", "tantivy-fst", - "tantivy-query-grammar", - "tantivy-stacker", - "tantivy-tokenizer-api", + "tantivy-query-grammar 0.22.0", + "tantivy-stacker 0.3.0", + "tantivy-tokenizer-api 0.3.0", "tempfile", "thiserror 1.0.69", "time", @@ -6122,6 +8166,58 @@ dependencies = [ "winapi", ] +[[package]] +name = "tantivy" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64a966cb0e76e311f09cf18507c9af192f15d34886ee43d7ba7c7e3803660c43" +dependencies = [ + "aho-corasick", + "arc-swap", + "base64 0.22.1", + "bitpacking", + "bon", + "byteorder", + "census", + "crc32fast", + "crossbeam-channel", + "downcast-rs 2.0.2", + "fastdivide", + "fnv", + "fs4", + "htmlescape", + "hyperloglogplus", + "itertools 0.14.0", + "levenshtein_automata", + "log", + "lru", + "lz4_flex", + "measure_time 0.9.0", + "memmap2", + "once_cell", + "oneshot", + "rayon", + "regex", + "rust-stemmers", + "rustc-hash 2.1.1", + "serde", + "serde_json", + "sketches-ddsketch 0.3.0", + "smallvec", + "tantivy-bitpacker 0.8.0", + "tantivy-columnar 0.5.0", + "tantivy-common 0.9.0", + "tantivy-fst", + "tantivy-query-grammar 0.24.0", + "tantivy-stacker 0.5.0", + "tantivy-tokenizer-api 0.5.0", + "tempfile", + "thiserror 2.0.17", + "time", + "uuid", + "winapi", +] + [[package]] name = "tantivy-bitpacker" version = "0.6.0" @@ -6131,20 +8227,45 @@ dependencies = [ "bitpacking", ] +[[package]] +name = 
"tantivy-bitpacker" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1adc286a39e089ae9938935cd488d7d34f14502544a36607effd2239ff0e2494" +dependencies = [ + "bitpacking", +] + [[package]] name = "tantivy-columnar" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "12722224ffbe346c7fec3275c699e508fd0d4710e629e933d5736ec524a1f44e" dependencies = [ - "downcast-rs", + "downcast-rs 1.2.1", "fastdivide", "itertools 0.12.1", "serde", - "tantivy-bitpacker", - "tantivy-common", - "tantivy-sstable", - "tantivy-stacker", + "tantivy-bitpacker 0.6.0", + "tantivy-common 0.7.0", + "tantivy-sstable 0.3.0", + "tantivy-stacker 0.3.0", +] + +[[package]] +name = "tantivy-columnar" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6300428e0c104c4f7db6f95b466a6f5c1b9aece094ec57cdd365337908dc7344" +dependencies = [ + "downcast-rs 2.0.2", + "fastdivide", + "itertools 0.14.0", + "serde", + "tantivy-bitpacker 0.8.0", + "tantivy-common 0.9.0", + "tantivy-sstable 0.5.0", + "tantivy-stacker 0.5.0", ] [[package]] @@ -6155,7 +8276,20 @@ checksum = "8019e3cabcfd20a1380b491e13ff42f57bb38bf97c3d5fa5c07e50816e0621f4" dependencies = [ "async-trait", "byteorder", - "ownedbytes", + "ownedbytes 0.7.0", + "serde", + "time", +] + +[[package]] +name = "tantivy-common" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91b6ea6090ce03dc72c27d0619e77185d26cc3b20775966c346c6d4f7e99d7f" +dependencies = [ + "async-trait", + "byteorder", + "ownedbytes 0.9.0", "serde", "time", ] @@ -6177,7 +8311,18 @@ version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "847434d4af57b32e309f4ab1b4f1707a6c566656264caa427ff4285c4d9d0b82" dependencies = [ - "nom", + "nom 7.1.3", +] + +[[package]] +name = "tantivy-query-grammar" +version = "0.24.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e810cdeeebca57fc3f7bfec5f85fdbea9031b2ac9b990eb5ff49b371d52bbe6a" +dependencies = [ + "nom 7.1.3", + "serde", + "serde_json", ] [[package]] @@ -6186,8 +8331,22 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c69578242e8e9fc989119f522ba5b49a38ac20f576fc778035b96cc94f41f98e" dependencies = [ - "tantivy-bitpacker", - "tantivy-common", + "tantivy-bitpacker 0.6.0", + "tantivy-common 0.7.0", + "tantivy-fst", + "zstd", +] + +[[package]] +name = "tantivy-sstable" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "709f22c08a4c90e1b36711c1c6cad5ae21b20b093e535b69b18783dd2cb99416" +dependencies = [ + "futures-util", + "itertools 0.14.0", + "tantivy-bitpacker 0.8.0", + "tantivy-common 0.9.0", "tantivy-fst", "zstd", ] @@ -6199,8 +8358,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c56d6ff5591fc332739b3ce7035b57995a3ce29a93ffd6012660e0949c956ea8" dependencies = [ "murmurhash32", - "rand_distr", - "tantivy-common", + "rand_distr 0.4.3", + "tantivy-common 0.7.0", +] + +[[package]] +name = "tantivy-stacker" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bcdebb267671311d1e8891fd9d1301803fdb8ad21ba22e0a30d0cab49ba59c1" +dependencies = [ + "murmurhash32", + "rand_distr 0.4.3", + "tantivy-common 0.9.0", ] [[package]] @@ -6212,6 +8382,21 @@ dependencies = [ "serde", ] +[[package]] +name = "tantivy-tokenizer-api" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfa942fcee81e213e09715bbce8734ae2180070b97b33839a795ba1de201547d" +dependencies = [ + "serde", +] + +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + [[package]] name = "target-lexicon" 
version = "0.13.3" @@ -6225,10 +8410,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" dependencies = [ "fastrand", - "getrandom 0.3.3", + "getrandom 0.3.4", "once_cell", "rustix 1.1.2", - "windows-sys 0.61.1", + "windows-sys 0.61.2", ] [[package]] @@ -6267,7 +8452,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -6278,7 +8463,16 @@ checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", +] + +[[package]] +name = "thread-tree" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffbd370cb847953a25954d9f63e14824a36113f8c72eecf6eccef5dc4b45d630" +dependencies = [ + "crossbeam-channel", ] [[package]] @@ -6378,33 +8572,40 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.47.1" +version = "1.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038" +checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408" dependencies = [ - "backtrace", "bytes", - "io-uring", "libc", "mio", "parking_lot", "pin-project-lite", "signal-hook-registry", - "slab", - "socket2 0.6.0", + "socket2 0.6.1", "tokio-macros", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] name = "tokio-macros" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", +] + +[[package]] 
+name = "tokio-rustls" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" +dependencies = [ + "rustls 0.21.12", + "tokio", ] [[package]] @@ -6413,7 +8614,7 @@ version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" dependencies = [ - "rustls", + "rustls 0.23.34", "tokio", ] @@ -6477,9 +8678,9 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.7.2" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32f1085dec27c2b6632b04c80b3bb1b4300d6495d1e129693bdda7d91e72eec1" +checksum = "f2cdb639ebbc97961c51720f858597f7f24c4fc295327923af55b74c3c724533" dependencies = [ "serde_core", ] @@ -6490,7 +8691,7 @@ version = "0.22.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" dependencies = [ - "indexmap 2.11.4", + "indexmap 2.12.0", "serde", "serde_spanned", "toml_datetime 0.6.11", @@ -6500,21 +8701,21 @@ dependencies = [ [[package]] name = "toml_edit" -version = "0.23.6" +version = "0.23.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3effe7c0e86fdff4f69cdd2ccc1b96f933e24811c5441d44904e8683e27184b" +checksum = "6485ef6d0d9b5d0ec17244ff7eb05310113c3f316f2d14200d4de56b3cb98f8d" dependencies = [ - "indexmap 2.11.4", - "toml_datetime 0.7.2", + "indexmap 2.12.0", + "toml_datetime 0.7.3", "toml_parser", "winnow", ] [[package]] name = "toml_parser" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cf893c33be71572e0e9aa6dd15e6677937abd686b066eac3f8cd3531688a627" +checksum = "c0cbe268d35bdb4bb5a56a2de88d0ad0eb70af5384a99d648cd4b3d04039800e" dependencies = [ "winnow", ] @@ -6533,14 +8734,14 @@ checksum = 
"877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" dependencies = [ "async-stream", "async-trait", - "axum", + "axum 0.7.9", "base64 0.22.1", "bytes", - "h2", - "http", - "http-body", + "h2 0.4.12", + "http 1.3.1", + "http-body 1.0.1", "http-body-util", - "hyper", + "hyper 1.7.0", "hyper-timeout", "hyper-util", "percent-encoding", @@ -6588,6 +8789,7 @@ dependencies = [ "tokio", "tower-layer", "tower-service", + "tracing", ] [[package]] @@ -6599,8 +8801,8 @@ dependencies = [ "bitflags", "bytes", "futures-util", - "http", - "http-body", + "http 1.3.1", + "http-body 1.0.1", "iri-string", "pin-project-lite", "tower 0.5.2", @@ -6626,6 +8828,7 @@ version = "0.1.41" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" dependencies = [ + "log", "pin-project-lite", "tracing-attributes", "tracing-core", @@ -6639,7 +8842,7 @@ checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -6713,7 +8916,7 @@ checksum = "eadc29d668c91fcc564941132e17b28a7ceb2f3ebf0b9dae3e03fd7a6748eb0d" dependencies = [ "bytes", "data-encoding", - "http", + "http 1.3.1", "httparse", "log", "rand 0.9.2", @@ -6733,9 +8936,9 @@ dependencies = [ [[package]] name = "typenum" -version = "1.18.0" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" [[package]] name = "ucd-trie" @@ -6766,9 +8969,9 @@ checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539" [[package]] name = "unicode-ident" -version = "1.0.19" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" +checksum = 
"462eeb75aeb73aea900253ce739c8e18a67423fadf006037cd3ff27e82748a06" [[package]] name = "unicode-segmentation" @@ -6778,9 +8981,9 @@ checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" [[package]] name = "unicode-width" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" [[package]] name = "unindent" @@ -6806,6 +9009,12 @@ dependencies = [ "serde", ] +[[package]] +name = "urlencoding" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" + [[package]] name = "utf-8" version = "0.7.6" @@ -6836,7 +9045,7 @@ version = "1.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2" dependencies = [ - "getrandom 0.3.3", + "getrandom 0.3.4", "js-sys", "serde", "wasm-bindgen", @@ -6854,6 +9063,12 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "vsimd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" + [[package]] name = "wait-timeout" version = "0.2.1" @@ -6888,15 +9103,6 @@ version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" -[[package]] -name = "wasi" -version = "0.14.7+wasi-0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c" -dependencies = [ - "wasip2", -] - [[package]] 
name = "wasip2" version = "1.0.1+wasi-0.2.4" @@ -6929,7 +9135,7 @@ dependencies = [ "log", "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", "wasm-bindgen-shared", ] @@ -6964,7 +9170,7 @@ checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -7017,14 +9223,14 @@ version = "0.26.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" dependencies = [ - "webpki-roots 1.0.2", + "webpki-roots 1.0.3", ] [[package]] name = "webpki-roots" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e8983c3ab33d6fb807cfcdad2491c4ea8cbc8ed839181c7dfd9c67c83e261b2" +checksum = "32b130c0d2d49f8b6889abc456e795e82525204f27c42cf767cf0d7734e089b8" dependencies = [ "rustls-pki-types", ] @@ -7057,7 +9263,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.61.1", + "windows-sys 0.61.2", ] [[package]] @@ -7066,63 +9272,143 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows" +version = "0.61.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893" +dependencies = [ + "windows-collections", + "windows-core 0.61.2", + "windows-future", + "windows-link 0.1.3", + "windows-numerics", +] + +[[package]] +name = "windows-collections" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8" +dependencies = [ + 
"windows-core 0.61.2", +] + +[[package]] +name = "windows-core" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link 0.1.3", + "windows-result 0.3.4", + "windows-strings 0.4.2", +] + [[package]] name = "windows-core" -version = "0.62.1" +version = "0.62.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6844ee5416b285084d3d3fffd743b925a6c9385455f64f6d4fa3031c4c2749a9" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" dependencies = [ "windows-implement", "windows-interface", - "windows-link", - "windows-result", - "windows-strings", + "windows-link 0.2.1", + "windows-result 0.4.1", + "windows-strings 0.5.1", +] + +[[package]] +name = "windows-future" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e" +dependencies = [ + "windows-core 0.61.2", + "windows-link 0.1.3", + "windows-threading", ] [[package]] name = "windows-implement" -version = "0.60.1" +version = "0.60.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edb307e42a74fb6de9bf3a02d9712678b22399c87e6fa869d6dfcd8c1b7754e0" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] name = "windows-interface" -version = "0.59.2" +version = "0.59.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0abd1ddbc6964ac14db11c7213d6532ef34bd9aa042c2e5935f59d7908b46a5" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] name = "windows-link" +version = "0.1.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-numerics" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65" +checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1" +dependencies = [ + "windows-core 0.61.2", + "windows-link 0.1.3", +] [[package]] name = "windows-result" -version = "0.4.0" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +dependencies = [ + "windows-link 0.1.3", +] + +[[package]] +name = "windows-result" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7084dcc306f89883455a206237404d3eaf961e5bd7e0f312f7c91f57eb44167f" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" dependencies = [ - "windows-link", + "windows-link 0.2.1", ] [[package]] name = "windows-strings" -version = "0.5.0" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" +dependencies = [ + "windows-link 0.1.3", +] + +[[package]] +name = "windows-strings" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7218c655a553b0bed4426cf54b20d7ba363ef543b52d515b3e48d7fd55318dda" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" dependencies = [ - "windows-link", + "windows-link 0.2.1", ] [[package]] @@ -7149,16 +9435,16 @@ version = "0.60.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" dependencies = [ - "windows-targets 0.53.4", + "windows-targets 0.53.5", ] [[package]] name = "windows-sys" -version = "0.61.1" +version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f109e41dd4a3c848907eb83d5a42ea98b3769495597450cf6d153507b166f0f" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" dependencies = [ - "windows-link", + "windows-link 0.2.1", ] [[package]] @@ -7179,19 +9465,28 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.53.4" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link 0.2.1", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", +] + +[[package]] +name = "windows-threading" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d42b7b7f66d2a06854650af09cfdf8713e427a439c97ad65a6375318033ac4b" +checksum = "b66463ad2e0ea3bbf808b7f1d371311c80e115c0b71d60efc142cafbcfb057a6" dependencies = [ - "windows-link", - "windows_aarch64_gnullvm 0.53.0", - "windows_aarch64_msvc 0.53.0", - "windows_i686_gnu 0.53.0", - "windows_i686_gnullvm 0.53.0", - "windows_i686_msvc 0.53.0", - "windows_x86_64_gnu 0.53.0", - "windows_x86_64_gnullvm 0.53.0", - "windows_x86_64_msvc 0.53.0", + "windows-link 0.1.3", ] [[package]] @@ -7202,9 +9497,9 @@ checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_gnullvm" -version = "0.53.0" +version = "0.53.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" [[package]] name = "windows_aarch64_msvc" @@ -7214,9 +9509,9 @@ checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_aarch64_msvc" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" [[package]] name = "windows_i686_gnu" @@ -7226,9 +9521,9 @@ checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] name = "windows_i686_gnu" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" [[package]] name = "windows_i686_gnullvm" @@ -7238,9 +9533,9 @@ checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_gnullvm" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" [[package]] name = "windows_i686_msvc" @@ -7250,9 +9545,9 @@ checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_i686_msvc" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" [[package]] 
name = "windows_x86_64_gnu" @@ -7262,9 +9557,9 @@ checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnu" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" [[package]] name = "windows_x86_64_gnullvm" @@ -7274,9 +9569,9 @@ checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_gnullvm" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" [[package]] name = "windows_x86_64_msvc" @@ -7286,9 +9581,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "windows_x86_64_msvc" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" [[package]] name = "winnow" @@ -7311,6 +9606,27 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" +[[package]] +name = "wyz" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] + +[[package]] +name = "xmlparser" +version = "0.13.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" + +[[package]] +name = "xxhash-rust" 
+version = "0.8.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" + [[package]] name = "xz2" version = "0.1.7" @@ -7357,7 +9673,7 @@ checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", "synstructure", ] @@ -7378,7 +9694,7 @@ checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -7398,7 +9714,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", "synstructure", ] @@ -7419,7 +9735,7 @@ checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -7452,7 +9768,7 @@ checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.108", ] [[package]] @@ -7470,9 +9786,9 @@ dependencies = [ "deflate64", "displaydoc", "flate2", - "getrandom 0.3.3", + "getrandom 0.3.4", "hmac", - "indexmap 2.11.4", + "indexmap 2.12.0", "lzma-rs", "memchr", "pbkdf2", diff --git a/Cargo.toml b/Cargo.toml index e0e8454df1..644ed53ecb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,7 +24,7 @@ readme = "README.md" homepage = "https://github.com/Raphtory/raphtory/" keywords = ["graph", "temporal-graph", "temporal"] authors = ["Pometry"] -rust-version = "1.86.0" +rust-version = "1.88.0" edition = "2021" # debug symbols are using a lot of resources @@ -87,7 +87,7 @@ parking_lot = { version = "0.12.1", features = [ "send_guard", ] } ordered-float = "4.2.0" -chrono = { version = "0.4.42", features = ["serde"] } +chrono = { version = "0.4.41", features = ["serde"] } tempfile = "3.10.0" futures-util = 
"0.3.30" thiserror = "2.0.0" @@ -154,6 +154,7 @@ minijinja = "2.2.0" minijinja-contrib = { version = "2.2.0", features = ["datetime"] } datafusion = { version = "50.0.0" } arroy = "0.6.1" +lancedb = "0.22.2" # this is the latest and asks for chrono 0.4.41 heed = "0.22.0" sqlparser = "0.58.0" futures = "0.3" @@ -170,5 +171,3 @@ indexmap = { version = "2.7.0", features = ["rayon"] } fake = { version = "3.1.0", features = ["chrono"] } strsim = { version = "0.11.1" } uuid = { version = "1.16.0", features = ["v4"] } - - diff --git a/milvus/docker-compose.yml b/milvus/docker-compose.yml new file mode 100644 index 0000000000..3589060510 --- /dev/null +++ b/milvus/docker-compose.yml @@ -0,0 +1,26 @@ +version: "3.9" + +services: + milvus: + image: milvusdb/milvus:v2.6.1 + container_name: milvus + command: ["milvus", "run", "standalone"] + security_opt: + - seccomp:unconfined + environment: + ETCD_USE_EMBED: "true" + COMMON_STORAGETYPE: "local" + DEPLOY_MODE: "STANDALONE" + ports: + - "9091:9091" + - "19530:19530" + + attu: + image: zilliz/attu:v2.6 + container_name: attu + environment: + MILVUS_URL: "http://milvus:19530" + ports: + - "8000:3000" + depends_on: + - milvus diff --git a/python/tests/test_base_install/test_vectors.py b/python/tests/test_base_install/test_vectors.py index 9eb455eae4..4ad997718e 100644 --- a/python/tests/test_base_install/test_vectors.py +++ b/python/tests/test_base_install/test_vectors.py @@ -1,7 +1,10 @@ +from time import sleep +import pytest from raphtory import Graph -from raphtory.vectors import VectorisedGraph +from raphtory.vectors import VectorisedGraph, OpenAIEmbeddings, embedding_server embedding_map = { + "raphtory": [1.0, 0.0, 0.0], # this is now needed, "node1": [1.0, 0.0, 0.0], "node2": [0.0, 1.0, 0.0], "node3": [0.0, 0.0, 1.0], @@ -11,20 +14,20 @@ "edge3": [0.0, 1.0, 1.0], } - -def single_embedding(text: str): - try: +@pytest.fixture(autouse=True) +def test_server(): + @embedding_server(address="0.0.0.0:7340") # TODO: ask only for 
PORT!!! + def custom_embeddings(text: str): return embedding_map[text] - except: - raise Exception(f"unexpected document content: {text}") - -def embedding(texts): - return [single_embedding(text) for text in texts] + custom_embeddings.start() + sleep(1) + yield + custom_embeddings.stop() def floats_are_equals(float1: float, float2: float) -> bool: - return float1 + 0.001 > float2 and float1 - 0.001 < float2 + return float1 + 0.00001 > float2 and float1 - 0.01 < float2 # the graph generated by this function looks like this: @@ -48,7 +51,8 @@ def create_graph() -> VectorisedGraph: g.add_edge(3, "node1", "node3", {"name": "edge2"}) g.add_edge(4, "node3", "node4", {"name": "edge3"}) - vg = g.vectorise(embedding, nodes="{{ name }}", edges="{{ properties.name }}") + embeddings = OpenAIEmbeddings(api_base="http://localhost:7340") + vg = g.vectorise(embeddings, nodes="{{ name }}", edges="{{ properties.name }}") return vg @@ -56,18 +60,18 @@ def create_graph() -> VectorisedGraph: def test_selection(): vg = create_graph() - ################################ - selection = vg.empty_selection() - nodes_to_select = ["node1", "node2"] - edges_to_select = [("node1", "node2"), ("node1", "node3")] - selection = vg.empty_selection() - selection.add_nodes(nodes_to_select) - selection.add_edges(edges_to_select) - nodes = selection.nodes() - ########################### + # ################################ + # selection = vg.empty_selection() + # nodes_to_select = ["node1", "node2"] + # edges_to_select = [("node1", "node2"), ("node1", "node3")] + # selection = vg.empty_selection() + # selection.add_nodes(nodes_to_select) + # selection.add_edges(edges_to_select) + # nodes = selection.nodes() + # ########################### assert len(vg.empty_selection().get_documents()) == 0 - assert len(vg.empty_selection().get_documents_with_scores()) == 0 + assert len(vg.empty_selection().get_documents_with_distances()) == 0 nodes_to_select = ["node1", "node2"] edges_to_select = [("node1", "node2"), 
("node1", "node3")] @@ -77,7 +81,9 @@ def test_selection(): nodes = selection.nodes() node_names_returned = [node.name for node in nodes] assert node_names_returned == nodes_to_select + print("before get documents") docs = [doc.content for doc in selection.get_documents()] + print("after get documents") assert docs == ["node1", "node2"] selection = vg.empty_selection() @@ -113,8 +119,8 @@ def test_search(): assert edge_names_returned == [("node1", "node2")] # TODO: same for edges ? - [(doc1, score1)] = vg.entities_by_similarity("node1", 1).get_documents_with_scores() - assert floats_are_equals(score1, 1.0) + [(doc1, distance1)] = vg.entities_by_similarity("node1", 1).get_documents_with_distances() + assert floats_are_equals(distance1, 0.0) assert (doc1.entity.name, doc1.content) == ("node1", "node1") # chained search @@ -205,16 +211,19 @@ def test_filtering_by_entity_type(): assert contents == ["edge1", "edge2", "edge3"] -def constant_embedding(texts): - return [[1.0, 0.0, 0.0] for text in texts] +@embedding_server(address="0.0.0.0:7341") +def constant_embedding(_text): + return [1.0, 0.0, 0.0] def test_default_template(): g = Graph() g.add_node(1, "node1") g.add_edge(2, "node1", "node1") - vg = g.vectorise(constant_embedding) + constant_embedding.start() + + vg = g.vectorise(OpenAIEmbeddings(api_base="http://localhost:7341")) node_docs = vg.nodes_by_similarity(query="whatever", limit=10).get_documents() assert len(node_docs) == 1 @@ -226,3 +235,5 @@ def test_default_template(): edge_docs[0].content == "There is an edge from node1 to node1 with events at:\n- Jan 1 1970 00:00\n" ) + + constant_embedding.stop() diff --git a/raphtory-benchmark/src/common/vectors.rs b/raphtory-benchmark/src/common/vectors.rs index 701ace6db2..f259108580 100644 --- a/raphtory-benchmark/src/common/vectors.rs +++ b/raphtory-benchmark/src/common/vectors.rs @@ -35,7 +35,7 @@ pub fn create_graph_for_vector_bench(size: usize) -> Graph { } pub async fn vectorise_graph_for_bench_async(graph: 
Graph) -> VectorisedGraph { - let cache = VectorCache::in_memory(embedding_model); + let cache = VectorCache::in_memory(embedding_model).await.unwrap(); let template = DocumentTemplate { node_template: Some("{{name}}".to_owned()), edge_template: None, diff --git a/raphtory-graphql/src/data.rs b/raphtory-graphql/src/data.rs index dbcaa30c6a..70c634751e 100644 --- a/raphtory-graphql/src/data.rs +++ b/raphtory-graphql/src/data.rs @@ -8,15 +8,16 @@ use itertools::Itertools; use moka::future::Cache; use raphtory::{ db::api::view::MaterializedGraph, - errors::{GraphError, InvalidPathReason}, + errors::{GraphError, GraphResult, InvalidPathReason}, prelude::CacheOps, vectors::{ - cache::VectorCache, template::DocumentTemplate, vectorisable::Vectorisable, + cache::{CachedEmbeddingModel, VectorCache}, + template::DocumentTemplate, + vectorisable::Vectorisable, vectorised_graph::VectorisedGraph, }, }; use std::{ - collections::HashMap, path::{Path, PathBuf}, sync::Arc, }; @@ -24,12 +25,12 @@ use tokio::fs; use tracing::{error, warn}; use walkdir::WalkDir; -#[derive(Clone)] -pub struct EmbeddingConf { - pub(crate) cache: VectorCache, - pub(crate) global_template: Option, - pub(crate) individual_templates: HashMap, -} +// #[derive(Clone)] +// pub struct EmbeddingConf { +// pub(crate) cache: VectorCache, +// // pub(crate) global_template: Option, +// // pub(crate) individual_templates: HashMap, +// } pub(crate) fn get_relative_path( work_dir: PathBuf, @@ -54,14 +55,14 @@ pub(crate) fn get_relative_path( #[derive(Clone)] pub struct Data { - pub(crate) work_dir: PathBuf, + pub(crate) work_dir: PathBuf, // TODO: move this to config? cache: Cache, - pub(crate) create_index: bool, - pub(crate) embedding_conf: Option, + pub(crate) create_index: bool, // TODO: move this to config? 
+ pub(crate) vector_cache: VectorCache, } impl Data { - pub fn new(work_dir: &Path, configs: &AppConfig) -> Self { + pub async fn new(work_dir: &Path, configs: &AppConfig) -> GraphResult { let cache_configs = &configs.cache; let cache = Cache::::builder() @@ -80,12 +81,14 @@ impl Data { #[cfg(not(feature = "search"))] let create_index = false; - Self { + // TODO: make vector feature optional? + + Ok(Self { work_dir: work_dir.to_path_buf(), cache, create_index, - embedding_conf: Default::default(), - } + vector_cache: VectorCache::on_disk(&work_dir.join(".vector-cache")).await?, // FIXME: need to disable graph names starting with a dot + }) } pub async fn get_graph( @@ -116,8 +119,8 @@ impl Data { let folder_clone = folder.clone(); let graph_clone = graph.clone(); blocking_io(move || graph_clone.cache(folder_clone)).await?; - let vectors = self.vectorise(graph.clone(), &folder).await; - let graph = GraphWithVectors::new(graph, vectors); + // let vectors = self.vectorise(graph.clone(), &folder).await; + let graph = GraphWithVectors::new(graph, None); graph .folder .get_or_try_init(|| Ok::<_, GraphError>(folder.into()))?; @@ -134,23 +137,16 @@ impl Data { Ok(()) } - fn resolve_template(&self, graph: &Path) -> Option<&DocumentTemplate> { - let conf = self.embedding_conf.as_ref()?; - conf.individual_templates - .get(graph) - .or(conf.global_template.as_ref()) - } - async fn vectorise_with_template( &self, graph: MaterializedGraph, folder: &ValidGraphFolder, template: &DocumentTemplate, + model: CachedEmbeddingModel, ) -> Option> { - let conf = self.embedding_conf.as_ref()?; let vectors = graph .vectorise( - conf.cache.clone(), + model, template.clone(), Some(&folder.get_vectors_path()), true, // verbose @@ -166,34 +162,15 @@ impl Data { } } - async fn vectorise( + pub(crate) async fn vectorise_folder( &self, - graph: MaterializedGraph, - folder: &ValidGraphFolder, - ) -> Option> { - let template = self.resolve_template(folder.get_original_path())?; - 
self.vectorise_with_template(graph, folder, template).await - } - - async fn vectorise_folder(&self, folder: &ExistingGraphFolder) -> Option<()> { - // it's important that we check if there is a valid template set for this graph path - // before actually loading the graph, otherwise we are loading the graph for no reason - let template = self.resolve_template(folder.get_original_path())?; - let graph = self - .read_graph_from_folder(folder.clone()) - .await - .ok()? - .graph; - self.vectorise_with_template(graph, folder, template).await; - Some(()) - } - - pub(crate) async fn vectorise_all_graphs_that_are_not(&self) -> Result<(), GraphError> { - for folder in self.get_all_graph_folders() { - if !folder.get_vectors_path().exists() { - self.vectorise_folder(&folder).await; - } - } + folder: &ExistingGraphFolder, + template: &DocumentTemplate, + model: CachedEmbeddingModel, + ) -> GraphResult<()> { + let graph = self.read_graph_from_folder(folder.clone()).await?.graph; + self.vectorise_with_template(graph, folder, template, model) + .await; Ok(()) } @@ -216,9 +193,9 @@ impl Data { &self, folder: ExistingGraphFolder, ) -> Result { - let cache = self.embedding_conf.as_ref().map(|conf| conf.cache.clone()); let create_index = self.create_index; - blocking_io(move || GraphWithVectors::read_from_folder(&folder, cache, create_index)).await + GraphWithVectors::read_from_folder(&folder, &self.vector_cache, create_index).await + // FIXME: I need some blocking_io inside of GraphWithVectors::read_from_folder } } @@ -276,12 +253,12 @@ pub(crate) mod data_tests { File::create(path.join("graph")).unwrap(); } - pub(crate) fn save_graphs_to_work_dir( + pub(crate) async fn save_graphs_to_work_dir( work_dir: &Path, graphs: &HashMap, ) -> Result<(), GraphError> { for (name, graph) in graphs.into_iter() { - let data = Data::new(work_dir, &AppConfig::default()); + let data = Data::new(work_dir, &AppConfig::default()).await?; let folder = ValidGraphFolder::try_from(data.work_dir, name)?; 
#[cfg(feature = "storage")] diff --git a/raphtory-graphql/src/embeddings.rs b/raphtory-graphql/src/embeddings.rs deleted file mode 100644 index d65b59ee6e..0000000000 --- a/raphtory-graphql/src/embeddings.rs +++ /dev/null @@ -1,16 +0,0 @@ -use crate::data::Data; -use async_graphql::Context; -use raphtory::{errors::GraphResult, vectors::Embedding}; - -pub(crate) trait EmbedQuery { - async fn embed_query(&self, text: String) -> GraphResult; -} - -impl EmbedQuery for Context<'_> { - /// this is meant to be called from a vector context, so the embedding conf is assumed to exist - async fn embed_query(&self, text: String) -> GraphResult { - let data = self.data_unchecked::(); - let cache = &data.embedding_conf.as_ref().unwrap().cache; - cache.get_single(text).await - } -} diff --git a/raphtory-graphql/src/graph.rs b/raphtory-graphql/src/graph.rs index 50a3468e60..809ceb5ff1 100644 --- a/raphtory-graphql/src/graph.rs +++ b/raphtory-graphql/src/graph.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use crate::paths::ExistingGraphFolder; use once_cell::sync::OnceCell; use raphtory::{ @@ -76,9 +78,9 @@ impl GraphWithVectors { } } - pub(crate) fn read_from_folder( + pub(crate) async fn read_from_folder( folder: &ExistingGraphFolder, - cache: Option, + cache: &VectorCache, // TODO: make this mandatory!! create_index: bool, ) -> Result { let graph_path = &folder.get_graph_path(); @@ -87,9 +89,11 @@ impl GraphWithVectors { } else { MaterializedGraph::load_cached(folder.clone())? 
}; - let vectors = cache.and_then(|cache| { - VectorisedGraph::read_from_path(&folder.get_vectors_path(), graph.clone(), cache).ok() - }); + let vectors = + VectorisedGraph::read_from_path(&folder.get_vectors_path(), graph.clone(), cache) + .await + .ok(); + println!("Graph loaded = {}", folder.get_original_path_str()); if create_index { graph.create_index()?; diff --git a/raphtory-graphql/src/lib.rs b/raphtory-graphql/src/lib.rs index 55c99df880..0b2b0a36fe 100644 --- a/raphtory-graphql/src/lib.rs +++ b/raphtory-graphql/src/lib.rs @@ -1,7 +1,6 @@ pub use crate::server::GraphServer; mod auth; pub mod data; -mod embeddings; mod graph; pub mod model; pub mod observability; @@ -99,7 +98,9 @@ mod graphql_test { let graphs = HashMap::from([("master".to_string(), graph)]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs).unwrap(); + save_graphs_to_work_dir(tmp_dir.path(), &graphs) + .await + .unwrap(); let config = AppConfigBuilder::new().with_create_index(true).build(); let data = Data::new(tmp_dir.path(), &config); @@ -199,7 +200,9 @@ mod graphql_test { let graph: MaterializedGraph = graph.into(); let graphs = HashMap::from([("lotr".to_string(), graph)]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs).unwrap(); + save_graphs_to_work_dir(tmp_dir.path(), &graphs) + .await + .unwrap(); let data = Data::new(tmp_dir.path(), &AppConfig::default()); @@ -310,7 +313,9 @@ mod graphql_test { let graphs = HashMap::from([("graph".to_string(), graph)]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs).unwrap(); + save_graphs_to_work_dir(tmp_dir.path(), &graphs) + .await + .unwrap(); let data = Data::new(tmp_dir.path(), &AppConfig::default()); let schema = App::create_schema().data(data).finish().unwrap(); @@ -413,7 +418,9 @@ mod graphql_test { let graphs = HashMap::from([("graph".to_string(), graph)]); let tmp_dir = tempdir().unwrap(); - 
save_graphs_to_work_dir(tmp_dir.path(), &graphs).unwrap(); + save_graphs_to_work_dir(tmp_dir.path(), &graphs) + .await + .unwrap(); let data = Data::new(tmp_dir.path(), &AppConfig::default()); let schema = App::create_schema().data(data).finish().unwrap(); @@ -478,7 +485,9 @@ mod graphql_test { let graph: MaterializedGraph = g.into(); let graphs = HashMap::from([("graph".to_string(), graph)]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs).unwrap(); + save_graphs_to_work_dir(tmp_dir.path(), &graphs) + .await + .unwrap(); let expected = json!({ "graph": { @@ -629,7 +638,9 @@ mod graphql_test { let g = g.into(); let graphs = HashMap::from([("graph".to_string(), g)]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs).unwrap(); + save_graphs_to_work_dir(tmp_dir.path(), &graphs) + .await + .unwrap(); let data = Data::new(tmp_dir.path(), &AppConfig::default()); let schema = App::create_schema().data(data).finish().unwrap(); @@ -878,7 +889,9 @@ mod graphql_test { let graph = graph.into(); let graphs = HashMap::from([("graph".to_string(), graph)]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs).unwrap(); + save_graphs_to_work_dir(tmp_dir.path(), &graphs) + .await + .unwrap(); let data = Data::new(tmp_dir.path(), &AppConfig::default()); let schema = App::create_schema().data(data).finish().unwrap(); @@ -1053,7 +1066,9 @@ mod graphql_test { let graph = graph.into(); let graphs = HashMap::from([("graph".to_string(), graph)]); let tmp_dir = tempdir().unwrap(); - save_graphs_to_work_dir(tmp_dir.path(), &graphs).unwrap(); + save_graphs_to_work_dir(tmp_dir.path(), &graphs) + .await + .unwrap(); let data = Data::new(tmp_dir.path(), &AppConfig::default()); let schema = App::create_schema().data(data).finish().unwrap(); @@ -1194,7 +1209,9 @@ mod graphql_test { ("graph6".to_string(), graph6.into()), ]); let tmp_dir = tempdir().unwrap(); - 
save_graphs_to_work_dir(tmp_dir.path(), &graphs).unwrap(); + save_graphs_to_work_dir(tmp_dir.path(), &graphs) + .await + .unwrap(); let data = Data::new(tmp_dir.path(), &AppConfig::default()); let schema = App::create_schema().data(data).finish().unwrap(); diff --git a/raphtory-graphql/src/main.rs b/raphtory-graphql/src/main.rs index 8957ce6e22..b593854f3c 100644 --- a/raphtory-graphql/src/main.rs +++ b/raphtory-graphql/src/main.rs @@ -81,7 +81,7 @@ async fn main() -> IoResult<()> { let schema = App::create_schema().finish().unwrap(); println!("{}", schema.sdl()); } else { - let mut builder = AppConfigBuilder::new() + let builder = AppConfigBuilder::new() .with_cache_capacity(args.cache_capacity) .with_cache_tti_seconds(args.cache_tti_seconds) .with_log_level(args.log_level) @@ -101,7 +101,8 @@ async fn main() -> IoResult<()> { let app_config = Some(builder.build()); - GraphServer::new(args.working_dir, app_config, None)? + GraphServer::new(args.working_dir, app_config, None) + .await? .run_with_port(args.port) .await?; } diff --git a/raphtory-graphql/src/model/graph/mutable_graph.rs b/raphtory-graphql/src/model/graph/mutable_graph.rs index 627fec1170..eb46c69851 100644 --- a/raphtory-graphql/src/model/graph/mutable_graph.rs +++ b/raphtory-graphql/src/model/graph/mutable_graph.rs @@ -598,10 +598,7 @@ impl GqlMutableEdge { #[cfg(test)] mod tests { use super::*; - use crate::{ - config::app_config::AppConfig, - data::{Data, EmbeddingConf}, - }; + use crate::{config::app_config::AppConfig, data::Data}; use itertools::Itertools; use raphtory::{ db::api::view::MaterializedGraph, diff --git a/raphtory-graphql/src/model/graph/vector_selection.rs b/raphtory-graphql/src/model/graph/vector_selection.rs index 0e3090ec2b..1f320d592a 100644 --- a/raphtory-graphql/src/model/graph/vector_selection.rs +++ b/raphtory-graphql/src/model/graph/vector_selection.rs @@ -4,12 +4,12 @@ use super::{ node::GqlNode, vectorised_graph::{IntoWindowTuple, Window}, }; -use 
crate::{embeddings::EmbedQuery, rayon::blocking_compute}; -use async_graphql::Context; +use crate::rayon::blocking_compute; use dynamic_graphql::{InputObject, ResolvedObject, ResolvedObjectFields}; use raphtory::{ - db::api::view::MaterializedGraph, errors::GraphResult, - vectors::vector_selection::VectorSelection, + db::api::view::MaterializedGraph, + errors::GraphResult, + vectors::{vector_selection::VectorSelection, vectorised_graph::VectorisedGraph, Embedding}, }; #[derive(InputObject)] @@ -45,18 +45,16 @@ impl GqlVectorSelection { /// Returns a list of documents in the current selection. async fn get_documents(&self) -> GraphResult> { let cloned = self.0.clone(); - blocking_compute(move || { - let docs = cloned.get_documents_with_scores()?.into_iter(); - Ok(docs - .map(|(doc, score)| GqlDocument { - content: doc.content, - entity: doc.entity.into(), - embedding: doc.embedding.to_vec(), - score, - }) - .collect()) - }) - .await + let docs = cloned.get_documents_with_distances().await?.into_iter(); + Ok(docs + .map(|(doc, score)| GqlDocument { + content: doc.content, + entity: doc.entity.into(), + embedding: doc.embedding.to_vec(), + score, + }) + .collect()) + // TODO: there was a blocking_compute here before...? } /// Adds all the documents associated with the specified nodes to the current selection. @@ -64,11 +62,8 @@ impl GqlVectorSelection { /// Documents added by this call are assumed to have a score of 0. async fn add_nodes(&self, nodes: Vec) -> Self { let mut selection = self.cloned(); - blocking_compute(move || { - selection.add_nodes(nodes); - selection.into() - }) - .await + selection.add_nodes(nodes); + selection.into() } /// Adds all the documents associated with the specified edges to the current selection. @@ -76,12 +71,9 @@ impl GqlVectorSelection { /// Documents added by this call are assumed to have a score of 0. 
async fn add_edges(&self, edges: Vec) -> Self { let mut selection = self.cloned(); - blocking_compute(move || { - let edges = edges.into_iter().map(|edge| (edge.src, edge.dst)).collect(); - selection.add_edges(edges); - selection.into() - }) - .await + let edges = edges.into_iter().map(|edge| (edge.src, edge.dst)).collect(); + selection.add_edges(edges); + selection.into() } /// Add all the documents a specified number of hops away to the selection. @@ -100,55 +92,49 @@ impl GqlVectorSelection { /// Adds documents, from the set of one hop neighbours to the current selection, to the selection based on their similarity score with the specified query. This function loops so that the set of one hop neighbours expands on each loop and number of documents added is determined by the specified limit. async fn expand_entities_by_similarity( &self, - ctx: &Context<'_>, query: String, limit: usize, window: Option, ) -> GraphResult { - let vector = ctx.embed_query(query).await?; + let vector = self.embed_text(query).await?; let window = window.into_window_tuple(); let mut selection = self.cloned(); - blocking_compute(move || { - selection.expand_entities_by_similarity(&vector, limit, window)?; - Ok(selection.into()) - }) - .await + selection + .expand_entities_by_similarity(&vector, limit, window) + .await?; + Ok(selection.into()) } /// Add the adjacent nodes with higher score for query to the selection up to a specified limit. This function loops like expand_entities_by_similarity but is restricted to nodes. 
async fn expand_nodes_by_similarity( &self, - ctx: &Context<'_>, query: String, limit: usize, window: Option, ) -> GraphResult { - let vector = ctx.embed_query(query).await?; + let vector = self.embed_text(query).await?; let window = window.into_window_tuple(); let mut selection = self.cloned(); - blocking_compute(move || { - selection.expand_nodes_by_similarity(&vector, limit, window)?; - Ok(selection.into()) - }) - .await + selection + .expand_nodes_by_similarity(&vector, limit, window) + .await?; + Ok(selection.into()) } /// Add the adjacent edges with higher score for query to the selection up to a specified limit. This function loops like expand_entities_by_similarity but is restricted to edges. async fn expand_edges_by_similarity( &self, - ctx: &Context<'_>, query: String, limit: usize, window: Option, ) -> GraphResult { - let vector = ctx.embed_query(query).await?; + let vector = self.embed_text(query).await?; let window = window.into_window_tuple(); let mut selection = self.cloned(); - blocking_compute(move || { - selection.expand_edges_by_similarity(&vector, limit, window)?; - Ok(selection.into()) - }) - .await + selection + .expand_edges_by_similarity(&vector, limit, window) + .await?; + Ok(selection.into()) } } @@ -156,4 +142,8 @@ impl GqlVectorSelection { fn cloned(&self) -> VectorSelection { self.0.clone() } + + async fn embed_text(&self, text: String) -> GraphResult { + self.0.get_vectorised_graph().embed_text(text).await + } } diff --git a/raphtory-graphql/src/model/graph/vectorised_graph.rs b/raphtory-graphql/src/model/graph/vectorised_graph.rs index b3ebbd01d2..389dd2b554 100644 --- a/raphtory-graphql/src/model/graph/vectorised_graph.rs +++ b/raphtory-graphql/src/model/graph/vectorised_graph.rs @@ -1,6 +1,4 @@ use super::vector_selection::GqlVectorSelection; -use crate::{embeddings::EmbedQuery, model::blocking_io}; -use async_graphql::Context; use dynamic_graphql::{InputObject, ResolvedObject, ResolvedObjectFields}; use raphtory::{ 
db::api::view::MaterializedGraph, errors::GraphResult, @@ -45,42 +43,42 @@ impl GqlVectorisedGraph { /// Search the top scoring entities according to a specified query returning no more than a specified limit of entities. async fn entities_by_similarity( &self, - ctx: &Context<'_>, query: String, limit: usize, window: Option, ) -> GraphResult { - let vector = ctx.embed_query(query).await?; + let vector = self.0.embed_text(query).await?; let w = window.into_window_tuple(); let cloned = self.0.clone(); - blocking_io(move || Ok(cloned.entities_by_similarity(&vector, limit, w)?.into())).await + Ok(cloned + .entities_by_similarity(&vector, limit, w) + .await? + .into()) } /// Search the top scoring nodes according to a specified query returning no more than a specified limit of nodes. async fn nodes_by_similarity( &self, - ctx: &Context<'_>, query: String, limit: usize, window: Option, ) -> GraphResult { - let vector = ctx.embed_query(query).await?; + let vector = self.0.embed_text(query).await?; let w = window.into_window_tuple(); let cloned = self.0.clone(); - blocking_io(move || Ok(cloned.nodes_by_similarity(&vector, limit, w)?.into())).await + Ok(cloned.nodes_by_similarity(&vector, limit, w).await?.into()) } /// Search the top scoring edges according to a specified query returning no more than a specified limit of edges. 
async fn edges_by_similarity( &self, - ctx: &Context<'_>, query: String, limit: usize, window: Option, ) -> GraphResult { - let vector = ctx.embed_query(query).await?; + let vector = self.0.embed_text(query).await?; let w = window.into_window_tuple(); let cloned = self.0.clone(); - blocking_io(move || Ok(cloned.edges_by_similarity(&vector, limit, w)?.into())).await + Ok(cloned.edges_by_similarity(&vector, limit, w).await?.into()) } } diff --git a/raphtory-graphql/src/python/server/mod.rs b/raphtory-graphql/src/python/server/mod.rs index a5bf483fe1..d6f409565c 100644 --- a/raphtory-graphql/src/python/server/mod.rs +++ b/raphtory-graphql/src/python/server/mod.rs @@ -15,14 +15,14 @@ pub(crate) enum BridgeCommand { StopServer, StopListening, } -pub fn take_server_ownership(mut server: PyRefMut) -> PyResult { - let new_server = server.0.take().ok_or_else(|| { - PyException::new_err( - "Server object has already been used, please create another one from scratch", - ) - })?; - Ok(new_server) -} +// pub fn take_server_ownership(mut server: PyRefMut) -> PyResult { +// let new_server = server.0.take().ok_or_else(|| { +// PyException::new_err( +// "Server object has already been used, please create another one from scratch", +// ) +// })?; +// Ok(new_server) +// } pub(crate) fn wait_server(running_server: &mut Option) -> PyResult<()> { let owned_running_server = running_server diff --git a/raphtory-graphql/src/python/server/server.rs b/raphtory-graphql/src/python/server/server.rs index 76f5ea69fc..188693c31f 100644 --- a/raphtory-graphql/src/python/server/server.rs +++ b/raphtory-graphql/src/python/server/server.rs @@ -1,23 +1,20 @@ use crate::{ config::{app_config::AppConfigBuilder, auth_config::PUBLIC_KEY_DECODING_ERR_MSG}, - python::server::{ - running_server::PyRunningGraphServer, take_server_ownership, wait_server, BridgeCommand, - }, + python::server::{running_server::PyRunningGraphServer, wait_server, BridgeCommand}, GraphServer, }; use pyo3::{ 
exceptions::{PyAttributeError, PyException, PyValueError}, prelude::*, - types::PyFunction, }; use raphtory::{ - python::packages::vectors::TemplateConfig, - vectors::{ - embeddings::{openai_embedding, EmbeddingFunction}, - template::{DocumentTemplate, DEFAULT_EDGE_TEMPLATE, DEFAULT_NODE_TEMPLATE}, + python::{ + packages::vectors::{PyOpenAIEmbeddings, TemplateConfig}, + utils::block_on, }, + vectors::template::{DocumentTemplate, DEFAULT_EDGE_TEMPLATE, DEFAULT_NODE_TEMPLATE}, }; -use std::{path::PathBuf, sync::Arc, thread}; +use std::{path::PathBuf, thread}; /// A class for defining and running a Raphtory GraphQL server /// @@ -35,7 +32,7 @@ use std::{path::PathBuf, sync::Arc, thread}; /// auth_enabled_for_reads: /// create_index: #[pyclass(name = "GraphServer", module = "raphtory.graphql")] -pub struct PyGraphServer(pub Option); +pub struct PyGraphServer(GraphServer); impl<'py> IntoPyObject<'py> for GraphServer { type Target = PyGraphServer; @@ -43,7 +40,7 @@ impl<'py> IntoPyObject<'py> for GraphServer { type Error = >::Error; fn into_pyobject(self, py: Python<'py>) -> Result { - PyGraphServer::new(self).into_pyobject(py) + PyGraphServer(self).into_pyobject(py) } } @@ -58,26 +55,6 @@ fn template_from_python(nodes: TemplateConfig, edges: TemplateConfig) -> Option< } } -impl PyGraphServer { - pub fn new(server: GraphServer) -> Self { - Self(Some(server)) - } - - fn set_generic_embeddings( - slf: PyRefMut, - cache: String, - embedding: F, - nodes: TemplateConfig, - edges: TemplateConfig, - ) -> PyResult { - let global_template = template_from_python(nodes, edges); - let server = take_server_ownership(slf)?; - let cache = PathBuf::from(cache); - let rt = tokio::runtime::Runtime::new().unwrap(); - Ok(rt.block_on(server.set_embeddings(embedding, &cache, global_template))?) 
- } -} - #[pymethods] impl PyGraphServer { #[new] @@ -134,74 +111,66 @@ impl PyGraphServer { } let app_config = Some(app_config_builder.build()); - let server = GraphServer::new(work_dir, app_config, config_path)?; - Ok(PyGraphServer::new(server)) + let server = block_on(GraphServer::new(work_dir, app_config, config_path))?; + Ok(PyGraphServer(server)) } + // TODO: remove this, should be config /// Turn off index for all graphs - /// - /// Returns: - /// GraphServer: The server with indexing disabled - fn turn_off_index(slf: PyRefMut) -> PyResult { - let server = take_server_ownership(slf)?; - Ok(server.turn_off_index()) + fn turn_off_index(mut slf: PyRefMut) { + slf.0.turn_off_index() } - /// Setup the server to vectorise graphs with a default template. + // TODO: remove + // /// Setup the server to vectorise graphs with a default template. + // /// + // /// Arguments: + // /// embedding (Callable, optional): the embedding function to translate documents to embeddings. + // fn enable_embeddings( + // mut slf: PyRefMut, + // cache: String, + // embedding: PyOpenAIEmbeddings, + // // nodes: TemplateConfig, + // // edges: TemplateConfig, + // ) -> PyResult<()> { + // let cache = PathBuf::from(cache); + // let rt = tokio::runtime::Runtime::new().unwrap(); + // Ok(rt.block_on(slf.0.enable_embeddings(embedding, &cache))?) + // } + + /// Vectorise the graph name in the server working directory. /// /// Arguments: - /// cache (str): the directory to use as cache for the embeddings. - /// embedding (Callable, optional): the embedding function to translate documents to embeddings. - /// nodes (bool | str): if nodes have to be embedded or not or the custom template to use if a str is provided. Defaults to True. - /// edges (bool | str): if edges have to be embedded or not or the custom template to use if a str is provided. Defaults to True. - /// - /// Returns: - /// GraphServer: A new server object with embeddings setup. 
- #[pyo3( - signature = (cache, embedding = None, nodes = TemplateConfig::Bool(true), edges = TemplateConfig::Bool(true)) - )] - fn set_embeddings( - slf: PyRefMut, - cache: String, - embedding: Option>, - nodes: TemplateConfig, - edges: TemplateConfig, - ) -> PyResult { - match embedding { - Some(embedding) => { - let embedding: Arc = Arc::new(embedding); - Self::set_generic_embeddings(slf, cache, embedding, nodes, edges) - } - None => Self::set_generic_embeddings(slf, cache, openai_embedding, nodes, edges), - } - } - - /// Vectorise a subset of the graphs of the server. - /// - /// Arguments: - /// graph_names (list[str]): the names of the graphs to vectorise. All by default. + /// name (list[str]): the name of the graph to vectorise. /// nodes (bool | str): if nodes have to be embedded or not or the custom template to use if a str is provided. Defaults to True. /// edges (bool | str): if edges have to be embedded or not or the custom template to use if a str is provided. Defaults to True. /// /// Returns: /// GraphServer: A new server object containing the vectorised graphs. 
#[pyo3( - signature = (graph_names, nodes = TemplateConfig::Bool(true), edges = TemplateConfig::Bool(true)) + signature = (name, embeddings, nodes = TemplateConfig::Bool(true), edges = TemplateConfig::Bool(true)) )] - fn with_vectorised_graphs( - slf: PyRefMut, - graph_names: Vec, - // TODO: support more models by just providing a string, For example, "openai", here and in the VectorisedGraph API + fn vectorise_graph( + &self, + name: &str, + embeddings: PyOpenAIEmbeddings, // FIXME: this will create a breaking change once there are more options nodes: TemplateConfig, edges: TemplateConfig, - ) -> PyResult { + ) -> PyResult<()> { let template = template_from_python(nodes, edges).ok_or(PyAttributeError::new_err( - "node_template and/or edge_template has to be set", + "at least one of nodes and edges has to be set to True or some string", ))?; - let server = take_server_ownership(slf)?; - Ok(server.with_vectorised_graphs(graph_names, template)) + let rt = tokio::runtime::Runtime::new().unwrap(); + rt.block_on(async move { + self.0 + .vectorise_graph(name, template, embeddings.into()) + .await?; + Ok(()) + }) } + // TODO: vectorise all graphs + /// Start the server and return a handle to it. 
/// /// Arguments: @@ -215,16 +184,10 @@ impl PyGraphServer { #[pyo3( signature = (port = 1736, timeout_ms = 5000) )] - pub fn start( - slf: PyRefMut, - py: Python, - port: u16, - timeout_ms: u64, - ) -> PyResult { + pub fn start(&self, py: Python, port: u16, timeout_ms: u64) -> PyResult { let (sender, receiver) = crossbeam_channel::bounded::(1); let cloned_sender = sender.clone(); - - let server = take_server_ownership(slf)?; + let server = self.0.clone(); let join_handle = thread::spawn(move || { let rt = tokio::runtime::Runtime::new().unwrap(); @@ -251,6 +214,7 @@ impl PyGraphServer { let url = format!("http://localhost:{port}"); // we need to release the GIL, otherwise the server will deadlock when trying to use python function as the embedding function // and wait_for_server_online will never return + // FIXME: this does not apply anymore, can remove let result = py.allow_threads(|| PyRunningGraphServer::wait_for_server_online(&url, timeout_ms)); match result { @@ -276,8 +240,8 @@ impl PyGraphServer { #[pyo3( signature = (port = 1736, timeout_ms = 180000) )] - pub fn run(slf: PyRefMut, py: Python, port: u16, timeout_ms: u64) -> PyResult<()> { - let mut server = Self::start(slf, py, port, timeout_ms)?.server_handler; - py.allow_threads(|| wait_server(&mut server)) + pub fn run(&self, py: Python, port: u16, timeout_ms: u64) -> PyResult<()> { + let mut server = self.start(py, port, timeout_ms)?.server_handler; + py.allow_threads(|| wait_server(&mut server)) // TODO: remove allow_threads, should not be necessary anymore } } diff --git a/raphtory-graphql/src/server.rs b/raphtory-graphql/src/server.rs index 892ec45fa4..c99b5dd88d 100644 --- a/raphtory-graphql/src/server.rs +++ b/raphtory-graphql/src/server.rs @@ -1,12 +1,13 @@ use crate::{ auth::{AuthenticatedGraphQL, MutationAuth}, config::app_config::{load_config, AppConfig}, - data::{Data, EmbeddingConf}, + data::Data, model::{ plugins::{entry_point::EntryPoint, operation::Operation}, App, }, 
observability::open_telemetry::OpenTelemetry, + paths::ExistingGraphFolder, routes::{health, version, PublicFilesEndpoint}, server::ServerError::SchemaError, }; @@ -22,7 +23,11 @@ use poem::{ }; use raphtory::{ errors::GraphResult, - vectors::{cache::VectorCache, embeddings::EmbeddingFunction, template::DocumentTemplate}, + vectors::{ + cache::{CachedEmbeddingModel, VectorCache}, + storage::OpenAIEmbeddings, + template::DocumentTemplate, + }, }; use serde_json::json; use std::{ @@ -78,6 +83,7 @@ impl From for io::Error { } /// A struct for defining and running a Raphtory GraphQL server +#[derive(Clone)] pub struct GraphServer { data: Data, config: AppConfig, @@ -108,7 +114,7 @@ impl GraphServer { /// /// Returns: /// IoResult: - pub fn new( + pub async fn new( work_dir: PathBuf, app_config: Option, config_path: Option, @@ -117,63 +123,76 @@ impl GraphServer { create_dir_all(&work_dir)?; } let config = load_config(app_config, config_path).map_err(ServerError::ConfigError)?; - let data = Data::new(work_dir.as_path(), &config); + let data = Data::new(work_dir.as_path(), &config).await?; Ok(Self { data, config }) } /// Turn off index for all graphs - pub fn turn_off_index(mut self) -> Self { - self.data.create_index = false; - self - } - - pub async fn set_embeddings( - mut self, - embedding: F, - cache: &Path, - // or maybe it could be in a standard location like /tmp/raphtory/embedding_cache - global_template: Option, - ) -> GraphResult { - self.data.embedding_conf = Some(EmbeddingConf { - cache: VectorCache::on_disk(cache, embedding).await?, // TODO: better do this lazily, actually do it when running the server - global_template, - individual_templates: Default::default(), - }); - Ok(self) + pub fn turn_off_index(&mut self) { + self.data.create_index = false; // FIXME: why does this exist yet? } - /// Vectorise a subset of the graphs of the server. + // FIXME: this should be config!!!!!!!!!!!!!!!! 
or nothing at all since its per graph + // pub async fn enable_embeddings( + // &mut self, + // embedding: F, + // cache: &Path, + // // or maybe it could be in a standard location like /tmp/raphtory/embedding_cache + // // global_template: Option, + // ) -> GraphResult<()> { + // self.data.vector_cache = Some(VectorCache::on_disk(cache, embedding).await?); + // Ok(()) + // } + + // FIXME: this function should fails if embeddings were not enabled, + // and if they were it should grab the vector cache and pass it down + /// Vectorise all the graphs in the server working directory. /// /// Arguments: - /// * graph_names - the names of the graphs to vectorise. All if None is provided. - /// * embedding - the embedding function to translate documents to embeddings. - /// * cache - the directory to use as cache for the embeddings. + /// * name - the name of the graph to vectorise. /// * template - the template to use for creating documents. /// /// Returns: /// A new server object containing the vectorised graphs. - pub fn with_vectorised_graphs( - mut self, - graph_names: Vec, - template: DocumentTemplate, - ) -> Self { - if let Some(embedding_conf) = &mut self.data.embedding_conf { - for graph_name in graph_names { - embedding_conf - .individual_templates - .insert(graph_name.into(), template.clone()); - } + pub async fn vectorise_all_graphs( + &self, + template: &DocumentTemplate, + embeddings: OpenAIEmbeddings, + ) -> GraphResult<()> { + let model = self.data.vector_cache.openai(embeddings).await?; + for folder in self.data.get_all_graph_folders() { + self.data + .vectorise_folder(&folder, template, model.clone()) // TODO: avoid clone, just ask for a ref + .await?; } - self + Ok(()) + } + + // FIXME: this function should fails if embeddings were not enabled, + // and if they were it should grab the vector cache and pass it down + /// Vectorise the graph 'name'in the server working directory. + /// + /// Arguments: + /// * name - the name of the graph to vectorise. 
+ /// * template - the template to use for creating documents. + pub async fn vectorise_graph( + &self, + name: &str, + template: DocumentTemplate, + embeddings: OpenAIEmbeddings, + ) -> GraphResult<()> { + let model = self.data.vector_cache.openai(embeddings).await?; + let folder = ExistingGraphFolder::try_from(self.data.work_dir.clone(), name)?; + self.data.vectorise_folder(&folder, &template, model).await } /// Start the server on the default port and return a handle to it. - pub async fn start(self) -> IoResult { + pub async fn start(&self) -> IoResult { self.start_with_port(DEFAULT_PORT).await } /// Start the server on the given port and return a handle to it. - pub async fn start_with_port(self, port: u16) -> IoResult { + pub async fn start_with_port(&self, port: u16) -> IoResult { // set up opentelemetry first of all let config = self.config.clone(); let filter = config.logging.get_log_env(); @@ -197,7 +216,7 @@ impl GraphServer { } }; - self.data.vectorise_all_graphs_that_are_not().await?; + // self.data.vectorise_all_graphs_that_are_not().await?; let work_dir = self.data.work_dir.clone(); // it is important that this runs after algorithms have been pushed to PLUGIN_ALGOS static variable @@ -227,11 +246,11 @@ impl GraphServer { } async fn generate_endpoint( - self, + &self, tracer: Option, ) -> Result>, ServerError> { let schema_builder = App::create_schema(); - let schema_builder = schema_builder.data(self.data); + let schema_builder = schema_builder.data(self.data.clone()); let schema_builder = schema_builder.extension(MutationAuth); let schema = if let Some(t) = tracer { schema_builder.extension(OpenTelemetry::new(t)).finish() @@ -244,8 +263,8 @@ impl GraphServer { .nest( "/", PublicFilesEndpoint::new( - self.config.public_dir, - AuthenticatedGraphQL::new(schema, self.config.auth), + self.config.public_dir.clone(), + AuthenticatedGraphQL::new(schema, self.config.auth.clone()), ), ) .at("/health", get(health)) @@ -337,7 +356,10 @@ mod server_tests { use 
chrono::prelude::*; use raphtory::{ prelude::{AdditionOps, Graph, StableEncode, NO_PROPS}, - vectors::{embeddings::EmbeddingResult, template::DocumentTemplate, Embedding}, + vectors::{ + embeddings::EmbeddingResult, storage::OpenAIEmbeddings, template::DocumentTemplate, + Embedding, + }, }; use raphtory_api::core::utils::logging::global_info_logger; use tempfile::tempdir; @@ -348,7 +370,9 @@ mod server_tests { async fn test_server_start_stop() { global_info_logger(); let tmp_dir = tempdir().unwrap(); - let server = GraphServer::new(tmp_dir.path().to_path_buf(), None, None).unwrap(); + let server = GraphServer::new(tmp_dir.path().to_path_buf(), None, None) + .await + .unwrap(); info!("Calling start at time {}", Local::now()); let handler = server.start_with_port(0); sleep(Duration::from_secs(1)).await; @@ -362,10 +386,6 @@ mod server_tests { Variant, } - async fn failing_embedding(_texts: Vec) -> EmbeddingResult> { - Err(SomeError::Variant.into()) - } - #[tokio::test] async fn test_server_start_with_failing_embedding() { let tmp_dir = tempdir().unwrap(); @@ -374,17 +394,19 @@ mod server_tests { graph.encode(tmp_dir.path().join("g")).unwrap(); global_info_logger(); - let server = GraphServer::new(tmp_dir.path().to_path_buf(), None, None).unwrap(); + let server = GraphServer::new(tmp_dir.path().to_path_buf(), None, None) + .await + .unwrap(); let template = DocumentTemplate { node_template: Some("{{ name }}".to_owned()), ..Default::default() }; - let cache_dir = tempdir().unwrap(); - let handler = server - .set_embeddings(failing_embedding, cache_dir.path(), Some(template)) - .await - .unwrap() - .start_with_port(0); + let model = OpenAIEmbeddings { + api_base: Some("wrong-api-base".to_owned()), + ..Default::default() + }; + server.vectorise_all_graphs(&template, model).await.unwrap(); + let handler = server.start_with_port(0); sleep(Duration::from_secs(5)).await; handler.await.unwrap().stop().await } diff --git a/raphtory/Cargo.toml b/raphtory/Cargo.toml index 
66a566b191..e1b602528b 100644 --- a/raphtory/Cargo.toml +++ b/raphtory/Cargo.toml @@ -85,9 +85,12 @@ async-openai = { workspace = true, optional = true } bincode = { workspace = true, optional = true } minijinja = { workspace = true, optional = true } minijinja-contrib = { workspace = true, optional = true } -arroy = { workspace = true, optional = true } +arroy = { workspace = true, optional = true } # TODO: remove heed = { workspace = true, optional = true } moka = { workspace = true, optional = true } +lancedb = {workspace = true, optional = true } +arrow-array = { workspace = true, features = ["chrono-tz"], optional = true } +axum = "0.8.4" # TODO put this in the proper place and make optional # python binding optional dependencies pyo3 = { workspace = true, optional = true } @@ -143,6 +146,9 @@ vectors = [ "dep:arroy", "dep:heed", "dep:moka", + "dep:lancedb", + "dep:arrow-array", + "dep:tokio", # also used for the io feature "dep:tempfile", # also used for the storage feature ] diff --git a/raphtory/src/errors.rs b/raphtory/src/errors.rs index 0570632543..5450760027 100644 --- a/raphtory/src/errors.rs +++ b/raphtory/src/errors.rs @@ -1,3 +1,5 @@ +#[cfg(feature = "vectors")] +use crate::vectors::embeddings::EmbeddingError; use crate::{ core::storage::lazy_vec::IllegalSet, db::graph::views::filter::model::filter_operator::FilterOperator, prelude::GraphViewOps, @@ -15,6 +17,7 @@ use raphtory_core::{ utils::time::ParseTimeError, }; use raphtory_storage::mutation::MutationError; +#[cfg(feature = "vectors")] use std::{ fmt::Debug, io, @@ -284,7 +287,7 @@ pub enum GraphError { #[error("Embedding operation failed")] EmbeddingError { #[from] - source: Box, + source: EmbeddingError, }, #[cfg(feature = "search")] diff --git a/raphtory/src/python/packages/base_modules.rs b/raphtory/src/python/packages/base_modules.rs index 4a1d507a97..29cf715171 100644 --- a/raphtory/src/python/packages/base_modules.rs +++ b/raphtory/src/python/packages/base_modules.rs @@ -21,7 +21,7 @@ 
use crate::{ algorithms::*, graph_gen::*, graph_loader::*, - vectors::{PyVectorSelection, PyVectorisedGraph}, + vectors::{embedding_server, PyOpenAIEmbeddings, PyVectorSelection, PyVectorisedGraph}, }, types::wrappers::{ document::PyDocument, @@ -195,10 +195,15 @@ pub fn base_graph_gen_module(py: Python<'_>) -> Result, PyEr pub fn base_vectors_module(py: Python<'_>) -> Result, PyErr> { let vectors_module = PyModule::new(py, "vectors")?; - vectors_module.add_class::()?; - vectors_module.add_class::()?; - vectors_module.add_class::()?; - vectors_module.add_class::()?; + add_classes!( + &vectors_module, + PyVectorisedGraph, + PyDocument, + PyEmbedding, + PyVectorSelection, + PyOpenAIEmbeddings + ); + add_functions!(&vectors_module, embedding_server); Ok(vectors_module) } diff --git a/raphtory/src/python/packages/vectors.rs b/raphtory/src/python/packages/vectors.rs index db02094c8e..741f9cb009 100644 --- a/raphtory/src/python/packages/vectors.rs +++ b/raphtory/src/python/packages/vectors.rs @@ -4,11 +4,12 @@ use crate::{ python::{ graph::{edge::PyEdge, node::PyNode, views::graph_view::PyGraphView}, types::wrappers::document::PyDocument, - utils::{execute_async_task, PyNodeRef, PyTime}, + utils::{block_on, execute_async_task, PyNodeRef, PyTime}, }, vectors::{ cache::VectorCache, - embeddings::{EmbeddingFunction, EmbeddingResult}, + custom::{serve_custom_embedding, EmbeddingFunction, EmbeddingServer}, + storage::OpenAIEmbeddings, template::{DocumentTemplate, DEFAULT_EDGE_TEMPLATE, DEFAULT_NODE_TEMPLATE}, vector_selection::DynamicVectorSelection, vectorisable::Vectorisable, @@ -16,17 +17,162 @@ use crate::{ Document, DocumentEntity, Embedding, }, }; -use futures_util::future::BoxFuture; + use itertools::Itertools; use pyo3::{ exceptions::PyTypeError, prelude::*, types::{PyFunction, PyList}, }; -use std::path::PathBuf; +use std::{path::PathBuf, sync::Arc}; +use tokio::runtime::Runtime; type DynamicVectorisedGraph = VectorisedGraph; +#[pyclass(name = "OpenAIEmbeddings")] 
+#[derive(Clone)] +pub struct PyOpenAIEmbeddings { + model: String, + api_base: Option, + api_key_env: Option, + org_id: Option, + project_id: Option, +} + +// TODO text-embedding-3-small as default is duplicated, try to make it only in one place + +#[pymethods] +impl PyOpenAIEmbeddings { + #[new] + #[pyo3(signature = (model="text-embedding-3-small", api_base=None, api_key_env=None, org_id=None, project_id=None))] + fn new( + model: &str, + api_base: Option, + api_key_env: Option, + org_id: Option, + project_id: Option, + ) -> Self { + Self { + model: model.to_owned(), + api_base, + api_key_env, + org_id, + project_id, + } + } +} +impl From for OpenAIEmbeddings { + fn from(value: PyOpenAIEmbeddings) -> Self { + Self { + model: value.model.clone(), + api_base: value.api_base.clone(), + api_key_env: value.api_key_env.clone(), + org_id: value.org_id.clone(), + project_id: value.project_id.clone(), + } + } +} + +impl EmbeddingFunction for Arc> { + fn call(&self, text: &str) -> Vec { + Python::with_gil(|py| { + // TODO: remove unwraps? 
+ let any = self + .call1(py, (text,)) + .inspect_err(|e| println!("{e:?}")) // TODO: remove + .unwrap(); + let list = any.downcast_bound::(py).unwrap(); + list.iter().map(|value| value.extract().unwrap()).collect() + }) + } +} + +#[pyfunction] +pub fn embedding_server(address: String) -> EmbeddingServerDecorator { + EmbeddingServerDecorator { address } +} + +#[pyclass] +struct EmbeddingServerDecorator { + address: String, +} + +#[pymethods] +impl EmbeddingServerDecorator { + fn __call__(&self, function: Py) -> PyEmbeddingServer { + PyEmbeddingServer { + function: function.into(), + address: self.address.clone(), + running: None, + } + } +} + +struct RunningServer { + runtime: Runtime, + server: EmbeddingServer, +} + +#[pyclass(name = "EmbeddingServer")] +pub struct PyEmbeddingServer { + function: Arc>, + address: String, + running: Option, // TODO: use all of these ideas for the GraphServer implementation +} +// TODO: ideally, I should allow users to provide this server object as embedding model, so the fact it has an OpenAI like API is transparent to the user + +impl PyEmbeddingServer { + fn create_running_server(&self) -> RunningServer { + assert!(self.running.is_none()); // TODO: return error + let runtime = build_runtime(); + let server = runtime.block_on(serve_custom_embedding(&self.address, self.function.clone())); + RunningServer { runtime, server } + } +} + +#[pymethods] +impl PyEmbeddingServer { + fn run(&self) { + let running = self.create_running_server(); + running.runtime.block_on(running.server.wait()); + } + + fn start(mut slf: PyRefMut<'_, Self>) { + let running = slf.create_running_server(); + slf.running = Some(running) + } + + fn stop(mut slf: PyRefMut<'_, Self>) { + if let Some(RunningServer { runtime, server }) = &mut slf.running { + runtime.block_on(server.stop()); + slf.running = None + } else { + panic!("nothing to stop") + } + } + + // fn __enter__(slf: PyRefMut<'_, Self>) -> PyRefMut<'_, Self> { + // PyEmbeddingServer::start(&slf); + // 
slf + // } + + // fn __exit__( + // mut slf: PyRefMut<'_, Self>, + // _exc_type: Option, + // _exc_value: Option, + // _traceback: Option, + // ) { + // PyEmbeddingServer::stop(slf); + // } +} + +fn build_runtime() -> Runtime { + tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build() + .unwrap() +} + pub type PyWindow = Option<(PyTime, PyTime)>; pub fn translate_window(window: PyWindow) -> Option<(i64, i64)> { @@ -46,9 +192,9 @@ impl PyQuery { ) -> PyResult { match self { Self::Raw(query) => { - let cache = graph.cache.clone(); + let graph = graph.clone(); let result = Ok(execute_async_task(move || async move { - cache.get_single(query).await + graph.embed_text(query).await })?); result } @@ -150,7 +296,7 @@ impl PyGraphView { #[pyo3(signature = (embedding, nodes = TemplateConfig::Bool(true), edges = TemplateConfig::Bool(true), cache = None, verbose = false))] fn vectorise( &self, - embedding: Bound, + embedding: PyOpenAIEmbeddings, nodes: TemplateConfig, edges: TemplateConfig, cache: Option, @@ -160,15 +306,15 @@ impl PyGraphView { node_template: nodes.get_template_or(DEFAULT_NODE_TEMPLATE), edge_template: edges.get_template_or(DEFAULT_EDGE_TEMPLATE), }; - let embedding = embedding.unbind(); let graph = self.graph.clone(); execute_async_task(move || async move { let cache = if let Some(cache) = cache { - VectorCache::on_disk(&PathBuf::from(cache), embedding).await? + VectorCache::on_disk(&PathBuf::from(cache)).await? } else { - VectorCache::in_memory(embedding) + VectorCache::in_memory() }; - Ok(graph.vectorise(cache, template, None, verbose).await?) + let model = cache.openai(embedding.into()).await?; + Ok(graph.vectorise(model, template, None, verbose).await?) 
}) } } @@ -217,10 +363,10 @@ impl PyVectorisedGraph { self.0.empty_selection() } - /// Search the top scoring entities according to `query` with no more than `limit` entities + /// Search the closest entities to `query` with no more than `limit` entities /// /// Args: - /// query (str | list): the text or the embedding to score against + /// query (str | list): the text or the embedding to calculate the distance from /// limit (int): the maximum number of new entities to search /// window (Tuple[int | str, int | str], optional): the window where documents need to belong to in order to be considered /// @@ -234,15 +380,15 @@ impl PyVectorisedGraph { window: PyWindow, ) -> PyResult { let embedding = query.into_embedding(&self.0)?; - Ok(self - .0 - .entities_by_similarity(&embedding, limit, translate_window(window))?) + let w = translate_window(window); + let s = block_on(self.0.entities_by_similarity(&embedding, limit, w))?; + Ok(s) } - /// Search the top scoring nodes according to `query` with no more than `limit` nodes + /// Search the closest nodes to `query` with no more than `limit` nodes /// /// Args: - /// query (str | list): the text or the embedding to score against + /// query (str | list): the text or the embedding to calculate the distance from /// limit (int): the maximum number of new nodes to search /// window (Tuple[int | str, int | str], optional): the window where documents need to belong to in order to be considered /// @@ -256,15 +402,14 @@ impl PyVectorisedGraph { window: PyWindow, ) -> PyResult { let embedding = query.into_embedding(&self.0)?; - Ok(self - .0 - .nodes_by_similarity(&embedding, limit, translate_window(window))?) + let w = translate_window(window); + Ok(block_on(self.0.nodes_by_similarity(&embedding, limit, w))?) 
} - /// Search the top scoring edges according to `query` with no more than `limit` edges + /// Search the closest edges to `query` with no more than `limit` edges /// /// Args: - /// query (str | list): the text or the embedding to score against + /// query (str | list): the text or the embedding to calculate the distance from /// limit (int): the maximum number of new edges to search /// window (Tuple[int | str, int | str], optional): the window where documents need to belong to in order to be considered /// @@ -278,9 +423,8 @@ impl PyVectorisedGraph { window: PyWindow, ) -> PyResult { let embedding = query.into_embedding(&self.0)?; - Ok(self - .0 - .edges_by_similarity(&embedding, limit, translate_window(window))?) + let w = translate_window(window); + Ok(block_on(self.0.edges_by_similarity(&embedding, limit, w))?) } } @@ -320,20 +464,20 @@ impl PyVectorSelection { /// Returns: /// list[Document]: list of documents in the current selection fn get_documents(&self) -> PyResult>> { - Ok(self.0.get_documents()?) + Ok(block_on(self.0.get_documents())?) } - /// Return the documents alongside their scores present in the current selection + /// Return the documents alongside their distances present in the current selection /// /// Returns: - /// list[Tuple[Document, float]]: list of documents and scores - fn get_documents_with_scores(&self) -> PyResult, f32)>> { - Ok(self.0.get_documents_with_scores()?) + /// list[Tuple[Document, float]]: list of documents and distances + fn get_documents_with_distances(&self) -> PyResult, f32)>> { + Ok(block_on(self.0.get_documents_with_distances())?) } /// Add all the documents associated with the `nodes` to the current selection /// - /// Documents added by this call are assumed to have a score of 0. + /// Documents added by this call are assumed to have a distance of 0. 
/// /// Args: /// nodes (list): a list of the node ids or nodes to add @@ -346,7 +490,7 @@ impl PyVectorSelection { /// Add all the documents associated with the `edges` to the current selection /// - /// Documents added by this call are assumed to have a score of 0. + /// Documents added by this call are assumed to have a distance of 0. /// /// Args: /// edges (list): a list of the edge ids or edges to add @@ -386,19 +530,18 @@ impl PyVectorSelection { self_.0.expand(hops, translate_window(window)) } - /// Add the top `limit` adjacent entities with higher score for `query` to the selection + /// Add to the selection the `limit` adjacent entities closest to `query` /// /// The expansion algorithm is a loop with two steps on each iteration: /// 1. All the entities 1 hop away of some of the entities included on the selection (and /// not already selected) are marked as candidates. - /// 2. Those candidates are added to the selection in descending order according to the - /// similarity score obtained against the `query`. + /// 2. Those candidates are added to the selection in ascending distance from `query`. 
/// /// This loops goes on until the number of new entities reaches a total of `limit` /// entities or until no more documents are available /// /// Args: - /// query (str | list): the text or the embedding to score against + /// query (str | list): the text or the embedding to calculate the distance from /// limit (int): the number of documents to add /// window (Tuple[int | str, int | str], optional): the window where documents need to belong to in order to be considered /// @@ -406,24 +549,24 @@ impl PyVectorSelection { /// None: #[pyo3(signature = (query, limit, window=None))] fn expand_entities_by_similarity( - mut self_: PyRefMut<'_, Self>, + mut slf: PyRefMut<'_, Self>, query: PyQuery, limit: usize, window: PyWindow, ) -> PyResult<()> { - let embedding = query.into_embedding(&self_.0.graph)?; - self_ - .0 - .expand_entities_by_similarity(&embedding, limit, translate_window(window))?; + let embedding = query.into_embedding(&slf.0.graph)?; + let w = translate_window(window); + block_on(slf.0.expand_entities_by_similarity(&embedding, limit, w))?; + Ok(()) } - /// Add the top `limit` adjacent nodes with higher score for `query` to the selection + /// Add to the selection the `limit` adjacent nodes closest to `query` /// /// This function has the same behavior as expand_entities_by_similarity but it only considers nodes. 
/// /// Args: - /// query (str | list): the text or the embedding to score against + /// query (str | list): the text or the embedding to calculate the distance from /// limit (int): the maximum number of new nodes to add /// window (Tuple[int | str, int | str], optional): the window where documents need to belong to in order to be considered /// @@ -431,24 +574,23 @@ impl PyVectorSelection { /// None: #[pyo3(signature = (query, limit, window=None))] fn expand_nodes_by_similarity( - mut self_: PyRefMut<'_, Self>, + mut slf: PyRefMut<'_, Self>, query: PyQuery, limit: usize, window: PyWindow, ) -> PyResult<()> { - let embedding = query.into_embedding(&self_.0.graph)?; - self_ - .0 - .expand_nodes_by_similarity(&embedding, limit, translate_window(window))?; + let embedding = query.into_embedding(&slf.0.graph)?; + let w = translate_window(window); + block_on(slf.0.expand_nodes_by_similarity(&embedding, limit, w))?; Ok(()) } - /// Add the top `limit` adjacent edges with higher score for `query` to the selection + /// Add to the selection the `limit` adjacent edges closest to `query` /// /// This function has the same behavior as expand_entities_by_similarity but it only considers edges. 
/// /// Args: - /// query (str | list): the text or the embedding to score against + /// query (str | list): the text or the embedding to calculate the distance from /// limit (int): the maximum number of new edges to add /// window (Tuple[int | str, int | str], optional): the window where documents need to belong to in order to be considered /// @@ -456,48 +598,14 @@ impl PyVectorSelection { /// None: #[pyo3(signature = (query, limit, window=None))] fn expand_edges_by_similarity( - mut self_: PyRefMut<'_, Self>, + mut slf: PyRefMut<'_, Self>, query: PyQuery, limit: usize, window: PyWindow, ) -> PyResult<()> { - let embedding = query.into_embedding(&self_.0.graph)?; - self_ - .0 - .expand_edges_by_similarity(&embedding, limit, translate_window(window))?; + let embedding = query.into_embedding(&slf.0.graph)?; + let w = translate_window(window); + block_on(slf.0.expand_edges_by_similarity(&embedding, limit, w))?; Ok(()) } } - -impl EmbeddingFunction for Py { - fn call(&self, texts: Vec) -> BoxFuture<'static, EmbeddingResult>> { - let embedding_function = Python::with_gil(|py| self.clone_ref(py)); - Box::pin(async move { - Python::with_gil(|py| { - let embedding_function = embedding_function.bind(py); - let python_texts = PyList::new(py, texts)?; - let result = embedding_function.call1((python_texts,))?; - let embeddings = result.downcast::().map_err(|_| { - PyTypeError::new_err( - "value returned by the embedding function was not a python list", - ) - })?; - - let embeddings: EmbeddingResult> = embeddings - .iter() - .map(|embedding| { - let pylist = embedding.downcast::().map_err(|_| { - PyTypeError::new_err("one of the values in the list returned by the embedding function was not a python list") - })?; - let embedding: EmbeddingResult = pylist - .iter() - .map(|element| Ok(element.extract::()?)) - .collect(); - embedding - }) - .collect(); - embeddings - }) - }) - } -} diff --git a/raphtory/src/python/utils/mod.rs b/raphtory/src/python/utils/mod.rs index 
3bff14f05c..98bbf88816 100644 --- a/raphtory/src/python/utils/mod.rs +++ b/raphtory/src/python/utils/mod.rs @@ -460,7 +460,7 @@ impl<'py> IntoPyObject<'py> for NumpyArray { // This function takes a function that returns a future instead of taking just a future because // a task might return an unsendable future but what we can do is making a function returning that // future which is sendable itself -pub fn execute_async_task(task: T) -> O +pub(crate) fn execute_async_task(task: T) -> O where T: FnOnce() -> F + Send + 'static, F: Future + 'static, @@ -468,7 +468,8 @@ where { Python::with_gil(|py| { py.allow_threads(move || { - // we call `allow_threads` because the task might need to grab the GIL + // we call `allow_threads` because the task might need to grab the GIL // FIXME: this might not be the case anymore, also remember removing the imlpementation of EmbeddingFunction for a python function + // FIXME: why do we need a thread here??? DO I need it as well in the implementation for the VectorisedGraph functions thread::spawn(move || { tokio::runtime::Builder::new_multi_thread() .enable_all() @@ -481,3 +482,11 @@ where }) }) } + +pub fn block_on(future: F) -> F::Output { + tokio::runtime::Builder::new_multi_thread() // TODO: double-check this is fine, with no thread?? 
+ .enable_all() + .build() + .unwrap() + .block_on(future) +} diff --git a/raphtory/src/vectors/cache.rs b/raphtory/src/vectors/cache.rs index 4eb7a2bb3c..82a8ffc3d4 100644 --- a/raphtory/src/vectors/cache.rs +++ b/raphtory/src/vectors/cache.rs @@ -1,5 +1,11 @@ -use super::embeddings::EmbeddingFunction; -use crate::{errors::GraphResult, vectors::Embedding}; +use crate::{ + errors::GraphResult, + vectors::{ + embeddings::{EmbeddingError, EmbeddingModel, ModelConfig}, + storage::OpenAIEmbeddings, + Embedding, + }, +}; use futures_util::StreamExt; use heed::{types::SerdeBincode, Database, Env, EnvOpenOptions}; use moka::future::Cache; @@ -8,18 +14,23 @@ use serde::{Deserialize, Serialize}; use std::{ collections::{HashMap, VecDeque}, hash::{DefaultHasher, Hash, Hasher}, + ops::Deref, path::Path, sync::Arc, + u64, }; +const CONTENT_SAMPLE: &str = "raphtory"; // DON'T CHANGE THIS STRING BY ANY MEANS + const MAX_DISK_ITEMS: usize = 1_000_000; const MAX_VECTOR_DIM: usize = 8960; const MAX_TEXT_LENGTH: usize = 200_000; #[derive(Debug, Serialize, Deserialize, Clone)] struct CacheEntry { - key: String, - value: Embedding, + model: EmbeddingModel, + text: String, + vector: Embedding, } type VectorDb = Database, SerdeBincode>; @@ -32,6 +43,7 @@ impl VectorStore { fn in_memory() -> Self { Self::Mem(Default::default()) } + fn on_disk(path: &Path) -> GraphResult { let _ = std::fs::create_dir_all(path); let page_size = 16384; @@ -102,30 +114,28 @@ impl VectorStore { #[derive(Clone)] pub struct VectorCache { store: Arc, - cache: Cache, - function: Arc, + cache: Arc>, + models: Arc>, // this always lives only in memory, precisely to force resampling from different environments } impl VectorCache { - pub fn in_memory(function: impl EmbeddingFunction + 'static) -> Self { + pub fn in_memory() -> Self { Self { store: VectorStore::in_memory().into(), - cache: Cache::new(10), - function: Arc::new(function), + cache: Cache::new(10).into(), + models: build_model_cache(), } } - pub async fn 
on_disk( - path: &Path, - function: impl EmbeddingFunction + 'static, - ) -> GraphResult { + pub async fn on_disk(path: &Path) -> GraphResult { let store: Arc<_> = VectorStore::on_disk(path)?.into(); let cloned = store.clone(); - let cache: Cache = Cache::builder() + let cache: Arc> = Cache::builder() .max_capacity(MAX_DISK_ITEMS as u64) .eviction_listener(move |key: Arc, _value: (), _cause| cloned.remove(key.as_ref())) - .build(); + .build() + .into(); for key in store.get_disk_keys()? { cache.insert(key, ()).await; @@ -133,31 +143,98 @@ impl VectorCache { Ok(Self { store, - cache, - function: Arc::new(function), + cache: cache.into(), + models: build_model_cache(), + }) + } + + pub async fn openai(&self, config: OpenAIEmbeddings) -> GraphResult { + self.sample_and_cache_model(ModelConfig::OpenAI(config)) + .await + } + + async fn sample_and_cache_model( + &self, + model: ModelConfig, + ) -> GraphResult { + Ok(CachedEmbeddingModel { + cache: self.clone(), + model: self.sample_model(model).await?, }) } - async fn get(&self, text: &str) -> Option { - let hash = hash(text); + pub(super) async fn validate_and_cache_model( + &self, + model: EmbeddingModel, + ) -> GraphResult { + let expected_model = self.sample_model(model.model.clone()).await?; + if model == expected_model { + Ok(CachedEmbeddingModel { + model, + cache: self.clone(), + }) + } else { + panic!("") // TODO: turn this into an error + } + } + + async fn sample_model(&self, config: ModelConfig) -> GraphResult { + let cloned_config = config.clone(); + let model = self + .models + .try_get_with(config, async { + let mut vectors = cloned_config.call(vec![CONTENT_SAMPLE.to_owned()]).await?; + let sample = vectors.remove(0); + Ok(EmbeddingModel { + model: cloned_config, + sample, + }) + }) + .await + .map_err(|error: Arc| { + let inner: &EmbeddingError = error.deref(); + inner.clone() + })?; + Ok(model) + } + + async fn get(&self, model: &EmbeddingModel, text: &str) -> Option { + let hash = hash(model, text); 
self.cache.get(&hash).await?; let entry = self.store.get(&hash)?; - if entry.key == text { - Some(entry.value) + if &entry.model == model && entry.text == text { + Some(entry.vector) } else { None } } - async fn insert(&self, text: String, vector: Embedding) { - let hash = hash(&text); + async fn insert(&self, model: EmbeddingModel, text: String, vector: Embedding) { + let hash = hash(&model, &text); let entry = CacheEntry { - key: text, - value: vector, + model, + text, + vector, }; self.store.insert(hash, entry); self.cache.insert(hash, ()).await; } +} + +fn build_model_cache() -> Arc> { + Cache::new(u64::MAX).into() +} + +#[derive(Clone)] +pub struct CachedEmbeddingModel { + cache: VectorCache, // TODO: review if ok using here a parking_lot::RwLock + pub(super) model: EmbeddingModel, // this is kind of duplicated, but enables skipping the rwlock +} + +impl CachedEmbeddingModel { + pub(super) fn get_sample(&self) -> &Embedding { + &self.model.sample + } pub(super) async fn get_embeddings( &self, @@ -166,7 +243,7 @@ impl VectorCache { // TODO: review, turned this into a vec only to make compute_embeddings work let results: Vec<_> = futures_util::stream::iter(texts) .then(|text| async move { - match self.get(&text).await { + match self.cache.get(&self.model, &text).await { Some(cached) => (text, Some(cached)), None => (text, None), } @@ -181,12 +258,12 @@ impl VectorCache { }) .collect(); let mut fresh_vectors: VecDeque<_> = if !misses.is_empty() { - self.function.call(misses.clone()).await?.into() + self.model.call(misses.clone()).await?.into() } else { vec![].into() }; futures_util::stream::iter(misses.into_iter().zip(fresh_vectors.iter().cloned())) - .for_each(|(text, vector)| self.insert(text, vector)) + .for_each(|(text, vector)| self.cache.insert(self.model.clone(), text, vector)) .await; let embeddings = results.into_iter().map(move |(_, vector)| match vector { Some(vector) => vector, @@ -195,14 +272,15 @@ impl VectorCache { Ok(embeddings) } - pub async fn 
get_single(&self, text: String) -> GraphResult { + pub(super) async fn get_single(&self, text: String) -> GraphResult { let mut embeddings = self.get_embeddings(vec![text]).await?; Ok(embeddings.next().unwrap()) } } -fn hash(text: &str) -> u64 { +fn hash(model: &EmbeddingModel, text: &str) -> u64 { let mut hasher = DefaultHasher::new(); + model.hash(&mut hasher); text.hash(&mut hasher); hasher.finish() } @@ -211,65 +289,107 @@ fn hash(text: &str) -> u64 { mod cache_tests { use tempfile::tempdir; - use crate::vectors::{embeddings::EmbeddingResult, Embedding}; + use crate::vectors::{ + cache::{CachedEmbeddingModel, CONTENT_SAMPLE}, + embeddings::{EmbeddingModel, ModelConfig}, + storage::OpenAIEmbeddings, + Embedding, + }; use super::VectorCache; - async fn placeholder_embedding(texts: Vec) -> EmbeddingResult> { - dbg!(texts); - todo!() + #[test] + fn test_vector_sample_remains_unchanged() { + assert_eq!(CONTENT_SAMPLE, "raphtory"); + } + + #[tokio::test] + async fn test_empty_request() { + let model = CachedEmbeddingModel { + cache: VectorCache::in_memory(), + model: EmbeddingModel { + // this model will definetely error out if called, as the api base is invalid + model: ModelConfig::OpenAI(OpenAIEmbeddings { + api_base: Some("invalid-api-base".to_owned()), + ..Default::default() + }), + sample: vec![1.0].into(), + }, + }; + let result: Vec<_> = model.get_embeddings(vec![]).await.unwrap().collect(); + assert_eq!(result, vec![]); } async fn test_abstract_cache(cache: VectorCache) { let vector_a: Embedding = [1.0].into(); + let vector_a_alt: Embedding = [1.0, 0.0].into(); let vector_b: Embedding = [0.5].into(); - assert_eq!(cache.get("a").await, None); - assert_eq!(cache.get("b").await, None); + // TOOD: try to do this using VectorCache::in_memory().openai() + let model_a = EmbeddingModel { + model: ModelConfig::OpenAI(Default::default()), + sample: vec![1.0].into(), + }; + let model_b = EmbeddingModel { + model: ModelConfig::OpenAI(Default::default()), + sample: 
vec![0.0, 1.0].into(), + }; + + assert_eq!(cache.get(&model_a, "a").await, None); + assert_eq!(cache.get(&model_b, "a").await, None); + assert_eq!(cache.get(&model_a, "b").await, None); + + cache + .insert(model_a.clone(), "a".to_owned(), vector_a.clone()) + .await; + assert_eq!(cache.get(&model_a, "a").await, Some(vector_a.clone())); + assert_eq!(cache.get(&model_b, "a").await, None); + assert_eq!(cache.get(&model_a, "b").await, None); - cache.insert("a".to_owned(), vector_a.clone()).await; - assert_eq!(cache.get("a").await, Some(vector_a.clone())); - assert_eq!(cache.get("b").await, None); + cache + .insert(model_b.clone(), "a".to_owned(), vector_a_alt.clone()) + .await; + assert_eq!(cache.get(&model_a, "a").await, Some(vector_a.clone())); + assert_eq!(cache.get(&model_b, "a").await, Some(vector_a_alt.clone())); + assert_eq!(cache.get(&model_a, "b").await, None); - cache.insert("b".to_owned(), vector_b.clone()).await; - assert_eq!(cache.get("a").await, Some(vector_a)); - assert_eq!(cache.get("b").await, Some(vector_b)); + cache + .insert(model_a.clone(), "b".to_owned(), vector_b.clone()) + .await; + assert_eq!(cache.get(&model_a, "a").await, Some(vector_a)); + assert_eq!(cache.get(&model_b, "a").await, Some(vector_a_alt)); + assert_eq!(cache.get(&model_a, "b").await, Some(vector_b)); } #[tokio::test] - async fn test_empty_request() { - let cache = VectorCache::in_memory(placeholder_embedding); - let result: Vec<_> = cache.get_embeddings(vec![]).await.unwrap().collect(); - assert_eq!(result, vec![]); + async fn test_in_memory_cache() { + let cache = VectorCache::in_memory(); + test_abstract_cache(cache).await; } #[tokio::test] - async fn test_cache() { - test_abstract_cache(VectorCache::in_memory(placeholder_embedding)).await; + async fn test_on_disk_cache() { let dir = tempdir().unwrap(); - test_abstract_cache( - VectorCache::on_disk(dir.path(), placeholder_embedding) - .await - .unwrap(), - ) - .await; + 
test_abstract_cache(VectorCache::on_disk(dir.path()).await.unwrap()).await; } #[tokio::test] - async fn test_on_disk_cache() { + async fn test_on_disk_cache_loading() { + let model = EmbeddingModel { + model: ModelConfig::OpenAI(Default::default()), + sample: vec![1.0].into(), + }; let vector: Embedding = [1.0].into(); let dir = tempdir().unwrap(); { - let cache = VectorCache::on_disk(dir.path(), placeholder_embedding) - .await - .unwrap(); - cache.insert("a".to_owned(), vector.clone()).await; - } // here the heed env gets closed - - let loaded_from_disk = VectorCache::on_disk(dir.path(), placeholder_embedding) - .await - .unwrap(); - assert_eq!(loaded_from_disk.get("a").await, Some(vector)) + let cache = VectorCache::on_disk(dir.path()).await.unwrap(); + cache + .insert(model.clone(), "a".to_owned(), vector.clone()) + .await; + } // here the heed env gets dropped, maybe we should find some key value store that doesn't need us to do this + + let loaded_from_disk = VectorCache::on_disk(dir.path()).await.unwrap(); + assert_eq!(loaded_from_disk.get(&model, "a").await, Some(vector)) } } diff --git a/raphtory/src/vectors/custom.rs b/raphtory/src/vectors/custom.rs new file mode 100644 index 0000000000..9635331d59 --- /dev/null +++ b/raphtory/src/vectors/custom.rs @@ -0,0 +1,103 @@ +use async_openai::types::{CreateEmbeddingResponse, Embedding, EmbeddingUsage}; +use axum::{ + extract::{Json, State}, + routing::post, + Router, +}; +use serde::Deserialize; +use std::sync::Arc; +use tokio::{sync::mpsc, task::JoinHandle}; + +#[derive(Deserialize, Debug)] +struct EmbeddingRequest { + input: Vec, +} + +// #[derive(Serialize)] +// struct EmbeddingResponse { +// object: String, +// data: Vec, +// } + +// #[derive(Serialize)] +// struct EmbeddingData { +// object: String, +// embedding: Vec, +// index: usize, +// } + +async fn embeddings( + State(function): State>, + Json(req): Json, +) -> Json { + let data = req + .input + .iter() + .enumerate() + .map(|(i, t)| Embedding { + 
index: i as u32, + object: "embedding".into(), + embedding: function.call(t), + }) + .collect(); + + Json(CreateEmbeddingResponse { + object: "list".into(), + data, + model: "".to_owned(), + usage: EmbeddingUsage { + prompt_tokens: 0, + total_tokens: 0, + }, + }) +} + +pub struct EmbeddingServer { + execution: JoinHandle<()>, + stop_signal: tokio::sync::mpsc::Sender<()>, +} + +impl EmbeddingServer { + pub async fn wait(self) { + self.execution.await.unwrap(); + } + + pub async fn stop(&self) { + self.stop_signal.send(()).await.unwrap(); + } +} + +/// Runs the embedding server on the given port based on the provided function. The address can be for instance "0.0.0.0:3000" +pub async fn serve_custom_embedding( + address: &str, + function: impl EmbeddingFunction, +) -> EmbeddingServer { + let state = Arc::new(function); + let app = Router::new() + .route("/embeddings", post(embeddings)) // TODO: this should be /v1/embeddings if we were to support multiple versions + .with_state(state); + let listener = tokio::net::TcpListener::bind(address).await.unwrap(); + let (sender, mut receiver) = mpsc::channel(1); + let execution = tokio::spawn(async { + axum::serve(listener, app) + .with_graceful_shutdown(async move { + receiver.recv().await; // TODO: add CTRL + C + }) + .await + .unwrap(); + }); + EmbeddingServer { + execution, + stop_signal: sender, + } +} + +pub trait EmbeddingFunction: Send + Sync + 'static { + fn call(&self, text: &str) -> Vec; +} + +impl Vec + Send + Sync + 'static> EmbeddingFunction for F { + fn call(&self, text: &str) -> Vec { + self(text) + } +} diff --git a/raphtory/src/vectors/db.rs b/raphtory/src/vectors/db.rs deleted file mode 100644 index 617e4e6a52..0000000000 --- a/raphtory/src/vectors/db.rs +++ /dev/null @@ -1,281 +0,0 @@ -use std::{ - collections::HashSet, - ops::Deref, - path::{Path, PathBuf}, - sync::{Arc, OnceLock}, -}; - -use arroy::{distances::Cosine, Database as ArroyDatabase, Reader, Writer}; -use futures_util::StreamExt; -use 
rand::{rngs::StdRng, SeedableRng}; -use tempfile::TempDir; - -use super::{ - entity_ref::{EntityRef, IntoDbId}, - Embedding, -}; -use crate::{ - db::api::view::StaticGraphViewOps, - errors::{GraphError, GraphResult}, - prelude::GraphViewOps, -}; - -const LMDB_MAX_SIZE: usize = 1024 * 1024 * 1024 * 1024; // 1TB - -#[derive(Clone)] -pub(super) struct NodeDb(pub(super) VectorDb); - -impl Deref for NodeDb { - type Target = VectorDb; - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl EntityDb for NodeDb { - fn from_vector_db(db: VectorDb) -> Self { - Self(db) - } - - fn get_db(&self) -> &VectorDb { - &self.0 - } - - fn into_entity_ref(id: u32) -> EntityRef { - EntityRef::Node(id) - } - - fn view_has_entity(entity: &EntityRef, view: &G) -> bool { - view.has_node(entity.as_node_gid(view).unwrap()) - } - - fn all_valid_entities(view: G) -> impl Iterator { - view.nodes().into_iter().map(|node| node.into_db_id()) - } -} - -#[derive(Clone)] -pub(super) struct EdgeDb(pub(super) VectorDb); - -impl Deref for EdgeDb { - type Target = VectorDb; - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl EntityDb for EdgeDb { - fn from_vector_db(db: VectorDb) -> Self { - Self(db) - } - - fn get_db(&self) -> &VectorDb { - &self.0 - } - - fn into_entity_ref(id: u32) -> EntityRef { - EntityRef::Edge(id) - } - - fn view_has_entity(entity: &EntityRef, view: &G) -> bool { - let (src, dst) = entity.as_edge_gids(view).unwrap(); - view.has_edge(src, dst) // TODO: there should be a quicker way of chking of some edge exist by pid - } - - fn all_valid_entities(view: G) -> impl Iterator { - view.edges().into_iter().map(|edge| edge.into_db_id()) - } -} - -pub(super) trait EntityDb: Sized { - fn from_vector_db(db: VectorDb) -> Self; - fn get_db(&self) -> &VectorDb; - fn into_entity_ref(id: u32) -> EntityRef; - fn view_has_entity(entity: &EntityRef, view: &G) -> bool; - fn all_valid_entities(view: G) -> impl Iterator + 'static; - - async fn from_vectors( - vectors: impl 
futures_util::Stream> + Send, - path: Option, - ) -> GraphResult { - let db = VectorDb::from_vectors(vectors, path).await?; - Ok(Self::from_vector_db(db)) - } - - fn from_path(path: &Path) -> GraphResult { - VectorDb::from_path(path).map(Self::from_vector_db) - } - - fn top_k( - &self, - query: &Embedding, - k: usize, - view: Option, - filter: Option>, - ) -> GraphResult> { - let candidates: Option>> = match (view, filter) { - (None, None) => None, - (view, Some(filter)) => Some(Box::new( - filter - .into_iter() - .filter(move |entity| { - view.as_ref() - .is_none_or(|view| Self::view_has_entity(entity, view)) - }) - .map(|entity| entity.id()), - )), - (Some(view), None) => Some(Box::new(Self::all_valid_entities(view))), - }; - self.top_k_with_candidates(query, k, candidates) - } - - fn top_k_with_candidates( - &self, - query: &Embedding, - k: usize, - candidates: Option>, - ) -> GraphResult> { - let db = self.get_db(); - let rtxn = db.env.read_txn()?; - let vectors = match Reader::open(&rtxn, 0, db.vectors) { - Ok(reader) => { - let mut query_builder = reader.nns(k); - let candidates = candidates.map(|filter| roaring::RoaringBitmap::from_iter(filter)); - let query_builder = if let Some(filter) = &candidates { - query_builder.candidates(filter) - } else { - &query_builder - }; - query_builder.by_vector(&rtxn, query.as_ref())? - } - Err(arroy::Error::MissingMetadata(_)) => vec![], // this just means the db is empty - Err(error) => return Err(error.into()), - }; - Ok(vectors - .into_iter() - // for arroy, distance = (1.0 - score) / 2.0, where score is cosine: [-1, 1] - .map(|(id, distance)| (Self::into_entity_ref(id), 1.0 - 2.0 * distance))) - } -} - -#[derive(Clone)] -pub(crate) struct VectorDb { - pub(crate) vectors: ArroyDatabase, - pub(crate) env: heed::Env, - pub(crate) _tempdir: Option>, // do we really need this, is the file open not enough - pub(crate) dimensions: OnceLock, -} - -impl VectorDb { - /// Insert a collection of vectors into the database. 
- /// - /// # Arguments - /// - /// * `embeddings` - A vector of tuples containing the IDs and embeddings to insert. - pub(super) fn insert_vectors(&self, embeddings: Vec<(usize, Embedding)>) -> GraphResult<()> { - if embeddings.is_empty() { - return Ok(()); - } - - let mut wtxn = self.env.write_txn()?; - - let dimensions = self.dimensions.get_or_init(|| embeddings[0].1.len()); - let writer = Writer::::new(self.vectors, 0, *dimensions); - - for (id, embedding) in embeddings { - writer.add_item(&mut wtxn, id as u32, embedding.as_ref())?; - } - - let mut rng = StdRng::from_entropy(); - writer.builder(&mut rng).build(&mut wtxn)?; - - wtxn.commit()?; - Ok(()) - } - - pub(super) fn get_id(&self, id: u32) -> GraphResult> { - let rtxn = self.env.read_txn()?; - let reader = Reader::open(&rtxn, 0, self.vectors)?; - let vector = reader.item_vector(&rtxn, id)?; - Ok(vector.map(|vector| vector.into())) - } - - fn from_path(path: &Path) -> GraphResult { - let env = open_env(path)?; - let rtxn = env.read_txn()?; - let db: ArroyDatabase = env - .open_database(&rtxn, None)? 
- .ok_or_else(|| GraphError::VectorDbDoesntExist(path.display().to_string()))?; - let first_vector = Reader::open(&rtxn, 0, db) - .ok() - .and_then(|reader| reader.iter(&rtxn).ok()?.next()?.ok()); - let dimensions = if let Some((_, vector)) = first_vector { - vector.len().into() - } else { - OnceLock::new() - }; - rtxn.commit()?; - Ok(Self { - vectors: db, - env, - _tempdir: None, - dimensions, - }) - } - - async fn from_vectors( - vectors: impl futures_util::Stream> + Send, - path: Option, - ) -> GraphResult { - let (env, tempdir) = match path { - Some(path) => { - std::fs::create_dir_all(&path)?; - (open_env(&path)?, None) - } - None => { - let tempdir = tempfile::tempdir()?; - (open_env(tempdir.path())?, Some(tempdir.into())) - } - }; - - let mut wtxn = env.write_txn()?; - let db: ArroyDatabase = env.create_database(&mut wtxn, None)?; - - futures_util::pin_mut!(vectors); - let first_vector = vectors.next().await; - let dimensions = if let Some(Ok((first_id, first_vector))) = first_vector { - let dimensions = first_vector.len(); - let writer = Writer::::new(db, 0, dimensions); - - writer.add_item(&mut wtxn, first_id, &first_vector)?; - while let Some(result) = vectors.next().await { - let (id, vector) = result?; - writer.add_item(&mut wtxn, id, &vector)?; - } - - // TODO: review this -> You can specify the number of trees to use or specify None. 
- let mut rng = StdRng::seed_from_u64(42); - writer.builder(&mut rng).build(&mut wtxn)?; - dimensions.into() - } else { - OnceLock::new() - }; - - wtxn.commit()?; - - Ok(Self { - vectors: db, - env, - _tempdir: tempdir, - dimensions, - }) - } -} - -fn open_env(path: &Path) -> heed::Result { - unsafe { - heed::EnvOpenOptions::new() - .map_size(LMDB_MAX_SIZE) - .open(path) - } -} diff --git a/raphtory/src/vectors/embeddings.rs b/raphtory/src/vectors/embeddings.rs index cd9fa795d8..58d6ff7051 100644 --- a/raphtory/src/vectors/embeddings.rs +++ b/raphtory/src/vectors/embeddings.rs @@ -1,70 +1,116 @@ -use super::cache::VectorCache; -use crate::{errors::GraphResult, vectors::Embedding}; +use std::{ + hash::{Hash, Hasher}, + ops::Deref, + pin::Pin, + sync::Arc, +}; + use async_openai::{ types::{CreateEmbeddingRequest, EmbeddingInput}, Client, }; use futures_util::{future::BoxFuture, Stream, StreamExt}; -use std::{future::Future, ops::Deref, pin::Pin, sync::Arc}; -use tracing::info; +use serde::{Deserialize, Serialize}; + +use crate::{ + errors::GraphResult, + vectors::{cache::CachedEmbeddingModel, storage::OpenAIEmbeddings, Embedding}, +}; const CHUNK_SIZE: usize = 1000; -pub(crate) type EmbeddingError = Box; +// this is an Arc to allow cloning even if the underlying type doesn't allow it +// the underlying type depends on the embedding model implementation in use +pub(crate) type EmbeddingError = Arc; pub type EmbeddingResult = Result; -pub trait EmbeddingFunction: Send + Sync { - fn call(&self, texts: Vec) -> BoxFuture<'static, EmbeddingResult>>; +#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, Hash)] +pub enum ModelConfig { + OpenAI(OpenAIEmbeddings), } -impl EmbeddingFunction for T -where - T: Fn(Vec) -> F + Send + Sync, - F: Future>> + Send + 'static, -{ - fn call(&self, texts: Vec) -> BoxFuture<'static, EmbeddingResult>> { - Box::pin(self(texts)) +impl ModelConfig { + pub(super) async fn call(&self, texts: Vec) -> EmbeddingResult> { + match self { + 
ModelConfig::OpenAI(model) => model.call(texts).await, + } } } -impl EmbeddingFunction for Arc { - fn call(&self, texts: Vec) -> BoxFuture<'static, EmbeddingResult>> { - Box::pin(self.deref().call(texts)) +#[derive(Serialize, Deserialize, PartialEq, Clone, Debug)] +pub struct EmbeddingModel { + pub(super) model: ModelConfig, + pub(super) sample: Embedding, +} + +impl Hash for EmbeddingModel { + fn hash(&self, state: &mut H) { + self.model.hash(state); + for &x in self.sample.iter() { + // This way, embeddings with the same values (including +0.0 vs -0.0, different NaNs) hash consistently. + x.to_bits().hash(state); + } } } -pub async fn openai_embedding(texts: Vec) -> EmbeddingResult> { - info!("computing embeddings for {} texts", texts.len()); - let client = Client::new(); - let request = CreateEmbeddingRequest { - model: "text-embedding-ada-002".to_owned(), - input: EmbeddingInput::StringArray(texts), - user: None, - encoding_format: None, - dimensions: None, - }; - let response = client.embeddings().create(request).await?; - info!("Generated embeddings successfully"); - Ok(response - .data - .into_iter() - .map(|e| e.embedding.into()) - .collect()) +// this is just so that we can call model.call() on an embeddig model +impl Deref for EmbeddingModel { + type Target = ModelConfig; + fn deref(&self) -> &Self::Target { + &self.model + } +} + +impl EmbeddingModel { + pub(super) fn call( + &self, + texts: Vec, + ) -> BoxFuture<'static, EmbeddingResult>> { + match &self.model { + ModelConfig::OpenAI(embeddings) => embeddings.call(texts), + } + } +} + +impl OpenAIEmbeddings { + fn call(&self, texts: Vec) -> BoxFuture<'static, EmbeddingResult>> { + let client = Client::with_config(self.resolve_config()); + let request = CreateEmbeddingRequest { + model: self.model.clone(), + input: EmbeddingInput::StringArray(texts), + user: None, + encoding_format: None, + dimensions: None, + }; + + Box::pin(async move { + let response = client + .embeddings() + .create(request) + 
.await + .map_err(|err| Arc::new(err) as Arc)?; + Ok(response + .data + .into_iter() + .map(|e| e.embedding.into()) + .collect()) + }) + } } pub(super) fn compute_embeddings<'a, I>( documents: I, - cache: &'a VectorCache, -) -> impl Stream> + Send + 'a + model: &'a CachedEmbeddingModel, +) -> impl Stream> + Send + 'a where - I: Iterator + Send + 'a, + I: Iterator + Send + 'a, { futures_util::stream::iter(documents) .chunks(CHUNK_SIZE) .then(|chunk| async { let texts = chunk.iter().map(|(_, text)| text.clone()).collect(); - let stream: Pin> + Send>> = - match cache.get_embeddings(texts).await { + let stream: Pin> + Send>> = + match model.get_embeddings(texts).await { Ok(embeddings) => { let embedded: Vec<_> = chunk .into_iter() diff --git a/raphtory/src/vectors/entity_db.rs b/raphtory/src/vectors/entity_db.rs new file mode 100644 index 0000000000..0a5e7425c3 --- /dev/null +++ b/raphtory/src/vectors/entity_db.rs @@ -0,0 +1,150 @@ +use std::{collections::HashSet, ops::Deref, path::Path}; + +use arroy::Reader; +use futures_util::StreamExt; + +use super::{ + entity_ref::{EntityRef, IntoDbId}, + Embedding, +}; +use crate::{ + db::api::view::StaticGraphViewOps, errors::GraphResult, prelude::GraphViewOps, + vectors::vector_collection::VectorCollection, +}; + +const LMDB_MAX_SIZE: usize = 1024 * 1024 * 1024 * 1024; // 1TB + +#[derive(Clone)] +pub(super) struct NodeDb(pub(super) D); + +impl Deref for NodeDb { + type Target = D; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl EntityDb for NodeDb { + type VectorDb = D; + + fn get_db(&self) -> &Self::VectorDb { + &self.0 + } + + fn into_entity_ref(id: u64) -> EntityRef { + EntityRef::Node(id) + } + + fn view_has_entity(entity: &EntityRef, view: &G) -> bool { + view.has_node(entity.as_node_gid(view).unwrap()) + } + + fn all_valid_entities(view: G) -> impl Iterator + Send { + view.nodes().into_iter().map(|node| node.into_db_id()) + } +} + +#[derive(Clone)] +pub(super) struct EdgeDb(pub(super) D); + +impl Deref for 
EdgeDb { + type Target = D; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl EntityDb for EdgeDb { + type VectorDb = D; + + fn get_db(&self) -> &Self::VectorDb { + &self.0 + } + + fn into_entity_ref(id: u64) -> EntityRef { + EntityRef::Edge(id) + } + + fn view_has_entity(entity: &EntityRef, view: &G) -> bool { + let (src, dst) = entity.as_edge_gids(view).unwrap(); + view.has_edge(src, dst) // TODO: there should be a quicker way of chking of some edge exist by pid + } + + fn all_valid_entities(view: G) -> impl Iterator + Send { + view.edges().into_iter().map(|edge| edge.into_db_id()) + } +} + +pub(super) trait EntityDb: Sized { + type VectorDb: VectorCollection; + fn get_db(&self) -> &Self::VectorDb; + fn into_entity_ref(id: u64) -> EntityRef; + fn view_has_entity(entity: &EntityRef, view: &G) -> bool; + fn all_valid_entities( + view: G, + ) -> impl Iterator + Send + 'static; + + // async fn from_vectors( + // vectors: impl futures_util::Stream> + Send, + // path: Option, + // ) -> GraphResult { + // let db = VectorDb::from_vectors(vectors, path).await?; + // Ok(Self::from_vector_db(db)) + // } + + // fn from_path(path: &Path) -> GraphResult { + // VectorDb::from_path(path).map(Self::from_vector_db) + // } + + // fn from_path(path: &Path) -> GraphResult { + // todo!() // TODO: remove this function, only here for compilation + // } + + // maybe use this if the parallel version doesn't really improve things + async fn insert_vector_stream( + &self, + vectors: impl futures_util::Stream> + Send, + ) -> GraphResult<()> { + futures_util::pin_mut!(vectors); + + while let Some(result) = vectors.as_mut().chunks(1000).next().await { + let vector_result: Vec<(u64, Embedding)> = result + .into_iter() + .map(|result| result.unwrap()) + .map(|(id, vector)| (id, vector)) + .collect(); + let ids = vector_result.iter().map(|(id, _)| *id).collect(); + let vectors = vector_result.into_iter().map(|(_, vector)| vector); + self.get_db().insert_vectors(ids, 
vectors).await.unwrap() + } + Ok(()) + } + + // TODO: return here the cosine instead of the distance? + async fn top_k( + &self, + query: &Embedding, + k: usize, + view: Option, + filter: Option>, + ) -> GraphResult + Send> { + let candidates: Option + Send>> = match (view, filter) { + (None, None) => None, + (view, Some(filter)) => Some(Box::new( + filter + .into_iter() + .filter(move |entity| { + view.as_ref() + .is_none_or(|view| Self::view_has_entity(entity, view)) + }) + .map(|entity| entity.id()), + )), + (Some(view), None) => Some(Box::new(Self::all_valid_entities(view))), + }; + Ok(self + .get_db() + .top_k_with_distances(query, k, candidates) + .await? + .map(|(id, distance)| (Self::into_entity_ref(id), distance))) + } +} diff --git a/raphtory/src/vectors/entity_ref.rs b/raphtory/src/vectors/entity_ref.rs index e953c9d8a6..28797e0dc4 100644 --- a/raphtory/src/vectors/entity_ref.rs +++ b/raphtory/src/vectors/entity_ref.rs @@ -11,8 +11,8 @@ use raphtory_storage::graph::edges::edge_storage_ops::EdgeStorageOps; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub(super) enum EntityRef { - Node(u32), - Edge(u32), + Node(u64), + Edge(u64), } impl From> for EntityRef { @@ -28,7 +28,7 @@ impl From> for EntityRef { } impl EntityRef { - pub(super) fn id(&self) -> u32 { + pub(super) fn id(&self) -> u64 { match self { EntityRef::Node(id) => *id, EntityRef::Edge(id) => *id, @@ -79,18 +79,19 @@ impl EntityRef { } } +// TODO: make sure I use this everywhere pub(super) trait IntoDbId { - fn into_db_id(self) -> u32; + fn into_db_id(self) -> u64; } impl IntoDbId for NodeView<'static, G> { - fn into_db_id(self) -> u32 { - self.node.index() as u32 + fn into_db_id(self) -> u64 { + self.node.index() as u64 } } impl IntoDbId for EdgeView { - fn into_db_id(self) -> u32 { - self.edge.pid().0 as u32 + fn into_db_id(self) -> u64 { + self.edge.pid().0 as u64 } } diff --git a/raphtory/src/vectors/mod.rs b/raphtory/src/vectors/mod.rs index f0bfacaee8..17457b3407 100644 --- 
a/raphtory/src/vectors/mod.rs +++ b/raphtory/src/vectors/mod.rs @@ -5,14 +5,16 @@ use crate::db::{ use std::sync::Arc; pub mod cache; +pub mod custom; pub mod datetimeformat; -mod db; pub mod embeddings; +mod entity_db; mod entity_ref; pub mod splitting; -mod storage; +pub mod storage; // TODO: re-export Embeddings instead of making this public pub mod template; mod utils; +mod vector_collection; pub mod vector_selection; pub mod vectorisable; pub mod vectorised_graph; @@ -34,30 +36,60 @@ pub struct Document { #[cfg(test)] mod vector_tests { - use super::{embeddings::EmbeddingResult, *}; + use super::embeddings::EmbeddingResult; use crate::{ prelude::*, - vectors::{cache::VectorCache, embeddings::openai_embedding, vectorisable::Vectorisable}, + vectors::{ + cache::{CachedEmbeddingModel, VectorCache}, + custom::serve_custom_embedding, + embeddings::EmbeddingModel, + storage::OpenAIEmbeddings, + template::DocumentTemplate, + vectorisable::Vectorisable, + Embedding, + }, }; use itertools::Itertools; use raphtory_api::core::entities::properties::prop::Prop; - use std::{fs::remove_dir_all, path::PathBuf}; - use template::DocumentTemplate; + use std::{fs::remove_dir_all, path::PathBuf, sync::Arc, time::Duration}; use tokio; const NO_PROPS: [(&str, Prop); 0] = []; - async fn fake_embedding(texts: Vec) -> EmbeddingResult> { - Ok(texts - .into_iter() - .map(|_| vec![1.0, 0.0, 0.0].into()) - .collect_vec()) + fn fake_embedding(text: &str) -> Vec { + println!("Creating fake embedding for {text}"); + vec![1.0, 0.0, 0.0] } - async fn panicking_embedding(_texts: Vec) -> EmbeddingResult> { + async fn use_fake_model() -> CachedEmbeddingModel { + tokio::spawn(async { + let running = serve_custom_embedding("0.0.0.0:3070", fake_embedding).await; + running.wait().await; + }); + tokio::time::sleep(Duration::from_secs(1)).await; + VectorCache::in_memory() + .openai(OpenAIEmbeddings { + api_base: Some("http://localhost:3070".to_owned()), // FIXME: the fact that I need to write /v1 is not 
ideal? + ..Default::default() + }) + .await + .unwrap() + } + + fn panicking_embedding(_text: &str) -> Vec { panic!("embedding function was called") } + async fn use_panicking_model_config() -> OpenAIEmbeddings { + tokio::spawn(async { + serve_custom_embedding("0.0.0.0:3071", panicking_embedding).await; + }); + OpenAIEmbeddings { + api_base: Some("http://localhost:3071/v1".to_owned()), // FIXME: the fact that I need to write /v1 is not ideal? + ..Default::default() + } + } + fn custom_template() -> DocumentTemplate { DocumentTemplate { node_template: Some( @@ -70,49 +102,63 @@ mod vector_tests { } } - #[tokio::test] - async fn test_embedding_cache() { - let template = custom_template(); - let g = Graph::new(); - g.add_node(0, "test", NO_PROPS, None).unwrap(); - - let path = PathBuf::from("/tmp/raphtory/very/deep/path/embedding-cache-test"); - let _ = remove_dir_all(&path); - - // the following creates the embeddings, and store them on the cache - { - let cache = VectorCache::on_disk(&path, fake_embedding).await.unwrap(); - g.vectorise(cache, template.clone(), None, false) - .await - .unwrap(); - } // the cache gets dropped here and the heed env released - - // the following uses the embeddings from the cache, so it doesn't call the panicking - // embedding, which would make the test fail - let cache = VectorCache::on_disk(&path, panicking_embedding) - .await - .unwrap(); - g.vectorise(cache, template, None, false).await.unwrap(); - } + // TODO: bring this back + // the point of this test was double-checking when the same query comes in again, + // the cached result is used intead of calling the model again + // I need to find an alternative way + // might be having an embedding model that always returns something to "raphtory" + // but only returns some answer the first time it gets some text, errors out the second time + // Another option might be having a model that returns always the same embedding for "raphtory" + // but increments for the rest of the texts + 
// can then validate the answer didn't change, which means the cache was used + // #[tokio::test] + // async fn test_embedding_cache() { + // let template = custom_template(); + // let g = Graph::new(); + // g.add_node(0, "test", NO_PROPS, None).unwrap(); + + // let path = PathBuf::from("/tmp/raphtory/very/deep/path/embedding-cache-test"); + // let _ = remove_dir_all(&path); + // let config = use_panicking_model_config().await; + + // let cache = VectorCache::on_disk(&path).await.unwrap(); + // let model = cache.openai(config).await.unwrap(); + // g.vectorise(model, template.clone(), None, false) + // .await + // .unwrap(); + + // // the following uses the embeddings from the cache, so it doesn't call the panicking + // // embedding, which would make the test fail + // let cache = VectorCache::on_disk(&path).await.unwrap(); + // let model = cache + // .cache_model( + // EmbeddingModel::custom(panicking_embedding) + // .sampled() + // .await + // .unwrap(), + // ) + // .await + // .unwrap(); + // g.vectorise(model, template, None, false).await.unwrap(); + // } #[tokio::test] async fn test_empty_graph() { let template = custom_template(); let g = Graph::new(); - let cache = VectorCache::in_memory(fake_embedding); - let vectors = g.vectorise(cache, template, None, false).await.unwrap(); - let embedding: Embedding = fake_embedding(vec!["whatever".to_owned()]) - .await - .unwrap() - .remove(0); + let model = use_fake_model().await; + let vectors = g.vectorise(model, template, None, false).await.unwrap(); + let embedding = vectors.embed_text("whatever").await.unwrap(); let mut selection = vectors .entities_by_similarity(&embedding, 10, None) + .await .unwrap(); selection .expand_entities_by_similarity(&embedding, 10, None) + .await .unwrap(); selection.expand(2, None); - let docs = selection.get_documents().unwrap(); + let docs = selection.get_documents().await.unwrap(); assert!(docs.is_empty()) } @@ -186,47 +232,60 @@ mod vector_tests { .unwrap(); dotenv::dotenv().ok(); 
- let cache = VectorCache::in_memory(openai_embedding); - let vectors = g - .vectorise(cache.clone(), template, None, false) + + let model = VectorCache::in_memory() + .openai(OpenAIEmbeddings { + model: "text-embedding-3-small".to_owned(), + ..Default::default() + }) .await .unwrap(); + let vectors = g.vectorise(model, template, None, false).await.unwrap(); + let query = "Find a magician".to_owned(); - let embedding = cache.get_single(query).await.unwrap(); + let embedding = vectors.embed_text(query).await.unwrap(); let docs = vectors .nodes_by_similarity(&embedding, 1, None) + .await .unwrap() .get_documents() + .await .unwrap(); // TODO: use the ids instead in all of these cases assert!(docs[0].content.contains("Gandalf is a wizard")); let query = "Find a young person".to_owned(); - let embedding = cache.get_single(query).await.unwrap(); + let embedding = vectors.embed_text(query).await.unwrap(); let docs = vectors .nodes_by_similarity(&embedding, 1, None) + .await .unwrap() .get_documents() + .await .unwrap(); assert!(docs[0].content.contains("Frodo is a hobbit")); // this fails when using gte-small // with window! 
let query = "Find a young person".to_owned(); - let embedding = cache.get_single(query).await.unwrap(); + let embedding = vectors.embed_text(query).await.unwrap(); let docs = vectors .nodes_by_similarity(&embedding, 1, Some((1, 3))) + .await .unwrap() .get_documents() + .await .unwrap(); assert!(!docs[0].content.contains("Frodo is a hobbit")); // this fails when using gte-small let query = "Has anyone appeared with anyone else?".to_owned(); - let embedding = cache.get_single(query).await.unwrap(); + let embedding = vectors.embed_text(query).await.unwrap(); let docs = vectors .edges_by_similarity(&embedding, 1, None) + .await .unwrap() .get_documents() + .await .unwrap(); assert!(docs[0].content.contains("Frodo appeared with Gandalf")); } diff --git a/raphtory/src/vectors/storage.rs b/raphtory/src/vectors/storage.rs index f32e62bd59..a36946a59a 100644 --- a/raphtory/src/vectors/storage.rs +++ b/raphtory/src/vectors/storage.rs @@ -1,22 +1,68 @@ use super::{ cache::VectorCache, - db::{EdgeDb, EntityDb, NodeDb}, + entity_db::{EdgeDb, NodeDb}, template::DocumentTemplate, vectorised_graph::VectorisedGraph, }; use crate::{ db::api::view::StaticGraphViewOps, errors::{GraphError, GraphResult}, + vectors::{ + embeddings::EmbeddingModel, + vector_collection::{lancedb::LanceDb, VectorCollectionFactory}, + }, }; +use async_openai::config::{OpenAIConfig, OPENAI_API_BASE}; use serde::{Deserialize, Serialize}; use std::{ fs::File, path::{Path, PathBuf}, + sync::Arc, }; -#[derive(Serialize, Deserialize)] +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash)] +pub struct OpenAIEmbeddings { + pub model: String, + pub api_base: Option, + pub api_key_env: Option, + pub org_id: Option, + pub project_id: Option, +} + +impl Default for OpenAIEmbeddings { + fn default() -> Self { + Self { + model: "text-embedding-3-small".to_owned(), // TODO: double-check where am I really using this + api_base: Default::default(), + api_key_env: Default::default(), + org_id: 
Default::default(), + project_id: Default::default(), + } + } +} + +impl OpenAIEmbeddings { + pub(super) fn resolve_config(&self) -> OpenAIConfig { + let api_key_env = self + .api_key_env + .clone() + .unwrap_or("OPENAI_API_KEY".to_owned()); + let api_key = std::env::var(api_key_env).unwrap_or_default(); // TODO: raise error if api_key_env provided but not var defined + + let api_base = self.api_base.clone().unwrap_or(OPENAI_API_BASE.to_owned()); + + OpenAIConfig::new() + .with_api_base(api_base) + .with_api_key(api_key) + .with_org_id(self.org_id.clone().unwrap_or_default()) + .with_project_id(self.project_id.clone().unwrap_or_default()) + } +} + +#[derive(Serialize, Deserialize, Debug)] pub(super) struct VectorMeta { pub(super) template: DocumentTemplate, + pub(super) model: EmbeddingModel, } impl VectorMeta { @@ -25,20 +71,31 @@ impl VectorMeta { serde_json::to_writer(file, self)?; Ok(()) } + + pub(super) async fn read_from_path(path: &Path) -> GraphResult { + let meta_string = std::fs::read_to_string(path)?; + let meta: VectorMeta = serde_json::from_str(&meta_string)?; + Ok(meta) + } } impl VectorisedGraph { - pub fn read_from_path(path: &Path, graph: G, cache: VectorCache) -> GraphResult { - let meta_string = std::fs::read_to_string(meta_path(path))?; - let meta: VectorMeta = serde_json::from_str(&meta_string)?; + pub async fn read_from_path(path: &Path, graph: G, cache: &VectorCache) -> GraphResult { + let meta = VectorMeta::read_from_path(&meta_path(path)).await?; + + let factory = LanceDb; + let db_path = Arc::new(db_path(path)); + let dim = meta.model.sample.len(); + // TODO: put table names in common place? 
maybe some trait function for EntityDb that returns it + let node_db = NodeDb(factory.from_path(db_path.clone(), "nodes", dim).await?); + let edge_db = EdgeDb(factory.from_path(db_path, "edges", dim).await?); - let node_db = NodeDb::from_path(&node_vectors_path(path))?; - let edge_db = EdgeDb::from_path(&edge_vectors_path(path))?; + let model = cache.validate_and_cache_model(meta.model).await?.into(); Ok(VectorisedGraph { template: meta.template, source_graph: graph, - cache, + model, node_db, edge_db, }) @@ -49,10 +106,31 @@ fn meta_path(path: &Path) -> PathBuf { path.join("meta") } -pub(super) fn node_vectors_path(path: &Path) -> PathBuf { - path.join("nodes") +pub(super) fn db_path(path: &Path) -> PathBuf { + path.join("db") } -pub(super) fn edge_vectors_path(path: &Path) -> PathBuf { - path.join("edges") +#[cfg(test)] +mod vector_storage_tests { + // #[test] + // fn test_vector_meta() { + // let meta = VectorMeta { + // template: DocumentTemplate::default(), + // sample: vec![1.0].into(), + // embeddings: SampledModel::OpenAI(StoredOpenAIEmbeddings { + // model: "text-embedding-3-small".to_owned(), + // config: Default::default(), + // }), + // }; + // let serialised = serde_json::to_string_pretty(&meta).unwrap(); + // println!("{serialised}"); + + // if let SampledModel::OpenAI(embeddings) = meta.embeddings { + // let embeddings: OpenAIEmbeddings = embeddings.try_into().unwrap(); + // } else { + // panic!("should not be here"); + // } + + // // panic!("here"); + // } } diff --git a/raphtory/src/vectors/utils.rs b/raphtory/src/vectors/utils.rs index 7367486d47..50f19f0e4e 100644 --- a/raphtory/src/vectors/utils.rs +++ b/raphtory/src/vectors/utils.rs @@ -11,7 +11,7 @@ where T: 'static, { elements - .sorted_by(|(_, score1), (_, score2)| score2.partial_cmp(score1).unwrap()) // desc ordering, thus the invertion + .sorted_by(|(_, distance1), (_, distance2)| distance1.partial_cmp(distance2).unwrap()) // asc ordering .take(k) } diff --git 
a/raphtory/src/vectors/vector_collection/lancedb.rs b/raphtory/src/vectors/vector_collection/lancedb.rs new file mode 100644 index 0000000000..faacfdc69e --- /dev/null +++ b/raphtory/src/vectors/vector_collection/lancedb.rs @@ -0,0 +1,259 @@ +use std::{ops::Deref, path::Path, sync::Arc}; + +use arrow_array::{ + types::{Float32Type, UInt64Type}, + ArrayAccessor, ArrayRef, ArrowPrimitiveType, FixedSizeListArray, PrimitiveArray, RecordBatch, + RecordBatchIterator, UInt64Array, +}; +use futures_util::TryStreamExt; +use itertools::Itertools; +use lancedb::{ + arrow::arrow_schema::{DataType, Field, Schema}, + index::{ + vector::{IvfFlatIndexBuilder, IvfPqIndexBuilder}, + Index, + }, + query::{ExecutableQuery, QueryBase}, + Connection, DistanceType, Table, +}; + +use crate::{ + errors::GraphResult, + vectors::{ + vector_collection::{CollectionPath, VectorCollection, VectorCollectionFactory}, + Embedding, + }, +}; + +const VECTOR_COL_NAME: &str = "vector"; + +pub(crate) struct LanceDb; + +impl VectorCollectionFactory for LanceDb { + type DbType = LanceDbCollection; + + async fn new_collection( + &self, + path: CollectionPath, + name: &str, + dim: usize, + ) -> GraphResult { + let db = connect(path.deref().as_ref()).await; + let schema = get_schema(dim); + let table = db.create_empty_table(name, schema).execute().await.unwrap(); // TODO: remove unwrap + Ok(Self::DbType { table, dim, path }) + } + + async fn from_path( + &self, + path: CollectionPath, + name: &str, + dim: usize, + ) -> GraphResult { + let db = connect(path.deref().as_ref()).await; + let table = db.open_table(name).execute().await.unwrap(); // TODO: remove unwrap + + // FIXME: if dim is wrong, bail from here with something like the following!!! 
+ // let vector_field = table + // .schema() + // .await + // .unwrap() + // .field_with_name("vectors") + // .unwrap(); // and get the array size + Ok(Self::DbType { table, dim, path }) + } +} + +#[derive(Clone)] +pub(crate) struct LanceDbCollection { + table: Table, // maybe this should be built in every call to the collection from path? + dim: usize, + path: CollectionPath, // this is only necessary to avoid dropping temp dirs +} + +impl LanceDbCollection { + fn schema(&self) -> Arc { + get_schema(self.dim) + } +} + +impl VectorCollection for LanceDbCollection { + async fn insert_vectors( + &self, + ids: Vec, + vectors: impl IntoIterator, + ) -> crate::errors::GraphResult<()> { + let size = ids.len(); // TODO: remove? don't remember what was this for + let batches = RecordBatchIterator::new( + vec![RecordBatch::try_new( + self.schema(), + vec![ + Arc::new(UInt64Array::from(ids)), + Arc::new( + FixedSizeListArray::from_iter_primitive::( + vectors.into_iter().map(|vector| { + Some( + vector + .into_iter() + .map(|value| Some(*value)) + .collect::>(), // TODO: ideally avoid this collect + ) + }), + self.dim as i32, + ), + ), + ], + )], + self.schema(), + ); + self.table.add(batches).execute().await.unwrap(); // TODO: remove unwrap + Ok(()) + } + + async fn get_id(&self, id: u64) -> GraphResult> { + let query = self.table.query().only_if(format!("id = {id}")); + let result = query.execute().await.unwrap(); + let batches: Vec<_> = result.try_collect().await.unwrap(); + if let Some(batch) = batches.get(0) { + let col: &ArrayRef = batch.column_by_name("vector").unwrap(); + let array_list = col.as_any().downcast_ref::(); + let array = array_list.unwrap().value(0); + let downcasted = array.as_any().downcast_ref::>(); + let vector = downcasted.unwrap().values().iter().copied().collect(); + Ok(Some(vector)) + } else { + Ok(None) + } + } + + // TODO: make this return everything, the embedding itself, so that we don't + // need to go back to the vector collection to retrieve 
the embedding by id + // with get_id() + // I need get_id anyways for entities that are forced into the selection + async fn top_k_with_distances( + &self, + query: &crate::vectors::Embedding, + k: usize, + candidates: Option>, + ) -> GraphResult + Send> { + // TODO: return IntoIter? + let vector_query = self.table.query().nearest_to(query.as_ref()).unwrap(); + let limited = vector_query.limit(k); + let filtered = if let Some(candidates) = candidates { + let mut iter = candidates.into_iter().peekable(); + if let Some(_) = iter.peek() { + let id_list = iter.map(|id| id.to_string()).join(","); + limited.only_if(format!("id IN ({id_list})")) + } else { + limited.only_if("false") // this is a bit hacky, maybe the top layer shouldn't even call this one if the candidates list is empty + } + } else { + limited + }; + let stream = filtered.execute().await.unwrap(); + let result = stream.try_collect::>().await.unwrap(); + + let downcasted = result.into_iter().flat_map(|record| { + // TODO: merge both things + let ids = record + .column_by_name("id") + .unwrap() + .as_any() + .downcast_ref::>() + .unwrap() + .values() + .iter() + .copied(); + let scores = record + .column_by_name("_distance") + .unwrap() + .as_any() + .downcast_ref::>() + .unwrap() + .values() + .iter() + .copied(); + // TODO: try to avoid collect maybe using record.columns() instead of getting them independently + ids.zip(scores).collect::>() + }); + Ok(downcasted) + } + + async fn create_index(&self) { + let count = self.table.count_rows(None).await.unwrap(); // FIXME: remove unwrap + if count > 0 { + // we check the count because indexing with no rows errors out + self.table + .create_index( + &[VECTOR_COL_NAME], + Index::IvfFlat( + IvfFlatIndexBuilder::default().distance_type(DistanceType::Cosine), + ), + // Index::IvfPq(IvfPqIndexBuilder::default().distance_type(DistanceType::Cosine)), // TODO: bring this back for over 256 rows, or a greater value + ) + // .create_index(&[VECTOR_COL_NAME], Index::Auto) +
.execute() + .await + .unwrap() // FIXME: remove unwrap + } + // FIXME: what happens if the rows are added later on??? + } +} + +async fn connect(path: &Path) -> Connection { + let url = path.display().to_string(); + lancedb::connect(&url).execute().await.unwrap() // TODO: remove unwrap +} + +fn get_schema(dim: usize) -> Arc { + Arc::new(Schema::new(vec![ + Field::new("id", DataType::UInt64, false), + Field::new( + VECTOR_COL_NAME, + DataType::FixedSizeList( + Arc::new(Field::new("item", DataType::Float32, true)), + dim as i32, + ), + true, + ), + ])) +} + +#[cfg(test)] +mod lancedb_tests { + use std::sync::Arc; + + use crate::vectors::{ + vector_collection::{lancedb::LanceDb, VectorCollection, VectorCollectionFactory}, + Embedding, + }; + + #[tokio::test] + async fn test_search() { + let factory = LanceDb; + let tempdir = tempfile::tempdir().unwrap(); + let path = Arc::new(tempdir); + let collection = factory.new_collection(path, "vectors", 2).await.unwrap(); + let ids = vec![0, 1]; + let vectors: Vec = vec![vec![1.0, 0.0].into(), vec![0.0, 1.0].into()]; + collection + .insert_vectors(ids, vectors.into_iter()) + .await + .unwrap(); + let result = collection + .top_k_with_distances(&[1.0, 0.0].into(), 1, None::>) + .await + .unwrap() + .collect::>(); + assert_eq!(result.len(), 1); + assert_eq!(result[0], (0, 0.0)); + + let result = collection + .top_k_with_distances(&[1.0, 0.0].into(), 1, Some(vec![1])) + .await + .unwrap() + .collect::>(); + assert_eq!(result.len(), 1); + assert_eq!(result[0], (1, 2.0)); + } +} diff --git a/raphtory/src/vectors/vector_collection/milvus.rs b/raphtory/src/vectors/vector_collection/milvus.rs new file mode 100644 index 0000000000..16fa40122f --- /dev/null +++ b/raphtory/src/vectors/vector_collection/milvus.rs @@ -0,0 +1,182 @@ +// use std::{borrow::Cow, path::Path}; + +// use milvus::{ +// client::Client, +// collection::{Collection, SearchOption}, +// data::FieldColumn, +// index::{IndexParams, IndexType, MetricType}, +// 
proto::{common::ErrorCode, schema::DataType}, +// schema::{CollectionSchemaBuilder, FieldSchema}, +// value::{Value, ValueVec}, +// }; + +// use crate::{ +// errors::GraphResult, +// vectors::{ +// vector_collection::{VectorCollection, VectorCollectionFactory}, +// Embedding, +// }, +// }; + +// const VECTOR_FIELD_NAME: &'static str = "vector"; + +// pub struct Milvus { +// url: String, +// } + +// impl Milvus { +// pub fn new(url: String) -> Self { +// Self { url } +// } +// } + +// impl VectorCollectionFactory for Milvus { +// type DbType = MilvusDb; + +// async fn new_collection(&self, dim: usize) -> Self::DbType { +// let collection = format!( +// "raphtory_{}", +// uuid::Uuid::new_v4().to_string().replace("-", "_") +// ); +// let client = Client::new(self.url.to_owned()).await.unwrap(); +// let schema = CollectionSchemaBuilder::new(&collection, "") +// .add_field(FieldSchema::new_int64("id", "")) +// .add_field(FieldSchema::new_float_vector( +// VECTOR_FIELD_NAME, +// "", +// dim as i64, +// )) +// .set_primary_key("id") +// .unwrap() +// .build() +// .unwrap(); +// client.create_collection(schema, None).await.unwrap(); // TODO: maybe set up somew options such as number of shards? 
+ +// Self::DbType { +// url: self.url.clone(), +// collection, +// dim, +// } +// } + +// fn from_path(&self, path: &Path) -> Self::DbType { +// todo!() +// } +// } + +// #[derive(Clone)] +// pub(super) struct MilvusDb { +// url: String, +// collection: String, +// dim: usize, +// } + +// impl MilvusDb { +// async fn collection(&self) -> Collection { +// let client = Client::new(self.url.to_owned()).await.unwrap(); +// client.get_collection(&self.collection).await.unwrap() +// } +// } + +// impl VectorCollection for MilvusDb { +// async fn insert_vectors(&self, embeddings: Vec<(usize, Embedding)>) -> GraphResult<()> { +// let ids = embeddings.iter().map(|(id, _)| *id as i64).collect(); +// let values = embeddings +// .iter() +// .flat_map(|(_, vector)| vector.iter()) +// .copied() +// .collect(); + +// let data = vec![ +// FieldColumn { +// name: "id".to_owned(), +// dtype: DataType::Int64, +// value: ValueVec::Long(ids), // the id !!!!!!!!!!!!!!!!, +// dim: 1, +// max_length: 1, +// }, +// FieldColumn { +// name: "vector".to_owned(), +// dtype: DataType::FloatVector, +// value: ValueVec::Float(values), +// dim: self.dim as i64, +// max_length: 1, +// }, +// ]; + +// let result = self.collection().await.insert(data, None).await.unwrap(); +// let success = result +// .status +// .is_some_and(|status| status.error_code() == ErrorCode::Success); +// assert!(success); +// Ok(()) +// } + +// // FIXME: simply get the embeddings out of the search query so that I don't need to come back for them by using this function +// async fn get_id(&self, id: u32) -> GraphResult> { +// let result = self +// .collection() +// .await +// .query::>(format!("id == {id}"), vec![]) +// .await +// .unwrap(); + +// if let Some(vector_col) = result.into_iter().find(|col| col.name == VECTOR_FIELD_NAME) { +// if let ValueVec::Float(values) = vector_col.value { +// Ok(Some(values.into())) +// } else { +// Ok(None) +// } +// } else { +// Ok(None) +// } +// } + +// async fn create_index(&self) { 
+// self.collection() +// .await +// .create_index( +// "vector", +// IndexParams::new( +// "vector".to_owned(), // TODO: make sure the namespace for the index name is not global +// IndexType::IvfSQ8, +// MetricType::IP, +// Default::default(), +// ), +// ) +// .await +// .unwrap(); +// } + +// async fn top_k(&self, query: &Embedding, k: usize) -> impl Iterator { +// let collection = self.collection().await; +// collection.load(1).await.unwrap(); +// let mut result = collection +// .search( +// vec![Value::FloatArray(Cow::Borrowed(query))], +// VECTOR_FIELD_NAME, +// k as i32, +// MetricType::IP, // FIXME: why can't I use cosine? +// vec!["id"], // TODO: remove this? +// &SearchOption::new(), +// ) +// .await +// .unwrap(); + +// let mut search_result = result.remove(0); // careful + +// let ids = search_result.field.remove(0); +// if let ValueVec::Long(ids) = ids.value { +// ids.into_iter().zip(search_result.score) +// } else { +// panic!("no ids to get"); +// } +// } + +// // fn from_path(path: &str) -> GraphResult { +// // Ok(Self { +// // url: "http://localhost:19530".to_owned(), +// // collection: path.to_owned(), +// // }) +// // } +// } diff --git a/raphtory/src/vectors/vector_collection/mod.rs b/raphtory/src/vectors/vector_collection/mod.rs new file mode 100644 index 0000000000..76448d7205 --- /dev/null +++ b/raphtory/src/vectors/vector_collection/mod.rs @@ -0,0 +1,40 @@ +use std::{path::Path, sync::Arc}; + +pub(crate) mod lancedb; +mod milvus; + +use crate::{errors::GraphResult, vectors::Embedding}; + +pub(super) type CollectionPath = Arc + Send + Sync>; + +pub(super) trait VectorCollectionFactory { + type DbType: VectorCollection; + async fn new_collection( + &self, + path: CollectionPath, + name: &str, + dim: usize, + ) -> GraphResult; + async fn from_path( + &self, + path: CollectionPath, + name: &str, + dim: usize, + ) -> GraphResult; +} + +pub(super) trait VectorCollection: Sized { + async fn insert_vectors( + &self, + ids: Vec, + vectors: impl 
Iterator, + ) -> crate::errors::GraphResult<()>; + async fn get_id(&self, id: u64) -> GraphResult>; + async fn top_k_with_distances( + &self, + query: &Embedding, + k: usize, + candidates: Option>, + ) -> GraphResult + Send>; + async fn create_index(&self); +} diff --git a/raphtory/src/vectors/vector_selection.rs b/raphtory/src/vectors/vector_selection.rs index dc2dbe8a94..81e3b8e6e2 100644 --- a/raphtory/src/vectors/vector_selection.rs +++ b/raphtory/src/vectors/vector_selection.rs @@ -1,5 +1,5 @@ use super::{ - db::EntityDb, + entity_db::EntityDb, entity_ref::EntityRef, utils::{apply_window, find_top_k}, vectorised_graph::VectorisedGraph, @@ -13,8 +13,10 @@ use crate::{ }, errors::GraphResult, prelude::{EdgeViewOps, NodeViewOps, *}, + vectors::vector_collection::VectorCollection, }; use either::Either; +use futures_util::future::join_all; use itertools::Itertools; use std::collections::HashSet; @@ -36,7 +38,7 @@ impl ExpansionPath { } #[derive(Debug, Clone)] -struct Selected(Vec<(EntityRef, f32)>); +struct Selected(Vec<(EntityRef, f32)>); // TODO: add here the text/embedding use to calculate the distance impl From> for Selected { fn from(value: Vec<(EntityRef, f32)>) -> Self { @@ -96,6 +98,11 @@ impl VectorSelection { } } + /// Returns the vectorised graph instance behind this selection + pub fn get_vectorised_graph(&self) -> &VectorisedGraph { + &self.graph + } + /// Return the nodes present in the current selection pub fn nodes(&self) -> Vec> { let g = &self.graph.source_graph; @@ -115,25 +122,29 @@ impl VectorSelection { } /// Return the documents present in the current selection - pub fn get_documents(&self) -> GraphResult>> { + pub async fn get_documents(&self) -> GraphResult>> { Ok(self - .get_documents_with_scores()? + .get_documents_with_distances() + .await? 
.into_iter() .map(|(doc, _)| doc) .collect()) } - /// Return the documents alongside their scores present in the current selection - pub fn get_documents_with_scores(&self) -> GraphResult, f32)>> { - self.selected - .iter() - .map(|(entity, score)| self.regenerate_doc(*entity).map(|doc| (doc, *score))) - .collect() + /// Return the documents alongside their distances present in the current selection + pub async fn get_documents_with_distances(&self) -> GraphResult, f32)>> { + let futures = self.selected.iter().map(|(entity, distance)| async { + self.regenerate_doc(*entity) + .await + .map(|doc| (doc, *distance)) + .unwrap() // TODO: REMOVE UNWRAP + }); + Ok(join_all(futures).await) } /// Add all `nodes` to the current selection /// - /// Documents added by this call are assumed to have a score of 0. + /// Documents added by this call are assumed to have a distance of 0. /// If any node id doesn't exist it will be ignored /// /// # Arguments @@ -147,7 +158,7 @@ impl VectorSelection { /// Add all `edges` to the current selection /// - /// Documents added by this call are assumed to have a score of 0. + /// Documents added by this call are assumed to have a distance of 0. /// If any edge doesn't exist it will be ignored /// /// # Arguments @@ -192,64 +203,66 @@ impl VectorSelection { } } - /// Add the top `limit` adjacent entities with higher score for `query` to the selection + /// Add to the selection the `limit` adjacent entities closest to `query` /// /// The expansion algorithm is a loop with two steps on each iteration: /// 1. All the entities 1 hop away of some of the entities included on the selection (and /// not already selected) are marked as candidates. - /// 2. Those candidates are added to the selection in descending order according to the - /// similarity score obtained against the `query`. + /// 2. Those candidates are added to the selection in ascending distance from `query`. 
/// /// This loops goes on until the number of new entities reaches a total of `limit` /// entities or until no more documents are available /// /// # Arguments - /// * query - the embedding to score against + /// * query - the embedding to calculate the distance from /// * window - the window where documents need to belong to in order to be considered - pub fn expand_entities_by_similarity( + pub async fn expand_entities_by_similarity( &mut self, query: &Embedding, limit: usize, window: Option<(i64, i64)>, ) -> GraphResult<()> { self.expand_by_similarity(query, limit, window, ExpansionPath::Both) + .await } - /// Add the top `limit` adjacent nodes with higher score for `query` to the selection + /// Add to the selection the `limit` adjacent nodes closest to `query` /// /// This function has the same behavior as expand_entities_by_similarity but it only considers nodes. /// /// # Arguments - /// * query - the embedding to score against + /// * query - the embedding to calculate the distance from /// * limit - the maximum number of new nodes to add /// * window - the window where documents need to belong to in order to be considered - pub fn expand_nodes_by_similarity( + pub async fn expand_nodes_by_similarity( &mut self, query: &Embedding, limit: usize, window: Option<(i64, i64)>, ) -> GraphResult<()> { self.expand_by_similarity(query, limit, window, ExpansionPath::Nodes) + .await } - /// Add the top `limit` adjacent edges with higher score for `query` to the selection + /// Add to the selection the `limit` adjacent edges closest to `query` /// /// This function has the same behavior as expand_entities_by_similarity but it only considers edges. 
/// /// # Arguments - /// * query - the embedding to score against + /// * query - the embedding to calculate the distance from /// * limit - the maximum number of new edges to add /// * window - the window where documents need to belong to in order to be considered - pub fn expand_edges_by_similarity( + pub async fn expand_edges_by_similarity( &mut self, query: &Embedding, limit: usize, window: Option<(i64, i64)>, ) -> GraphResult<()> { self.expand_by_similarity(query, limit, window, ExpansionPath::Edges) + .await } - fn expand_by_similarity( + async fn expand_by_similarity( &mut self, query: &Embedding, limit: usize, @@ -260,22 +273,27 @@ impl VectorSelection { let view = apply_window(g, window); let initial_size = self.selected.len(); - let nodes: Box> = if path.includes_nodes() { + let nodes: Box + Send> = if path.includes_nodes() { let jump = matches!(path, ExpansionPath::Nodes); let filter = self.get_nodes_in_context(window, jump); let nodes = self .graph .node_db - .top_k(query, limit, view.clone(), Some(filter))?; + .top_k(query, limit, view.clone(), Some(filter)) + .await?; Box::new(nodes) } else { Box::new(std::iter::empty()) }; - let edges: Box> = if path.includes_edges() { + let edges: Box + Send> = if path.includes_edges() { let jump = matches!(path, ExpansionPath::Edges); let filter = self.get_edges_in_context(window, jump); - let edges = self.graph.edge_db.top_k(query, limit, view, Some(filter))?; + let edges = self + .graph + .edge_db + .top_k(query, limit, view, Some(filter)) + .await?; Box::new(edges) } else { Box::new(std::iter::empty()) @@ -286,7 +304,7 @@ impl VectorSelection { let increment = self.selected.len() - initial_size; if increment > 0 && increment < limit { - self.expand_by_similarity(query, limit, window, path)? + Box::pin(self.expand_by_similarity(query, limit, window, path)).await? 
} Ok(()) } @@ -327,17 +345,17 @@ impl VectorSelection { .collect() } - fn regenerate_doc(&self, entity: EntityRef) -> GraphResult> { + async fn regenerate_doc(&self, entity: EntityRef) -> GraphResult> { match entity.resolve_entity(&self.graph.source_graph).unwrap() { Either::Left(node) => Ok(Document { entity: DocumentEntity::Node(node.clone()), content: self.graph.template.node(node).unwrap(), - embedding: self.graph.node_db.get_id(entity.id())?.unwrap(), + embedding: self.graph.node_db.get_id(entity.id()).await?.unwrap(), }), Either::Right(edge) => Ok(Document { entity: DocumentEntity::Edge(edge.clone()), content: self.graph.template.edge(edge).unwrap(), - embedding: self.graph.edge_db.get_id(entity.id())?.unwrap(), + embedding: self.graph.edge_db.get_id(entity.id()).await?.unwrap(), }), } } diff --git a/raphtory/src/vectors/vectorisable.rs b/raphtory/src/vectors/vectorisable.rs index 4389ea0d81..66e22a6b6f 100644 --- a/raphtory/src/vectors/vectorisable.rs +++ b/raphtory/src/vectors/vectorisable.rs @@ -1,19 +1,24 @@ use super::{ - cache::VectorCache, - db::{EdgeDb, NodeDb}, - storage::{edge_vectors_path, node_vectors_path, VectorMeta}, + entity_db::{EdgeDb, NodeDb}, + storage::{db_path, VectorMeta}, }; use crate::{ db::api::view::{internal::IntoDynamic, StaticGraphViewOps}, errors::GraphResult, prelude::GraphViewOps, vectors::{ - db::EntityDb, embeddings::compute_embeddings, template::DocumentTemplate, + cache::CachedEmbeddingModel, + embeddings::compute_embeddings, + entity_db::EntityDb, + template::DocumentTemplate, + vector_collection::{ + lancedb::LanceDb, CollectionPath, VectorCollection, VectorCollectionFactory, + }, vectorised_graph::VectorisedGraph, }, }; use async_trait::async_trait; -use std::path::Path; +use std::{path::Path, sync::Arc}; use tracing::info; #[async_trait] @@ -21,9 +26,7 @@ pub trait Vectorisable { /// Create a VectorisedGraph from the current graph /// /// # Arguments: - /// * embedding - the embedding function to translate documents to 
embeddings - /// * cache - the file to be used as a cache to avoid calling the embedding function - /// * overwrite_cache - whether or not to overwrite the cache if there are new embeddings + /// * model - the embedding function to translate documents to embeddings /// * template - the template to use to translate entities into documents /// * verbose - whether or not to print logs reporting the progress /// @@ -31,7 +34,7 @@ pub trait Vectorisable { /// A VectorisedGraph with all the documents/embeddings computed and with an initial empty selection async fn vectorise( &self, - cache: VectorCache, + model: CachedEmbeddingModel, template: DocumentTemplate, path: Option<&Path>, verbose: bool, @@ -42,21 +45,31 @@ pub trait Vectorisable { impl Vectorisable for G { async fn vectorise( &self, - cache: VectorCache, + model: CachedEmbeddingModel, template: DocumentTemplate, path: Option<&Path>, verbose: bool, ) -> GraphResult> { + let db_path = path + .map(|path| Ok::(Arc::new(db_path(path)))) + .unwrap_or_else(|| Ok(Arc::new(tempfile::tempdir()?)))?; + let factory = LanceDb; + let dim = model.get_sample().len(); if verbose { info!("computing embeddings for nodes"); } let nodes = self.nodes(); let node_docs = nodes .iter() - .filter_map(|node| template.node(node).map(|doc| (node.node.0 as u32, doc))); - let node_path = path.map(node_vectors_path); - let node_vectors = compute_embeddings(node_docs, &cache); - let node_db = NodeDb::from_vectors(node_vectors, node_path).await?; + .filter_map(|node| template.node(node).map(|doc| (node.node.0 as u64, doc))); + let node_vectors = compute_embeddings(node_docs, &model); + let node_db = NodeDb( + factory + .new_collection(db_path.clone(), "nodes", dim) + .await?, + ); + node_db.insert_vector_stream(node_vectors).await.unwrap(); + node_db.create_index().await; if verbose { info!("computing embeddings for edges"); @@ -65,25 +78,32 @@ impl Vectorisable for G { let edge_docs = edges.iter().filter_map(|edge| { template .edge(edge) - 
.map(|doc| (edge.edge.pid().0 as u32, doc)) + .map(|doc| (edge.edge.pid().0 as u64, doc)) }); - let edge_path = path.map(edge_vectors_path); - let edge_vectors = compute_embeddings(edge_docs, &cache); - let edge_db = EdgeDb::from_vectors(edge_vectors, edge_path).await?; + let edge_vectors = compute_embeddings(edge_docs, &model); + let edge_db = EdgeDb(factory.new_collection(db_path, "edges", dim).await?); + edge_db.insert_vector_stream(edge_vectors).await.unwrap(); + edge_db.create_index().await; if let Some(path) = path { let meta = VectorMeta { template: template.clone(), + model: model.model.clone(), }; meta.write_to_path(path)?; } + // FIXME: here tempdir will be dropped and so the vector db will be destroyed!!!!!!!!!!!!!! + Ok(VectorisedGraph { source_graph: self.clone(), template, - cache, + model, node_db, edge_db, }) } } + +////////////////////////////////////////////////////////////// +// TODO: need to implement an alternative that can be used from graphql diff --git a/raphtory/src/vectors/vectorised_graph.rs b/raphtory/src/vectors/vectorised_graph.rs index e8194d349f..60548c0be5 100644 --- a/raphtory/src/vectors/vectorised_graph.rs +++ b/raphtory/src/vectors/vectorised_graph.rs @@ -1,6 +1,5 @@ use super::{ - cache::VectorCache, - db::{EdgeDb, EntityDb, NodeDb}, + entity_db::{EdgeDb, EntityDb, NodeDb}, utils::apply_window, vector_selection::VectorSelection, }; @@ -9,16 +8,22 @@ use crate::{ db::api::view::{DynamicGraph, IntoDynamic, StaticGraphViewOps}, errors::GraphResult, prelude::GraphViewOps, - vectors::{template::DocumentTemplate, utils::find_top_k, Embedding}, + vectors::{ + cache::CachedEmbeddingModel, + template::DocumentTemplate, + utils::find_top_k, + vector_collection::{lancedb::LanceDbCollection, VectorCollection}, + Embedding, + }, }; #[derive(Clone)] pub struct VectorisedGraph { pub(crate) source_graph: G, pub(crate) template: DocumentTemplate, - pub(crate) cache: VectorCache, - pub(super) node_db: NodeDb, - pub(super) edge_db: EdgeDb, + 
pub(crate) model: CachedEmbeddingModel, + pub(super) node_db: NodeDb, + pub(super) edge_db: EdgeDb, } impl VectorisedGraph { @@ -26,7 +31,7 @@ impl VectorisedGraph { VectorisedGraph { source_graph: self.source_graph.clone().into_dynamic(), template: self.template, - cache: self.cache, + model: self.model, node_db: self.node_db, edge_db: self.edge_db, } @@ -40,22 +45,13 @@ impl VectorisedGraph { .iter() .filter_map(|node| { self.source_graph.node(node).and_then(|node| { - let id = node.node.index(); - + let id = node.node.index() as u64; self.template.node(node).map(|doc| (id, doc)) }) }) .unzip(); - - let vectors = self.cache.get_embeddings(docs).await?; - - self.node_db.insert_vectors( - ids.iter() - .zip(vectors) - .map(|(id, vector)| (*id, vector)) - .collect(), - )?; - + let vectors = self.model.get_embeddings(docs).await?; + self.node_db.insert_vectors(ids, vectors).await?; Ok(()) } @@ -65,22 +61,13 @@ impl VectorisedGraph { .iter() .filter_map(|(src, dst)| { self.source_graph.edge(src, dst).and_then(|edge| { - let id = edge.edge.pid().0; - + let id = edge.edge.pid().0 as u64; self.template.edge(edge).map(|doc| (id, doc)) }) }) .unzip(); - - let vectors = self.cache.get_embeddings(docs).await?; - - self.edge_db.insert_vectors( - ids.iter() - .zip(vectors) - .map(|(id, vector)| (*id, vector)) - .collect(), - )?; - + let vectors = self.model.get_embeddings(docs).await?; + self.edge_db.insert_vectors(ids, vectors).await?; Ok(()) } @@ -89,65 +76,70 @@ impl VectorisedGraph { VectorSelection::empty(self.clone()) } - /// Search the top scoring entities according to `query` with no more than `limit` entities + /// Search the closest entities to `query` with no more than `limit` entities /// /// # Arguments - /// * query - the embedding to score against + /// * query - the embedding to calculate the distance from /// * limit - the maximum number of entities to search /// * window - the window where documents need to belong to in order to be considered /// /// # Returns 
/// The vector selection resulting from the search - pub fn entities_by_similarity( + pub async fn entities_by_similarity( &self, query: &Embedding, limit: usize, window: Option<(i64, i64)>, ) -> GraphResult> { let view = apply_window(&self.source_graph, window); - let nodes = self.node_db.top_k(query, limit, view.clone(), None)?; - let edges = self.edge_db.top_k(query, limit, view, None)?; + let nodes = self.node_db.top_k(query, limit, view.clone(), None).await?; + let edges = self.edge_db.top_k(query, limit, view, None).await?; let docs = find_top_k(nodes.chain(edges), limit).collect(); Ok(VectorSelection::new(self.clone(), docs)) } - /// Search the top scoring nodes according to `query` with no more than `limit` nodes + /// Search the closest nodes to `query` with no more than `limit` nodes /// /// # Arguments - /// * query - the embedding to score against + /// * query - the embedding to calculate the distance from /// * limit - the maximum number of nodes to search /// * window - the window where documents need to belong to in order to be considered /// /// # Returns /// The vector selection resulting from the search - pub fn nodes_by_similarity( + pub async fn nodes_by_similarity( &self, query: &Embedding, limit: usize, window: Option<(i64, i64)>, ) -> GraphResult> { let view = apply_window(&self.source_graph, window); - let docs = self.node_db.top_k(query, limit, view, None)?; + let docs = self.node_db.top_k(query, limit, view, None).await?; Ok(VectorSelection::new(self.clone(), docs.collect())) } - /// Search the top scoring edges according to `query` with no more than `limit` edges + /// Search the closest edges to `query` with no more than `limit` edges /// /// # Arguments - /// * query - the embedding to score against + /// * query - the embedding to calculate the distance from /// * limit - the maximum number of edges to search /// * window - the window where documents need to belong to in order to be considered /// /// # Returns /// The vector 
selection resulting from the search - pub fn edges_by_similarity( + pub async fn edges_by_similarity( &self, query: &Embedding, limit: usize, window: Option<(i64, i64)>, ) -> GraphResult> { let view = apply_window(&self.source_graph, window); - let docs = self.edge_db.top_k(query, limit, view, None)?; + let docs = self.edge_db.top_k(query, limit, view, None).await?; Ok(VectorSelection::new(self.clone(), docs.collect())) } + + /// Returns the embedding for the given text using the embedding model setup for this graph + pub async fn embed_text>(&self, text: T) -> GraphResult { + self.model.get_single(text.into()).await + } }