From dc2fcd9b3b00e0162a5f738b7888ef8c4ee876b5 Mon Sep 17 00:00:00 2001 From: Andrew Stein Date: Sun, 22 Mar 2026 20:03:30 -0400 Subject: [PATCH 1/3] `Client::join` API Signed-off-by: Andrew Stein # Conflicts: # tools/test/results.tar.gz --- rust/perspective-client/perspective.proto | 10 + rust/perspective-client/src/rust/client.rs | 43 +- rust/perspective-js/src/rust/client.rs | 28 ++ .../perspective-js/src/ts/perspective.node.ts | 18 + .../test/js/joins/inner_join.spec.ts | 217 +++++++++ .../test/js/joins/inner_join_indexed.spec.ts | 248 +++++++++++ .../perspective/__init__.py | 5 + .../src/client/client_async.rs | 29 ++ .../src/client/client_sync.rs | 25 ++ .../cpp/perspective/src/cpp/gnode.cpp | 7 + .../cpp/perspective/src/cpp/server.cpp | 410 ++++++++++++++++++ .../src/include/perspective/gnode.h | 1 + .../src/include/perspective/server.h | 29 ++ 13 files changed, 1068 insertions(+), 2 deletions(-) create mode 100644 rust/perspective-js/test/js/joins/inner_join.spec.ts create mode 100644 rust/perspective-js/test/js/joins/inner_join_indexed.spec.ts diff --git a/rust/perspective-client/perspective.proto b/rust/perspective-client/perspective.proto index b324483e93..2f7291f1af 100644 --- a/rust/perspective-client/perspective.proto +++ b/rust/perspective-client/perspective.proto @@ -157,6 +157,7 @@ message Request { TableUpdateReq table_update_req = 33; ViewOnDeleteReq view_on_delete_req = 34; ViewRemoveDeleteReq view_remove_delete_req = 35; + MakeJoinTableReq make_join_table_req = 38; } } @@ -199,6 +200,7 @@ message Response { TableUpdateResp table_update_resp = 33; ViewOnDeleteResp view_on_delete_resp = 34; ViewRemoveDeleteResp view_remove_delete_resp = 35; + MakeJoinTableResp make_join_table_resp = 38; ServerError server_error = 50; } } @@ -335,6 +337,14 @@ message MakeTableReq { } message MakeTableResp {} +// `Client::join` — create a read-only table from an INNER JOIN of two tables. 
+message MakeJoinTableReq { + string left_table_id = 1; + string right_table_id = 2; + string on_column = 3; +} +message MakeJoinTableResp {} + // `Table::delete` message TableDeleteReq { bool is_immediate = 1; diff --git a/rust/perspective-client/src/rust/client.rs b/rust/perspective-client/src/rust/client.rs index 041c9edcc3..dba507870b 100644 --- a/rust/perspective-client/src/rust/client.rs +++ b/rust/perspective-client/src/rust/client.rs @@ -26,8 +26,8 @@ use crate::proto::request::ClientReq; use crate::proto::response::ClientResp; use crate::proto::{ ColumnType, GetFeaturesReq, GetFeaturesResp, GetHostedTablesReq, GetHostedTablesResp, - HostedTable, MakeTableReq, RemoveHostedTablesUpdateReq, Request, Response, ServerError, - ServerSystemInfoReq, + HostedTable, MakeJoinTableReq, MakeTableReq, RemoveHostedTablesUpdateReq, Request, Response, + ServerError, ServerSystemInfoReq, }; use crate::table::{Table, TableInitOptions, TableOptions}; use crate::table_data::{TableData, UpdateData}; @@ -589,6 +589,45 @@ impl Client { } } + /// Create a new read-only [`Table`] by performing an INNER JOIN on two + /// source tables. The resulting table is reactive: when either source + /// table is updated, the join is automatically recomputed. + /// + /// # Arguments + /// + /// * `left` - The left source table. + /// * `right` - The right source table. + /// * `on` - The column name to join on. Must exist in both tables with the + /// same type. + /// * `name` - Optional name for the resulting table. 
+ pub async fn join( + &self, + left: &Table, + right: &Table, + on: &str, + name: Option<String>, + ) -> ClientResult<Table> { + let entity_id = name.unwrap_or_else(randid); + let msg = Request { + msg_id: self.gen_id(), + entity_id: entity_id.clone(), + client_req: Some(ClientReq::MakeJoinTableReq(MakeJoinTableReq { + left_table_id: left.get_name().to_owned(), + right_table_id: right.get_name().to_owned(), + on_column: on.to_owned(), + })), + }; + + let client = self.clone(); + match self.oneshot(&msg).await? { + ClientResp::MakeJoinTableResp(_) => Ok(Table::new(entity_id, client, TableOptions { + index: Some(on.to_owned()), + limit: None, + })), + resp => Err(resp.into()), + } + } + async fn get_table_infos(&self) -> ClientResult<Vec<HostedTable>> { let msg = Request { msg_id: self.gen_id(), diff --git a/rust/perspective-js/src/rust/client.rs b/rust/perspective-js/src/rust/client.rs index 4ae7b16dbf..bb4305e9e1 100644 --- a/rust/perspective-js/src/rust/client.rs +++ b/rust/perspective-js/src/rust/client.rs @@ -382,6 +382,34 @@ impl Client { Ok(Table(self.client.table(args, options).await?)) } + /// Creates a new read-only [`Table`] by performing an INNER JOIN on two + /// source tables. The resulting table is reactive: when either source + /// table is updated, the join is automatically recomputed. + /// + /// # Arguments + /// + /// - `left` - The left source table. + /// - `right` - The right source table. + /// - `on` - The column name to join on. Must exist in both tables with the + /// same type. + /// - `name` - Optional name for the resulting table. + /// + /// # JavaScript Examples + /// + /// ```javascript + /// const joined = await client.join(orders_table, products_table, "Product ID"); + /// ``` + #[wasm_bindgen] + pub async fn join( + &self, + left: &Table, + right: &Table, + on: &str, + name: Option<String>, + ) -> ApiResult<Table>
{ + Ok(Table(self.client.join(&left.0, &right.0, on, name).await?)) + } + /// Terminates this [`Client`], cleaning up any [`crate::View`] handles the /// [`Client`] has open as well as its callbacks. #[wasm_bindgen] diff --git a/rust/perspective-js/src/ts/perspective.node.ts b/rust/perspective-js/src/ts/perspective.node.ts index 44aae2361b..23248a0ac4 100644 --- a/rust/perspective-js/src/ts/perspective.node.ts +++ b/rust/perspective-js/src/ts/perspective.node.ts @@ -273,6 +273,23 @@ export function on_error(callback: Function) { return SYNC_CLIENT.on_error(callback); } +/** + * Create a read-only table from an INNER JOIN of two source tables. + * @param left + * @param right + * @param on + * @param name + * @returns + */ +export function join( + left: perspective_client.Table, + right: perspective_client.Table, + on: string, + name?: string, +) { + return SYNC_CLIENT.join(left, right, on, name); +} + /** * Create a table from the global Perspective instance. * @param init_data @@ -356,6 +373,7 @@ export { perspective_client as wasmModule }; export default { table, + join, websocket, worker, get_hosted_table_names, diff --git a/rust/perspective-js/test/js/joins/inner_join.spec.ts b/rust/perspective-js/test/js/joins/inner_join.spec.ts new file mode 100644 index 0000000000..a2a45e7aef --- /dev/null +++ b/rust/perspective-js/test/js/joins/inner_join.spec.ts @@ -0,0 +1,217 @@ +// ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +// ┃ ██████ ██████ ██████ █ █ █ █ █ █▄ ▀███ █ ┃ +// ┃ ▄▄▄▄▄█ █▄▄▄▄▄ ▄▄▄▄▄█ ▀▀▀▀▀█▀▀▀▀▀ █ ▀▀▀▀▀█ ████████▌▐███ ███▄ ▀█ █ ▀▀▀▀▀ ┃ +// ┃ █▀▀▀▀▀ █▀▀▀▀▀ █▀██▀▀ ▄▄▄▄▄ █ ▄▄▄▄▄█ ▄▄▄▄▄█ ████████▌▐███ █████▄ █ ▄▄▄▄▄ ┃ +// ┃ █ ██████ █ ▀█▄ █ ██████ █ ███▌▐███ ███████▄ █ ┃ +// ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫ +// ┃ Copyright (c) 2017, the Perspective Authors. 
┃ +// ┃ ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ ┃ +// ┃ This file is part of the Perspective library, distributed under the terms ┃ +// ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃ +// ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ + +import { test, expect } from "@perspective-dev/test"; +import perspective from "../perspective_client.ts"; + +((perspective) => { + test.describe("Inner joins", function () { + test("inner joins two tables on a shared key", async function () { + const left = await perspective.table([ + { id: 1, x: 10 }, + { id: 2, x: 20 }, + { id: 3, x: 30 }, + ]); + + const right = await perspective.table([ + { id: 1, y: "a" }, + { id: 2, y: "b" }, + { id: 4, y: "d" }, + ]); + + const joined = await perspective.join(left, right, "id"); + const view = await joined.view(); + const json = await view.to_json(); + + expect(json).toHaveLength(2); + + view.delete(); + joined.delete(); + right.delete(); + left.delete(); + }); + + test("joined table has correct schema", async function () { + const left = await perspective.table({ id: "integer", x: "float" }); + + const right = await perspective.table({ + id: "integer", + y: "string", + }); + + const joined = await perspective.join(left, right, "id"); + const schema = await joined.schema(); + + expect(schema).toEqual({ + id: "integer", + x: "float", + y: "string", + }); + + joined.delete(); + right.delete(); + left.delete(); + }); + + test("joined table reacts to left table updates", async function () { + const left = await perspective.table([ + { id: 1, x: 10 }, + { id: 2, x: 20 }, + ]); + + const right = await perspective.table([ + { id: 1, y: "a" }, + { id: 2, y: "b" }, + ]); + + const joined = await perspective.join(left, right, "id"); + const view = await joined.view(); + + let json = await view.to_json(); + expect(json).toHaveLength(2); + + await left.update([{ id: 1, x: 99 }]); + json = await 
view.to_json(); + + expect(json).toEqual([ + { id: 1, x: 10, y: "a" }, + { id: 2, x: 20, y: "b" }, + { id: 1, x: 99, y: "a" }, + ]); + + view.delete(); + joined.delete(); + right.delete(); + left.delete(); + }); + + test("joined table reacts to right table updates", async function () { + const left = await perspective.table([ + { id: 1, x: 10 }, + { id: 2, x: 20 }, + ]); + + const right = await perspective.table([ + { id: 1, y: "a" }, + { id: 2, y: "b" }, + ]); + + const joined = await perspective.join(left, right, "id"); + const view = await joined.view(); + + await right.update([{ id: 1, y: "c" }]); + const json = await view.to_json(); + + // `right` is not indexed, so this update appends a second id=1 row + expect(json).toHaveLength(3); + + expect(json).toEqual([ + { id: 1, x: 10, y: "a" }, + { id: 1, x: 10, y: "c" }, + { id: 2, x: 20, y: "b" }, + ]); + + view.delete(); + joined.delete(); + right.delete(); + left.delete(); + }); + + test("joined table reacts to new matching rows", async function () { + const left = await perspective.table([{ id: 1, x: 10 }]); + + const right = await perspective.table([{ id: 2, y: "b" }]); + + const joined = await perspective.join(left, right, "id"); + const view = await joined.view(); + + let json = await view.to_json(); + expect(json).toHaveLength(0); + + // Add matching row to right + await right.update([{ id: 1, y: "a" }]); + json = await view.to_json(); + expect(json).toHaveLength(1); + expect(json).toEqual([{ id: 1, x: 10, y: "a" }]); + + view.delete(); + joined.delete(); + right.delete(); + left.delete(); + }); + + test("joined table supports views with group_by", async function () { + const left = await perspective.table([ + { id: 1, category: "A", x: 10 }, + { id: 2, category: "A", x: 20 }, + { id: 3, category: "B", x: 30 }, + ]); + + const right = await perspective.table([ + { id: 1, y: 100 }, + { id: 2, y: 200 }, + { id: 3, y: 300 }, + ]); + + const joined = await perspective.join(left, right, "id"); + const view = await 
joined.view({ + group_by: ["category"], + columns: ["x", "y"], + }); + + const json = await view.to_columns(); + expect(json["x"]).toEqual([60, 30, 30]); + expect(json["y"]).toEqual([600, 300, 300]); + + view.delete(); + joined.delete(); + right.delete(); + left.delete(); + }); + + test("rejects column name conflicts", async function () { + const left = await perspective.table([{ id: 1, value: 10 }]); + const right = await perspective.table([{ id: 1, value: 20 }]); + + let error; + try { + await perspective.join(left, right, "id"); + } catch (e) { + error = e; + } + + expect(error).toBeDefined(); + right.delete(); + left.delete(); + }); + + test("rejects updates on joined table", async function () { + const left = await perspective.table([{ id: 1, x: 10 }]); + const right = await perspective.table([{ id: 1, y: "a" }]); + + const joined = await perspective.join(left, right, "id"); + + let error; + try { + await joined.update([{ id: 1, x: 99, y: "z" }]); + } catch (e) { + error = e; + } + + expect(error).toBeDefined(); + + joined.delete(); + right.delete(); + left.delete(); + }); + }); +})(perspective); diff --git a/rust/perspective-js/test/js/joins/inner_join_indexed.spec.ts b/rust/perspective-js/test/js/joins/inner_join_indexed.spec.ts new file mode 100644 index 0000000000..1a533d28a4 --- /dev/null +++ b/rust/perspective-js/test/js/joins/inner_join_indexed.spec.ts @@ -0,0 +1,248 @@ +// ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +// ┃ ██████ ██████ ██████ █ █ █ █ █ █▄ ▀███ █ ┃ +// ┃ ▄▄▄▄▄█ █▄▄▄▄▄ ▄▄▄▄▄█ ▀▀▀▀▀█▀▀▀▀▀ █ ▀▀▀▀▀█ ████████▌▐███ ███▄ ▀█ █ ▀▀▀▀▀ ┃ +// ┃ █▀▀▀▀▀ █▀▀▀▀▀ █▀██▀▀ ▄▄▄▄▄ █ ▄▄▄▄▄█ ▄▄▄▄▄█ ████████▌▐███ █████▄ █ ▄▄▄▄▄ ┃ +// ┃ █ ██████ █ ▀█▄ █ ██████ █ ███▌▐███ ███████▄ █ ┃ +// ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫ +// ┃ Copyright (c) 2017, the Perspective Authors. 
┃ +// ┃ ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ ┃ +// ┃ This file is part of the Perspective library, distributed under the terms ┃ +// ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃ +// ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ + +import { test, expect } from "@perspective-dev/test"; +import perspective from "../perspective_client.ts"; + +((perspective) => { + test.describe("Inner joins, indexed tables", function () { + test("inner joins two tables on a shared key", async function () { + const left = await perspective.table( + [ + { id: 1, x: 10 }, + { id: 2, x: 20 }, + { id: 3, x: 30 }, + ], + { index: "id" }, + ); + + const right = await perspective.table( + [ + { id: 1, y: "a" }, + { id: 2, y: "b" }, + { id: 4, y: "d" }, + ], + { index: "id" }, + ); + + const joined = await perspective.join(left, right, "id"); + const view = await joined.view(); + const json = await view.to_json(); + + expect(json).toHaveLength(2); + + view.delete(); + joined.delete(); + right.delete(); + left.delete(); + }); + + test("joined table has correct schema", async function () { + const left = await perspective.table( + { id: "integer", x: "float" }, + { index: "id" }, + ); + + const right = await perspective.table( + { id: "integer", y: "string" }, + { index: "id" }, + ); + + const joined = await perspective.join(left, right, "id"); + const schema = await joined.schema(); + + expect(schema).toEqual({ + id: "integer", + x: "float", + y: "string", + }); + + joined.delete(); + right.delete(); + left.delete(); + }); + + test("joined table reacts to left table updates", async function () { + const left = await perspective.table( + [ + { id: 1, x: 10 }, + { id: 2, x: 20 }, + ], + { index: "id" }, + ); + + const right = await perspective.table( + [ + { id: 1, y: "a" }, + { id: 2, y: "b" }, + ], + { index: "id" }, + ); + + const joined = await perspective.join(left, right, "id"); + const view 
= await joined.view(); + + let json = await view.to_json(); + expect(json).toHaveLength(2); + + await left.update([{ id: 1, x: 99 }]); + json = await view.to_json(); + expect(json).toEqual([ + { id: 1, x: 99, y: "a" }, + { id: 2, x: 20, y: "b" }, + ]); + + view.delete(); + joined.delete(); + right.delete(); + left.delete(); + }); + + test("joined table reacts to right table updates", async function () { + const left = await perspective.table( + [ + { id: 1, x: 10 }, + { id: 2, x: 20 }, + ], + { index: "id" }, + ); + + const right = await perspective.table( + [ + { id: 1, y: "a" }, + { id: 2, y: "b" }, + ], + { index: "id" }, + ); + + const joined = await perspective.join(left, right, "id"); + const view = await joined.view(); + + await right.update([{ id: 3, y: "c" }]); + const json = await view.to_json(); + + // id=3 only exists in right, so inner join should not include it + expect(json).toHaveLength(2); + expect(json).toEqual([ + { id: 1, x: 10, y: "a" }, + { id: 2, x: 20, y: "b" }, + ]); + + view.delete(); + joined.delete(); + right.delete(); + left.delete(); + }); + + test("joined table reacts to new matching rows", async function () { + const left = await perspective.table([{ id: 1, x: 10 }], { + index: "id", + }); + + const right = await perspective.table([{ id: 2, y: "b" }], { + index: "id", + }); + + const joined = await perspective.join(left, right, "id"); + const view = await joined.view(); + + let json = await view.to_json(); + expect(json).toHaveLength(0); + + // Add matching row to right + await right.update([{ id: 1, y: "a" }]); + json = await view.to_json(); + expect(json).toHaveLength(1); + expect(json).toEqual([{ id: 1, x: 10, y: "a" }]); + + view.delete(); + joined.delete(); + right.delete(); + left.delete(); + }); + + test("joined table supports views with group_by", async function () { + const left = await perspective.table( + [ + { id: 1, category: "A", x: 10 }, + { id: 2, category: "A", x: 20 }, + { id: 3, category: "B", x: 30 }, + ], + { 
index: "id" }, + ); + + const right = await perspective.table( + [ + { id: 1, y: 100 }, + { id: 2, y: 200 }, + { id: 3, y: 300 }, + ], + { index: "id" }, + ); + + const joined = await perspective.join(left, right, "id"); + const view = await joined.view({ + group_by: ["category"], + columns: ["x", "y"], + }); + + const json = await view.to_columns(); + expect(json["x"]).toEqual([60, 30, 30]); + expect(json["y"]).toEqual([600, 300, 300]); + + view.delete(); + joined.delete(); + right.delete(); + left.delete(); + }); + + test("rejects column name conflicts", async function () { + const left = await perspective.table([{ id: 1, value: 10 }]); + const right = await perspective.table([{ id: 1, value: 20 }]); + + let error; + try { + await perspective.join(left, right, "id"); + } catch (e) { + error = e; + } + + expect(error).toBeDefined(); + right.delete(); + left.delete(); + }); + + test("rejects updates on joined table", async function () { + const left = await perspective.table([{ id: 1, x: 10 }], { + index: "id", + }); + const right = await perspective.table([{ id: 1, y: "a" }], { + index: "id", + }); + + const joined = await perspective.join(left, right, "id"); + + let error; + try { + await joined.update([{ id: 1, x: 99, y: "z" }]); + } catch (e) { + error = e; + } + + expect(error).toBeDefined(); + + joined.delete(); + right.delete(); + left.delete(); + }); + }); +})(perspective); diff --git a/rust/perspective-python/perspective/__init__.py b/rust/perspective-python/perspective/__init__.py index 8ef5978e94..ed1ef38220 100644 --- a/rust/perspective-python/perspective/__init__.py +++ b/rust/perspective-python/perspective/__init__.py @@ -383,6 +383,11 @@ def get_hosted_table_names(*args, **kwargs): return GLOBAL_CLIENT.get_hosted_table_names(*args, **kwargs) +@functools.wraps(Client.join) +def join(*args, **kwargs): + return GLOBAL_CLIENT.join(*args, **kwargs) + + @functools.wraps(Client.system_info) def system_info(*args, **kwargs): return 
GLOBAL_CLIENT.system_info(*args, **kwargs) diff --git a/rust/perspective-python/src/client/client_async.rs b/rust/perspective-python/src/client/client_async.rs index 4dbd80f18a..4138705bae 100644 --- a/rust/perspective-python/src/client/client_async.rs +++ b/rust/perspective-python/src/client/client_async.rs @@ -234,6 +234,35 @@ impl AsyncClient { }) } + /// Creates a new read-only [`Table`] by performing an INNER JOIN on two + /// source tables. The resulting table is reactive: when either source + /// table is updated, the join is automatically recomputed. + /// + /// # Python Examples + /// + /// ```python + /// joined = await client.join(orders_table, products_table, "Product ID") + /// ``` + #[pyo3(signature = (left, right, on, name=None))] + pub async fn join( + &self, + left: AsyncTable, + right: AsyncTable, + on: String, + name: Option<String>, + ) -> PyResult<AsyncTable> { + let client = self.client.clone(); + let py_client = self.clone(); + let table = client + .join(&left.table, &right.table, &on, name) + .await + .into_pyerr()?; + Ok(AsyncTable { + table: Arc::new(table), + client: py_client, + }) + } + /// Retrieves the names of all tables that this client has access to. /// /// `name` is a string identifier unique to the [`Table`] (per [`Client`]), diff --git a/rust/perspective-python/src/client/client_sync.rs b/rust/perspective-python/src/client/client_sync.rs index 23a99052ba..9e3d603455 100644 --- a/rust/perspective-python/src/client/client_sync.rs +++ b/rust/perspective-python/src/client/client_sync.rs @@ -173,6 +173,31 @@ impl Client { Ok(Table(table)) } + /// Creates a new read-only [`Table`] by performing an INNER JOIN on two + /// source tables. The resulting table is reactive: when either source + /// table is updated, the join is automatically recomputed. 
+ /// + /// # Python Examples + /// + /// ```python + /// joined = client.join(orders_table, products_table, "Product ID") + /// ``` + #[pyo3(signature = (left, right, on, name=None))] + pub fn join( + &self, + py: Python<'_>, + left: &Table, + right: &Table, + on: String, + name: Option<String>, + ) -> PyResult<Table>
{ + Ok(Table( + self.0 + .join(left.0.clone(), right.0.clone(), on, name) + .py_block_on(py)?, + )) + } + /// Retrieves the names of all tables that this client has access to. /// /// `name` is a string identifier unique to the [`Table`] (per [`Client`]), diff --git a/rust/perspective-server/cpp/perspective/src/cpp/gnode.cpp b/rust/perspective-server/cpp/perspective/src/cpp/gnode.cpp index 70640e9ed3..7f115b5910 100644 --- a/rust/perspective-server/cpp/perspective/src/cpp/gnode.cpp +++ b/rust/perspective-server/cpp/perspective/src/cpp/gnode.cpp @@ -782,6 +782,13 @@ t_gnode::get_table_sptr() const { return m_gstate->get_table(); } +std::shared_ptr +t_gnode::get_pkeyed_table() const { + PSP_TRACE_SENTINEL(); + PSP_VERBOSE_ASSERT(m_init, "Cannot `get_pkeyed_table` on an uninited gnode."); + return m_gstate->get_pkeyed_table(); +} + /** * Convenience method for promoting a column. This is a hack used to * interop with javascript more efficiently, and does not handle all diff --git a/rust/perspective-server/cpp/perspective/src/cpp/server.cpp b/rust/perspective-server/cpp/perspective/src/cpp/server.cpp index 8d9c7d9eb6..9cc9e13978 100644 --- a/rust/perspective-server/cpp/perspective/src/cpp/server.cpp +++ b/rust/perspective-server/cpp/perspective/src/cpp/server.cpp @@ -561,6 +561,76 @@ ServerResources::mark_all_tables_clean() { m_dirty_tables.clear(); } +void +ServerResources::register_join( + const t_id& join_table_id, + const t_id& left_table_id, + const t_id& right_table_id, + const std::string& on_column +) { + PSP_WRITE_LOCK(m_write_lock); + JoinDef def{left_table_id, right_table_id, on_column}; + m_join_defs.emplace(join_table_id, def); + m_table_to_join_tables.emplace(left_table_id, join_table_id); + m_table_to_join_tables.emplace(right_table_id, join_table_id); + m_readonly_tables.insert(join_table_id); +} + +void +ServerResources::unregister_join(const t_id& join_table_id) { + PSP_WRITE_LOCK(m_write_lock); + auto it = m_join_defs.find(join_table_id); + if (it == 
m_join_defs.end()) { + return; + } + + auto& def = it->second; + + // Remove from m_table_to_join_tables for both source tables + for (const auto& source_id : {def.left_table_id, def.right_table_id}) { + auto range = m_table_to_join_tables.equal_range(source_id); + for (auto jt = range.first; jt != range.second;) { + if (jt->second == join_table_id) { + jt = m_table_to_join_tables.erase(jt); + } else { + ++jt; + } + } + } + + m_join_defs.erase(it); + m_readonly_tables.erase(join_table_id); +} + +bool +ServerResources::is_join_table(const t_id& id) { + PSP_READ_LOCK(m_write_lock); + return m_join_defs.contains(id); +} + +bool +ServerResources::is_readonly_table(const t_id& id) { + PSP_READ_LOCK(m_write_lock); + return m_readonly_tables.contains(id); +} + +std::vector +ServerResources::get_dependent_join_tables(const t_id& source_table_id) { + PSP_READ_LOCK(m_write_lock); + std::vector result; + auto range = m_table_to_join_tables.equal_range(source_table_id); + for (auto it = range.first; it != range.second; ++it) { + result.push_back(it->second); + } + return result; +} + +ServerResources::JoinDef +ServerResources::get_join_def(const t_id& join_table_id) { + PSP_READ_LOCK(m_write_lock); + return m_join_defs.at(join_table_id); +} + void ServerResources::create_table_on_delete_sub( const t_id& table_id, Subscription sub_id @@ -1077,6 +1147,7 @@ needs_poll(const proto::Request::ClientReqCase proto_case) { case ReqCase::kViewRemoveOnUpdateReq: case ReqCase::kServerSystemInfoReq: case ReqCase::kGetFeaturesReq: + case ReqCase::kMakeJoinTableReq: return false; case proto::Request::CLIENT_REQ_NOT_SET: throw std::runtime_error("Unhandled request type 2"); @@ -1104,6 +1175,7 @@ entity_type_is_table(const proto::Request::ClientReqCase proto_case) { case ReqCase::kTableReplaceReq: case ReqCase::kTableDeleteReq: case ReqCase::kTableMakeViewReq: + case ReqCase::kMakeJoinTableReq: return true; case ReqCase::kViewOnDeleteReq: case ReqCase::kViewRemoveDeleteReq: @@ -1667,6 
+1739,150 @@ ProtoServer::_handle_request(std::uint32_t client_id, Request&& req) { break; } + case proto::Request::kMakeJoinTableReq: { + const auto& r = req.make_join_table_req(); + if (m_resources.has_table(entity_id)) { + proto::Response resp; + auto* err = resp.mutable_server_error()->mutable_message(); + std::stringstream ss; + ss << "Table \"" << entity_id << "\" already exists"; + *err = ss.str(); + push_resp(std::move(resp)); + break; + } + + if (!m_resources.has_table(r.left_table_id())) { + proto::Response resp; + auto* err = resp.mutable_server_error()->mutable_message(); + std::stringstream ss; + ss << "Table \"" << r.left_table_id() << "\" not found"; + *err = ss.str(); + push_resp(std::move(resp)); + break; + } + + if (!m_resources.has_table(r.right_table_id())) { + proto::Response resp; + auto* err = resp.mutable_server_error()->mutable_message(); + std::stringstream ss; + ss << "Table \"" << r.right_table_id() << "\" not found"; + *err = ss.str(); + push_resp(std::move(resp)); + break; + } + + auto left_table = m_resources.get_table(r.left_table_id()); + auto right_table = m_resources.get_table(r.right_table_id()); + auto left_schema = left_table->get_schema(); + auto right_schema = right_table->get_schema(); + + // Validate join column exists in both tables + if (!left_schema.has_column(r.on_column())) { + proto::Response resp; + auto* err = resp.mutable_server_error()->mutable_message(); + std::stringstream ss; + ss << "Column \"" << r.on_column() + << "\" not found in table \"" << r.left_table_id() << "\""; + *err = ss.str(); + push_resp(std::move(resp)); + break; + } + + if (!right_schema.has_column(r.on_column())) { + proto::Response resp; + auto* err = resp.mutable_server_error()->mutable_message(); + std::stringstream ss; + ss << "Column \"" << r.on_column() + << "\" not found in table \"" << r.right_table_id() << "\""; + *err = ss.str(); + push_resp(std::move(resp)); + break; + } + + // Validate type match + if 
(left_schema.get_dtype(r.on_column()) + != right_schema.get_dtype(r.on_column())) { + proto::Response resp; + auto* err = resp.mutable_server_error()->mutable_message(); + *err = "Join column type mismatch"; + push_resp(std::move(resp)); + break; + } + + // Check for column name conflicts (excluding join key) + bool has_conflict = false; + for (const auto& rcol : right_schema.columns()) { + if (rcol == r.on_column()) { + continue; + } + if (left_schema.has_column(rcol)) { + proto::Response resp; + auto* err = resp.mutable_server_error()->mutable_message(); + std::stringstream ss; + ss << "Column \"" << rcol << "\" exists in both tables"; + *err = ss.str(); + push_resp(std::move(resp)); + has_conflict = true; + break; + } + } + if (has_conflict) { + break; + } + + // Build merged schema: all left columns + right columns + // (excluding join key from right) + std::vector merged_columns; + std::vector merged_types; + for (t_uindex i = 0; i < left_schema.columns().size(); ++i) { + merged_columns.push_back(left_schema.columns()[i]); + merged_types.push_back(left_schema.types()[i]); + } + for (t_uindex i = 0; i < right_schema.columns().size(); ++i) { + if (right_schema.columns()[i] == r.on_column()) { + continue; + } + merged_columns.push_back(right_schema.columns()[i]); + merged_types.push_back(right_schema.types()[i]); + } + + t_schema merged_schema(merged_columns, merged_types); + auto join_table = + Table::from_schema(r.on_column(), merged_schema); + + m_resources.host_table(entity_id, join_table); + m_resources.register_join( + entity_id, + r.left_table_id(), + r.right_table_id(), + r.on_column() + ); + + // Compute initial join + _recompute_join(entity_id, proto_resp); + + // Process the join table so its gnode state is up to date + auto jt = m_resources.get_table(entity_id); + jt->get_pool()->_process(); + m_resources.mark_table_clean(entity_id); + + proto::Response resp; + resp.mutable_make_join_table_resp(); + push_resp(std::move(resp)); + + // Notify 
on_hosted_tables_update listeners + auto subscriptions = m_resources.get_on_hosted_tables_update_sub(); + for (auto& subscription : subscriptions) { + Response out; + out.set_msg_id(subscription.id); + ProtoServerResp resp2; + resp2.data = std::move(out); + resp2.client_id = subscription.client_id; + proto_resp.emplace_back(std::move(resp2)); + } + + break; + } case proto::Request::kTableSizeReq: { auto table = m_resources.get_table(req.entity_id()); proto::Response resp; @@ -1768,6 +1984,13 @@ ProtoServer::_handle_request(std::uint32_t client_id, Request&& req) { break; } case proto::Request::kTableReplaceReq: { + if (m_resources.is_readonly_table(req.entity_id())) { + proto::Response resp; + *resp.mutable_server_error()->mutable_message() = + "Cannot update a read-only join table"; + push_resp(std::move(resp)); + break; + } auto table = m_resources.get_table(req.entity_id()); table->clear(); const auto& r = req.table_replace_req(); @@ -1802,6 +2025,13 @@ ProtoServer::_handle_request(std::uint32_t client_id, Request&& req) { break; } case proto::Request::kTableRemoveReq: { + if (m_resources.is_readonly_table(req.entity_id())) { + proto::Response resp; + *resp.mutable_server_error()->mutable_message() = + "Cannot update a read-only join table"; + push_resp(std::move(resp)); + break; + } const auto& r = req.table_remove_req(); auto table = m_resources.get_table(req.entity_id()); switch (r.data().data_case()) { @@ -1831,6 +2061,13 @@ ProtoServer::_handle_request(std::uint32_t client_id, Request&& req) { break; } case proto::Request::kTableUpdateReq: { + if (m_resources.is_readonly_table(req.entity_id())) { + proto::Response resp; + *resp.mutable_server_error()->mutable_message() = + "Cannot update a read-only join table"; + push_resp(std::move(resp)); + break; + } const auto& r = req.table_update_req(); auto table = m_resources.get_table(req.entity_id()); switch (r.data().data_case()) { @@ -2624,6 +2861,25 @@ ProtoServer::_handle_request(std::uint32_t client_id, 
Request&& req) { break; } case proto::Request::kTableDeleteReq: { + // Prevent deleting a source table that feeds a join table + auto dependents = + m_resources.get_dependent_join_tables(req.entity_id()); + if (!dependents.empty() + && !m_resources.is_join_table(req.entity_id())) { + proto::Response resp; + std::stringstream ss; + ss << "Cannot delete table: it is a source for join table \"" + << dependents[0] << "\""; + *resp.mutable_server_error()->mutable_message() = ss.str(); + push_resp(std::move(resp)); + break; + } + + // If this is a join table being deleted, clean up join metadata + if (m_resources.is_join_table(req.entity_id())) { + m_resources.unregister_join(req.entity_id()); + } + const auto is_immediate = req.table_delete_req().is_immediate(); if (is_immediate || m_resources.get_table_view_count(req.entity_id()) == 0) { @@ -2895,6 +3151,125 @@ ProtoServer::_handle_request(std::uint32_t client_id, Request&& req) { return proto_resp; } +void +ProtoServer::_recompute_join( + const ServerResources::t_id& join_table_id, + std::vector>& outs +) { + auto def = m_resources.get_join_def(join_table_id); + auto left_table = m_resources.get_table(def.left_table_id); + auto right_table = m_resources.get_table(def.right_table_id); + auto join_table = m_resources.get_table(join_table_id); + + // Get the raw master data tables and pkey maps + auto left_data = left_table->get_gnode()->get_table_sptr(); + auto right_data = right_table->get_gnode()->get_table_sptr(); + const auto& left_pkey_map = left_table->get_gnode()->get_pkey_map(); + const auto& right_pkey_map = right_table->get_gnode()->get_pkey_map(); + + // Get the join key columns from both tables + auto left_key_col = left_data->get_column(def.on_column); + auto right_key_col = right_data->get_column(def.on_column); + + // Build multimap: join_key_value → list of right row indices, + // sorted by pkey so that right-side ordering is deterministic. 
+ tsl::hopscotch_map> right_join_key_to_rows; + right_join_key_to_rows.reserve(right_pkey_map.size()); + { + // Sort right pkey entries so rows are grouped in insertion order. + std::vector> right_entries( + right_pkey_map.begin(), right_pkey_map.end() + ); + std::sort(right_entries.begin(), right_entries.end(), + [](const auto& a, const auto& b) { return a.first < b.first; } + ); + for (const auto& [pkey, row_idx] : right_entries) { + auto join_key = right_key_col->get_scalar(row_idx); + if (!join_key.is_none()) { + right_join_key_to_rows[join_key].push_back(row_idx); + } + } + } + + // Sort left pkey entries so the join result preserves left-table + // insertion order (pkeys are auto-incremented integers for non-indexed + // tables, so sorting by pkey gives insertion order). + std::vector> left_entries( + left_pkey_map.begin(), left_pkey_map.end() + ); + std::sort(left_entries.begin(), left_entries.end(), + [](const auto& a, const auto& b) { return a.first < b.first; } + ); + + // Find matching rows by iterating left rows in order. + // For each left row, pair it with every matching right row (cross product + // per key value) to handle duplicate join keys in non-indexed tables. 
+ std::vector> matched_rows; + matched_rows.reserve(left_entries.size()); + for (const auto& [pkey, row_idx] : left_entries) { + auto join_key = left_key_col->get_scalar(row_idx); + if (join_key.is_none()) { + continue; + } + auto it = right_join_key_to_rows.find(join_key); + if (it != right_join_key_to_rows.end()) { + for (auto right_row_idx : it->second) { + matched_rows.emplace_back(row_idx, right_row_idx); + } + } + } + + t_uindex num_matched = matched_rows.size(); + + // Build the joined schema and data table + auto join_schema = join_table->get_schema(); + t_data_table joined_data(join_schema); + joined_data.init(); + joined_data.extend(num_matched); + + auto left_schema = left_table->get_schema(); + auto right_schema = right_table->get_schema(); + + // Copy data column-by-column + for (const auto& col_name : join_schema.columns()) { + auto dst_col = joined_data.get_column(col_name); + if (left_schema.has_column(col_name)) { + auto src_col = left_data->get_column(col_name); + for (t_uindex i = 0; i < num_matched; ++i) { + dst_col->set_scalar( + i, src_col->get_scalar(matched_rows[i].first) + ); + } + } else if (right_schema.has_column(col_name)) { + auto src_col = right_data->get_column(col_name); + for (t_uindex i = 0; i < num_matched; ++i) { + dst_col->set_scalar( + i, src_col->get_scalar(matched_rows[i].second) + ); + } + } + } + + joined_data.set_size(num_matched); + + // Add psp_pkey and psp_okey columns with synthetic integer keys. + // We cannot use the join column as pkey because duplicate join key + // values (from non-indexed source tables) would cause rows to collapse. 
+ auto* pkey_col = joined_data.add_column("psp_pkey", DTYPE_INT32, true); + auto* okey_col = joined_data.add_column("psp_okey", DTYPE_INT32, true); + for (t_uindex i = 0; i < num_matched; ++i) { + t_tscalar key; + key.set(static_cast(i)); + pkey_col->set_scalar(i, key); + okey_col->set_scalar(i, key); + } + + // Clear the join table and push the new data + join_table->clear(); + join_table->init(joined_data, num_matched, t_op::OP_INSERT, 0); + m_resources.mark_table_dirty(join_table_id); +} + std::vector> ProtoServer::_poll() { std::vector> resp_envs; @@ -2904,6 +3279,41 @@ ProtoServer::_poll() { } m_resources.mark_all_tables_clean(); + + // Recompute join tables whose sources were dirty, using a worklist + // to handle chained joins (join of join) in dependency order. + tsl::hopscotch_set processed_joins; + std::vector worklist; + for (auto& [_, table_id] : tables) { + auto dependents = m_resources.get_dependent_join_tables(table_id); + for (auto& join_id : dependents) { + if (processed_joins.find(join_id) == processed_joins.end()) { + worklist.push_back(join_id); + } + } + } + + while (!worklist.empty()) { + auto join_id = worklist.back(); + worklist.pop_back(); + if (!processed_joins.insert(join_id).second) { + continue; + } + + _recompute_join(join_id, resp_envs); + auto join_table = m_resources.get_table(join_id); + _process_table_unchecked(join_table, join_id, resp_envs); + m_resources.mark_table_clean(join_id); + + // Check for chained joins (join tables that depend on this join) + auto chained = m_resources.get_dependent_join_tables(join_id); + for (auto& chained_id : chained) { + if (processed_joins.find(chained_id) == processed_joins.end()) { + worklist.push_back(chained_id); + } + } + } + return resp_envs; } diff --git a/rust/perspective-server/cpp/perspective/src/include/perspective/gnode.h b/rust/perspective-server/cpp/perspective/src/include/perspective/gnode.h index 5db5910a20..5d28387e84 100644 --- 
a/rust/perspective-server/cpp/perspective/src/include/perspective/gnode.h +++ b/rust/perspective-server/cpp/perspective/src/include/perspective/gnode.h @@ -158,6 +158,7 @@ class PERSPECTIVE_EXPORT t_gnode { t_data_table* get_table(); std::shared_ptr get_table_sptr() const; + std::shared_ptr get_pkeyed_table() const; t_data_table* _get_otable(t_uindex port_id); t_data_table* _get_itable(t_uindex port_id); diff --git a/rust/perspective-server/cpp/perspective/src/include/perspective/server.h b/rust/perspective-server/cpp/perspective/src/include/perspective/server.h index acbae6b5a1..119a63c466 100644 --- a/rust/perspective-server/cpp/perspective/src/include/perspective/server.h +++ b/rust/perspective-server/cpp/perspective/src/include/perspective/server.h @@ -593,6 +593,25 @@ namespace server { std::uint32_t sub_id, std::uint32_t client_id ); + // Join table tracking + struct JoinDef { + t_id left_table_id; + t_id right_table_id; + std::string on_column; + }; + + void register_join( + const t_id& join_table_id, + const t_id& left_table_id, + const t_id& right_table_id, + const std::string& on_column + ); + void unregister_join(const t_id& join_table_id); + bool is_join_table(const t_id& id); + bool is_readonly_table(const t_id& id); + std::vector get_dependent_join_tables(const t_id& source_table_id); + JoinDef get_join_def(const t_id& join_table_id); + void mark_table_dirty(const t_id& id); void mark_table_clean(const t_id& id); void mark_all_tables_clean(); @@ -630,6 +649,11 @@ namespace server { tsl::hopscotch_set m_dirty_tables; tsl::hopscotch_map m_deleted_tables; + // Join dependency tracking + tsl::hopscotch_map m_join_defs; + std::multimap m_table_to_join_tables; + tsl::hopscotch_set m_readonly_tables; + #ifdef PSP_PARALLEL_FOR std::shared_mutex m_write_lock; #endif @@ -676,6 +700,11 @@ namespace server { std::vector>& outs ); + void _recompute_join( + const ServerResources::t_id& join_table_id, + std::vector>& outs + ); + static std::uint32_t m_client_id; 
bool m_realtime_mode; std::atomic From 78363c0cdd68b9c27e20579bdf86c9e9d9343d1b Mon Sep 17 00:00:00 2001 From: Andrew Stein Date: Wed, 1 Apr 2026 11:27:28 -0400 Subject: [PATCH 2/3] Outer and Left joins Signed-off-by: Andrew Stein --- rust/metadata/main.rs | 5 +- rust/perspective-client/build.rs | 4 + rust/perspective-client/perspective.proto | 10 +- rust/perspective-client/src/rust/client.rs | 35 +- rust/perspective-client/src/rust/lib.rs | 6 +- rust/perspective-client/src/rust/table.rs | 16 + rust/perspective-client/src/rust/table_ref.rs | 53 ++ rust/perspective-js/src/rust/client.rs | 59 ++- rust/perspective-js/src/rust/lib.rs | 4 + rust/perspective-js/src/rust/utils/errors.rs | 4 + .../perspective-js/src/ts/perspective.node.ts | 16 +- .../test/js/joins/inner_join.spec.ts | 381 ++++++++------- .../test/js/joins/left_join.spec.ts | 183 +++++++ .../test/js/joins/outer_join.spec.ts | 208 ++++++++ .../test/js/joins/right_on.spec.ts | 235 +++++++++ .../perspective/tests/table/test_join.py | 115 +++++ .../src/client/client_async.rs | 52 +- .../src/client/client_sync.rs | 65 ++- .../cpp/perspective/CMakeLists.txt | 1 + .../cpp/perspective/src/cpp/join_engine.cpp | 451 ++++++++++++++++++ .../cpp/perspective/src/cpp/server.cpp | 321 ++----------- .../src/include/perspective/join_engine.h | 101 ++++ .../src/include/perspective/server.h | 31 +- rust/perspective-viewer/src/rust/lib.rs | 1 + 24 files changed, 1836 insertions(+), 521 deletions(-) create mode 100644 rust/perspective-client/src/rust/table_ref.rs create mode 100644 rust/perspective-js/test/js/joins/left_join.spec.ts create mode 100644 rust/perspective-js/test/js/joins/outer_join.spec.ts create mode 100644 rust/perspective-js/test/js/joins/right_on.spec.ts create mode 100644 rust/perspective-python/perspective/tests/table/test_join.py create mode 100644 rust/perspective-server/cpp/perspective/src/cpp/join_engine.cpp create mode 100644 rust/perspective-server/cpp/perspective/src/include/perspective/join_engine.h 
diff --git a/rust/metadata/main.rs b/rust/metadata/main.rs index 5a054b1b9e..b8723b6fb8 100644 --- a/rust/metadata/main.rs +++ b/rust/metadata/main.rs @@ -32,8 +32,8 @@ use std::fs; use perspective_client::config::*; use perspective_client::{ - ColumnWindow, DeleteOptions, OnUpdateData, OnUpdateOptions, SystemInfo, TableInitOptions, - UpdateOptions, ViewWindow, + ColumnWindow, DeleteOptions, JoinOptions, OnUpdateData, OnUpdateOptions, SystemInfo, + TableInitOptions, UpdateOptions, ViewWindow, }; use perspective_viewer::config::ViewerConfigUpdate; use ts_rs::TS; @@ -71,6 +71,7 @@ pub fn generate_type_bindings_js() -> Result<(), Box> { ViewConfigUpdate::export_all_to(&path)?; OnUpdateData::export_all_to(&path)?; OnUpdateOptions::export_all_to(&path)?; + JoinOptions::export_all_to(&path)?; UpdateOptions::export_all_to(&path)?; DeleteOptions::export_all_to(&path)?; ViewWindow::export_all_to(&path)?; diff --git a/rust/perspective-client/build.rs b/rust/perspective-client/build.rs index b3bef4885a..3fa3f6b08f 100644 --- a/rust/perspective-client/build.rs +++ b/rust/perspective-client/build.rs @@ -57,6 +57,10 @@ fn prost_build() -> Result<()> { .field_attribute("ViewOnUpdateResp.delta", "#[ts(as = \"Vec::\")]") .field_attribute("ViewOnUpdateResp.delta", "#[serde(with = \"serde_bytes\")]") .type_attribute("ColumnType", "#[derive(ts_rs::TS)]") + .type_attribute( + "JoinType", + "#[derive(serde::Deserialize, ts_rs::TS)] #[serde(rename_all = \"snake_case\")]", + ) .field_attribute("ViewToArrowResp.arrow", "#[serde(skip)]") .field_attribute("from_arrow", "#[serde(skip)]") .type_attribute(".", "#[derive(serde::Serialize)]") diff --git a/rust/perspective-client/perspective.proto b/rust/perspective-client/perspective.proto index 2f7291f1af..c66939c764 100644 --- a/rust/perspective-client/perspective.proto +++ b/rust/perspective-client/perspective.proto @@ -337,11 +337,19 @@ message MakeTableReq { } message MakeTableResp {} -// `Client::join` — create a read-only table from an 
INNER JOIN of two tables. +enum JoinType { + INNER = 0; + LEFT = 1; + OUTER = 2; +} + +// `Client::join` — create a read-only table from a JOIN of two tables. message MakeJoinTableReq { string left_table_id = 1; string right_table_id = 2; string on_column = 3; + JoinType join_type = 4; + string right_on_column = 5; } message MakeJoinTableResp {} diff --git a/rust/perspective-client/src/rust/client.rs b/rust/perspective-client/src/rust/client.rs index dba507870b..8fe22283f0 100644 --- a/rust/perspective-client/src/rust/client.rs +++ b/rust/perspective-client/src/rust/client.rs @@ -26,11 +26,12 @@ use crate::proto::request::ClientReq; use crate::proto::response::ClientResp; use crate::proto::{ ColumnType, GetFeaturesReq, GetFeaturesResp, GetHostedTablesReq, GetHostedTablesResp, - HostedTable, MakeJoinTableReq, MakeTableReq, RemoveHostedTablesUpdateReq, Request, Response, - ServerError, ServerSystemInfoReq, + HostedTable, JoinType, MakeJoinTableReq, MakeTableReq, RemoveHostedTablesUpdateReq, Request, + Response, ServerError, ServerSystemInfoReq, }; -use crate::table::{Table, TableInitOptions, TableOptions}; +use crate::table::{JoinOptions, Table, TableInitOptions, TableOptions}; use crate::table_data::{TableData, UpdateData}; +use crate::table_ref::TableRef; use crate::utils::*; use crate::view::{OnUpdateData, ViewWindow}; use crate::{OnUpdateMode, OnUpdateOptions, asyncfn, clone}; @@ -589,32 +590,36 @@ impl Client { } } - /// Create a new read-only [`Table`] by performing an INNER JOIN on two - /// source tables. The resulting table is reactive: when either source - /// table is updated, the join is automatically recomputed. + /// Create a new read-only [`Table`] by performing a JOIN on two source + /// tables. The resulting table is reactive: when either source table is + /// updated, the join is automatically recomputed. /// /// # Arguments /// - /// * `left` - The left source table. - /// * `right` - The right source table. 
+ /// * `left` - The left source table (as a [`Table`] or name string). + /// * `right` - The right source table (as a [`Table`] or name string). /// * `on` - The column name to join on. Must exist in both tables with the /// same type. - /// * `name` - Optional name for the resulting table. + /// * `options` - Join configuration (join type, table name). pub async fn join( &self, - left: &Table, - right: &Table, + left: TableRef, + right: TableRef, on: &str, - name: Option, + options: JoinOptions, ) -> ClientResult
{ - let entity_id = name.unwrap_or_else(randid); + let entity_id = options.name.unwrap_or_else(randid); + let join_type: JoinType = options.join_type.unwrap_or_default(); + let right_on_column = options.right_on.unwrap_or_default(); let msg = Request { msg_id: self.gen_id(), entity_id: entity_id.clone(), client_req: Some(ClientReq::MakeJoinTableReq(MakeJoinTableReq { - left_table_id: left.get_name().to_owned(), - right_table_id: right.get_name().to_owned(), + left_table_id: left.table_name().to_owned(), + right_table_id: right.table_name().to_owned(), on_column: on.to_owned(), + join_type: join_type.into(), + right_on_column, })), }; diff --git a/rust/perspective-client/src/rust/lib.rs b/rust/perspective-client/src/rust/lib.rs index 2a7784a654..faaf031cc5 100644 --- a/rust/perspective-client/src/rust/lib.rs +++ b/rust/perspective-client/src/rust/lib.rs @@ -38,6 +38,7 @@ mod client; mod session; mod table; mod table_data; +mod table_ref; mod view; pub mod virtual_server; @@ -51,11 +52,14 @@ pub mod utils; pub use crate::client::{Client, ClientHandler, Features, ReconnectCallback, SystemInfo}; use crate::proto::HostedTable; +pub use crate::proto::JoinType; pub use crate::session::{ProxySession, Session}; pub use crate::table::{ - DeleteOptions, ExprValidationResult, Table, TableInitOptions, TableReadFormat, UpdateOptions, + DeleteOptions, ExprValidationResult, JoinOptions, Table, TableInitOptions, TableReadFormat, + UpdateOptions, }; pub use crate::table_data::{TableData, UpdateData}; +pub use crate::table_ref::TableRef; pub use crate::view::{ ColumnWindow, OnUpdateData, OnUpdateMode, OnUpdateOptions, View, ViewWindow, }; diff --git a/rust/perspective-client/src/rust/table.rs b/rust/perspective-client/src/rust/table.rs index f499e6d9e5..02df8e76ce 100644 --- a/rust/perspective-client/src/rust/table.rs +++ b/rust/perspective-client/src/rust/table.rs @@ -137,6 +137,22 @@ impl From for TableOptions { } } +/// Options for [`Client::join`]. 
+#[derive(Clone, Debug, Default, Serialize, Deserialize, TS)] +pub struct JoinOptions { + #[serde(default)] + #[ts(optional)] + pub join_type: Option, + + #[serde(default)] + #[ts(optional)] + pub name: Option, + + #[serde(default)] + #[ts(optional)] + pub right_on: Option, +} + /// Options for [`Table::delete`]. #[derive(Clone, Debug, Default, Deserialize, TS)] pub struct DeleteOptions { diff --git a/rust/perspective-client/src/rust/table_ref.rs b/rust/perspective-client/src/rust/table_ref.rs new file mode 100644 index 0000000000..bf0b1697ca --- /dev/null +++ b/rust/perspective-client/src/rust/table_ref.rs @@ -0,0 +1,53 @@ +// ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +// ┃ ██████ ██████ ██████ █ █ █ █ █ █▄ ▀███ █ ┃ +// ┃ ▄▄▄▄▄█ █▄▄▄▄▄ ▄▄▄▄▄█ ▀▀▀▀▀█▀▀▀▀▀ █ ▀▀▀▀▀█ ████████▌▐███ ███▄ ▀█ █ ▀▀▀▀▀ ┃ +// ┃ █▀▀▀▀▀ █▀▀▀▀▀ █▀██▀▀ ▄▄▄▄▄ █ ▄▄▄▄▄█ ▄▄▄▄▄█ ████████▌▐███ █████▄ █ ▄▄▄▄▄ ┃ +// ┃ █ ██████ █ ▀█▄ █ ██████ █ ███▌▐███ ███████▄ █ ┃ +// ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫ +// ┃ Copyright (c) 2017, the Perspective Authors. ┃ +// ┃ ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ ┃ +// ┃ This file is part of the Perspective library, distributed under the terms ┃ +// ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃ +// ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ + +use crate::Table; + +/// A reference to a table, either by handle or by name. +#[derive(Clone)] +pub enum TableRef { + Table(Table), + Name(String), +} + +impl TableRef { + pub fn table_name(&self) -> &str { + match self { + TableRef::Table(table) => table.get_name(), + TableRef::Name(name) => name, + } + } +} + +impl From<&Table> for TableRef { + fn from(table: &Table) -> Self { + TableRef::Table(table.clone()) + } +} + +impl From
for TableRef { + fn from(table: Table) -> Self { + TableRef::Table(table) + } +} + +impl From for TableRef { + fn from(name: String) -> Self { + TableRef::Name(name) + } +} + +impl From<&str> for TableRef { + fn from(name: &str) -> Self { + TableRef::Name(name.to_owned()) + } +} diff --git a/rust/perspective-js/src/rust/client.rs b/rust/perspective-js/src/rust/client.rs index bb4305e9e1..3d8359db02 100644 --- a/rust/perspective-js/src/rust/client.rs +++ b/rust/perspective-js/src/rust/client.rs @@ -20,7 +20,7 @@ use js_sys::{Function, Uint8Array}; #[cfg(doc)] use perspective_client::SystemInfo; use perspective_client::{ - ClientError, ReconnectCallback, Session, TableData, TableInitOptions, asyncfn, + ClientError, ReconnectCallback, Session, TableData, TableInitOptions, TableRef, asyncfn, }; use wasm_bindgen::prelude::*; use wasm_bindgen_derive::TryFromJsValue; @@ -35,6 +35,35 @@ extern "C" { #[derive(Clone)] #[wasm_bindgen(typescript_type = "TableInitOptions")] pub type JsTableInitOptions; + + #[derive(Clone)] + #[wasm_bindgen(typescript_type = "JoinOptions")] + pub type JsJoinOptions; +} + +async fn js_to_table_ref(val: &JsValue) -> ApiResult { + if let Some(name) = val.as_string() { + Ok(TableRef::from(name)) + } else { + let get_name = js_sys::Reflect::get(val, &wasm_bindgen::intern("get_name").into()) + .map_err(|_| apierror!(TableRefError))? + .dyn_into::() + .map_err(|_| apierror!(TableRefError))?; + + let promise = get_name + .call0(val) + .map_err(|_| apierror!(TableRefError))? + .dyn_into::() + .map_err(|_| apierror!(TableRefError))?; + + let name = wasm_bindgen_futures::JsFuture::from(promise) + .await + .map_err(|_| apierror!(TableRefError))? + .as_string() + .ok_or_else(|| apierror!(TableRefError))?; + + Ok(TableRef::from(name)) + } } #[wasm_bindgen] @@ -388,26 +417,38 @@ impl Client { /// /// # Arguments /// - /// - `left` - The left source table. - /// - `right` - The right source table. 
+ /// - `left` - The left source table (a [`Table`] instance or a table name + /// string). + /// - `right` - The right source table (a [`Table`] instance or a table name + /// string). /// - `on` - The column name to join on. Must exist in both tables with the /// same type. - /// - `name` - Optional name for the resulting table. + /// - `options` - Optional join configuration: `{ join_type?: "inner" | + /// "left" | "outer", name?: string }`. /// /// # JavaScript Examples /// /// ```javascript - /// const joined = await client.join(orders_table, products_table, "Product ID"); + /// const joined = await client.join(orders_table, products_table, "Product ID", { join_type: "left" }); + /// const joined = await client.join("orders", "products", "Product ID", { join_type: "left" }); /// ``` #[wasm_bindgen] pub async fn join( &self, - left: &Table, - right: &Table, + left: JsValue, + right: JsValue, on: &str, - name: Option, + options: Option, ) -> ApiResult
{ - Ok(Table(self.client.join(&left.0, &right.0, on, name).await?)) + let options = options + .into_serde_ext::>()? + .unwrap_or_default(); + + let left_ref = js_to_table_ref(&left).await?; + let right_ref = js_to_table_ref(&right).await?; + Ok(Table( + self.client.join(left_ref, right_ref, on, options).await?, + )) } /// Terminates this [`Client`], cleaning up any [`crate::View`] handles the diff --git a/rust/perspective-js/src/rust/lib.rs b/rust/perspective-js/src/rust/lib.rs index b259466712..5d3214a90d 100644 --- a/rust/perspective-js/src/rust/lib.rs +++ b/rust/perspective-js/src/rust/lib.rs @@ -59,11 +59,15 @@ export type * from "../../src/ts/ts-rs/SystemInfo.d.ts"; export type * from "../../src/ts/ts-rs/SortDir.d.ts"; export type * from "../../src/ts/ts-rs/Filter.d.ts"; export type * from "../../src/ts/ts-rs/ViewConfig.d.ts"; +export type * from "../../src/ts/ts-rs/JoinOptions.ts"; +export type * from "../../src/ts/ts-rs/JoinType.ts"; import type {ColumnWindow} from "../../src/ts/ts-rs/ColumnWindow.d.ts"; import type {ColumnType} from "../../src/ts/ts-rs/ColumnType.d.ts"; import type {ViewWindow} from "../../src/ts/ts-rs/ViewWindow.d.ts"; import type {TableInitOptions} from "../../src/ts/ts-rs/TableInitOptions.d.ts"; +import type {JoinOptions} from "../../src/ts/ts-rs/JoinOptions.ts"; +import type {JoinType} from "../../src/ts/ts-rs/JoinType.ts"; import type {ViewConfigUpdate} from "../../src/ts/ts-rs/ViewConfigUpdate.d.ts"; import type * as on_update_args from "../../src/ts/ts-rs/ViewOnUpdateResp.d.ts"; import type {OnUpdateOptions} from "../../src/ts/ts-rs/OnUpdateOptions.d.ts"; diff --git a/rust/perspective-js/src/rust/utils/errors.rs b/rust/perspective-js/src/rust/utils/errors.rs index fca4b31d24..fa0e452325 100644 --- a/rust/perspective-js/src/rust/utils/errors.rs +++ b/rust/perspective-js/src/rust/utils/errors.rs @@ -97,6 +97,9 @@ pub enum ApiErrorType { #[error("Invalid `expressions` {}", format_valid_exprs(.0))] 
InvalidViewerConfigExpressionsError(Rc), + #[error("Expected a Table or string table name")] + TableRefError, + #[error("No `Table` attached")] NoTableError, @@ -134,6 +137,7 @@ impl ApiError { ApiErrorType::ProstError(_) => "[ProstError]", ApiErrorType::InvalidViewerConfigError(..) => "[InvalidViewerConfigError]", ApiErrorType::InvalidViewerConfigExpressionsError(_) => "[InvalidViewerConfigError]", + ApiErrorType::TableRefError => "[TableRefError]", ApiErrorType::NoTableError => "[NoTableError]", ApiErrorType::SerdeWasmBindgenError(_) => "[SerdeWasmBindgenError]", ApiErrorType::Utf8Error(_) => "[FromUtf8Error]", diff --git a/rust/perspective-js/src/ts/perspective.node.ts b/rust/perspective-js/src/ts/perspective.node.ts index 23248a0ac4..8be2e92c1b 100644 --- a/rust/perspective-js/src/ts/perspective.node.ts +++ b/rust/perspective-js/src/ts/perspective.node.ts @@ -274,20 +274,20 @@ export function on_error(callback: Function) { } /** - * Create a read-only table from an INNER JOIN of two source tables. - * @param left - * @param right + * Create a read-only table from a JOIN of two source tables. + * @param left - The left source table (a Table instance or a table name string). + * @param right - The right source table (a Table instance or a table name string). 
* @param on - * @param name + * @param options - Optional join configuration: { join_type?: "inner"|"left"|"outer", name?: string } * @returns */ export function join( - left: perspective_client.Table, - right: perspective_client.Table, + left: perspective_client.Table | string, + right: perspective_client.Table | string, on: string, - name?: string, + options?: perspective_client.JoinOptions, ) { - return SYNC_CLIENT.join(left, right, on, name); + return SYNC_CLIENT.join(left as any, right as any, on, options); } /** diff --git a/rust/perspective-js/test/js/joins/inner_join.spec.ts b/rust/perspective-js/test/js/joins/inner_join.spec.ts index a2a45e7aef..4623011e32 100644 --- a/rust/perspective-js/test/js/joins/inner_join.spec.ts +++ b/rust/perspective-js/test/js/joins/inner_join.spec.ts @@ -13,205 +13,264 @@ import { test, expect } from "@perspective-dev/test"; import perspective from "../perspective_client.ts"; -((perspective) => { - test.describe("Inner joins", function () { - test("inner joins two tables on a shared key", async function () { - const left = await perspective.table([ - { id: 1, x: 10 }, - { id: 2, x: 20 }, - { id: 3, x: 30 }, - ]); +test.describe("Inner joins", function () { + test("inner joins two tables on a shared key", async function () { + const left = await perspective.table([ + { id: 1, x: 10 }, + { id: 2, x: 20 }, + { id: 3, x: 30 }, + ]); + + const right = await perspective.table([ + { id: 1, y: "a" }, + { id: 2, y: "b" }, + { id: 4, y: "d" }, + ]); + + const joined = await perspective.join(left, right, "id"); + const view = await joined.view(); + const json = await view.to_json(); + + expect(json).toHaveLength(2); + + view.delete(); + joined.delete(); + right.delete(); + left.delete(); + }); - const right = await perspective.table([ - { id: 1, y: "a" }, - { id: 2, y: "b" }, - { id: 4, y: "d" }, - ]); + test("joined table has correct schema", async function () { + const left = await perspective.table({ id: "integer", x: "float" }); - 
const joined = await perspective.join(left, right, "id"); - const view = await joined.view(); - const json = await view.to_json(); + const right = await perspective.table({ + id: "integer", + y: "string", + }); - expect(json).toHaveLength(2); + const joined = await perspective.join(left, right, "id"); + const schema = await joined.schema(); - view.delete(); - joined.delete(); - right.delete(); - left.delete(); + expect(schema).toEqual({ + id: "integer", + x: "float", + y: "string", }); - test("joined table has correct schema", async function () { - const left = await perspective.table({ id: "integer", x: "float" }); + joined.delete(); + right.delete(); + left.delete(); + }); - const right = await perspective.table({ - id: "integer", - y: "string", - }); + test("joined table reacts to left table updates", async function () { + const left = await perspective.table([ + { id: 1, x: 10 }, + { id: 2, x: 20 }, + ]); - const joined = await perspective.join(left, right, "id"); - const schema = await joined.schema(); + const right = await perspective.table([ + { id: 1, y: "a" }, + { id: 2, y: "b" }, + ]); - expect(schema).toEqual({ - id: "integer", - x: "float", - y: "string", - }); + const joined = await perspective.join(left, right, "id"); + const view = await joined.view(); - joined.delete(); - right.delete(); - left.delete(); - }); + let json = await view.to_json(); + expect(json).toHaveLength(2); - test("joined table reacts to left table updates", async function () { - const left = await perspective.table([ - { id: 1, x: 10 }, - { id: 2, x: 20 }, - ]); + await left.update([{ id: 1, x: 99 }]); + json = await view.to_json(); - const right = await perspective.table([ - { id: 1, y: "a" }, - { id: 2, y: "b" }, - ]); + expect(json).toEqual([ + { id: 1, x: 10, y: "a" }, + { id: 2, x: 20, y: "b" }, + { id: 1, x: 99, y: "a" }, + ]); - const joined = await perspective.join(left, right, "id"); - const view = await joined.view(); + view.delete(); + joined.delete(); + 
right.delete(); + left.delete(); + }); - let json = await view.to_json(); - expect(json).toHaveLength(2); + test("joined table reacts to right table updates", async function () { + const left = await perspective.table([ + { id: 1, x: 10 }, + { id: 2, x: 20 }, + ]); - await left.update([{ id: 1, x: 99 }]); - json = await view.to_json(); + const right = await perspective.table([ + { id: 1, y: "a" }, + { id: 2, y: "b" }, + ]); - expect(json).toEqual([ - { id: 1, x: 10, y: "a" }, - { id: 2, x: 20, y: "b" }, - { id: 1, x: 99, y: "a" }, - ]); + const joined = await perspective.join(left, right, "id"); + const view = await joined.view(); - view.delete(); - joined.delete(); - right.delete(); - left.delete(); - }); + await right.update([{ id: 1, y: "c" }]); + const json = await view.to_json(); - test("joined table reacts to right table updates", async function () { - const left = await perspective.table([ - { id: 1, x: 10 }, - { id: 2, x: 20 }, - ]); + // id=3 only exists in right, so inner join should not include it + expect(json).toHaveLength(3); - const right = await perspective.table([ - { id: 1, y: "a" }, - { id: 2, y: "b" }, - ]); + expect(json).toEqual([ + { id: 1, x: 10, y: "a" }, + { id: 1, x: 10, y: "c" }, + { id: 2, x: 20, y: "b" }, + ]); + + view.delete(); + joined.delete(); + right.delete(); + left.delete(); + }); - const joined = await perspective.join(left, right, "id"); - const view = await joined.view(); + test("joined table reacts to new matching rows", async function () { + const left = await perspective.table([{ id: 1, x: 10 }]); - await right.update([{ id: 1, y: "c" }]); - const json = await view.to_json(); + const right = await perspective.table([{ id: 2, y: "b" }]); - // id=3 only exists in right, so inner join should not include it - expect(json).toHaveLength(3); + const joined = await perspective.join(left, right, "id"); + const view = await joined.view(); - expect(json).toEqual([ - { id: 1, x: 10, y: "a" }, - { id: 1, x: 10, y: "c" }, - { id: 2, x: 
20, y: "b" }, - ]); + let json = await view.to_json(); + expect(json).toHaveLength(0); - view.delete(); - joined.delete(); - right.delete(); - left.delete(); + // Add matching row to right + await right.update([{ id: 1, y: "a" }]); + json = await view.to_json(); + expect(json).toHaveLength(1); + expect(json).toEqual([{ id: 1, x: 10, y: "a" }]); + + view.delete(); + joined.delete(); + right.delete(); + left.delete(); + }); + + test("joined table supports views with group_by", async function () { + const left = await perspective.table([ + { id: 1, category: "A", x: 10 }, + { id: 2, category: "A", x: 20 }, + { id: 3, category: "B", x: 30 }, + ]); + + const right = await perspective.table([ + { id: 1, y: 100 }, + { id: 2, y: 200 }, + { id: 3, y: 300 }, + ]); + + const joined = await perspective.join(left, right, "id"); + const view = await joined.view({ + group_by: ["category"], + columns: ["x", "y"], }); - test("joined table reacts to new matching rows", async function () { - const left = await perspective.table([{ id: 1, x: 10 }]); + const json = await view.to_columns(); + expect(json["x"]).toEqual([60, 30, 30]); + expect(json["y"]).toEqual([600, 300, 300]); - const right = await perspective.table([{ id: 2, y: "b" }]); + view.delete(); + joined.delete(); + right.delete(); + left.delete(); + }); - const joined = await perspective.join(left, right, "id"); - const view = await joined.view(); + test("inner joins two tables by name strings", async function () { + const left = await perspective.table( + [ + { id: 1, x: 10 }, + { id: 2, x: 20 }, + { id: 3, x: 30 }, + ], + { name: "left_named" }, + ); - let json = await view.to_json(); - expect(json).toHaveLength(0); + const right = await perspective.table( + [ + { id: 1, y: "a" }, + { id: 2, y: "b" }, + { id: 4, y: "d" }, + ], + { name: "right_named" }, + ); + + const joined = await perspective.join( + "left_named", + "right_named", + "id", + ); + const view = await joined.view(); + const json = await view.to_json(); + + 
expect(json).toHaveLength(2); + + view.delete(); + joined.delete(); + right.delete(); + left.delete(); + }); - // Add matching row to right - await right.update([{ id: 1, y: "a" }]); - json = await view.to_json(); - expect(json).toHaveLength(1); - expect(json).toEqual([{ id: 1, x: 10, y: "a" }]); + test("inner joins with mixed Table and string args", async function () { + const left = await perspective.table([ + { id: 1, x: 10 }, + { id: 2, x: 20 }, + ]); - view.delete(); - joined.delete(); - right.delete(); - left.delete(); - }); + const right = await perspective.table( + [ + { id: 1, y: "a" }, + { id: 2, y: "b" }, + ], + { name: "right_mixed" }, + ); - test("joined table supports views with group_by", async function () { - const left = await perspective.table([ - { id: 1, category: "A", x: 10 }, - { id: 2, category: "A", x: 20 }, - { id: 3, category: "B", x: 30 }, - ]); - - const right = await perspective.table([ - { id: 1, y: 100 }, - { id: 2, y: 200 }, - { id: 3, y: 300 }, - ]); - - const joined = await perspective.join(left, right, "id"); - const view = await joined.view({ - group_by: ["category"], - columns: ["x", "y"], - }); - - const json = await view.to_columns(); - expect(json["x"]).toEqual([60, 30, 30]); - expect(json["y"]).toEqual([600, 300, 300]); - - view.delete(); - joined.delete(); - right.delete(); - left.delete(); - }); + const joined = await perspective.join(left, "right_mixed", "id"); + const view = await joined.view(); + const json = await view.to_json(); + + expect(json).toHaveLength(2); - test("rejects column name conflicts", async function () { - const left = await perspective.table([{ id: 1, value: 10 }]); - const right = await perspective.table([{ id: 1, value: 20 }]); + view.delete(); + joined.delete(); + right.delete(); + left.delete(); + }); - let error; - try { - await perspective.join(left, right, "id"); - } catch (e) { - error = e; - } + test("rejects column name conflicts", async function () { + const left = await 
perspective.table([{ id: 1, value: 10 }]); + const right = await perspective.table([{ id: 1, value: 20 }]); - expect(error).toBeDefined(); - right.delete(); - left.delete(); - }); + let error; + try { + await perspective.join(left, right, "id"); + } catch (e) { + error = e; + } - test("rejects updates on joined table", async function () { - const left = await perspective.table([{ id: 1, x: 10 }]); - const right = await perspective.table([{ id: 1, y: "a" }]); + expect(error).toBeDefined(); + right.delete(); + left.delete(); + }); - const joined = await perspective.join(left, right, "id"); + test("rejects updates on joined table", async function () { + const left = await perspective.table([{ id: 1, x: 10 }]); + const right = await perspective.table([{ id: 1, y: "a" }]); - let error; - try { - await joined.update([{ id: 1, x: 99, y: "z" }]); - } catch (e) { - error = e; - } + const joined = await perspective.join(left, right, "id"); - expect(error).toBeDefined(); + let error; + try { + await joined.update([{ id: 1, x: 99, y: "z" }]); + } catch (e) { + error = e; + } - joined.delete(); - right.delete(); - left.delete(); - }); + expect(error).toBeDefined(); + + joined.delete(); + right.delete(); + left.delete(); }); -})(perspective); +}); diff --git a/rust/perspective-js/test/js/joins/left_join.spec.ts b/rust/perspective-js/test/js/joins/left_join.spec.ts new file mode 100644 index 0000000000..3a750fb384 --- /dev/null +++ b/rust/perspective-js/test/js/joins/left_join.spec.ts @@ -0,0 +1,183 @@ +// ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +// ┃ ██████ ██████ ██████ █ █ █ █ █ █▄ ▀███ █ ┃ +// ┃ ▄▄▄▄▄█ █▄▄▄▄▄ ▄▄▄▄▄█ ▀▀▀▀▀█▀▀▀▀▀ █ ▀▀▀▀▀█ ████████▌▐███ ███▄ ▀█ █ ▀▀▀▀▀ ┃ +// ┃ █▀▀▀▀▀ █▀▀▀▀▀ █▀██▀▀ ▄▄▄▄▄ █ ▄▄▄▄▄█ ▄▄▄▄▄█ ████████▌▐███ █████▄ █ ▄▄▄▄▄ ┃ +// ┃ █ ██████ █ ▀█▄ █ ██████ █ ███▌▐███ ███████▄ █ ┃ +// ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫ +// ┃ Copyright (c) 2017, the Perspective Authors. 
┃ +// ┃ ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ ┃ +// ┃ This file is part of the Perspective library, distributed under the terms ┃ +// ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃ +// ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ + +import { test, expect } from "@perspective-dev/test"; +import perspective from "../perspective_client.ts"; + +((perspective) => { + test.describe("Left joins", function () { + test("left joins two tables on a shared key", async function () { + const left = await perspective.table([ + { id: 1, x: 10 }, + { id: 2, x: 20 }, + { id: 3, x: 30 }, + ]); + + const right = await perspective.table([ + { id: 1, y: "a" }, + { id: 2, y: "b" }, + { id: 4, y: "d" }, + ]); + + const joined = await perspective.join(left, right, "id", { + join_type: "left", + }); + const view = await joined.view(); + const json = await view.to_json(); + + // Left join: all left rows, matched right rows, id=3 has null y + expect(json).toHaveLength(3); + expect(json).toEqual([ + { id: 1, x: 10, y: "a" }, + { id: 2, x: 20, y: "b" }, + { id: 3, x: 30, y: null }, + ]); + + view.delete(); + joined.delete(); + right.delete(); + left.delete(); + }); + + test("left join includes unmatched left rows with nulls", async function () { + const left = await perspective.table([ + { id: 1, x: 10 }, + { id: 2, x: 20 }, + ]); + + const right = await perspective.table([{ id: 1, y: "a" }]); + + const joined = await perspective.join(left, right, "id", { + join_type: "left", + }); + const view = await joined.view(); + const json = await view.to_json(); + + expect(json).toHaveLength(2); + expect(json).toEqual([ + { id: 1, x: 10, y: "a" }, + { id: 2, x: 20, y: null }, + ]); + + view.delete(); + joined.delete(); + right.delete(); + left.delete(); + }); + + test("left join does not include unmatched right rows", async function () { + const left = await perspective.table([{ id: 1, x: 10 }]); + + 
const right = await perspective.table([ + { id: 1, y: "a" }, + { id: 2, y: "b" }, + ]); + + const joined = await perspective.join(left, right, "id", { + join_type: "left", + }); + const view = await joined.view(); + const json = await view.to_json(); + + expect(json).toHaveLength(1); + expect(json).toEqual([{ id: 1, x: 10, y: "a" }]); + + view.delete(); + joined.delete(); + right.delete(); + left.delete(); + }); + + test("left join reacts to right table updates", async function () { + const left = await perspective.table([ + { id: 1, x: 10 }, + { id: 2, x: 20 }, + ]); + + const right = await perspective.table([{ id: 1, y: "a" }]); + + const joined = await perspective.join(left, right, "id", { + join_type: "left", + }); + const view = await joined.view(); + + // Add matching row for id=2 + await right.update([{ id: 2, y: "b" }]); + const json = await view.to_json(); + + // Now both rows match + expect(json).toHaveLength(2); + expect(json).toEqual([ + { id: 1, x: 10, y: "a" }, + { id: 2, x: 20, y: "b" }, + ]); + + view.delete(); + joined.delete(); + right.delete(); + left.delete(); + }); + + test("left join with no matching rows", async function () { + const left = await perspective.table([ + { id: 1, x: 10 }, + { id: 2, x: 20 }, + ]); + + const right = await perspective.table([ + { id: 3, y: "c" }, + { id: 4, y: "d" }, + ]); + + const joined = await perspective.join(left, right, "id", { + join_type: "left", + }); + const view = await joined.view(); + const json = await view.to_json(); + + // All left rows present with null right columns + expect(json).toHaveLength(2); + expect(json).toEqual([ + { id: 1, x: 10, y: null }, + { id: 2, x: 20, y: null }, + ]); + + view.delete(); + joined.delete(); + right.delete(); + left.delete(); + }); + + test("left join has correct schema", async function () { + const left = await perspective.table({ id: "integer", x: "float" }); + const right = await perspective.table({ + id: "integer", + y: "string", + }); + + const joined = await 
perspective.join(left, right, "id", { + join_type: "left", + }); + const schema = await joined.schema(); + + expect(schema).toEqual({ + id: "integer", + x: "float", + y: "string", + }); + + joined.delete(); + right.delete(); + left.delete(); + }); + }); +})(perspective); diff --git a/rust/perspective-js/test/js/joins/outer_join.spec.ts b/rust/perspective-js/test/js/joins/outer_join.spec.ts new file mode 100644 index 0000000000..d7490d009c --- /dev/null +++ b/rust/perspective-js/test/js/joins/outer_join.spec.ts @@ -0,0 +1,208 @@ +// ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +// ┃ ██████ ██████ ██████ █ █ █ █ █ █▄ ▀███ █ ┃ +// ┃ ▄▄▄▄▄█ █▄▄▄▄▄ ▄▄▄▄▄█ ▀▀▀▀▀█▀▀▀▀▀ █ ▀▀▀▀▀█ ████████▌▐███ ███▄ ▀█ █ ▀▀▀▀▀ ┃ +// ┃ █▀▀▀▀▀ █▀▀▀▀▀ █▀██▀▀ ▄▄▄▄▄ █ ▄▄▄▄▄█ ▄▄▄▄▄█ ████████▌▐███ █████▄ █ ▄▄▄▄▄ ┃ +// ┃ █ ██████ █ ▀█▄ █ ██████ █ ███▌▐███ ███████▄ █ ┃ +// ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫ +// ┃ Copyright (c) 2017, the Perspective Authors. ┃ +// ┃ ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ ┃ +// ┃ This file is part of the Perspective library, distributed under the terms ┃ +// ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). 
┃ +// ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ + +import { test, expect } from "@perspective-dev/test"; +import perspective from "../perspective_client.ts"; + +((perspective) => { + test.describe("Outer joins", function () { + test("outer joins two tables on a shared key", async function () { + const left = await perspective.table([ + { id: 1, x: 10 }, + { id: 2, x: 20 }, + { id: 3, x: 30 }, + ]); + + const right = await perspective.table([ + { id: 1, y: "a" }, + { id: 2, y: "b" }, + { id: 4, y: "d" }, + ]); + + const joined = await perspective.join(left, right, "id", { + join_type: "outer", + }); + const view = await joined.view(); + const json = await view.to_json(); + + // Outer join: all left + matched right + unmatched right + expect(json).toHaveLength(4); + expect(json).toEqual([ + { id: 1, x: 10, y: "a" }, + { id: 2, x: 20, y: "b" }, + { id: 3, x: 30, y: null }, + { id: 4, x: null, y: "d" }, + ]); + + view.delete(); + joined.delete(); + right.delete(); + left.delete(); + }); + + test("outer join includes all rows when no keys match", async function () { + const left = await perspective.table([ + { id: 1, x: 10 }, + { id: 2, x: 20 }, + ]); + + const right = await perspective.table([ + { id: 3, y: "c" }, + { id: 4, y: "d" }, + ]); + + const joined = await perspective.join(left, right, "id", { + join_type: "outer", + }); + const view = await joined.view(); + const json = await view.to_json(); + + expect(json).toHaveLength(4); + expect(json).toEqual([ + { id: 1, x: 10, y: null }, + { id: 2, x: 20, y: null }, + { id: 3, x: null, y: "c" }, + { id: 4, x: null, y: "d" }, + ]); + + view.delete(); + joined.delete(); + right.delete(); + left.delete(); + }); + + test("outer join with all keys matching is same as inner", async function () { + const left = await perspective.table([ + { id: 1, x: 10 }, + { id: 2, x: 20 }, + ]); + + const right = await perspective.table([ + { id: 1, y: "a" }, + { id: 2, y: "b" }, + ]); + + const joined = 
await perspective.join(left, right, "id", { + join_type: "outer", + }); + const view = await joined.view(); + const json = await view.to_json(); + + expect(json).toHaveLength(2); + expect(json).toEqual([ + { id: 1, x: 10, y: "a" }, + { id: 2, x: 20, y: "b" }, + ]); + + view.delete(); + joined.delete(); + right.delete(); + left.delete(); + }); + + test("outer join reacts to left table updates", async function () { + const left = await perspective.table([{ id: 1, x: 10 }]); + + const right = await perspective.table([ + { id: 1, y: "a" }, + { id: 2, y: "b" }, + ]); + + const joined = await perspective.join(left, right, "id", { + join_type: "outer", + }); + const view = await joined.view(); + + let json = await view.to_json(); + expect(json).toHaveLength(2); + expect(json).toEqual([ + { id: 1, x: 10, y: "a" }, + { id: 2, x: null, y: "b" }, + ]); + + // Add matching row for id=2 + await left.update([{ id: 2, x: 20 }]); + json = await view.to_json(); + + // Both left rows now match right rows + expect(json).toHaveLength(2); + expect(json).toEqual([ + { id: 1, x: 10, y: "a" }, + { id: 2, x: 20, y: "b" }, + ]); + + view.delete(); + joined.delete(); + right.delete(); + left.delete(); + }); + + test("outer join reacts to right table updates", async function () { + const left = await perspective.table([ + { id: 1, x: 10 }, + { id: 2, x: 20 }, + ]); + + const right = await perspective.table([{ id: 1, y: "a" }]); + + const joined = await perspective.join(left, right, "id", { + join_type: "outer", + }); + const view = await joined.view(); + + let json = await view.to_json(); + expect(json).toHaveLength(2); + expect(json).toEqual([ + { id: 1, x: 10, y: "a" }, + { id: 2, x: 20, y: null }, + ]); + + await right.update([{ id: 2, y: "b" }]); + json = await view.to_json(); + + // Now both match, but non-indexed tables append + expect(json).toHaveLength(2); + expect(json).toEqual([ + { id: 1, x: 10, y: "a" }, + { id: 2, x: 20, y: "b" }, + ]); + + view.delete(); + joined.delete(); + 
right.delete(); + left.delete(); + }); + + test("outer join has correct schema", async function () { + const left = await perspective.table({ id: "integer", x: "float" }); + const right = await perspective.table({ + id: "integer", + y: "string", + }); + + const joined = await perspective.join(left, right, "id", { + join_type: "outer", + }); + const schema = await joined.schema(); + + expect(schema).toEqual({ + id: "integer", + x: "float", + y: "string", + }); + + joined.delete(); + right.delete(); + left.delete(); + }); + }); +})(perspective); diff --git a/rust/perspective-js/test/js/joins/right_on.spec.ts b/rust/perspective-js/test/js/joins/right_on.spec.ts new file mode 100644 index 0000000000..27f85feb4a --- /dev/null +++ b/rust/perspective-js/test/js/joins/right_on.spec.ts @@ -0,0 +1,235 @@ +// ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +// ┃ ██████ ██████ ██████ █ █ █ █ █ █▄ ▀███ █ ┃ +// ┃ ▄▄▄▄▄█ █▄▄▄▄▄ ▄▄▄▄▄█ ▀▀▀▀▀█▀▀▀▀▀ █ ▀▀▀▀▀█ ████████▌▐███ ███▄ ▀█ █ ▀▀▀▀▀ ┃ +// ┃ █▀▀▀▀▀ █▀▀▀▀▀ █▀██▀▀ ▄▄▄▄▄ █ ▄▄▄▄▄█ ▄▄▄▄▄█ ████████▌▐███ █████▄ █ ▄▄▄▄▄ ┃ +// ┃ █ ██████ █ ▀█▄ █ ██████ █ ███▌▐███ ███████▄ █ ┃ +// ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫ +// ┃ Copyright (c) 2017, the Perspective Authors. ┃ +// ┃ ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ ┃ +// ┃ This file is part of the Perspective library, distributed under the terms ┃ +// ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). 
┃ +// ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ + +import { test, expect } from "@perspective-dev/test"; +import perspective from "../perspective_client.ts"; + +test.describe("right_on option", function () { + test("joins on differently-named columns", async function () { + const left = await perspective.table([ + { id: 1, x: 10 }, + { id: 2, x: 20 }, + { id: 3, x: 30 }, + ]); + + const right = await perspective.table([ + { user_id: 1, y: "a" }, + { user_id: 2, y: "b" }, + { user_id: 4, y: "d" }, + ]); + + const joined = await perspective.join(left, right, "id", { + right_on: "user_id", + }); + const view = await joined.view(); + const json = await view.to_json(); + + expect(json).toHaveLength(2); + expect(json).toEqual([ + { id: 1, x: 10, y: "a" }, + { id: 2, x: 20, y: "b" }, + ]); + + view.delete(); + joined.delete(); + right.delete(); + left.delete(); + }); + + test("output schema uses left key column name", async function () { + const left = await perspective.table({ id: "integer", x: "float" }); + const right = await perspective.table({ + user_id: "integer", + y: "string", + }); + + const joined = await perspective.join(left, right, "id", { + right_on: "user_id", + }); + const schema = await joined.schema(); + + expect(schema).toEqual({ + id: "integer", + x: "float", + y: "string", + }); + + joined.delete(); + right.delete(); + left.delete(); + }); + + test("errors on type mismatch between on and right_on", async function () { + const left = await perspective.table({ id: "integer", x: "float" }); + const right = await perspective.table({ + user_id: "string", + y: "float", + }); + + let error; + try { + await perspective.join(left, right, "id", { + right_on: "user_id", + }); + } catch (e) { + error = e; + } + + expect(error).toBeDefined(); + right.delete(); + left.delete(); + }); + + test("errors when right_on column not found", async function () { + const left = await perspective.table({ id: "integer", x: "float" }); + const 
right = await perspective.table({ + user_id: "integer", + y: "string", + }); + + let error; + try { + await perspective.join(left, right, "id", { + right_on: "nonexistent", + }); + } catch (e) { + error = e; + } + + expect(error).toBeDefined(); + right.delete(); + left.delete(); + }); + + test("right_on same as on behaves identically to omitting it", async function () { + const left = await perspective.table([ + { id: 1, x: 10 }, + { id: 2, x: 20 }, + ]); + + const right = await perspective.table([ + { id: 1, y: "a" }, + { id: 2, y: "b" }, + ]); + + const joined = await perspective.join(left, right, "id", { + right_on: "id", + }); + const view = await joined.view(); + const json = await view.to_json(); + + expect(json).toEqual([ + { id: 1, x: 10, y: "a" }, + { id: 2, x: 20, y: "b" }, + ]); + + view.delete(); + joined.delete(); + right.delete(); + left.delete(); + }); + + test("reacts to right table updates with right_on", async function () { + const left = await perspective.table([ + { id: 1, x: 10 }, + { id: 2, x: 20 }, + ]); + + const right = await perspective.table([ + { user_id: 1, y: "a" }, + { user_id: 2, y: "b" }, + ]); + + const joined = await perspective.join(left, right, "id", { + right_on: "user_id", + }); + const view = await joined.view(); + + await right.update([{ user_id: 1, y: "c" }]); + const json = await view.to_json(); + + expect(json).toHaveLength(3); + expect(json).toEqual([ + { id: 1, x: 10, y: "a" }, + { id: 1, x: 10, y: "c" }, + { id: 2, x: 20, y: "b" }, + ]); + + view.delete(); + joined.delete(); + right.delete(); + left.delete(); + }); + + test("left join with right_on", async function () { + const left = await perspective.table([ + { id: 1, x: 10 }, + { id: 2, x: 20 }, + { id: 3, x: 30 }, + ]); + + const right = await perspective.table([ + { user_id: 1, y: "a" }, + { user_id: 2, y: "b" }, + ]); + + const joined = await perspective.join(left, right, "id", { + join_type: "left", + right_on: "user_id", + }); + const view = await 
joined.view(); + const json = await view.to_json(); + + expect(json).toHaveLength(3); + expect(json).toEqual([ + { id: 1, x: 10, y: "a" }, + { id: 2, x: 20, y: "b" }, + { id: 3, x: 30, y: null }, + ]); + + view.delete(); + joined.delete(); + right.delete(); + left.delete(); + }); + + test("outer join with right_on", async function () { + const left = await perspective.table([ + { id: 1, x: 10 }, + { id: 2, x: 20 }, + ]); + + const right = await perspective.table([ + { user_id: 2, y: "b" }, + { user_id: 3, y: "c" }, + ]); + + const joined = await perspective.join(left, right, "id", { + join_type: "outer", + right_on: "user_id", + }); + const view = await joined.view(); + const json = await view.to_json(); + + expect(json).toHaveLength(3); + expect(json).toEqual([ + { id: 1, x: 10, y: null }, + { id: 2, x: 20, y: "b" }, + { id: 3, x: null, y: "c" }, + ]); + + view.delete(); + joined.delete(); + right.delete(); + left.delete(); + }); +}); diff --git a/rust/perspective-python/perspective/tests/table/test_join.py b/rust/perspective-python/perspective/tests/table/test_join.py new file mode 100644 index 0000000000..d29ef4b998 --- /dev/null +++ b/rust/perspective-python/perspective/tests/table/test_join.py @@ -0,0 +1,115 @@ +# ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +# ┃ ██████ ██████ ██████ █ █ █ █ █ █▄ ▀███ █ ┃ +# ┃ ▄▄▄▄▄█ █▄▄▄▄▄ ▄▄▄▄▄█ ▀▀▀▀▀█▀▀▀▀▀ █ ▀▀▀▀▀█ ████████▌▐███ ███▄ ▀█ █ ▀▀▀▀▀ ┃ +# ┃ █▀▀▀▀▀ █▀▀▀▀▀ █▀██▀▀ ▄▄▄▄▄ █ ▄▄▄▄▄█ ▄▄▄▄▄█ ████████▌▐███ █████▄ █ ▄▄▄▄▄ ┃ +# ┃ █ ██████ █ ▀█▄ █ ██████ █ ███▌▐███ ███████▄ █ ┃ +# ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫ +# ┃ Copyright (c) 2017, the Perspective Authors. ┃ +# ┃ ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ ┃ +# ┃ This file is part of the Perspective library, distributed under the terms ┃ +# ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). 
┃ +# ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ + +import perspective as psp + +client = psp.Server().new_local_client() + + +class TestJoin: + def test_inner_join_two_tables(self): + left = client.table( + [{"id": 1, "x": 10}, {"id": 2, "x": 20}, {"id": 3, "x": 30}] + ) + right = client.table( + [{"id": 1, "y": "a"}, {"id": 2, "y": "b"}, {"id": 4, "y": "d"}] + ) + joined = client.join(left, right, "id") + view = joined.view() + json = view.to_json() + assert len(json) == 2 + view.delete() + joined.delete() + right.delete() + left.delete() + + def test_join_has_correct_schema(self): + left = client.table({"id": "integer", "x": "float"}) + right = client.table({"id": "integer", "y": "string"}) + joined = client.join(left, right, "id") + schema = joined.schema() + assert schema == {"id": "integer", "x": "float", "y": "string"} + joined.delete() + right.delete() + left.delete() + + def test_join_reacts_to_left_updates(self): + left = client.table( + [{"id": 1, "x": 10}, {"id": 2, "x": 20}] + ) + right = client.table( + [{"id": 1, "y": "a"}, {"id": 2, "y": "b"}] + ) + joined = client.join(left, right, "id") + view = joined.view() + left.update([{"id": 1, "x": 99}]) + json = view.to_json() + assert json == [ + {"id": 1, "x": 10, "y": "a"}, + {"id": 2, "x": 20, "y": "b"}, + {"id": 1, "x": 99, "y": "a"}, + ] + view.delete() + joined.delete() + right.delete() + left.delete() + + def test_left_join(self): + left = client.table( + [{"id": 1, "x": 10}, {"id": 2, "x": 20}, {"id": 3, "x": 30}] + ) + right = client.table( + [{"id": 1, "y": "a"}, {"id": 2, "y": "b"}] + ) + joined = client.join(left, right, "id", "left") + view = joined.view() + json = view.to_json() + assert len(json) == 3 + view.delete() + joined.delete() + right.delete() + left.delete() + + def test_join_by_table_names(self): + left = client.table( + [{"id": 1, "x": 10}, {"id": 2, "x": 20}], + name="left_py", + ) + right = client.table( + [{"id": 1, "y": "a"}, {"id": 2, "y": 
"b"}], + name="right_py", + ) + joined = client.join("left_py", "right_py", "id") + view = joined.view() + json = view.to_json() + assert len(json) == 2 + view.delete() + joined.delete() + right.delete() + left.delete() + + def test_join_mixed_table_and_string(self): + left = client.table( + [{"id": 1, "x": 10}, {"id": 2, "x": 20}] + ) + right = client.table( + [{"id": 1, "y": "a"}, {"id": 2, "y": "b"}], + name="right_py_mixed", + ) + joined = client.join(left, "right_py_mixed", "id") + view = joined.view() + json = view.to_json() + assert len(json) == 2 + view.delete() + joined.delete() + right.delete() + left.delete() diff --git a/rust/perspective-python/src/client/client_async.rs b/rust/perspective-python/src/client/client_async.rs index 4138705bae..d53be5b0c1 100644 --- a/rust/perspective-python/src/client/client_async.rs +++ b/rust/perspective-python/src/client/client_async.rs @@ -18,8 +18,8 @@ use std::sync::Arc; use futures::FutureExt; use perspective_client::{ Client, ColumnWindow, DeleteOptions, OnUpdateData, OnUpdateMode, OnUpdateOptions, Table, - TableData, TableInitOptions, TableReadFormat, UpdateData, UpdateOptions, View, ViewWindow, - assert_table_api, assert_view_api, asyncfn, + TableData, TableInitOptions, TableReadFormat, TableRef, UpdateData, UpdateOptions, View, + ViewWindow, assert_table_api, assert_view_api, asyncfn, }; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; @@ -34,6 +34,23 @@ use super::{pandas, polars, pyarrow}; use crate::py_async::{self, AllowThreads}; use crate::py_err::{PyPerspectiveError, ResultTClientErrorExt}; +fn py_to_table_ref_async(val: &Bound<'_, PyAny>) -> PyResult { + if let Ok(t) = val.extract::() { + Ok(TableRef::from(t.table.as_ref())) + } else if let Ok(name) = val.extract::() { + Ok(TableRef::from(name)) + } else { + Err(pyo3::exceptions::PyTypeError::new_err( + "Expected a Table or string table name", + )) + } +} + +fn py_to_table_ref_from_owned(py: Python<'_>, val: &Py) -> PyResult { + let bound = 
val.bind(py); + py_to_table_ref_async(bound) +} + /// An instance of a [`Client`] is a connection to a single /// `perspective_server::Server`, whether locally in-memory or remote over some /// transport like a WebSocket. @@ -241,20 +258,33 @@ impl AsyncClient { /// # Python Examples /// /// ```python - /// joined = await client.join(orders_table, products_table, "Product ID") + /// joined = await client.join(orders_table, products_table, "Product ID", "left") /// ``` - #[pyo3(signature = (left, right, on, name=None))] + #[pyo3(signature = (left, right, on, join_type=None, name=None, right_on=None))] pub async fn join( &self, - left: AsyncTable, - right: AsyncTable, + left: Py, + right: Py, on: String, + join_type: Option, name: Option, + right_on: Option, ) -> PyResult { - let client = self.client.clone(); + let (left_ref, right_ref) = Python::with_gil(|py| { + let left_ref = py_to_table_ref_from_owned(py, &left)?; + let right_ref = py_to_table_ref_from_owned(py, &right)?; + Ok::<_, PyErr>((left_ref, right_ref)) + })?; + let jt = super::client_sync::parse_join_type(join_type.as_deref())?; + let options = perspective_client::JoinOptions { + join_type: Some(jt), + name, + right_on, + }; let py_client = self.clone(); - let table = client - .join(&left.table, &right.table, &on, name) + let table = self + .client + .join(left_ref, right_ref, &on, options) .await .into_pyerr()?; Ok(AsyncTable { @@ -354,8 +384,8 @@ impl AsyncClient { #[pyclass] #[derive(Clone)] pub struct AsyncTable { - table: Arc
, - client: AsyncClient, + pub(super) table: Arc
, + pub(super) client: AsyncClient, } assert_table_api!(AsyncTable); diff --git a/rust/perspective-python/src/client/client_sync.rs b/rust/perspective-python/src/client/client_sync.rs index 9e3d603455..f0d448bc5b 100644 --- a/rust/perspective-python/src/client/client_sync.rs +++ b/rust/perspective-python/src/client/client_sync.rs @@ -12,19 +12,46 @@ use std::collections::HashMap; use std::future::Future; +use std::sync::Arc; use perspective_client::config::Scalar; +use perspective_client::{JoinType, TableRef, assert_table_api, assert_view_api}; #[cfg(doc)] use perspective_client::{TableInitOptions, UpdateOptions, config::ViewConfigUpdate}; -use perspective_client::{assert_table_api, assert_view_api}; use pyo3::exceptions::PyTypeError; use pyo3::marker::Ungil; use pyo3::prelude::*; use pyo3::types::*; use super::client_async::*; +use crate::py_err::ResultTClientErrorExt; use crate::server::Server; +pub(crate) fn py_to_table_ref(val: &Bound<'_, PyAny>) -> PyResult { + if let Ok(t) = val.downcast::
() { + let table_ref = t.borrow(); + Ok(TableRef::from(&*table_ref.0.table)) + } else if let Ok(name) = val.extract::() { + Ok(TableRef::from(name)) + } else { + Err(PyTypeError::new_err( + "Expected a Table or string table name", + )) + } +} + +pub(crate) fn parse_join_type(join_type: Option<&str>) -> PyResult { + match join_type { + Some("left") => Ok(JoinType::Left), + Some("outer") => Ok(JoinType::Outer), + None | Some("inner") => Ok(JoinType::Inner), + Some(other) => Err(pyo3::exceptions::PyValueError::new_err(format!( + "Unknown join type: \"{}\"", + other + ))), + } +} + pub(crate) fn scalar_to_py(py: Python<'_>, scalar: &Scalar) -> PyObject { match scalar { Scalar::Float(x) => x.into_pyobject(py).unwrap().into_any().unbind(), @@ -173,29 +200,45 @@ impl Client { Ok(Table(table)) } - /// Creates a new read-only [`Table`] by performing an INNER JOIN on two + /// Creates a new read-only [`Table`] by performing a JOIN on two /// source tables. The resulting table is reactive: when either source /// table is updated, the join is automatically recomputed. /// /// # Python Examples /// /// ```python - /// joined = client.join(orders_table, products_table, "Product ID") + /// joined = client.join(orders_table, products_table, "Product ID", "left") /// ``` - #[pyo3(signature = (left, right, on, name=None))] + #[pyo3(signature = (left, right, on, join_type=None, name=None, right_on=None))] + #[allow(clippy::too_many_arguments, reason = "This is a Python API")] pub fn join( &self, py: Python<'_>, - left: &Table, - right: &Table, + left: &Bound<'_, PyAny>, + right: &Bound<'_, PyAny>, on: String, + join_type: Option, name: Option, + right_on: Option, ) -> PyResult
{ - Ok(Table( - self.0 - .join(left.0.clone(), right.0.clone(), on, name) - .py_block_on(py)?, - )) + let left_ref = py_to_table_ref(left)?; + let right_ref = py_to_table_ref(right)?; + let jt = parse_join_type(join_type.as_deref())?; + let options = perspective_client::JoinOptions { + join_type: Some(jt), + name, + right_on, + }; + let table = self + .0 + .client + .join(left_ref, right_ref, &on, options) + .py_block_on(py) + .into_pyerr()?; + Ok(Table(AsyncTable { + table: Arc::new(table), + client: self.0.clone(), + })) } /// Retrieves the names of all tables that this client has access to. diff --git a/rust/perspective-server/cpp/perspective/CMakeLists.txt b/rust/perspective-server/cpp/perspective/CMakeLists.txt index 1d8dd5c8b6..c7c74376bc 100644 --- a/rust/perspective-server/cpp/perspective/CMakeLists.txt +++ b/rust/perspective-server/cpp/perspective/CMakeLists.txt @@ -511,6 +511,7 @@ set(SOURCE_FILES ${PSP_CPP_SRC}/src/cpp/view_config.cpp ${PSP_CPP_SRC}/src/cpp/vocab.cpp ${PSP_CPP_SRC}/src/cpp/arrow_csv.cpp + ${PSP_CPP_SRC}/src/cpp/join_engine.cpp ${PSP_CPP_SRC}/src/cpp/server.cpp ${PSP_CPP_SRC}/src/cpp/binding_api.cpp ) diff --git a/rust/perspective-server/cpp/perspective/src/cpp/join_engine.cpp b/rust/perspective-server/cpp/perspective/src/cpp/join_engine.cpp new file mode 100644 index 0000000000..86b94ffca6 --- /dev/null +++ b/rust/perspective-server/cpp/perspective/src/cpp/join_engine.cpp @@ -0,0 +1,451 @@ +// ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +// ┃ ██████ ██████ ██████ █ █ █ █ █ █▄ ▀███ █ ┃ +// ┃ ▄▄▄▄▄█ █▄▄▄▄▄ ▄▄▄▄▄█ ▀▀▀▀▀█▀▀▀▀▀ █ ▀▀▀▀▀█ ████████▌▐███ ███▄ ▀█ █ ▀▀▀▀▀ ┃ +// ┃ █▀▀▀▀▀ █▀▀▀▀▀ █▀██▀▀ ▄▄▄▄▄ █ ▄▄▄▄▄█ ▄▄▄▄▄█ ████████▌▐███ █████▄ █ ▄▄▄▄▄ ┃ +// ┃ █ ██████ █ ▀█▄ █ ██████ █ ███▌▐███ ███████▄ █ ┃ +// ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫ +// ┃ Copyright (c) 2017, the Perspective Authors. 
┃ +// ┃ ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ ┃ +// ┃ This file is part of the Perspective library, distributed under the terms ┃ +// ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃ +// ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ + +#include "perspective/join_engine.h" +#include "perspective/column.h" +#include "perspective/data_table.h" +#include "perspective/gnode.h" +#include "perspective/scalar.h" +#include +#include +#include +#include + +namespace perspective::server { + +namespace { + +// Typed column copy for fixed-size types. +template +void +copy_column_typed( + t_column* dst, + const t_column* src, + const std::vector>& matched_rows, + t_uindex num_matched, + bool use_first +) { + for (t_uindex i = 0; i < num_matched; ++i) { + auto src_idx = use_first ? matched_rows[i].first : matched_rows[i].second; + if (src_idx == static_cast(-1)) { + dst->clear(i); + } else { + const T* val = src->get_nth(src_idx); + const t_status* st = + src->is_status_enabled() ? src->get_nth_status(src_idx) + : nullptr; + dst->set_nth(i, *val, st ? *st : STATUS_VALID); + } + } +} + +void +copy_column_str( + t_column* dst, + const t_column* src, + const std::vector>& matched_rows, + t_uindex num_matched, + bool use_first +) { + for (t_uindex i = 0; i < num_matched; ++i) { + auto src_idx = use_first ? 
matched_rows[i].first : matched_rows[i].second; + if (src_idx == static_cast(-1)) { + dst->clear(i); + } else { + dst->set_scalar(i, src->get_scalar(src_idx)); + } + } +} + +void +copy_column_dispatch( + t_column* dst, + const t_column* src, + const std::vector>& matched_rows, + t_uindex num_matched, + bool use_first +) { + switch (dst->get_dtype()) { + case DTYPE_INT64: + copy_column_typed( + dst, src, matched_rows, num_matched, use_first + ); + break; + case DTYPE_INT32: + copy_column_typed( + dst, src, matched_rows, num_matched, use_first + ); + break; + case DTYPE_INT16: + copy_column_typed( + dst, src, matched_rows, num_matched, use_first + ); + break; + case DTYPE_INT8: + copy_column_typed( + dst, src, matched_rows, num_matched, use_first + ); + break; + case DTYPE_UINT64: + copy_column_typed( + dst, src, matched_rows, num_matched, use_first + ); + break; + case DTYPE_UINT32: + copy_column_typed( + dst, src, matched_rows, num_matched, use_first + ); + break; + case DTYPE_UINT16: + copy_column_typed( + dst, src, matched_rows, num_matched, use_first + ); + break; + case DTYPE_UINT8: + copy_column_typed( + dst, src, matched_rows, num_matched, use_first + ); + break; + case DTYPE_FLOAT64: + copy_column_typed( + dst, src, matched_rows, num_matched, use_first + ); + break; + case DTYPE_FLOAT32: + copy_column_typed( + dst, src, matched_rows, num_matched, use_first + ); + break; + case DTYPE_BOOL: + copy_column_typed( + dst, src, matched_rows, num_matched, use_first + ); + break; + case DTYPE_TIME: + copy_column_typed( + dst, src, matched_rows, num_matched, use_first + ); + break; + case DTYPE_DATE: + copy_column_typed( + dst, src, matched_rows, num_matched, use_first + ); + break; + case DTYPE_STR: + copy_column_str( + dst, src, matched_rows, num_matched, use_first + ); + break; + default: + // Fallback for any other types (OBJECT, F64PAIR, etc.) + for (t_uindex i = 0; i < num_matched; ++i) { + auto src_idx = use_first ? 
matched_rows[i].first + : matched_rows[i].second; + if (src_idx == static_cast(-1)) { + dst->clear(i); + } else { + dst->set_scalar(i, src->get_scalar(src_idx)); + } + } + break; + } +} + +// Copy the join key column for OUTER join rows where the left side has no +// match but the right side does. Uses the right key column as the source +// for just those rows; all other rows are already filled from the left. +void +copy_join_key_fallback( + t_column* dst, + const t_column* right_key_col, + const std::vector>& matched_rows, + t_uindex num_matched +) { + for (t_uindex i = 0; i < num_matched; ++i) { + if (matched_rows[i].first == static_cast(-1) + && matched_rows[i].second != static_cast(-1)) { + dst->set_scalar( + i, right_key_col->get_scalar(matched_rows[i].second) + ); + } + } +} + +} // anonymous namespace + +void +JoinEngine::register_join( + const t_id& join_table_id, + const t_id& left_table_id, + const t_id& right_table_id, + const std::string& on_column, + const std::string& right_on_column, + proto::JoinType join_type +) { + auto effective_right_on = right_on_column.empty() ? 
on_column : right_on_column; + JoinDef def{left_table_id, right_table_id, on_column, effective_right_on, join_type}; + m_join_defs.emplace(join_table_id, def); + m_table_to_join_tables.emplace(left_table_id, join_table_id); + m_table_to_join_tables.emplace(right_table_id, join_table_id); +} + +void +JoinEngine::unregister_join(const t_id& join_table_id) { + auto it = m_join_defs.find(join_table_id); + if (it == m_join_defs.end()) { + return; + } + + auto& def = it->second; + + for (const auto& source_id : {def.left_table_id, def.right_table_id}) { + auto range = m_table_to_join_tables.equal_range(source_id); + for (auto jt = range.first; jt != range.second;) { + if (jt->second == join_table_id) { + jt = m_table_to_join_tables.erase(jt); + } else { + ++jt; + } + } + } + + m_join_defs.erase(it); + m_caches.erase(join_table_id); +} + +bool +JoinEngine::is_join_table(const t_id& id) const { + return m_join_defs.contains(id); +} + +std::vector +JoinEngine::get_dependent_join_tables(const t_id& source_table_id) const { + std::vector result; + auto range = m_table_to_join_tables.equal_range(source_table_id); + for (auto it = range.first; it != range.second; ++it) { + result.push_back(it->second); + } + + return result; +} + +const JoinDef& +JoinEngine::get_join_def(const t_id& join_table_id) const { + return m_join_defs.at(join_table_id); +} + +MakeJoinResult +JoinEngine::make_join_table( + const std::string& on_column, + const std::string& right_on_column, + proto::JoinType join_type, + const std::shared_ptr
& left_table, + const std::shared_ptr
& right_table +) { + auto effective_right_on = right_on_column.empty() ? on_column : right_on_column; + auto left_schema = left_table->get_schema(); + auto right_schema = right_table->get_schema(); + if (!left_schema.has_column(on_column)) { + std::stringstream ss; + ss << "Column \"" << on_column << "\" not found in left table"; + return {nullptr, ss.str()}; + } + + if (!right_schema.has_column(effective_right_on)) { + std::stringstream ss; + ss << "Column \"" << effective_right_on << "\" not found in right table"; + return {nullptr, ss.str()}; + } + + if (left_schema.get_dtype(on_column) != right_schema.get_dtype(effective_right_on)) { + return {nullptr, "Join column type mismatch"}; + } + + for (const auto& rcol : right_schema.columns()) { + if (rcol == effective_right_on) { + continue; + } + + if (left_schema.has_column(rcol)) { + std::stringstream ss; + ss << "Column \"" << rcol << "\" exists in both tables"; + return {nullptr, ss.str()}; + } + } + + std::vector merged_columns; + std::vector merged_types; + for (t_uindex i = 0; i < left_schema.columns().size(); ++i) { + merged_columns.push_back(left_schema.columns()[i]); + merged_types.push_back(left_schema.types()[i]); + } + + for (t_uindex i = 0; i < right_schema.columns().size(); ++i) { + if (right_schema.columns()[i] == effective_right_on) { + continue; + } + + merged_columns.push_back(right_schema.columns()[i]); + merged_types.push_back(right_schema.types()[i]); + } + + t_schema merged_schema(merged_columns, merged_types); + auto join_table = Table::from_schema("", merged_schema); + return {join_table, ""}; +} + +void +JoinEngine::build_right_index( + JoinCache& cache, + const std::shared_ptr
& right_table, + const std::string& on_column +) { + auto right_data = right_table->get_gnode()->get_table_sptr(); + const auto& right_pkey_map = right_table->get_gnode()->get_pkey_map(); + auto right_key_col = right_data->get_column(on_column); + cache.right_entries.assign(right_pkey_map.begin(), right_pkey_map.end()); + std::sort( + cache.right_entries.begin(), + cache.right_entries.end(), + [](const auto& a, const auto& b) { return a.first < b.first; } + ); + + cache.right_index.clear(); + cache.right_index.reserve(right_pkey_map.size()); + for (const auto& [pkey, row_idx] : cache.right_entries) { + auto join_key = right_key_col->get_scalar(row_idx); + if (!join_key.is_none()) { + cache.right_index[join_key].push_back(row_idx); + } + } + + cache.valid = true; +} + +void +JoinEngine::recompute( + const t_id& join_table_id, + const std::shared_ptr
& left_table, + const std::shared_ptr
& right_table, + const std::shared_ptr
& join_table, + bool left_changed, + bool right_changed +) { + const auto& def = m_join_defs.at(join_table_id); + auto& cache = m_caches[join_table_id]; + auto left_data = left_table->get_gnode()->get_table_sptr(); + auto right_data = right_table->get_gnode()->get_table_sptr(); + const auto& left_pkey_map = left_table->get_gnode()->get_pkey_map(); + auto left_key_col = left_data->get_column(def.on_column); + auto right_key_col = right_data->get_column(def.right_on_column); + + // Rebuild right-side index only when the right table has changed, + // or on the first recompute when no cache exists yet. + if (right_changed || !cache.valid) { + build_right_index(cache, right_table, def.right_on_column); + } + + const auto& right_join_key_to_rows = cache.right_index; + const auto& right_entries = cache.right_entries; + + // Sort left pkey entries so the join result preserves left-table + // insertion order. + std::vector> left_entries( + left_pkey_map.begin(), left_pkey_map.end() + ); + + std::sort( + left_entries.begin(), + left_entries.end(), + [](const auto& a, const auto& b) { return a.first < b.first; } + ); + + const t_uindex NO_MATCH = static_cast(-1); + std::vector> matched_rows; + matched_rows.reserve(left_entries.size()); + tsl::hopscotch_set matched_right_rows; + for (const auto& [pkey, row_idx] : left_entries) { + auto join_key = left_key_col->get_scalar(row_idx); + if (join_key.is_none()) { + if (def.join_type == proto::LEFT + || def.join_type == proto::OUTER) { + matched_rows.emplace_back(row_idx, NO_MATCH); + } + + continue; + } + + auto it = right_join_key_to_rows.find(join_key); + if (it != right_join_key_to_rows.end()) { + for (auto right_row_idx : it->second) { + matched_rows.emplace_back(row_idx, right_row_idx); + if (def.join_type == proto::OUTER) { + matched_right_rows.insert(right_row_idx); + } + } + } else if (def.join_type == proto::LEFT + || def.join_type == proto::OUTER) { + matched_rows.emplace_back(row_idx, NO_MATCH); + } + } + + if 
(def.join_type == proto::OUTER) { + for (const auto& [pkey, row_idx] : right_entries) { + if (matched_right_rows.find(row_idx) + == matched_right_rows.end()) { + matched_rows.emplace_back(NO_MATCH, row_idx); + } + } + } + + t_uindex num_matched = matched_rows.size(); + auto join_schema = join_table->get_schema(); + t_data_table joined_data(join_schema); + joined_data.init(); + joined_data.extend(num_matched); + auto left_schema = left_table->get_schema(); + auto right_schema = right_table->get_schema(); + for (const auto& col_name : join_schema.columns()) { + auto dst_col = joined_data.get_column(col_name); + bool is_join_col = (col_name == def.on_column); + if (left_schema.has_column(col_name)) { + auto left_src_col = left_data->get_column(col_name); + copy_column_dispatch( + dst_col.get(), left_src_col.get(), matched_rows, num_matched, true + ); + + if (is_join_col) { + copy_join_key_fallback( + dst_col.get(), right_key_col.get(), matched_rows, num_matched + ); + } + } else if (right_schema.has_column(col_name)) { + auto src_col = right_data->get_column(col_name); + copy_column_dispatch( + dst_col.get(), src_col.get(), matched_rows, num_matched, false + ); + } + } + + joined_data.set_size(num_matched); + auto* pkey_col = joined_data.add_column("psp_pkey", DTYPE_INT32, true); + auto* okey_col = joined_data.add_column("psp_okey", DTYPE_INT32, true); + for (t_uindex i = 0; i < num_matched; ++i) { + pkey_col->set_nth(i, static_cast(i), STATUS_VALID); + okey_col->set_nth(i, static_cast(i), STATUS_VALID); + } + + join_table->clear(); + join_table->init(joined_data, num_matched, t_op::OP_INSERT, 0); +} + +} // namespace perspective::server diff --git a/rust/perspective-server/cpp/perspective/src/cpp/server.cpp b/rust/perspective-server/cpp/perspective/src/cpp/server.cpp index 9cc9e13978..391c449748 100644 --- a/rust/perspective-server/cpp/perspective/src/cpp/server.cpp +++ b/rust/perspective-server/cpp/perspective/src/cpp/server.cpp @@ -561,76 +561,6 @@ 
ServerResources::mark_all_tables_clean() { m_dirty_tables.clear(); } -void -ServerResources::register_join( - const t_id& join_table_id, - const t_id& left_table_id, - const t_id& right_table_id, - const std::string& on_column -) { - PSP_WRITE_LOCK(m_write_lock); - JoinDef def{left_table_id, right_table_id, on_column}; - m_join_defs.emplace(join_table_id, def); - m_table_to_join_tables.emplace(left_table_id, join_table_id); - m_table_to_join_tables.emplace(right_table_id, join_table_id); - m_readonly_tables.insert(join_table_id); -} - -void -ServerResources::unregister_join(const t_id& join_table_id) { - PSP_WRITE_LOCK(m_write_lock); - auto it = m_join_defs.find(join_table_id); - if (it == m_join_defs.end()) { - return; - } - - auto& def = it->second; - - // Remove from m_table_to_join_tables for both source tables - for (const auto& source_id : {def.left_table_id, def.right_table_id}) { - auto range = m_table_to_join_tables.equal_range(source_id); - for (auto jt = range.first; jt != range.second;) { - if (jt->second == join_table_id) { - jt = m_table_to_join_tables.erase(jt); - } else { - ++jt; - } - } - } - - m_join_defs.erase(it); - m_readonly_tables.erase(join_table_id); -} - -bool -ServerResources::is_join_table(const t_id& id) { - PSP_READ_LOCK(m_write_lock); - return m_join_defs.contains(id); -} - -bool -ServerResources::is_readonly_table(const t_id& id) { - PSP_READ_LOCK(m_write_lock); - return m_readonly_tables.contains(id); -} - -std::vector -ServerResources::get_dependent_join_tables(const t_id& source_table_id) { - PSP_READ_LOCK(m_write_lock); - std::vector result; - auto range = m_table_to_join_tables.equal_range(source_table_id); - for (auto it = range.first; it != range.second; ++it) { - result.push_back(it->second); - } - return result; -} - -ServerResources::JoinDef -ServerResources::get_join_def(const t_id& join_table_id) { - PSP_READ_LOCK(m_write_lock); - return m_join_defs.at(join_table_id); -} - void ServerResources::create_table_on_delete_sub( 
const t_id& table_id, Subscription sub_id @@ -1773,97 +1703,37 @@ ProtoServer::_handle_request(std::uint32_t client_id, Request&& req) { auto left_table = m_resources.get_table(r.left_table_id()); auto right_table = m_resources.get_table(r.right_table_id()); - auto left_schema = left_table->get_schema(); - auto right_schema = right_table->get_schema(); - - // Validate join column exists in both tables - if (!left_schema.has_column(r.on_column())) { - proto::Response resp; - auto* err = resp.mutable_server_error()->mutable_message(); - std::stringstream ss; - ss << "Column \"" << r.on_column() - << "\" not found in table \"" << r.left_table_id() << "\""; - *err = ss.str(); - push_resp(std::move(resp)); - break; - } - if (!right_schema.has_column(r.on_column())) { - proto::Response resp; - auto* err = resp.mutable_server_error()->mutable_message(); - std::stringstream ss; - ss << "Column \"" << r.on_column() - << "\" not found in table \"" << r.right_table_id() << "\""; - *err = ss.str(); - push_resp(std::move(resp)); - break; - } + auto result = m_join_engine.make_join_table( + r.on_column(), r.right_on_column(), r.join_type(), left_table, right_table + ); - // Validate type match - if (left_schema.get_dtype(r.on_column()) - != right_schema.get_dtype(r.on_column())) { + if (!result.ok()) { proto::Response resp; - auto* err = resp.mutable_server_error()->mutable_message(); - *err = "Join column type mismatch"; + *resp.mutable_server_error()->mutable_message() = result.error; push_resp(std::move(resp)); break; } - // Check for column name conflicts (excluding join key) - bool has_conflict = false; - for (const auto& rcol : right_schema.columns()) { - if (rcol == r.on_column()) { - continue; - } - if (left_schema.has_column(rcol)) { - proto::Response resp; - auto* err = resp.mutable_server_error()->mutable_message(); - std::stringstream ss; - ss << "Column \"" << rcol << "\" exists in both tables"; - *err = ss.str(); - push_resp(std::move(resp)); - has_conflict = true; 
- break; - } - } - if (has_conflict) { - break; - } - - // Build merged schema: all left columns + right columns - // (excluding join key from right) - std::vector merged_columns; - std::vector merged_types; - for (t_uindex i = 0; i < left_schema.columns().size(); ++i) { - merged_columns.push_back(left_schema.columns()[i]); - merged_types.push_back(left_schema.types()[i]); - } - for (t_uindex i = 0; i < right_schema.columns().size(); ++i) { - if (right_schema.columns()[i] == r.on_column()) { - continue; - } - merged_columns.push_back(right_schema.columns()[i]); - merged_types.push_back(right_schema.types()[i]); - } - - t_schema merged_schema(merged_columns, merged_types); - auto join_table = - Table::from_schema(r.on_column(), merged_schema); - - m_resources.host_table(entity_id, join_table); - m_resources.register_join( + m_resources.host_table(entity_id, result.table); + m_join_engine.register_join( entity_id, r.left_table_id(), r.right_table_id(), - r.on_column() + r.on_column(), + r.right_on_column(), + r.join_type() ); // Compute initial join - _recompute_join(entity_id, proto_resp); + m_join_engine.recompute( + entity_id, left_table, right_table, result.table + ); // Process the join table so its gnode state is up to date auto jt = m_resources.get_table(entity_id); jt->get_pool()->_process(); + m_resources.mark_table_dirty(entity_id); m_resources.mark_table_clean(entity_id); proto::Response resp; @@ -1984,7 +1854,7 @@ ProtoServer::_handle_request(std::uint32_t client_id, Request&& req) { break; } case proto::Request::kTableReplaceReq: { - if (m_resources.is_readonly_table(req.entity_id())) { + if (m_join_engine.is_join_table(req.entity_id())) { proto::Response resp; *resp.mutable_server_error()->mutable_message() = "Cannot update a read-only join table"; @@ -2025,7 +1895,7 @@ ProtoServer::_handle_request(std::uint32_t client_id, Request&& req) { break; } case proto::Request::kTableRemoveReq: { - if (m_resources.is_readonly_table(req.entity_id())) { + if 
(m_join_engine.is_join_table(req.entity_id())) { proto::Response resp; *resp.mutable_server_error()->mutable_message() = "Cannot update a read-only join table"; @@ -2061,7 +1931,7 @@ ProtoServer::_handle_request(std::uint32_t client_id, Request&& req) { break; } case proto::Request::kTableUpdateReq: { - if (m_resources.is_readonly_table(req.entity_id())) { + if (m_join_engine.is_join_table(req.entity_id())) { proto::Response resp; *resp.mutable_server_error()->mutable_message() = "Cannot update a read-only join table"; @@ -2863,9 +2733,9 @@ ProtoServer::_handle_request(std::uint32_t client_id, Request&& req) { case proto::Request::kTableDeleteReq: { // Prevent deleting a source table that feeds a join table auto dependents = - m_resources.get_dependent_join_tables(req.entity_id()); + m_join_engine.get_dependent_join_tables(req.entity_id()); if (!dependents.empty() - && !m_resources.is_join_table(req.entity_id())) { + && !m_join_engine.is_join_table(req.entity_id())) { proto::Response resp; std::stringstream ss; ss << "Cannot delete table: it is a source for join table \"" @@ -2876,8 +2746,8 @@ ProtoServer::_handle_request(std::uint32_t client_id, Request&& req) { } // If this is a join table being deleted, clean up join metadata - if (m_resources.is_join_table(req.entity_id())) { - m_resources.unregister_join(req.entity_id()); + if (m_join_engine.is_join_table(req.entity_id())) { + m_join_engine.unregister_join(req.entity_id()); } const auto is_immediate = req.table_delete_req().is_immediate(); @@ -3151,125 +3021,6 @@ ProtoServer::_handle_request(std::uint32_t client_id, Request&& req) { return proto_resp; } -void -ProtoServer::_recompute_join( - const ServerResources::t_id& join_table_id, - std::vector>& outs -) { - auto def = m_resources.get_join_def(join_table_id); - auto left_table = m_resources.get_table(def.left_table_id); - auto right_table = m_resources.get_table(def.right_table_id); - auto join_table = m_resources.get_table(join_table_id); - - // Get the 
raw master data tables and pkey maps - auto left_data = left_table->get_gnode()->get_table_sptr(); - auto right_data = right_table->get_gnode()->get_table_sptr(); - const auto& left_pkey_map = left_table->get_gnode()->get_pkey_map(); - const auto& right_pkey_map = right_table->get_gnode()->get_pkey_map(); - - // Get the join key columns from both tables - auto left_key_col = left_data->get_column(def.on_column); - auto right_key_col = right_data->get_column(def.on_column); - - // Build multimap: join_key_value → list of right row indices, - // sorted by pkey so that right-side ordering is deterministic. - tsl::hopscotch_map> right_join_key_to_rows; - right_join_key_to_rows.reserve(right_pkey_map.size()); - { - // Sort right pkey entries so rows are grouped in insertion order. - std::vector> right_entries( - right_pkey_map.begin(), right_pkey_map.end() - ); - std::sort(right_entries.begin(), right_entries.end(), - [](const auto& a, const auto& b) { return a.first < b.first; } - ); - for (const auto& [pkey, row_idx] : right_entries) { - auto join_key = right_key_col->get_scalar(row_idx); - if (!join_key.is_none()) { - right_join_key_to_rows[join_key].push_back(row_idx); - } - } - } - - // Sort left pkey entries so the join result preserves left-table - // insertion order (pkeys are auto-incremented integers for non-indexed - // tables, so sorting by pkey gives insertion order). - std::vector> left_entries( - left_pkey_map.begin(), left_pkey_map.end() - ); - std::sort(left_entries.begin(), left_entries.end(), - [](const auto& a, const auto& b) { return a.first < b.first; } - ); - - // Find matching rows by iterating left rows in order. - // For each left row, pair it with every matching right row (cross product - // per key value) to handle duplicate join keys in non-indexed tables. 
- std::vector> matched_rows; - matched_rows.reserve(left_entries.size()); - for (const auto& [pkey, row_idx] : left_entries) { - auto join_key = left_key_col->get_scalar(row_idx); - if (join_key.is_none()) { - continue; - } - auto it = right_join_key_to_rows.find(join_key); - if (it != right_join_key_to_rows.end()) { - for (auto right_row_idx : it->second) { - matched_rows.emplace_back(row_idx, right_row_idx); - } - } - } - - t_uindex num_matched = matched_rows.size(); - - // Build the joined schema and data table - auto join_schema = join_table->get_schema(); - t_data_table joined_data(join_schema); - joined_data.init(); - joined_data.extend(num_matched); - - auto left_schema = left_table->get_schema(); - auto right_schema = right_table->get_schema(); - - // Copy data column-by-column - for (const auto& col_name : join_schema.columns()) { - auto dst_col = joined_data.get_column(col_name); - if (left_schema.has_column(col_name)) { - auto src_col = left_data->get_column(col_name); - for (t_uindex i = 0; i < num_matched; ++i) { - dst_col->set_scalar( - i, src_col->get_scalar(matched_rows[i].first) - ); - } - } else if (right_schema.has_column(col_name)) { - auto src_col = right_data->get_column(col_name); - for (t_uindex i = 0; i < num_matched; ++i) { - dst_col->set_scalar( - i, src_col->get_scalar(matched_rows[i].second) - ); - } - } - } - - joined_data.set_size(num_matched); - - // Add psp_pkey and psp_okey columns with synthetic integer keys. - // We cannot use the join column as pkey because duplicate join key - // values (from non-indexed source tables) would cause rows to collapse. 
- auto* pkey_col = joined_data.add_column("psp_pkey", DTYPE_INT32, true); - auto* okey_col = joined_data.add_column("psp_okey", DTYPE_INT32, true); - for (t_uindex i = 0; i < num_matched; ++i) { - t_tscalar key; - key.set(static_cast(i)); - pkey_col->set_scalar(i, key); - okey_col->set_scalar(i, key); - } - - // Clear the join table and push the new data - join_table->clear(); - join_table->init(joined_data, num_matched, t_op::OP_INSERT, 0); - m_resources.mark_table_dirty(join_table_id); -} - std::vector> ProtoServer::_poll() { std::vector> resp_envs; @@ -3280,12 +3031,20 @@ ProtoServer::_poll() { m_resources.mark_all_tables_clean(); + // Build the set of dirty source table IDs so we can tell the join + // engine which side(s) changed, allowing it to skip rebuilding the + // right-side index when only the left table was updated. + tsl::hopscotch_set dirty_ids; + for (auto& [_, table_id] : tables) { + dirty_ids.insert(table_id); + } + // Recompute join tables whose sources were dirty, using a worklist // to handle chained joins (join of join) in dependency order. 
tsl::hopscotch_set processed_joins; std::vector worklist; for (auto& [_, table_id] : tables) { - auto dependents = m_resources.get_dependent_join_tables(table_id); + auto dependents = m_join_engine.get_dependent_join_tables(table_id); for (auto& join_id : dependents) { if (processed_joins.find(join_id) == processed_joins.end()) { worklist.push_back(join_id); @@ -3300,13 +3059,29 @@ ProtoServer::_poll() { continue; } - _recompute_join(join_id, resp_envs); + const auto& def = m_join_engine.get_join_def(join_id); + bool left_changed = dirty_ids.contains(def.left_table_id); + bool right_changed = dirty_ids.contains(def.right_table_id); + auto left_table = m_resources.get_table(def.left_table_id); + auto right_table = m_resources.get_table(def.right_table_id); auto join_table = m_resources.get_table(join_id); + m_join_engine.recompute( + join_id, + left_table, + right_table, + join_table, + left_changed, + right_changed + ); + _process_table_unchecked(join_table, join_id, resp_envs); m_resources.mark_table_clean(join_id); + // The recomputed join table is itself "dirty" for chained joins. 
+ dirty_ids.insert(join_id); + // Check for chained joins (join tables that depend on this join) - auto chained = m_resources.get_dependent_join_tables(join_id); + auto chained = m_join_engine.get_dependent_join_tables(join_id); for (auto& chained_id : chained) { if (processed_joins.find(chained_id) == processed_joins.end()) { worklist.push_back(chained_id); diff --git a/rust/perspective-server/cpp/perspective/src/include/perspective/join_engine.h b/rust/perspective-server/cpp/perspective/src/include/perspective/join_engine.h new file mode 100644 index 0000000000..064d05bb13 --- /dev/null +++ b/rust/perspective-server/cpp/perspective/src/include/perspective/join_engine.h @@ -0,0 +1,101 @@ +// ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ +// ┃ ██████ ██████ ██████ █ █ █ █ █ █▄ ▀███ █ ┃ +// ┃ ▄▄▄▄▄█ █▄▄▄▄▄ ▄▄▄▄▄█ ▀▀▀▀▀█▀▀▀▀▀ █ ▀▀▀▀▀█ ████████▌▐███ ███▄ ▀█ █ ▀▀▀▀▀ ┃ +// ┃ █▀▀▀▀▀ █▀▀▀▀▀ █▀██▀▀ ▄▄▄▄▄ █ ▄▄▄▄▄█ ▄▄▄▄▄█ ████████▌▐███ █████▄ █ ▄▄▄▄▄ ┃ +// ┃ █ ██████ █ ▀█▄ █ ██████ █ ███▌▐███ ███████▄ █ ┃ +// ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫ +// ┃ Copyright (c) 2017, the Perspective Authors. ┃ +// ┃ ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ ┃ +// ┃ This file is part of the Perspective library, distributed under the terms ┃ +// ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃ +// ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ + +#pragma once + +#include "perspective/exports.h" +#include "perspective/raw_types.h" +#include "perspective/scalar.h" +#include "perspective/schema.h" +#include "perspective/table.h" +#include +#include +#include +#include +#include +#include + +namespace perspective::server { + +struct JoinDef { + std::string left_table_id; + std::string right_table_id; + std::string on_column; + std::string right_on_column; + proto::JoinType join_type; +}; + +struct MakeJoinResult { + std::shared_ptr
table; + std::string error; + + bool + ok() const { + return error.empty(); + } +}; + +struct JoinCache { + tsl::hopscotch_map> right_index; + std::vector> right_entries; + bool valid = false; +}; + +class PERSPECTIVE_EXPORT JoinEngine { +public: + using t_id = std::string; + + void register_join( + const t_id& join_table_id, + const t_id& left_table_id, + const t_id& right_table_id, + const std::string& on_column, + const std::string& right_on_column, + proto::JoinType join_type + ); + + void unregister_join(const t_id& join_table_id); + bool is_join_table(const t_id& id) const; + std::vector get_dependent_join_tables(const t_id& source_table_id + ) const; + const JoinDef& get_join_def(const t_id& join_table_id) const; + + MakeJoinResult make_join_table( + const std::string& on_column, + const std::string& right_on_column, + proto::JoinType join_type, + const std::shared_ptr
& left_table, + const std::shared_ptr
& right_table + ); + + void recompute( + const t_id& join_table_id, + const std::shared_ptr
& left_table, + const std::shared_ptr
& right_table, + const std::shared_ptr
& join_table, + bool left_changed = true, + bool right_changed = true + ); + +private: + void build_right_index( + JoinCache& cache, + const std::shared_ptr
& right_table, + const std::string& on_column + ); + + tsl::hopscotch_map m_join_defs; + tsl::hopscotch_map m_caches; + std::multimap m_table_to_join_tables; +}; + +} // namespace perspective::server diff --git a/rust/perspective-server/cpp/perspective/src/include/perspective/server.h b/rust/perspective-server/cpp/perspective/src/include/perspective/server.h index 119a63c466..20899371c2 100644 --- a/rust/perspective-server/cpp/perspective/src/include/perspective/server.h +++ b/rust/perspective-server/cpp/perspective/src/include/perspective/server.h @@ -14,6 +14,7 @@ #include "perspective/base.h" #include "perspective/exports.h" +#include "perspective/join_engine.h" #include "perspective/raw_types.h" #include "perspective/schema.h" #include "perspective/view.h" @@ -593,25 +594,6 @@ namespace server { std::uint32_t sub_id, std::uint32_t client_id ); - // Join table tracking - struct JoinDef { - t_id left_table_id; - t_id right_table_id; - std::string on_column; - }; - - void register_join( - const t_id& join_table_id, - const t_id& left_table_id, - const t_id& right_table_id, - const std::string& on_column - ); - void unregister_join(const t_id& join_table_id); - bool is_join_table(const t_id& id); - bool is_readonly_table(const t_id& id); - std::vector get_dependent_join_tables(const t_id& source_table_id); - JoinDef get_join_def(const t_id& join_table_id); - void mark_table_dirty(const t_id& id); void mark_table_clean(const t_id& id); void mark_all_tables_clean(); @@ -649,11 +631,6 @@ namespace server { tsl::hopscotch_set m_dirty_tables; tsl::hopscotch_map m_deleted_tables; - // Join dependency tracking - tsl::hopscotch_map m_join_defs; - std::multimap m_table_to_join_tables; - tsl::hopscotch_set m_readonly_tables; - #ifdef PSP_PARALLEL_FOR std::shared_mutex m_write_lock; #endif @@ -700,17 +677,13 @@ namespace server { std::vector>& outs ); - void _recompute_join( - const ServerResources::t_id& join_table_id, - std::vector>& outs - ); - static std::uint32_t 
m_client_id; bool m_realtime_mode; std::atomic m_cpu_time_start; std::atomic m_cpu_time; ServerResources m_resources; + JoinEngine m_join_engine; t_computed_expression_parser m_computed_expression_parser; }; diff --git a/rust/perspective-viewer/src/rust/lib.rs b/rust/perspective-viewer/src/rust/lib.rs index a8d07d74cf..6eb752e264 100644 --- a/rust/perspective-viewer/src/rust/lib.rs +++ b/rust/perspective-viewer/src/rust/lib.rs @@ -67,6 +67,7 @@ import type { ColumnWindow, ViewWindow, OnUpdateOptions, + JoinOptions, UpdateOptions, DeleteOptions, ViewConfigUpdate, From 79009b438d965a0bc5e909eb1a5038a4422ab595 Mon Sep 17 00:00:00 2001 From: Andrew Stein Date: Thu, 2 Apr 2026 00:02:44 -0400 Subject: [PATCH 3/3] Docs and benchmarks Signed-off-by: Andrew Stein --- docs/md/SUMMARY.md | 6 +++ docs/md/explanation/join.md | 12 +++++ docs/md/explanation/join/join_types.md | 25 ++++++++++ docs/md/explanation/join/options.md | 35 ++++++++++++++ docs/md/explanation/join/reactivity.md | 46 ++++++++++++++++++ docs/md/how_to/javascript/join.md | 65 ++++++++++++++++++++++++++ docs/md/how_to/python/join.md | 64 +++++++++++++++++++++++++ docs/md/how_to/rust.md | 61 ++++++++++++++++++++++++ tools/bench/basic_suite.mjs | 1 + tools/bench/cross_platform_suite.mjs | 48 ++++++++++++++++++- tools/bench/puppeteer_suite.mjs | 1 + tools/bench/python_suite.mjs | 1 + 12 files changed, 363 insertions(+), 2 deletions(-) create mode 100644 docs/md/explanation/join.md create mode 100644 docs/md/explanation/join/join_types.md create mode 100644 docs/md/explanation/join/options.md create mode 100644 docs/md/explanation/join/reactivity.md create mode 100644 docs/md/how_to/javascript/join.md create mode 100644 docs/md/how_to/python/join.md diff --git a/docs/md/SUMMARY.md b/docs/md/SUMMARY.md index fb43a0a599..d885d160c3 100644 --- a/docs/md/SUMMARY.md +++ b/docs/md/SUMMARY.md @@ -22,6 +22,10 @@ - [Selection and Ordering](./explanation/view/config/selection_and_ordering.md) - 
[`expressions`](./explanation/view/config/expressions.md) - [Advanced View Operations](./explanation/view/advanced.md) +- [`Join`](./explanation/join.md) + - [Join Types](./explanation/join/join_types.md) + - [Join Options](./explanation/join/options.md) + - [Reactivity and Constraints](./explanation/join/reactivity.md) # JavaScript @@ -32,6 +36,7 @@ - [Cleaning up resources](./how_to/javascript/deleting.md) - [Hosting a `WebSocketServer` in Node.js](./how_to/javascript/nodejs_server.md) - [Customizing `perspective.worker()`](./how_to/javascript/custom_worker.md) + - [Joining Tables](./how_to/javascript/join.md) - [`perspective-viewer` Custom Element library](./how_to/javascript/viewer.md) - [Loading data](./how_to/javascript/loading_data.md) - [Theming](./how_to/javascript/theming.md) @@ -52,6 +57,7 @@ - [Callbacks and events](./how_to/python/callbacks.md) - [Multithreading](./how_to/python/multithreading.md) - [Hosting a WebSocket server](./how_to/python/websocket.md) +- [Joining Tables](./how_to/python/join.md) - [`PerspectiveWidget` for JupyterLab](./how_to/python/jupyterlab.md) - [Virtual Servers](./how_to/python/virtual_server.md) - [DuckDB](./how_to/python/virtual_server/duckdb.md) diff --git a/docs/md/explanation/join.md b/docs/md/explanation/join.md new file mode 100644 index 0000000000..cf119795d0 --- /dev/null +++ b/docs/md/explanation/join.md @@ -0,0 +1,12 @@ +# Join + +`Client::join` creates a read-only `Table` by joining two source tables on a +shared key column. The `left` and `right` arguments can be `Table` objects or +string table names (as returned by `get_hosted_table_names()`). The resulting +table is _reactive_: whenever either source table is updated, the join is +automatically recomputed and any `View` derived from the joined table will +update accordingly. 
 + +Joined tables support the full `View` API — you can apply `group_by`, +`split_by`, `sort`, `filter`, `expressions`, and all other `View` operations on +the result, just as you would with any other `Table`. diff --git a/docs/md/explanation/join/join_types.md b/docs/md/explanation/join/join_types.md new file mode 100644 index 0000000000..a64dc5bf72 --- /dev/null +++ b/docs/md/explanation/join/join_types.md @@ -0,0 +1,25 @@ +# Join Types + +`Client::join` supports three join types, specified via the `join_type` option. +The default is `"inner"`. + +## Inner Join (default) + +An inner join includes only rows whose key value appears in _both_ source +tables. Rows from either table that have no match in the other are excluded. + +## Left Join + +A left join includes all rows from the left table. For left rows that have no +match in the right table, right-side columns are filled with `null`. + +## Outer Join + +An outer join includes all rows from both tables. Unmatched rows on either side +have their missing columns filled with `null`. + +| `join_type` | Left-only rows | Right-only rows | +| ----------- | -------------- | --------------- | +| `"inner"` | excluded | excluded | +| `"left"` | included | excluded | +| `"outer"` | included | included | diff --git a/docs/md/explanation/join/options.md b/docs/md/explanation/join/options.md new file mode 100644 index 0000000000..37aa87c14f --- /dev/null +++ b/docs/md/explanation/join/options.md @@ -0,0 +1,35 @@ +# Join Options + +## `on` — Join Key Column + +The `on` parameter specifies the column name used to match rows between the left +and right tables. This column must exist in the left table and, by default, must +also exist in the right table with the same name and type. + +The resulting table is not indexed on the join key, so duplicate keys are kept. + +## `right_on` — Different Right Key Column + +When the join key has a different name in the right table, use `right_on` to +specify the right table's column name. 
The left table's column name (`on`) is +used in the output schema; the right key column is excluded from the result. + +The `on` and `right_on` columns must have compatible types. An error is thrown +if the types do not match. + +## `join_type` — Join Type + +Controls which rows are included in the result. See +[Join Types](./join_types.md) for details. + +| Value | Behavior | +| ----------- | ----------------------------------------------------- | +| `"inner"` | Only rows with matching keys in both tables (default) | +| `"left"` | All left rows; unmatched right columns are `null` | +| `"outer"` | All rows from both tables; unmatched columns are `null` | + +## `name` — Table Name + +An optional name for the resulting joined table. If omitted, a random name is +generated. This name is used to identify the table in the server's hosted table +registry. diff --git a/docs/md/explanation/join/reactivity.md b/docs/md/explanation/join/reactivity.md new file mode 100644 index 0000000000..13c89b1ea5 --- /dev/null +++ b/docs/md/explanation/join/reactivity.md @@ -0,0 +1,46 @@ +# Reactivity and Constraints + +## Reactive Updates + +Joined tables are fully reactive. When either source table receives an +`update()`, the join is automatically recomputed and any `View` created from the +joined table will reflect the new data. This includes: + +- Updates that modify existing rows in either source table. +- New rows added to either source table that create new matches. +- Chained joins — if a joined table is itself used as input to another join, + updates propagate through the entire chain. + +## Duplicate Keys + +Like SQL, `join()` produces a cross-product for each matching key value. When +multiple rows in the left table share the same key, each is paired with every +matching row in the right table (and vice versa). The number of output rows for +a given key is `left_count × right_count`. 
+ +This behavior depends on whether the source tables are _indexed_: + +- **Unindexed tables** (no `index` option) — rows are appended, so duplicate + keys accumulate naturally. Each `update()` appends new rows, which may + introduce additional duplicates. +- **Indexed tables** (`index` set to the join key) — each key appears at most + once per table, so the join produces at most one row per key. Updates replace + existing rows in-place rather than appending. + +## Read-Only + +Joined tables are read-only. Calling `update()`, `remove()`, `clear()`, or +`replace()` on a joined table will throw an error. Data can only change +indirectly, by updating the source tables. + +## Column Name Conflicts + +The left and right tables must not have overlapping column names (other than the +join key). If a non-key column name appears in both tables, `join()` throws an +error. Rename columns in your source data, or alias them with `View` expressions +and load that `View`'s output into a new `Table`, before joining. + +## Source Table Deletion + +A source table cannot be deleted while a joined table depends on it. You must +delete the joined table first, then delete the source tables. diff --git a/docs/md/how_to/javascript/join.md b/docs/md/how_to/javascript/join.md new file mode 100644 index 0000000000..aba7932f81 --- /dev/null +++ b/docs/md/how_to/javascript/join.md @@ -0,0 +1,65 @@ +# Joining Tables + +`perspective.join()` creates a read-only `Table` by joining two source tables on +a shared key column. The result is reactive — it updates automatically when +either source table changes. See [`Join`](../../explanation/join.md) for +conceptual details.
+ +## Basic Inner Join + +```javascript +const orders = await perspective.table([ + { id: 1, product_id: 101, qty: 5 }, + { id: 2, product_id: 102, qty: 3 }, + { id: 3, product_id: 101, qty: 7 }, +]); + +const products = await perspective.table([ + { product_id: 101, name: "Widget" }, + { product_id: 102, name: "Gadget" }, +]); + +const joined = await perspective.join(orders, products, "product_id"); +const view = await joined.view(); +const json = await view.to_json(); +// [ +// { product_id: 101, id: 1, qty: 5, name: "Widget" }, +// { product_id: 101, id: 3, qty: 7, name: "Widget" }, +// { product_id: 102, id: 2, qty: 3, name: "Gadget" }, +// ] +``` + +## Join Types + +Pass `join_type` in the options to select inner, left, or outer join behavior: + +```javascript +// Left join: all left rows, nulls for unmatched right columns +const left_joined = await perspective.join(left, right, "id", { + join_type: "left", +}); + +// Outer join: all rows from both tables +const outer_joined = await perspective.join(left, right, "id", { + join_type: "outer", +}); +``` + +## Reactive Updates + +The joined table recomputes automatically when either source table is updated: + +```javascript +const left = await perspective.table([{ id: 1, x: 10 }]); +const right = await perspective.table([{ id: 2, y: "b" }]); + +const joined = await perspective.join(left, right, "id"); +const view = await joined.view(); + +let json = await view.to_json(); +// [] — no matching keys yet + +await right.update([{ id: 1, y: "a" }]); +json = await view.to_json(); +// [{ id: 1, x: 10, y: "a" }] — new match detected +``` diff --git a/docs/md/how_to/python/join.md b/docs/md/how_to/python/join.md new file mode 100644 index 0000000000..4f1c1ba3bc --- /dev/null +++ b/docs/md/how_to/python/join.md @@ -0,0 +1,64 @@ +# Joining Tables + +`perspective.join()` creates a read-only `Table` by joining two source tables on +a shared key column. 
The result is reactive — it updates automatically when +either source table changes. See [`Join`](../../explanation/join.md) for +conceptual details. + +## Basic Inner Join + +```python +orders = perspective.table([ + {"id": 1, "product_id": 101, "qty": 5}, + {"id": 2, "product_id": 102, "qty": 3}, + {"id": 3, "product_id": 101, "qty": 7}, +]) + +products = perspective.table([ + {"product_id": 101, "name": "Widget"}, + {"product_id": 102, "name": "Gadget"}, +]) + +joined = perspective.join(orders, products, "product_id") +view = joined.view() +json = view.to_json() +``` + +## Join Types + +Pass `join_type` to select inner, left, or outer join behavior: + +```python +# Left join: all left rows, nulls for unmatched right columns +left_joined = perspective.join(left, right, "id", join_type="left") + +# Outer join: all rows from both tables +outer_joined = perspective.join(left, right, "id", join_type="outer") +``` + +## Reactive Updates + +The joined table recomputes automatically when either source table is updated: + +```python +left = perspective.table([{"id": 1, "x": 10}]) +right = perspective.table([{"id": 2, "y": "b"}]) + +joined = perspective.join(left, right, "id") +view = joined.view() + +json = view.to_json() +# [] — no matching keys yet + +right.update([{"id": 1, "y": "a"}]) +json = view.to_json() +# [{"id": 1, "x": 10, "y": "a"}] — new match detected +``` + +## Async Client + +The async client has the same API: + +```python +joined = await client.join(orders, products, "product_id", join_type="left") +``` diff --git a/docs/md/how_to/rust.md b/docs/md/how_to/rust.md index 837deaea9d..3a7331cf71 100644 --- a/docs/md/how_to/rust.md +++ b/docs/md/how_to/rust.md @@ -26,3 +26,64 @@ let mut options = TableInitOptions::default(); options.set_name("my_data_source"); client.table(data.into(), options).await?; ``` + +# Joining Tables + +`Client::join` creates a read-only `Table` by joining two source tables on a +shared key column. 
The result is reactive — it updates automatically when +either source table changes. See [`Join`](../explanation/join.md) for +conceptual details. + +```rust +let orders = client.table( + TableData::Update(UpdateData::JsonRows( + "[{\"id\":1,\"product_id\":101,\"qty\":5},{\"id\":2,\"product_id\":102,\"qty\":3}]".into(), + )), + TableInitOptions::default(), +).await?; + +let products = client.table( + TableData::Update(UpdateData::JsonRows( + "[{\"product_id\":101,\"name\":\"Widget\"},{\"product_id\":102,\"name\":\"Gadget\"}]".into(), + )), + TableInitOptions::default(), +).await?; + +let joined = client.join( + (&orders).into(), + (&products).into(), + "product_id", + JoinOptions::default(), +).await?; + +let view = joined.view(None).await?; +let json = view.to_json().await?; +``` + +Use `JoinOptions` to configure the join type, table name, or `right_on` column: + +```rust +let options = JoinOptions { + join_type: Some(JoinType::Left), + name: Some("orders_with_products".into()), + right_on: None, +}; + +let joined = client.join( + (&orders).into(), + (&products).into(), + "product_id", + options, +).await?; +``` + +You can also join by table name strings instead of `Table` references: + +```rust +let joined = client.join( + "orders".into(), + "products".into(), + "product_id", + JoinOptions::default(), +).await?; +``` diff --git a/tools/bench/basic_suite.mjs b/tools/bench/basic_suite.mjs index fa8d18ea3c..8fa6c40a05 100644 --- a/tools/bench/basic_suite.mjs +++ b/tools/bench/basic_suite.mjs @@ -70,5 +70,6 @@ perspective_bench.suite( await all_benchmarks.table_suite(client, metadata); await all_benchmarks.view_suite(client, metadata); await all_benchmarks.to_data_suite(client, metadata); + await all_benchmarks.join_suite(client, metadata); }, ); diff --git a/tools/bench/cross_platform_suite.mjs b/tools/bench/cross_platform_suite.mjs index af171b4c19..8e5a7a335d 100644 --- a/tools/bench/cross_platform_suite.mjs +++ b/tools/bench/cross_platform_suite.mjs @@ -16,6 
+16,50 @@ import { new_superstore_table, } from "./src/js/superstore.mjs"; +export async function join_suite(perspective, metadata) { + if (check_version_gte(metadata.version, "4.3.0")) { + async function before_all() { + const left = await perspective.table( + new_superstore_table(metadata), + ); + + const columns = await left.columns(); + const expressions = Object.fromEntries( + columns + .filter((x) => x !== "Row ID") + .map((x) => [`${x}_2`, `"${x}"`]), + ); + + const view = await left.view({ + columns: ["Row ID", ...Object.keys(expressions)], + expressions, + }); + + const right = await perspective.table(await view.to_arrow()); + await view.delete(); + return { left, right }; + } + + async function after_all({ left, right }) { + await left.delete(); + await right.delete(); + } + + await benchmark({ + name: `.join()`, + before_all, + after_all, + metadata, + async after(_, joined) { + await joined.delete(); + }, + async test({ left, right }) { + return await perspective.join(left, right, "Row ID"); + }, + }); + } +} + export async function to_data_suite(perspective, metadata) { async function before_all() { const table = await perspective.table(new_superstore_table(metadata)); @@ -152,7 +196,7 @@ export async function view_suite(perspective, metadata) { async test({ table, schema }) { const columns = ["Sales", "Quantity", "City"]; const aggregates = Object.fromEntries( - Object.keys(schema).map((x) => [x, "median"]) + Object.keys(schema).map((x) => [x, "median"]), ); if (check_version_gte(metadata.version, "1.2.0")) { @@ -176,7 +220,7 @@ export async function table_suite(perspective, metadata) { async function before_all() { try { const table = await perspective.table( - new_superstore_table(metadata) + new_superstore_table(metadata), ); const view = await table.view(); diff --git a/tools/bench/puppeteer_suite.mjs b/tools/bench/puppeteer_suite.mjs index 33ff9b5cf6..ede70f4ac4 100644 --- a/tools/bench/puppeteer_suite.mjs +++ b/tools/bench/puppeteer_suite.mjs @@ 
-80,5 +80,6 @@ perspective_bench.suite( await test_suite("table_suite"); await test_suite("view_suite"); await test_suite("to_data_suite"); + await test_suite("join_suite"); }, ); diff --git a/tools/bench/python_suite.mjs b/tools/bench/python_suite.mjs index a6b1719c28..f43cd8c10c 100644 --- a/tools/bench/python_suite.mjs +++ b/tools/bench/python_suite.mjs @@ -64,6 +64,7 @@ perspective_bench.suite( await all_benchmarks.table_suite(client, metadata); await all_benchmarks.view_suite(client, metadata); await all_benchmarks.to_data_suite(client, metadata); + await all_benchmarks.join_suite(client, metadata); }, python.start, python.stop,