diff --git a/doc/pg_clickhouse.md b/doc/pg_clickhouse.md index 78317d1..2bbbc90 100644 --- a/doc/pg_clickhouse.md +++ b/doc/pg_clickhouse.md @@ -310,7 +310,7 @@ types: Int32 | integer | Int64 | bigint | Int8 | smallint | - JSON | json | HTTP engine only + JSON | jsonb | HTTP engine only String | text | UInt16 | integer | UInt32 | bigint | @@ -477,29 +477,29 @@ Set the `pg_clickhouse.session_settings` runtime parameter to configure [ClickHouse settings] to be set on subsequent queries. Example: ```sql -SET pg_clickhouse.session_settings = 'join_use_nulls=1, final=1'; +SET pg_clickhouse.session_settings = 'join_use_nulls 1, final 1'; ``` -The default is `join_use_nulls=1`. Set it to an empty string to fall back on +The default is `join_use_nulls 1, group_by_use_nulls 1, final 1`. Set it to an empty string to fall back on the ClickHouse server's settings. ```sql SET pg_clickhouse.session_settings = ''; ``` -The syntax is a comma-delimited list of key/value pairs separated by an equal -sign. Keys must correspond to [ClickHouse settings]. Escape spaces, commas, -and backslashes in values with a backslash: +The syntax is a comma-delimited list of key/value pairs separated by one or +more spaces. Keys must correspond to [ClickHouse settings]. 
Escape spaces, +commas, and backslashes in values with a backslash: ```sql -SET pg_clickhouse.session_settings = 'join_algorithm = grace_hash\,hash'; +SET pg_clickhouse.session_settings = 'join_algorithm grace_hash\,hash'; ``` Or use single quoted values to avoid escaping spaces and commas; consider using [dollar quoting] to avoid the need to double-quote: ```sql -SET pg_clickhouse.session_settings = $$join_algorithm = 'grace_hash,hash'$$; +SET pg_clickhouse.session_settings = $$join_algorithm 'grace_hash,hash'$$; ``` If you care about legibility and need to set many settings, use multiple @@ -507,19 +507,20 @@ lines, for example: ```sql SET pg_clickhouse.session_settings TO $$ - connect_timeout = 2, - count_distinct_implementation = uniq, - groupby_use_nulls = 0, - join_algorithm = 'prefer_partial_merge', - join_use_nulls = 1, - log_queries_min_type = QUERY_FINISH, - max_block_size = 32768, - max_execution_time = 45, - max_result_rows = 1024, - metrics_perf_events_list = 'this,that', - network_compression_method = ZSTD, - poll_interval = 5, - totals_mode = after_having_auto + connect_timeout 2, + count_distinct_implementation uniq, + final 1, + group_by_use_nulls 1, + join_algorithm 'prefer_partial_merge', + join_use_nulls 1, + log_queries_min_type QUERY_FINISH, + max_block_size 32768, + max_execution_time 45, + max_result_rows 1024, + metrics_perf_events_list 'this,that', + network_compression_method ZSTD, + poll_interval 5, + totals_mode after_having_auto $$; ``` diff --git a/src/connection.c b/src/connection.c index 05bb0c7..fa92eff 100644 --- a/src/connection.c +++ b/src/connection.c @@ -202,7 +202,7 @@ ch_connection_details * connstring_parse(const char *connstring) { ListCell *lc; - List *options = chfdw_parse_options(connstring, false); + List *options = chfdw_parse_options(connstring, false, true); ch_connection_details *details = palloc0(sizeof(ch_connection_details)); if (options == NIL) diff --git a/src/include/engine.h b/src/include/engine.h index 
2d312fb..3685376 100644 --- a/src/include/engine.h +++ b/src/include/engine.h @@ -24,6 +24,6 @@ typedef struct const List *settings; } ch_query; -#define new_query(sql) {sql, chfdw_parse_options(ch_session_settings, true)} +#define new_query(sql) {sql, chfdw_parse_options(ch_session_settings, true, false)} #endif /* CLICKHOUSE_ENGINE_H */ diff --git a/src/include/fdw.h b/src/include/fdw.h index 00d2ed5..378bef9 100644 --- a/src/include/fdw.h +++ b/src/include/fdw.h @@ -203,7 +203,7 @@ extern char *ch_session_settings; extern void chfdw_extract_options(List * defelems, char **driver, char **host, int *port, char **dbname, char **username, char **password); -extern List * chfdw_parse_options(const char *options, bool with_comma); +extern List * chfdw_parse_options(const char *options, bool with_comma, bool with_equal); /* in deparse.c */ extern void chfdw_classify_conditions(PlannerInfo * root, diff --git a/src/option.c b/src/option.c index ecdfac9..8b99229 100644 --- a/src/option.c +++ b/src/option.c @@ -298,23 +298,34 @@ chfdw_extract_options(List * defelems, char **driver, char **host, int *port, /* * Parse options as key/value pairs. Used for connection parameters and * ClickHouse settings. Based on the Postgres conninfo_parse() function. The - * format is: + * formats are: + * + * with_comma = false, with_equal = false: + * + * key value key 'value'... + * + * with_comma = false, with_equal = true: * * key = value key = 'value'... * - * Each key/value pair must be comma-delimited if with_comma is true. + * with_comma = true, with_equal = false: + * + * key value, key 'value',... + * + * with_comma = true, with_equal = true: * * key = value, key = 'value',... * - * Each key is an unquoted string followed by `=` with optional spaces - * followed by the value. Values may contain backslash-escaped spaces, - * backslashes, and commans when `with_comma` is true. Use SQL single-quoted - * literals to remove the need to escape commas and spaces. 
+ * Parameter names may not contain spaces or '=' (when with_equal is true), + * and support no escapes. + * + * Values may contain backslash-escaped spaces, backslashes, and commas. Use + * SQL single-quoted literals to remove the need to escape commas and spaces. * * Returns a PostgreSQL List containing DefElem cells. */ List * -chfdw_parse_options(const char *options_string, bool with_comma) +chfdw_parse_options(const char *options_string, bool with_comma, bool with_equal) { char *pname; char *pval; @@ -340,7 +351,7 @@ chfdw_parse_options(const char *options_string, bool with_comma) pname = cp; while (*cp) { - if (*cp == '=') + if (with_equal && *cp == '=') break; if (isspace((unsigned char) *cp)) { @@ -356,19 +367,18 @@ chfdw_parse_options(const char *options_string, bool with_comma) cp++; } - /* Check that there is a following '=' */ - if (*cp != '=') - ereport(ERROR, - errcode(ERRCODE_SYNTAX_ERROR), - errmsg("pg_clickhouse: missing \"=\" after \"%s\" in options string", pname)); - *cp++ = '\0'; - - /* Skip blanks after the '=' */ - while (*cp) + if (with_equal) { - if (!isspace((unsigned char) *cp)) - break; - cp++; + /* Check that there is a following '=' */ + if (*cp != '=') + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("pg_clickhouse: missing \"=\" after \"%s\" in options string", pname)); + *cp++ = '\0'; + + /* Skip blanks after the '=' */ + while (isspace((unsigned char) *cp)) + cp++; } /* Get the parameter value */ @@ -412,6 +422,10 @@ chfdw_parse_options(const char *options_string, bool with_comma) *cp2++ = *cp++; } *cp2 = '\0'; + if (cp2 == pval) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("pg_clickhouse: missing value for parameter \"%s\" in options string", pname))); } else { @@ -478,7 +492,7 @@ check_settings_guc(char **newval, void **extra, GucSource source) /* * Make sure we can parse the settings. */ - chfdw_parse_options(*newval, true); + chfdw_parse_options(*newval, true, false); /* * All good if no error. 
@@ -505,7 +519,7 @@ _PG_init(void) "Sets the default ClickHouse session settings.", NULL, &ch_session_settings, - "join_use_nulls = 1, group_by_use_nulls = 1, final = 1", + "join_use_nulls 1, group_by_use_nulls 1, final 1", PGC_USERSET, 0, check_settings_guc, diff --git a/src/pglink.c b/src/pglink.c index 6c881d7..3ed2bef 100644 --- a/src/pglink.c +++ b/src/pglink.c @@ -400,6 +400,7 @@ extend_insert_query(ch_http_insert_state * state, TupleTableSlot * slot) case VARCHAROID: case TEXTOID: case JSONOID: + case JSONBOID: case NAMEOID: case BITOID: case BYTEAOID: @@ -785,7 +786,7 @@ binary_insert_tuple(void *istate, TupleTableSlot * slot) ('UUID', 'uuid', ''), ('IPv4', 'inet', ''), ('IPv6', 'inet', ''), - ('JSON', 'json', 'HTTP engine only') + ('JSON', 'jsonb', 'HTTP engine only') ) AS v("ClickHouse", "PostgreSQL", "Notes") ORDER BY "ClickHouse"; @@ -810,7 +811,7 @@ static char *str_types_map[][2] = { {"UUID", "UUID"}, {"IPv4", "inet"}, {"IPv6", "inet"}, - {"JSON", "JSON"}, + {"JSON", "JSONB"}, {NULL, NULL}, }; diff --git a/test/expected/gucs.out b/test/expected/gucs.out index 3dc6d7e..5b8a363 100644 --- a/test/expected/gucs.out +++ b/test/expected/gucs.out @@ -1,17 +1,12 @@ \unset ECHO - ch_noop_bigint ----------------- - -(1 row) - NOTICE: OK `` -NOTICE: OK `join_use_nulls=1` -NOTICE: OK `join_use_nulls=1, xyz=true` -NOTICE: OK ` additional_result_filter = 'x != 2' ` -NOTICE: OK ` additional_result_filter = 'x != 2' ,join_use_nulls = 1 ` -NOTICE: OK ` xxx = DEFAULT, yyy = foo\,bar, zzz = 'He said, \'Hello\'', aaa = hi\ there ` -NOTICE: ERR 42601 - pg_clickhouse: missing "=" after "join_use_nulls" in options string -NOTICE: ERR 42601 - pg_clickhouse: missing "=" after "join_use_nulls" in options string +NOTICE: OK `join_use_nulls 1` +NOTICE: OK `join_use_nulls 1, xyz true` +NOTICE: OK ` additional_result_filter 'x != 2' ` +NOTICE: OK ` additional_result_filter 'x != 2' ,join_use_nulls 1 ` +NOTICE: OK ` xxx DEFAULT, yyy foo\,bar, zzz 'He said, \'Hello\'', aaa hi\ there 
` +NOTICE: ERR 42601 - pg_clickhouse: missing value for parameter "join_use_nulls" in options string +NOTICE: ERR 42601 - pg_clickhouse: missing comma after "join_use_nulls" value in options string NOTICE: ERR 42601 - pg_clickhouse: unterminated quoted string in options string NOTICE: ERR 42601 - pg_clickhouse: missing comma after "join_use_nulls" value in options string name | value @@ -28,22 +23,22 @@ NOTICE: ERR 42601 - pg_clickhouse: missing comma after "join_use_nulls" value i join_use_nulls | 1 (3 rows) - pg_clickhouse.session_settings ----------------------------------------------- - + - connect_timeout = 2, + - count_distinct_implementation = uniq, + - join_algorithm = 'prefer_partial_merge',+ - join_use_nulls = 0, + - join_use_nulls = 1, + - log_queries_min_type = QUERY_FINISH, + - max_block_size = 32768, + - max_execution_time = 45, + - max_result_rows = 1024, + - metrics_perf_events_list = 'this,that', + - network_compression_method = ZSTD, + - poll_interval = 5, + - totals_mode = after_having_auto + + pg_clickhouse.session_settings +-------------------------------------------- + + + connect_timeout 2, + + count_distinct_implementation uniq, + + join_algorithm 'prefer_partial_merge',+ + join_use_nulls 0, + + join_use_nulls 1, + + log_queries_min_type QUERY_FINISH, + + max_block_size 32768, + + max_execution_time 45, + + max_result_rows 1024, + + metrics_perf_events_list 'this,that', + + network_compression_method ZSTD, + + poll_interval 5, + + totals_mode after_having_auto + (1 row) diff --git a/test/expected/json.out b/test/expected/json.out index 884c9da..ce62982 100644 --- a/test/expected/json.out +++ b/test/expected/json.out @@ -34,7 +34,7 @@ IMPORT FOREIGN SCHEMA "json_test" FROM SERVER binary_json_loopback INTO json_bin Column | Type | Collation | Nullable | Default | FDW options --------+---------+-----------+----------+---------+------------- id | integer | | not null | | - data | json | | not null | | + data | jsonb | | not null | | Server: 
binary_json_loopback FDW options: (database 'json_test', table_name 'things', engine 'MergeTree') @@ -44,7 +44,7 @@ IMPORT FOREIGN SCHEMA "json_test" FROM SERVER http_json_loopback INTO json_http; Column | Type | Collation | Nullable | Default | FDW options --------+---------+-----------+----------+---------+------------- id | integer | | not null | | - data | json | | not null | | + data | jsonb | | not null | | Server: http_json_loopback FDW options: (database 'json_test', table_name 'things', engine 'MergeTree') @@ -64,12 +64,12 @@ SELECT * FROM json_bin.things ORDER BY id; ERROR: pg_clickhouse: unsupported column type: JSON DETAIL: Remote Query: SELECT id, data FROM json_test.things ORDER BY id ASC NULLS LAST SELECT * FROM json_http.things ORDER BY id; - id | data -----+---------------------------------------------------------- - 1 | {"id":1,"name":"widget","size":"large","stocked":true} - 2 | {"id":2,"name":"sprocket","size":"small","stocked":true} - 3 | {"id":3,"name":"gizmo","size":"medium","stocked":true} - 4 | {"id":4,"name":"doodad","size":"large","stocked":false} + id | data +----+----------------------------------------------------------------- + 1 | {"id": 1, "name": "widget", "size": "large", "stocked": true} + 2 | {"id": 2, "name": "sprocket", "size": "small", "stocked": true} + 3 | {"id": 3, "name": "gizmo", "size": "medium", "stocked": true} + 4 | {"id": 4, "name": "doodad", "size": "large", "stocked": false} (4 rows) SELECT clickhouse_raw_query('DROP DATABASE json_test'); diff --git a/test/expected/json_1.out b/test/expected/json_1.out index 6e8738b..4087764 100644 --- a/test/expected/json_1.out +++ b/test/expected/json_1.out @@ -34,7 +34,7 @@ IMPORT FOREIGN SCHEMA "json_test" FROM SERVER binary_json_loopback INTO json_bin Column | Type | Collation | Nullable | Default | FDW options --------+---------+-----------+----------+---------+------------- id | integer | | not null | | - data | json | | not null | | + data | jsonb | | not null | | Server: 
binary_json_loopback FDW options: (database 'json_test', table_name 'things', engine 'MergeTree') @@ -44,7 +44,7 @@ IMPORT FOREIGN SCHEMA "json_test" FROM SERVER http_json_loopback INTO json_http; Column | Type | Collation | Nullable | Default | FDW options --------+---------+-----------+----------+---------+------------- id | integer | | not null | | - data | json | | not null | | + data | jsonb | | not null | | Server: http_json_loopback FDW options: (database 'json_test', table_name 'things', engine 'MergeTree') @@ -64,12 +64,12 @@ SELECT * FROM json_bin.things ORDER BY id; ERROR: pg_clickhouse: unsupported column type: JSON DETAIL: Remote Query: SELECT id, data FROM json_test.things ORDER BY id ASC NULLS LAST SELECT * FROM json_http.things ORDER BY id; - id | data -----+------------------------------------------------------------ - 1 | {"id":"1","name":"widget","size":"large","stocked":true} - 2 | {"id":"2","name":"sprocket","size":"small","stocked":true} - 3 | {"id":"3","name":"gizmo","size":"medium","stocked":true} - 4 | {"id":"4","name":"doodad","size":"large","stocked":false} + id | data +----+------------------------------------------------------------------- + 1 | {"id": "1", "name": "widget", "size": "large", "stocked": true} + 2 | {"id": "2", "name": "sprocket", "size": "small", "stocked": true} + 3 | {"id": "3", "name": "gizmo", "size": "medium", "stocked": true} + 4 | {"id": "4", "name": "doodad", "size": "large", "stocked": false} (4 rows) SELECT clickhouse_raw_query('DROP DATABASE json_test'); diff --git a/test/sql/gucs.sql b/test/sql/gucs.sql index 6b929ee..2258af9 100644 --- a/test/sql/gucs.sql +++ b/test/sql/gucs.sql @@ -1,8 +1,8 @@ \unset ECHO SET client_min_messages = notice; --- Load pg_clickhouse by calling one of its functions. -SELECT ch_noop_bigint(''); +-- Load pg_clickhouse; +LOAD 'pg_clickhouse'; -- Test parsing. DO $do$ @@ -12,17 +12,17 @@ BEGIN FOREACH cfg IN ARRAY ARRAY[ -- Success. 
'', - 'join_use_nulls=1', - 'join_use_nulls=1, xyz=true', - $$ additional_result_filter = 'x != 2' $$, - $$ additional_result_filter = 'x != 2' ,join_use_nulls = 1 $$, - $$ xxx = DEFAULT, yyy = foo\,bar, zzz = 'He said, \'Hello\'', aaa = hi\ there $$, + 'join_use_nulls 1', + 'join_use_nulls 1, xyz true', + $$ additional_result_filter 'x != 2' $$, + $$ additional_result_filter 'x != 2' ,join_use_nulls 1 $$, + $$ xxx DEFAULT, yyy foo\,bar, zzz 'He said, \'Hello\'', aaa hi\ there $$, -- Failure. 'join_use_nulls', - 'join_use_nulls xyz', - $$ additional_result_filter = 'x != 2 $$, - 'join_use_nulls = xyz no_preceding_comma = 2' + 'join_use_nulls = xyz', + $$ additional_result_filter 'x != 2 $$, + 'join_use_nulls xyz no_preceding_comma = 2' ] LOOP BEGIN RAISE NOTICE 'OK `%`', set_config('pg_clickhouse.session_settings', cfg, true); @@ -93,19 +93,19 @@ SELECT name, value -- Customize all of the above settings. SET pg_clickhouse.session_settings TO $$ - connect_timeout = 2, - count_distinct_implementation = uniq, - join_algorithm = 'prefer_partial_merge', - join_use_nulls = 0, - join_use_nulls = 1, - log_queries_min_type = QUERY_FINISH, - max_block_size = 32768, - max_execution_time = 45, - max_result_rows = 1024, - metrics_perf_events_list = 'this,that', - network_compression_method = ZSTD, - poll_interval = 5, - totals_mode = after_having_auto + connect_timeout 2, + count_distinct_implementation uniq, + join_algorithm 'prefer_partial_merge', + join_use_nulls 0, + join_use_nulls 1, + log_queries_min_type QUERY_FINISH, + max_block_size 32768, + max_execution_time 45, + max_result_rows 1024, + metrics_perf_events_list 'this,that', + network_compression_method ZSTD, + poll_interval 5, + totals_mode after_having_auto $$; SHOW pg_clickhouse.session_settings;