Skip to content

Commit 4fa4148

Browse files
committed
Add encoding_errors= parameter
1 parent 45b5f6c commit 4fa4148

File tree

8 files changed

+62
-11
lines changed

8 files changed

+62
-11
lines changed

accel.c

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,7 @@ typedef struct {
488488
PyObject *_next_seq_id;
489489
PyObject *rows;
490490
} py_str;
491+
char *encoding_errors;
491492
} StateObject;
492493

493494
static void read_options(MySQLAccelOptions *options, PyObject *dict);
@@ -502,6 +503,7 @@ static void State_clear_fields(StateObject *self) {
502503
DESTROY(self->type_codes);
503504
DESTROY(self->encodings);
504505
DESTROY(self->structsequence_desc.fields);
506+
DESTROY(self->encoding_errors);
505507
if (self->py_converters) {
506508
for (unsigned long i = 0; i < self->n_cols; i++) {
507509
Py_CLEAR(self->py_converters[i]);
@@ -574,6 +576,17 @@ static int State_init(StateObject *self, PyObject *args, PyObject *kwds) {
574576
if (py_unbuffered && PyObject_IsTrue(py_unbuffered)) {
575577
self->unbuffered = 1;
576578
}
579+
PyObject *py_encoding_errors = PyDict_GetItemString(py_options, "encoding_errors");
580+
if (py_encoding_errors) {
581+
self->encoding_errors = PyUnicode_AsUTF8(py_encoding_errors);
582+
if (!self->encoding_errors) goto error;
583+
}
584+
}
585+
586+
if (!self->encoding_errors) {
587+
self->encoding_errors = calloc(7, 1);
588+
if (!self->encoding_errors) goto error;
589+
memcpy(self->encoding_errors, "strict", 6);
577590
}
578591

579592
if (self->unbuffered) {
@@ -1371,7 +1384,7 @@ static PyObject *read_row_from_packet(
13711384
py_str = PyBytes_FromStringAndSize(out, out_l);
13721385
if (!py_str) goto error;
13731386
} else {
1374-
py_str = PyUnicode_Decode(out, out_l, py_state->encodings[i], "strict");
1387+
py_str = PyUnicode_Decode(out, out_l, py_state->encodings[i], py_state->encoding_errors);
13751388
if (!py_str) goto error;
13761389
}
13771390
py_item = PyObject_CallFunctionObjArgs(py_state->py_converters[i], py_str, NULL);
@@ -1384,7 +1397,7 @@ static PyObject *read_row_from_packet(
13841397
switch (py_state->type_codes[i]) {
13851398
case MYSQL_TYPE_NEWDECIMAL:
13861399
case MYSQL_TYPE_DECIMAL:
1387-
py_str = PyUnicode_Decode(out, out_l, py_state->encodings[i], "strict");
1400+
py_str = PyUnicode_Decode(out, out_l, py_state->encodings[i], py_state->encoding_errors);
13881401
if (!py_str) goto error;
13891402

13901403
py_item = PyObject_CallFunctionObjArgs(PyFunc.decimal_Decimal, py_str, NULL);
@@ -1431,7 +1444,7 @@ static PyObject *read_row_from_packet(
14311444
py_item = py_state->py_invalid_values[i];
14321445
Py_INCREF(py_item);
14331446
} else {
1434-
py_item = PyUnicode_Decode(orig_out, orig_out_l, "utf8", "strict");
1447+
py_item = PyUnicode_Decode(orig_out, orig_out_l, "ascii", py_state->encoding_errors);
14351448
if (!py_item) goto error;
14361449
}
14371450
break;
@@ -1451,7 +1464,7 @@ static PyObject *read_row_from_packet(
14511464
year, month, day, hour, minute, second, microsecond);
14521465
if (!py_item) {
14531466
PyErr_Clear();
1454-
py_item = PyUnicode_Decode(orig_out, orig_out_l, "utf8", "strict");
1467+
py_item = PyUnicode_Decode(orig_out, orig_out_l, "ascii", py_state->encoding_errors);
14551468
}
14561469
if (!py_item) goto error;
14571470
break;
@@ -1468,7 +1481,7 @@ static PyObject *read_row_from_packet(
14681481
py_item = py_state->py_invalid_values[i];
14691482
Py_INCREF(py_item);
14701483
} else {
1471-
py_item = PyUnicode_Decode(orig_out, orig_out_l, "utf8", "strict");
1484+
py_item = PyUnicode_Decode(orig_out, orig_out_l, "ascii", py_state->encoding_errors);
14721485
if (!py_item) goto error;
14731486
}
14741487
break;
@@ -1483,7 +1496,7 @@ static PyObject *read_row_from_packet(
14831496
year, month, day);
14841497
if (!py_item) {
14851498
PyErr_Clear();
1486-
py_item = PyUnicode_Decode(orig_out, orig_out_l, "utf8", "strict");
1499+
py_item = PyUnicode_Decode(orig_out, orig_out_l, "ascii", py_state->encoding_errors);
14871500
}
14881501
if (!py_item) goto error;
14891502
break;
@@ -1495,7 +1508,7 @@ static PyObject *read_row_from_packet(
14951508
py_item = py_state->py_invalid_values[i];
14961509
Py_INCREF(py_item);
14971510
} else {
1498-
py_item = PyUnicode_Decode(orig_out, orig_out_l, "utf8", "strict");
1511+
py_item = PyUnicode_Decode(orig_out, orig_out_l, "ascii", py_state->encoding_errors);
14991512
if (!py_item) goto error;
15001513
}
15011514
break;
@@ -1533,7 +1546,7 @@ static PyObject *read_row_from_packet(
15331546
sign * microsecond);
15341547
if (!py_item) {
15351548
PyErr_Clear();
1536-
py_item = PyUnicode_Decode(orig_out, orig_out_l, "utf8", "strict");
1549+
py_item = PyUnicode_Decode(orig_out, orig_out_l, "ascii", py_state->encoding_errors);
15371550
}
15381551
if (!py_item) goto error;
15391552
break;
@@ -1568,7 +1581,7 @@ static PyObject *read_row_from_packet(
15681581
break;
15691582
}
15701583

1571-
py_item = PyUnicode_Decode(out, out_l, py_state->encodings[i], "strict");
1584+
py_item = PyUnicode_Decode(out, out_l, py_state->encodings[i], py_state->encoding_errors);
15721585
if (!py_item) goto error;
15731586

15741587
// Parse JSON string.

singlestoredb/config.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,12 @@
8484
environ='SINGLESTOREDB_CHARSET',
8585
)
8686

87+
register_option(
88+
'encoding_errors', 'string', check_str, 'strict',
89+
'Specifies the error handling behavior for decoding string values.',
90+
environ='SINGLESTOREDB_ENCODING_ERRORS',
91+
)
92+
8793
register_option(
8894
'local_infile', 'bool', check_bool, False,
8995
'Should it be possible to load local files?',

singlestoredb/connection.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1280,6 +1280,7 @@ def connect(
12801280
connect_timeout: Optional[int] = None,
12811281
nan_as_null: Optional[bool] = None,
12821282
inf_as_null: Optional[bool] = None,
1283+
encoding_errors: Optional[str] = None,
12831284
) -> Connection:
12841285
"""
12851286
Return a SingleStoreDB connection.
@@ -1352,6 +1353,8 @@ def connect(
13521353
inf_as_null: bool, optional
13531354
Should Inf values be treated as NULLs when used in parameter
13541355
substitutions including uploaded data?
1356+
encoding_errors: str, optional
1357+
The name of the error handler to use when decoding string values fails (e.g., 'strict' or 'backslashreplace')
13551358
13561359
Examples
13571360
--------

singlestoredb/http/connection.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1032,5 +1032,6 @@ def connect(
10321032
connect_timeout: Optional[int] = None,
10331033
nan_as_null: Optional[bool] = None,
10341034
inf_as_null: Optional[bool] = None,
1035+
encoding_errors: Optional[str] = None,
10351036
) -> Connection:
10361037
return Connection(**dict(locals()))

singlestoredb/management/manager.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ def _check(
7070
new_params = params.copy()
7171
if 'json' in new_params:
7272
for k, v in new_params['json'].items():
73-
if 'password' in k.lower():
73+
if 'password' in k.lower() and v:
7474
new_params['json'][k] = '*' * len(v)
7575
msg += ': {}'.format(str(new_params))
7676
raise ManagementError(errno=res.status_code, msg=msg)

singlestoredb/mysql/connection.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,7 @@ def __init__( # noqa: C901
304304
multi_statements=None,
305305
nan_as_null=None,
306306
inf_as_null=None,
307+
encoding_errors='strict',
307308
):
308309
BaseConnection.__init__(**dict(locals()))
309310

@@ -416,6 +417,7 @@ def _config(key, arg):
416417
self.charset = charset or DEFAULT_CHARSET
417418
self.collation = collation
418419
self.use_unicode = use_unicode
420+
self.encoding_errors = encoding_errors
419421

420422
self.encoding = charset_by_name(self.charset).encoding
421423

@@ -1455,6 +1457,7 @@ def __init__(self, connection, unbuffered=False):
14551457
self.unbuffered_active = False
14561458
self.converters = []
14571459
self.fields = []
1460+
self.encoding_errors = self.connection.encoding_errors
14581461
if unbuffered:
14591462
try:
14601463
self.init_unbuffered_query()
@@ -1625,7 +1628,7 @@ def _read_row_from_packet(self, packet):
16251628
break
16261629
if data is not None:
16271630
if encoding is not None:
1628-
data = data.decode(encoding)
1631+
data = data.decode(encoding, errors=self.encoding_errors)
16291632
if DEBUG:
16301633
print('DEBUG: DATA = ', data)
16311634
if converter is not None:

singlestoredb/tests/test.sql

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,4 +365,20 @@ CREATE ROWSTORE TABLE IF NOT EXISTS `extended_types` (
365365
COLLATE='utf8_unicode_ci';
366366

367367

368+
--
369+
-- Invalid utf8 table
370+
--
371+
-- These sequences were breaking during fetch on a customer's machine;
372+
-- however, they seem to work fine in our tests.
373+
--
374+
CREATE TABLE IF NOT EXISTS `badutf8` (
375+
`text` TEXT CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci
376+
)
377+
COLLATE='utf8_unicode_ci';
378+
379+
380+
INSERT INTO `badutf8` VALUES ('🥷🧙👻.eth');
381+
INSERT INTO `badutf8` VALUES ('🥒rick.eth');
382+
383+
368384
COMMIT;

singlestoredb/tests/test_basics.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1180,6 +1180,15 @@ def test_inf_as_null(self):
11801180
cur.execute('SELECT %s :> DOUBLE AS X', [1.234])
11811181
self.assertEqual(1.234, list(cur)[0][0])
11821182

1183+
def test_encoding_errors(self):
1184+
with s2.connect(
1185+
database=type(self).dbname,
1186+
encoding_errors='backslashreplace',
1187+
) as conn:
1188+
with conn.cursor() as cur:
1189+
cur.execute('SELECT * FROM badutf8')
1190+
list(cur)
1191+
11831192

11841193
if __name__ == '__main__':
11851194
import nose2

0 commit comments

Comments
 (0)