Skip to content

Commit d0061d5

Browse files
jsbuenoblurb-it[bot]vstinner
authored
gh-146440: Add array_hook parameter to JSON decoders (#146441)
Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> Co-authored-by: Victor Stinner <vstinner@python.org>
1 parent 256907d commit d0061d5

File tree

8 files changed

+114
-11
lines changed

8 files changed

+114
-11
lines changed

Doc/library/json.rst

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -264,7 +264,7 @@ Basic Usage
264264

265265
.. function:: load(fp, *, cls=None, object_hook=None, parse_float=None, \
266266
parse_int=None, parse_constant=None, \
267-
object_pairs_hook=None, **kw)
267+
object_pairs_hook=None, array_hook=None, **kw)
268268
269269
Deserialize *fp* to a Python object
270270
using the :ref:`JSON-to-Python conversion table <json-to-py-table>`.
@@ -301,6 +301,15 @@ Basic Usage
301301
Default ``None``.
302302
:type object_pairs_hook: :term:`callable` | None
303303

304+
:param array_hook:
305+
If set, a function that is called with the result of
306+
any JSON array literal decoded as a Python list.
307+
The return value of this function will be used
308+
instead of the :class:`list`.
309+
This feature can be used to implement custom decoders.
310+
Default ``None``.
311+
:type array_hook: :term:`callable` | None
312+
304313
:param parse_float:
305314
If set, a function that is called with
306315
the string of every JSON float to be decoded.
@@ -349,7 +358,10 @@ Basic Usage
349358
conversion length limitation <int_max_str_digits>` to help avoid denial
350359
of service attacks.
351360

352-
.. function:: loads(s, *, cls=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, object_pairs_hook=None, **kw)
361+
.. versionchanged:: next
362+
Added the optional *array_hook* parameter.
363+
364+
.. function:: loads(s, *, cls=None, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, object_pairs_hook=None, array_hook=None, **kw)
353365

354366
Identical to :func:`load`, but instead of a file-like object,
355367
deserialize *s* (a :class:`str`, :class:`bytes` or :class:`bytearray`
@@ -367,7 +379,7 @@ Basic Usage
367379
Encoders and Decoders
368380
---------------------
369381

370-
.. class:: JSONDecoder(*, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, strict=True, object_pairs_hook=None)
382+
.. class:: JSONDecoder(*, object_hook=None, parse_float=None, parse_int=None, parse_constant=None, strict=True, object_pairs_hook=None, array_hook=None)
371383

372384
Simple JSON decoder.
373385

@@ -412,6 +424,14 @@ Encoders and Decoders
412424
.. versionchanged:: 3.1
413425
Added support for *object_pairs_hook*.
414426

427+
*array_hook* is an optional function that will be called with the
428+
result of every JSON array decoded as a list. The return value of
429+
*array_hook* will be used instead of the :class:`list`. This feature can be
430+
used to implement custom decoders.
431+
432+
.. versionchanged:: next
433+
Added support for *array_hook*.
434+
415435
*parse_float* is an optional function that will be called with the string of
416436
every JSON float to be decoded. By default, this is equivalent to
417437
``float(num_str)``. This can be used to use another datatype or parser for

Doc/whatsnew/3.15.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -801,6 +801,17 @@ inspect
801801
for :func:`~inspect.getdoc`.
802802
(Contributed by Serhiy Storchaka in :gh:`132686`.)
803803

804+
json
805+
----
806+
807+
* Add the *array_hook* parameter to :func:`~json.load` and
808+
:func:`~json.loads` functions:
809+
allow a callback for JSON literal array types to customize Python lists in
810+
the resulting decoded object. Passing :class:`frozendict` to
811+
*object_pairs_hook* and :class:`tuple` to *array_hook* will yield a
812+
deeply nested immutable Python structure representing the JSON data.
813+
(Contributed by Joao S. O. Bueno in :gh:`146440`.)
814+
804815

805816
locale
806817
------

Lib/json/__init__.py

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True,
241241
**kw).encode(obj)
242242

243243

244-
_default_decoder = JSONDecoder(object_hook=None, object_pairs_hook=None)
244+
_default_decoder = JSONDecoder()
245245

246246

247247
def detect_encoding(b):
@@ -275,7 +275,8 @@ def detect_encoding(b):
275275

276276

277277
def load(fp, *, cls=None, object_hook=None, parse_float=None,
278-
parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
278+
parse_int=None, parse_constant=None, object_pairs_hook=None,
279+
array_hook=None, **kw):
279280
"""Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
280281
a JSON document) to a Python object.
281282
@@ -291,17 +292,26 @@ def load(fp, *, cls=None, object_hook=None, parse_float=None,
291292
``object_hook`` is also defined, the ``object_pairs_hook`` takes
292293
priority.
293294
295+
``array_hook`` is an optional function that will be called with the result
296+
of any literal array decode (a ``list``). The return value of this function will
297+
be used instead of the ``list``. This feature can be used along with
298+
``object_pairs_hook`` to customize the resulting data structure - for example,
299+
by setting that to ``frozendict`` and ``array_hook`` to ``tuple``, one can get
300+
a deep immutable data structure from any JSON data.
301+
294302
To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
295303
kwarg; otherwise ``JSONDecoder`` is used.
296304
"""
297305
return loads(fp.read(),
298306
cls=cls, object_hook=object_hook,
299307
parse_float=parse_float, parse_int=parse_int,
300-
parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, **kw)
308+
parse_constant=parse_constant, object_pairs_hook=object_pairs_hook,
309+
array_hook=array_hook, **kw)
301310

302311

303312
def loads(s, *, cls=None, object_hook=None, parse_float=None,
304-
parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
313+
parse_int=None, parse_constant=None, object_pairs_hook=None,
314+
array_hook=None, **kw):
305315
"""Deserialize ``s`` (a ``str``, ``bytes`` or ``bytearray`` instance
306316
containing a JSON document) to a Python object.
307317
@@ -317,6 +327,13 @@ def loads(s, *, cls=None, object_hook=None, parse_float=None,
317327
``object_hook`` is also defined, the ``object_pairs_hook`` takes
318328
priority.
319329
330+
``array_hook`` is an optional function that will be called with the result
331+
of any literal array decode (a ``list``). The return value of this function will
332+
be used instead of the ``list``. This feature can be used along with
333+
``object_pairs_hook`` to customize the resulting data structure - for example,
334+
by setting that to ``frozendict`` and ``array_hook`` to ``tuple``, one can get
335+
a deep immutable data structure from any JSON data.
336+
320337
``parse_float``, if specified, will be called with the string
321338
of every JSON float to be decoded. By default this is equivalent to
322339
float(num_str). This can be used to use another datatype or parser
@@ -347,14 +364,17 @@ def loads(s, *, cls=None, object_hook=None, parse_float=None,
347364

348365
if (cls is None and object_hook is None and
349366
parse_int is None and parse_float is None and
350-
parse_constant is None and object_pairs_hook is None and not kw):
367+
parse_constant is None and object_pairs_hook is None
368+
and array_hook is None and not kw):
351369
return _default_decoder.decode(s)
352370
if cls is None:
353371
cls = JSONDecoder
354372
if object_hook is not None:
355373
kw['object_hook'] = object_hook
356374
if object_pairs_hook is not None:
357375
kw['object_pairs_hook'] = object_pairs_hook
376+
if array_hook is not None:
377+
kw['array_hook'] = array_hook
358378
if parse_float is not None:
359379
kw['parse_float'] = parse_float
360380
if parse_int is not None:

Lib/json/decoder.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,7 @@ def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
218218
pairs = object_hook(pairs)
219219
return pairs, end
220220

221-
def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
221+
def JSONArray(s_and_end, scan_once, array_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
222222
s, end = s_and_end
223223
values = []
224224
nextchar = s[end:end + 1]
@@ -227,6 +227,8 @@ def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
227227
nextchar = s[end:end + 1]
228228
# Look-ahead for trivial empty array
229229
if nextchar == ']':
230+
if array_hook is not None:
231+
values = array_hook(values)
230232
return values, end + 1
231233
_append = values.append
232234
while True:
@@ -256,6 +258,8 @@ def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
256258
if nextchar == ']':
257259
raise JSONDecodeError("Illegal trailing comma before end of array", s, comma_idx)
258260

261+
if array_hook is not None:
262+
values = array_hook(values)
259263
return values, end
260264

261265

@@ -291,7 +295,7 @@ class JSONDecoder(object):
291295

292296
def __init__(self, *, object_hook=None, parse_float=None,
293297
parse_int=None, parse_constant=None, strict=True,
294-
object_pairs_hook=None):
298+
object_pairs_hook=None, array_hook=None):
295299
"""``object_hook``, if specified, will be called with the result
296300
of every JSON object decoded and its return value will be used in
297301
place of the given ``dict``. This can be used to provide custom
@@ -304,6 +308,14 @@ def __init__(self, *, object_hook=None, parse_float=None,
304308
If ``object_hook`` is also defined, the ``object_pairs_hook`` takes
305309
priority.
306310
311+
``array_hook`` is an optional function that will be called with the
312+
result of any literal array decode (a ``list``). The return value of
313+
this function will be used instead of the ``list``. This feature can
314+
be used along with ``object_pairs_hook`` to customize the resulting data
315+
structure - for example, by setting that to ``frozendict`` and
316+
``array_hook`` to ``tuple``, one can get a deep immutable data
317+
structure from any JSON data.
318+
307319
``parse_float``, if specified, will be called with the string
308320
of every JSON float to be decoded. By default this is equivalent to
309321
float(num_str). This can be used to use another datatype or parser
@@ -330,6 +342,7 @@ def __init__(self, *, object_hook=None, parse_float=None,
330342
self.parse_constant = parse_constant or _CONSTANTS.__getitem__
331343
self.strict = strict
332344
self.object_pairs_hook = object_pairs_hook
345+
self.array_hook = array_hook
333346
self.parse_object = JSONObject
334347
self.parse_array = JSONArray
335348
self.parse_string = scanstring

Lib/json/scanner.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ def py_make_scanner(context):
2323
parse_constant = context.parse_constant
2424
object_hook = context.object_hook
2525
object_pairs_hook = context.object_pairs_hook
26+
array_hook = context.array_hook
2627
memo = context.memo
2728

2829
def _scan_once(string, idx):
@@ -37,7 +38,7 @@ def _scan_once(string, idx):
3738
return parse_object((string, idx + 1), strict,
3839
_scan_once, object_hook, object_pairs_hook, memo)
3940
elif nextchar == '[':
40-
return parse_array((string, idx + 1), _scan_once)
41+
return parse_array((string, idx + 1), _scan_once, array_hook)
4142
elif nextchar == 'n' and string[idx:idx + 4] == 'null':
4243
return None, idx + 4
4344
elif nextchar == 't' and string[idx:idx + 4] == 'true':

Lib/test/test_json/test_decode.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,24 @@ def test_object_pairs_hook(self):
6969
object_pairs_hook=OrderedDict),
7070
OrderedDict([('empty', OrderedDict())]))
7171

72+
def test_array_hook(self):
73+
s = '[1, 2, 3]'
74+
t = self.loads(s, array_hook=tuple)
75+
self.assertEqual(t, (1, 2, 3))
76+
self.assertEqual(type(t), tuple)
77+
78+
# Nested array in inner structure with object_hook
79+
s = '{"xkd": [[1], [2], [3]]}'
80+
p = frozendict(xkd=((1,), (2,), (3,)))
81+
data = self.loads(s, object_hook=frozendict, array_hook=tuple)
82+
self.assertEqual(data, p)
83+
self.assertEqual(type(data), frozendict)
84+
self.assertEqual(type(data["xkd"]), tuple)
85+
for item in data["xkd"]:
86+
self.assertEqual(type(item), tuple)
87+
88+
self.assertEqual(self.loads('[]', array_hook=tuple), ())
89+
7290
def test_decoder_optimizations(self):
7391
# Several optimizations were made that skip over calls to
7492
# the whitespace regex, so this test is designed to try and
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
:mod:`json`: Add the *array_hook* parameter to :func:`~json.load` and
2+
:func:`~json.loads` functions:
3+
allow a callback for JSON literal array types to customize Python lists in the
4+
resulting decoded object. Passing :class:`frozendict` to
5+
*object_pairs_hook* and :class:`tuple` to *array_hook* will yield a
6+
deeply nested immutable Python structure representing the JSON data.

Modules/_json.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ typedef struct _PyScannerObject {
3030
signed char strict;
3131
PyObject *object_hook;
3232
PyObject *object_pairs_hook;
33+
PyObject *array_hook;
3334
PyObject *parse_float;
3435
PyObject *parse_int;
3536
PyObject *parse_constant;
@@ -41,6 +42,7 @@ static PyMemberDef scanner_members[] = {
4142
{"strict", Py_T_BOOL, offsetof(PyScannerObject, strict), Py_READONLY, "strict"},
4243
{"object_hook", _Py_T_OBJECT, offsetof(PyScannerObject, object_hook), Py_READONLY, "object_hook"},
4344
{"object_pairs_hook", _Py_T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), Py_READONLY},
45+
{"array_hook", _Py_T_OBJECT, offsetof(PyScannerObject, array_hook), Py_READONLY},
4446
{"parse_float", _Py_T_OBJECT, offsetof(PyScannerObject, parse_float), Py_READONLY, "parse_float"},
4547
{"parse_int", _Py_T_OBJECT, offsetof(PyScannerObject, parse_int), Py_READONLY, "parse_int"},
4648
{"parse_constant", _Py_T_OBJECT, offsetof(PyScannerObject, parse_constant), Py_READONLY, "parse_constant"},
@@ -720,6 +722,7 @@ scanner_traverse(PyObject *op, visitproc visit, void *arg)
720722
Py_VISIT(Py_TYPE(self));
721723
Py_VISIT(self->object_hook);
722724
Py_VISIT(self->object_pairs_hook);
725+
Py_VISIT(self->array_hook);
723726
Py_VISIT(self->parse_float);
724727
Py_VISIT(self->parse_int);
725728
Py_VISIT(self->parse_constant);
@@ -732,6 +735,7 @@ scanner_clear(PyObject *op)
732735
PyScannerObject *self = PyScannerObject_CAST(op);
733736
Py_CLEAR(self->object_hook);
734737
Py_CLEAR(self->object_pairs_hook);
738+
Py_CLEAR(self->array_hook);
735739
Py_CLEAR(self->parse_float);
736740
Py_CLEAR(self->parse_int);
737741
Py_CLEAR(self->parse_constant);
@@ -942,6 +946,12 @@ _parse_array_unicode(PyScannerObject *s, PyObject *memo, PyObject *pystr, Py_ssi
942946
goto bail;
943947
}
944948
*next_idx_ptr = idx + 1;
949+
/* if array_hook is not None: return array_hook(rval) */
950+
if (!Py_IsNone(s->array_hook)) {
951+
val = PyObject_CallOneArg(s->array_hook, rval);
952+
Py_DECREF(rval);
953+
return val;
954+
}
945955
return rval;
946956
bail:
947957
Py_XDECREF(val);
@@ -1259,6 +1269,10 @@ scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
12591269
s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
12601270
if (s->object_pairs_hook == NULL)
12611271
goto bail;
1272+
s->array_hook = PyObject_GetAttrString(ctx, "array_hook");
1273+
if (s->array_hook == NULL) {
1274+
goto bail;
1275+
}
12621276
s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
12631277
if (s->parse_float == NULL)
12641278
goto bail;

0 commit comments

Comments
 (0)