Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 30 additions & 7 deletions Doc/library/lzma.rst
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@
Compressing and decompressing data in memory
--------------------------------------------

.. class:: LZMACompressor(format=FORMAT_XZ, check=-1, preset=None, filters=None)
.. class:: LZMACompressor(format=FORMAT_XZ, check=-1, preset=None, filters=None, *, mt_options=None)

Create a compressor object, which can be used to compress data incrementally.

Expand Down Expand Up @@ -196,13 +196,23 @@
Higher presets produce smaller output, but make the compression process
slower.

Additionally, when *format* is specified as :const:`!FORMAT_XZ`, adding the

Check warning on line 199 in Doc/library/lzma.rst

View workflow job for this annotation

GitHub Actions / Docs / Docs

py:const reference target not found: FORMAT_XZ [ref.const]
*mt_options* dictionary argument instructs the module to use the
multithreaded compressor implementation. The following options in
*mt_options* are currently recognized; anything else is silently ignored:

* *threads*: the desired number of threads the underlying library should use.

* *block_size*: the maximum uncompressed size of a block.

.. note::

In addition to being more CPU-intensive, compression with higher presets
also requires much more memory (and produces output that needs more memory
to decompress). With preset ``9`` for example, the overhead for an
:class:`LZMACompressor` object can be as high as 800 MiB. For this reason,
it is generally best to stick with the default preset.
:class:`LZMACompressor` object can be as high as 800 MiB per worker
thread. For this reason, it is generally best to stick with the default
preset.

The *filters* argument (if provided) should be a filter chain specifier.
See :ref:`filter-chain-specs` for details.
Expand Down Expand Up @@ -246,6 +256,19 @@
:const:`FORMAT_RAW`, but should not be used for other formats.
See :ref:`filter-chain-specs` for more information about filter chains.

Additionally, when *format* is specified as :const:`!FORMAT_XZ`, adding the

Check warning on line 259 in Doc/library/lzma.rst

View workflow job for this annotation

GitHub Actions / Docs / Docs

py:const reference target not found: FORMAT_XZ [ref.const]
*mt_options* dictionary argument instructs the module to use the
multithreaded decompressor implementation which decompresses blocks in
parallel. The following options in *mt_options* are currently recognized;
anything else is silently ignored:

* *threads*: the desired number of threads the underlying library should use

* *memlimit_threading*: a soft memory limit. It lets the underlying library
scale down the actual number of worker threads to stay within the budget.
At least one worker is always used, even if that exceeds this limit. Use the
*memlimit* argument if there is a hard memory limit to enforce.

.. note::
This class does not transparently handle inputs containing multiple
compressed streams, unlike :func:`decompress` and :class:`LZMAFile`. To
Expand Down Expand Up @@ -302,16 +325,16 @@

.. versionadded:: 3.5

.. function:: compress(data, format=FORMAT_XZ, check=-1, preset=None, filters=None)
.. function:: compress(data, format=FORMAT_XZ, check=-1, preset=None, filters=None, *, mt_options=None)

Compress *data* (a :class:`bytes` object), returning the compressed data as a
:class:`bytes` object.

See :class:`LZMACompressor` above for a description of the *format*, *check*,
*preset* and *filters* arguments.
*preset*, *filters* and *mt_options* arguments.


.. function:: decompress(data, format=FORMAT_AUTO, memlimit=None, filters=None)
.. function:: decompress(data, format=FORMAT_AUTO, memlimit=None, filters=None, *, mt_options=None)

Decompress *data* (a :class:`bytes` object), returning the uncompressed data
as a :class:`bytes` object.
Expand All @@ -320,7 +343,7 @@
decompress all of these streams, and return the concatenation of the results.

See :class:`LZMADecompressor` above for a description of the *format*,
*memlimit* and *filters* arguments.
*memlimit*, *filters* and *mt_options* arguments.


Miscellaneous
Expand Down
27 changes: 18 additions & 9 deletions Lib/lzma.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ class LZMAFile(_streams.BaseStream):
"""

def __init__(self, filename=None, mode="r", *,
format=None, check=-1, preset=None, filters=None):
format=None, check=-1, preset=None, filters=None,
mt_options=None):
"""Open an LZMA-compressed file in binary mode.

filename can be either an actual file name (given as a str,
Expand Down Expand Up @@ -102,14 +103,18 @@ def __init__(self, filename=None, mode="r", *,
raise ValueError("Cannot specify a preset compression "
"level when opening a file for reading")
if format is None:
format = FORMAT_AUTO
if mt_options is None:
format = FORMAT_AUTO
else:
format = FORMAT_XZ
mode_code = _MODE_READ
elif mode in ("w", "wb", "a", "ab", "x", "xb"):
if format is None:
format = FORMAT_XZ
mode_code = _MODE_WRITE
self._compressor = LZMACompressor(format=format, check=check,
preset=preset, filters=filters)
preset=preset, filters=filters,
mt_options=mt_options)
self._pos = 0
else:
raise ValueError("Invalid mode: {!r}".format(mode))
Expand Down Expand Up @@ -324,29 +329,33 @@ def open(filename, mode="rb", *,
return binary_file


def compress(data, format=FORMAT_XZ, check=-1, preset=None, filters=None):
def compress(data, format=FORMAT_XZ, check=-1, preset=None, filters=None, *,
mt_options=None):
"""Compress a block of data.

Refer to LZMACompressor's docstring for a description of the
optional arguments *format*, *check*, *preset* and *filters*.
optional arguments *format*, *check*, *preset*, *filters* and *mt_options*.

For incremental compression, use an LZMACompressor instead.
"""
comp = LZMACompressor(format, check, preset, filters)
comp = LZMACompressor(format, check, preset, filters,
mt_options=mt_options)
return comp.compress(data) + comp.flush()


def decompress(data, format=FORMAT_AUTO, memlimit=None, filters=None):
def decompress(data, format=FORMAT_AUTO, memlimit=None, filters=None, *,
mt_options=None):
"""Decompress a block of data.

Refer to LZMADecompressor's docstring for a description of the
optional arguments *format*, *check* and *filters*.
optional arguments *format*, *memlimit*, *filters* and *mt_options*.

For incremental decompression, use an LZMADecompressor instead.
"""
results = []
while True:
decomp = LZMADecompressor(format, memlimit, filters)
decomp = LZMADecompressor(format, memlimit, filters,
mt_options=mt_options)
try:
res = decomp.decompress(data)
except LZMAError:
Expand Down
45 changes: 45 additions & 0 deletions Lib/test/test_lzma.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,35 @@ def test_bad_filter_spec(self):
with self.assertRaises(ValueError):
LZMACompressor(filters=[{"id": lzma.FILTER_X86, "foo": 0}])

def test_bad_mt_options(self):
with self.assertRaises(TypeError):
LZMACompressor(format=lzma.FORMAT_XZ, mt_options=3)
with self.assertRaises(TypeError):
LZMACompressor(format=lzma.FORMAT_XZ, mt_options={"threads": 3.45})
with self.assertRaises(TypeError):
LZMACompressor(format=lzma.FORMAT_XZ, mt_options={"flags": "asdf"})
# Can only specify MT encoder with XZ
with self.assertRaises(ValueError):
LZMACompressor(format=lzma.FORMAT_AUTO, mt_options=MT_OPTIONS_1)
with self.assertRaises(ValueError):
LZMACompressor(format=lzma.FORMAT_RAW, mt_options=MT_OPTIONS_1)
with self.assertRaises(ValueError):
LZMACompressor(format=lzma.FORMAT_ALONE, mt_options=MT_OPTIONS_1)

with self.assertRaises(TypeError):
LZMADecompressor(format=lzma.FORMAT_XZ, mt_options=3)
with self.assertRaises(TypeError):
LZMADecompressor(format=lzma.FORMAT_XZ,
mt_options={"threads": 3.45})
with self.assertRaises(TypeError):
LZMADecompressor(format=lzma.FORMAT_XZ,
mt_options={"flags": "asdf"})
# Can only specify MT decoder with XZ
with self.assertRaises(ValueError):
LZMADecompressor(format=lzma.FORMAT_RAW, mt_options=MT_OPTIONS_1)
with self.assertRaises(ValueError):
LZMADecompressor(format=lzma.FORMAT_ALONE, mt_options=MT_OPTIONS_1)

def test_decompressor_after_eof(self):
lzd = LZMADecompressor()
lzd.decompress(COMPRESSED_XZ)
Expand All @@ -85,6 +114,10 @@ def test_decompressor_memlimit(self):
lzd = LZMADecompressor(lzma.FORMAT_XZ, memlimit=1024)
self.assertRaises(LZMAError, lzd.decompress, COMPRESSED_XZ)

lzd = LZMADecompressor(lzma.FORMAT_XZ, memlimit=1024,
mt_options=MT_OPTIONS_1)
self.assertRaises(LZMAError, lzd.decompress, COMPRESSED_XZ)

lzd = LZMADecompressor(lzma.FORMAT_ALONE, memlimit=1024)
self.assertRaises(LZMAError, lzd.decompress, COMPRESSED_ALONE)

Expand All @@ -109,6 +142,10 @@ def test_decompressor_xz(self):
lzd = LZMADecompressor(lzma.FORMAT_XZ)
self._test_decompressor(lzd, COMPRESSED_XZ, lzma.CHECK_CRC64)

def test_decompressor_xz_mt(self):
lzd = LZMADecompressor(lzma.FORMAT_XZ, mt_options=MT_OPTIONS_1)
self._test_decompressor(lzd, COMPRESSED_XZ, lzma.CHECK_CRC64)

def test_decompressor_alone(self):
lzd = LZMADecompressor(lzma.FORMAT_ALONE)
self._test_decompressor(lzd, COMPRESSED_ALONE, lzma.CHECK_NONE)
Expand Down Expand Up @@ -281,6 +318,12 @@ def test_roundtrip_xz(self):
lzd = LZMADecompressor()
self._test_decompressor(lzd, cdata, lzma.CHECK_CRC64)

def test_roundtrip_xz_mt(self):
lzc = LZMACompressor(format=lzma.FORMAT_XZ, mt_options=MT_OPTIONS_1)
cdata = lzc.compress(INPUT) + lzc.flush()
lzd = LZMADecompressor()
self._test_decompressor(lzd, cdata, lzma.CHECK_CRC64)

def test_roundtrip_alone(self):
lzc = LZMACompressor(lzma.FORMAT_ALONE)
cdata = lzc.compress(INPUT) + lzc.flush()
Expand Down Expand Up @@ -2092,6 +2135,8 @@ def test_filter_properties_roundtrip(self):
b'\xeb#\x182\x96I\xf7l\xf3r\x00'
)

MT_OPTIONS_1 = {"threads": 4}


if __name__ == "__main__":
unittest.main()
1 change: 1 addition & 0 deletions Misc/ACKS
Original file line number Diff line number Diff line change
Expand Up @@ -1052,6 +1052,7 @@ Toshio Kuratomi
Ilia Kurenkov
Vladimir Kushnir
Erno Kuusela
Ondřej Kuzník
Kabir Kwatra
Ross Lagerwall
Cameron Laird
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Support the multithreaded (MT) encoder and decoder in the :mod:`lzma` module.
Loading
Loading