From 9e64b4f061008151bc338d6983ac88eb65fe27ff Mon Sep 17 00:00:00 2001 From: awarde96 Date: Tue, 23 Sep 2025 09:30:01 +0000 Subject: [PATCH 1/3] Add initial compression options to config and api --- covjsonkit/api.py | 26 +++++++++++++++++++++++++- covjsonkit/config.py | 3 +++ covjsonkit/encoder/encoder.py | 1 - 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/covjsonkit/api.py b/covjsonkit/api.py index d0de8e3..17c45ee 100644 --- a/covjsonkit/api.py +++ b/covjsonkit/api.py @@ -68,13 +68,17 @@ def __init__(self, config=None): self.conf = CovjsonKitConfig.model_validate(config) logging.debug("Config loaded from dictionary: %s", self.conf) # noqa: E501 + self.compression = self.conf.compression + def encode(self, type, domaintype): if domaintype == "timeseries": domaintype = "PointSeries" elif domaintype == "trajectory": domaintype = "path" feature = self._feature_factory(domaintype.lower(), "encoder") - return feature(self.conf, domaintype) + coveragejson = feature(self.conf, domaintype) + coveragejson = self._compress(coveragejson) + return coveragejson def decode(self, covjson): requesttype = covjson["domainType"] @@ -93,3 +97,23 @@ def _feature_factory(self, feature_type, encoder_decoder): elif encoder_decoder == "decoder": features = features_decoder return features[feature_type] + + def _compress(self, data): + if self.compression == "zstd": + import zstandard as zstd + + cctx = zstd.ZstdCompressor(level=3) + compressed = cctx.compress(data) + return compressed + elif self.compression == "LZ4": + import lz4.frame + + compressed = lz4.frame.compress(data) + return compressed + # elif self.compression == "binpack": + # import binpacking + + # compressed = binpacking.pack(data) + # return compressed + else: + return data diff --git a/covjsonkit/config.py b/covjsonkit/config.py index 9fe2abd..0009509 100644 --- a/covjsonkit/config.py +++ b/covjsonkit/config.py @@ -1,5 +1,8 @@ +from typing import Literal + from conflator import ConfigModel class CovjsonKitConfig(ConfigModel): param_db: str = "ecmwf" + compression: Literal["zstd", "lz4", "binpack", None] = None diff --git a/covjsonkit/encoder/encoder.py b/covjsonkit/encoder/encoder.py index 26549f5..9b13855 100644 --- a/covjsonkit/encoder/encoder.py +++ b/covjsonkit/encoder/encoder.py @@ -178,7 +178,6 @@ def calculate_index_bounds(level_len, num_len, para_len, step_len, l, i, j, k): return start_index, end_index def append_composite_coords(dates, tree_values, lat, coords): - # for date in dates: for value in tree_values: coords[dates]["composite"].append([lat, value]) From 09f33ea423466400bbb83b95f50ac27418d707e5 Mon Sep 17 00:00:00 2001 From: awarde96 Date: Tue, 23 Sep 2025 12:40:38 +0000 Subject: [PATCH 2/3] Move compression from main class to utils file as stand alone function --- covjsonkit/api.py | 21 ------------- covjsonkit/utils.py | 77 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 21 deletions(-) diff --git a/covjsonkit/api.py b/covjsonkit/api.py index 17c45ee..30ef5c0 100644 --- a/covjsonkit/api.py +++ b/covjsonkit/api.py @@ -77,7 +77,6 @@ def encode(self, type, domaintype): domaintype = "path" feature = self._feature_factory(domaintype.lower(), "encoder") coveragejson = feature(self.conf, domaintype) - coveragejson = self._compress(coveragejson) return coveragejson def decode(self, covjson): @@ -97,23 +96,3 @@ def _feature_factory(self, feature_type, encoder_decoder): elif encoder_decoder == "decoder": features = features_decoder return features[feature_type] - - def _compress(self, data): - if self.compression == "zstd": - import zstandard as zstd - - cctx = zstd.ZstdCompressor(level=3) - compressed = cctx.compress(data) - return compressed - elif self.compression == "LZ4": - import lz4.frame - - compressed = lz4.frame.compress(data) - return compressed - # elif self.compression == "binpack": - # import binpacking - - # compressed = binpacking.pack(data) - # return compressed - else: - return data diff --git a/covjsonkit/utils.py b/covjsonkit/utils.py index 76d0da1..fe5c2c3 100644 --- a/covjsonkit/utils.py +++ b/covjsonkit/utils.py @@ -1,3 +1,7 @@ +import os +from pathlib import Path + + def merge_coverage_collections(collection1, collection2): """ Merges two coverage collections into one. @@ -50,3 +54,76 @@ def merge_coverage_collections(collection1, collection2): merged_collection["coverages"].append(coverage) return merged_collection + + +def compress(in_path, out_path=None, compression=None, level=3): + """Compress a file using the specified compression algorithm. + Args: + in_path (str): Path to the input file. + out_path (str, optional): Path to the output file. If None, the output + file will be created in the same directory as the input file with an + appropriate suffix. Defaults to None. + compression (str, optional): Type of compression to use. Options are 'zstd', 'LZ4'. + Defaults to None. + level (int, optional): Compression level. Defaults to 3. + Returns: + str: Path to the compressed file. + """ + if compression is None: + print("No compression specified, please specify type of compression (zstd, LZ4, binpack)") + else: + if compression == "zstd": + import zstandard as zstd + + in_path = Path(in_path) + in_file_size = os.path.getsize(in_path) + + if out_path is None: + out_path = in_path + output_file = in_path.with_suffix(out_path.suffix + ".zst") + + cctx = zstd.ZstdCompressor(level=level) + + with open(in_path, "rb") as f_in, open(output_file, "wb") as f_out: + f_out.write(cctx.compress(f_in.read())) + + output_file_size = os.path.getsize(output_file) + + print(f"Compressed {in_path} → {output_file}") + print(f"Input file size: {in_file_size} bytes") + print(f"Output file size: {output_file_size} bytes") + print(f"Compression ratio: {in_file_size / output_file_size:.2f}") + + return output_file + + elif compression == "LZ4": + import lz4.frame + + input_file = Path(in_path) + in_file_size = os.path.getsize(input_file) + + if out_path is None: + output_file = input_file.with_suffix(input_file.suffix + ".lz4") + else: + output_file = Path(out_path) + + with open(input_file, "rb") as f_in: + data = f_in.read() + + compressed = lz4.frame.compress(data, compression_level=level) + + with open(output_file, "wb") as f_out: + f_out.write(compressed) + + output_file_size = os.path.getsize(output_file) + + print(f"Compressed {input_file} → {output_file}") + print(f"Input file size: {in_file_size} bytes") + print(f"Output file size: {output_file_size} bytes") + print(f"Compression ratio: {in_file_size / output_file_size:.2f}") + + return output_file + # import binpacking + + # compressed = binpacking.pack(data) + # return compressed From c03aefe9c52a2eb05952796e2a111e67540cb0af Mon Sep 17 00:00:00 2001 From: awarde96 Date: Tue, 23 Sep 2025 12:47:20 +0000 Subject: [PATCH 3/3] Add compression libraries to requirements --- covjsonkit/api.py | 2 -- requirements.txt | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/covjsonkit/api.py b/covjsonkit/api.py index 30ef5c0..114bd35 100644 --- a/covjsonkit/api.py +++ b/covjsonkit/api.py @@ -68,8 +68,6 @@ def __init__(self, config=None): self.conf = CovjsonKitConfig.model_validate(config) logging.debug("Config loaded from dictionary: %s", self.conf) # noqa: E501 - self.compression = self.conf.compression - def encode(self, type, domaintype): if domaintype == "timeseries": domaintype = "PointSeries" diff --git a/requirements.txt b/requirements.txt index 2d2f076..65603bb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,5 @@ conflator rasterio scipy pre-commit +zstandard +lz4