diff --git a/requirements.txt b/requirements.txt index 903af00..d4db6ce 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ setuptools>=68 wheel +numpy>=1.24 # arrays # Optional dependencies pandas>=1.5 # dataframes pyarrow>=12.0.0 # arrow diff --git a/setup.py b/setup.py index 5a3cf2f..9f43c65 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ packages=find_packages(where="src"), package_dir={"": "src"}, include_package_data=True, - install_requires=[], + install_requires=['numpy>=1.24'], extras_require={ "dataframes": ["pandas"], "arrow": ["pyarrow"], diff --git a/tests/test_core.py b/tests/test_core.py index b884cb6..7c1c2b3 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1,5 +1,8 @@ import sys import os +import struct +from unittest.mock import patch + sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))) import pytest @@ -7,6 +10,7 @@ from src.keyedstablehash.canonical import canonicalize_to_bytes from src.keyedstablehash.siphash import siphash24 from src.keyedstablehash.stable import stable_keyed_hash +import src.keyedstablehash.canonical as canonical_module SIPHASH_VECTORS = { 0: "310e0edd47db6f72", @@ -55,20 +59,49 @@ def test_canonicalization_handles_sets_and_lists(): def test_rejects_unsupported_type(): - class Example: + class ExampleSlots: __slots__ = ("value",) def __init__(self, value: int): self.value = value - with pytest.raises(TypeError): - stable_keyed_hash(Example(1), key=b"\x00" * 16) + # Slots classes do not have __dict__ and are not automatically supported + with pytest.raises(TypeError) as excinfo: + stable_keyed_hash(ExampleSlots(1), key=b"\x00" * 16) + assert "Unsupported type" in str(excinfo.value) + + +def test_canonical_custom_object_with_dict(): + """Test that objects with __dict__ are canonicalized by class name and vars.""" + + class ExampleObj: + def __init__(self, value): + self.value = value + self.ignore = None # Just to have multiple fields + + obj = ExampleObj(42) + encoded = canonicalize_to_bytes(obj) + + # Check tag for Object + assert encoded.startswith(b"O") + # Check that class name is encoded + assert b"ExampleObj" in encoded + # Check that the internal value 42 is present (encoded as int) + assert canonicalize_to_bytes(42) in encoded def test_encode_length_and_int(): + # Zero + assert canonicalize_to_bytes(0) == b"I" + struct.pack(" 10 def test_feed_canonical_dict_order(): @@ -83,12 +116,67 @@ def test_feed_canonical_list_vs_tuple(): lst = [1, 2, 3] t = (1, 2, 3) assert canonicalize_to_bytes(lst) != canonicalize_to_bytes(t) + assert canonicalize_to_bytes(lst).startswith(b"L") + assert canonicalize_to_bytes(t).startswith(b"T") def test_feed_canonical_set_order(): s1 = {1, 2, 3} s2 = {3, 2, 1} assert canonicalize_to_bytes(s1) == canonicalize_to_bytes(s2) + assert canonicalize_to_bytes(s1).startswith(b"E") + + +def test_canonical_frozenset(): + fs = frozenset([3, 2, 1]) + s = {1, 2, 3} + # Frozenset and Set should produce the same content encoding if logic allows, + # or at least be supported. In canonical.py both use _handle_set. + assert canonicalize_to_bytes(fs) == canonicalize_to_bytes(s) + + +def test_canonical_primitives_full_coverage(): + # None + assert canonicalize_to_bytes(None) == b"N" + + # Bool + assert canonicalize_to_bytes(True) == b"B\x01" + assert canonicalize_to_bytes(False) == b"B\x00" + + # Float + f_val = 1.234 + encoded_f = canonicalize_to_bytes(f_val) + assert encoded_f.startswith(b"F") + assert struct.pack("