Skip to content
26 changes: 24 additions & 2 deletions python/pyfory/_fory.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,8 @@ class Fory:
"_output_stream",
"field_nullable",
"policy",
"max_collection_size",
"max_binary_size",
)

def __init__(
Expand All @@ -172,6 +174,8 @@ def __init__(
policy: DeserializationPolicy = None,
field_nullable: bool = False,
meta_compressor=None,
max_collection_size: int = 1_000_000,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should also take max_binary_size as a constructor parameter and pass it to the Buffer created in the Fory instance.

max_binary_size: int = 64 * 1024 * 1024,
):
"""
Initialize a Fory serialization instance.
Expand Down Expand Up @@ -210,6 +214,17 @@ def __init__(
field_nullable: Treat all dataclass fields as nullable regardless of
Optional annotation.

max_collection_size: Maximum allowed size for collections (lists, sets, tuples)
and maps (dicts) during deserialization. This limit is used to prevent
out-of-memory attacks from malicious payloads that claim extremely large
collection sizes, as collections preallocate memory based on the declared
size. Raises an exception if exceeded. Default is 1,000,000.

max_binary_size: Maximum allowed size in bytes for binary data reads during
deserialization (default: 64 MB). Raises an exception if a single binary
read exceeds this limit, preventing out-of-memory attacks from malicious
payloads that claim extremely large binary sizes.

Example:
>>> # Python-native mode with reference tracking
>>> fory = Fory(ref=True)
Expand All @@ -235,14 +250,16 @@ def __init__(
self.serialization_context = SerializationContext(fory=self, scoped_meta_share_enabled=compatible)
self.type_resolver.initialize()

self.buffer = Buffer.allocate(32)
self.max_binary_size = max_binary_size
self.buffer = Buffer.allocate(32, max_binary_size=max_binary_size)
self.buffer_callback = None
self._buffers = None
self._unsupported_callback = None
self._unsupported_objects = None
self.is_peer_out_of_band_enabled = False
self.max_depth = max_depth
self.depth = 0
self.max_collection_size = max_collection_size
self._output_stream = None

def register(
Expand Down Expand Up @@ -621,7 +638,7 @@ def _deserialize(
assert self.depth == 0, "Nested deserialization should use read_ref/read_no_ref."
self.depth += 1
if isinstance(buffer, bytes):
buffer = Buffer(buffer)
buffer = Buffer(buffer, max_binary_size=self.max_binary_size)
if unsupported_objects is not None:
self._unsupported_objects = iter(unsupported_objects)
reader_index = buffer.get_reader_index()
Expand Down Expand Up @@ -666,6 +683,7 @@ def _read_no_ref_internal(self, buffer, serializer):
"""Internal method to read without modifying read_ref_ids."""
if serializer is None:
serializer = self.type_resolver.read_type_info(buffer).serializer

self.inc_depth()
o = serializer.read(buffer)
self.dec_depth()
Expand Down Expand Up @@ -812,6 +830,10 @@ class ThreadSafeFory:
strict (bool): Whether to require type registration. Defaults to True.
compatible (bool): Whether to enable compatible mode. Defaults to False.
max_depth (int): Maximum depth for deserialization. Defaults to 50.
max_collection_size (int): Maximum allowed size for collections and maps during
deserialization. Defaults to 1,000,000.
max_binary_size (int): Maximum allowed size in bytes for binary data reads during
deserialization. Defaults to 64 MB.

Example:
>>> import pyfury
Expand Down
15 changes: 12 additions & 3 deletions python/pyfory/buffer.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,11 @@ cdef class Buffer:
object output_stream
Py_ssize_t shape[1]
Py_ssize_t stride[1]
int32_t max_binary_size

def __init__(self, data not None, int32_t offset=0, length=None):
def __init__(self, data not None, int32_t offset=0, length=None, int32_t max_binary_size= 64 * 1024 * 1024):
self.data = data
self.max_binary_size = max_binary_size
cdef int32_t buffer_len = len(data)
cdef int length_
if length is None:
Expand All @@ -146,7 +148,7 @@ cdef class Buffer:
self.output_stream = None

@classmethod
def from_stream(cls, stream not None, uint32_t buffer_size=4096):
def from_stream(cls, stream not None, uint32_t buffer_size=4096, int32_t max_binary_size=64 * 1024 * 1024):
cdef CBuffer* stream_buffer
cdef c_string stream_error
if Fory_PyCreateBufferFromStream(
Expand All @@ -156,6 +158,7 @@ cdef class Buffer:
if stream_buffer == NULL:
raise ValueError("failed to create stream buffer")
cdef Buffer buffer = Buffer.__new__(Buffer)
buffer.max_binary_size = max_binary_size
buffer.c_buffer = move(deref(stream_buffer))
del stream_buffer
buffer.data = stream
Expand All @@ -167,6 +170,7 @@ cdef class Buffer:
@staticmethod
cdef Buffer wrap(shared_ptr[CBuffer] c_buffer):
cdef Buffer buffer = Buffer.__new__(Buffer)
buffer.max_binary_size = 64 * 1024 * 1024
cdef CBuffer* ptr = c_buffer.get()
buffer.c_buffer = CBuffer(ptr.data(), ptr.size(), False)
cdef _SharedBufferOwner owner = _SharedBufferOwner.__new__(_SharedBufferOwner)
Expand All @@ -178,11 +182,12 @@ cdef class Buffer:
return buffer

@classmethod
def allocate(cls, int32_t size):
def allocate(cls, int32_t size, int32_t max_binary_size=64 * 1024 * 1024):
cdef CBuffer* buf = allocate_buffer(size)
if buf == NULL:
raise MemoryError("out of memory")
cdef Buffer buffer = Buffer.__new__(Buffer)
buffer.max_binary_size = max_binary_size
buffer.c_buffer = move(deref(buf))
del buf
buffer.data = None
Expand Down Expand Up @@ -407,6 +412,10 @@ cdef class Buffer:
cpdef inline bytes read_bytes(self, int32_t length):
if length == 0:
return b""

if length > self.max_binary_size:
raise ValueError(f"Binary size {length} exceeds the configured limit of {self.max_binary_size}")

cdef bytes py_bytes = PyBytes_FromStringAndSize(NULL, length)
if py_bytes is None:
raise MemoryError("out of memory")
Expand Down
12 changes: 12 additions & 0 deletions python/pyfory/collection.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,9 @@ cdef class ListSerializer(CollectionSerializer):
cdef MapRefResolver ref_resolver = self.fory.ref_resolver
cdef TypeResolver type_resolver = self.fory.type_resolver
cdef int32_t len_ = buffer.read_var_uint32()
# Check size limit before PyList_New preallocation to prevent OOM attacks
if len_ > self.fory.max_collection_size:
raise ValueError(f"List size {len_} exceeds the configured limit of {self.fory.max_collection_size}")
cdef list list_ = PyList_New(len_)
if len_ == 0:
return list_
Expand Down Expand Up @@ -493,6 +496,9 @@ cdef class TupleSerializer(CollectionSerializer):
cdef MapRefResolver ref_resolver = self.fory.ref_resolver
cdef TypeResolver type_resolver = self.fory.type_resolver
cdef int32_t len_ = buffer.read_var_uint32()
# Check size limit before PyTuple_New preallocation to prevent OOM attacks
if len_ > self.fory.max_collection_size:
raise ValueError(f"Tuple size {len_} exceeds the configured limit of {self.fory.max_collection_size}")
cdef tuple tuple_ = PyTuple_New(len_)
if len_ == 0:
return tuple_
Expand Down Expand Up @@ -575,6 +581,9 @@ cdef class SetSerializer(CollectionSerializer):
cdef set instance = set()
ref_resolver.reference(instance)
cdef int32_t len_ = buffer.read_var_uint32()
# Check size limit to prevent OOM attacks from malicious payloads
if len_ > self.fory.max_collection_size:
raise ValueError(f"Set size {len_} exceeds the configured limit of {self.fory.max_collection_size}")
if len_ == 0:
return instance
cdef int8_t collect_flag = buffer.read_int8()
Expand Down Expand Up @@ -897,6 +906,9 @@ cdef class MapSerializer(Serializer):
cdef MapRefResolver ref_resolver = self.ref_resolver
cdef TypeResolver type_resolver = self.type_resolver
cdef int32_t size = buffer.read_var_uint32()
# Check size limit before _PyDict_NewPresized preallocation to prevent OOM attacks
if size > self.fory.max_collection_size:
raise ValueError(f"Map size {size} exceeds the configured limit of {self.fory.max_collection_size}")
cdef dict map_ = _PyDict_NewPresized(size)
ref_resolver.reference(map_)
cdef int32_t ref_id
Expand Down
6 changes: 6 additions & 0 deletions python/pyfory/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,9 @@ def _write_different_types(self, buffer, value, collect_flag=0):

def read(self, buffer):
len_ = buffer.read_var_uint32()
# Check size limit before collection preallocation to prevent OOM attacks
if len_ > self.fory.max_collection_size:
raise ValueError(f"Collection size {len_} exceeds the configured limit of {self.fory.max_collection_size}")
collection_ = self.new_instance(self.type_)
if len_ == 0:
return collection_
Expand Down Expand Up @@ -481,6 +484,9 @@ def read(self, buffer):
ref_resolver = self.ref_resolver
type_resolver = self.type_resolver
size = buffer.read_var_uint32()
# Check size limit to prevent OOM attacks from malicious payloads
if size > fory.max_collection_size:
raise ValueError(f"Map size {size} exceeds the configured limit of {fory.max_collection_size}")
map_ = {}
ref_resolver.reference(map_)
chunk_header = 0
Expand Down
21 changes: 19 additions & 2 deletions python/pyfory/serialization.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1078,6 +1078,8 @@ cdef class Fory:
cdef public bint is_peer_out_of_band_enabled
cdef int32_t max_depth
cdef int32_t depth
cdef public int32_t max_collection_size
cdef public int32_t max_binary_size
cdef object _output_stream

def __init__(
Expand All @@ -1090,6 +1092,8 @@ cdef class Fory:
max_depth: int = 50,
field_nullable: bool = False,
meta_compressor=None,
max_collection_size: int = 1_000_000,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should also take max_binary_size as a constructor parameter and pass it to the Buffer created in the Fory instance.

max_binary_size: int = 64 * 1024 * 1024,
):
"""
Initialize a Fory serialization instance.
Expand Down Expand Up @@ -1128,6 +1132,17 @@ cdef class Fory:
field_nullable: Treat all dataclass fields as nullable regardless of
Optional annotation.

max_collection_size: Maximum allowed size for collections (lists, sets, tuples)
and maps (dicts) during deserialization. This limit is used to prevent
out-of-memory attacks from malicious payloads that claim extremely large
collection sizes, as collections preallocate memory based on the declared
size. Raises an exception if exceeded. Default is 1,000,000.

max_binary_size: Maximum allowed size in bytes for binary data reads during
deserialization (default: 64 MB). Raises an exception if a single binary
read exceeds this limit, preventing out-of-memory attacks from malicious
payloads that claim extremely large binary sizes.

Example:
>>> # Python-native mode with reference tracking
>>> fory = Fory(ref=True)
Expand All @@ -1149,14 +1164,16 @@ cdef class Fory:
self.type_resolver = TypeResolver(self, meta_share=compatible, meta_compressor=meta_compressor)
self.serialization_context = SerializationContext(fory=self, scoped_meta_share_enabled=compatible)
self.type_resolver.initialize()
self.buffer = Buffer.allocate(32)
self.max_binary_size = max_binary_size
self.buffer = Buffer.allocate(32, max_binary_size=max_binary_size)
self.buffer_callback = None
self._buffers = None
self._unsupported_callback = None
self._unsupported_objects = None
self.is_peer_out_of_band_enabled = False
self.depth = 0
self.max_depth = max_depth
self.max_collection_size = max_collection_size
self._output_stream = None

def register_serializer(self, cls: Union[type, TypeVar], Serializer serializer):
Expand Down Expand Up @@ -1508,7 +1525,7 @@ cdef class Fory:
"""
try:
if type(buffer) == bytes:
buffer = Buffer(buffer)
buffer = Buffer(buffer, max_binary_size=self.max_binary_size)
return self._deserialize(buffer, buffers, unsupported_objects)
finally:
self.reset_read()
Expand Down
Loading
Loading