diff --git a/pallas-codec/Cargo.toml b/pallas-codec/Cargo.toml
index 64b327e09..2add46501 100644
--- a/pallas-codec/Cargo.toml
+++ b/pallas-codec/Cargo.toml
@@ -8,20 +8,18 @@ homepage = "https://github.com/txpipe/pallas"
 documentation = "https://docs.rs/pallas-codec"
 license = "Apache-2.0"
 readme = "README.md"
-authors = [
-    "Santiago Carmuega ",
-    "Lucas Rosa ",
-    "Kasey White ",
-]
+authors = ["Santiago Carmuega ", "Lucas Rosa ", "Kasey White "]
 
 [features]
 default = []
 
 [dependencies]
+arrayvec = "0.7.6"
 hex = "0.4.3"
 minicbor = { version = "0.26.0", features = ["std", "half", "derive"] }
 num-bigint = { version = "0.4.4", optional = true }
 serde = { version = "1.0.143", features = ["derive"] }
+slotmap = "1.0.7"
 thiserror = "1.0.39"
 
 [dev-dependencies]
diff --git a/pallas-codec/src/cborheap.rs b/pallas-codec/src/cborheap.rs
new file mode 100644
index 000000000..34ea3dd7c
--- /dev/null
+++ b/pallas-codec/src/cborheap.rs
@@ -0,0 +1,240 @@
+//! Heap-backed storage for the original CBOR bytes of decoded values.
+//!
+//! A [`CborHeap`] owns the raw payloads, while decoded structures hold
+//! [`KeepCbor`] wrappers that carry a lightweight reference (heap key plus byte
+//! range) instead of borrowing from the input buffer.
+
+use std::ops::Deref;
+
+use arrayvec::ArrayVec;
+use minicbor::{Decode, Encode};
+use slotmap::DefaultKey;
+
+/// Decoding context that may carry the key of the heap entry being decoded.
+pub trait CborHeapContext {
+    /// Returns the key of the heap entry being decoded, if there is one.
+    fn key(&self) -> Option<DefaultKey>;
+}
+
+impl CborHeapContext for () {
+    fn key(&self) -> Option<DefaultKey> {
+        None
+    }
+}
+
+impl CborHeapContext for DefaultKey {
+    fn key(&self) -> Option<DefaultKey> {
+        Some(*self)
+    }
+}
+
+/// Store of raw CBOR payloads, each kept in a buffer of at most `N` bytes.
+pub struct CborHeap<const N: usize> {
+    heap: slotmap::SlotMap<DefaultKey, ArrayVec<u8, N>>,
+}
+
+impl<const N: usize> CborHeap<N> {
+    /// Creates a heap with room pre-allocated for `capacity` payloads.
+    pub fn new(capacity: usize) -> Self {
+        Self {
+            heap: slotmap::SlotMap::with_capacity(capacity),
+        }
+    }
+
+    /// Resolves a reference into the bytes it points at.
+    ///
+    /// Returns `None` if the entry is not in the heap or the range does not fit
+    /// inside the stored payload.
+    fn get_slice(&self, ref_: &CborRef) -> Option<&[u8]> {
+        self.heap
+            .get(ref_.0)
+            .map(|entry| entry.as_slice())
+            .and_then(|slice| slice.get(ref_.1.clone()))
+    }
+
+    /// Returns the original CBOR bytes of `value`, if it holds a reference that
+    /// resolves in this heap.
+    pub fn find_cbor<T>(&self, value: &KeepCbor<T>) -> Option<&[u8]> {
+        let ref_ = value.cbor_ref.as_ref()?;
+        self.get_slice(ref_)
+    }
+
+    /// Copies `data` into the heap and decodes it, using the key of the new
+    /// entry as the decoding context.
+    ///
+    /// On success, returns the decoded value together with the key to hand to
+    /// [`CborHeap::forget`] once the bytes are no longer needed.
+    ///
+    /// # Errors
+    ///
+    /// Fails if `data` is longer than `N` bytes (nothing is stored in that case)
+    /// or if decoding fails. When decoding fails the payload has already been
+    /// inserted and its key is not returned to the caller.
+    pub fn decode<'b, T>(
+        &'b mut self,
+        data: &[u8],
+    ) -> Result<(T, DefaultKey), minicbor::decode::Error>
+    where
+        T: Decode<'b, DefaultKey>,
+    {
+        // Report oversized payloads as an error instead of panicking.
+        let data = ArrayVec::<u8, N>::try_from(data).map_err(|_| {
+            minicbor::decode::Error::message("cbor payload exceeds heap item size")
+        })?;
+
+        let mut key = self.heap.insert(data);
+
+        let mut decoder = minicbor::Decoder::new(self.heap[key].as_slice());
+
+        let value = decoder.decode_with(&mut key)?;
+        Ok((value, key))
+    }
+
+    /// Removes the payload stored under `key`, discarding the removed bytes.
+    pub fn forget(&mut self, key: DefaultKey) {
+        self.heap.remove(key);
+    }
+}
+
+/// Heap whose entries hold at most 32 bytes each.
+pub type CborBlockHeap = CborHeap<32>;
+
+/// Heap whose entries hold at most 1024 bytes each.
+pub type CborTxHeap = CborHeap<1024>;
+
+/// Reference to a byte range inside a heap entry.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct CborRef(DefaultKey, std::ops::Range<usize>);
+
+/// A value paired with an optional reference to its original CBOR bytes.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct KeepCbor<T> {
+    inner: T,
+    cbor_ref: Option<CborRef>,
+}
+
+impl<T> KeepCbor<T> {
+    /// Returns the original CBOR bytes of this value from `heap`, if it holds a
+    /// reference that resolves there.
+    pub fn original_cbor<'b, const N: usize>(&self, heap: &'b CborHeap<N>) -> Option<&'b [u8]> {
+        let ref_ = self.cbor_ref.as_ref()?;
+        heap.get_slice(ref_)
+    }
+}
+
+impl<T> From<T> for KeepCbor<T> {
+    /// Wraps a value that has no original CBOR attached.
+    fn from(inner: T) -> Self {
+        Self {
+            inner,
+            cbor_ref: None,
+        }
+    }
+}
+
+impl<T: Deref> Deref for KeepCbor<T> {
+    type Target = T::Target;
+
+    fn deref(&self) -> &Self::Target {
+        &self.inner
+    }
+}
+
+impl<'b, Ctx, T> Decode<'b, Ctx> for KeepCbor<T>
+where
+    Ctx: CborHeapContext,
+    T: Decode<'b, Ctx>,
+{
+    fn decode(
+        d: &mut minicbor::Decoder<'b>,
+        ctx: &mut Ctx,
+    ) -> Result<Self, minicbor::decode::Error> {
+        // The decoder positions around the inner value delimit its bytes.
+        let start_pos = d.position();
+        let value = T::decode(d, ctx)?;
+        let end_pos = d.position();
+
+        Ok(KeepCbor {
+            inner: value,
+            cbor_ref: ctx.key().map(|key| CborRef(key, start_pos..end_pos)),
+        })
+    }
+}
+
+impl<Ctx, T> Encode<Ctx> for KeepCbor<T>
+where
+    Ctx: CborHeapContext,
+    T: Encode<Ctx>,
+{
+    fn encode<W: minicbor::encode::Write>(
+        &self,
+        e: &mut minicbor::Encoder<W>,
+        ctx: &mut Ctx,
+    ) -> Result<(), minicbor::encode::Error<W::Error>> {
+        self.inner.encode(e, ctx)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[derive(Decode, Encode, Debug, PartialEq, Eq, Clone)]
+    #[cbor(map, context_bound = "CborHeapContext")]
+    pub struct ExampleStruct {
+        #[n(0)]
+        pub simple_field: Option<u32>,
+        #[n(1)]
+        pub hashable_value: KeepCbor<String>,
+        #[n(2)]
+        pub other_simple_field: Option<bool>,
+    }
+
+    fn owning_function(_: ExampleStruct) {
+        // do anything
+    }
+
+    #[test]
+    fn test_happy_path() {
+        // this is the store for cbor bytes. The item size multiplied by the capacity
+        // defines the pre-allocated memory.
+        let mut heap = CborHeap::<1024>::new(1);
+
+        // let's say that we get some CBOR from the network that we want to decode and
+        // remember (simplified here using a hardcoded value)
+        let cbor = hex::decode("a3000101613202f5").unwrap();
+
+        // we ask the heap to decode the CBOR. This step will enter the CBOR into the
+        // heap and decorate the decoded structure with a lightweight pointer to the
+        // slice in the heap.
+        //
+        // The returned tuple has the decoded structure and a guard that is used to
+        // forget the CBOR from the heap once we are done with it.
+        let (plain_struct, cbor_guard) = heap.decode::<ExampleStruct>(&cbor).unwrap();
+
+        // Let's say we need to access the cbor for one of the fields in the struct, we
+        // can ask the heap to retrieve that particular CBOR slice. This search is very
+        // efficient, it is just one index lookup in the heap and a range lookup over
+        // the full bytes of the CBOR.
+        let cbor_fragment = heap.find_cbor(&plain_struct.hashable_value);
+
+        assert_eq!(hex::encode(cbor_fragment.unwrap()), "6132");
+
+        // when we're done doing all of the hashing, we can forget the CBOR from the
+        // heap.
+        heap.forget(cbor_guard);
+
+        // but the plain structure is still valid and doesn't have any lifetimes or
+        // dependencies. It can be moved as value to other functions or threads.
+        owning_function(plain_struct);
+    }
+
+    #[test]
+    fn test_oversized_payload_is_an_error() {
+        // the payload is 8 bytes long, which doesn't fit in a 4-byte heap item.
+        let mut heap = CborHeap::<4>::new(1);
+        let cbor = hex::decode("a3000101613202f5").unwrap();
+
+        assert!(heap.decode::<ExampleStruct>(&cbor).is_err());
+    }
+}
diff --git a/pallas-codec/src/lib.rs b/pallas-codec/src/lib.rs
index ad9f27e38..8a2a455b3 100644
--- a/pallas-codec/src/lib.rs
+++ b/pallas-codec/src/lib.rs
@@ -7,6 +7,9 @@ pub use minicbor;
 /// Round-trip friendly common helper structs
 pub mod utils;
 
+/// Heap to track the original cbor bytes for decoded structs
+pub mod cborheap;
+
 pub trait Fragment: Sized + for<'b> minicbor::Decode<'b, ()> + minicbor::Encode<()> {}
 
 impl<T> Fragment for T where T: for<'b> minicbor::Decode<'b, ()> + minicbor::Encode<()> + Sized {}