From 8caa7d894ff5fd519a922f7e9f5350cb6b9be999 Mon Sep 17 00:00:00 2001 From: mehbark Date: Fri, 3 Oct 2025 01:04:11 -0400 Subject: [PATCH] document `CompressionScheme` --- src/compression_scheme.rs | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/src/compression_scheme.rs b/src/compression_scheme.rs index 3d89d86..c953381 100644 --- a/src/compression_scheme.rs +++ b/src/compression_scheme.rs @@ -1,13 +1,49 @@ use bitvec::{slice::BitSlice, vec::BitVec}; +/// The `CompressionScheme` trait describes a way to [`encode`](Self::encode) +/// and [`decode`](Self::decode) a series of bytes into a series of bits +/// and a [`Header`][Self::Header]. pub trait CompressionScheme { + /// The metadata that is needed to [`decode`](Self::decode) the encoded bits. type Header; + /// Encode some bytes into `buf`, returning a [`Header`][Self::Header]. + /// + /// This does not necessarily have to be deterministic, + /// but it **must** always be decodable by [`decode`](Self::decode). + /// That is, [`decode`](Self::decode) ∘ [`encode`](Self::encode) = `id`. fn encode(src: &[u8], buf: &mut BitVec) -> Self::Header; + + /// Decode the given bits and header into `buf`. + /// + /// This may panic on arbitrary input, + /// but it **must** always decode outputs of [`encode`](Self::encode). + /// That is, [`decode`](Self::decode) ∘ [`encode`](Self::encode) = `id`. fn decode(src: &BitSlice, header: &Self::Header, buf: &mut Vec); + /// How many bytes a [`Header`](Self::Header) would be if it were encoded. + /// + /// This does not have a default implementation of [`std::mem::size_of`] because + /// that would be confusing for smart pointers. + /// For example, a [`Vec`] always consists of a pointer to data on the heap and some + /// other stuff. No matter how many items it contains, it will only be a few dozen bytes. 
+ /// + /// Instead, if your header is, say, a [`Vec`], its size would be `2 * header.len()` + /// because that's how many bytes it would take to store. + /// + /// You might wonder, "in an actual encoding with a dynamically-sized header, how would you know + /// where the header ends and the data starts?" + /// Well, uh. + /// Good question. + /// Don't worry about it I guess. fn header_size(header: &Self::Header) -> usize; + /// Returns `true` if [`decode`](Self::decode)([`encode`](Self::encode)(`src`)) = `src`. + /// + /// Useful for writing [`quickcheck`](https://crates.io/crates/quickcheck) tests. + /// + /// This function is not magic; it only tests once. + #[must_use] fn idempotent_on(src: &[u8]) -> bool { let mut buf_bits = BitVec::new(); let header = Self::encode(src, &mut buf_bits);