// text-compression/src/compression_scheme.rs
// (57 lines, 2.4 KiB, Rust)

use bitvec::{slice::BitSlice, vec::BitVec};
/// A scheme for compressing a series of bytes into a series of bits.
///
/// Implementors supply [`encode`](Self::encode), which produces bits together
/// with a [`Header`](Self::Header), and [`decode`](Self::decode), which turns
/// those bits and that header back into bytes.
pub trait CompressionScheme {
    /// Metadata returned by [`encode`](Self::encode) that
    /// [`decode`](Self::decode) requires to reconstruct the bytes.
    type Header;

    /// Encodes `src` into `buf` and returns the matching [`Header`](Self::Header).
    ///
    /// Implementations are allowed to be non-deterministic, but whatever they
    /// produce **must** be decodable by [`decode`](Self::decode).
    /// Put differently, [`decode`](Self::decode) ∘ [`encode`](Self::encode) = `id`.
    fn encode(src: &[u8], buf: &mut BitVec) -> Self::Header;

    /// Decodes the bits in `src`, guided by `header`, into `buf`.
    ///
    /// Implementations may panic when handed arbitrary input, but they **must**
    /// successfully decode anything produced by [`encode`](Self::encode).
    /// Put differently, [`decode`](Self::decode) ∘ [`encode`](Self::encode) = `id`.
    fn decode(src: &BitSlice, header: &Self::Header, buf: &mut Vec<u8>);

    /// Reports how many bytes `header` would occupy if it were encoded.
    ///
    /// No default implementation based on [`std::mem::size_of`] is provided,
    /// because that would be misleading for smart pointers: a [`Vec`] is always
    /// a pointer to heap data plus some bookkeeping, so it is only a few dozen
    /// bytes regardless of how many items it holds.
    ///
    /// Instead, if the header is, say, a `Vec<u16>`, its size would be
    /// `2 * header.len()`, because that is how many bytes storing it would take.
    ///
    /// This trait does not answer how a real encoding with a dynamically-sized
    /// header would mark where the header ends and the data begins; that
    /// question is deliberately left open here.
    fn header_size(header: &Self::Header) -> usize;

    /// Returns `true` if [`decode`](Self::decode)([`encode`](Self::encode)(`src`)) = `src`.
    ///
    /// Handy when writing [`quickcheck`](https://crates.io/crates/quickcheck) tests.
    ///
    /// Only a single encode/decode round trip is performed per call, so with a
    /// non-deterministic [`encode`](Self::encode) a `true` result covers just
    /// that one run.
    #[must_use]
    fn idempotent_on(src: &[u8]) -> bool {
        // Encode into a fresh bit buffer, keeping the header for decoding.
        let mut encoded = BitVec::new();
        let header = Self::encode(src, &mut encoded);

        // Decode into a fresh byte buffer and compare with the input.
        let mut decoded = Vec::new();
        Self::decode(&encoded, &header, &mut decoded);

        decoded.as_slice() == src
    }
}