diff --git a/src/freq.rs b/src/freq.rs
new file mode 100644
index 0000000..269dae4
--- /dev/null
+++ b/src/freq.rs
@@ -0,0 +1,94 @@
+//! Sort bytes by frequency, then encode like so:
+//! - Most popular: `0`
+//! - Second most popular: `10`
+//! - Third most popular: `110`
+//! - Least popular: `(1 * 255) 0`
+//!
+//! For example, `aaabbc` would become `0001010110`
+
+use std::{array, cmp::Reverse, fmt};
+
+/// One encoded symbol: `one_count` one-bits followed by a single zero-bit.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub struct Bits {
+    /// Number of leading one-bits, i.e. the frequency rank of the source byte.
+    pub one_count: u8,
+}
+
+impl fmt::Display for Bits {
+    /// Writes the code as text: `one_count` times `1`, then a terminating `0`.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        for _ in 0..self.one_count {
+            write!(f, "1")?;
+        }
+        write!(f, "0")
+    }
+}
+
+/// Iterator that yields the [`Bits`] code of every byte of `src`, in order.
+#[derive(Debug, Clone)]
+pub struct Encoder<'src> {
+    /// Bytes that have not been encoded yet.
+    src: &'src [u8],
+    /// Frequency rank of each byte value (`0` = most frequent).
+    byte_mapping: [u8; 256],
+}
+
+impl<'src> Encoder<'src> {
+    /// Counts the bytes of `src` and ranks all 256 byte values by descending
+    /// frequency; ties keep ascending byte order because the sort is stable.
+    pub fn new(src: &'src [u8]) -> Self {
+        // A slice never holds more than `isize::MAX` bytes, so `usize`
+        // counters cannot overflow.
+        let mut byte_counts = [0usize; 256];
+
+        for &byte in src {
+            byte_counts[usize::from(byte)] += 1;
+        }
+
+        #[allow(
+            clippy::cast_possible_truncation,
+            reason = "`i` is in the range `0..256`"
+        )]
+        let mut ranking: [u8; 256] = array::from_fn(|i| i as u8);
+        ranking.sort_by_key(|&byte| Reverse(byte_counts[usize::from(byte)]));
+
+        // Invert the ranking (rank -> byte) into the lookup table (byte -> rank).
+        let mut byte_mapping = [0u8; 256];
+        for (rank, &byte) in (0..=u8::MAX).zip(&ranking) {
+            byte_mapping[usize::from(byte)] = rank;
+        }
+
+        Self { src, byte_mapping }
+    }
+
+    /// Prints, for every byte value, the length of its code in bits.
+    pub fn print_mapping(&self) {
+        // `0..=u8::MAX` instead of `0u8..`: an open-ended `u8` range overflows
+        // when stepping past 255, which panics when overflow checks are on.
+        for (&one_count, byte) in self.byte_mapping.iter().zip(0..=u8::MAX) {
+            // Widen first: rank 255 means 256 bits, which does not fit in `u8`.
+            let bit_count = u16::from(one_count) + 1;
+            println!(
+                "0x{byte:02x}: {:3} bit{s} ({:?})",
+                bit_count,
+                (byte as char),
+                s = if bit_count == 1 { "" } else { "s" },
+            );
+        }
+    }
+}
+
+impl Iterator for Encoder<'_> {
+    type Item = Bits;
+
+    /// Pops the next source byte and returns its code, or `None` at the end.
+    fn next(&mut self) -> Option<Self::Item> {
+        let (&byte, rest) = self.src.split_first()?;
+        self.src = rest;
+
+        Some(Bits {
+            one_count: self.byte_mapping[usize::from(byte)],
+        })
+    }
+}
diff --git a/src/main.rs b/src/main.rs
index 669c29b..509ccd5 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,15 +1,34 @@
-use std::io::{self, Read};
+use std::{
+    env,
+    io::{self, Read},
+};
 
+mod freq;
 mod rle;
 
 fn main() -> Result<(), io::Error> {
+    let debug = env::args().any(|arg| arg == "--debug" || arg == "-d");
+
     let mut buf = Vec::new();
     let len_src = io::stdin().read_to_end(&mut buf)?;
 
-    println!("Original size: {len_src}");
+    println!("Original: {len_src}");
 
     let len_rle = rle::Encoder::new(&buf).count() * 2;
-    println!("Rle'd size: {len_rle}");
+    println!(" Rle'd: {len_rle}");
+
+    let encoder_freq = freq::Encoder::new(&buf);
+    if debug {
+        encoder_freq.print_mapping();
+    }
+
+    let len_freq_table = 256;
+    let len_freq_bits = encoder_freq
+        .map(|freq::Bits { one_count }| one_count as usize + 1)
+        .sum::<usize>()
+        .div_ceil(8);
+    let len_freq = len_freq_table + len_freq_bits;
+    println!(" Freq'd: {len_freq}");
 
     Ok(())
 }