This commit is contained in:
mehbark 2025-10-02 19:28:59 -04:00
parent 0100f92504
commit 05f3711bcf
Signed by: mbk
GPG key ID: E333EC1335FFCCDB
2 changed files with 99 additions and 3 deletions

77
src/freq.rs Normal file
View file

@ -0,0 +1,77 @@
//! Sort bytes by frequency, then encode like so:
//! - Most popular: `0`
//! - Second most popular: `10`
//! - Third most popular: `110`
//! - Least popular (256th): 255 `1`s followed by a single `0`
//!
//! For example, `aaabbc` would become `0001010110`
use std::{array, fmt};
/// One codeword: `one_count` one-bits terminated by a single zero-bit.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Bits {
    /// Number of `1` bits that precede the terminating `0`.
    pub one_count: u8,
}

impl fmt::Display for Bits {
    /// Writes the codeword as ASCII digits, e.g. `110` when `one_count` is 2.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        (0..self.one_count).try_for_each(|_| f.write_str("1"))?;
        f.write_str("0")
    }
}
/// Frequency-rank encoder over a borrowed byte slice.
///
/// Each byte value is assigned a rank by how often it occurs in the input
/// (rank 0 = most frequent); the rank is the number of `1` bits in that
/// byte's codeword.
#[derive(Debug, Clone)]
pub struct Encoder<'src> {
    /// Input bytes that have not been consumed yet.
    src: &'src [u8],
    /// `byte_mapping[b]` is the frequency rank of byte value `b`.
    byte_mapping: [u8; 256],
}

impl<'src> Encoder<'src> {
    /// Builds an encoder for `src`, ranking all 256 byte values by descending
    /// occurrence count.
    ///
    /// Ties (including byte values that never occur) are broken by ascending
    /// byte value, so the resulting mapping is always a permutation of
    /// `0..=255`.
    pub fn new(src: &'src [u8]) -> Self {
        // `usize` counters cannot overflow: a slice never holds more than
        // `usize::MAX` bytes in total.
        let mut byte_counts = [0usize; 256];
        for &byte in src {
            byte_counts[usize::from(byte)] += 1;
        }

        // `ranking[rank]` is the byte value that holds that rank.
        #[allow(
            clippy::cast_possible_truncation,
            reason = "`i` is in the range `0..256`"
        )]
        let mut ranking: [u8; 256] = array::from_fn(|i| i as u8);
        // The sort is stable, so equally frequent bytes keep ascending order.
        ranking.sort_by_key(|&byte| std::cmp::Reverse(byte_counts[usize::from(byte)]));

        // Invert the permutation in a single pass: byte value -> rank.
        let mut byte_mapping = [0u8; 256];
        for (rank, &byte) in (0u8..=u8::MAX).zip(&ranking) {
            byte_mapping[usize::from(byte)] = rank;
        }

        Self { src, byte_mapping }
    }

    /// Prints one line per byte value (`0x00` through `0xff`) to stdout,
    /// showing the length in bits of that byte's codeword and the byte as a
    /// `char`.
    pub fn print_mapping(&self) {
        // An inclusive range is used because an open-ended `0u8..` overflows
        // while yielding 255, which panics in overflow-checked builds.
        for (byte, &one_count) in (0u8..=u8::MAX).zip(&self.byte_mapping) {
            println!("{}", Self::mapping_line(byte, one_count));
        }
    }

    /// Formats the mapping line for `byte`, whose codeword has `one_count`
    /// leading `1` bits.
    fn mapping_line(byte: u8, one_count: u8) -> String {
        // Codeword length is the ones plus the terminating `0`. Widen before
        // adding so that rank 255 yields 256 instead of overflowing `u8`.
        let bit_len = u16::from(one_count) + 1;
        let plural = if bit_len == 1 { "" } else { "s" };
        let glyph = char::from(byte);
        format!("0x{byte:02x}: {bit_len:3} bit{plural} ({glyph:?})")
    }
}
impl Iterator for Encoder<'_> {
    type Item = Bits;

    /// Consumes the next input byte and yields its codeword, or `None` once
    /// the input is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        let (&byte, rest) = self.src.split_first()?;
        self.src = rest;
        let one_count = self.byte_mapping[usize::from(byte)];
        Some(Bits { one_count })
    }
}

View file

@ -1,15 +1,34 @@
use std::io::{self, Read}; use std::{
env,
io::{self, Read},
};
mod freq;
mod rle; mod rle;
fn main() -> Result<(), io::Error> { fn main() -> Result<(), io::Error> {
let debug = env::args().any(|arg| arg == "--debug" || arg == "-d");
let mut buf = Vec::new(); let mut buf = Vec::new();
let len_src = io::stdin().read_to_end(&mut buf)?; let len_src = io::stdin().read_to_end(&mut buf)?;
println!("Original size: {len_src}"); println!("Original: {len_src}");
let len_rle = rle::Encoder::new(&buf).count() * 2; let len_rle = rle::Encoder::new(&buf).count() * 2;
println!("Rle'd size: {len_rle}"); println!(" Rle'd: {len_rle}");
let encoder_freq = freq::Encoder::new(&buf);
if debug {
encoder_freq.print_mapping();
}
let len_freq_table = 256;
let len_freq_bits = encoder_freq
.map(|freq::Bits { one_count }| one_count as usize + 1)
.sum::<usize>()
.div_ceil(8);
let len_freq = len_freq_table + len_freq_bits;
println!(" Freq'd: {len_freq}");
Ok(()) Ok(())
} }