freq
This commit is contained in:
parent
0100f92504
commit
05f3711bcf
2 changed files with 99 additions and 3 deletions
77
src/freq.rs
Normal file
77
src/freq.rs
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
//! Sort bytes by frequency, then encode like so:
|
||||
//! - Most popular: `0`
|
||||
//! - Second most popular: `10`
|
||||
//! - Third most popular: `110`
|
||||
//! - Least popular: `(1 * 255) 0`
|
||||
//!
|
||||
//! For example, `aaabbc` would become `0001010110`
|
||||
|
||||
use std::{array, fmt};
|
||||
|
||||
/// A single unary code word: `one_count` one-bits followed by one terminating zero-bit.
///
/// The code word is therefore `one_count + 1` bits long.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Bits {
    /// Number of `1` bits emitted before the terminating `0`.
    pub one_count: u8,
}

impl fmt::Display for Bits {
    /// Renders the code word as text: `one_count` `'1'` characters, then a single `'0'`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        for _ in 0..self.one_count {
            f.write_str("1")?;
        }
        f.write_str("0")
    }
}
|
||||
|
||||
/// Frequency-ranked unary encoder over a borrowed byte slice.
///
/// Every byte value is assigned a rank by how often it occurs in the input
/// (rank 0 = most frequent); that rank is the number of `1` bits in its code word.
#[derive(Debug, Clone)]
pub struct Encoder<'src> {
    /// Input bytes that have not been consumed yet.
    src: &'src [u8],
    /// `byte_mapping[b]` is the frequency rank of byte value `b`.
    byte_mapping: [u8; 256],
}

impl<'src> Encoder<'src> {
    /// Builds an encoder for `src` by ranking all 256 byte values by descending
    /// occurrence count.
    ///
    /// Byte values with equal counts (including those that never occur) keep
    /// ascending byte-value order, because the sort is stable.
    pub fn new(src: &'src [u8]) -> Self {
        // `usize` counters cannot overflow: no slice holds more than `usize::MAX` bytes.
        let mut byte_counts = [0usize; 256];
        for &byte in src {
            byte_counts[usize::from(byte)] += 1;
        }

        #[allow(
            clippy::cast_possible_truncation,
            reason = "`i` is in the range `0..256`"
        )]
        let mut ranking: [u8; 256] = array::from_fn(|i| i as u8);
        // Operands are swapped in the comparison to sort by descending count.
        ranking.sort_by(|&a, &b| byte_counts[usize::from(b)].cmp(&byte_counts[usize::from(a)]));

        // `ranking` maps rank -> byte; invert it in one pass to get byte -> rank.
        let mut byte_mapping = [0u8; 256];
        for (rank, &byte) in (0u8..=u8::MAX).zip(&ranking) {
            byte_mapping[usize::from(byte)] = rank;
        }

        Self { src, byte_mapping }
    }

    /// Prints one line per byte value to stdout: the byte in hex, the length of its
    /// code word in bits, and the byte rendered as a `char`.
    pub fn print_mapping(&self) {
        // The inclusive range ends cleanly at 255; an open `0u8..` would step past
        // `u8::MAX` while yielding the final element.
        for (byte, &one_count) in (0u8..=u8::MAX).zip(&self.byte_mapping) {
            // Widen before adding: the least frequent byte has `one_count == 255`.
            let bit_count = u16::from(one_count) + 1;
            let plural = if bit_count == 1 { "" } else { "s" };
            println!(
                "0x{byte:02x}: {bit_count:3} bit{plural} ({:?})",
                char::from(byte),
            );
        }
    }
}
|
||||
|
||||
impl Iterator for Encoder<'_> {
|
||||
type Item = Bits;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let one_count = self.byte_mapping[*self.src.first()? as usize];
|
||||
self.src = &self.src[1..];
|
||||
|
||||
Some(Bits { one_count })
|
||||
}
|
||||
}
|
||||
25
src/main.rs
25
src/main.rs
|
|
@ -1,15 +1,34 @@
|
|||
use std::io::{self, Read};
|
||||
use std::{
|
||||
env,
|
||||
io::{self, Read},
|
||||
};
|
||||
|
||||
mod freq;
|
||||
mod rle;
|
||||
|
||||
fn main() -> Result<(), io::Error> {
|
||||
let debug = env::args().any(|arg| arg == "--debug" || arg == "-d");
|
||||
|
||||
let mut buf = Vec::new();
|
||||
let len_src = io::stdin().read_to_end(&mut buf)?;
|
||||
|
||||
println!("Original size: {len_src}");
|
||||
println!("Original: {len_src}");
|
||||
|
||||
let len_rle = rle::Encoder::new(&buf).count() * 2;
|
||||
println!("Rle'd size: {len_rle}");
|
||||
println!(" Rle'd: {len_rle}");
|
||||
|
||||
let encoder_freq = freq::Encoder::new(&buf);
|
||||
if debug {
|
||||
encoder_freq.print_mapping();
|
||||
}
|
||||
|
||||
let len_freq_table = 256;
|
||||
let len_freq_bits = encoder_freq
|
||||
.map(|freq::Bits { one_count }| one_count as usize + 1)
|
||||
.sum::<usize>()
|
||||
.div_ceil(8);
|
||||
let len_freq = len_freq_table + len_freq_bits;
|
||||
println!(" Freq'd: {len_freq}");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue