commit 396dd79444d2d0bffb43bc2c6c220a0f71d59727
Author: mehbark
Date:   Sat Jul 5 21:57:21 2025 -0400

    byte search v1

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ea8c4bf
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/target
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..754b456
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,7 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4
+
+[[package]]
+name = "simd-fun"
+version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..8850380
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,6 @@
+[package]
+name = "simd-fun"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
diff --git a/rust-toolchain.toml b/rust-toolchain.toml
new file mode 100644
index 0000000..5d56faf
--- /dev/null
+++ b/rust-toolchain.toml
@@ -0,0 +1,2 @@
+[toolchain]
+channel = "nightly"
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..063e502
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,80 @@
+#![feature(test, portable_simd)]
+
+extern crate test;
+
+use std::{
+    env,
+    io::{self, Read},
+    simd::prelude::*,
+};
+
+/// Prints the index of the first haystack byte (read from stdin) that equals
+/// the first byte of the needle argument, once per search implementation.
+fn main() {
+    let needle = *env::args()
+        .nth(1)
+        .expect("needle argument is required")
+        .as_bytes()
+        .first()
+        .expect("needle should have at least one byte come on");
+    let mut haystack = Vec::new();
+    let _ = io::stdin().read_to_end(&mut haystack).unwrap();
+    println!(
+        "normal: {:?}\nsimd: {:?}",
+        find(needle, &haystack),
+        find_simd(needle, &haystack)
+    );
+}
+
+/// Number of bytes compared per SIMD step in `find_simd`.
+const CHUNK_SIZE: usize = 64;
+
+/// Returns the index of the first byte in `haystack` equal to `needle`,
+/// comparing `CHUNK_SIZE` bytes at a time.
+///
+/// Only full chunks are loaded into SIMD vectors; the trailing partial chunk
+/// is searched with `find`. Zero-padding a short chunk instead would report a
+/// match past the end of `haystack` whenever `needle` is 0.
+//
+// The padded-load v1 of this function benchmarked about three times slower
+// than the naive implementation because compilers eat this kind of
+// autovectorization for breakfast; re-run the benches before trusting that.
+fn find_simd(needle: u8, haystack: &[u8]) -> Option<usize> {
+    let simd_needle = Simd::<u8, CHUNK_SIZE>::splat(needle);
+    let chunks = haystack.chunks_exact(CHUNK_SIZE);
+    // Grab the leftover bytes before `enumerate` consumes the iterator.
+    let tail = chunks.remainder();
+    for (i, chunk) in chunks.enumerate() {
+        let mask = Simd::<u8, CHUNK_SIZE>::from_slice(chunk).simd_eq(simd_needle);
+        if let Some(found) = mask.first_set() {
+            return Some(i * CHUNK_SIZE + found);
+        }
+    }
+    let tail_start = haystack.len() - tail.len();
+    find(needle, tail).map(|found| tail_start + found)
+}
+
+/// Returns the index of the first byte in `haystack` equal to `needle`.
+fn find(needle: u8, haystack: &[u8]) -> Option<usize> {
+    haystack.iter().position(|&b| b == needle)
+}
+
+#[cfg(test)]
+mod bench {
+    use super::*;
+    use std::process::Termination;
+    use test::Bencher;
+
+    const HAYSTACK: &[u8] = include_bytes!("/home/mbk/Documents/shakespeare.txt");
+    const NEEDLE: u8 = b'%';
+
+    #[bench]
+    fn bench_simd(b: &mut Bencher) -> impl Termination {
+        b.iter(|| assert_eq!(Some(5_576_727), find_simd(NEEDLE, HAYSTACK)));
+    }
+
+    #[bench]
+    fn bench_normal(b: &mut Bencher) -> impl Termination {
+        b.iter(|| assert_eq!(Some(5_576_727), find(NEEDLE, HAYSTACK)));
+    }
+}
diff --git a/src/rust_out b/src/rust_out
new file mode 100755
index 0000000..2e2b282
Binary files /dev/null and b/src/rust_out differ