byte search v1

This commit is contained in:
mehbark 2025-07-05 21:57:21 -04:00
commit 396dd79444
6 changed files with 81 additions and 0 deletions

1
.gitignore vendored Normal file
View file

@ -0,0 +1 @@
/target

7
Cargo.lock generated Normal file
View file

@ -0,0 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "simd-fun"
version = "0.1.0"

6
Cargo.toml Normal file
View file

@ -0,0 +1,6 @@
[package]
name = "simd-fun"
version = "0.1.0"
edition = "2024"
[dependencies]

2
rust-toolchain.toml Normal file
View file

@ -0,0 +1,2 @@
[toolchain]
channel = "nightly"

65
src/main.rs Normal file
View file

@ -0,0 +1,65 @@
#![feature(test, portable_simd)]
extern crate test;
use std::{
env,
io::{self, Read},
simd::prelude::*,
};
/// Command-line entry point.
///
/// Takes the first byte of the first command-line argument as the needle,
/// reads all of standard input as the haystack, and prints the position
/// reported by both the scalar and the SIMD search so they can be compared.
///
/// Panics if the argument is missing or empty, or if reading stdin fails.
fn main() {
    // Only the first byte of the argument is used; the rest is ignored.
    let needle_arg = env::args().nth(1).expect("needle argument is required");
    let needle = needle_arg
        .bytes()
        .next()
        .expect("needle should have at least one byte come on");

    // Slurp the whole of stdin; the byte count returned is not needed.
    let mut haystack = Vec::new();
    io::stdin().read_to_end(&mut haystack).unwrap();

    let scalar_hit = find(needle, &haystack);
    let simd_hit = find_simd(needle, &haystack);
    println!("normal: {:?}\nsimd: {:?}", scalar_hit, simd_hit);
}
/// Number of bytes compared per SIMD step (the lane count of the vector).
const CHUNK_SIZE: usize = 64;

/// Returns the index of the first byte in `haystack` equal to `needle`, or
/// `None` if no byte matches.
///
/// Every full `CHUNK_SIZE`-byte block is loaded into a vector and compared
/// against a splatted needle in one equality test. The trailing partial block
/// (fewer than `CHUNK_SIZE` bytes) is scanned byte by byte instead of being
/// padded: padding the last vector with zeros made a needle of `0` "match"
/// the padding and report an index past the end of the haystack.
///
/// Author's benchmark note (taken on the earlier padded-load version, so
/// re-measure before relying on it): three times slower than the naive
/// implementation because compilers eat this kind of autovectorization for
/// breakfast.
fn find_simd(needle: u8, haystack: &[u8]) -> Option<usize> {
    let simd_needle = Simd::<u8, CHUNK_SIZE>::splat(needle);

    let blocks = haystack.chunks_exact(CHUNK_SIZE);
    // Bytes left over after the last full block, and where they start.
    let tail = blocks.remainder();
    let tail_start = haystack.len() - tail.len();

    for (i, block) in blocks.enumerate() {
        // `chunks_exact` guarantees `block.len() == CHUNK_SIZE`, so this load
        // never panics and never needs padding.
        let lanes = Simd::<u8, CHUNK_SIZE>::from_slice(block);
        if let Some(found) = lanes.simd_eq(simd_needle).first_set() {
            return Some(i * CHUNK_SIZE + found);
        }
    }

    // No match in any full block: fall back to a scalar scan of the tail.
    tail.iter()
        .position(|&byte| byte == needle)
        .map(|offset| tail_start + offset)
}
/// Scalar reference search: returns the index of the first byte in
/// `haystack` equal to `needle`, or `None` when no byte matches.
fn find(needle: u8, haystack: &[u8]) -> Option<usize> {
    for (index, &byte) in haystack.iter().enumerate() {
        if byte == needle {
            return Some(index);
        }
    }
    None
}
/// Micro-benchmarks comparing `find_simd` against the scalar `find` on the
/// same haystack, using the unstable `test` crate's `Bencher`.
#[cfg(test)]
mod bench {
    use super::*;
    use std::hint::black_box;
    use std::process::Termination;
    use test::Bencher;

    // NOTE(review): absolute path on the original author's machine. The file
    // is embedded at compile time, so test/bench builds fail wherever it is
    // missing — point this at a local copy before running.
    const HAYSTACK: &[u8] = include_bytes!("/home/mbk/Documents/shakespeare.txt");
    const NEEDLE: u8 = b'%';
    /// Index both searches are expected to report for `NEEDLE` in `HAYSTACK`.
    const EXPECTED: Option<usize> = Some(5_576_727);

    /// Times the explicit-SIMD search and checks it returns `EXPECTED`.
    #[bench]
    fn bench_simd(b: &mut Bencher) -> impl Termination {
        // `black_box` hides the constant inputs from the optimizer so the
        // search cannot be specialised or hoisted out of the timing loop.
        b.iter(|| assert_eq!(EXPECTED, find_simd(black_box(NEEDLE), black_box(HAYSTACK))));
    }

    /// Times the scalar search and checks it returns `EXPECTED`.
    #[bench]
    fn bench_normal(b: &mut Bencher) -> impl Termination {
        // Same shielding as `bench_simd`, so both are measured alike.
        b.iter(|| assert_eq!(EXPECTED, find(black_box(NEEDLE), black_box(HAYSTACK))));
    }
}

BIN
src/rust_out Executable file

Binary file not shown.