Skip to content

Commit

Permalink
Merge pull request #15 from LaihoE/unroll_simd_loops
Browse files Browse the repository at this point in the history
unroll eq
  • Loading branch information
LaihoE authored Jul 29, 2024
2 parents 9fdbf2a + 31cde14 commit 256a096
Showing 1 changed file with 13 additions and 14 deletions.
27 changes: 13 additions & 14 deletions src/eq.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use crate::SIMD_LEN;
use crate::UNROLL_FACTOR;
use std::simd::cmp::SimdPartialEq;
use std::simd::Mask;
use std::simd::Simd;
Expand All @@ -21,25 +22,23 @@ where
// Element-wise equality check over two slices, comparing SIMD_LEN lanes at a time.
//
// NOTE(review): this span is a rendered diff hunk without +/- markers, so two
// versions of the body appear interleaved (`chunks_a`/`chunks_b` are bound twice and
// there are two final `return`s) and the braces do not balance as one function.
// The hunk header's line counts suggest the `SIMD_LEN`-sized loop is the removed
// version and the `SIMD_LEN * UNROLL_FACTOR` loop is the added one — confirm against
// the repository before relying on that.
fn eq_simd(&self, other: &Self) -> bool {
let a = self.as_slice();
let b = other.as_slice();

// Slices of different lengths can never be equal.
if a.len() != b.len() {
return false;
}
// Inputs that fit in a single vector are compared element by element instead.
if a.len() <= SIMD_LEN || b.len() <= SIMD_LEN {
return a.iter().eq(b);
}
// One-vector-per-iteration variant: split both slices into SIMD_LEN-sized chunks.
let chunks_a = a.chunks_exact(SIMD_LEN);
let chunks_b = b.chunks_exact(SIMD_LEN);
// Despite its name, this holds whether the leftover tails (the elements that do not
// fill a whole chunk) are equal; it is computed before the chunk iterators are consumed.
let remainder_is_sorted = chunks_a.remainder().iter().eq(chunks_b.remainder().iter());

for (a, b) in chunks_a.zip(chunks_b) {
let chunk_a = Simd::from_slice(a);
let chunk_b = Simd::from_slice(b);
// A non-zero bitmask means at least one lane differs.
if chunk_a.simd_ne(chunk_b).to_bitmask() != 0 {
// Unrolled variant: each outer chunk spans UNROLL_FACTOR vectors' worth of elements.
let mut chunks_a = a.chunks_exact(SIMD_LEN * UNROLL_FACTOR);
let mut chunks_b = b.chunks_exact(SIMD_LEN * UNROLL_FACTOR);
// Accumulates lane-wise "not equal" results; presumably starts with every lane
// false — confirm `Mask::default()` semantics.
let mut mask = Mask::default();

// `by_ref()` keeps both iterators alive so `remainder()` can still be read after the loop.
for (aa, bb) in chunks_a.by_ref().zip(chunks_b.by_ref()) {
// OR together the inequality masks of every SIMD_LEN-sized sub-chunk so that the
// branch below runs once per outer chunk rather than once per vector.
for (aaa, bbb) in aa.chunks_exact(SIMD_LEN).zip(bb.chunks_exact(SIMD_LEN)) {
mask |= Simd::from_slice(aaa).simd_ne(Simd::from_slice(bbb));
}
// Any set lane means some pair of elements in this outer chunk differed.
if mask.any() {
return false;
}
}
// Result of the one-vector-per-iteration variant: the tail comparison computed up front.
return remainder_is_sorted;
// Result of the unrolled variant: compare the tails that did not fill a whole
// SIMD_LEN * UNROLL_FACTOR chunk.
return chunks_a.remainder().eq(chunks_b.remainder());
}
}

Expand All @@ -61,7 +60,7 @@ mod tests {
Simd<T, SIMD_LEN>: SimdPartialEq<Mask = Mask<T::Mask, SIMD_LEN>>,
Standard: Distribution<T>,
{
for len in 0..100 {
for len in 0..1000 {
for _ in 0..5 {
let mut v: Vec<T> = vec![T::default(); len];
let mut rng = rand::thread_rng();
Expand Down Expand Up @@ -98,7 +97,7 @@ mod tests {
Simd<T, SIMD_LEN>: SimdPartialEq<Mask = Mask<T::Mask, SIMD_LEN>>,
Standard: Distribution<T>,
{
for len in 0..100 {
for len in 0..1000 {
for _ in 0..5 {
let mut v: Vec<T> = vec![T::default(); len];
let mut rng = rand::thread_rng();
Expand Down

0 comments on commit 256a096

Please sign in to comment.