Skip to content

Commit 1b7c8bc

Browse files
authored
pathfinder_simd fixes for ARM (#572)
* Add missing methods
* Fix arm F32x4::concat_xy_xy
1 parent 45b7a89 commit 1b7c8bc

File tree

2 files changed

+39
-5
lines changed

2 files changed

+39
-5
lines changed

Cargo.lock

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

simd/src/arm/mod.rs

+38-4
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@
1010

1111
use std::arch::aarch64::{self, float32x2_t, float32x4_t, int32x2_t, int32x4_t};
1212
use std::arch::aarch64::{uint32x2_t, uint32x4_t};
13-
use std::intrinsics::simd::*;
1413
use std::f32;
1514
use std::fmt::{self, Debug, Formatter};
15+
use std::intrinsics::simd::*;
1616
use std::mem;
1717
use std::ops::{Add, BitAnd, BitOr, Div, Index, IndexMut, Mul, Not, Shr, Sub};
1818

@@ -201,7 +201,6 @@ impl IndexMut<usize> for F32x2 {
201201
}
202202
}
203203

204-
205204
impl Add<F32x2> for F32x2 {
206205
type Output = F32x2;
207206
#[inline]
@@ -352,7 +351,7 @@ impl F32x4 {
352351

353352
/// Builds a new vector from the first two lanes of `self` followed by the first two
/// lanes of `other`.
///
/// The shuffle indices address the two inputs as one eight-lane sequence: 0-3 pick
/// lanes of `self`, 4-7 pick lanes of `other`. `[0, 1, 4, 5]` therefore yields
/// `<self.x, self.y, other.x, other.y>`, whereas `[0, 1, 2, 3]` would just return
/// `self` unchanged.
#[inline]
354353
pub fn concat_xy_xy(self, other: F32x4) -> F32x4 {
355-
unsafe { F32x4(simd_shuffle4!(self.0, other.0, [0, 1, 2, 3])) }
354+
unsafe { F32x4(simd_shuffle4!(self.0, other.0, [0, 1, 4, 5])) }
356355
}
357356

358357
#[inline]
@@ -365,6 +364,11 @@ impl F32x4 {
365364
unsafe { F32x4(simd_shuffle4!(self.0, other.0, [2, 3, 6, 7])) }
366365
}
367366

367+
/// Builds a new vector from lanes 3 and 2 of `self` followed by lanes 1 and 0 of
/// `other`, i.e. `<self.w, self.z, other.y, other.x>`.
///
/// Shuffle indices 0-3 select lanes of `self`; indices 4-7 select lanes of `other`.
#[inline]
368+
pub fn concat_wz_yx(self, other: F32x4) -> F32x4 {
369+
unsafe { F32x4(simd_shuffle4!(self.0, other.0, [3, 2, 5, 4])) }
370+
}
371+
368372
// Conversions
369373

370374
/// Converts these packed floats to integers via rounding.
@@ -832,13 +836,22 @@ impl BitOr<U32x2> for U32x2 {
832836
}
833837
}
834838

835-
836839
// Four 32-bit unsigned integers
837840

838841
#[derive(Clone, Copy)]
839842
pub struct U32x4(pub uint32x4_t);
840843

841844
impl U32x4 {
845+
/// Constructs a vector of four unsigned 32-bit integers from `a`, `b`, `c`, and `d`.
///
/// The values are packed into a `[u32; 4]` array and reinterpreted as the wrapped
/// NEON vector type; `a` presumably ends up in lane 0 and `d` in lane 3 (confirm
/// against the `Index` implementation).
#[inline]
846+
pub fn new(a: u32, b: u32, c: u32, d: u32) -> U32x4 {
847+
// `mem::transmute` only compiles when source and destination have the same size,
// so the array and the vector are guaranteed to be the same width here.
unsafe { U32x4(mem::transmute([a, b, c, d])) }
848+
}
849+
850+
/// Constructs a vector with all four lanes set to `x`.
#[inline]
851+
pub fn splat(x: u32) -> U32x4 {
852+
U32x4::new(x, x, x, x)
853+
}
854+
842855
/// Returns true if all four booleans in this vector are true.
843856
///
844857
/// The result is *undefined* if all four values in this vector are not booleans. A boolean is
@@ -856,6 +869,20 @@ impl U32x4 {
856869
pub fn all_false(&self) -> bool {
857870
unsafe { aarch64::vmaxvq_u32(self.0) == 0 }
858871
}
872+
873+
// Packed comparisons
874+
875+
/// Compares `self` and `other` lane by lane for equality and returns the per-lane
/// results as a `U32x4` mask.
///
/// NOTE(review): each result lane is presumably all ones when the corresponding
/// lanes are equal and zero otherwise, per the `simd_eq` intrinsic — confirm.
#[inline]
876+
pub fn packed_eq(self, other: U32x4) -> U32x4 {
877+
unsafe { U32x4(simd_eq(self.0, other.0)) }
878+
}
879+
}
880+
881+
/// Debug formatting for `U32x4`: prints the four lanes as `<a, b, c, d>`.
impl Debug for U32x4 {
882+
#[inline]
883+
fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
884+
// Lanes are read one at a time through the `Index<usize>` implementation.
write!(f, "<{}, {}, {}, {}>", self[0], self[1], self[2], self[3])
885+
}
859886
}
860887

861888
impl Index<usize> for U32x4 {
@@ -870,6 +897,13 @@ impl Index<usize> for U32x4 {
870897
}
871898
}
872899

900+
/// Whole-vector equality for `U32x4`, built on the lane-wise comparison.
impl PartialEq for U32x4 {
901+
#[inline]
902+
fn eq(&self, other: &U32x4) -> bool {
903+
// Equal only when the lane-wise comparison mask passes `all_true`.
self.packed_eq(*other).all_true()
904+
}
905+
}
906+
873907
extern "C" {
874908
#[link_name = "llvm.fabs.v2f32"]
875909
fn fabs_v2f32(a: float32x2_t) -> float32x2_t;

0 commit comments

Comments
 (0)