diff --git a/src/main.rs b/src/main.rs index 683c9a5..90e0d7f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -11,7 +11,6 @@ #![allow(non_upper_case_globals, non_camel_case_types, non_snake_case)] use std::arch::x86_64::*; use std::f64::consts::PI; -use std::mem; #[repr(C)] struct body { @@ -158,12 +157,39 @@ unsafe fn advance(bodies: &mut [body; BODIES_COUNT]) { // ROUNDED_INTERACTIONS_COUNT elements to simplify one of the following // loops and to also keep the second and third arrays in position_Deltas // aligned properly. - #[repr(align(16))] #[derive(Copy, Clone)] - struct Align16([f64; ROUNDED_INTERACTIONS_COUNT]); + #[repr(C)] + union Interactions { + scalars: [f64; ROUNDED_INTERACTIONS_COUNT], + vectors: [__m128d; ROUNDED_INTERACTIONS_COUNT / 2], + } - static mut position_Deltas: [Align16; 3] = [Align16([0.; ROUNDED_INTERACTIONS_COUNT]); 3]; - static mut magnitudes: Align16 = Align16([0.; ROUNDED_INTERACTIONS_COUNT]); + impl Interactions { + /// Returns a refrence to the storage as `f64`s. + pub fn as_scalars(&mut self) -> &mut [f64; ROUNDED_INTERACTIONS_COUNT] { + // Safety: the in-memory representation of `f64` and `__m128d` is + // compatible, so accesses to the union members is afe in any + // order.. + unsafe { + &mut self.scalars + } + } + + /// Returns a reference to the storage as `__m128d`s. + pub fn as_vectors(&mut self) -> &mut [__m128d; ROUNDED_INTERACTIONS_COUNT / 2] { + // Safety: the in-memory representation of `f64` and `__m128d` is + // compatible, so accesses to the union members is afe in any + // order.. + unsafe { + &mut self.vectors + } + } + } + + static mut position_Deltas: [Interactions; 3] = + [Interactions { scalars: [0.; ROUNDED_INTERACTIONS_COUNT] }; 3]; + static mut magnitudes: Interactions = + Interactions { scalars: [0.; ROUNDED_INTERACTIONS_COUNT] }; // Calculate the position_Deltas between the bodies for each interaction. { @@ -171,7 +197,7 @@ unsafe fn advance(bodies: &mut [body; BODIES_COUNT]) { for i in 0..BODIES_COUNT - 1 { for j in i + 1..BODIES_COUNT { for m in 0..3 { - position_Deltas[m].0[k] = bodies[i].position[m] - bodies[j].position[m]; + position_Deltas[m].as_scalars()[k] = bodies[i].position[m] - bodies[j].position[m]; } k += 1; } @@ -183,16 +209,12 @@ unsafe fn advance(bodies: &mut [body; BODIES_COUNT]) { // ROUNDED_INTERACTIONS_COUNT/2 iterations are done. for i in 0..ROUNDED_INTERACTIONS_COUNT / 2 { // Load position_Deltas of two bodies into position_Delta. - let mut position_Delta = [mem::MaybeUninit::<__m128d>::uninit(); 3]; + let mut position_Delta = [_mm_setzero_pd(); 3]; for m in 0..3 { - position_Delta[m] - .as_mut_ptr() - .write(*(&position_Deltas[m].0 as *const f64 as *const __m128d).add(i)); + position_Delta[m] = position_Deltas[m].as_vectors()[i]; } - let position_Delta: [__m128d; 3] = mem::transmute(position_Delta); - let distance_Squared: __m128d = _mm_add_pd( _mm_add_pd( _mm_mul_pd(position_Delta[0], position_Delta[0]), @@ -235,12 +257,10 @@ unsafe fn advance(bodies: &mut [body; BODIES_COUNT]) { // distance_Squared which was already calculated earlier. Additionally // this method is probably a little more accurate due to less rounding // as well. - (magnitudes.0.as_mut_ptr() as *mut __m128d) - .add(i) - .write(_mm_mul_pd( - _mm_div_pd(_mm_set1_pd(0.01), distance_Squared), - distance_Reciprocal, - )); + magnitudes.as_vectors()[i] = _mm_mul_pd( + _mm_div_pd(_mm_set1_pd(0.01), distance_Squared), + distance_Reciprocal, + ); } // Use the calculated magnitudes of force to update the velocities for all @@ -249,11 +269,11 @@ unsafe fn advance(bodies: &mut [body; BODIES_COUNT]) { let mut k = 0; for i in 0..BODIES_COUNT - 1 { for j in i + 1..BODIES_COUNT { - let i_mass_magnitude = bodies[i].mass * magnitudes.0[k]; - let j_mass_magnitude = bodies[j].mass * magnitudes.0[k]; + let i_mass_magnitude = bodies[i].mass * magnitudes.as_scalars()[k]; + let j_mass_magnitude = bodies[j].mass * magnitudes.as_scalars()[k]; for m in 0..3 { - bodies[i].velocity[m] -= position_Deltas[m].0[k] * j_mass_magnitude; - bodies[j].velocity[m] += position_Deltas[m].0[k] * i_mass_magnitude; + bodies[i].velocity[m] -= position_Deltas[m].as_scalars()[k] * j_mass_magnitude; + bodies[j].velocity[m] += position_Deltas[m].as_scalars()[k] * i_mass_magnitude; } k += 1; }