Use a union instead of the dual types, simplify type casting

This commit is contained in:
Timothy Warren 2020-01-13 11:51:46 -05:00
parent 6eb61e7e32
commit 0ddfc1fcf7

View File

@ -11,7 +11,6 @@
#![allow(non_upper_case_globals, non_camel_case_types, non_snake_case)]
use std::arch::x86_64::*;
use std::f64::consts::PI;
use std::mem;
#[repr(C)]
struct body {
@ -158,12 +157,39 @@ unsafe fn advance(bodies: &mut [body; BODIES_COUNT]) {
// ROUNDED_INTERACTIONS_COUNT elements to simplify one of the following
// loops and to also keep the second and third arrays in position_Deltas
// aligned properly.
#[repr(align(16))]
#[derive(Copy, Clone)]
struct Align16([f64; ROUNDED_INTERACTIONS_COUNT]);
#[repr(C)]
union Interactions {
scalars: [f64; ROUNDED_INTERACTIONS_COUNT],
vectors: [__m128d; ROUNDED_INTERACTIONS_COUNT / 2],
}
static mut position_Deltas: [Align16; 3] = [Align16([0.; ROUNDED_INTERACTIONS_COUNT]); 3];
static mut magnitudes: Align16 = Align16([0.; ROUNDED_INTERACTIONS_COUNT]);
impl Interactions {
/// Returns a refrence to the storage as `f64`s.
pub fn as_scalars(&mut self) -> &mut [f64; ROUNDED_INTERACTIONS_COUNT] {
// Safety: the in-memory representation of `f64` and `__m128d` is
// compatible, so accesses to the union members is afe in any
// order..
unsafe {
&mut self.scalars
}
}
/// Returns a reference to the storage as `__m128d`s.
pub fn as_vectors(&mut self) -> &mut [__m128d; ROUNDED_INTERACTIONS_COUNT / 2] {
// Safety: the in-memory representation of `f64` and `__m128d` is
// compatible, so accesses to the union members is afe in any
// order..
unsafe {
&mut self.vectors
}
}
}
static mut position_Deltas: [Interactions; 3] =
[Interactions { scalars: [0.; ROUNDED_INTERACTIONS_COUNT] }; 3];
static mut magnitudes: Interactions =
Interactions { scalars: [0.; ROUNDED_INTERACTIONS_COUNT] };
// Calculate the position_Deltas between the bodies for each interaction.
{
@ -171,7 +197,7 @@ unsafe fn advance(bodies: &mut [body; BODIES_COUNT]) {
for i in 0..BODIES_COUNT - 1 {
for j in i + 1..BODIES_COUNT {
for m in 0..3 {
position_Deltas[m].0[k] = bodies[i].position[m] - bodies[j].position[m];
position_Deltas[m].as_scalars()[k] = bodies[i].position[m] - bodies[j].position[m];
}
k += 1;
}
@ -183,16 +209,12 @@ unsafe fn advance(bodies: &mut [body; BODIES_COUNT]) {
// ROUNDED_INTERACTIONS_COUNT/2 iterations are done.
for i in 0..ROUNDED_INTERACTIONS_COUNT / 2 {
// Load position_Deltas of two bodies into position_Delta.
let mut position_Delta = [mem::MaybeUninit::<__m128d>::uninit(); 3];
let mut position_Delta = [_mm_setzero_pd(); 3];
for m in 0..3 {
position_Delta[m]
.as_mut_ptr()
.write(*(&position_Deltas[m].0 as *const f64 as *const __m128d).add(i));
position_Delta[m] = position_Deltas[m].as_vectors()[i];
}
let position_Delta: [__m128d; 3] = mem::transmute(position_Delta);
let distance_Squared: __m128d = _mm_add_pd(
_mm_add_pd(
_mm_mul_pd(position_Delta[0], position_Delta[0]),
@ -235,12 +257,10 @@ unsafe fn advance(bodies: &mut [body; BODIES_COUNT]) {
// distance_Squared which was already calculated earlier. Additionally
// this method is probably a little more accurate due to less rounding
// as well.
(magnitudes.0.as_mut_ptr() as *mut __m128d)
.add(i)
.write(_mm_mul_pd(
_mm_div_pd(_mm_set1_pd(0.01), distance_Squared),
distance_Reciprocal,
));
magnitudes.as_vectors()[i] = _mm_mul_pd(
_mm_div_pd(_mm_set1_pd(0.01), distance_Squared),
distance_Reciprocal,
);
}
// Use the calculated magnitudes of force to update the velocities for all
@ -249,11 +269,11 @@ unsafe fn advance(bodies: &mut [body; BODIES_COUNT]) {
let mut k = 0;
for i in 0..BODIES_COUNT - 1 {
for j in i + 1..BODIES_COUNT {
let i_mass_magnitude = bodies[i].mass * magnitudes.0[k];
let j_mass_magnitude = bodies[j].mass * magnitudes.0[k];
let i_mass_magnitude = bodies[i].mass * magnitudes.as_scalars()[k];
let j_mass_magnitude = bodies[j].mass * magnitudes.as_scalars()[k];
for m in 0..3 {
bodies[i].velocity[m] -= position_Deltas[m].0[k] * j_mass_magnitude;
bodies[j].velocity[m] += position_Deltas[m].0[k] * i_mass_magnitude;
bodies[i].velocity[m] -= position_Deltas[m].as_scalars()[k] * j_mass_magnitude;
bodies[j].velocity[m] += position_Deltas[m].as_scalars()[k] * i_mass_magnitude;
}
k += 1;
}