Esempio n. 1
1
 /// <summary>
 /// Adds <paramref name="rhs"/> to <paramref name="lhs"/> element by element,
 /// writing each sum back into <paramref name="lhs"/>.
 /// </summary>
 /// <param name="lhs">Left operand; overwritten with the sums.</param>
 /// <param name="rhs">Right operand; read for every index of <paramref name="lhs"/>.</param>
 public static void HwAcceleratedSumInPlace(int[] lhs, int[] rhs)
 {
     var simdLength = Vector<int>.Count;
     var i = 0;

     // "<=" (not "<") so the last full-width chunk is also handled with SIMD;
     // with "<" an array whose length is an exact multiple of the vector width
     // would push its final chunk through the scalar loop below.
     for (; i <= lhs.Length - simdLength; i += simdLength) {
         var va = new Vector<int>(lhs, i);
         var vb = new Vector<int>(rhs, i);
         va += vb;
         va.CopyTo(lhs, i);
     }

     // Scalar tail for the elements that do not fill a whole vector.
     for (; i < lhs.Length; ++i) {
         lhs[i] += rhs[i];
     }
 }
Esempio n. 2
0
        /// <summary>
        /// Multiplies <paramref name="A"/> by <paramref name="B"/> with <c>Vector&lt;double&gt;</c>
        /// operations and returns the product as a flat array.
        /// </summary>
        /// <param name="A">Left matrix; read through <c>internalArray</c> with a row stride of <c>A.Columns</c>.</param>
        /// <param name="B">
        /// Right matrix. It is padded in place through <c>AddColumns</c> when its column count
        /// is not a multiple of <c>Vector&lt;double&gt;.Count</c>.
        /// </param>
        /// <returns>
        /// An array of <c>A.Rows * B.Columns</c> elements, where <c>B.Columns</c> is the column count
        /// after padding and is also the row stride of the result.
        /// </returns>
        /// <remarks>
        /// NOTE(review): assumes <c>AddColumns(n)</c> appends <c>n</c> columns and that
        /// <c>B.internalArray</c> is laid out with a row stride of <c>B.Columns</c> - confirm against Matrix.
        /// </remarks>
        public static double[] Multiply_SIMD_2(Matrix A, Matrix B)
        {
            // About 50% faster when matrix size >= 8x8

            var vecSize = Vector<double>.Count;
            var bRemainder = B.Columns % vecSize;
            if (bRemainder != 0)
            {
                // Pad up to the next multiple of the vector width. Adding only
                // "bRemainder" columns does not generally produce a multiple, which
                // makes the last vector load of a row run past the row.
                B.AddColumns(vecSize - bRemainder);
            }

            var bColumns = B.Columns;
            var C = new double[A.Rows * bColumns];

            for (int i = 0; i < A.Rows; i++)
            {
                // C has bColumns elements per row, so that is the row stride
                // (not A.Rows, which only coincides for square inputs).
                var cRowStart = i * bColumns;

                for (int k = 0; k < A.Columns; k++)
                {
                    // Broadcast A[i, k] once; it is the same for every j.
                    var vA = new Vector<double>(A.internalArray[i * A.Columns + k]);
                    var bRowStart = k * bColumns;

                    for (int j = 0; j < bColumns; j += vecSize)
                    {
                        var vC = new Vector<double>(C, cRowStart + j);
                        var vB = new Vector<double>(B.internalArray, bRowStart + j);
                        vC += vA * vB;
                        vC.CopyTo(C, cRowStart + j);
                    }
                }
            }

            // C is freshly allocated here, so no defensive copy is needed.
            return C;
        }
Esempio n. 3
0
 /// <summary>
 /// Adds <paramref name="rhs"/> to <paramref name="lhs"/> element by element,
 /// writing each sum back into <paramref name="lhs"/>.
 /// </summary>
 /// <param name="lhs">Left operand; overwritten with the sums.</param>
 /// <param name="rhs">Right operand; read for every index of <paramref name="lhs"/>.</param>
 public static void HwAcceleratedSumInPlace(float[] lhs, float[] rhs)
 {
     int simdLength = Vector<float>.Count;
     int i = 0;

     // "<=" (not "<") so the last full-width chunk is also handled with SIMD;
     // with "<" an array whose length is an exact multiple of the vector width
     // would push its final chunk through the scalar loop below.
     for (; i <= lhs.Length - simdLength; i += simdLength) {
         Vector<float> va = new Vector<float>(lhs, i);
         Vector<float> vb = new Vector<float>(rhs, i);
         va += vb;
         va.CopyTo(lhs, i);
     }

     // Scalar tail for the elements that do not fill a whole vector.
     for (; i < lhs.Length; ++i) {
         lhs[i] += rhs[i];
     }
 }
Esempio n. 4
0
 // Regression repro: copies 'vec' into a freshly allocated 5-element array
 // starting at index 5 (one past the last valid index) and then returns a[0].
 // The statement shape is what the test exercises, so it is left exactly as is.
 static int Foo(Vector<int> vec)
 {
     int[] a = new int[5];
     // The index [5] is outside the bounds of array 'a',
     // so this should throw ArgumentOutOfRangeException.
     // There's a subsequent check for whether the destination
     // has enough space to receive the vector, which would
     // raise an ArgumentException; the bug was that assertion
     // prop was using the later exception check to prove the
     // prior one "redundant" because the commas confused the
     // ordering.
     vec.CopyTo(a, 5);
     return a[0];
 }
Esempio n. 5
0
        /// <summary>
        /// Computes the element-wise sum of <paramref name="lhs"/> and <paramref name="rhs"/>
        /// and stores it in <paramref name="result"/>.
        /// </summary>
        /// <param name="lhs">Left operand; its length determines how many elements are processed.</param>
        /// <param name="rhs">Right operand; read for every index of <paramref name="lhs"/>.</param>
        /// <param name="result">Destination; written for every index of <paramref name="lhs"/>.</param>
        public static void HwAcceleratedSum(float[] lhs, float[] rhs, float[] result)
        {
            var simdLength = Vector<float>.Count;
            int i;

            // "<=" (not "<") so the last full-width chunk is also handled with SIMD;
            // with "<" an array whose length is an exact multiple of the vector width
            // would push its final chunk through the scalar loop below.
            for (i = 0; i <= lhs.Length - simdLength; i += simdLength) {
                var va = new Vector<float>(lhs, i);
                var vb = new Vector<float>(rhs, i);
                va += vb;
                va.CopyTo(result, i);
            }

            // Scalar tail for the elements that do not fill a whole vector.
            for (; i < lhs.Length; ++i) {
                result[i] = lhs[i] + rhs[i];
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Solves the linear system <c>Ax = b</c> using the stored LU factors of A.
        /// </summary>
        /// <param name="input">The right hand side vector, <c>b</c>.</param>
        /// <param name="result">The vector that receives the solution, <c>x</c>.</param>
        public override void Solve(Vector<Complex> input, Vector<Complex> result)
        {
            // Argument validation: both vectors must be present...
            if (input == null)
            {
                throw new ArgumentNullException("input");
            }

            if (result == null)
            {
                throw new ArgumentNullException("result");
            }

            // ...have equal lengths...
            if (input.Count != result.Count)
            {
                throw new ArgumentException(Resources.ArgumentVectorsSameLength);
            }

            // ...and match the row count of the factorization.
            var order = Factors.RowCount;
            if (input.Count != order)
            {
                throw Matrix.DimensionsDontMatch<ArgumentException>(input, Factors);
            }

            // Work in 'result': start from a copy of b, then apply the recorded row swaps.
            input.CopyTo(result);
            for (var row = 0; row < Pivots.Length; row++)
            {
                var pivotRow = Pivots[row];
                if (pivotRow != row)
                {
                    var swapped = result[pivotRow];
                    result[pivotRow] = result[row];
                    result[row] = swapped;
                }
            }

            // Solve L*Y = P*B
            for (var col = 0; col < order; col++)
            {
                // result[col] is not modified by the inner loop (row > col), so read it once.
                var known = result[col];
                for (var row = col + 1; row < order; row++)
                {
                    result[row] = result[row] - known*Factors.At(row, col);
                }
            }

            // Solve U*X = Y;
            for (var col = order - 1; col >= 0; col--)
            {
                var solved = result[col]/Factors.At(col, col);
                result[col] = solved;
                for (var row = 0; row < col; row++)
                {
                    result[row] = result[row] - solved*Factors.At(row, col);
                }
            }
        }
        /// <summary>
        /// Walks the node arrays in steps of <paramref name="vectorSize"/>, loading velocity and
        /// position vectors at each step, passing them to <c>Node.Update</c>, and writing the
        /// position vectors back to <paramref name="nodesX"/> and <paramref name="nodesY"/>.
        /// </summary>
        private static void UpdateNodesSingleLoop(Vector<float> dt, float[] nodesX, float[] nodesY,
            float[] velocitiesX, float[] velocitiesY, int vectorSize)
        {
            int offset = 0;
            while (offset < nodesX.Length)
            {
                // Load the four inputs for this chunk.
                Vector<float> velX = new Vector<float>(velocitiesX, offset);
                Vector<float> velY = new Vector<float>(velocitiesY, offset);
                Vector<float> posX = new Vector<float>(nodesX, offset);
                Vector<float> posY = new Vector<float>(nodesY, offset);

                // Positions are passed by reference and stored back afterwards.
                Node.Update(dt, velX, velY, ref posX, ref posY);

                posX.CopyTo(nodesX, offset);
                posY.CopyTo(nodesY, offset);

                offset += vectorSize;
            }
        }
        /// <summary>
        /// Updates node positions row by row, tracking the flat array position with a running
        /// cursor instead of recomputing it for every chunk.
        /// </summary>
        /// <remarks>
        /// The cursor starts at <paramref name="padding"/>, advances by <paramref name="vectorSize"/>
        /// per chunk, and skips <c>2 * padding</c> elements after each row.
        /// </remarks>
        private static void UpdateNodesIncrementIndex(Vector<float> dt, float[] nodesX, float[] nodesY,
            float[] velocitiesX, float[] velocitiesY, int width, int height, int vectorSize, int padding)
        {
            int rowGap = padding * 2;
            int cursor = padding;

            for (int y = 0; y < height; y++, cursor += rowGap)
            {
                for (int x = 0; x < width; x += vectorSize, cursor += vectorSize)
                {
                    Vector<float> velX = new Vector<float>(velocitiesX, cursor);
                    Vector<float> velY = new Vector<float>(velocitiesY, cursor);
                    Vector<float> posX = new Vector<float>(nodesX, cursor);
                    Vector<float> posY = new Vector<float>(nodesY, cursor);

                    // Positions are passed by reference and stored back afterwards.
                    Node.Update(dt, velX, velY, ref posX, ref posY);

                    posX.CopyTo(nodesX, cursor);
                    posY.CopyTo(nodesY, cursor);
                }
            }
        }
        /// <summary>
        /// Updates node positions row by row, asking <c>VectorizedScene.GetIndex</c> for the flat
        /// array position of every chunk.
        /// </summary>
        private static void UpdateNodesCalculateIndex(Vector<float> dt, float[] nodesX, float[] nodesY,
           float[] velocitiesX, float[] velocitiesY, int width, int height, int vectorSize, int padding)
        {
            // The stride is the same for every row, so it is fetched once.
            int rowStride = VectorizedScene.GetStride(width, padding);

            for (int y = 0; y < height; y++)
            {
                int x = 0;
                while (x < width)
                {
                    int offset = VectorizedScene.GetIndex(padding, rowStride, x, y);

                    Vector<float> velX = new Vector<float>(velocitiesX, offset);
                    Vector<float> velY = new Vector<float>(velocitiesY, offset);
                    Vector<float> posX = new Vector<float>(nodesX, offset);
                    Vector<float> posY = new Vector<float>(nodesY, offset);

                    // Positions are passed by reference and stored back afterwards.
                    Node.Update(dt, velX, velY, ref posX, ref posY);

                    posX.CopyTo(nodesX, offset);
                    posY.CopyTo(nodesY, offset);

                    x += vectorSize;
                }
            }
        }
Esempio n. 10
0
        /// <summary>
        /// Solves the linear system <b>Ax = b</b> using the stored Cholesky factor of A.
        /// </summary>
        /// <param name="input">The right hand side vector, <b>b</b>.</param>
        /// <param name="result">The vector that receives the solution, <b>x</b>.</param>
        public override void Solve(Vector<Complex> input, Vector<Complex> result)
        {
            // Both vectors must have the same length...
            if (input.Count != result.Count)
            {
                throw new ArgumentException(Resources.ArgumentVectorsSameLength);
            }

            // ...and that length must match the factor's row count.
            var order = Factor.RowCount;
            if (input.Count != order)
            {
                throw Matrix.DimensionsDontMatch<ArgumentException>(input, Factor);
            }

            // The substitutions below run in place on a copy of b.
            input.CopyTo(result);

            // Solve L*Y = B;
            for (var row = 0; row < order; row++)
            {
                var acc = result[row];
                var col = row - 1;
                while (col >= 0)
                {
                    acc -= Factor.At(row, col)*result[col];
                    col--;
                }

                result[row] = acc/Factor.At(row, row);
            }

            // Solve L'*X = Y;
            for (var row = order - 1; row >= 0; row--)
            {
                var acc = result[row];
                var below = row + 1;
                while (below < order)
                {
                    acc -= Factor.At(below, row).Conjugate()*result[below];
                    below++;
                }

                result[row] = acc/Factor.At(row, row);
            }
        }
Esempio n. 11
0
        /// <summary>
        /// Applies the impulse produced by <c>Band.GetImpulse</c> to the velocity arrays: it is added
        /// at <paramref name="index1"/> and subtracted at <paramref name="index2"/>.
        /// </summary>
        /// <remarks>
        /// Each load-add-store on an array is completed before the next load from that array,
        /// so the second update reads whatever the first one stored.
        /// </remarks>
        private static void AddVelocityOverlapping(Vector<float> node1X, Vector<float> node1Y,
            Vector<float> node2X, Vector<float> node2Y, Vector<float> restLength, Vector<float> lambda,
            float[] velocitiesX, float[] velocitiesY, int index1, int index2)
        {
            Vector<float> deltaX, deltaY;
            Band.GetImpulse(node1X, node1Y, node2X, node2Y, restLength, lambda, out deltaX, out deltaY);

            // X components: finish the first node's store before loading the second node.
            (new Vector<float>(velocitiesX, index1) + deltaX).CopyTo(velocitiesX, index1);
            (new Vector<float>(velocitiesX, index2) - deltaX).CopyTo(velocitiesX, index2);

            // Y components, same sequencing.
            (new Vector<float>(velocitiesY, index1) + deltaY).CopyTo(velocitiesY, index1);
            (new Vector<float>(velocitiesY, index2) - deltaY).CopyTo(velocitiesY, index2);
        }
        /// <summary>
        /// Applies the impulse produced by <c>Band.GetImpulse</c> to the velocity arrays, after
        /// zeroing the lanes where the X impulse is NaN: the impulse is added at
        /// <paramref name="index1"/> and subtracted at <paramref name="index2"/>.
        /// </summary>
        /// <remarks>
        /// Each load-add-store on an array is completed before the next load from that array,
        /// so the second update reads whatever the first one stored.
        /// </remarks>
        private static void AddVelocityOverlapping(Vector<float> node1X, Vector<float> node1Y,
            Vector<float> node2X, Vector<float> node2Y, Vector<float> restLength, Vector<float> lambda,
            float[] velocitiesX, float[] velocitiesY, int index1, int index2)
        {
            Vector<float> deltaX, deltaY;
            Band.GetImpulse(node1X, node1Y, node2X, node2Y, restLength, lambda, out deltaX, out deltaY);

            // A NaN never compares equal to itself, so the self-comparison gives an
            // all-bits-set lane for every non-NaN value and a zero lane for every NaN.
            Vector<float> notNaN = Vector.AsVectorSingle(Vector.Equals(deltaX, deltaX));
            deltaX = deltaX & notNaN;
            // Note: NaN's will be in the same spot for both components, so the X mask is reused for Y.
            deltaY = deltaY & notNaN;

            // X components: finish the first node's store before loading the second node.
            (new Vector<float>(velocitiesX, index1) + deltaX).CopyTo(velocitiesX, index1);
            (new Vector<float>(velocitiesX, index2) - deltaX).CopyTo(velocitiesX, index2);

            // Y components, same sequencing.
            (new Vector<float>(velocitiesY, index1) + deltaY).CopyTo(velocitiesY, index1);
            (new Vector<float>(velocitiesY, index2) - deltaY).CopyTo(velocitiesY, index2);
        }
Esempio n. 13
0
        /// <summary>
        /// Solves the linear system <b>Ax = b</b> using the stored Cholesky factor of A.
        /// </summary>
        /// <param name="input">The right hand side vector, <b>b</b>.</param>
        /// <param name="result">The vector that receives the solution, <b>x</b>.</param>
        public override void Solve(Vector<Complex> input, Vector<Complex> result)
        {
            // Argument validation: both vectors must be present...
            if (input == null)
            {
                throw new ArgumentNullException("input");
            }

            if (result == null)
            {
                throw new ArgumentNullException("result");
            }

            // ...have equal lengths...
            if (input.Count != result.Count)
            {
                throw new ArgumentException(Resources.ArgumentVectorsSameLength);
            }

            // ...and match the row count of the factor.
            var order = CholeskyFactor.RowCount;
            if (input.Count != order)
            {
                throw new ArgumentException(Resources.ArgumentMatrixDimensions);
            }

            // The substitutions below run in place on a copy of b.
            input.CopyTo(result);

            // Solve L*Y = B;
            for (var row = 0; row < order; row++)
            {
                var acc = result[row];
                var col = row - 1;
                while (col >= 0)
                {
                    acc -= CholeskyFactor.At(row, col) * result[col];
                    col--;
                }

                result[row] = acc / CholeskyFactor.At(row, row);
            }

            // Solve L'*X = Y;
            for (var row = order - 1; row >= 0; row--)
            {
                var acc = result[row];
                var below = row + 1;
                while (below < order)
                {
                    acc -= CholeskyFactor.At(below, row).Conjugate() * result[below];
                    below++;
                }

                result[row] = acc / CholeskyFactor.At(row, row);
            }
        }
Esempio n. 14
0
            /// <summary>
            /// Fills <paramref name="calculationSpace"/> with one weight per index and returns the sum of those weights.
            /// An entry is set to 0 when the row travel time (offset by the previous zone) plus the column travel
            /// time (offset by the next zone) exceeds the available time; otherwise it is the product of the
            /// matching <c>To</c> and <c>From</c> entries and an origin-destination utility factor.
            /// </summary>
            /// <param name="previousZone">Zone whose flat index selects the offset into the row travel times and the <c>To</c> data.</param>
            /// <param name="ep">Not referenced in this method body.</param>
            /// <param name="nextZone">Zone whose flat index selects the offset into the column travel times and the <c>From</c> data.</param>
            /// <param name="startTime">Passed to <c>GetTimePeriod</c> to select which time period's data is used.</param>
            /// <param name="availableTime">Time budget, converted to minutes, that the two travel times together must not exceed.</param>
            /// <param name="calculationSpace">Output buffer; every element is overwritten.</param>
            /// <returns>The sum of the calculation space</returns>
            private float CalculateLocationProbabilities(IZone previousZone, IEpisode ep, IZone nextZone, Time startTime, Time availableTime, float[] calculationSpace)
            {
                var p = zoneSystem.GetFlatIndex(previousZone.ZoneNumber);
                var n = zoneSystem.GetFlatIndex(nextZone.ZoneNumber);
                var size = zones.Length;
                int index = GetTimePeriod(startTime);
                var rowTimes = Parent.TimePeriods[index].RowTravelTimes;
                var columnTimes = Parent.TimePeriods[index].ColumnTravelTimes;
                var from = From[index];
                var available = availableTime.ToMinutes();
                var to = To[index];
                var pIndex = FlatZoneToPDCubeLookup[p];
                var nIndex = FlatZoneToPDCubeLookup[n];
                var data = PDCube[index][pIndex][nIndex];
                // Offsets of the previous / next zone's block within the flattened arrays.
                int previousIndexOffset = p * size;
                int nextIndexOffset = n * size;
                float total = 0.0f;
                if (Vector.IsHardwareAccelerated)
                {
                    Vector<float> availableTimeV = new Vector<float>(available);
                    Vector<float> totalV = Vector<float>.Zero;
                    int i = 0;
                    // Pass 1 (scalar): store the origin-destination utility factor for every index.
                    if (nIndex == pIndex)
                    {
                        for (i = 0; i < calculationSpace.Length; i++)
                        {
                            var odUtility = 1.0f;
                            var pdindex = data[FlatZoneToPDCubeLookup[i]];
                            if (pdindex >= 0)
                            {
                                // expSamePD is multiplied in only when index i maps to the same lookup value as the previous zone.
                                odUtility = (pIndex == FlatZoneToPDCubeLookup[i]) ? TimePeriod[index].ODConstants[pdindex].ExpConstant * TimePeriod[index].expSamePD
                                    : TimePeriod[index].ODConstants[pdindex].ExpConstant;
                            }
                            else
                            {
                                odUtility = (pIndex == FlatZoneToPDCubeLookup[i]) ? TimePeriod[index].expSamePD : 1.0f;
                            }
                            calculationSpace[i] = odUtility;
                        }
                    }
                    else
                    {
                        for (i = 0; i < calculationSpace.Length; i++)
                        {
                            var pdindex = data[FlatZoneToPDCubeLookup[i]];
                            calculationSpace[i] = pdindex >= 0 ? TimePeriod[index].ODConstants[pdindex].ExpConstant : 1f;
                        }
                    }

                    // Pass 2 (vectorized): apply the time feasibility mask and the To/From factors, one full vector at a time.
                    for (i = 0; i <= calculationSpace.Length - Vector<float>.Count; i += Vector<float>.Count)
                    {
                        var timeTo = new Vector<float>(rowTimes, previousIndexOffset + i);
                        var timeFrom = new Vector<float>(columnTimes, nextIndexOffset + i);
                        var utilityTo = new Vector<float>(to, previousIndexOffset + i);
                        var utilityFrom = new Vector<float>(from, nextIndexOffset + i);
                        Vector<float> calcV = new Vector<float>(calculationSpace, i);
                        // Lanes that fit in the available time are all-ones, the others are zero.
                        Vector<int> zeroMask = Vector.LessThanOrEqual(timeTo + timeFrom, availableTimeV);
                        // AND-ing the raw bits with the mask zeroes the lanes that do not fit.
                        calcV = Vector.AsVectorSingle(Vector.BitwiseAnd(Vector.AsVectorInt32(calcV), zeroMask))
                            * utilityTo * utilityFrom;
                        calcV.CopyTo(calculationSpace, i);
                        totalV += calcV;
                    }
                    // Scalar tail: i carries over from the vector loop and covers the elements that did not fill a vector.
                    float remainderTotal = 0.0f;
                    for (; i < calculationSpace.Length; i++)
                    {
                        if (rowTimes[previousIndexOffset + i] + columnTimes[nextIndexOffset + i] <= available)
                        {
                            remainderTotal += (calculationSpace[i] = to[previousIndexOffset + i] * from[nextIndexOffset + i] * calculationSpace[i]);
                        }
                        else
                        {
                            calculationSpace[i] = 0;
                        }
                    }
                    // The dot product with a vector of ones sums the lanes of totalV.
                    total += remainderTotal + Vector.Dot(totalV, Vector<float>.One);
                }
                else
                {
                    // Non-accelerated path: a single scalar pass over pinned arrays that computes the
                    // utility factor only for entries that fit in the available time.
                    unsafe
                    {
                        fixed (float* pRowTimes = &rowTimes[0])
                        fixed (float* pColumnTimes = &columnTimes[0])
                        fixed (float* pTo = &to[0])
                        fixed (float* pFrom = &from[0])
                        fixed (int* pData = &data[0])
                        {
                            if (nIndex == pIndex)
                            {
                                for (int i = 0; i < calculationSpace.Length; i++)
                                {
                                    if (pRowTimes[previousIndexOffset + i] + pColumnTimes[nextIndexOffset + i] <= available)
                                    {
                                        var odUtility = 1.0f;
                                        var pdindex = pData[FlatZoneToPDCubeLookup[i]];
                                        if (pdindex >= 0)
                                        {
                                            odUtility = (pIndex == FlatZoneToPDCubeLookup[i]) ?
                                                TimePeriod[index].ODConstants[pdindex].ExpConstant * TimePeriod[index].expSamePD
                                                : TimePeriod[index].ODConstants[pdindex].ExpConstant;
                                        }
                                        else
                                        {
                                            odUtility = (pIndex == FlatZoneToPDCubeLookup[i]) ? TimePeriod[index].expSamePD : 1.0f;
                                        }
                                        total += calculationSpace[i] = pTo[previousIndexOffset + i] * pFrom[nextIndexOffset + i] * odUtility;
                                    }
                                    else
                                    {
                                        calculationSpace[i] = 0;
                                    }
                                }
                            }
                            else
                            {
                                for (int i = 0; i < calculationSpace.Length; i++)
                                {
                                    if (pRowTimes[previousIndexOffset + i] + pColumnTimes[nextIndexOffset + i] <= available)
                                    {
                                        var odUtility = 1.0f;
                                        var pdindex = pData[FlatZoneToPDCubeLookup[i]];
                                        if (pdindex >= 0)
                                        {
                                            odUtility = TimePeriod[index].ODConstants[pdindex].ExpConstant;
                                        }
                                        total += calculationSpace[i] = pTo[previousIndexOffset + i] * pFrom[nextIndexOffset + i] * odUtility;
                                    }
                                    else
                                    {
                                        calculationSpace[i] = 0;
                                    }
                                }
                            }
                        }
                    }
                }
                return total;
            }
Esempio n. 15
0
 /// <summary>
 /// For <paramref name="length"/> elements, stores <c>first * second * third * scalar</c> in
 /// <paramref name="destination"/> and adds the same product to <paramref name="columnSum"/>.
 /// Each array is read or written starting at its own index parameter.
 /// </summary>
 public static void Multiply3Scalar1AndColumnSum(float[] destination, int destIndex, float[] first, int firstIndex,
     float[] second, int secondIndex, float[] third, int thirdIndex, float scalar, float[] columnSum, int columnIndex, int length)
 {
     int lanes = Vector<float>.Count;
     var scalarV = new Vector<float>(scalar);
     bool allOffsetsZero = (destIndex | firstIndex | secondIndex | thirdIndex | columnIndex) == 0;

     // 'pos' runs through the full-width chunks first and then carries on into the scalar tail.
     int pos = 0;
     if (allOffsetsZero)
     {
         // Every array starts at 0, so no per-array offset arithmetic is needed.
         for (; pos <= length - lanes; pos += lanes)
         {
             Vector<float> product = new Vector<float>(first, pos) * new Vector<float>(second, pos) * new Vector<float>(third, pos) * scalarV;
             Vector<float> updatedSum = new Vector<float>(columnSum, pos) + product;
             updatedSum.CopyTo(columnSum, pos);
             product.CopyTo(destination, pos);
         }
         for (; pos < length; pos++)
         {
             // The running sum is read before the destination is written.
             float previousSum = columnSum[pos];
             float product = first[pos] * second[pos] * third[pos] * scalar;
             destination[pos] = product;
             columnSum[pos] = previousSum + product;
         }
     }
     else
     {
         // General case: every array is addressed relative to its own starting index.
         for (; pos <= length - lanes; pos += lanes)
         {
             Vector<float> product = new Vector<float>(first, pos + firstIndex) * new Vector<float>(second, pos + secondIndex) * new Vector<float>(third, pos + thirdIndex) * scalarV;
             Vector<float> updatedSum = new Vector<float>(columnSum, pos + columnIndex) + product;
             updatedSum.CopyTo(columnSum, pos + columnIndex);
             product.CopyTo(destination, pos + destIndex);
         }
         for (; pos < length; pos++)
         {
             // The running sum is read before the destination is written.
             float previousSum = columnSum[pos + columnIndex];
             float product = first[pos + firstIndex] * second[pos + secondIndex] * third[pos + thirdIndex] * scalar;
             destination[pos + destIndex] = product;
             columnSum[pos + columnIndex] = previousSum + product;
         }
     }
 }
Esempio n. 16
0
        /// <summary>
        /// Applies the impulse produced by <c>Band.GetImpulse</c> to the velocity arrays: it is added
        /// at <paramref name="index1"/> and subtracted at <paramref name="index2"/>.
        /// </summary>
        /// <remarks>
        /// For each array both vectors are loaded before either is stored, so the second load
        /// does not observe the first store.
        /// </remarks>
        private static void AddVelocityIndependent(Vector<float> node1X, Vector<float> node1Y,
            Vector<float> node2X, Vector<float> node2Y, Vector<float> restLength, Vector<float> lambda, float[] velocitiesX, float[] velocitiesY, int index1, int index2)
        {
            Vector<float> deltaX, deltaY;
            Band.GetImpulse(node1X, node1Y, node2X, node2Y, restLength, lambda, out deltaX, out deltaY);

            // X components: load both, then store both.
            Vector<float> firstX = new Vector<float>(velocitiesX, index1);
            Vector<float> secondX = new Vector<float>(velocitiesX, index2);
            (firstX + deltaX).CopyTo(velocitiesX, index1);
            (secondX - deltaX).CopyTo(velocitiesX, index2);

            // TODO: Will the CPU schedule these loads to wait on the previous stores?
            // Or is it smart enough to realise that they're independent?
            Vector<float> firstY = new Vector<float>(velocitiesY, index1);
            Vector<float> secondY = new Vector<float>(velocitiesY, index2);
            (firstY + deltaY).CopyTo(velocitiesY, index1);
            (secondY - deltaY).CopyTo(velocitiesY, index2);
        }
Esempio n. 17
0
 /// <summary>
 /// Fills <paramref name="calculationSpace"/> with one weight per zone index and then picks a zone at
 /// random, with probability proportional to its weight.
 /// A weight is 0 when the row travel time (offset by the previous zone) plus the column travel time
 /// (offset by the next zone) exceeds the available time; otherwise it is the product of the matching
 /// <c>To</c> and <c>From</c> entries and an origin-destination utility factor.
 /// </summary>
 /// <param name="previousZone">Zone whose flat index selects the offset into the row travel times and the <c>To</c> data.</param>
 /// <param name="ep">Not referenced in this method body.</param>
 /// <param name="nextZone">Zone whose flat index selects the offset into the column travel times and the <c>From</c> data.</param>
 /// <param name="startTime">Passed to <c>GetTimePeriod</c> to select which time period's data is used.</param>
 /// <param name="availableTime">Time budget, converted to minutes, that the two travel times together must not exceed.</param>
 /// <param name="calculationSpace">Scratch buffer for the weights; every element is overwritten.</param>
 /// <param name="random">Source of the random draw.</param>
 /// <returns>The selected zone, or <c>null</c> when the weights do not sum to a positive value.</returns>
 internal IZone GetLocation(IZone previousZone, IEpisode ep, IZone nextZone, Time startTime, Time availableTime, float[] calculationSpace, Random random)
 {
     var p = zoneSystem.GetFlatIndex(previousZone.ZoneNumber);
     var n = zoneSystem.GetFlatIndex(nextZone.ZoneNumber);
     var size = zones.Length;
     int index = GetTimePeriod(startTime);
     var rowTimes = Parent.TimePeriods[index].RowTravelTimes;
     var columnTimes = Parent.TimePeriods[index].ColumnTravelTimes;
     var from = From[index];
     var available = availableTime.ToMinutes();
     var to = To[index];
     var pIndex = FlatZoneToPDCubeLookup[p];
     var nIndex = FlatZoneToPDCubeLookup[n];
     var data = PDCube[pIndex][nIndex];
     // Offsets of the previous / next zone's block within the flattened arrays.
     int previousIndexOffset = p * size;
     int nextSizeOffset = n * size;
     float total = 0.0f;
     if(VectorHelper.IsHardwareAccelerated)
     {
         Vector<float> availableTimeV = new Vector<float>(available);
         Vector<float> totalV = Vector<float>.Zero;
         // Pass 1 (scalar): store the origin-destination utility factor for every index.
         if(nIndex == pIndex)
         {
             for(int i = 0; i < calculationSpace.Length; i++)
             {
                 var odUtility = 1.0f;
                 var pdindex = data[FlatZoneToPDCubeLookup[i]];
                 if(pdindex >= 0)
                 {
                     odUtility = (pIndex == FlatZoneToPDCubeLookup[i]) ? ODConstants[pdindex].ExpConstant * expSamePD : ODConstants[pdindex].ExpConstant;
                 }
                 else
                 {
                     odUtility = (pIndex == FlatZoneToPDCubeLookup[i]) ? expSamePD : 1.0f;
                 }
                 calculationSpace[i] = odUtility;
             }
         }
         else
         {
             for(int i = 0; i < calculationSpace.Length; i++)
             {
                 var pdindex = data[FlatZoneToPDCubeLookup[i]];
                 calculationSpace[i] = pdindex >= 0 ? ODConstants[pdindex].ExpConstant : 1f;
             }
         }
         // Pass 2 (vectorized): apply the time feasibility mask and the To/From factors.
         for(int i = 0; i <= calculationSpace.Length - Vector<float>.Count; i += Vector<float>.Count)
         {
             // The trip time is row time PLUS column time, matching the scalar tail and the
             // non-accelerated path below (not the row time added to itself).
             var timeTo = new Vector<float>(rowTimes, previousIndexOffset + i);
             var timeFrom = new Vector<float>(columnTimes, nextSizeOffset + i);
             // Lanes that fit in the available time are all-ones, the others are zero.
             Vector<int> zeroMask = Vector.LessThanOrEqual(timeTo + timeFrom, availableTimeV);
             Vector<float> calcV = new Vector<float>(calculationSpace, i);
             // The last factor is loaded from 'from'; passing only the integer offset to the
             // constructor would broadcast the offset itself as a float.
             calcV = Vector.AsVectorSingle(Vector.BitwiseAnd(Vector.AsVectorInt32(calcV), zeroMask))
                 * new Vector<float>(to, previousIndexOffset + i)
                 * new Vector<float>(from, nextSizeOffset + i);
             calcV.CopyTo(calculationSpace, i);
             totalV += calcV;
         }
         // Scalar tail for the elements that did not fill a whole vector.
         float remainderTotal = 0.0f;
         for(int i = calculationSpace.Length - (calculationSpace.Length % Vector<float>.Count); i < calculationSpace.Length; i++)
         {
             if(rowTimes[previousIndexOffset + i] + columnTimes[nextSizeOffset + i] <= available)
             {
                 remainderTotal += (calculationSpace[i] = to[previousIndexOffset + i] * from[nextSizeOffset + i] * calculationSpace[i]);
             }
             else
             {
                 calculationSpace[i] = 0;
             }
         }
         // The dot product with a vector of ones sums the lanes of totalV.
         total += remainderTotal + Vector.Dot(totalV, Vector<float>.One);
     }
     else
     {
         // Non-accelerated path: a single scalar pass over pinned arrays that computes the
         // utility factor only for entries that fit in the available time.
         unsafe
         {
             fixed (float* pRowTimes = &rowTimes[0])
             fixed (float* pColumnTimes = &columnTimes[0])
             fixed (float* pTo = &to[0])
             fixed (float* pFrom = &from[0])
             fixed (int* pData = &data[0])
             {
                 if(nIndex == pIndex)
                 {
                     for(int i = 0; i < calculationSpace.Length; i++)
                     {
                         if(pRowTimes[previousIndexOffset + i] + pColumnTimes[nextSizeOffset + i] <= available)
                         {
                             var odUtility = 1.0f;
                             var pdindex = pData[FlatZoneToPDCubeLookup[i]];
                             if(pdindex >= 0)
                             {
                                 odUtility = (pIndex == FlatZoneToPDCubeLookup[i]) ? ODConstants[pdindex].ExpConstant * expSamePD : ODConstants[pdindex].ExpConstant;
                             }
                             else
                             {
                                 odUtility = (pIndex == FlatZoneToPDCubeLookup[i]) ? expSamePD : 1.0f;
                             }
                             total += calculationSpace[i] = pTo[previousIndexOffset + i] * pFrom[nextSizeOffset + i] * odUtility;
                         }
                         else
                         {
                             calculationSpace[i] = 0;
                         }
                     }
                 }
                 else
                 {
                     for(int i = 0; i < calculationSpace.Length; i++)
                     {
                         if(pRowTimes[previousIndexOffset + i] + pColumnTimes[nextSizeOffset + i] <= available)
                         {
                             var odUtility = 1.0f;
                             var pdindex = pData[FlatZoneToPDCubeLookup[i]];
                             if(pdindex >= 0)
                             {
                                 odUtility = ODConstants[pdindex].ExpConstant;
                             }
                             total += calculationSpace[i] = pTo[previousIndexOffset + i] * pFrom[nextSizeOffset + i] * odUtility;
                         }
                         else
                         {
                             calculationSpace[i] = 0;
                         }
                     }
                 }
             }
         }
     }
     // Nothing is reachable within the available time.
     if(total <= 0)
     {
         return null;
     }
     // Draw a point in [0, total) and return the first zone whose cumulative weight reaches it.
     var pop = (float)random.NextDouble() * total;
     float current = 0.0f;
     for(int i = 0; i < calculationSpace.Length; i++)
     {
         current += calculationSpace[i];
         if(pop <= current)
         {
             return zones[i];
         }
     }
     // Fallback when the cumulative sum never reached the draw (presumably due to
     // floating-point rounding): return the first zone with a positive weight.
     for(int i = 0; i < calculationSpace.Length; i++)
     {
         if(calculationSpace[i] > 0)
         {
             return zones[i];
         }
     }
     return null;
 }
        /// <summary>
        /// Applies the impulse produced by <c>Band.GetImpulse</c> to the velocity arrays, after
        /// zeroing the lanes where the X impulse is NaN: the impulse is added at
        /// <paramref name="index1"/> and subtracted at <paramref name="index2"/>.
        /// </summary>
        /// <remarks>
        /// For each array both vectors are loaded before either is stored, so the second load
        /// does not observe the first store.
        /// </remarks>
        private static void AddVelocityIndependent(Vector<float> node1X, Vector<float> node1Y,
            Vector<float> node2X, Vector<float> node2Y, Vector<float> restLength, Vector<float> lambda, float[] velocitiesX, float[] velocitiesY, int index1, int index2)
        {
            Vector<float> deltaX, deltaY;
            Band.GetImpulse(node1X, node1Y, node2X, node2Y, restLength, lambda, out deltaX, out deltaY);

            // A NaN never compares equal to itself, so the self-comparison gives an
            // all-bits-set lane for every non-NaN value and a zero lane for every NaN.
            Vector<float> notNaN = Vector.AsVectorSingle(Vector.Equals(deltaX, deltaX));
            deltaX = deltaX & notNaN;
            // Note: NaN's will be in the same spot for both components, so the X mask is reused for Y.
            deltaY = deltaY & notNaN;

            // X components: load both, then store both.
            Vector<float> firstX = new Vector<float>(velocitiesX, index1);
            Vector<float> secondX = new Vector<float>(velocitiesX, index2);
            (firstX + deltaX).CopyTo(velocitiesX, index1);
            (secondX - deltaX).CopyTo(velocitiesX, index2);

            // TODO: Will the CPU schedule these loads to wait on the previous stores?
            // Or is it smart enough to realise that they're independent?
            Vector<float> firstY = new Vector<float>(velocitiesY, index1);
            Vector<float> secondY = new Vector<float>(velocitiesY, index2);
            (firstY + deltaY).CopyTo(velocitiesY, index1);
            (secondY - deltaY).CopyTo(velocitiesY, index2);
        }