Example #1
0
            /// <summary>
            /// Adds the vectors of <paramref name="v"/>, reinterpreted as 64-bit lanes, into
            /// <paramref name="s"/> in place, starting <paramref name="shift"/> 64-bit lanes
            /// from the beginning of <paramref name="s"/>.
            /// </summary>
            /// <param name="s">Destination vectors; updated in place by lane-wise addition.</param>
            /// <param name="v">Source vectors; each one is reinterpreted via <c>AsUInt64()</c> before use.</param>
            /// <param name="shift">
            /// Non-negative offset into <paramref name="s"/> measured in <c>UInt64</c> lanes.
            /// The quotient by <c>Vector256&lt;UInt64&gt;.Count</c> selects the first destination
            /// vector; the remainder selects the lane permutation applied to every source vector.
            /// </param>
            /// <exception cref="ArgumentOutOfRangeException"><paramref name="shift"/> is negative.</exception>
            /// <remarks>
            /// The capacity of <paramref name="s"/> is asserted only in DEBUG builds. In other builds
            /// the caller must guarantee that <paramref name="s"/> holds at least
            /// <c>shift / 4 + v.Length</c> vectors when <paramref name="shift"/> is a multiple of 4,
            /// and one more vector than that otherwise, because the writes go through raw pointers.
            /// </remarks>
            private static unsafe void Add(Vector256<UInt64>[] s, Vector256<UInt32>[] v, int shift)
            {
                // A negative shift would produce a negative destination index and, on the aligned
                // path, write before the start of `s` through the unchecked pointer.
                if (shift < 0)
                {
                    throw new ArgumentOutOfRangeException(nameof(shift));
                }

                int shift_sets = shift / Vector256<UInt64>.Count;
                int shift_rems = shift % Vector256<UInt64>.Count;

                if (shift_rems == 0)
                {
#if DEBUG
                    Debug<OverflowException>.Assert(checked(shift_sets + v.Length) <= s.Length);
#endif

                    // Vector-aligned case: every source vector maps onto exactly one destination vector.
                    fixed (Vector256<UInt64>* ps = s)
                    {
                        fixed (Vector256<UInt32>* pv = v)
                        {
                            for (int i = 0, store_idx = shift_sets; i < v.Length; i++, store_idx++)
                            {
                                ps[store_idx] = Avx2.Add(ps[store_idx], pv[i].AsUInt64());
                            }
                        }
                    }
                }
                else
                {
#if DEBUG
                    // Strict inequality: the unaligned path writes v.Length + 1 destination vectors.
                    Debug<OverflowException>.Assert(checked(shift_sets + v.Length) < s.Length);
#endif

                    // `fixed` yields a null pointer for an empty array, so reading pv[0] below would
                    // dereference null. Nothing needs to be added in that case.
                    if (v.Length == 0)
                    {
                        return;
                    }

                    // ml selects the part of a permuted source vector that is added to the current
                    // destination vector; mh selects the part carried into the next one.
                    // NOTE(review): the argument looks like a count of 32-bit elements (two per
                    // UInt64 lane) - confirm against Mask256.
                    Vector256<UInt64> ml = Mask256.MSV(checked((uint)(shift_rems * 2))).AsUInt64();
                    Vector256<UInt64> mh = Mask256.LSV(checked((uint)(shift_rems * 2))).AsUInt64();

                    // shift_rems is 1..3 here; the default arm only satisfies switch exhaustiveness.
                    byte mm_perm = shift_rems switch
                    {
                        1 => MM_PERM_CBAD,
                        2 => MM_PERM_BADC,
                        3 => MM_PERM_ADCB,
                        _ => throw new ArgumentException(nameof(shift_rems))
                    };

                    int store_idx = shift_sets;
                    Vector256<UInt64> uh, ul, u;

                    fixed (Vector256<UInt64>* ps = s)
                    {
                        fixed (Vector256<UInt32>* pv = v)
                        {
                            // First destination vector receives only the ml part of the first source.
                            u  = Avx2.Permute4x64(pv[0].AsUInt64(), mm_perm);
                            ul = Avx2.And(u, ml);

                            ps[store_idx] = Avx2.Add(ps[store_idx], ul);
                            store_idx++;

                            // Each middle destination vector receives the mh part of the previous
                            // source combined with the ml part of the current source.
                            for (int i = 1; i < v.Length; i++)
                            {
                                uh = Avx2.And(u, mh);
                                u  = Avx2.Permute4x64(pv[i].AsUInt64(), mm_perm);
                                ul = Avx2.And(u, ml);

                                ps[store_idx] = Avx2.Add(ps[store_idx], Avx2.Or(uh, ul));
                                store_idx++;
                            }

                            // Trailing destination vector receives the mh part of the last source.
                            uh            = Avx2.And(u, mh);
                            ps[store_idx] = Avx2.Add(ps[store_idx], uh);
                        }
                    }
                }
            }