Exemplo n.º 1
0
 public Inner()
 {
     code = new SIMDcode(Options.Default,
                         _ =>
     {
         // BGRx to Y, fast but not super accurate
         // assumes there is no row-padding in the input (a Sytem.Drawing.Bitmap with Format32bppRgb satisfies this requirement)
         // assumes there is no row-padding in the output (ensure width is multiple of 4 or write it into an array)
         // assumes the image has at least 16 pixels or is padded up to such a size (64 bytes in, 16 bytes out)
         // uses approximate Y weights from JPEG in Q7:
         // Wr = 0.299 -> 38/128 (0.296875)
         // Wg = 0.587 -> 75/128 (0.5859375)
         // Wb = 0.114 -> 15/128 (0.1171875)
         // Q8 doesn't work because pmaddubsw's second operand is *signed* bytes and Wg would be 150
         // Q7 would not work in general because of the saturating addition, but the maximum sum
         // that can occur with these weights is (38+75)*255=28815 which fits in a signed word.
         // Result is rounded by adding 0.5, then truncated.
         // _mm_packus_epi16 applies unsigned clipping but its input is never out of range.
         byte rw      = 38, gw = 75, bw = 15;
         var dst      = _.arg(0);
         var src      = _.arg(1);
         var len      = _.arg(2);
         var weight   = _._mm_setr_epi8(bw, gw, rw, 0, bw, gw, rw, 0, bw, gw, rw, 0, bw, gw, rw, 0);
         var rounding = _._mm_set1_epi16(64);
         _.repeat(len, 16);
         {
             var p0 = _._mm_loadu_si128(src);
             var p1 = _._mm_loadu_si128(src, 16);
             var p2 = _._mm_loadu_si128(src, 32);
             var p3 = _._mm_loadu_si128(src, 48);
             var t0 = _._mm_maddubs_epi16(p0, weight);
             var t1 = _._mm_maddubs_epi16(p1, weight);
             var t2 = _._mm_maddubs_epi16(p2, weight);
             var t3 = _._mm_maddubs_epi16(p3, weight);
             _.assign(src, _.add(src, 64));
             t0 = _._mm_hadd_epi16(t0, t1);
             t1 = _._mm_hadd_epi16(t2, t3);
             t0 = _._mm_add_epi16(t0, rounding);
             t1 = _._mm_add_epi16(t1, rounding);
             t0 = _._mm_srli_epi16(t0, 7);
             t1 = _._mm_srli_epi16(t1, 7);
             t0 = _._mm_packus_epi16(t0, t1);
             _._mm_storeu_si128(dst, t0);
             _.assign(dst, _.add(dst, 16));
         }
         _.endrepeat();
     });
     Rgb32_to_Gray = code.GetDelegate <ArrayTransform>(0);
 }
Exemplo n.º 2
0
        static void Main(string[] args)
        {
            var opts = Options.Default;

            opts.AVX2 = false;
            //opts.breakAtFunctionEntry = true;
            SIMDcode s = new SIMDcode(opts, _ =>
            {
                // BGRx to Y, fast but not super accurate
                // using approximate Y weights from JPEG in Q7:
                // Wr = 0.299 -> 38/128 (0.296875)
                // Wg = 0.587 -> 75/128 (0.5859375)
                // Wb = 0.114 -> 15/128 (0.1171875)
                // Q8 would be more complicated because pmaddubsw's second operand is *signed* bytes and Wg would be 150
                // Q7 would not work in general because of the saturating addition, but the maximum sum
                // that can occur with these weights is (38+75)*255=28815 which fits in a signed word.
                // Result is rounded by adding 0.5, then truncated.
                // _mm_packus_epi16 applies unsigned clipping but its input is never out of range.
                byte rw      = 38, gw = 75, bw = 15;
                var dst      = _.arg(0);
                var src      = _.arg(1);
                var len      = _.arg(2);
                var weight   = _._mm_setr_epi8(bw, gw, rw, 0, bw, gw, rw, 0, bw, gw, rw, 0, bw, gw, rw, 0);
                var rounding = _._mm_set1_epi16(64);
                _.repeat(len, 16);
                {
                    var p0 = _._mm_loadu_si128(src);
                    var p1 = _._mm_loadu_si128(src, 16);
                    var p2 = _._mm_loadu_si128(src, 32);
                    var p3 = _._mm_loadu_si128(src, 48);
                    var t0 = _._mm_maddubs_epi16(p0, weight);
                    var t1 = _._mm_maddubs_epi16(p1, weight);
                    var t2 = _._mm_maddubs_epi16(p2, weight);
                    var t3 = _._mm_maddubs_epi16(p3, weight);
                    _.assign(src, _.add(src, 64));
                    t0 = _._mm_hadd_epi16(t0, t1);
                    t1 = _._mm_hadd_epi16(t2, t3);
                    t0 = _._mm_add_epi16(t0, rounding);
                    t1 = _._mm_add_epi16(t1, rounding);
                    t0 = _._mm_srli_epi16(t0, 7);
                    t1 = _._mm_srli_epi16(t1, 7);
                    t0 = _._mm_packus_epi16(t0, t1);
                    _._mm_storeu_si128(dst, t0);
                    _.assign(dst, _.add(dst, 16));
                }
                _.endrepeat();
            }, _ =>
            {
                // upscale x2
                var dst    = _.arg(0);
                var src    = _.arg(1);
                var width  = _.arg(2);
                var height = _.arg(3);
                // upscale vertically
                var prev = _._mm_setzero_si128();
                _.repeat(width, 16);
                {
                    var t0 = _._mm_loadu_si128(src);

                    //t3 = 0;
                }
                _.endrepeat();
            });

            Bitmap input  = (Bitmap)Bitmap.FromFile(@"C:\Users\Harold\Pictures\Barns_grand_tetons.jpg");
            Bitmap output = new Bitmap(input.Width, input.Height, System.Drawing.Imaging.PixelFormat.Format8bppIndexed);
            var    bdin   = input.LockBits(new Rectangle(Point.Empty, input.Size), System.Drawing.Imaging.ImageLockMode.ReadOnly, System.Drawing.Imaging.PixelFormat.Format32bppRgb);
            var    bdout  = output.LockBits(new Rectangle(Point.Empty, input.Size), System.Drawing.Imaging.ImageLockMode.WriteOnly, System.Drawing.Imaging.PixelFormat.Format8bppIndexed);

            System.Drawing.Imaging.ColorPalette pal = output.Palette;
            for (int i = 0; i < 256; i++)
            {
                pal.Entries[i] = Color.FromArgb(255, i, i, i);
            }
            output.Palette = pal;

            var foo = s.GetDelegate <Foo>(0);

            foo((uint *)bdout.Scan0, (uint *)bdin.Scan0, input.Width * input.Height);

            input.UnlockBits(bdin);
            output.UnlockBits(bdout);

            output.Save(@"C:\Users\Harold\Pictures\Barns_grand_tetons_graytest.png", System.Drawing.Imaging.ImageFormat.Png);

            return;
        }