예제 #1
0
        public static int Main(string[] args)
        {
            // First we'll declare some Vars to use below.
            var x  = new HSVar("x");
            var y  = new HSVar("y");
            var xo = new HSVar("xo");
            var yo = new HSVar("yo");
            var xi = new HSVar("xi");
            var yi = new HSVar("yi");

            // This lesson will be about "wrapping" a Func or an ImageParam using the
            // Func::in and ImageParam::in directives
            {
                {
                    // Consider a simple two-stage pipeline:
                    var f = new HSFunc("f_local");
                    var g = new HSFunc("g_local");
                    f[x, y] = x + y;
                    g[x, y] = 2 * f[x, y] + 3;

                    f.ComputeRoot();

                    // This produces the following loop nests:
                    // for y:
                    //   for x:
                    //     f(x, y) = x + y
                    // for y:
                    //   for x:
                    //     g(x, y) = 2 * f(x, y) + 3

                    // Using Func::in, we can interpose a new Func in between f
                    // and g using the schedule alone:
                    HSFunc f_in_g = f.In(g);
                    f_in_g.ComputeRoot();

                    // Equivalently, we could also chain the schedules like so:
                    // f.in(g).ComputeRoot();

                    // This produces the following three loop nests:
                    // for y:
                    //   for x:
                    //     f(x, y) = x + y
                    // for y:
                    //   for x:
                    //     f_in_g(x, y) = f(x, y)
                    // for y:
                    //   for x:
                    //     g(x, y) = 2 * f_in_g(x, y) + 3

                    g.Realize <int>(5, 5);

                    // See figures/lesson_19_wrapper_local.mp4 for a visualization.
                }

                // The schedule directive f.in(g) replaces all calls to 'f'
                // inside 'g' with a wrapper Func and then returns that
                // wrapper. Essentially, it rewrites the original pipeline
                // above into the following:
                {
                    var f_in_g = new HSFunc("f_in_g");
                    var f      = new HSFunc("f");
                    var g      = new HSFunc("g");
                    f[x, y]      = x + y;
                    f_in_g[x, y] = f[x, y];
                    g[x, y]      = 2 * f_in_g[x, y] + 3;

                    f.ComputeRoot();
                    f_in_g.ComputeRoot();
                    g.ComputeRoot();
                }

                // In isolation, such a transformation seems pointless, but it
                // can be used for a variety of scheduling tricks.
            }

            {
                // In the schedule above, only the calls to 'f' made by 'g'
                // are replaced. Other calls made to f would still call 'f'
                // directly. If we wish to globally replace all calls to 'f'
                // with a single wrapper, we simply say f.in().

                // Consider a three stage pipeline, with two consumers of f:
                var f = new HSFunc("f_global");
                var g = new HSFunc("g_global");
                var h = new HSFunc("h_global");
                f[x, y] = x + y;
                g[x, y] = 2 * f[x, y];
                h[x, y] = 3 + g[x, y] - f[x, y];
                f.ComputeRoot();
                g.ComputeRoot();
                h.ComputeRoot();

                // We will replace all calls to 'f' inside both 'g' and 'h'
                // with calls to a single wrapper:
                f.In().ComputeRoot();

                // The equivalent loop nests are:
                // for y:
                //   for x:
                //     f(x, y) = x + y
                // for y:
                //   for x:
                //     f_in(x, y) = f(x, y)
                // for y:
                //   for x:
                //     g(x, y) = 2 * f_in(x, y)
                // for y:
                //   for x:
                //     h(x, y) = 3 + g(x, y) - f_in(x, y)

                h.Realize <int>(5, 5);

                // See figures/lesson_19_wrapper_global.mp4 and for a
                // visualization of what this did.
            }

            {
                // We could also give g and h their own unique wrappers of
                // f. This time we'll schedule them each inside the loop nests
                // of the consumer, which is not something we could do with a
                // single global wrapper.

                var f = new HSFunc("f_unique");
                var g = new HSFunc("g_unique");
                var h = new HSFunc("h_unique");
                f[x, y] = x + y;
                g[x, y] = 2 * f[x, y];
                h[x, y] = 3 + g[x, y] - f[x, y];

                f.ComputeRoot();
                g.ComputeRoot();
                h.ComputeRoot();

                f.In(g).ComputeAt(g, y);
                f.In(h).ComputeAt(h, y);

                // This creates the loop nests:
                // for y:
                //   for x:
                //     f(x, y) = x + y
                // for y:
                //   for x:
                //     f_in_g(x, y) = f(x, y)
                //   for x:
                //     g(x, y) = 2 * f_in_g(x, y)
                // for y:
                //   for x:
                //     f_in_h(x, y) = f(x, y)
                //   for x:
                //     h(x, y) = 3 + g(x, y) - f_in_h(x, y)

                h.Realize <int>(5, 5);
                // See figures/lesson_19_wrapper_unique.mp4 for a visualization.
            }

            {
                // So far this may seem like a lot of pointless copying of
                // memory. Func::in can be combined with other scheduling
                // directives for a variety of purposes. The first we will
                // examine is creating distinct realizations of a Func for
                // several consumers and scheduling each differently.

                // We'll start with nearly the same pipeline.
                var f = new HSFunc("f_sched");
                var g = new HSFunc("g_sched");
                var h = new HSFunc("h_sched");
                f[x, y] = x + y;
                g[x, y] = 2 * f[x, y];
                // h will use a far-away region of f
                h[x, y] = 3 + g[x, y] - f[x + 93, y - 87];

                // This time we'll inline f.
                // f.ComputeRoot();
                g.ComputeRoot();
                h.ComputeRoot();

                f.In(g).ComputeAt(g, y);
                f.In(h).ComputeAt(h, y);

                // g and h now call f via distinct wrappers. The wrappers are
                // scheduled, but f is not, which means that f is inlined into
                // its two wrappers. They will each independently compute the
                // region of f required by their consumer. If we had scheduled
                // f ComputeRoot, we'd be computing the bounding box of the
                // region required by g and the region required by h, which
                // would mostly be unused data.

                // We can also schedule each of these wrappers
                // differently. For scheduling purposes, wrappers inherit the
                // pure vars of the Func they wrap, so we use the same x and y
                // that we used when defining f:
                f.In(g).Vectorize(x, 4);
                f.In(h).Split(x, xo, xi, 2).Reorder(xo, xi);

                // Note that calling f.in(g) a second time returns the wrapper
                // already created by the first call, it doesn't make a new one.

                h.Realize <int>(8, 8);
                // See figures/lesson_19_wrapper_vary_schedule.mp4 for a
                // visualization.

                // Note that because f is inlined into its two wrappers, it is
                // the wrappers that do the work of computing f, rather than
                // just loading from an existing computed realization.
            }

            {
                // Func::in is useful to stage loads from a Func via some
                // smaller intermediate buffer, perhaps on the stack or in
                // shared GPU memory.

                // Consider a pipeline that transposes some ComputeRoot'd Func:

                var f = new HSFunc("f_transpose");
                var g = new HSFunc("g_transpose");
                f[x, y] = HSMath.Sin(((x + y) * HSMath.Sqrt(y)) / 10);
                f.ComputeRoot();

                g[x, y] = f[y, x];

                // The execution strategy we want is to load an 4x4 tile of f
                // into registers, transpose it in-register, and then write it
                // out as an 4x4 tile of g. We will use Func::in to express this:

                HSFunc f_tile = f.In(g);

                // We now have a three stage pipeline:
                // f -> f_tile -> g

                // f_tile will load vectors of f, and store them transposed
                // into registers. g will then write this data back to main
                // memory.
                g.Tile(x, y, xo, yo, xi, yi, 4, 4)
                .Vectorize(xi)
                .Unroll(yi);

                // We will compute f_transpose at tiles of g, and use
                // Func::reorder_storage to state that f_transpose should be
                // stored column-major, so that the loads to it done by g can
                // be dense vector loads.
                f_tile.ComputeAt(g, xo)
                .ReorderStorage(y, x)
                .Vectorize(x)
                .Unroll(y);

                // We take care to make sure f_transpose is only ever accessed
                // at constant indicies. The full unrolling/vectorization of
                // all loops that exist inside its compute_at level has this
                // effect. Allocations that are only ever accessed at constant
                // indices can be promoted into registers.

                g.Realize <float>(16, 16);
                // See figures/lesson_19_transpose.mp4 for a visualization
            }

            {
                // ImageParam::in behaves the same way as Func::in, and you
                // can use it to stage loads in similar ways. Instead of
                // transposing again, we'll use ImageParam::in to stage tiles
                // of an input image into GPU shared memory, effectively using
                // shared/local memory as an explicitly-managed cache.

                var img = new HSImageParam <int>(2);

                // We will compute a small blur of the input.
                var blur = new HSFunc("blur");
                blur[x, y] = (img[x - 1, y - 1] + img[x, y - 1] + img[x + 1, y - 1] +
                              img[x - 1, y] + img[x, y] + img[x + 1, y] +
                              img[x - 1, y + 1] + img[x, y + 1] + img[x + 1, y + 1]);

                blur.ComputeRoot().GpuTile(x, y, xo, yo, xi, yi, 8, 8);

                // The wrapper Func created by ImageParam::in has pure vars
                // named _0, _1, etc. Schedule it per tile of "blur", and map
                // _0 and _1 to gpu threads.
                img.In(blur).ComputeAt(blur, xo).GpuThreads(HS._0, HS._1);

                // Without Func::in, computing an 8x8 tile of blur would do
                // 8*8*9 loads to global memory. With Func::in, the wrapper
                // does 10*10 loads to global memory up front, and then blur
                // does 8*8*9 loads to shared/local memory.

                // Select an appropriate GPU API, as we did in lesson 12
                var target = HS.GetHostTarget();
                if (target.OS == HSOperatingSystem.OSX)
                {
                    target.SetFeature(HSFeature.Metal);
                }
                else
                {
                    target.SetFeature(HSFeature.OpenCL);
                }

                // Create an interesting input image to use.
                var input = new HSBuffer <int>(258, 258);
                input.SetMin(-1, -1);
                for (int yy = input.Top; yy <= input.Bottom; yy++)
                {
                    for (int xx = input.Left; xx <= input.Right; xx++)
                    {
                        input[xx, yy] = xx * 17 + yy % 4;
                    }
                }

                img.Set(input);
                blur.CompileJit(target);
                var output = blur.Realize <int>(256, 256);

                // Check the output is what we expected
                for (int yy = output.Top; yy <= output.Bottom; yy++)
                {
                    for (int xx = output.Left; xx <= output.Right; xx++)
                    {
                        int val      = output[xx, yy];
                        int expected = (input[xx - 1, yy - 1] + input[xx, yy - 1] + input[xx + 1, yy - 1] +
                                        input[xx - 1, yy] + input[xx, yy] + input[xx + 1, yy] +
                                        input[xx - 1, yy + 1] + input[xx, yy + 1] + input[xx + 1, yy + 1]);
                        if (val != expected)
                        {
                            Console.WriteLine($"output({xx}, {yy}) = {val} instead of {expected}\n",
                                              xx, yy, val, expected);
                            return(-1);
                        }
                    }
                }
            }

            {
                // Func::in can also be used to group multiple stages of a
                // Func into the same loop nest. Consider the following
                // pipeline, which computes a value per pixel, then sweeps
                // from left to right and back across each scanline.
                var f = new HSFunc("f_group");
                var g = new HSFunc("g_group");
                var h = new HSFunc("h_group");

                // Initialize f
                f[x, y] = HSMath.Sin(x - y);
                var r = new HSRDom(1, 7);

                // Sweep from left to right
                f[r, y] = (f[r, y] + f[r - 1, y]) / 2;

                // Sweep from right to left
                f[7 - r, y] = (f[7 - r, y] + f[8 - r, y]) / 2;

                // Then we do something with a complicated access pattern: A
                // 45 degree rotation with wrap-around
                g[x, y] = f[(x + y) % 8, (x - y) % 8];

                // f should be scheduled ComputeRoot, because its consumer
                // accesses it in a complicated way. But that means all stages
                // of f are computed in separate loop nests:

                // for y:
                //   for x:
                //     f(x, y) = sin(x - y)
                // for y:
                //   for r:
                //     f(r, y) = (f(r, y) + f(r - 1, y)) / 2
                // for y:
                //   for r:
                //     f(7 - r, y) = (f(7 - r, y) + f(8 - r, y)) / 2
                // for y:
                //   for x:
                //     g(x, y) = f((x + y) % 8, (x - y) % 8);

                // We can get better locality if we schedule the work done by
                // f to share a common loop over y. We can do this by
                // computing f at scanlines of a wrapper like so:

                f.In(g).ComputeRoot();
                f.ComputeAt(f.In(g), y);

                // f has the default schedule for a Func with update stages,
                // which is to be computed at the innermost loop of its
                // consumer, which is now the wrapper f.in(g). This therefore
                // generates the following loop nest, which has better
                // locality:

                // for y:
                //   for x:
                //     f(x, y) = sin(x - y)
                //   for r:
                //     f(r, y) = (f(r, y) + f(r - 1, y)) / 2
                //   for r:
                //     f(7 - r, y) = (f(7 - r, y) + f(8 - r, y)) / 2
                //   for x:
                //     f_in_g(x, y) = f(x, y)
                // for y:
                //   for x:
                //     g(x, y) = f_in_g((x + y) % 8, (x - y) % 8);

                // We'll additionally vectorize the initialization of, and
                // then transfer of pixel values from f into its wrapper:
                f.Vectorize(x, 4);
                f.In(g).Vectorize(x, 4);

                g.Realize <float>(8, 8);
                // See figures/lesson_19_group_updates.mp4 for a visualization.
            }

            Console.WriteLine("Success!");

            return(0);
        }
예제 #2
0
        public static int Main(string[] args)
        {
            // First we'll declare some Vars to use below.
            var x = new HSVar("x");
            var y = new HSVar("y");
            var c = new HSVar("c");

            // Now we'll express a multi-stage pipeline that blurs an image
            // first horizontally, and then vertically.
            {
                // Take a color 8-bit input
                var input = HSBuffer <byte> .LoadImage("rgb.png");

                // Upgrade it to 16-bit, so we can do math without it overflowing.
                var input_16 = new HSFunc("input_16");
                input_16[x, y, c] = HS.Cast <ushort>(input[x, y, c]);

                // Blur it horizontally:
                var blur_x = new HSFunc("blur_x");
                blur_x[x, y, c] = (input_16[x - 1, y, c] +
                                   2 * input_16[x, y, c] +
                                   input_16[x + 1, y, c]) / 4;

                // Blur it vertically:
                var blur_y = new HSFunc("blur_y");
                blur_y[x, y, c] = (blur_x[x, y - 1, c] +
                                   2 * blur_x[x, y, c] +
                                   blur_x[x, y + 1, c]) / 4;

                // Convert back to 8-bit.
                var output = new HSFunc("output");
                output[x, y, c] = HS.Cast <byte>(blur_y[x, y, c]);

                // Each Func in this pipeline calls a previous one using
                // familiar function call syntax (we've overloaded operator()
                // on Func objects). A Func may call any other Func that has
                // been given a definition. This restriction prevents
                // pipelines with loops in them. Halide pipelines are always
                // feed-forward graphs of Funcs.

                // Now let's realize it...

                // Buffer<byte> result = output.realize(input.width(), input.height(), 3);

                // Except that the line above is not going to work. Uncomment
                // it to see what happens.

                // Realizing this pipeline over the same domain as the input
                // image requires reading pixels out of bounds in the input,
                // because the blur_x stage reaches outwards horizontally, and
                // the blur_y stage reaches outwards vertically. Halide
                // detects this by injecting a piece of code at the top of the
                // pipeline that computes the region over which the input will
                // be read. When it starts to run the pipeline it first runs
                // this code, determines that the input will be read out of
                // bounds, and refuses to continue. No actual bounds checks
                // occur in the inner loop; that would be slow.
                //
                // So what do we do? There are a few options. If we realize
                // over a domain shifted inwards by one pixel, we won't be
                // asking the Halide routine to read out of bounds. We saw how
                // to do this in the previous lesson:
                var result = new HSBuffer <byte>(input.Width - 2, input.Height - 2, 3);
                result.SetMin(1, 1);
                output.Realize(result);

                // Save the result. It should look like a slightly blurry
                // parrot, and it should be two pixels narrower and two pixels
                // shorter than the input image.
                result.SaveImage("blurry_parrot_1.png");

                // This is usually the fastest way to deal with boundaries:
                // don't write code that reads out of bounds :) The more
                // general solution is our next example.
            }

            // The same pipeline, with a boundary condition on the input.
            {
                // Take a color 8-bit input
                var input = HSBuffer <byte> .LoadImage("rgb.png");

                // This time, we'll wrap the input in a Func that prevents
                // reading out of bounds:
                var clamped = new HSFunc("clamped");

                // Define an expression that clamps x to lie within the
                // range [0, input.width()-1].
                var clamped_x = HS.Clamp(x, 0, input.Width - 1);
                // clamp(x, a, b) is equivalent to max(min(x, b), a).

                // Similarly clamp y.
                var clamped_y = HS.Clamp(y, 0, input.Height - 1);
                // Load from input at the clamped coordinates. This means that
                // no matter how we evaluated the Func 'clamped', we'll never
                // read out of bounds on the input. This is a clamp-to-edge
                // style boundary condition, and is the simplest boundary
                // condition to express in Halide.
                clamped[x, y, c] = input[clamped_x, clamped_y, c];

                // Defining 'clamped' in that way can be done more concisely
                // using a helper function from the BoundaryConditions
                // namespace like so:
                //
                // clamped = BoundaryConditions::repeat_edge(input);
                //
                // These are important to use for other boundary conditions,
                // because they are expressed in the way that Halide can best
                // understand and optimize. When used correctly they are as
                // cheap as having no boundary condition at all.

                // Upgrade it to 16-bit, so we can do math without it
                // overflowing. This time we'll refer to our new Func
                // 'clamped', instead of referring to the input image
                // directly.
                var input_16 = new HSFunc("input_16");
                input_16[x, y, c] = HS.Cast <ushort>(clamped[x, y, c]);

                // The rest of the pipeline will be the same...

                // Blur it horizontally:
                var blur_x = new HSFunc("blur_x");
                blur_x[x, y, c] = (input_16[x - 1, y, c] +
                                   2 * input_16[x, y, c] +
                                   input_16[x + 1, y, c]) / 4;

                // Blur it vertically:
                var blur_y = new HSFunc("blur_y");
                blur_y[x, y, c] = (blur_x[x, y - 1, c] +
                                   2 * blur_x[x, y, c] +
                                   blur_x[x, y + 1, c]) / 4;

                // Convert back to 8-bit.
                var output = new HSFunc("output");
                output[x, y, c] = HS.Cast <byte>(blur_y[x, y, c]);

                // This time it's safe to evaluate the output over the some
                // domain as the input, because we have a boundary condition.
                var result = output.Realize <byte>(input.Width, input.Height, 3);

                // Save the result. It should look like a slightly blurry
                // parrot, but this time it will be the same size as the
                // input.
                result.SaveImage("blurry_parrot_2.png");
            }

            Console.WriteLine("Success!");
            return(0);
        }
예제 #3
0
        public static int Main(string[] args)
        {
            // The last lesson was quite involved, and scheduling complex
            // multi-stage pipelines is ahead of us. As an interlude, let's
            // consider something easy: evaluating funcs over rectangular
            // domains that do not start at the origin.

            // We define our familiar gradient function.
            var gradient = new HSFunc("gradient");
            var x        = new HSVar("x");
            var y        = new HSVar("y");

            gradient[x, y] = x + y;

            // And turn on tracing so we can see how it is being evaluated.
            gradient.TraceStores();

            // Previously we've realized gradient like so:
            //
            // gradient.realize(8, 8);
            //
            // This does three things internally:
            // 1) Generates code than can evaluate gradient over an arbitrary
            // rectangle.
            // 2) Allocates a new 8 x 8 image.
            // 3) Runs the generated code to evaluate gradient for all x, y
            // from (0, 0) to (7, 7) and puts the result into the image.
            // 4) Returns the new image as the result of the realize call.

            // What if we're managing memory carefully and don't want Halide
            // to allocate a new image for us? We can call realize another
            // way. We can pass it an image we would like it to fill in. The
            // following evaluates our Func into an existing image:
            Console.WriteLine("Evaluating gradient from (0, 0) to (7, 7)");
            var result = new HSBuffer <int>(8, 8);

            gradient.Realize(result);

            // Let's check it did what we expect:
            for (int yy = 0; yy < 8; yy++)
            {
                for (int xx = 0; xx < 8; xx++)
                {
                    if (result[xx, yy] != xx + yy)
                    {
                        Console.WriteLine("Something went wrong!\n");
                        return(-1);
                    }
                }
            }

            // Now let's evaluate gradient over a 5 x 7 rectangle that starts
            // somewhere else -- at position (100, 50). So x and y will run
            // from (100, 50) to (104, 56) inclusive.

            // We start by creating an image that represents that rectangle:
            var shifted = new HSBuffer <int>(5, 7); // In the constructor we tell it the size.

            shifted.SetMin(100, 50);                // Then we tell it the top-left corner.

            Console.WriteLine("Evaluating gradient from (100, 50) to (104, 56)");

            // Note that this won't need to compile any new code, because when
            // we realized it the first time, we generated code capable of
            // evaluating gradient over an arbitrary rectangle.
            gradient.Realize(shifted);

            // From C++, we also access the image object using coordinates
            // that start at (100, 50).
            for (int yy = 50; yy < 57; yy++)
            {
                for (int xx = 100; xx < 105; xx++)
                {
                    if (shifted[xx, yy] != xx + yy)
                    {
                        Console.WriteLine("Something went wrong!");
                        return(-1);
                    }
                }
            }
            // The image 'shifted' stores the value of our Func over a domain
            // that starts at (100, 50), so asking for shifted(0, 0) would in
            // fact read out-of-bounds and probably crash.

            // What if we want to evaluate our Func over some region that
            // isn't rectangular? Too bad. Halide only does rectangles :)

            Console.WriteLine("Success!");
            return(0);
        }