/// <summary>
/// Sets up the optical-flow helper: loads the "opticalFlow.ptx" module, wraps its
/// kernels, allocates the device images and scratch storage, and assigns the three
/// 4-element filter arrays.
/// </summary>
/// <param name="width">Width passed to every NPP image allocated here.</param>
/// <param name="height">Height passed to every NPP image allocated here.</param>
/// <param name="ctx">Context used to load the PTX module; also handed to each kernel wrapper.</param>
public OpticalFlow(int width, int height, CudaContext ctx)
        {
            // Every kernel wrapper below is built from this one PTX module.
            CUmodule flowModule = ctx.LoadModulePTX("opticalFlow.ptx");

            warpingKernel = new WarpingKernel(ctx, flowModule);
            createFlowFieldFromTiles = new CreateFlowFieldFromTiles(ctx, flowModule);
            computeDerivativesKernel = new ComputeDerivativesKernel(ctx, flowModule);
            lukasKanade = new LukasKanadeKernel(ctx, flowModule);

            // Four single-channel float images plus one two-channel float image,
            // all with the same width x height.
            d_tmp = new NPPImage_32fC1(width, height);
            d_Ix = new NPPImage_32fC1(width, height);
            d_Iy = new NPPImage_32fC1(width, height);
            d_Iz = new NPPImage_32fC1(width, height);
            d_flow = new NPPImage_32fC2(width, height);

            // Scratch buffer is three times the size reported for d_tmp.
            // NOTE(review): the reason for the factor 3 is not visible here - confirm.
            var scratchBytes = d_tmp.MeanStdDevGetBufferHostSize() * 3;
            buffer = new CudaDeviceVariable<byte>(scratchBytes);

            // One-element device variables; presumably they receive the mean and
            // standard-deviation results - verify against the call site.
            mean = new CudaDeviceVariable<double>(1);
            std = new CudaDeviceVariable<double>(1);

            // Despite the d_ prefix, these are created as managed float arrays.
            // NOTE(review): the names suggest x / y / temporal derivative taps - confirm.
            const float quarter = 0.25f;
            d_filterX = new float[] { -quarter, quarter, -quarter, quarter };
            d_filterY = new float[] { -quarter, -quarter, quarter, quarter };
            d_filterT = new float[] { quarter, quarter, quarter, quarter };
        }