//used to initialize & change src data
    /// <summary>
    /// Uses an existing GPU buffer as the data to sort, picks the sort routine
    /// for its element count, and (re)creates the transposed scratch buffer.
    /// </summary>
    /// <param name="cbSrc">
    /// Buffer holding the elements to sort. The reference is stored as-is; no copy is made.
    /// </param>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="cbSrc"/> is null.
    /// </exception>
    public void SetGPUData(ComputeBuffer cbSrc)
    {
        if (cbSrc == null)
        {
            throw new System.ArgumentNullException("cbSrc");
        }

        cbData        = cbSrc;
        NUM_ELEMENTS  = cbSrc.count;
        MATRIX_HEIGHT = NUM_ELEMENTS / MATRIX_WIDTH;

        // Choose the sort implementation by element count.
        if (NUM_ELEMENTS <= 512)
        {
            GPUSortDel = GPUSortSmallNum;
        }
        else if (NUM_ELEMENTS < 8192)// width is less than 16
        {
            GPUSortDel = GPUSortMiddleNum;
        }
        else
        {
            GPUSortDel = GPUSortLargeNum;
        }

        int stride = 8;//<int2>

        // This method may be called repeatedly to change the data, so free the
        // scratch buffer from the previous call before allocating a new one;
        // otherwise its native GPU memory is leaked.
        if (cbTransposed != null)
        {
            cbTransposed.Release();
        }

        cbTransposed = new ComputeBuffer(NUM_ELEMENTS, stride);
    }
    //used to initialize & change src data
    /// <summary>
    /// Uses a CPU-side int array as the data to sort: picks the sort routine for
    /// its length, allocates the data and transposed buffers, and uploads the
    /// array into the data buffer.
    /// </summary>
    /// <param name="src">Elements to sort. The array reference is kept in <c>srcData</c>.</param>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="src"/> is null.
    /// </exception>
    public void SetData(int[] src)
    {
        if (src == null)
        {
            throw new System.ArgumentNullException("src");
        }

        srcData       = src;
        NUM_ELEMENTS  = srcData.Length;
        MATRIX_HEIGHT = NUM_ELEMENTS / MATRIX_WIDTH;

        // Choose the sort implementation by element count.
        if (NUM_ELEMENTS <= 512)
        {
            GPUSortDel = GPUSortSmallNum;
        }
        else if (NUM_ELEMENTS < 8192)// width is less than 16
        {
            GPUSortDel = GPUSortMiddleNum;
        }
        else
        {
            GPUSortDel = GPUSortLargeNum;
        }

        int stride = 4;//just<int>

        // This method may be called repeatedly to change the data, so free the
        // scratch buffer from the previous call before allocating a new one;
        // otherwise its native GPU memory is leaked.
        if (cbTransposed != null)
        {
            cbTransposed.Release();
        }

        // NOTE(review): the previous cbData is intentionally NOT released here.
        // SetGPUData stores a caller-supplied buffer in cbData, so this class
        // cannot tell whether it owns the old buffer. If ownership tracking is
        // added, release owned buffers here as well.
        cbData       = new ComputeBuffer(NUM_ELEMENTS, stride);
        cbTransposed = new ComputeBuffer(NUM_ELEMENTS, stride);
        cbData.SetData(srcData);
    }