/// <summary>
/// Searches <paramref name="src"/> along its last dimension for <paramref name="value"/> and writes the
/// last-dimension index of the match into <paramref name="trgt"/>.
/// </summary>
/// <remarks>
/// The fold state starts as <c>SpecialIdx.NotFound</c>. Once it holds an index it is passed through
/// unchanged, so the earliest match in fold order is the one that is kept.
/// </remarks>
/// <param name="value">Value compared against each element with the registry's <c>Equal</c> primitive.</param>
/// <param name="trgt">Receives the found index, or <c>SpecialIdx.NotFound</c> when nothing matched.</param>
/// <param name="src">Data that is searched.</param>
public static void FindLastAxis <T>(T value, DataAndLayout <int> trgt, DataAndLayout <T> src)
        {
            var lastDim    = src.FastAccess.NumDiensions - 1;
            var primitives = ScalarPrimitivesRegistry.For <T, T>();

            int Search(int[] srcIndex, int found, T element)
            {
                // A hit recorded earlier in the fold is never overwritten.
                if (found != SpecialIdx.NotFound)
                {
                    return found;
                }

                if (primitives.Equal(element, value))
                {
                    return srcIndex[lastDim];
                }

                return SpecialIdx.NotFound;
            }

            var notFoundSeed = new InitialOption <int>(true, SpecialIdx.NotFound);

            ApplyAxisFold(Search, state => state, trgt, src, notFoundSeed, isIndexed: true, useThreads: true);
        }
        /// <summary>
        /// Folds <paramref name="src"/> along its last dimension with logical OR, starting from <c>false</c>,
        /// and stores the result in <paramref name="trgt"/>.
        /// </summary>
        /// <param name="trgt">Receives the OR of the folded elements.</param>
        /// <param name="src">Boolean data to fold.</param>
        public static void AnyLastAxis(DataAndLayout <bool> trgt, DataAndLayout <bool> src)
        {
            bool OrFold(int[] pos, bool accumulated, bool current)
            {
                return accumulated || current;
            }

            var seed = new InitialOption <bool>(true, false);

            ApplyAxisFold(OrFold, result => result, trgt, src, seed, isIndexed: false, useThreads: true);
        }
        /// <summary>
        /// Applies the registry's <c>Truncate</c> primitive to every element of <paramref name="src"/>
        /// via <c>ApplyUnaryOp</c> and writes the results to <paramref name="trgt"/>.
        /// </summary>
        /// <param name="trgt">Receives the results.</param>
        /// <param name="src">Input data.</param>
        public static void Truncate <T>(DataAndLayout <T> trgt, DataAndLayout <T> src)
        {
            var primitives = ScalarPrimitivesRegistry.For <T, T>();

            T TruncateElement(int[] pos, T element)
            {
                return primitives.Truncate(element);
            }

            ApplyUnaryOp(TruncateElement, trgt, src, isIndexed: false, useThreads: true);
        }
        /// <summary>
        /// Combines <paramref name="lhs"/> and <paramref name="rhs"/> element by element with the registry's
        /// <c>Power</c> primitive (left element first, right element second) via <c>ApplyBinaryOp</c>.
        /// </summary>
        /// <param name="trgt">Receives the results.</param>
        /// <param name="lhs">Data supplying the first argument of <c>Power</c>.</param>
        /// <param name="rhs">Data supplying the second argument of <c>Power</c>.</param>
        public static void Pow <T>(DataAndLayout <T> trgt, DataAndLayout <T> lhs, DataAndLayout <T> rhs)
        {
            var primitives = ScalarPrimitivesRegistry.For <T, T>();

            T PowerOf(int[] pos, T left, T right)
            {
                return primitives.Power(left, right);
            }

            ApplyBinaryOp(PowerOf, trgt, lhs, rhs, isIndexed: false, useThreads: true);
        }
        /// <summary>
        /// Combines <paramref name="src1"/> and <paramref name="src2"/> element by element with the registry's
        /// <c>Minimum</c> primitive via <c>ApplyBinaryOp</c>.
        /// </summary>
        /// <param name="trgt">Receives the results.</param>
        /// <param name="src1">Data supplying the first argument of <c>Minimum</c>.</param>
        /// <param name="src2">Data supplying the second argument of <c>Minimum</c>.</param>
        public static void Minimum <T>(DataAndLayout <T> trgt, DataAndLayout <T> src1, DataAndLayout <T> src2)
        {
            var primitives = ScalarPrimitivesRegistry.For <T, T>();

            T Smaller(int[] pos, T first, T second)
            {
                return primitives.Minimum(first, second);
            }

            ApplyBinaryOp(Smaller, trgt, src1, src2, isIndexed: false, useThreads: true);
        }
        /// <summary>
        /// Compares <paramref name="src1"/> and <paramref name="src2"/> element by element with the registry's
        /// <c>GreaterOrEqual</c> primitive via <c>ApplyBinaryOp</c> and stores the boolean outcomes.
        /// </summary>
        /// <param name="trgt">Receives the comparison results.</param>
        /// <param name="src1">Data supplying the first argument of the comparison.</param>
        /// <param name="src2">Data supplying the second argument of the comparison.</param>
        public static void GreaterOrEqual <TP>(DataAndLayout <bool> trgt, DataAndLayout <TP> src1, DataAndLayout <TP> src2)
        {
            var primitives = ScalarPrimitivesRegistry.For <TP, TP>();

            bool Compare(int[] pos, TP first, TP second)
            {
                return primitives.GreaterOrEqual(first, second);
            }

            ApplyBinaryOp(Compare, trgt, src1, src2, isIndexed: false, useThreads: true);
        }
        /// <summary>
        /// Counts the <c>true</c> elements of <paramref name="src"/> along its last dimension, starting
        /// from zero, and writes each count to <paramref name="trgt"/>.
        /// </summary>
        /// <param name="trgt">Receives the counts.</param>
        /// <param name="src">Boolean data to count.</param>
        public static void CountTrueLastAxis(DataAndLayout <int> trgt, DataAndLayout <bool> src)
        {
            int Tally(int[] pos, int count, bool flag)
            {
                if (flag)
                {
                    return count + 1;
                }

                return count;
            }

            var zeroSeed = new InitialOption <int>(true, 0);

            ApplyAxisFold(Tally, count => count, trgt, src, zeroSeed, isIndexed: false, useThreads: true);
        }
        /// <summary>
        /// Fills <paramref name="trgt"/> so that each element equals
        /// <paramref name="start"/> + <paramref name="step"/> * (position along dimension 0).
        /// </summary>
        /// <remarks>
        /// Only <c>pos[0]</c> is read, so positions that differ in other dimensions get the same value.
        /// </remarks>
        /// <param name="start">Value added to every scaled position.</param>
        /// <param name="step">Factor multiplied with the converted position.</param>
        /// <param name="trgt">Data that is overwritten.</param>
        public static void FillIncrementing <T>(T start, T step, DataAndLayout <T> trgt)
        {
            var primitives = ScalarPrimitivesRegistry.For <T, int>();

            T ValueAt(int[] pos)
            {
                var scaledOffset = primitives.Multiply(step, primitives.Convert(pos[0]));
                return primitives.Add(start, scaledOffset);
            }

            ApplyNoaryOp(ValueAt, trgt, isIndexed: true, useThreads: true);
        }
        /// <summary>
        /// Converts every element of <paramref name="src"/> with the registry's <c>Convert</c> primitive
        /// for the type pair (<typeparamref name="T"/>, <typeparamref name="TC"/>) and stores the results
        /// in <paramref name="trgt"/>.
        /// </summary>
        /// <param name="trgt">Receives the converted elements.</param>
        /// <param name="src">Elements to convert.</param>
        public static void Convert <T, TC>(DataAndLayout <T> trgt, DataAndLayout <TC> src)
        {
            var primitives = ScalarPrimitivesRegistry.For <T, TC>();

            T ConvertElement(int[] pos, TC element)
            {
                return primitives.Convert(element);
            }

            ApplyUnaryOp(ConvertElement, trgt, src, isIndexed: false, useThreads: true);
        }
        /// <summary>
        /// Evaluates the registry's <c>IsFinite</c> primitive for every element of <paramref name="src"/>
        /// via <c>ApplyUnaryOp</c> and stores the boolean outcomes in <paramref name="trgt"/>.
        /// </summary>
        /// <param name="trgt">Receives the per-element results.</param>
        /// <param name="src">Elements to test.</param>
        public static void IsFinite <TP>(DataAndLayout <bool> trgt, DataAndLayout <TP> src)
        {
            var primitives = ScalarPrimitivesRegistry.For <TP, TP>();

            bool Check(int[] pos, TP element)
            {
                return primitives.IsFinite(element);
            }

            ApplyUnaryOp(Check, trgt, src, isIndexed: false, useThreads: true);
        }
        /// <summary>
        /// Folds <paramref name="src"/> along its last dimension, keeping the running result whenever the
        /// registry's <c>Less</c> primitive reports it as smaller than the next element and taking the
        /// next element otherwise. The fold starts from <c>Primitives.MaxValue</c>.
        /// </summary>
        /// <param name="trgt">Receives the folded results.</param>
        /// <param name="src">Data to fold.</param>
        public static void MinLastAxis <T>(DataAndLayout <T> trgt, DataAndLayout <T> src)
        {
            var primitives = ScalarPrimitivesRegistry.For <T, T>();

            T KeepSmaller(int[] pos, T best, T candidate)
            {
                if (primitives.Less(best, candidate))
                {
                    return best;
                }

                return candidate;
            }

            var seed = new InitialOption <T>(true, Primitives.MaxValue <T>());

            ApplyAxisFold(KeepSmaller, best => best, trgt, src, seed, isIndexed: false, useThreads: true);
        }
        /// <summary>
        /// Multiplies the elements of <paramref name="src"/> along its last dimension with the registry's
        /// <c>Multiply</c> primitive, starting from <c>Primitives.One</c>, and stores each product in
        /// <paramref name="trgt"/>.
        /// </summary>
        /// <param name="trgt">Receives the products.</param>
        /// <param name="src">Data to fold.</param>
        public static void ProductLastAxis <T>(DataAndLayout <T> trgt, DataAndLayout <T> src)
        {
            var primitives = ScalarPrimitivesRegistry.For <T, T>();

            T Accumulate(int[] pos, T product, T factor)
            {
                return primitives.Multiply(product, factor);
            }

            var seed = new InitialOption <T>(true, Primitives.One <T>());

            ApplyAxisFold(Accumulate, product => product, trgt, src, seed, isIndexed: false, useThreads: true);
        }
        /// <summary>
        /// Folds <paramref name="src"/> along its last dimension, tracking an (index, value) pair, and writes
        /// the tracked index to <paramref name="trgt"/>.
        /// </summary>
        /// <remarks>
        /// The pair starts as (<c>SpecialIdx.NotFound</c>, <c>Primitives.MaxValue</c>) and is replaced only
        /// when the registry's <c>Less</c> primitive reports the current element as smaller than the tracked
        /// value. If no element is ever smaller, <c>SpecialIdx.NotFound</c> is written.
        /// </remarks>
        /// <param name="trgt">Receives the last-dimension index of the tracked element.</param>
        /// <param name="src">Data to search.</param>
        public static void ArgMinLastAxis <T>(DataAndLayout <int> trgt, DataAndLayout <T> src)
        {
            var lastDim    = src.FastAccess.NumDiensions - 1;
            var primitives = ScalarPrimitivesRegistry.For <T, T>();

            (int pos, T val) Step(int[] index, (int minPos, T minVal) best, T candidate)
            {
                if (primitives.Less(candidate, best.minVal))
                {
                    return (index[lastDim], candidate);
                }

                return best;
            }

            var seed = new InitialOption <(int, T)>(true, (SpecialIdx.NotFound, Primitives.MaxValue <T>()));

            ApplyAxisFold(Step, best => best.Item1, trgt, src, seed, isIndexed: true, useThreads: true);
        }
        /// <summary>
        /// Walks <paramref name="src"/> and, for every <c>true</c> element, writes each coordinate of its
        /// position into consecutive elements of <paramref name="trgt"/>.
        /// </summary>
        /// <remarks>
        /// The walk stops once the target iterator is exhausted.
        /// NOTE(review): presumably the caller sizes <paramref name="trgt"/> to hold exactly one coordinate
        /// tuple per true element — confirm against callers.
        /// </remarks>
        /// <param name="trgt">Receives the coordinates of the true elements.</param>
        /// <param name="src">Boolean data that is scanned.</param>
        public static void TrueIndices(DataAndLayout <int> trgt, DataAndLayout <bool> src)
        {
            var writer = new PosIter(trgt.FastAccess);
            var reader = new PosIter(src.FastAccess);

            // The source iterator advances once per pass, the target iterator once per coordinate written.
            for (; writer.Active; reader.MoveNext())
            {
                if (!src.Data[reader.Addr])
                {
                    continue;
                }

                for (var dim = 0; dim < src.FastAccess.NumDiensions; dim++)
                {
                    trgt.Data[writer.Addr] = reader.Pos[dim];
                    writer.MoveNext();
                }
            }
        }
        /// <summary>
        /// Writes <paramref name="value"/> to every element of <paramref name="trgt"/> via <c>ApplyNoaryOp</c>.
        /// </summary>
        /// <param name="value">Value stored in each element.</param>
        /// <param name="trgt">Data that is overwritten.</param>
        public static void Fill <T>(T value, DataAndLayout <T> trgt)
        {
            T Constant(int[] pos)
            {
                return value;
            }

            ApplyNoaryOp(Constant, trgt, isIndexed: false, useThreads: true);
        }
        /// <summary>
        /// Selects, element by element, from <paramref name="ifTrue"/> where <paramref name="condition"/> is
        /// <c>true</c> and from <paramref name="ifFalse"/> otherwise, via <c>ApplyTernaryOp</c>.
        /// </summary>
        /// <param name="trgt">Receives the selected elements.</param>
        /// <param name="condition">Boolean selector.</param>
        /// <param name="ifTrue">Source of elements where the selector is true.</param>
        /// <param name="ifFalse">Source of elements where the selector is false.</param>
        public static void IfThenElse <T>(DataAndLayout <T> trgt, DataAndLayout <bool> condition, DataAndLayout <T> ifTrue, DataAndLayout <T> ifFalse)
        {
            T Select(int[] pos, bool flag, T whenTrue, T whenFalse)
            {
                if (flag)
                {
                    return whenTrue;
                }

                return whenFalse;
            }

            ApplyTernaryOp(Select, trgt, condition, ifTrue, ifFalse, isIndexed: false, useThreads: true);
        }
        /// <summary>
        /// Copies every element of <paramref name="src"/> unchanged into <paramref name="trgt"/> via
        /// <c>ApplyUnaryOp</c>.
        /// </summary>
        /// <param name="trgt">Receives the copied elements.</param>
        /// <param name="src">Elements to copy.</param>
        public static void Copy <T>(DataAndLayout <T> trgt, DataAndLayout <T> src)
        {
            T Identity(int[] pos, T element)
            {
                return element;
            }

            ApplyUnaryOp(Identity, trgt, src, isIndexed: false, useThreads: true);
        }
        /// <summary>
        /// Computes the element-wise exclusive OR of <paramref name="lhs"/> and <paramref name="rhs"/> via
        /// <c>ApplyBinaryOp</c>.
        /// </summary>
        /// <param name="trgt">Receives the results.</param>
        /// <param name="lhs">Left boolean operand.</param>
        /// <param name="rhs">Right boolean operand.</param>
        public static void Xor(DataAndLayout <bool> trgt, DataAndLayout <bool> lhs, DataAndLayout <bool> rhs)
        {
            bool ExclusiveOr(int[] pos, bool left, bool right)
            {
                return left ^ right;
            }

            ApplyBinaryOp(ExclusiveOr, trgt, lhs, rhs, isIndexed: false, useThreads: true);
        }
        /// <summary>
        /// Computes the element-wise logical NOT of <paramref name="src"/> via <c>ApplyUnaryOp</c>.
        /// </summary>
        /// <param name="trgt">Receives the inverted values.</param>
        /// <param name="src">Boolean data to invert.</param>
        public static void Negate(DataAndLayout <bool> trgt, DataAndLayout <bool> src)
        {
            bool Invert(int[] pos, bool flag)
            {
                return !flag;
            }

            ApplyUnaryOp(Invert, trgt, src, isIndexed: false, useThreads: true);
        }
        /// <summary>
        /// Fills <paramref name="trgt"/> by calling <paramref name="op"/> once per element.
        /// </summary>
        /// <remarks>
        /// An outer position iterator covers the dimensions up to <c>nd - 2</c>; the last dimension is walked
        /// by an inner loop that advances the address by that dimension's stride. When
        /// <paramref name="useThreads"/> is set and there is more than one dimension, one worker invocation
        /// per index of dimension 0 is scheduled through <c>Parallel.For</c>.
        /// </remarks>
        /// <param name="op">
        /// Produces the value for an element. It receives the element position when
        /// <paramref name="isIndexed"/> is <c>true</c> and <c>null</c> otherwise (also <c>null</c> for a
        /// zero-dimensional target). The position array is reused between calls.
        /// </param>
        /// <param name="trgt">Data that is overwritten.</param>
        /// <param name="isIndexed">Whether <paramref name="op"/> is given the element position.</param>
        /// <param name="useThreads">Whether dimension 0 may be processed in parallel.</param>
        public static void ApplyNoaryOp <T>(Func <int[], T> op, DataAndLayout <T> trgt, bool isIndexed, bool useThreads)
        {
            var numDims = trgt.FastAccess.NumDiensions;
            var extents = trgt.FastAccess.Shape;

            // Processes either the whole target or the slice where dimension 0 is pinned to firstDimPos.
            void FillRange(bool pinFirstDim, int firstDimPos)
            {
                var origin = new int[numDims];
                if (pinFirstDim)
                {
                    origin[0] = firstDimPos;
                }

                var outerIter = new PosIter(trgt.FastAccess, origin, fromDim: pinFirstDim ? 1 : 0, toDim: numDims - 2);
                var cursor    = new int[outerIter.Pos.Length];

                for (; outerIter.Active; outerIter.MoveNext())
                {
                    var addr = outerIter.Addr;

                    if (numDims == 0)
                    {
                        // Scalar target: a single element and no position to report.
                        trgt.Data[outerIter.Addr] = op(null);
                        continue;
                    }

                    if (!isIndexed)
                    {
                        for (var step = 0; step < extents[numDims - 1]; step++)
                        {
                            trgt.Data[addr] = op(null);
                            addr           += trgt.FastAccess.Stride[numDims - 1];
                        }

                        continue;
                    }

                    // Snapshot the iterator position, then count the last coordinate up in the inner loop.
                    for (var dim = 0; dim < numDims; dim++)
                    {
                        cursor[dim] = outerIter.Pos[dim];
                    }

                    for (var step = 0; step < extents[numDims - 1]; step++)
                    {
                        trgt.Data[addr]      = op(cursor);
                        addr                += trgt.FastAccess.Stride[numDims - 1];
                        cursor[numDims - 1] += 1;
                    }
                }
            }

            if (!useThreads || numDims <= 1)
            {
                FillRange(false, 0);
                return;
            }

            Parallel.For(0, extents[0], firstDimPos => FillRange(true, firstDimPos));
        }
 /// <summary>
 /// Creates an initial-state description that carries a flag, a single value and optional
 /// per-position data.
 /// </summary>
 /// <param name="useValue">Stored in <c>UseValue</c>.</param>
 /// <param name="value">Stored in <c>Value</c>; defaults to <c>default(TS)</c>.</param>
 /// <param name="dataAndLayout">Stored in <c>DataAndLayout</c>; defaults to <c>null</c>.</param>
 public InitialOption(bool useValue, TS value = default(TS), DataAndLayout <TS> dataAndLayout = null)
 {
     this.UseValue      = useValue;
     this.Value         = value;
     this.DataAndLayout = dataAndLayout;
 }
        /// <summary>
        /// Folds <paramref name="src"/> along its last dimension and writes one extracted result per
        /// position of the remaining dimensions into <paramref name="trgt"/>.
        /// </summary>
        /// <remarks>
        /// Outer position iterators cover the dimensions up to <c>nd - 2</c>; the last dimension is walked by
        /// an inner loop that advances the source address by that dimension's stride. All outer iterators
        /// (target, source and, when used, initial data) are advanced together after each fold.
        /// When <paramref name="useThreads"/> is set and the source has more than one dimension, one worker
        /// invocation per index of dimension 0 is scheduled through <c>Parallel.For</c>.
        /// NOTE(review): presumably <paramref name="trgt"/> has the shape of <paramref name="src"/> without
        /// its last dimension; this is not validated here.
        /// </remarks>
        /// <param name="foldOp">
        /// Combines the running state with the next source element. It receives the element position when
        /// <paramref name="isIndexed"/> is <c>true</c> and <c>null</c> otherwise (also <c>null</c> for a
        /// zero-dimensional source). The position array is reused between calls.
        /// </param>
        /// <param name="extractOp">Maps the final state of a fold to the value stored in the target.</param>
        /// <param name="trgt">Receives one result per fold.</param>
        /// <param name="src">Data that is folded.</param>
        /// <param name="initial">
        /// Starting state: <c>Value</c> when <c>UseValue</c> is <c>true</c>, otherwise the element of
        /// <c>DataAndLayout</c> at the current outer position.
        /// </param>
        /// <param name="isIndexed">Whether <paramref name="foldOp"/> is given the element position.</param>
        /// <param name="useThreads">Whether dimension 0 may be processed in parallel.</param>
        public static void ApplyAxisFold <TS, T, T1>(
            Func <int[], TS, T1, TS> foldOp,
            Func <TS, T> extractOp,
            DataAndLayout <T> trgt,
            DataAndLayout <T1> src,
            InitialOption <TS> initial,
            bool isIndexed,
            bool useThreads)
        {
            var nd    = src.FastAccess.NumDiensions;
            var shape = src.FastAccess.Shape;

            // Processes either the whole source or the slice where dimension 0 is pinned to dim0Pos.
            void loops(bool dim0Fixed, int dim0Pos)
            {
                var fromDim  = dim0Fixed ? 1 : 0;
                var startPos = new int[nd];

                if (dim0Fixed)
                {
                    startPos[0] = dim0Pos;
                }

                var targetPosItr = new PosIter(trgt.FastAccess, startPos, fromDim: fromDim, toDim: nd - 2);

                PosIter initialPosItr;

                if (!initial.UseValue)
                {
                    var initialDataAndLayout = initial.DataAndLayout;
                    initialPosItr = new PosIter(initialDataAndLayout.FastAccess, startPos, fromDim: fromDim, toDim: nd - 2);
                }
                else
                {
                    // Placeholder that only satisfies definite assignment; it is never read or advanced
                    // when a constant initial value is used.
                    initialPosItr = new PosIter(src.FastAccess, startPos, fromDim: fromDim, toDim: nd - 2);
                }

                var srcPosItr = new PosIter(src.FastAccess, startPos, fromDim: fromDim, toDim: nd - 2);
                var pos       = new int[targetPosItr.Pos.Length];

                while (targetPosItr.Active)
                {
                    var srcAddr = srcPosItr.Addr;
                    TS  state;
                    if (initial.UseValue)
                    {
                        state = initial.Value;
                    }
                    else
                    {
                        state = initial.DataAndLayout.Data[initialPosItr.Addr];
                    }

                    if (nd == 0)
                    {
                        // Scalar source: a single fold step and no position to report.
                        trgt.Data[targetPosItr.Addr] = extractOp(foldOp(null, state, src.Data[srcAddr]));
                    }
                    else if (isIndexed)
                    {
                        // Copy the outer coordinates, then count the last coordinate up in the inner loop.
                        for (var d = 0; d < nd - 1; d++)
                        {
                            pos[d] = targetPosItr.Pos[d];
                        }

                        pos[nd - 1] = 0;

                        for (var i = 0; i < shape[nd - 1]; i++)
                        {
                            state       = foldOp(pos, state, src.Data[srcAddr]);
                            srcAddr    += src.FastAccess.Stride[nd - 1];
                            pos[nd - 1] = pos[nd - 1] + 1;
                        }

                        trgt.Data[targetPosItr.Addr] = extractOp(state);
                    }
                    else
                    {
                        for (var i = 0; i < shape[nd - 1]; i++)
                        {
                            state    = foldOp(null, state, src.Data[srcAddr]);
                            srcAddr += src.FastAccess.Stride[nd - 1];
                        }

                        trgt.Data[targetPosItr.Addr] = extractOp(state);
                    }

                    targetPosItr.MoveNext();

                    // The source iterator must step in lockstep with the target; otherwise every target
                    // position would be folded from the same first source row.
                    srcPosItr.MoveNext();

                    if (!initial.UseValue)
                    {
                        initialPosItr.MoveNext();
                    }
                }
            }

            if (useThreads && nd > 1)
            {
                Parallel.For(0, shape[0], index => loops(true, index));
            }
            else
            {
                loops(false, 0);
            }
        }