/// <summary>
/// Lazily splits rows into groups of equal keys. Rows are visited in the sequence
/// given by <paramref name="order"/>; every maximal run of consecutive equal keys
/// becomes one <see cref="DataFrameViewGroup"/>. Equal keys that are not adjacent in
/// <paramref name="order"/> end up in separate groups.
/// </summary>
/// <typeparam name="TKey">key type</typeparam>
/// <param name="df">dataframe or view being grouped; groups are created on <c>df.Source ?? df</c></param>
/// <param name="order">indices into <paramref name="keys"/>, in visiting order; the same values
/// are stored as the row selection of each produced group</param>
/// <param name="keys">keys, indexed by the values found in <paramref name="order"/></param>
/// <param name="columns">not used by this method</param>
/// <param name="func">converts a key into the <see cref="DataFrameGroupKey"/> array given to its group</param>
/// <returns>one (key, group) pair per run, produced on demand; nothing when <paramref name="order"/> is empty</returns>
public static IEnumerable<KeyValuePair<TKey, DataFrameViewGroup>> TGroupBy<TKey>(
            IDataFrameView df, int[] order, TKey[] keys, int[] columns,
            Func<TKey, DataFrameGroupKey[]> func)
            where TKey : IEquatable<TKey>
        {
            // Null-safe equality; for non-null keys this goes through IEquatable<TKey>.Equals.
            var comparer = EqualityComparer<TKey>.Default;
            var pending  = new List<int>();
            TKey runKey  = default(TKey);

            foreach (var pos in order)
            {
                var cur = keys[pos];

                // A key change closes the current run. The very first row never closes
                // anything because no run is open yet, so 'order' is never indexed directly.
                if (pending.Count > 0 && !comparer.Equals(cur, runKey))
                {
                    yield return new KeyValuePair<TKey, DataFrameViewGroup>(
                        runKey,
                        new DataFrameViewGroup(func(runKey), df.Source ?? df, pending.ToArray(), df.ColumnsSet));

                    pending.Clear();
                }
                pending.Add(pos);
                runKey = cur;
            }

            // Flush the last open run.
            if (pending.Count > 0)
            {
                yield return new KeyValuePair<TKey, DataFrameViewGroup>(
                    runKey,
                    new DataFrameViewGroup(func(runKey), df.Source ?? df, pending.ToArray(), df.ColumnsSet));
            }
        }
        /// <summary>
        /// Last step of the GroupBy type dispatch for three key columns: the types of the
        /// first two keys are already fixed (<typeparamref name="T1"/>, <typeparamref name="T2"/>),
        /// the kind of the third key column selects the last type argument of <c>TGroupBy</c>.
        /// </summary>
        /// <param name="df">dataframe or view to group</param>
        /// <param name="icols">indices of the key columns; exactly three are supported</param>
        /// <param name="sort">forwarded to <c>TGroupBy</c></param>
        /// <returns>the result of the selected <c>TGroupBy</c> instantiation</returns>
        /// <exception cref="NotImplementedException">
        /// the number of key columns is not three, or the kind of the third column is not handled
        /// </exception>
        static IDataFrameViewGroupResults RecGroupBy<T1, T2>(IDataFrameView df, int[] icols, bool sort)
            where T1 : IEquatable<T1>, IComparable<T1>
            where T2 : IEquatable<T2>, IComparable<T2>
        {
            // Check the key count before reading icols[2]: with fewer than three keys the
            // lookup would fail with an IndexOutOfRangeException instead of this message.
            if (icols.Length != 3)
            {
                throw new NotImplementedException($"GroupBy is not implemented for {icols.Length} columns.");
            }

            var kind = df.Kinds[icols[2]];

            switch (kind)
            {
                case DataKind.BL:
                    return df.TGroupBy<T1, T2, DvBool>(icols, sort);

                case DataKind.I4:
                    return df.TGroupBy<T1, T2, DvInt4>(icols, sort);

                case DataKind.U4:
                    return df.TGroupBy<T1, T2, uint>(icols, sort);

                case DataKind.I8:
                    return df.TGroupBy<T1, T2, DvInt8>(icols, sort);

                case DataKind.R4:
                    return df.TGroupBy<T1, T2, float>(icols, sort);

                case DataKind.R8:
                    return df.TGroupBy<T1, T2, double>(icols, sort);

                case DataKind.TX:
                    return df.TGroupBy<T1, T2, DvText>(icols, sort);

                default:
                    throw new NotImplementedException($"GroupBy is not implemented for type '{kind}'.");
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Last step of the Sort type dispatch for three key columns: the types of the first
        /// two keys are already fixed (<typeparamref name="T1"/>, <typeparamref name="T2"/>),
        /// the kind of the third key column selects the last type argument of <c>TSort</c>.
        /// </summary>
        /// <param name="df">dataframe or view to sort</param>
        /// <param name="icols">indices of the key columns; exactly three are supported</param>
        /// <param name="ascending">forwarded to <c>TSort</c></param>
        /// <exception cref="NotImplementedException">
        /// the number of key columns is not three, or the kind of the third column is not handled
        /// </exception>
        static void RecSort<T1, T2>(IDataFrameView df, int[] icols, bool ascending)
            where T1 : IEquatable<T1>, IComparable<T1>
            where T2 : IEquatable<T2>, IComparable<T2>
        {
            // Check the key count before reading icols[2]: with fewer than three keys the
            // lookup would fail with an IndexOutOfRangeException instead of this message.
            if (icols.Length != 3)
            {
                throw new NotImplementedException($"Sort is not implemented for {icols.Length} columns.");
            }

            var kind = df.Kinds[icols[2]];

            switch (kind)
            {
                case DataKind.BL:
                    df.TSort<T1, T2, DvBool>(icols, ascending);
                    break;

                case DataKind.I4:
                    df.TSort<T1, T2, DvInt4>(icols, ascending);
                    break;

                case DataKind.U4:
                    df.TSort<T1, T2, uint>(icols, ascending);
                    break;

                case DataKind.I8:
                    df.TSort<T1, T2, DvInt8>(icols, ascending);
                    break;

                case DataKind.R4:
                    df.TSort<T1, T2, float>(icols, ascending);
                    break;

                case DataKind.R8:
                    df.TSort<T1, T2, double>(icols, ascending);
                    break;

                case DataKind.TX:
                    df.TSort<T1, T2, DvText>(icols, ascending);
                    break;

                default:
                    throw new NotImplementedException($"Sort is not implemented for type '{kind}'.");
            }
        }
Ejemplo n.º 4
0
 /// <summary>
 /// Joins this dataframe with <paramref name="right"/> on three key columns of types
 /// <typeparamref name="T1"/>, <typeparamref name="T2"/> and <typeparamref name="T3"/>.
 /// The work is delegated to a <see cref="DataFrameView"/> built over this dataframe
 /// with no row or column selection (both passed as null).
 /// </summary>
 /// <param name="right">right side of the join</param>
 /// <param name="colsLeft">key column indices on this dataframe</param>
 /// <param name="colsRight">key column indices on <paramref name="right"/></param>
 /// <param name="leftSuffix">forwarded to the view's <c>TJoin</c></param>
 /// <param name="rightSuffix">forwarded to the view's <c>TJoin</c></param>
 /// <param name="joinType">forwarded to the view's <c>TJoin</c></param>
 /// <param name="sort">forwarded to the view's <c>TJoin</c></param>
 /// <returns>the dataframe returned by the view's <c>TJoin</c></returns>
 public DataFrame TJoin<T1, T2, T3>(IDataFrameView right, IEnumerable<int> colsLeft, IEnumerable<int> colsRight, string leftSuffix = null, string rightSuffix = null, JoinStrategy joinType = JoinStrategy.Inner, bool sort = true)
     where T1 : IEquatable<T1>, IComparable<T1>
     where T2 : IEquatable<T2>, IComparable<T2>
     where T3 : IEquatable<T3>, IComparable<T3>
 {
     var wholeFrame = new DataFrameView(this, null, null);

     return wholeFrame.TJoin<T1, T2, T3>(right, colsLeft, colsRight, leftSuffix, rightSuffix, joinType, sort);
 }
        /// <summary>
        /// Last step of the Join type dispatch for three key columns: the types of the first
        /// two keys are already fixed (<typeparamref name="T1"/>, <typeparamref name="T2"/>),
        /// the kind of the third left key column selects the last type argument of <c>TJoin</c>.
        /// </summary>
        /// <param name="left">left side of the join; its column kinds drive the dispatch</param>
        /// <param name="right">right side of the join</param>
        /// <param name="icolsLeft">key column indices on the left side; exactly three are supported</param>
        /// <param name="icolsRight">key column indices on the right side</param>
        /// <param name="leftSuffix">forwarded to <c>TJoin</c></param>
        /// <param name="rightSuffix">forwarded to <c>TJoin</c></param>
        /// <param name="joinType">forwarded to <c>TJoin</c></param>
        /// <param name="sort">forwarded to <c>TJoin</c></param>
        /// <returns>the dataframe returned by the selected <c>TJoin</c> instantiation</returns>
        /// <exception cref="NotImplementedException">
        /// the number of left key columns is not three, or the kind of the third column is not handled
        /// </exception>
        static DataFrame RecJoin<T1, T2>(IDataFrameView left, IDataFrameView right, int[] icolsLeft, int[] icolsRight,
                                         string leftSuffix = null, string rightSuffix = null,
                                         JoinStrategy joinType = JoinStrategy.Inner, bool sort = true)
            where T1 : IEquatable<T1>, IComparable<T1>
            where T2 : IEquatable<T2>, IComparable<T2>
        {
            // Check the key count before reading icolsLeft[2]: with fewer than three keys the
            // lookup would fail with an IndexOutOfRangeException instead of this message.
            if (icolsLeft.Length != 3)
            {
                throw new NotImplementedException($"Join is not implemented for {icolsLeft.Length} columns.");
            }

            var kind = left.Kinds[icolsLeft[2]];

            switch (kind)
            {
                case DataKind.BL:
                    return left.TJoin<T1, T2, DvBool>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);

                case DataKind.I4:
                    return left.TJoin<T1, T2, DvInt4>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);

                case DataKind.U4:
                    return left.TJoin<T1, T2, uint>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);

                case DataKind.I8:
                    return left.TJoin<T1, T2, DvInt8>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);

                case DataKind.R4:
                    return left.TJoin<T1, T2, float>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);

                case DataKind.R8:
                    return left.TJoin<T1, T2, double>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);

                case DataKind.TX:
                    return left.TJoin<T1, T2, DvText>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);

                default:
                    throw new NotImplementedException($"Join is not implemented for type '{kind}'.");
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Joins this view with <paramref name="right"/> on three key columns of types
        /// <typeparamref name="T1"/>, <typeparamref name="T2"/> and <typeparamref name="T3"/>
        /// by collecting row/column selections, key column names and getters and handing
        /// them to <c>DataFrameJoining.TJoin</c>.
        /// </summary>
        /// <param name="right">right side of the join; when it is not a <see cref="DataFrame"/>
        /// it is accessed as a <see cref="DataFrameView"/></param>
        /// <param name="colsLeft">key column indices on this view</param>
        /// <param name="colsRight">key column indices on <paramref name="right"/></param>
        /// <param name="leftSuffix">forwarded to <c>DataFrameJoining.TJoin</c></param>
        /// <param name="rightSuffix">forwarded to <c>DataFrameJoining.TJoin</c></param>
        /// <param name="joinType">forwarded to <c>DataFrameJoining.TJoin</c></param>
        /// <param name="sort">forwarded to <c>DataFrameJoining.TJoin</c></param>
        /// <returns>the dataframe returned by <c>DataFrameJoining.TJoin</c></returns>
        public DataFrame TJoin<T1, T2, T3>(IDataFrameView right, IEnumerable<int> colsLeft, IEnumerable<int> colsRight, string leftSuffix = null, string rightSuffix = null, JoinStrategy joinType = JoinStrategy.Inner, bool sort = true)
            where T1 : IEquatable<T1>, IComparable<T1>
            where T2 : IEquatable<T2>, IComparable<T2>
            where T3 : IEquatable<T3>, IComparable<T3>
        {
            // Private copy of this view's row selection.
            int[] leftRows = _rows.ToArray();

            // For a plain DataFrame no row or column selection is passed on (null);
            // anything else is read through its DataFrameView fields.
            bool          rightIsFrame = right is DataFrame;
            DataFrameView rightView    = rightIsFrame ? null : right as DataFrameView;
            int[]         rightRows    = rightIsFrame ? null : rightView._rows.ToArray();
            int[]         rightColumns = rightIsFrame ? null : rightView._columns;

            var leftKeys  = colsLeft.ToArray();
            var rightKeys = colsRight.ToArray();

            // Key column names are used to label the group keys on each side.
            var leftNames  = leftKeys.Select(index => Schema.GetColumnName(index)).ToArray();
            var rightNames = rightKeys.Select(index => right.SchemaI.GetColumnName(index)).ToArray();

            return DataFrameJoining.TJoin(
                this, right,
                leftRows, rightRows,
                _columns, rightColumns,
                leftKeys, rightKeys, sort,
                leftSuffix, rightSuffix,
                joinType,
                GetMultiGetterAt<T1, T2, T3>(leftKeys),
                right.GetMultiGetterAt<T1, T2, T3>(rightKeys),
                key => key.ToImTuple(),
                key => DataFrameGroupKey.Create(leftNames, key),
                key => DataFrameGroupKey.Create(rightNames, key));
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Builds single-column sort keys for <paramref name="df"/> and sorts
        /// <paramref name="order"/> according to them.
        /// </summary>
        /// <typeparam name="T1">type of the key column</typeparam>
        /// <param name="df">dataframe or view to sort</param>
        /// <param name="order">row order, handed by reference to the key-based <c>TSort</c> overload</param>
        /// <param name="columns">key column indices given to <c>EnumerateItems</c></param>
        /// <param name="ascending">passed both to <c>EnumerateItems</c> and to the key-based <c>TSort</c> overload</param>
        /// <returns>the keys that were used for sorting</returns>
        public static ImmutableTuple<T1>[] TSort<T1>(IDataFrameView df, ref int[] order, IEnumerable<int> columns, bool ascending)
            where T1 : IEquatable<T1>, IComparable<T1>
        {
            // Materialize the keys once: they are both sorted on and returned to the caller.
            var sortKeys = df.EnumerateItems<T1>(columns, ascending)
                             .Select(item => item.ToImTuple())
                             .ToArray();

            TSort(df, ref order, sortKeys, ascending);

            return sortKeys;
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Last step of the Sort type dispatch for three key columns: the types of the first
        /// two keys are already fixed (<typeparamref name="T1"/>, <typeparamref name="T2"/>),
        /// the type of the third key column selects the last type argument of <c>TSort</c>.
        /// Vector columns are dispatched on their item type and sorted as <c>VBufferEqSort</c>.
        /// </summary>
        /// <param name="df">dataframe or view to sort</param>
        /// <param name="icols">indices of the key columns; exactly three are supported</param>
        /// <param name="ascending">forwarded to <c>TSort</c></param>
        /// <exception cref="NotImplementedException">
        /// the number of key columns is not three, or the type of the third column is not handled
        /// </exception>
        static void RecSort<T1, T2>(IDataFrameView df, int[] icols, bool ascending)
            where T1 : IEquatable<T1>, IComparable<T1>
            where T2 : IEquatable<T2>, IComparable<T2>
        {
            // Check the key count before reading icols[2]: with fewer than three keys the
            // lookup would fail with an IndexOutOfRangeException instead of this message.
            if (icols.Length != 3)
            {
                throw new NotImplementedException($"Sort is not implemented for {icols.Length} columns.");
            }

            var kind = df.Kinds[icols[2]];

            if (kind.IsVector())
            {
                // Vector column: dispatch on the item type.
                switch (kind.ItemType().RawKind())
                {
                    case DataKind.BL:
                        df.TSort<T1, T2, VBufferEqSort<bool>>(icols, ascending);
                        break;

                    case DataKind.I4:
                        df.TSort<T1, T2, VBufferEqSort<int>>(icols, ascending);
                        break;

                    case DataKind.U4:
                        df.TSort<T1, T2, VBufferEqSort<uint>>(icols, ascending);
                        break;

                    case DataKind.I8:
                        df.TSort<T1, T2, VBufferEqSort<long>>(icols, ascending);
                        break;

                    case DataKind.R4:
                        df.TSort<T1, T2, VBufferEqSort<float>>(icols, ascending);
                        break;

                    case DataKind.R8:
                        df.TSort<T1, T2, VBufferEqSort<double>>(icols, ascending);
                        break;

                    case DataKind.TX:
                        df.TSort<T1, T2, VBufferEqSort<DvText>>(icols, ascending);
                        break;

                    default:
                        throw new NotImplementedException($"Sort is not implemented for type '{kind}'.");
                }
            }
            else
            {
                switch (kind.RawKind())
                {
                    case DataKind.BL:
                        df.TSort<T1, T2, bool>(icols, ascending);
                        break;

                    case DataKind.I4:
                        df.TSort<T1, T2, int>(icols, ascending);
                        break;

                    case DataKind.U4:
                        df.TSort<T1, T2, uint>(icols, ascending);
                        break;

                    case DataKind.I8:
                        df.TSort<T1, T2, long>(icols, ascending);
                        break;

                    case DataKind.R4:
                        df.TSort<T1, T2, float>(icols, ascending);
                        break;

                    case DataKind.R8:
                        df.TSort<T1, T2, double>(icols, ascending);
                        break;

                    case DataKind.TX:
                        df.TSort<T1, T2, DvText>(icols, ascending);
                        break;

                    default:
                        throw new NotImplementedException($"Sort is not implemented for type '{kind}'.");
                }
            }
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Creates a view over <paramref name="src"/> restricted to a selection of rows and columns.
        /// </summary>
        /// <param name="src">viewed dataframe or view</param>
        /// <param name="rows">selected row indices; null selects <c>0 .. src.Length - 1</c></param>
        /// <param name="columns">selected column indices; null selects
        /// <c>0 .. src.SchemaI.ColumnCount - 1</c></param>
        public DataFrameView(IDataFrameView src, IEnumerable<int> rows, IEnumerable<int> columns)
        {
            _src = src;

            // Both selections are copied into arrays owned by the view; a missing
            // selection is replaced by the full index range of the source.
            _rows    = rows?.ToArray() ?? Enumerable.Range(0, src.Length).ToArray();
            _columns = columns?.ToArray() ?? Enumerable.Range(0, src.SchemaI.ColumnCount).ToArray();

            // The view schema is derived from the source schema and the selected columns.
            _schema = new DataFrameViewSchema(src.Schema, _columns);
        }
        /// <summary>
        /// First step of the Join type dispatch. The kind of the first left key column selects
        /// a type argument: with a single key column <c>TJoin</c> is called directly, otherwise
        /// the generic <c>RecJoin</c> overload continues with the remaining key columns.
        /// </summary>
        /// <param name="left">left side of the join; its column kinds drive the dispatch</param>
        /// <param name="right">right side of the join</param>
        /// <param name="icolsLeft">key column indices on the left side</param>
        /// <param name="icolsRight">key column indices on the right side</param>
        /// <param name="leftSuffix">forwarded unchanged</param>
        /// <param name="rightSuffix">forwarded unchanged</param>
        /// <param name="joinType">forwarded unchanged</param>
        /// <param name="sort">forwarded unchanged</param>
        /// <returns>the joined dataframe produced by the selected overload</returns>
        /// <exception cref="NotImplementedException">the kind of the first key column is not handled</exception>
        static DataFrame RecJoin(IDataFrameView left, IDataFrameView right, int[] icolsLeft, int[] icolsRight,
                                 string leftSuffix = null, string rightSuffix = null,
                                 JoinStrategy joinType = JoinStrategy.Inner, bool sort = true)
        {
            var firstKind = left.Kinds[icolsLeft[0]];

            if (icolsLeft.Length != 1)
            {
                // Several key columns: fix the first type argument and keep dispatching.
                switch (firstKind)
                {
                    case DataKind.BL:
                        return RecJoin<DvBool>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);

                    case DataKind.I4:
                        return RecJoin<DvInt4>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);

                    case DataKind.U4:
                        return RecJoin<uint>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);

                    case DataKind.I8:
                        return RecJoin<DvInt8>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);

                    case DataKind.R4:
                        return RecJoin<float>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);

                    case DataKind.R8:
                        return RecJoin<double>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);

                    case DataKind.TX:
                        return RecJoin<DvText>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);

                    default:
                        throw new NotImplementedException($"Join is not implemented for type '{firstKind}'.");
                }
            }

            // Single key column: join directly on its type.
            switch (firstKind)
            {
                case DataKind.BL:
                    return left.TJoin<DvBool>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);

                case DataKind.I4:
                    return left.TJoin<DvInt4>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);

                case DataKind.U4:
                    return left.TJoin<uint>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);

                case DataKind.I8:
                    return left.TJoin<DvInt8>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);

                case DataKind.R4:
                    return left.TJoin<float>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);

                case DataKind.R8:
                    return left.TJoin<double>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);

                case DataKind.TX:
                    return left.TJoin<DvText>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);

                default:
                    throw new NotImplementedException($"Join is not implemented for type '{firstKind}'.");
            }
        }
Ejemplo n.º 11
0
        /// <summary>
        /// First step of the Sort type dispatch. The kind of the first key column selects a
        /// type argument: with a single key column <c>TSort</c> is called directly, otherwise
        /// the generic <c>RecSort</c> overload continues with the remaining key columns.
        /// </summary>
        /// <param name="df">dataframe or view to sort</param>
        /// <param name="icols">indices of the key columns</param>
        /// <param name="ascending">forwarded unchanged</param>
        /// <exception cref="NotImplementedException">the kind of the first key column is not handled</exception>
        static void RecSort(IDataFrameView df, int[] icols, bool ascending)
        {
            var firstKind = df.Kinds[icols[0]];

            if (icols.Length != 1)
            {
                // Several key columns: fix the first type argument and keep dispatching.
                switch (firstKind)
                {
                    case DataKind.BL:
                        RecSort<DvBool>(df, icols, ascending);
                        break;

                    case DataKind.I4:
                        RecSort<DvInt4>(df, icols, ascending);
                        break;

                    case DataKind.U4:
                        RecSort<uint>(df, icols, ascending);
                        break;

                    case DataKind.I8:
                        RecSort<DvInt8>(df, icols, ascending);
                        break;

                    case DataKind.R4:
                        RecSort<float>(df, icols, ascending);
                        break;

                    case DataKind.R8:
                        RecSort<double>(df, icols, ascending);
                        break;

                    case DataKind.TX:
                        RecSort<DvText>(df, icols, ascending);
                        break;

                    default:
                        throw new NotImplementedException($"Sort is not implemented for type '{firstKind}'.");
                }
                return;
            }

            // Single key column: sort directly on its type.
            switch (firstKind)
            {
                case DataKind.BL:
                    df.TSort<DvBool>(icols, ascending);
                    break;

                case DataKind.I4:
                    df.TSort<DvInt4>(icols, ascending);
                    break;

                case DataKind.U4:
                    df.TSort<uint>(icols, ascending);
                    break;

                case DataKind.I8:
                    df.TSort<DvInt8>(icols, ascending);
                    break;

                case DataKind.R4:
                    df.TSort<float>(icols, ascending);
                    break;

                case DataKind.R8:
                    df.TSort<double>(icols, ascending);
                    break;

                case DataKind.TX:
                    df.TSort<DvText>(icols, ascending);
                    break;

                default:
                    throw new NotImplementedException($"Sort is not implemented for type '{firstKind}'.");
            }
        }
        /// <summary>
        /// First step of the GroupBy type dispatch. The kind of the first key column selects a
        /// type argument: with a single key column <c>TGroupBy</c> is called directly, otherwise
        /// the generic <c>RecGroupBy</c> overload continues with the remaining key columns.
        /// </summary>
        /// <param name="df">dataframe or view to group</param>
        /// <param name="icols">indices of the key columns</param>
        /// <param name="sort">forwarded unchanged</param>
        /// <returns>the grouping result produced by the selected overload</returns>
        /// <exception cref="NotImplementedException">the kind of the first key column is not handled</exception>
        static IDataFrameViewGroupResults RecGroupBy(IDataFrameView df, int[] icols, bool sort)
        {
            var firstKind = df.Kinds[icols[0]];

            if (icols.Length != 1)
            {
                // Several key columns: fix the first type argument and keep dispatching.
                switch (firstKind)
                {
                    case DataKind.BL:
                        return RecGroupBy<DvBool>(df, icols, sort);

                    case DataKind.I4:
                        return RecGroupBy<DvInt4>(df, icols, sort);

                    case DataKind.U4:
                        return RecGroupBy<uint>(df, icols, sort);

                    case DataKind.I8:
                        return RecGroupBy<DvInt8>(df, icols, sort);

                    case DataKind.R4:
                        return RecGroupBy<float>(df, icols, sort);

                    case DataKind.R8:
                        return RecGroupBy<double>(df, icols, sort);

                    case DataKind.TX:
                        return RecGroupBy<DvText>(df, icols, sort);

                    default:
                        throw new NotImplementedException($"GroupBy is not implemented for type '{firstKind}'.");
                }
            }

            // Single key column: group directly on its type.
            switch (firstKind)
            {
                case DataKind.BL:
                    return df.TGroupBy<DvBool>(icols, sort);

                case DataKind.I4:
                    return df.TGroupBy<DvInt4>(icols, sort);

                case DataKind.U4:
                    return df.TGroupBy<uint>(icols, sort);

                case DataKind.I8:
                    return df.TGroupBy<DvInt8>(icols, sort);

                case DataKind.R4:
                    return df.TGroupBy<float>(icols, sort);

                case DataKind.R8:
                    return df.TGroupBy<double>(icols, sort);

                case DataKind.TX:
                    return df.TGroupBy<DvText>(icols, sort);

                default:
                    throw new NotImplementedException($"GroupBy is not implemented for type '{firstKind}'.");
            }
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Compares this dataframe with another one column by column.
        /// Raises an exception if the two dataframes do not have the same shape;
        /// the per-column comparison is delegated to the columns' own
        /// <c>AssertAlmostEqual</c>.
        /// </summary>
        /// <param name="df">dataframe to compare with</param>
        /// <param name="precision">precision forwarded to the column comparison</param>
        /// <param name="exc">forwarded to the column comparison: raises an exception if too different.
        /// A shape mismatch raises regardless of this flag.</param>
        /// <returns>max difference over all columns</returns>
        /// <exception cref="DataValueError">the shapes differ</exception>
        public double AssertAlmostEqual(IDataFrameView df, double precision = 1e-5, bool exc = true)
        {
            if (Shape != df.Shape)
            {
                throw new DataValueError(string.Format("Shapes are different ({0}, {1}) != ({2}, {3})",
                                                       Shape.Item1, Shape.Item2, df.Shape.Item1, df.Shape.Item2));
            }
            double max = 0;

            for (int i = 0; i < df.Shape.Item2; ++i)
            {
                var c1 = GetColumn(i);
                // The second column must come from the other dataframe; taking it from
                // this one compared every column with itself and always reported 0.
                var c2 = df.GetColumn(i);
                var d  = c1.AssertAlmostEqual(c2, precision, exc);
                max = Math.Max(max, d);
            }
            return max;
        }
Ejemplo n.º 14
0
 /// <summary>
 /// Sorts <paramref name="order"/> in place so that the keys it points to are in
 /// ascending or descending order. The relative order of equal keys is not specified
 /// (<see cref="Array.Sort{T}(T[], Comparison{T})"/> is not a stable sort).
 /// </summary>
 /// <typeparam name="T">key type</typeparam>
 /// <param name="df">only used to size the identity order when <paramref name="order"/> is null</param>
 /// <param name="order">indices into <paramref name="keys"/>; when null it is replaced by
 /// <c>0 .. df.Length - 1</c> before sorting</param>
 /// <param name="keys">keys the order is sorted on</param>
 /// <param name="ascending">true for ascending keys, false for descending keys</param>
 public static void TSort<T>(IDataFrameView df, ref int[] order, T[] keys, bool ascending)
     where T : IComparable<T>
 {
     if (order == null)
     {
         order = new int[df.Length];
         for (int i = 0; i < order.Length; ++i)
         {
             order[i] = i;
         }
     }
     if (ascending)
     {
         Array.Sort(order, (x, y) => keys[x].CompareTo(keys[y]));
     }
     else
     {
         // Flip the sign explicitly rather than negating the result: CompareTo may
         // return int.MinValue, whose negation overflows and stays negative.
         Array.Sort(order, (x, y) =>
         {
             int cmp = keys[x].CompareTo(keys[y]);
             return cmp < 0 ? 1 : (cmp > 0 ? -1 : 0);
         });
     }
 }
Ejemplo n.º 15
0
 /// <summary>
 /// Joins two dataframes on the given key columns after checking that both sides use
 /// the same number of key columns and that paired key columns have the same type.
 /// The join itself is delegated to <c>RecJoin</c>.
 /// </summary>
 /// <param name="left">left side of the join</param>
 /// <param name="right">right side of the join</param>
 /// <param name="colsLeft">key column indices on the left side</param>
 /// <param name="colsRight">key column indices on the right side</param>
 /// <param name="leftSuffix">forwarded to <c>RecJoin</c></param>
 /// <param name="rightSuffix">forwarded to <c>RecJoin</c></param>
 /// <param name="joinType">forwarded to <c>RecJoin</c></param>
 /// <param name="sort">forwarded to <c>RecJoin</c></param>
 /// <returns>the dataframe returned by <c>RecJoin</c></returns>
 /// <exception cref="DataValueError">the two key lists do not have the same length</exception>
 /// <exception cref="DataTypeError">two paired key columns have different types</exception>
 public static DataFrame Join(IDataFrameView left, IDataFrameView right,
                              IEnumerable<int> colsLeft, IEnumerable<int> colsRight,
                              string leftSuffix = null, string rightSuffix = null,
                              JoinStrategy joinType = JoinStrategy.Inner, bool sort = true)
 {
     int[] leftKeys  = colsLeft.ToArray();
     int[] rightKeys = colsRight.ToArray();

     if (leftKeys.Length != rightKeys.Length)
     {
         throw new DataValueError("Left and right must be joined with the same number of columns.");
     }

     // Pair the key columns position by position; evaluation stops at the first mismatch.
     bool typeMismatch = leftKeys
                         .Zip(rightKeys, (l, r) => left.SchemaI.GetColumnType(l) != right.SchemaI.GetColumnType(r))
                         .Any(differs => differs);
     if (typeMismatch)
     {
         throw new DataTypeError("Left and right must be joined with the same number of columns and the same types.");
     }

     return RecJoin(left, right, leftKeys, rightKeys, leftSuffix, rightSuffix, joinType, sort);
 }
Ejemplo n.º 16
0
        /// <summary>
        /// Last step of the GroupBy type dispatch for three key columns: the types of the
        /// first two keys are already fixed (<typeparamref name="T1"/>, <typeparamref name="T2"/>),
        /// the type of the third key column selects the last type argument of <c>TGroupBy</c>.
        /// Vector columns are not supported as keys.
        /// </summary>
        /// <param name="df">dataframe or view to group</param>
        /// <param name="icols">indices of the key columns; exactly three are supported</param>
        /// <param name="sort">forwarded to <c>TGroupBy</c></param>
        /// <returns>the result of the selected <c>TGroupBy</c> instantiation</returns>
        /// <exception cref="NotImplementedException">
        /// the number of key columns is not three, the third column is a vector,
        /// or its type is not handled
        /// </exception>
        static IDataFrameViewGroupResults RecGroupBy<T1, T2>(IDataFrameView df, int[] icols, bool sort)
            where T1 : IEquatable<T1>, IComparable<T1>
            where T2 : IEquatable<T2>, IComparable<T2>
        {
            // Check the key count before reading icols[2]: with fewer than three keys the
            // lookup would fail with an IndexOutOfRangeException instead of this message.
            if (icols.Length != 3)
            {
                throw new NotImplementedException($"GroupBy is not implemented for {icols.Length} columns.");
            }

            var kind = df.Kinds[icols[2]];

            if (kind.IsVector())
            {
                throw new NotImplementedException();
            }

            switch (kind.RawKind())
            {
                case DataKind.Boolean:
                    return df.TGroupBy<T1, T2, bool>(icols, sort);

                case DataKind.Int32:
                    return df.TGroupBy<T1, T2, int>(icols, sort);

                case DataKind.UInt32:
                    return df.TGroupBy<T1, T2, uint>(icols, sort);

                case DataKind.Int64:
                    return df.TGroupBy<T1, T2, long>(icols, sort);

                case DataKind.Single:
                    return df.TGroupBy<T1, T2, float>(icols, sort);

                case DataKind.Double:
                    return df.TGroupBy<T1, T2, double>(icols, sort);

                case DataKind.String:
                    return df.TGroupBy<T1, T2, DvText>(icols, sort);

                default:
                    throw new NotImplementedException($"GroupBy is not implemented for type '{kind}'.");
            }
        }
        /// <summary>
        /// Groups the rows of <paramref name="df"/> by the values of the key columns
        /// <paramref name="cols"/>: keys are read with <paramref name="getter"/>, converted
        /// with <paramref name="conv"/>, optionally sorted, then split into runs of equal keys.
        /// </summary>
        /// <typeparam name="TMutKey">mutable key type filled by the getter</typeparam>
        /// <typeparam name="TImutKey">immutable, comparable key type used for sorting and grouping</typeparam>
        /// <param name="df">dataframe or view to group</param>
        /// <param name="rows">row selection passed to <c>EnumerateItems</c></param>
        /// <param name="columns">forwarded to the key-based <c>TGroupBy</c> overload</param>
        /// <param name="cols">indices of the key columns</param>
        /// <param name="sort">when true the keys are sorted in ascending order before grouping;
        /// when false only adjacent equal keys are merged</param>
        /// <param name="getter">reads the key columns of a row into a <typeparamref name="TMutKey"/></param>
        /// <param name="conv">converts a mutable key into its immutable form</param>
        /// <param name="conv2">converts an immutable key into the group key array attached to each group</param>
        /// <returns>the lazily evaluated groups wrapped in a <c>DataFrameViewGroupResults</c></returns>
        public static DataFrameViewGroupResults<TImutKey> TGroupBy<TMutKey, TImutKey>(
            IDataFrameView df, int[] rows, int[] columns, IEnumerable<int> cols, bool sort,
            MultiGetterAt<TMutKey> getter,
            Func<TMutKey, TImutKey> conv,
            Func<TImutKey, DataFrameGroupKey[]> conv2)
            where TMutKey : ITUple, new()
            where TImutKey : IComparable<TImutKey>, IEquatable<TImutKey>
        {
            var icols = cols.ToArray();

            // Identity order over the keys. The former conditional dereferenced 'rows' exactly
            // when it was null; its only path that could succeed produced this same array.
            // NOTE(review): assumes df.Length equals the number of keys enumerated below - confirm.
            int[] order = Enumerable.Range(0, df.Length).ToArray();

            var keys = df.EnumerateItems(icols, true, rows, getter).Select(c => conv(c)).ToArray();

            if (sort)
            {
                DataFrameSorting.TSort(df, ref order, keys, true);
            }
            var iter = TGroupBy(df, order, keys, columns, conv2);

            return new DataFrameViewGroupResults<TImutKey>(iter);
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Joins two dataframes on the given key columns: keys are read on both sides,
        /// converted with <paramref name="conv"/>, optionally sorted, then the key-based
        /// <c>TJoin</c> overload produces the pieces that are concatenated into the result.
        /// </summary>
        /// <typeparam name="TMutKey">mutable key type filled by the getters</typeparam>
        /// <typeparam name="TImutKey">immutable, comparable key type used for sorting and matching</typeparam>
        /// <param name="left">left side of the join</param>
        /// <param name="right">right side of the join</param>
        /// <param name="rowsLeft">row selection passed to <c>EnumerateItems</c> on the left side</param>
        /// <param name="rowsRight">row selection passed to <c>EnumerateItems</c> on the right side</param>
        /// <param name="columnsLeft">not used by this method</param>
        /// <param name="columnsRight">not used by this method</param>
        /// <param name="colsLeft">key column indices on the left side</param>
        /// <param name="colsRight">key column indices on the right side</param>
        /// <param name="sort">when true both key arrays are sorted in ascending order before matching</param>
        /// <param name="leftSuffix">forwarded to the key-based <c>TJoin</c> overload</param>
        /// <param name="rightSuffix">forwarded to the key-based <c>TJoin</c> overload</param>
        /// <param name="joinType">forwarded to the key-based <c>TJoin</c> overload</param>
        /// <param name="getterLeft">reads the key columns of a left row</param>
        /// <param name="getterRight">reads the key columns of a right row</param>
        /// <param name="conv">converts a mutable key into its immutable form</param>
        /// <param name="convLeft">converts a key into the group key array of a left group</param>
        /// <param name="convRight">converts a key into the group key array of a right group</param>
        /// <returns>the concatenation of all joined pieces</returns>
        public static DataFrame TJoin<TMutKey, TImutKey>(
            IDataFrameView left, IDataFrameView right,
            int[] rowsLeft, int[] rowsRight,
            int[] columnsLeft, int[] columnsRight,
            IEnumerable<int> colsLeft, IEnumerable<int> colsRight,
            bool sort,
            string leftSuffix, string rightSuffix,
            JoinStrategy joinType,
            MultiGetterAt<TMutKey> getterLeft,
            MultiGetterAt<TMutKey> getterRight,
            Func<TMutKey, TImutKey> conv,
            Func<TImutKey, DataFrameGroupKey[]> convLeft,
            Func<TImutKey, DataFrameGroupKey[]> convRight)
            where TMutKey : ITUple, new()
            where TImutKey : IComparable<TImutKey>, IEquatable<TImutKey>
        {
            var icolsLeft  = colsLeft.ToArray();
            var icolsRight = colsRight.ToArray();

            // Identity orders over each side's keys. The former conditionals dereferenced the
            // row arrays exactly when 'rowsLeft' was null (and tested 'rowsLeft' for the right
            // side as well); their only path that could succeed produced these same arrays.
            // NOTE(review): assumes each Length equals the number of keys enumerated below - confirm.
            int[] orderLeft  = Enumerable.Range(0, left.Length).ToArray();
            int[] orderRight = Enumerable.Range(0, right.Length).ToArray();

            var keysLeft  = left.EnumerateItems(icolsLeft, true, rowsLeft, getterLeft).Select(c => conv(c)).ToArray();
            var keysRight = right.EnumerateItems(icolsRight, true, rowsRight, getterRight).Select(c => conv(c)).ToArray();

            if (sort)
            {
                DataFrameSorting.TSort(left, ref orderLeft, keysLeft, true);
                DataFrameSorting.TSort(right, ref orderRight, keysRight, true);
            }
            var iter = TJoin<TImutKey>(left, right, orderLeft, orderRight, keysLeft, keysRight,
                                       icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, convLeft, convRight);

            return DataFrame.Concat(iter);
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Merge-joins two dataframes. Both sides are split into groups of equal keys with
        /// <c>DataFrameGrouping.TGroupBy</c>, then the two group sequences are walked in
        /// parallel, always advancing the side with the smaller key, and one dataframe is
        /// yielded per emitted piece:
        /// a key only on the left is emitted for Left and Outer joins, a key only on the
        /// right for Right and Outer joins (missing columns are filled with the missing or
        /// default value of their type), and a key on both sides always produces the
        /// product of the two groups.
        /// </summary>
        /// <typeparam name="TKey">key type</typeparam>
        /// <param name="left">left side of the join</param>
        /// <param name="right">right side of the join</param>
        /// <param name="orderLeft">visiting order of the left keys</param>
        /// <param name="orderRight">visiting order of the right keys</param>
        /// <param name="keysLeft">left keys</param>
        /// <param name="keysRight">right keys</param>
        /// <param name="icolsLeft">left key column indices, forwarded to the grouping</param>
        /// <param name="icolsRight">right key column indices, forwarded to the grouping</param>
        /// <param name="leftSuffix">appended to every left column name; null is treated as empty</param>
        /// <param name="rightSuffix">appended to every right column name; null is treated as empty.
        /// Right names that still collide with an existing name get "_y" appended until unique.</param>
        /// <param name="joinType">selects which unmatched keys are emitted</param>
        /// <param name="funcLeft">converts a key into the group key array of a left group</param>
        /// <param name="funcRight">converts a key into the group key array of a right group</param>
        /// <returns>the pieces of the join, produced on demand</returns>
        public static IEnumerable<DataFrame> TJoin<TKey>(
            IDataFrameView left, IDataFrameView right,
            int[] orderLeft, int[] orderRight,
            TKey[] keysLeft, TKey[] keysRight,
            int[] icolsLeft, int[] icolsRight,
            string leftSuffix, string rightSuffix,
            JoinStrategy joinType,
            Func<TKey, DataFrameGroupKey[]> funcLeft,
            Func<TKey, DataFrameGroupKey[]> funcRight)
            where TKey : IEquatable<TKey>, IComparable<TKey>
        {
            var groupLeft  = DataFrameGrouping.TGroupBy<TKey>(left, orderLeft, keysLeft, icolsLeft, funcLeft);
            var groupRight = DataFrameGrouping.TGroupBy<TKey>(right, orderRight, keysRight, icolsRight, funcRight);

            // The enumerators are disposed when the join completes or is abandoned early.
            using (var iterLeft = groupLeft.GetEnumerator())
            using (var iterRight = groupRight.GetEnumerator())
            {
                bool contLeft  = iterLeft.MoveNext();
                bool contRight = iterRight.MoveNext();

                leftSuffix  = string.IsNullOrEmpty(leftSuffix) ? string.Empty : leftSuffix;
                rightSuffix = string.IsNullOrEmpty(rightSuffix) ? string.Empty : rightSuffix;
                var newColsLeft  = left.Columns.Select(c => c + leftSuffix).ToArray();
                var newColsRight = right.Columns.Select(c => c + rightSuffix).ToArray();
                var existsCols   = new HashSet<string>(newColsLeft);

                // Make every right column name unique in the joined frame.
                for (int i = 0; i < newColsRight.Length; ++i)
                {
                    while (existsCols.Contains(newColsRight[i]))
                    {
                        newColsRight[i] += "_y";
                    }
                    existsCols.Add(newColsRight[i]);
                }
                var newCols = newColsLeft.Concat(newColsRight).ToArray();

                int r;

                while (contLeft || contRight)
                {
                    // r < 0: current left key has no match on the right (or the right side is exhausted).
                    // r > 0: current right key has no match on the left (or the left side is exhausted).
                    // r == 0: both sides hold the same key.
                    r = contLeft && contRight
                        ? iterLeft.Current.Key.CompareTo(iterRight.Current.Key)
                        : (contRight ? 1 : -1);

                    if (r < 0)
                    {
                        if (joinType == JoinStrategy.Left || joinType == JoinStrategy.Outer)
                        {
                            var df = iterLeft.Current.Value.Copy();
                            if (!string.IsNullOrEmpty(leftSuffix))
                            {
                                df.RenameColumns(newColsLeft);
                            }
                            // Pad with the right columns, filled with missing values.
                            for (int i = 0; i < right.ColumnCount; ++i)
                            {
                                var kind = right.SchemaI.GetColumnType(i);
                                var col  = df.AddColumn(newColsRight[i], kind, df.Length);
                                df.GetColumn(col).Set(DataFrameMissingValue.GetMissingOrDefaultMissingValue(kind));
                            }
                            yield return df;
                        }
                        contLeft = iterLeft.MoveNext();
                    }
                    else if (r > 0)
                    {
                        if (joinType == JoinStrategy.Right || joinType == JoinStrategy.Outer)
                        {
                            var df = iterRight.Current.Value.Copy();
                            df.RenameColumns(newColsRight);
                            // Pad with the left columns, filled with missing values,
                            // then restore the left-then-right column order.
                            for (int i = 0; i < left.ColumnCount; ++i)
                            {
                                var kind = left.SchemaI.GetColumnType(i);
                                var col  = df.AddColumn(newColsLeft[i], kind, df.Length);
                                df.GetColumn(col).Set(DataFrameMissingValue.GetMissingOrDefaultMissingValue(kind));
                            }
                            df.OrderColumns(newCols);
                            yield return df;
                        }
                        contRight = iterRight.MoveNext();
                    }
                    else
                    {
                        var dfLeft  = iterLeft.Current.Value.Copy();
                        var dfRight = iterRight.Current.Value.Copy();
                        if (!string.IsNullOrEmpty(leftSuffix))
                        {
                            dfLeft.RenameColumns(newColsLeft);
                        }
                        dfRight.RenameColumns(newColsRight);
                        // Each side is repeated by the other side's row count so that the
                        // right columns can be appended to the left ones.
                        var vleft  = dfLeft.Multiply(dfRight.Length, MultiplyStrategy.Block).Copy();
                        var vright = dfRight.Multiply(dfLeft.Length, MultiplyStrategy.Row).Copy();
                        for (int i = 0; i < vright.ColumnCount; ++i)
                        {
                            vleft.AddColumn(newColsRight[i], vright.GetColumn(i));
                        }
                        yield return vleft;

                        contLeft  = iterLeft.MoveNext();
                        contRight = iterRight.MoveNext();
                    }
                }
            }
        }
Ejemplo n.º 20
0
 /// <summary>
 /// Exact comparison between two dataframes. The view is copied and the
 /// comparison is delegated to the <c>Equals</c> overload accepting that copy.
 /// </summary>
 /// <param name="dfv">dataframe or view to compare with</param>
 /// <returns>false when <paramref name="dfv"/> is null, otherwise the result of
 /// comparing with its copy</returns>
 public bool Equals(IDataFrameView dfv)
 {
     // Equality with null is false and must not throw.
     if (dfv is null)
     {
         return false;
     }
     return Equals(dfv.Copy());
 }
Ejemplo n.º 21
0
        /// <summary>
        /// First step of the Sort type dispatch. The type of the first key column selects a
        /// type argument: with a single key column <c>TSort</c> is called directly, otherwise
        /// the generic <c>RecSort</c> overload continues with the remaining key columns.
        /// Vector columns are dispatched on their item type and sorted as <c>VBufferEqSort</c>.
        /// </summary>
        /// <param name="df">dataframe or view to sort</param>
        /// <param name="icols">indices of the key columns</param>
        /// <param name="ascending">forwarded unchanged</param>
        /// <exception cref="NotImplementedException">the type of the first key column is not handled</exception>
        static void RecSort(IDataFrameView df, int[] icols, bool ascending)
        {
            var firstKind = df.Kinds[icols[0]];
            bool singleKey = icols.Length == 1;
            bool isVector  = firstKind.IsVector();

            if (singleKey && isVector)
            {
                // One vector key column: sort directly on its item type.
                switch (firstKind.ItemType().RawKind())
                {
                    case DataKind.BL:
                        df.TSort<VBufferEqSort<bool>>(icols, ascending);
                        break;

                    case DataKind.I4:
                        df.TSort<VBufferEqSort<int>>(icols, ascending);
                        break;

                    case DataKind.U4:
                        df.TSort<VBufferEqSort<uint>>(icols, ascending);
                        break;

                    case DataKind.I8:
                        df.TSort<VBufferEqSort<long>>(icols, ascending);
                        break;

                    case DataKind.R4:
                        df.TSort<VBufferEqSort<float>>(icols, ascending);
                        break;

                    case DataKind.R8:
                        df.TSort<VBufferEqSort<double>>(icols, ascending);
                        break;

                    case DataKind.TX:
                        df.TSort<VBufferEqSort<DvText>>(icols, ascending);
                        break;

                    default:
                        throw new NotImplementedException($"Sort is not implemented for type '{firstKind}'.");
                }
                return;
            }

            if (singleKey)
            {
                // One scalar key column: sort directly on its type.
                switch (firstKind.RawKind())
                {
                    case DataKind.BL:
                        df.TSort<bool>(icols, ascending);
                        break;

                    case DataKind.I4:
                        df.TSort<int>(icols, ascending);
                        break;

                    case DataKind.U4:
                        df.TSort<uint>(icols, ascending);
                        break;

                    case DataKind.I8:
                        df.TSort<long>(icols, ascending);
                        break;

                    case DataKind.R4:
                        df.TSort<float>(icols, ascending);
                        break;

                    case DataKind.R8:
                        df.TSort<double>(icols, ascending);
                        break;

                    case DataKind.TX:
                        df.TSort<DvText>(icols, ascending);
                        break;

                    default:
                        throw new NotImplementedException($"Sort is not implemented for type '{firstKind}'.");
                }
                return;
            }

            if (isVector)
            {
                // Several keys, the first one a vector: fix its type and keep dispatching.
                switch (firstKind.ItemType().RawKind())
                {
                    case DataKind.BL:
                        RecSort<VBufferEqSort<bool>>(df, icols, ascending);
                        break;

                    case DataKind.I4:
                        RecSort<VBufferEqSort<int>>(df, icols, ascending);
                        break;

                    case DataKind.U4:
                        RecSort<VBufferEqSort<uint>>(df, icols, ascending);
                        break;

                    case DataKind.I8:
                        RecSort<VBufferEqSort<long>>(df, icols, ascending);
                        break;

                    case DataKind.R4:
                        RecSort<VBufferEqSort<float>>(df, icols, ascending);
                        break;

                    case DataKind.R8:
                        RecSort<VBufferEqSort<double>>(df, icols, ascending);
                        break;

                    case DataKind.TX:
                        RecSort<VBufferEqSort<DvText>>(df, icols, ascending);
                        break;

                    default:
                        throw new NotImplementedException($"Sort is not implemented for type '{firstKind}'.");
                }
                return;
            }

            // Several keys, the first one a scalar: fix its type and keep dispatching.
            switch (firstKind.RawKind())
            {
                case DataKind.BL:
                    RecSort<bool>(df, icols, ascending);
                    break;

                case DataKind.I4:
                    RecSort<int>(df, icols, ascending);
                    break;

                case DataKind.U4:
                    RecSort<uint>(df, icols, ascending);
                    break;

                case DataKind.I8:
                    RecSort<long>(df, icols, ascending);
                    break;

                case DataKind.R4:
                    RecSort<float>(df, icols, ascending);
                    break;

                case DataKind.R8:
                    RecSort<double>(df, icols, ascending);
                    break;

                case DataKind.TX:
                    RecSort<DvText>(df, icols, ascending);
                    break;

                default:
                    throw new NotImplementedException($"Sort is not implemented for type '{firstKind}'.");
            }
        }
Ejemplo n.º 22
0
        /// <summary>
        /// Second step of the GroupBy type dispatch. <typeparamref name="T1"/> has been bound by
        /// the caller; here the kind of the second key column (<c>icols[1]</c>) selects the next
        /// type argument. With exactly two key columns the typed <c>TGroupBy</c> is invoked on the
        /// view, otherwise the call is forwarded to the three-type-argument <c>RecGroupBy</c>.
        /// </summary>
        /// <typeparam name="T1">Type argument already chosen by the caller.</typeparam>
        /// <param name="df">View to group.</param>
        /// <param name="icols">Indices of the key columns; <c>icols[1]</c> is always read.</param>
        /// <param name="sort">Forwarded unchanged to the typed GroupBy.</param>
        /// <returns>Whatever the selected typed GroupBy (or deeper dispatcher) returns.</returns>
        /// <exception cref="NotImplementedException">
        /// The column is a vector column, or its raw kind is not one of the handled kinds.
        /// </exception>
        static IDataFrameViewGroupResults RecGroupBy <T1>(IDataFrameView df, int[] icols, bool sort)
            where T1 : IEquatable <T1>, IComparable <T1>
        {
            var kind = df.Kinds[icols[1]];

            // Vector key columns are not supported at any depth: reject them once, and say why,
            // instead of raising a message-less exception from each branch.
            if (kind.IsVector())
            {
                throw new NotImplementedException($"GroupBy is not implemented for vector type '{kind}'.");
            }

            if (icols.Length == 2)
            {
                // Last key column: every type argument is known, run the typed GroupBy.
                switch (kind.RawKind())
                {
                case DataKind.BL: return df.TGroupBy<T1, bool>(icols, sort);

                case DataKind.I4: return df.TGroupBy<T1, int>(icols, sort);

                case DataKind.U4: return df.TGroupBy<T1, uint>(icols, sort);

                case DataKind.I8: return df.TGroupBy<T1, long>(icols, sort);

                case DataKind.R4: return df.TGroupBy<T1, float>(icols, sort);

                case DataKind.R8: return df.TGroupBy<T1, double>(icols, sort);

                case DataKind.TX: return df.TGroupBy<T1, DvText>(icols, sort);

                default:
                    throw new NotImplementedException($"GroupBy is not implemented for type '{kind}'.");
                }
            }

            // More key columns remain: bind this one and let the next dispatcher handle the rest.
            switch (kind.RawKind())
            {
            case DataKind.BL: return RecGroupBy<T1, bool>(df, icols, sort);

            case DataKind.I4: return RecGroupBy<T1, int>(df, icols, sort);

            case DataKind.U4: return RecGroupBy<T1, uint>(df, icols, sort);

            case DataKind.I8: return RecGroupBy<T1, long>(df, icols, sort);

            case DataKind.R4: return RecGroupBy<T1, float>(df, icols, sort);

            case DataKind.R8: return RecGroupBy<T1, double>(df, icols, sort);

            case DataKind.TX: return RecGroupBy<T1, DvText>(df, icols, sort);

            default:
                throw new NotImplementedException($"GroupBy is not implemented for type '{kind}'.");
            }
        }
Ejemplo n.º 23
0
 /// <summary>
 /// Joins this dataframe with <paramref name="right"/> when the left key columns are given
 /// by name and the right key columns by index. Each left name is translated with
 /// <c>GetColumnIndex</c> and the call is forwarded to a <c>Join</c> overload taking indices.
 /// </summary>
 /// <param name="right">Right-hand side of the join.</param>
 /// <param name="colsLeft">Names of the key columns in this dataframe.</param>
 /// <param name="colsRight">Indices of the key columns in <paramref name="right"/>.</param>
 /// <param name="leftSuffix">Forwarded unchanged.</param>
 /// <param name="rightSuffix">Forwarded unchanged.</param>
 /// <param name="joinType">Forwarded unchanged.</param>
 /// <param name="sort">Forwarded unchanged.</param>
 /// <returns>The dataframe produced by the forwarded call.</returns>
 public DataFrame Join(IDataFrameView right, IEnumerable<string> colsLeft, IEnumerable<int> colsRight,
                       string leftSuffix = null, string rightSuffix = null,
                       JoinStrategy joinType = JoinStrategy.Inner, bool sort = true)
 {
     // Lazy projection: names are resolved when the callee enumerates the sequence.
     var leftIndices = colsLeft.Select(name => GetColumnIndex(name));

     return Join(right, leftIndices, colsRight, leftSuffix, rightSuffix, joinType, sort);
 }
Ejemplo n.º 24
0
 /// <summary>
 /// Joins this dataframe with <paramref name="right"/> on key columns given by index on
 /// both sides. The dataframe is wrapped in a <c>DataFrameView</c> and the join itself is
 /// delegated to that view.
 /// </summary>
 /// <param name="right">Right-hand side of the join.</param>
 /// <param name="colsLeft">Indices of the key columns in this dataframe.</param>
 /// <param name="colsRight">Indices of the key columns in <paramref name="right"/>.</param>
 /// <param name="leftSuffix">Forwarded unchanged to the view's join.</param>
 /// <param name="rightSuffix">Forwarded unchanged to the view's join.</param>
 /// <param name="joinType">Forwarded unchanged to the view's join.</param>
 /// <param name="sort">Forwarded unchanged to the view's join.</param>
 /// <returns>The dataframe produced by the view's join.</returns>
 public DataFrame Join(IDataFrameView right, IEnumerable<int> colsLeft, IEnumerable<int> colsRight,
                       string leftSuffix = null, string rightSuffix = null,
                       JoinStrategy joinType = JoinStrategy.Inner, bool sort = true)
 {
     // NOTE(review): the two null arguments presumably mean "all rows, all columns" - confirm
     // against the DataFrameView constructor.
     var wholeFrame = new DataFrameView(this, null, null);

     return wholeFrame.Join(right, colsLeft, colsRight, leftSuffix, rightSuffix, joinType, sort);
 }
Ejemplo n.º 25
0
        /// <summary>
        /// Entry point of the GroupBy type dispatch. The kind of the first key column
        /// (<c>icols[0]</c>) selects the first type argument: with a single key column the typed
        /// <c>TGroupBy</c> is invoked on the view, otherwise the call is forwarded to
        /// <c>RecGroupBy&lt;T1&gt;</c> with the same column array.
        /// </summary>
        /// <param name="df">View to group.</param>
        /// <param name="icols">Indices of the key columns; <c>icols[0]</c> is always read.</param>
        /// <param name="sort">Forwarded unchanged to the typed GroupBy.</param>
        /// <returns>Whatever the selected typed GroupBy (or deeper dispatcher) returns.</returns>
        /// <exception cref="NotImplementedException">
        /// The column is a vector column, or its raw kind is not one of the handled kinds.
        /// </exception>
        static IDataFrameViewGroupResults RecGroupBy(IDataFrameView df, int[] icols, bool sort)
        {
            var kind = df.Kinds[icols[0]];

            // Vector key columns are not supported at any depth: reject them once, and say why,
            // instead of raising a message-less exception from each branch.
            if (kind.IsVector())
            {
                throw new NotImplementedException($"GroupBy is not implemented for vector type '{kind}'.");
            }

            if (icols.Length == 1)
            {
                // Single key column: the type argument is known, run the typed GroupBy.
                switch (kind.RawKind())
                {
                case DataKind.Boolean: return df.TGroupBy<bool>(icols, sort);

                case DataKind.Int32: return df.TGroupBy<int>(icols, sort);

                case DataKind.UInt32: return df.TGroupBy<uint>(icols, sort);

                case DataKind.Int64: return df.TGroupBy<long>(icols, sort);

                case DataKind.Single: return df.TGroupBy<float>(icols, sort);

                case DataKind.Double: return df.TGroupBy<double>(icols, sort);

                case DataKind.String: return df.TGroupBy<DvText>(icols, sort);

                default:
                    throw new NotImplementedException($"GroupBy is not implemented for type '{kind}'.");
                }
            }

            // More key columns remain: bind this one and let the next dispatcher handle the rest.
            switch (kind.RawKind())
            {
            case DataKind.Boolean: return RecGroupBy<bool>(df, icols, sort);

            case DataKind.Int32: return RecGroupBy<int>(df, icols, sort);

            case DataKind.UInt32: return RecGroupBy<uint>(df, icols, sort);

            case DataKind.Int64: return RecGroupBy<long>(df, icols, sort);

            case DataKind.Single: return RecGroupBy<float>(df, icols, sort);

            case DataKind.Double: return RecGroupBy<double>(df, icols, sort);

            case DataKind.String: return RecGroupBy<DvText>(df, icols, sort);

            default:
                throw new NotImplementedException($"GroupBy is not implemented for type '{kind}'.");
            }
        }
Ejemplo n.º 26
0
        /// <summary>
        /// Second step of the Join type dispatch. <typeparamref name="T1"/> has been bound by the
        /// caller; here the kind of the second left key column (<c>icolsLeft[1]</c>) selects the
        /// next type argument. With exactly two left key columns the typed <c>TJoin</c> is invoked
        /// on the left view, otherwise the call is forwarded to the three-type-argument
        /// <c>RecJoin</c>.
        /// </summary>
        /// <typeparam name="T1">Type argument already chosen by the caller.</typeparam>
        /// <param name="left">Left view; its column kinds drive the dispatch.</param>
        /// <param name="right">Right view, forwarded unchanged.</param>
        /// <param name="icolsLeft">Indices of the key columns in <paramref name="left"/>; <c>icolsLeft[1]</c> is always read.</param>
        /// <param name="icolsRight">Indices of the key columns in <paramref name="right"/>, forwarded unchanged.</param>
        /// <param name="leftSuffix">Forwarded unchanged.</param>
        /// <param name="rightSuffix">Forwarded unchanged.</param>
        /// <param name="joinType">Forwarded unchanged.</param>
        /// <param name="sort">Forwarded unchanged.</param>
        /// <returns>The dataframe produced by the selected typed join (or deeper dispatcher).</returns>
        /// <exception cref="NotImplementedException">
        /// The left column is a vector column, or its raw kind is not one of the handled kinds.
        /// </exception>
        static DataFrame RecJoin <T1>(IDataFrameView left, IDataFrameView right, int[] icolsLeft, int[] icolsRight,
                                      string leftSuffix     = null, string rightSuffix      = null,
                                      JoinStrategy joinType = JoinStrategy.Inner, bool sort = true)
            where T1 : IEquatable <T1>, IComparable <T1>
        {
            // NOTE(review): only the left view's kinds are inspected here; presumably the typed
            // join checks that the right key columns have matching types - confirm.
            var kind = left.Kinds[icolsLeft[1]];

            // Vector key columns are not supported at any depth: reject them once, and say why,
            // instead of raising a message-less exception from each branch.
            if (kind.IsVector())
            {
                throw new NotImplementedException($"Join is not implemented for vector type '{kind}'.");
            }

            if (icolsLeft.Length == 2)
            {
                // Last key column: every type argument is known, run the typed join.
                switch (kind.RawKind())
                {
                case DataKind.Boolean: return left.TJoin<T1, bool>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);

                case DataKind.Int32: return left.TJoin<T1, int>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);

                case DataKind.UInt32: return left.TJoin<T1, uint>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);

                case DataKind.Int64: return left.TJoin<T1, long>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);

                case DataKind.Single: return left.TJoin<T1, float>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);

                case DataKind.Double: return left.TJoin<T1, double>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);

                case DataKind.String: return left.TJoin<T1, DvText>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);

                default:
                    throw new NotImplementedException($"Join is not implemented for type '{kind}'.");
                }
            }

            // More key columns remain: bind this one and let the next dispatcher handle the rest.
            switch (kind.RawKind())
            {
            case DataKind.Boolean: return RecJoin<T1, bool>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);

            case DataKind.Int32: return RecJoin<T1, int>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);

            case DataKind.UInt32: return RecJoin<T1, uint>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);

            case DataKind.Int64: return RecJoin<T1, long>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);

            case DataKind.Single: return RecJoin<T1, float>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);

            case DataKind.Double: return RecJoin<T1, double>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);

            case DataKind.String: return RecJoin<T1, DvText>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);

            default:
                throw new NotImplementedException($"Join is not implemented for type '{kind}'.");
            }
        }
 /// <summary>
 /// Creates a group view: the source, rows and columns are handed to the base constructor
 /// and the group keys are stored alongside.
 /// </summary>
 /// <param name="keys">Keys identifying the group; stored as-is (no copy is made).</param>
 /// <param name="src">Source view passed to the base constructor.</param>
 /// <param name="rows">Rows passed to the base constructor.</param>
 /// <param name="columns">Columns passed to the base constructor.</param>
 public DataFrameViewGroup(DataFrameGroupKey[] keys,
                           IDataFrameView src,
                           IEnumerable<int> rows,
                           IEnumerable<int> columns)
     : base(src, rows, columns)
 {
     _keys = keys;
 }
Ejemplo n.º 28
0
 /// <summary>
 /// Sorts <paramref name="df"/> on the given columns. The column indices are materialised
 /// into an array and handed to the <c>RecSort</c> type dispatcher.
 /// </summary>
 /// <param name="df">View to sort.</param>
 /// <param name="columns">Indices of the sort columns; enumerated exactly once.</param>
 /// <param name="ascending">Forwarded unchanged to the dispatcher.</param>
 public static void Sort(IDataFrameView df, IEnumerable<int> columns, bool ascending = true)
     => RecSort(df, columns.ToArray(), ascending);
 /// <summary>
 /// Groups <paramref name="df"/> on the given key columns. The column indices are
 /// materialised into an array and handed to the <c>RecGroupBy</c> type dispatcher.
 /// </summary>
 /// <param name="df">View to group.</param>
 /// <param name="columns">Indices of the key columns; enumerated exactly once.</param>
 /// <param name="ascending">
 /// Passed as the third argument of <c>RecGroupBy</c>.
 /// NOTE(review): that argument appears to be a "sort" flag rather than a direction - confirm.
 /// </param>
 /// <returns>The grouping produced by the dispatcher.</returns>
 public static IDataFrameViewGroupResults GroupBy(IDataFrameView df, IEnumerable<int> columns, bool ascending = true)
     => RecGroupBy(df, columns.ToArray(), ascending);
Ejemplo n.º 30
0
        /// <summary>
        /// Second step of the type dispatch used for sorting. <typeparamref name="T1"/> has been
        /// bound by the caller; here the kind of the second sort column (<c>icols[1]</c>) selects
        /// the next type argument. With exactly two columns the typed <c>TSort</c> runs on the
        /// view, otherwise the call is handed over to the three-type-argument <c>RecSort</c>.
        /// Vector columns are dispatched on their item kind and wrapped in <c>VBufferEqSort</c>.
        /// </summary>
        /// <typeparam name="T1">Type argument already chosen by the caller.</typeparam>
        /// <param name="df">View to sort.</param>
        /// <param name="icols">Indices of the sort columns; <c>icols[1]</c> is always read.</param>
        /// <param name="ascending">Forwarded unchanged to the typed sort.</param>
        /// <exception cref="NotImplementedException">The (item) kind of the column is not one of the handled kinds.</exception>
        static void RecSort <T1>(IDataFrameView df, int[] icols, bool ascending)
            where T1 : IEquatable <T1>, IComparable <T1>
        {
            var  kind       = df.Kinds[icols[1]];
            bool lastColumn = icols.Length == 2;

            if (kind.IsVector())
            {
                // Vector column: the item kind decides the element type of the wrapper.
                switch (kind.ItemType().RawKind())
                {
                case DataKind.Boolean:
                    if (lastColumn)
                    {
                        df.TSort<T1, VBufferEqSort<bool>>(icols, ascending);
                    }
                    else
                    {
                        RecSort<T1, VBufferEqSort<bool>>(df, icols, ascending);
                    }
                    break;

                case DataKind.Int32:
                    if (lastColumn)
                    {
                        df.TSort<T1, VBufferEqSort<int>>(icols, ascending);
                    }
                    else
                    {
                        RecSort<T1, VBufferEqSort<int>>(df, icols, ascending);
                    }
                    break;

                case DataKind.UInt32:
                    if (lastColumn)
                    {
                        df.TSort<T1, VBufferEqSort<uint>>(icols, ascending);
                    }
                    else
                    {
                        RecSort<T1, VBufferEqSort<uint>>(df, icols, ascending);
                    }
                    break;

                case DataKind.Int64:
                    if (lastColumn)
                    {
                        df.TSort<T1, VBufferEqSort<long>>(icols, ascending);
                    }
                    else
                    {
                        RecSort<T1, VBufferEqSort<long>>(df, icols, ascending);
                    }
                    break;

                case DataKind.Single:
                    if (lastColumn)
                    {
                        df.TSort<T1, VBufferEqSort<float>>(icols, ascending);
                    }
                    else
                    {
                        RecSort<T1, VBufferEqSort<float>>(df, icols, ascending);
                    }
                    break;

                case DataKind.Double:
                    if (lastColumn)
                    {
                        df.TSort<T1, VBufferEqSort<double>>(icols, ascending);
                    }
                    else
                    {
                        RecSort<T1, VBufferEqSort<double>>(df, icols, ascending);
                    }
                    break;

                case DataKind.String:
                    if (lastColumn)
                    {
                        df.TSort<T1, VBufferEqSort<DvText>>(icols, ascending);
                    }
                    else
                    {
                        RecSort<T1, VBufferEqSort<DvText>>(df, icols, ascending);
                    }
                    break;

                default:
                    throw new NotImplementedException($"Sort is not implemented for type '{kind}'.");
                }
                return;
            }

            // Scalar column: the raw kind maps directly onto the type argument.
            switch (kind.RawKind())
            {
            case DataKind.Boolean:
                if (lastColumn)
                {
                    df.TSort<T1, bool>(icols, ascending);
                }
                else
                {
                    RecSort<T1, bool>(df, icols, ascending);
                }
                break;

            case DataKind.Int32:
                if (lastColumn)
                {
                    df.TSort<T1, int>(icols, ascending);
                }
                else
                {
                    RecSort<T1, int>(df, icols, ascending);
                }
                break;

            case DataKind.UInt32:
                if (lastColumn)
                {
                    df.TSort<T1, uint>(icols, ascending);
                }
                else
                {
                    RecSort<T1, uint>(df, icols, ascending);
                }
                break;

            case DataKind.Int64:
                if (lastColumn)
                {
                    df.TSort<T1, long>(icols, ascending);
                }
                else
                {
                    RecSort<T1, long>(df, icols, ascending);
                }
                break;

            case DataKind.Single:
                if (lastColumn)
                {
                    df.TSort<T1, float>(icols, ascending);
                }
                else
                {
                    RecSort<T1, float>(df, icols, ascending);
                }
                break;

            case DataKind.Double:
                if (lastColumn)
                {
                    df.TSort<T1, double>(icols, ascending);
                }
                else
                {
                    RecSort<T1, double>(df, icols, ascending);
                }
                break;

            case DataKind.String:
                if (lastColumn)
                {
                    df.TSort<T1, DvText>(icols, ascending);
                }
                else
                {
                    RecSort<T1, DvText>(df, icols, ascending);
                }
                break;

            default:
                throw new NotImplementedException($"Sort is not implemented for type '{kind}'.");
            }
        }