/// <summary>
/// Lazily walks <paramref name="order"/> and yields one group for every run of
/// consecutive positions whose keys are equal.
/// </summary>
/// <param name="df">view being grouped; groups are built on <c>df.Source ?? df</c> with <c>df.ColumnsSet</c></param>
/// <param name="order">positions to visit, in visiting order; each entry indexes <paramref name="keys"/></param>
/// <param name="keys">key of every position</param>
/// <param name="columns">not used by this method</param>
/// <param name="func">converts a key into the group keys stored on the produced group</param>
/// <returns>pairs (key, group) in the order the runs are met</returns>
public static IEnumerable<KeyValuePair<TKey, DataFrameViewGroup>> TGroupBy<TKey>(
    IDataFrameView df, int[] order, TKey[] keys, int[] columns,
    Func<TKey, DataFrameGroupKey[]> func)
    where TKey : IEquatable<TKey>
{
    // The seed is read through order[0], so the emptiness test must be on order:
    // testing keys instead fails when keys has items but order is empty.
    TKey last = order.Length > 0 ? keys[order[0]] : default(TKey);
    var subrows = new List<int>();

    foreach (var pos in order)
    {
        var cur = keys[pos];
        if (cur.Equals(last))
        {
            subrows.Add(pos);
        }
        else if (subrows.Count > 0)
        {
            // The key changed: emit the finished run and start a new one with this position.
            yield return new KeyValuePair<TKey, DataFrameViewGroup>(
                last, new DataFrameViewGroup(func(last), df.Source ?? df, subrows.ToArray(), df.ColumnsSet));
            subrows.Clear();
            subrows.Add(pos);
        }
        last = cur;
    }

    // Emit the trailing run, if any position was visited.
    if (subrows.Count > 0)
    {
        yield return new KeyValuePair<TKey, DataFrameViewGroup>(
            last, new DataFrameViewGroup(func(last), df.Source ?? df, subrows.ToArray(), df.ColumnsSet));
    }
}
/// <summary>
/// Last step of the group-by dispatch for three key columns: picks the third
/// key type from the kind of column <c>icols[2]</c> and calls the typed TGroupBy.
/// T1 and T2 are presumably the types already selected for the first two key columns.
/// </summary>
/// <param name="df">view to group</param>
/// <param name="icols">indexes of the key columns</param>
/// <param name="sort">forwarded to TGroupBy</param>
/// <exception cref="NotImplementedException">the kind is not handled, or there are not exactly three key columns</exception>
static IDataFrameViewGroupResults RecGroupBy<T1, T2>(IDataFrameView df, int[] icols, bool sort)
    where T1 : IEquatable<T1>, IComparable<T1>
    where T2 : IEquatable<T2>, IComparable<T2>
{
    // The kind is read before the length check, so fewer than three entries fails on this index access.
    var kind = df.Kinds[icols[2]];
    if (icols.Length != 3)
    {
        throw new NotImplementedException($"GroupBy is not implemented for {icols.Length} columns.");
    }

    switch (kind)
    {
        case DataKind.BL:
            return df.TGroupBy<T1, T2, DvBool>(icols, sort);
        case DataKind.I4:
            return df.TGroupBy<T1, T2, DvInt4>(icols, sort);
        case DataKind.U4:
            return df.TGroupBy<T1, T2, uint>(icols, sort);
        case DataKind.I8:
            return df.TGroupBy<T1, T2, DvInt8>(icols, sort);
        case DataKind.R4:
            return df.TGroupBy<T1, T2, float>(icols, sort);
        case DataKind.R8:
            return df.TGroupBy<T1, T2, double>(icols, sort);
        case DataKind.TX:
            return df.TGroupBy<T1, T2, DvText>(icols, sort);
        default:
            throw new NotImplementedException($"GroupBy is not implemented for type '{kind}'.");
    }
}
/// <summary>
/// Last step of the sort dispatch for three key columns: selects the third key
/// type from the kind of column <c>icols[2]</c> and calls the typed TSort.
/// </summary>
/// <param name="df">view to sort</param>
/// <param name="icols">indexes of the key columns</param>
/// <param name="ascending">forwarded to TSort</param>
/// <exception cref="NotImplementedException">the kind is not handled, or there are not exactly three key columns</exception>
static void RecSort<T1, T2>(IDataFrameView df, int[] icols, bool ascending)
    where T1 : IEquatable<T1>, IComparable<T1>
    where T2 : IEquatable<T2>, IComparable<T2>
{
    // The kind is read before the length check, so fewer than three entries fails on this index access.
    var thirdKind = df.Kinds[icols[2]];
    if (icols.Length != 3)
    {
        throw new NotImplementedException($"Sort is not implemented for {icols.Length} columns.");
    }

    switch (thirdKind)
    {
        case DataKind.BL:
            df.TSort<T1, T2, DvBool>(icols, ascending);
            return;
        case DataKind.I4:
            df.TSort<T1, T2, DvInt4>(icols, ascending);
            return;
        case DataKind.U4:
            df.TSort<T1, T2, uint>(icols, ascending);
            return;
        case DataKind.I8:
            df.TSort<T1, T2, DvInt8>(icols, ascending);
            return;
        case DataKind.R4:
            df.TSort<T1, T2, float>(icols, ascending);
            return;
        case DataKind.R8:
            df.TSort<T1, T2, double>(icols, ascending);
            return;
        case DataKind.TX:
            df.TSort<T1, T2, DvText>(icols, ascending);
            return;
        default:
            throw new NotImplementedException($"Sort is not implemented for type '{thirdKind}'.");
    }
}
/// <summary>
/// Joins this dataframe with <paramref name="right"/> on three typed key columns
/// by wrapping this dataframe in a view built with null rows and null columns
/// and delegating to that view's TJoin.
/// </summary>
/// <param name="right">right side of the join</param>
/// <param name="colsLeft">key column indexes on this dataframe</param>
/// <param name="colsRight">key column indexes on <paramref name="right"/></param>
/// <param name="leftSuffix">forwarded to the view</param>
/// <param name="rightSuffix">forwarded to the view</param>
/// <param name="joinType">forwarded to the view</param>
/// <param name="sort">forwarded to the view</param>
/// <returns>the dataframe produced by the view's TJoin</returns>
public DataFrame TJoin<T1, T2, T3>(IDataFrameView right, IEnumerable<int> colsLeft, IEnumerable<int> colsRight,
                                   string leftSuffix = null, string rightSuffix = null,
                                   JoinStrategy joinType = JoinStrategy.Inner, bool sort = true)
    where T1 : IEquatable<T1>, IComparable<T1>
    where T2 : IEquatable<T2>, IComparable<T2>
    where T3 : IEquatable<T3>, IComparable<T3>
{
    var wholeView = new DataFrameView(this, null, null);
    return wholeView.TJoin<T1, T2, T3>(right, colsLeft, colsRight, leftSuffix, rightSuffix, joinType, sort);
}
/// <summary>
/// Last step of the join dispatch for three key columns: selects the third key
/// type from the kind of left column <c>icolsLeft[2]</c> and calls the typed TJoin.
/// </summary>
/// <param name="left">left side of the join</param>
/// <param name="right">right side of the join</param>
/// <param name="icolsLeft">key column indexes on the left side</param>
/// <param name="icolsRight">key column indexes on the right side</param>
/// <param name="leftSuffix">forwarded to TJoin</param>
/// <param name="rightSuffix">forwarded to TJoin</param>
/// <param name="joinType">forwarded to TJoin</param>
/// <param name="sort">forwarded to TJoin</param>
/// <exception cref="NotImplementedException">the kind is not handled, or there are not exactly three key columns</exception>
static DataFrame RecJoin<T1, T2>(IDataFrameView left, IDataFrameView right, int[] icolsLeft, int[] icolsRight,
                                 string leftSuffix = null, string rightSuffix = null,
                                 JoinStrategy joinType = JoinStrategy.Inner, bool sort = true)
    where T1 : IEquatable<T1>, IComparable<T1>
    where T2 : IEquatable<T2>, IComparable<T2>
{
    // The kind is read before the length check, so fewer than three entries fails on this index access.
    var thirdKind = left.Kinds[icolsLeft[2]];
    if (icolsLeft.Length != 3)
    {
        throw new NotImplementedException($"Join is not implemented for {icolsLeft.Length} columns.");
    }

    switch (thirdKind)
    {
        case DataKind.BL:
            return left.TJoin<T1, T2, DvBool>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);
        case DataKind.I4:
            return left.TJoin<T1, T2, DvInt4>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);
        case DataKind.U4:
            return left.TJoin<T1, T2, uint>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);
        case DataKind.I8:
            return left.TJoin<T1, T2, DvInt8>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);
        case DataKind.R4:
            return left.TJoin<T1, T2, float>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);
        case DataKind.R8:
            return left.TJoin<T1, T2, double>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);
        case DataKind.TX:
            return left.TJoin<T1, T2, DvText>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);
        default:
            throw new NotImplementedException($"Join is not implemented for type '{thirdKind}'.");
    }
}
/// <summary>
/// Joins this view with <paramref name="right"/> on three typed key columns.
/// Collects the row and column selections of both sides, resolves the key
/// column names, then delegates to <c>DataFrameJoining.TJoin</c>.
/// </summary>
/// <param name="right">right side; when it is not a DataFrame it is accessed as a DataFrameView</param>
/// <param name="colsLeft">key column indexes on this view</param>
/// <param name="colsRight">key column indexes on <paramref name="right"/></param>
/// <param name="leftSuffix">forwarded to the join</param>
/// <param name="rightSuffix">forwarded to the join</param>
/// <param name="joinType">forwarded to the join</param>
/// <param name="sort">forwarded to the join</param>
/// <returns>the dataframe produced by <c>DataFrameJoining.TJoin</c></returns>
public DataFrame TJoin<T1, T2, T3>(IDataFrameView right, IEnumerable<int> colsLeft, IEnumerable<int> colsRight,
                                   string leftSuffix = null, string rightSuffix = null,
                                   JoinStrategy joinType = JoinStrategy.Inner, bool sort = true)
    where T1 : IEquatable<T1>, IComparable<T1>
    where T2 : IEquatable<T2>, IComparable<T2>
    where T3 : IEquatable<T3>, IComparable<T3>
{
    // Copy of this view's row selection.
    int[] orderLeft = _rows.Select(r => r).ToArray();

    // A plain DataFrame contributes no row/column selection (null); anything else
    // is read as a DataFrameView.
    bool rightIsFrame = right is DataFrame;
    int[] orderRight = null;
    if (!rightIsFrame)
    {
        orderRight = (right as DataFrameView)._rows.Select(r => r).ToArray();
    }
    int[] columnsRight = null;
    if (!rightIsFrame)
    {
        columnsRight = (right as DataFrameView)._columns;
    }

    var icolsLeft = colsLeft.ToArray();
    var icolsRight = colsRight.ToArray();
    var scolsLeft = icolsLeft.Select(index => Schema.GetColumnName(index)).ToArray();
    var scolsRight = icolsRight.Select(index => right.SchemaI.GetColumnName(index)).ToArray();

    var getterLeft = GetMultiGetterAt<T1, T2, T3>(icolsLeft);
    var getterRight = right.GetMultiGetterAt<T1, T2, T3>(icolsRight);

    return DataFrameJoining.TJoin(
        this, right,
        orderLeft, orderRight,
        _columns, columnsRight,
        icolsLeft, icolsRight,
        sort, leftSuffix, rightSuffix, joinType,
        getterLeft, getterRight,
        key => key.ToImTuple(),
        key => DataFrameGroupKey.Create(scolsLeft, key),
        key => DataFrameGroupKey.Create(scolsRight, key));
}
/// <summary>
/// Builds the single-column sort keys of <paramref name="df"/> and sorts
/// <paramref name="order"/> with them.
/// </summary>
/// <param name="df">view to sort</param>
/// <param name="order">row order, updated by the key-based TSort overload</param>
/// <param name="columns">key columns passed to EnumerateItems</param>
/// <param name="ascending">passed both to EnumerateItems and to the sort</param>
/// <returns>the keys that were used for sorting</returns>
public static ImmutableTuple<T1>[] TSort<T1>(IDataFrameView df, ref int[] order, IEnumerable<int> columns, bool ascending)
    where T1 : IEquatable<T1>, IComparable<T1>
{
    var items = df.EnumerateItems<T1>(columns, ascending);
    var keys = items.Select(item => item.ToImTuple()).ToArray();
    TSort(df, ref order, keys, ascending);
    return keys;
}
/// <summary>
/// Last step of the sort dispatch for three key columns: selects the third key
/// type from the kind of column <c>icols[2]</c>. Vector kinds dispatch on their
/// item type and sort with <c>VBufferEqSort</c> keys; other kinds dispatch on their raw kind.
/// </summary>
/// <param name="df">view to sort</param>
/// <param name="icols">indexes of the key columns</param>
/// <param name="ascending">forwarded to TSort</param>
/// <exception cref="NotImplementedException">the kind is not handled, or there are not exactly three key columns</exception>
static void RecSort<T1, T2>(IDataFrameView df, int[] icols, bool ascending)
    where T1 : IEquatable<T1>, IComparable<T1>
    where T2 : IEquatable<T2>, IComparable<T2>
{
    // The kind is read before the length check, so fewer than three entries fails on this index access.
    var thirdKind = df.Kinds[icols[2]];
    if (icols.Length != 3)
    {
        throw new NotImplementedException($"Sort is not implemented for {icols.Length} columns.");
    }

    if (thirdKind.IsVector())
    {
        switch (thirdKind.ItemType().RawKind())
        {
            case DataKind.BL:
                df.TSort<T1, T2, VBufferEqSort<bool>>(icols, ascending);
                return;
            case DataKind.I4:
                df.TSort<T1, T2, VBufferEqSort<int>>(icols, ascending);
                return;
            case DataKind.U4:
                df.TSort<T1, T2, VBufferEqSort<uint>>(icols, ascending);
                return;
            case DataKind.I8:
                df.TSort<T1, T2, VBufferEqSort<long>>(icols, ascending);
                return;
            case DataKind.R4:
                df.TSort<T1, T2, VBufferEqSort<float>>(icols, ascending);
                return;
            case DataKind.R8:
                df.TSort<T1, T2, VBufferEqSort<double>>(icols, ascending);
                return;
            case DataKind.TX:
                df.TSort<T1, T2, VBufferEqSort<DvText>>(icols, ascending);
                return;
            default:
                throw new NotImplementedException($"Sort is not implemented for type '{thirdKind}'.");
        }
    }

    switch (thirdKind.RawKind())
    {
        case DataKind.BL:
            df.TSort<T1, T2, bool>(icols, ascending);
            return;
        case DataKind.I4:
            df.TSort<T1, T2, int>(icols, ascending);
            return;
        case DataKind.U4:
            df.TSort<T1, T2, uint>(icols, ascending);
            return;
        case DataKind.I8:
            df.TSort<T1, T2, long>(icols, ascending);
            return;
        case DataKind.R4:
            df.TSort<T1, T2, float>(icols, ascending);
            return;
        case DataKind.R8:
            df.TSort<T1, T2, double>(icols, ascending);
            return;
        case DataKind.TX:
            df.TSort<T1, T2, DvText>(icols, ascending);
            return;
        default:
            throw new NotImplementedException($"Sort is not implemented for type '{thirdKind}'.");
    }
}
/// <summary>
/// Creates a view over <paramref name="src"/> restricted to the given rows and columns.
/// </summary>
/// <param name="src">viewed dataframe</param>
/// <param name="rows">selected rows; null selects 0 .. src.Length - 1</param>
/// <param name="columns">selected columns; null selects 0 .. src.SchemaI.ColumnCount - 1</param>
public DataFrameView(IDataFrameView src, IEnumerable<int> rows, IEnumerable<int> columns)
{
    _src = src;

    if (rows == null)
    {
        _rows = Enumerable.Range(0, src.Length).ToArray();
    }
    else
    {
        _rows = rows.ToArray();
    }

    if (columns == null)
    {
        _columns = Enumerable.Range(0, src.SchemaI.ColumnCount).ToArray();
    }
    else
    {
        _columns = columns.ToArray();
    }

    // The view schema is built from the source schema and the selected columns.
    _schema = new DataFrameViewSchema(src.Schema, _columns);
}
/// <summary>
/// Entry point of the join dispatch: selects the first key type from the kind
/// of left column <c>icolsLeft[0]</c>. With a single key column the typed TJoin
/// is called directly; otherwise the dispatch continues with the one-type RecJoin overload.
/// </summary>
/// <param name="left">left side of the join</param>
/// <param name="right">right side of the join</param>
/// <param name="icolsLeft">key column indexes on the left side</param>
/// <param name="icolsRight">key column indexes on the right side</param>
/// <param name="leftSuffix">forwarded unchanged</param>
/// <param name="rightSuffix">forwarded unchanged</param>
/// <param name="joinType">forwarded unchanged</param>
/// <param name="sort">forwarded unchanged</param>
/// <exception cref="NotImplementedException">the kind of the first key column is not handled</exception>
static DataFrame RecJoin(IDataFrameView left, IDataFrameView right, int[] icolsLeft, int[] icolsRight,
                         string leftSuffix = null, string rightSuffix = null,
                         JoinStrategy joinType = JoinStrategy.Inner, bool sort = true)
{
    var firstKind = left.Kinds[icolsLeft[0]];
    bool isLastKey = icolsLeft.Length == 1;

    switch (firstKind)
    {
        case DataKind.BL:
            return isLastKey
                ? left.TJoin<DvBool>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort)
                : RecJoin<DvBool>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);
        case DataKind.I4:
            return isLastKey
                ? left.TJoin<DvInt4>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort)
                : RecJoin<DvInt4>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);
        case DataKind.U4:
            return isLastKey
                ? left.TJoin<uint>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort)
                : RecJoin<uint>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);
        case DataKind.I8:
            return isLastKey
                ? left.TJoin<DvInt8>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort)
                : RecJoin<DvInt8>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);
        case DataKind.R4:
            return isLastKey
                ? left.TJoin<float>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort)
                : RecJoin<float>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);
        case DataKind.R8:
            return isLastKey
                ? left.TJoin<double>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort)
                : RecJoin<double>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);
        case DataKind.TX:
            return isLastKey
                ? left.TJoin<DvText>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort)
                : RecJoin<DvText>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);
        default:
            throw new NotImplementedException($"Join is not implemented for type '{firstKind}'.");
    }
}
/// <summary>
/// Entry point of the sort dispatch: selects the first key type from the kind
/// of column <c>icols[0]</c>. With a single key column the typed TSort is called
/// directly; otherwise the dispatch continues with the one-type RecSort overload.
/// </summary>
/// <param name="df">view to sort</param>
/// <param name="icols">indexes of the key columns</param>
/// <param name="ascending">forwarded unchanged</param>
/// <exception cref="NotImplementedException">the kind of the first key column is not handled</exception>
static void RecSort(IDataFrameView df, int[] icols, bool ascending)
{
    var firstKind = df.Kinds[icols[0]];
    bool isLastKey = icols.Length == 1;

    switch (firstKind)
    {
        case DataKind.BL:
            if (isLastKey) { df.TSort<DvBool>(icols, ascending); }
            else { RecSort<DvBool>(df, icols, ascending); }
            return;
        case DataKind.I4:
            if (isLastKey) { df.TSort<DvInt4>(icols, ascending); }
            else { RecSort<DvInt4>(df, icols, ascending); }
            return;
        case DataKind.U4:
            if (isLastKey) { df.TSort<uint>(icols, ascending); }
            else { RecSort<uint>(df, icols, ascending); }
            return;
        case DataKind.I8:
            if (isLastKey) { df.TSort<DvInt8>(icols, ascending); }
            else { RecSort<DvInt8>(df, icols, ascending); }
            return;
        case DataKind.R4:
            if (isLastKey) { df.TSort<float>(icols, ascending); }
            else { RecSort<float>(df, icols, ascending); }
            return;
        case DataKind.R8:
            if (isLastKey) { df.TSort<double>(icols, ascending); }
            else { RecSort<double>(df, icols, ascending); }
            return;
        case DataKind.TX:
            if (isLastKey) { df.TSort<DvText>(icols, ascending); }
            else { RecSort<DvText>(df, icols, ascending); }
            return;
        default:
            throw new NotImplementedException($"Sort is not implemented for type '{firstKind}'.");
    }
}
/// <summary>
/// Entry point of the group-by dispatch: selects the first key type from the
/// kind of column <c>icols[0]</c>. With a single key column the typed TGroupBy is
/// called directly; otherwise the dispatch continues with the one-type RecGroupBy overload.
/// </summary>
/// <param name="df">view to group</param>
/// <param name="icols">indexes of the key columns</param>
/// <param name="sort">forwarded unchanged</param>
/// <exception cref="NotImplementedException">the kind of the first key column is not handled</exception>
static IDataFrameViewGroupResults RecGroupBy(IDataFrameView df, int[] icols, bool sort)
{
    var firstKind = df.Kinds[icols[0]];
    bool isLastKey = icols.Length == 1;

    switch (firstKind)
    {
        case DataKind.BL:
            if (isLastKey) { return df.TGroupBy<DvBool>(icols, sort); }
            return RecGroupBy<DvBool>(df, icols, sort);
        case DataKind.I4:
            if (isLastKey) { return df.TGroupBy<DvInt4>(icols, sort); }
            return RecGroupBy<DvInt4>(df, icols, sort);
        case DataKind.U4:
            if (isLastKey) { return df.TGroupBy<uint>(icols, sort); }
            return RecGroupBy<uint>(df, icols, sort);
        case DataKind.I8:
            if (isLastKey) { return df.TGroupBy<DvInt8>(icols, sort); }
            return RecGroupBy<DvInt8>(df, icols, sort);
        case DataKind.R4:
            if (isLastKey) { return df.TGroupBy<float>(icols, sort); }
            return RecGroupBy<float>(df, icols, sort);
        case DataKind.R8:
            if (isLastKey) { return df.TGroupBy<double>(icols, sort); }
            return RecGroupBy<double>(df, icols, sort);
        case DataKind.TX:
            if (isLastKey) { return df.TGroupBy<DvText>(icols, sort); }
            return RecGroupBy<DvText>(df, icols, sort);
        default:
            throw new NotImplementedException($"GroupBy is not implemented for type '{firstKind}'.");
    }
}
/// <summary>
/// Raises an exception if two dataframes do not have the same
/// shape or are too different.
/// </summary>
/// <param name="df">dataframe to compare this one with</param>
/// <param name="precision">precision, forwarded to the column comparison</param>
/// <param name="exc">raises an exception if too different, forwarded to the column comparison</param>
/// <returns>max difference over all columns</returns>
/// <exception cref="DataValueError">the two shapes differ</exception>
public double AssertAlmostEqual(IDataFrameView df, double precision = 1e-5, bool exc = true)
{
    var shape = Shape;
    var otherShape = df.Shape;

    // Compare the two components explicitly so the check does not depend on
    // how != is defined for the shape type.
    if (shape.Item1 != otherShape.Item1 || shape.Item2 != otherShape.Item2)
    {
        throw new DataValueError(string.Format("Shapes are different ({0}, {1}) != ({2}, {3})",
                                               shape.Item1, shape.Item2, otherShape.Item1, otherShape.Item2));
    }

    double max = 0;
    for (int i = 0; i < otherShape.Item2; ++i)
    {
        var c1 = GetColumn(i);
        // The second column must come from the other dataframe; reading it from
        // this one compares each column with itself and always reports 0.
        var c2 = df.GetColumn(i);
        var d = c1.AssertAlmostEqual(c2, precision, exc);
        max = Math.Max(max, d);
    }
    return max;
}
/// <summary>
/// Sorts <paramref name="order"/> in place according to the keys it points to.
/// </summary>
/// <param name="df">view being sorted; only its Length is read, and only when <paramref name="order"/> is null</param>
/// <param name="order">positions to sort, each indexing <paramref name="keys"/>; when null it is first
/// replaced by 0 .. df.Length - 1</param>
/// <param name="keys">sort key of every position</param>
/// <param name="ascending">true for ascending key order, false for descending</param>
public static void TSort<T>(IDataFrameView df, ref int[] order, T[] keys, bool ascending)
    where T : IComparable<T>
{
    if (order == null)
    {
        order = new int[df.Length];
        for (int i = 0; i < order.Length; ++i)
        {
            order[i] = i;
        }
    }

    if (ascending)
    {
        Array.Sort(order, (x, y) => keys[x].CompareTo(keys[y]));
    }
    else
    {
        // Swap the operands instead of negating the result: CompareTo may return
        // int.MinValue, whose negation is still int.MinValue.
        Array.Sort(order, (x, y) => keys[y].CompareTo(keys[x]));
    }
}
/// <summary>
/// Joins two views on the given key columns after checking that both sides use
/// the same number of key columns and that paired key columns have the same type.
/// </summary>
/// <param name="left">left side of the join</param>
/// <param name="right">right side of the join</param>
/// <param name="colsLeft">key column indexes on the left side</param>
/// <param name="colsRight">key column indexes on the right side</param>
/// <param name="leftSuffix">forwarded to RecJoin</param>
/// <param name="rightSuffix">forwarded to RecJoin</param>
/// <param name="joinType">forwarded to RecJoin</param>
/// <param name="sort">forwarded to RecJoin</param>
/// <exception cref="DataValueError">the two sides do not have the same number of key columns</exception>
/// <exception cref="DataTypeError">a pair of key columns has different types</exception>
public static DataFrame Join(IDataFrameView left, IDataFrameView right,
                             IEnumerable<int> colsLeft, IEnumerable<int> colsRight,
                             string leftSuffix = null, string rightSuffix = null,
                             JoinStrategy joinType = JoinStrategy.Inner, bool sort = true)
{
    int[] icolsLeft = colsLeft.ToArray();
    int[] icolsRight = colsRight.ToArray();

    if (icolsLeft.Length != icolsRight.Length)
    {
        throw new DataValueError("Left and right must be joined with the same number of columns.");
    }

    // Stops at the first pair of key columns whose types differ.
    bool typeMismatch = Enumerable.Range(0, icolsLeft.Length).Any(
        i => left.SchemaI.GetColumnType(icolsLeft[i]) != right.SchemaI.GetColumnType(icolsRight[i]));
    if (typeMismatch)
    {
        throw new DataTypeError("Left and right must be joined with the same number of columns and the same types.");
    }

    return RecJoin(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);
}
/// <summary>
/// Last step of the group-by dispatch for three key columns: picks the third
/// key type from the raw kind of column <c>icols[2]</c> and calls the typed TGroupBy.
/// Vector columns are not supported as keys.
/// </summary>
/// <param name="df">view to group</param>
/// <param name="icols">indexes of the key columns</param>
/// <param name="sort">forwarded to TGroupBy</param>
/// <exception cref="NotImplementedException">the column is a vector, its kind is not handled,
/// or there are not exactly three key columns</exception>
static IDataFrameViewGroupResults RecGroupBy<T1, T2>(IDataFrameView df, int[] icols, bool sort)
    where T1 : IEquatable<T1>, IComparable<T1>
    where T2 : IEquatable<T2>, IComparable<T2>
{
    // The kind is read before the length check, so fewer than three entries fails on this index access.
    var kind = df.Kinds[icols[2]];
    if (icols.Length != 3)
    {
        throw new NotImplementedException($"GroupBy is not implemented for {icols.Length} columns.");
    }
    if (kind.IsVector())
    {
        throw new NotImplementedException();
    }

    switch (kind.RawKind())
    {
        case DataKind.Boolean:
            return df.TGroupBy<T1, T2, bool>(icols, sort);
        case DataKind.Int32:
            return df.TGroupBy<T1, T2, int>(icols, sort);
        case DataKind.UInt32:
            return df.TGroupBy<T1, T2, uint>(icols, sort);
        case DataKind.Int64:
            return df.TGroupBy<T1, T2, long>(icols, sort);
        case DataKind.Single:
            return df.TGroupBy<T1, T2, float>(icols, sort);
        case DataKind.Double:
            return df.TGroupBy<T1, T2, double>(icols, sort);
        case DataKind.String:
            return df.TGroupBy<T1, T2, DvText>(icols, sort);
        default:
            throw new NotImplementedException($"GroupBy is not implemented for type '{kind}'.");
    }
}
/// <summary>
/// Groups the rows of <paramref name="df"/> by the keys read from the columns
/// <paramref name="cols"/>, optionally sorting by key first.
/// </summary>
/// <param name="df">view to group</param>
/// <param name="rows">row selection passed to EnumerateItems; may be null</param>
/// <param name="columns">forwarded to the key-based TGroupBy overload</param>
/// <param name="cols">indexes of the key columns</param>
/// <param name="sort">when true, positions are sorted by ascending key before grouping</param>
/// <param name="getter">reads the mutable key of a row</param>
/// <param name="conv">converts a mutable key into its immutable form</param>
/// <param name="conv2">converts an immutable key into the group keys stored on each group</param>
/// <returns>the lazily evaluated groups wrapped in a result object</returns>
public static DataFrameViewGroupResults<TImutKey> TGroupBy<TMutKey, TImutKey>(
    IDataFrameView df, int[] rows, int[] columns, IEnumerable<int> cols, bool sort,
    MultiGetterAt<TMutKey> getter, Func<TMutKey, TImutKey> conv,
    Func<TImutKey, DataFrameGroupKey[]> conv2)
    where TMutKey : ITUple, new()
    where TImutKey : IComparable<TImutKey>, IEquatable<TImutKey>
{
    var icols = cols.ToArray();

    // Identity permutation over the view. The previous ternary produced exactly
    // this for non-null rows and dereferenced rows when it was null, so a null
    // row selection always failed.
    int[] order = Enumerable.Range(0, df.Length).ToArray();

    var keys = df.EnumerateItems(icols, true, rows, getter).Select(c => conv(c)).ToArray();
    if (sort)
    {
        DataFrameSorting.TSort(df, ref order, keys, true);
    }

    var iter = TGroupBy(df, order, keys, columns, conv2);
    return new DataFrameViewGroupResults<TImutKey>(iter);
}
/// <summary>
/// Joins two views: reads the keys of both sides, optionally sorts both sides
/// by key, runs the key-based TJoin and concatenates the pieces it yields.
/// </summary>
/// <param name="left">left side of the join</param>
/// <param name="right">right side of the join</param>
/// <param name="rowsLeft">row selection passed to EnumerateItems for the left side; may be null</param>
/// <param name="rowsRight">row selection passed to EnumerateItems for the right side; may be null</param>
/// <param name="columnsLeft">not used by this method</param>
/// <param name="columnsRight">not used by this method</param>
/// <param name="colsLeft">indexes of the key columns on the left side</param>
/// <param name="colsRight">indexes of the key columns on the right side</param>
/// <param name="sort">when true, both sides are sorted by ascending key before joining</param>
/// <param name="leftSuffix">forwarded to the key-based TJoin</param>
/// <param name="rightSuffix">forwarded to the key-based TJoin</param>
/// <param name="joinType">forwarded to the key-based TJoin</param>
/// <param name="getterLeft">reads the mutable key of a left row</param>
/// <param name="getterRight">reads the mutable key of a right row</param>
/// <param name="conv">converts a mutable key into its immutable form</param>
/// <param name="convLeft">converts an immutable key into left group keys</param>
/// <param name="convRight">converts an immutable key into right group keys</param>
/// <returns>the concatenation of all joined pieces</returns>
public static DataFrame TJoin<TMutKey, TImutKey>(
    IDataFrameView left, IDataFrameView right,
    int[] rowsLeft, int[] rowsRight,
    int[] columnsLeft, int[] columnsRight,
    IEnumerable<int> colsLeft, IEnumerable<int> colsRight,
    bool sort, string leftSuffix, string rightSuffix, JoinStrategy joinType,
    MultiGetterAt<TMutKey> getterLeft, MultiGetterAt<TMutKey> getterRight,
    Func<TMutKey, TImutKey> conv,
    Func<TImutKey, DataFrameGroupKey[]> convLeft,
    Func<TImutKey, DataFrameGroupKey[]> convRight)
    where TMutKey : ITUple, new()
    where TImutKey : IComparable<TImutKey>, IEquatable<TImutKey>
{
    var icolsLeft = colsLeft.ToArray();
    var icolsRight = colsRight.ToArray();

    // Identity permutations over each side. The previous ternaries produced
    // exactly this when rowsLeft was non-null; when rowsLeft was null they
    // dereferenced it, and the right-hand one tested rowsLeft instead of rowsRight.
    int[] orderLeft = Enumerable.Range(0, left.Length).ToArray();
    int[] orderRight = Enumerable.Range(0, right.Length).ToArray();

    var keysLeft = left.EnumerateItems(icolsLeft, true, rowsLeft, getterLeft).Select(c => conv(c)).ToArray();
    var keysRight = right.EnumerateItems(icolsRight, true, rowsRight, getterRight).Select(c => conv(c)).ToArray();

    if (sort)
    {
        DataFrameSorting.TSort(left, ref orderLeft, keysLeft, true);
        DataFrameSorting.TSort(right, ref orderRight, keysRight, true);
    }

    var iter = TJoin<TImutKey>(left, right, orderLeft, orderRight, keysLeft, keysRight,
                               icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType,
                               convLeft, convRight);
    return DataFrame.Concat(iter);
}
/// <summary>
/// Merge-joins two views group by group and lazily yields one dataframe per
/// produced piece. Both sides are grouped by key; the two group sequences are
/// walked in parallel, always advancing the side with the smaller key.
/// </summary>
/// <param name="left">left side of the join</param>
/// <param name="right">right side of the join</param>
/// <param name="orderLeft">visiting order of the left positions</param>
/// <param name="orderRight">visiting order of the right positions</param>
/// <param name="keysLeft">key of every left position</param>
/// <param name="keysRight">key of every right position</param>
/// <param name="icolsLeft">left key columns, forwarded to the grouping</param>
/// <param name="icolsRight">right key columns, forwarded to the grouping</param>
/// <param name="leftSuffix">appended to every left column name; null or empty means no suffix</param>
/// <param name="rightSuffix">appended to every right column name; null or empty means no suffix</param>
/// <param name="joinType">decides whether unmatched left and right groups are emitted</param>
/// <param name="funcLeft">converts a key into left group keys</param>
/// <param name="funcRight">converts a key into right group keys</param>
/// <returns>one dataframe per matched group pair, plus one per unmatched group kept by <paramref name="joinType"/></returns>
public static IEnumerable<DataFrame> TJoin<TKey>(
    IDataFrameView left, IDataFrameView right,
    int[] orderLeft, int[] orderRight,
    TKey[] keysLeft, TKey[] keysRight,
    int[] icolsLeft, int[] icolsRight,
    string leftSuffix, string rightSuffix, JoinStrategy joinType,
    Func<TKey, DataFrameGroupKey[]> funcLeft, Func<TKey, DataFrameGroupKey[]> funcRight)
    where TKey : IEquatable<TKey>, IComparable<TKey>
{
    var groupLeft = DataFrameGrouping.TGroupBy<TKey>(left, orderLeft, keysLeft, icolsLeft, funcLeft);
    var groupRight = DataFrameGrouping.TGroupBy<TKey>(right, orderRight, keysRight, icolsRight, funcRight);

    // Enumerators are disposable; the using blocks release them when the
    // iteration ends or when the consumer abandons it.
    using (var iterLeft = groupLeft.GetEnumerator())
    using (var iterRight = groupRight.GetEnumerator())
    {
        bool contLeft = iterLeft.MoveNext();
        bool contRight = iterRight.MoveNext();

        leftSuffix = string.IsNullOrEmpty(leftSuffix) ? string.Empty : leftSuffix;
        rightSuffix = string.IsNullOrEmpty(rightSuffix) ? string.Empty : rightSuffix;
        var newColsLeft = left.Columns.Select(c => c + leftSuffix).ToArray();
        var newColsRight = right.Columns.Select(c => c + rightSuffix).ToArray();

        // Right column names that collide with an already used name get "_y"
        // appended until they are unique.
        var existsCols = new HashSet<string>(newColsLeft);
        for (int i = 0; i < newColsRight.Length; ++i)
        {
            while (existsCols.Contains(newColsRight[i]))
            {
                newColsRight[i] += "_y";
            }
            existsCols.Add(newColsRight[i]);
        }
        var newCols = newColsLeft.Concat(newColsRight).ToArray();

        // NOTE(review): advancing the smaller side only pairs every equal key when
        // both group sequences arrive in ascending key order - confirm callers
        // that join without sorting.
        int r;
        while (contLeft || contRight)
        {
            // When one side is exhausted, treat the remaining side as the smaller key.
            r = contLeft && contRight
                ? iterLeft.Current.Key.CompareTo(iterRight.Current.Key)
                : (contRight ? 1 : -1);

            if (r < 0)
            {
                // Left group without a match: kept for Left and Outer joins,
                // right columns filled with the missing value of their type.
                if (joinType == JoinStrategy.Left || joinType == JoinStrategy.Outer)
                {
                    var df = iterLeft.Current.Value.Copy();
                    if (!string.IsNullOrEmpty(leftSuffix))
                    {
                        df.RenameColumns(newColsLeft);
                    }
                    for (int i = 0; i < right.ColumnCount; ++i)
                    {
                        var kind = right.SchemaI.GetColumnType(i);
                        var col = df.AddColumn(newColsRight[i], kind, df.Length);
                        df.GetColumn(col).Set(DataFrameMissingValue.GetMissingOrDefaultMissingValue(kind));
                    }
                    yield return df;
                }
                contLeft = iterLeft.MoveNext();
            }
            else if (r > 0)
            {
                // Right group without a match: kept for Right and Outer joins,
                // left columns filled with missing values, then columns reordered
                // so the left ones come first.
                if (joinType == JoinStrategy.Right || joinType == JoinStrategy.Outer)
                {
                    var df = iterRight.Current.Value.Copy();
                    df.RenameColumns(newColsRight);
                    for (int i = 0; i < left.ColumnCount; ++i)
                    {
                        var kind = left.SchemaI.GetColumnType(i);
                        var col = df.AddColumn(newColsLeft[i], kind, df.Length);
                        df.GetColumn(col).Set(DataFrameMissingValue.GetMissingOrDefaultMissingValue(kind));
                    }
                    df.OrderColumns(newCols);
                    yield return df;
                }
                contRight = iterRight.MoveNext();
            }
            else
            {
                // Same key on both sides: each side is multiplied by the other
                // side's length (presumably so every left row meets every right
                // row - TODO confirm Multiply semantics), then the right columns
                // are appended to the left ones.
                var dfLeft = iterLeft.Current.Value.Copy();
                var dfRight = iterRight.Current.Value.Copy();
                if (!string.IsNullOrEmpty(leftSuffix))
                {
                    dfLeft.RenameColumns(newColsLeft);
                }
                dfRight.RenameColumns(newColsRight);
                var vleft = dfLeft.Multiply(dfRight.Length, MultiplyStrategy.Block).Copy();
                var vright = dfRight.Multiply(dfLeft.Length, MultiplyStrategy.Row).Copy();
                for (int i = 0; i < vright.ColumnCount; ++i)
                {
                    vleft.AddColumn(newColsRight[i], vright.GetColumn(i));
                }
                yield return vleft;
                contLeft = iterLeft.MoveNext();
                contRight = iterRight.MoveNext();
            }
        }
    }
}
/// <summary>
/// Exact comparison between this dataframe and a view: the view is copied and
/// the copy is compared with this dataframe.
/// </summary>
/// <param name="dfv">view to compare with; null is never equal</param>
/// <returns>false when <paramref name="dfv"/> is null, otherwise the result of comparing with its copy</returns>
public bool Equals(IDataFrameView dfv)
{
    // An equality test should answer false for null rather than fail on dfv.Copy().
    if (dfv == null)
    {
        return false;
    }
    return Equals(dfv.Copy());
}
/// <summary>
/// Entry point of the sort dispatch: selects the first key type from the kind
/// of column <c>icols[0]</c>. Vector kinds dispatch on their item type and use
/// <c>VBufferEqSort</c> keys; other kinds dispatch on their raw kind. With a
/// single key column the typed TSort is called directly; otherwise the dispatch
/// continues with the one-type RecSort overload.
/// </summary>
/// <param name="df">view to sort</param>
/// <param name="icols">indexes of the key columns</param>
/// <param name="ascending">forwarded unchanged</param>
/// <exception cref="NotImplementedException">the kind of the first key column is not handled</exception>
static void RecSort(IDataFrameView df, int[] icols, bool ascending)
{
    var firstKind = df.Kinds[icols[0]];
    bool isLastKey = icols.Length == 1;

    if (firstKind.IsVector())
    {
        switch (firstKind.ItemType().RawKind())
        {
            case DataKind.BL:
                if (isLastKey) { df.TSort<VBufferEqSort<bool>>(icols, ascending); }
                else { RecSort<VBufferEqSort<bool>>(df, icols, ascending); }
                return;
            case DataKind.I4:
                if (isLastKey) { df.TSort<VBufferEqSort<int>>(icols, ascending); }
                else { RecSort<VBufferEqSort<int>>(df, icols, ascending); }
                return;
            case DataKind.U4:
                if (isLastKey) { df.TSort<VBufferEqSort<uint>>(icols, ascending); }
                else { RecSort<VBufferEqSort<uint>>(df, icols, ascending); }
                return;
            case DataKind.I8:
                if (isLastKey) { df.TSort<VBufferEqSort<long>>(icols, ascending); }
                else { RecSort<VBufferEqSort<long>>(df, icols, ascending); }
                return;
            case DataKind.R4:
                if (isLastKey) { df.TSort<VBufferEqSort<float>>(icols, ascending); }
                else { RecSort<VBufferEqSort<float>>(df, icols, ascending); }
                return;
            case DataKind.R8:
                if (isLastKey) { df.TSort<VBufferEqSort<double>>(icols, ascending); }
                else { RecSort<VBufferEqSort<double>>(df, icols, ascending); }
                return;
            case DataKind.TX:
                if (isLastKey) { df.TSort<VBufferEqSort<DvText>>(icols, ascending); }
                else { RecSort<VBufferEqSort<DvText>>(df, icols, ascending); }
                return;
            default:
                throw new NotImplementedException($"Sort is not implemented for type '{firstKind}'.");
        }
    }

    switch (firstKind.RawKind())
    {
        case DataKind.BL:
            if (isLastKey) { df.TSort<bool>(icols, ascending); }
            else { RecSort<bool>(df, icols, ascending); }
            return;
        case DataKind.I4:
            if (isLastKey) { df.TSort<int>(icols, ascending); }
            else { RecSort<int>(df, icols, ascending); }
            return;
        case DataKind.U4:
            if (isLastKey) { df.TSort<uint>(icols, ascending); }
            else { RecSort<uint>(df, icols, ascending); }
            return;
        case DataKind.I8:
            if (isLastKey) { df.TSort<long>(icols, ascending); }
            else { RecSort<long>(df, icols, ascending); }
            return;
        case DataKind.R4:
            if (isLastKey) { df.TSort<float>(icols, ascending); }
            else { RecSort<float>(df, icols, ascending); }
            return;
        case DataKind.R8:
            if (isLastKey) { df.TSort<double>(icols, ascending); }
            else { RecSort<double>(df, icols, ascending); }
            return;
        case DataKind.TX:
            if (isLastKey) { df.TSort<DvText>(icols, ascending); }
            else { RecSort<DvText>(df, icols, ascending); }
            return;
        default:
            throw new NotImplementedException($"Sort is not implemented for type '{firstKind}'.");
    }
}
/// <summary>
/// Second step of the group-by dispatch: selects the second key type from the
/// raw kind of column <c>icols[1]</c>. With exactly two key columns the typed
/// TGroupBy is called; otherwise the dispatch continues with the two-type
/// RecGroupBy overload. Vector columns are not supported as keys.
/// </summary>
/// <param name="df">view to group</param>
/// <param name="icols">indexes of the key columns</param>
/// <param name="sort">forwarded unchanged</param>
/// <exception cref="NotImplementedException">the column is a vector or its kind is not handled</exception>
static IDataFrameViewGroupResults RecGroupBy<T1>(IDataFrameView df, int[] icols, bool sort)
    where T1 : IEquatable<T1>, IComparable<T1>
{
    var secondKind = df.Kinds[icols[1]];
    bool isLastKey = icols.Length == 2;

    if (secondKind.IsVector())
    {
        throw new NotImplementedException();
    }

    switch (secondKind.RawKind())
    {
        case DataKind.BL:
            if (isLastKey) { return df.TGroupBy<T1, bool>(icols, sort); }
            return RecGroupBy<T1, bool>(df, icols, sort);
        case DataKind.I4:
            if (isLastKey) { return df.TGroupBy<T1, int>(icols, sort); }
            return RecGroupBy<T1, int>(df, icols, sort);
        case DataKind.U4:
            if (isLastKey) { return df.TGroupBy<T1, uint>(icols, sort); }
            return RecGroupBy<T1, uint>(df, icols, sort);
        case DataKind.I8:
            if (isLastKey) { return df.TGroupBy<T1, long>(icols, sort); }
            return RecGroupBy<T1, long>(df, icols, sort);
        case DataKind.R4:
            if (isLastKey) { return df.TGroupBy<T1, float>(icols, sort); }
            return RecGroupBy<T1, float>(df, icols, sort);
        case DataKind.R8:
            if (isLastKey) { return df.TGroupBy<T1, double>(icols, sort); }
            return RecGroupBy<T1, double>(df, icols, sort);
        case DataKind.TX:
            if (isLastKey) { return df.TGroupBy<T1, DvText>(icols, sort); }
            return RecGroupBy<T1, DvText>(df, icols, sort);
        default:
            throw new NotImplementedException($"GroupBy is not implemented for type '{secondKind}'.");
    }
}
/// <summary>
/// Joins this dataframe with <paramref name="right"/>, the left key columns
/// being given by name. Names are mapped to indexes with GetColumnIndex and the
/// call is forwarded to the index-based Join overload.
/// </summary>
/// <param name="right">right side of the join</param>
/// <param name="colsLeft">names of the key columns on this dataframe</param>
/// <param name="colsRight">indexes of the key columns on <paramref name="right"/></param>
/// <param name="leftSuffix">forwarded unchanged</param>
/// <param name="rightSuffix">forwarded unchanged</param>
/// <param name="joinType">forwarded unchanged</param>
/// <param name="sort">forwarded unchanged</param>
/// <returns>the joined dataframe</returns>
public DataFrame Join(IDataFrameView right, IEnumerable<string> colsLeft, IEnumerable<int> colsRight,
                      string leftSuffix = null, string rightSuffix = null,
                      JoinStrategy joinType = JoinStrategy.Inner, bool sort = true)
{
    // Lazy projection: names are resolved when the callee enumerates the sequence.
    var leftIndexes = colsLeft.Select(name => GetColumnIndex(name));
    return Join(right, leftIndexes, colsRight, leftSuffix, rightSuffix, joinType, sort);
}
/// <summary>
/// Joins this dataframe with <paramref name="right"/> on key columns given by
/// index, by wrapping this dataframe in a view built with null rows and null
/// columns and delegating to that view's Join.
/// </summary>
/// <param name="right">right side of the join</param>
/// <param name="colsLeft">indexes of the key columns on this dataframe</param>
/// <param name="colsRight">indexes of the key columns on <paramref name="right"/></param>
/// <param name="leftSuffix">forwarded unchanged</param>
/// <param name="rightSuffix">forwarded unchanged</param>
/// <param name="joinType">forwarded unchanged</param>
/// <param name="sort">forwarded unchanged</param>
/// <returns>the joined dataframe</returns>
public DataFrame Join(IDataFrameView right, IEnumerable<int> colsLeft, IEnumerable<int> colsRight,
                      string leftSuffix = null, string rightSuffix = null,
                      JoinStrategy joinType = JoinStrategy.Inner, bool sort = true)
{
    var wholeView = new DataFrameView(this, null, null);
    return wholeView.Join(right, colsLeft, colsRight, leftSuffix, rightSuffix, joinType, sort);
}
/// <summary>
/// Entry point of the group-by dispatch: selects the first key type from the
/// raw kind of column <c>icols[0]</c>. With a single key column the typed
/// TGroupBy is called directly; otherwise the dispatch continues with the
/// one-type RecGroupBy overload. Vector columns are not supported as keys.
/// </summary>
/// <param name="df">view to group</param>
/// <param name="icols">indexes of the key columns</param>
/// <param name="sort">forwarded unchanged</param>
/// <exception cref="NotImplementedException">the column is a vector or its kind is not handled</exception>
static IDataFrameViewGroupResults RecGroupBy(IDataFrameView df, int[] icols, bool sort)
{
    var firstKind = df.Kinds[icols[0]];
    bool isLastKey = icols.Length == 1;

    if (firstKind.IsVector())
    {
        throw new NotImplementedException();
    }

    switch (firstKind.RawKind())
    {
        case DataKind.Boolean:
            if (isLastKey) { return df.TGroupBy<bool>(icols, sort); }
            return RecGroupBy<bool>(df, icols, sort);
        case DataKind.Int32:
            if (isLastKey) { return df.TGroupBy<int>(icols, sort); }
            return RecGroupBy<int>(df, icols, sort);
        case DataKind.UInt32:
            if (isLastKey) { return df.TGroupBy<uint>(icols, sort); }
            return RecGroupBy<uint>(df, icols, sort);
        case DataKind.Int64:
            if (isLastKey) { return df.TGroupBy<long>(icols, sort); }
            return RecGroupBy<long>(df, icols, sort);
        case DataKind.Single:
            if (isLastKey) { return df.TGroupBy<float>(icols, sort); }
            return RecGroupBy<float>(df, icols, sort);
        case DataKind.Double:
            if (isLastKey) { return df.TGroupBy<double>(icols, sort); }
            return RecGroupBy<double>(df, icols, sort);
        case DataKind.String:
            if (isLastKey) { return df.TGroupBy<DvText>(icols, sort); }
            return RecGroupBy<DvText>(df, icols, sort);
        default:
            throw new NotImplementedException($"GroupBy is not implemented for type '{firstKind}'.");
    }
}
/// <summary>
/// Second step of the join dispatch: selects the second key type from the raw
/// kind of left column <c>icolsLeft[1]</c>. With exactly two key columns the
/// typed TJoin is called; otherwise the dispatch continues with the two-type
/// RecJoin overload. Vector columns are not supported as keys.
/// </summary>
/// <param name="left">left side of the join</param>
/// <param name="right">right side of the join</param>
/// <param name="icolsLeft">key column indexes on the left side</param>
/// <param name="icolsRight">key column indexes on the right side</param>
/// <param name="leftSuffix">forwarded unchanged</param>
/// <param name="rightSuffix">forwarded unchanged</param>
/// <param name="joinType">forwarded unchanged</param>
/// <param name="sort">forwarded unchanged</param>
/// <exception cref="NotImplementedException">the column is a vector or its kind is not handled</exception>
static DataFrame RecJoin<T1>(IDataFrameView left, IDataFrameView right, int[] icolsLeft, int[] icolsRight,
                             string leftSuffix = null, string rightSuffix = null,
                             JoinStrategy joinType = JoinStrategy.Inner, bool sort = true)
    where T1 : IEquatable<T1>, IComparable<T1>
{
    var secondKind = left.Kinds[icolsLeft[1]];
    bool isLastKey = icolsLeft.Length == 2;

    if (secondKind.IsVector())
    {
        throw new NotImplementedException();
    }

    switch (secondKind.RawKind())
    {
        case DataKind.Boolean:
            return isLastKey
                ? left.TJoin<T1, bool>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort)
                : RecJoin<T1, bool>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);
        case DataKind.Int32:
            return isLastKey
                ? left.TJoin<T1, int>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort)
                : RecJoin<T1, int>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);
        case DataKind.UInt32:
            return isLastKey
                ? left.TJoin<T1, uint>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort)
                : RecJoin<T1, uint>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);
        case DataKind.Int64:
            return isLastKey
                ? left.TJoin<T1, long>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort)
                : RecJoin<T1, long>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);
        case DataKind.Single:
            return isLastKey
                ? left.TJoin<T1, float>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort)
                : RecJoin<T1, float>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);
        case DataKind.Double:
            return isLastKey
                ? left.TJoin<T1, double>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort)
                : RecJoin<T1, double>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);
        case DataKind.String:
            return isLastKey
                ? left.TJoin<T1, DvText>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort)
                : RecJoin<T1, DvText>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort);
        default:
            throw new NotImplementedException($"Join is not implemented for type '{secondKind}'.");
    }
}
/// <summary>
/// Creates a group: the underlying view is initialized by the base constructor
/// from <paramref name="src"/>, <paramref name="rows"/> and <paramref name="columns"/>,
/// and the group keys are stored alongside it.
/// </summary>
/// <param name="keys">keys identifying this group</param>
/// <param name="src">viewed dataframe, passed to the base constructor</param>
/// <param name="rows">rows of the group, passed to the base constructor</param>
/// <param name="columns">columns of the group, passed to the base constructor</param>
public DataFrameViewGroup(DataFrameGroupKey[] keys, IDataFrameView src,
                          IEnumerable<int> rows, IEnumerable<int> columns)
    : base(src, rows, columns)
{
    this._keys = keys;
}
/// <summary>
/// Sorts a view on the given key columns by materializing the column indexes
/// and handing them to the RecSort dispatch.
/// </summary>
/// <param name="df">view to sort</param>
/// <param name="columns">indexes of the key columns</param>
/// <param name="ascending">forwarded to RecSort</param>
public static void Sort(IDataFrameView df, IEnumerable<int> columns, bool ascending = true)
{
    RecSort(df, columns.ToArray(), ascending);
}
/// <summary>
/// Groups a view on the given key columns by materializing the column indexes
/// and handing them to the RecGroupBy dispatch.
/// </summary>
/// <param name="df">view to group</param>
/// <param name="columns">indexes of the key columns</param>
/// <param name="ascending">passed to RecGroupBy as its <c>sort</c> argument</param>
/// <returns>the groups produced by RecGroupBy</returns>
public static IDataFrameViewGroupResults GroupBy(IDataFrameView df, IEnumerable<int> columns, bool ascending = true)
{
    var keyColumns = columns.ToArray();
    var groups = RecGroupBy(df, keyColumns, ascending);
    return groups;
}
/// <summary>
/// Second step of the sort dispatch: selects the second key type from the kind
/// of column <c>icols[1]</c>. Vector kinds dispatch on their item type and use
/// <c>VBufferEqSort</c> keys; other kinds dispatch on their raw kind. With
/// exactly two key columns the typed TSort is called; otherwise the dispatch
/// continues with the two-type RecSort overload.
/// </summary>
/// <param name="df">view to sort</param>
/// <param name="icols">indexes of the key columns</param>
/// <param name="ascending">forwarded unchanged</param>
/// <exception cref="NotImplementedException">the kind of the second key column is not handled</exception>
static void RecSort<T1>(IDataFrameView df, int[] icols, bool ascending)
    where T1 : IEquatable<T1>, IComparable<T1>
{
    var secondKind = df.Kinds[icols[1]];
    bool isLastKey = icols.Length == 2;

    if (secondKind.IsVector())
    {
        switch (secondKind.ItemType().RawKind())
        {
            case DataKind.Boolean:
                if (isLastKey) { df.TSort<T1, VBufferEqSort<bool>>(icols, ascending); }
                else { RecSort<T1, VBufferEqSort<bool>>(df, icols, ascending); }
                return;
            case DataKind.Int32:
                if (isLastKey) { df.TSort<T1, VBufferEqSort<int>>(icols, ascending); }
                else { RecSort<T1, VBufferEqSort<int>>(df, icols, ascending); }
                return;
            case DataKind.UInt32:
                if (isLastKey) { df.TSort<T1, VBufferEqSort<uint>>(icols, ascending); }
                else { RecSort<T1, VBufferEqSort<uint>>(df, icols, ascending); }
                return;
            case DataKind.Int64:
                if (isLastKey) { df.TSort<T1, VBufferEqSort<long>>(icols, ascending); }
                else { RecSort<T1, VBufferEqSort<long>>(df, icols, ascending); }
                return;
            case DataKind.Single:
                if (isLastKey) { df.TSort<T1, VBufferEqSort<float>>(icols, ascending); }
                else { RecSort<T1, VBufferEqSort<float>>(df, icols, ascending); }
                return;
            case DataKind.Double:
                if (isLastKey) { df.TSort<T1, VBufferEqSort<double>>(icols, ascending); }
                else { RecSort<T1, VBufferEqSort<double>>(df, icols, ascending); }
                return;
            case DataKind.String:
                if (isLastKey) { df.TSort<T1, VBufferEqSort<DvText>>(icols, ascending); }
                else { RecSort<T1, VBufferEqSort<DvText>>(df, icols, ascending); }
                return;
            default:
                throw new NotImplementedException($"Sort is not implemented for type '{secondKind}'.");
        }
    }

    switch (secondKind.RawKind())
    {
        case DataKind.Boolean:
            if (isLastKey) { df.TSort<T1, bool>(icols, ascending); }
            else { RecSort<T1, bool>(df, icols, ascending); }
            return;
        case DataKind.Int32:
            if (isLastKey) { df.TSort<T1, int>(icols, ascending); }
            else { RecSort<T1, int>(df, icols, ascending); }
            return;
        case DataKind.UInt32:
            if (isLastKey) { df.TSort<T1, uint>(icols, ascending); }
            else { RecSort<T1, uint>(df, icols, ascending); }
            return;
        case DataKind.Int64:
            if (isLastKey) { df.TSort<T1, long>(icols, ascending); }
            else { RecSort<T1, long>(df, icols, ascending); }
            return;
        case DataKind.Single:
            if (isLastKey) { df.TSort<T1, float>(icols, ascending); }
            else { RecSort<T1, float>(df, icols, ascending); }
            return;
        case DataKind.Double:
            if (isLastKey) { df.TSort<T1, double>(icols, ascending); }
            else { RecSort<T1, double>(df, icols, ascending); }
            return;
        case DataKind.String:
            if (isLastKey) { df.TSort<T1, DvText>(icols, ascending); }
            else { RecSort<T1, DvText>(df, icols, ascending); }
            return;
        default:
            throw new NotImplementedException($"Sort is not implemented for type '{secondKind}'.");
    }
}