/// <summary>
/// Groups the rows of this view by the values of the columns <paramref name="cols"/>,
/// read through <c>GetMultiGetterAt&lt;T1&gt;</c>. Each group key is exposed as an
/// <c>ImmutableTuple&lt;T1&gt;</c>.
/// </summary>
/// <typeparam name="T1">Type used to read every grouping column.</typeparam>
/// <param name="cols">Indices of the columns forming the grouping key (enumerated once).</param>
/// <param name="sort">
/// Forwarded to <c>DataFrameGrouping.TGroupBy</c>. It was previously ignored and a
/// hard-coded <c>true</c> was passed instead; the default keeps that behaviour.
/// </param>
/// <returns>The grouping result produced by <c>DataFrameGrouping.TGroupBy</c>.</returns>
public DataFrameViewGroupResults<ImmutableTuple<T1>> TGroupBy<T1>(IEnumerable<int> cols, bool sort = true)
    where T1 : IEquatable<T1>, IComparable<T1>
{
    // Private copy of the view's row indices, handed to the grouping helper.
    int[] order = _rows.ToArray();
    var icols = cols.ToArray();

    // Names of the key columns, attached to every group key that gets created.
    var scols = icols.Select(c => Schema.GetColumnName(c)).ToArray();

    return DataFrameGrouping.TGroupBy(
        this, order, _columns, icols, sort,
        GetMultiGetterAt<T1>(icols),
        ke => ke.ToImTuple(),
        ke => DataFrameGroupKey.Create(scols, ke));
}
/// <summary>
/// Groups the rows of this view by the given columns.
/// The work is delegated entirely to <c>DataFrameGrouping.GroupBy</c>.
/// </summary>
/// <param name="cols">Columns, given by index, to group on.</param>
/// <param name="sort">Passed unchanged to the grouping helper.</param>
/// <returns>The result returned by <c>DataFrameGrouping.GroupBy</c>.</returns>
public IDataFrameViewGroupResults GroupBy(IEnumerable<int> cols, bool sort = true)
    => DataFrameGrouping.GroupBy(this, cols, sort);
/// <summary>
/// Joins two views group by group and yields one <c>DataFrame</c> per emitted key.
/// Both sides are grouped with <c>DataFrameGrouping.TGroupBy</c>, then the two group
/// sequences are walked in parallel, comparing the current keys with <c>CompareTo</c>.
/// </summary>
/// <remarks>
/// This is an iterator: nothing runs until the returned sequence is enumerated.
/// NOTE(review): the parallel walk presumes both group enumerations deliver their keys
/// in ascending <c>CompareTo</c> order - confirm against <c>DataFrameGrouping.TGroupBy</c>.
/// NOTE(review): the two enumerators obtained here are never disposed.
/// </remarks>
/// <typeparam name="TKey">Type of the join key.</typeparam>
/// <param name="left">Left view.</param>
/// <param name="right">Right view.</param>
/// <param name="orderLeft">Passed to the grouping of the left view.</param>
/// <param name="orderRight">Passed to the grouping of the right view.</param>
/// <param name="keysLeft">Passed to the grouping of the left view.</param>
/// <param name="keysRight">Passed to the grouping of the right view.</param>
/// <param name="icolsLeft">Passed to the grouping of the left view.</param>
/// <param name="icolsRight">Passed to the grouping of the right view.</param>
/// <param name="leftSuffix">Appended to every left column name; null is treated as empty.</param>
/// <param name="rightSuffix">Appended to every right column name; null is treated as empty.</param>
/// <param name="joinType">
/// Keys present on the left only are emitted for <c>Left</c> and <c>Outer</c>; keys present
/// on the right only are emitted for <c>Right</c> and <c>Outer</c>; keys present on both
/// sides are always emitted.
/// </param>
/// <param name="funcLeft">Passed to the grouping of the left view.</param>
/// <param name="funcRight">Passed to the grouping of the right view.</param>
/// <returns>A lazily produced sequence of joined frames, one per emitted key.</returns>
public static IEnumerable<DataFrame> TJoin<TKey>(
    IDataFrameView left, IDataFrameView right,
    int[] orderLeft, int[] orderRight,
    TKey[] keysLeft, TKey[] keysRight,
    int[] icolsLeft, int[] icolsRight,
    string leftSuffix, string rightSuffix,
    JoinStrategy joinType,
    Func<TKey, DataFrameGroupKey[]> funcLeft,
    Func<TKey, DataFrameGroupKey[]> funcRight)
    where TKey : IEquatable<TKey>, IComparable<TKey>
{
    var leftGroups = DataFrameGrouping.TGroupBy<TKey>(left, orderLeft, keysLeft, icolsLeft, funcLeft);
    var rightGroups = DataFrameGrouping.TGroupBy<TKey>(right, orderRight, keysRight, icolsRight, funcRight);
    var leftIter = leftGroups.GetEnumerator();
    var rightIter = rightGroups.GetEnumerator();
    bool hasLeft = leftIter.MoveNext();
    bool hasRight = rightIter.MoveNext();

    // Normalise null suffixes so they can be concatenated safely.
    if (string.IsNullOrEmpty(leftSuffix))
    {
        leftSuffix = string.Empty;
    }
    if (string.IsNullOrEmpty(rightSuffix))
    {
        rightSuffix = string.Empty;
    }

    var leftNames = left.Columns.Select(name => name + leftSuffix).ToArray();
    var rightNames = right.Columns.Select(name => name + rightSuffix).ToArray();

    // Make every right-hand name unique with respect to the left-hand names and the
    // right-hand names already accepted: keep appending "_y" until the name is free.
    var taken = new HashSet<string>(leftNames);
    for (int k = 0; k < rightNames.Length; ++k)
    {
        while (!taken.Add(rightNames[k]))
        {
            rightNames[k] += "_y";
        }
    }

    var allNames = leftNames.Concat(rightNames).ToArray();

    // These depend only on the arguments, so they are evaluated once.
    bool renameLeft = !string.IsNullOrEmpty(leftSuffix);
    bool emitLeftOnly = joinType == JoinStrategy.Left || joinType == JoinStrategy.Outer;
    bool emitRightOnly = joinType == JoinStrategy.Right || joinType == JoinStrategy.Outer;

    while (hasLeft || hasRight)
    {
        // Negative: consume the left group; positive: consume the right group; zero: same key.
        // When one side is exhausted, the remaining side is consumed.
        int cmp;
        if (hasLeft && hasRight)
        {
            cmp = leftIter.Current.Key.CompareTo(rightIter.Current.Key);
        }
        else if (hasRight)
        {
            cmp = 1;
        }
        else
        {
            cmp = -1;
        }

        if (cmp == 0)
        {
            // Same key on both sides: pair every left row with every right row.
            var leftPart = leftIter.Current.Value.Copy();
            var rightPart = rightIter.Current.Value.Copy();
            if (renameLeft)
            {
                leftPart.RenameColumns(leftNames);
            }
            rightPart.RenameColumns(rightNames);

            var merged = leftPart.Multiply(rightPart.Length, MultiplyStrategy.Block).Copy();
            var repeated = rightPart.Multiply(leftPart.Length, MultiplyStrategy.Row).Copy();
            for (int k = 0; k < repeated.ColumnCount; ++k)
            {
                merged.AddColumn(rightNames[k], repeated.GetColumn(k));
            }
            yield return merged;

            hasLeft = leftIter.MoveNext();
            hasRight = rightIter.MoveNext();
        }
        else if (cmp < 0)
        {
            // Key only on the left: append the right columns filled with missing values.
            if (emitLeftOnly)
            {
                var leftOnly = leftIter.Current.Value.Copy();
                if (renameLeft)
                {
                    leftOnly.RenameColumns(leftNames);
                }
                for (int k = 0; k < right.ColumnCount; ++k)
                {
                    var kind = right.SchemaI.GetColumnType(k);
                    var added = leftOnly.AddColumn(rightNames[k], kind, leftOnly.Length);
                    leftOnly.GetColumn(added).Set(DataFrameMissingValue.GetMissingOrDefaultMissingValue(kind));
                }
                yield return leftOnly;
            }
            hasLeft = leftIter.MoveNext();
        }
        else
        {
            // Key only on the right: append the left columns filled with missing values,
            // then reorder so that the left columns come first.
            if (emitRightOnly)
            {
                var rightOnly = rightIter.Current.Value.Copy();
                rightOnly.RenameColumns(rightNames);
                for (int k = 0; k < left.ColumnCount; ++k)
                {
                    var kind = left.SchemaI.GetColumnType(k);
                    var added = rightOnly.AddColumn(leftNames[k], kind, rightOnly.Length);
                    rightOnly.GetColumn(added).Set(DataFrameMissingValue.GetMissingOrDefaultMissingValue(kind));
                }
                rightOnly.OrderColumns(allNames);
                yield return rightOnly;
            }
            hasRight = rightIter.MoveNext();
        }
    }
}