예제 #1
0
        /// <summary>
        /// Typed group-by: builds the inputs expected by <c>DataFrameGrouping.TGroupBy</c>
        /// (a copy of this view's row indices, the selected column indices and their names)
        /// and delegates the grouping to it, reading the key through
        /// <c>GetMultiGetterAt&lt;T1&gt;</c>.
        /// </summary>
        /// <typeparam name="T1">Type used to read the key columns.</typeparam>
        /// <param name="cols">Indices of the key columns; enumerated once into an array.</param>
        /// <param name="sort">Forwarded to <c>DataFrameGrouping.TGroupBy</c> (defaults to <c>true</c>).</param>
        /// <returns>The result of <c>DataFrameGrouping.TGroupBy</c>, keyed by <c>ImmutableTuple&lt;T1&gt;</c>.</returns>
        public DataFrameViewGroupResults <ImmutableTuple <T1> > TGroupBy <T1>(IEnumerable <int> cols, bool sort = true)
            where T1 : IEquatable <T1>, IComparable <T1>
        {
            // Copy the row indices into a fresh array before handing them over.
            int[] order = _rows.ToArray();
            var   icols = cols.ToArray();
            // Column names matching icols, used to label the group keys.
            var   scols = icols.Select(c => Schema.GetColumnName(c)).ToArray();

            // Forward the caller's 'sort' choice: the previous code passed a literal 'true'
            // here and never read the parameter, so sort: false had no effect.
            // NOTE(review): the fifth argument of DataFrameGrouping.TGroupBy is assumed to be
            // the sort flag - confirm against its signature.
            return(DataFrameGrouping.TGroupBy(this, order, _columns, icols, sort, GetMultiGetterAt <T1>(icols),
                                              ke => ke.ToImTuple(), ke => DataFrameGroupKey.Create(scols, ke)));
        }
예제 #2
0
 /// <summary>
 /// Untyped group-by entry point: hands this view, the requested columns and
 /// the sort flag over to <c>DataFrameGrouping.GroupBy</c>.
 /// </summary>
 /// <param name="cols">Integers identifying the grouping columns, passed through unchanged.</param>
 /// <param name="sort">Sort flag, passed through unchanged (defaults to <c>true</c>).</param>
 /// <returns>Whatever <c>DataFrameGrouping.GroupBy</c> returns for these arguments.</returns>
 public IDataFrameViewGroupResults GroupBy(IEnumerable <int> cols, bool sort = true)
 {
     IDataFrameViewGroupResults groups = DataFrameGrouping.GroupBy(this, cols, sort);
     return groups;
 }
        /// <summary>
        /// Joins two views group by group and lazily yields one <c>DataFrame</c> per emitted key group.
        /// </summary>
        /// <remarks>
        /// Both inputs are grouped with <c>DataFrameGrouping.TGroupBy</c>; the two group sequences are then
        /// walked in parallel and their keys compared with <c>CompareTo</c>.
        /// NOTE(review): this walk presumes both group sequences are ordered by key - confirm against
        /// <c>DataFrameGrouping.TGroupBy</c>.
        /// <list type="bullet">
        /// <item>Equal keys: both groups are copied, expanded with <c>Multiply</c> (Block on the left,
        /// Row on the right) and the right columns are appended to the left ones.</item>
        /// <item>Left-only key: emitted for <c>JoinStrategy.Left</c> / <c>Outer</c>, with every right column
        /// added and set to the missing (or default missing) value of its type.</item>
        /// <item>Right-only key: emitted for <c>JoinStrategy.Right</c> / <c>Outer</c>, with every left column
        /// added the same way, then columns reordered to left-then-right.</item>
        /// </list>
        /// The enumerators obtained from the groupings are not disposed by this method.
        /// </remarks>
        /// <typeparam name="TKey">Type of the join key.</typeparam>
        /// <param name="left">Left view.</param>
        /// <param name="right">Right view.</param>
        /// <param name="orderLeft">Forwarded to the grouping of <paramref name="left"/>.</param>
        /// <param name="orderRight">Forwarded to the grouping of <paramref name="right"/>.</param>
        /// <param name="keysLeft">Forwarded to the grouping of <paramref name="left"/>.</param>
        /// <param name="keysRight">Forwarded to the grouping of <paramref name="right"/>.</param>
        /// <param name="icolsLeft">Forwarded to the grouping of <paramref name="left"/>.</param>
        /// <param name="icolsRight">Forwarded to the grouping of <paramref name="right"/>.</param>
        /// <param name="leftSuffix">Appended to every left column name; null is treated as empty.</param>
        /// <param name="rightSuffix">Appended to every right column name; null is treated as empty.
        /// Right names that still collide with an already used name get "_y" appended until unique.</param>
        /// <param name="joinType">Selects which unmatched groups are emitted.</param>
        /// <param name="funcLeft">Forwarded to the grouping of <paramref name="left"/>.</param>
        /// <param name="funcRight">Forwarded to the grouping of <paramref name="right"/>.</param>
        /// <returns>A lazily evaluated sequence of joined data frames, one per emitted key group.</returns>
        public static IEnumerable <DataFrame> TJoin <TKey>(
            IDataFrameView left, IDataFrameView right,
            int[] orderLeft, int[] orderRight,
            TKey[] keysLeft, TKey[] keysRight,
            int[] icolsLeft, int[] icolsRight,
            string leftSuffix, string rightSuffix,
            JoinStrategy joinType,
            Func <TKey, DataFrameGroupKey[]> funcLeft,
            Func <TKey, DataFrameGroupKey[]> funcRight)
            where TKey : IEquatable <TKey>, IComparable <TKey>
        {
            var  leftGroups  = DataFrameGrouping.TGroupBy <TKey>(left, orderLeft, keysLeft, icolsLeft, funcLeft);
            var  rightGroups = DataFrameGrouping.TGroupBy <TKey>(right, orderRight, keysRight, icolsRight, funcRight);
            var  leftCursor  = leftGroups.GetEnumerator();
            var  rightCursor = rightGroups.GetEnumerator();
            bool hasLeft     = leftCursor.MoveNext();
            bool hasRight    = rightCursor.MoveNext();

            // A null suffix behaves exactly like an empty one.
            leftSuffix  = leftSuffix ?? string.Empty;
            rightSuffix = rightSuffix ?? string.Empty;

            // Left frames only need renaming when a non-empty suffix was requested.
            bool renameLeft = leftSuffix.Length > 0;

            var leftNames  = left.Columns.Select(name => name + leftSuffix).ToArray();
            var rightNames = right.Columns.Select(name => name + rightSuffix).ToArray();
            var takenNames = new HashSet <string>(leftNames);

            for (int k = 0; k < rightNames.Length; ++k)
            {
                // HashSet.Add returns false while the candidate name is already taken;
                // keep appending "_y" until the name is accepted.
                while (!takenNames.Add(rightNames[k]))
                {
                    rightNames[k] += "_y";
                }
            }
            var allNames = leftNames.Concat(rightNames).ToArray();

            // The join type is fixed for the whole walk, so decide once which unmatched sides are kept.
            bool emitLeftOnly  = joinType == JoinStrategy.Left || joinType == JoinStrategy.Outer;
            bool emitRightOnly = joinType == JoinStrategy.Right || joinType == JoinStrategy.Outer;

            while (hasLeft || hasRight)
            {
                // Negative: the left group goes first; positive: the right group goes first; zero: keys match.
                // An exhausted side always lets the other one go first.
                int cmp;
                if (!hasLeft)
                {
                    cmp = 1;
                }
                else if (!hasRight)
                {
                    cmp = -1;
                }
                else
                {
                    cmp = leftCursor.Current.Key.CompareTo(rightCursor.Current.Key);
                }

                if (cmp == 0)
                {
                    var leftPart  = leftCursor.Current.Value.Copy();
                    var rightPart = rightCursor.Current.Value.Copy();
                    if (renameLeft)
                    {
                        leftPart.RenameColumns(leftNames);
                    }
                    rightPart.RenameColumns(rightNames);

                    // Expand each side by the other side's length so rows can be paired column-wise.
                    // NOTE(review): Block/Row presumably yield the cross product of both groups - confirm.
                    var combined = leftPart.Multiply(rightPart.Length, MultiplyStrategy.Block).Copy();
                    var repeated = rightPart.Multiply(leftPart.Length, MultiplyStrategy.Row).Copy();
                    for (int k = 0; k < repeated.ColumnCount; ++k)
                    {
                        combined.AddColumn(rightNames[k], repeated.GetColumn(k));
                    }
                    yield return(combined);

                    hasLeft  = leftCursor.MoveNext();
                    hasRight = rightCursor.MoveNext();
                    continue;
                }

                if (cmp < 0)
                {
                    if (emitLeftOnly)
                    {
                        var leftOnly = leftCursor.Current.Value.Copy();
                        if (renameLeft)
                        {
                            leftOnly.RenameColumns(leftNames);
                        }
                        // Pad with every right column, filled with that column type's missing value.
                        for (int k = 0; k < right.ColumnCount; ++k)
                        {
                            var kind  = right.SchemaI.GetColumnType(k);
                            var added = leftOnly.AddColumn(rightNames[k], kind, leftOnly.Length);
                            leftOnly.GetColumn(added).Set(DataFrameMissingValue.GetMissingOrDefaultMissingValue(kind));
                        }
                        yield return(leftOnly);
                    }
                    hasLeft = leftCursor.MoveNext();
                    continue;
                }

                // cmp > 0: the current right group has no counterpart on the left.
                if (emitRightOnly)
                {
                    var rightOnly = rightCursor.Current.Value.Copy();
                    rightOnly.RenameColumns(rightNames);
                    // Pad with every left column, filled with that column type's missing value.
                    for (int k = 0; k < left.ColumnCount; ++k)
                    {
                        var kind  = left.SchemaI.GetColumnType(k);
                        var added = rightOnly.AddColumn(leftNames[k], kind, rightOnly.Length);
                        rightOnly.GetColumn(added).Set(DataFrameMissingValue.GetMissingOrDefaultMissingValue(kind));
                    }
                    // Left columns were appended last; restore the left-then-right layout.
                    rightOnly.OrderColumns(allNames);
                    yield return(rightOnly);
                }
                hasRight = rightCursor.MoveNext();
            }
        }