Beispiel #1
0
        /// <summary>
        /// Creates a getter on the column. The getter returns the element at
        /// cursor.Position, or the missing value associated with the column
        /// kind when the position is at or beyond the end of the data.
        /// </summary>
        /// <typeparam name="DType2">Element type requested by the caller. It must match the element type of the array stored in the column.</typeparam>
        /// <param name="cursor">Cursor whose Position is read every time the returned getter is invoked.</param>
        /// <returns>A delegate which writes the selected element (or the missing value) into its ref argument.</returns>
        /// <exception cref="System.InvalidCastException">
        /// Raised by the returned getter (not by this method) when an element
        /// must be read but the stored array is not an array of <typeparamref name="DType2"/>.
        /// </exception>
        public ValueGetter <DType2> GetGetter <DType2>(IRowCursor cursor)
        {
            // 'as' yields null when the stored array is not a DType2[]. The check is
            // deferred to the getter so that creating a getter never fails by itself.
            // The typed array is captured once, when the getter is created.
            var typed   = _data as DType2[];
            var missing = DataFrameMissingValue.GetMissingValue(Kind);

            return((ref DType2 value) =>
            {
                // Past the end of the data: hand back the missing value for this kind.
                if (cursor.Position >= _data.LongLength)
                {
                    value = (DType2)missing;
                    return;
                }

                // Fail with an explicit message rather than a NullReferenceException
                // on the indexer below.
                if (typed == null)
                {
                    throw new System.InvalidCastException(
                        "Unable to read the column as " + typeof(DType2) +
                        "[], the data is stored as " + _data.GetType() + ".");
                }

                value = typed[cursor.Position];
            });
        }
        /// <summary>
        /// Joins two dataframe views group by group. Both views are grouped with
        /// DataFrameGrouping.TGroupBy and the two sequences of groups are walked
        /// in parallel, comparing the current keys with CompareTo. One dataframe
        /// is yielded per emitted key group.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the parallel walk only pairs equal keys correctly if TGroupBy
        /// enumerates groups in ascending key order on both sides - confirm.
        /// The result is produced lazily; nothing runs until it is enumerated.
        /// </remarks>
        /// <typeparam name="TKey">Type of the grouping key.</typeparam>
        /// <param name="left">Left view.</param>
        /// <param name="right">Right view.</param>
        /// <param name="orderLeft">Forwarded to TGroupBy for the left view.</param>
        /// <param name="orderRight">Forwarded to TGroupBy for the right view.</param>
        /// <param name="keysLeft">Forwarded to TGroupBy for the left view.</param>
        /// <param name="keysRight">Forwarded to TGroupBy for the right view.</param>
        /// <param name="icolsLeft">Forwarded to TGroupBy for the left view.</param>
        /// <param name="icolsRight">Forwarded to TGroupBy for the right view.</param>
        /// <param name="leftSuffix">Suffix appended to every left column name (null is treated as empty).</param>
        /// <param name="rightSuffix">Suffix appended to every right column name (null is treated as empty).</param>
        /// <param name="joinType">
        /// Selects which unmatched groups are emitted: Left or Outer keeps groups only
        /// found on the left, Right or Outer keeps groups only found on the right.
        /// Groups found on both sides are always emitted.
        /// </param>
        /// <param name="funcLeft">Forwarded to TGroupBy for the left view.</param>
        /// <param name="funcRight">Forwarded to TGroupBy for the right view.</param>
        /// <returns>A lazy sequence of dataframes, one per emitted key group.</returns>
        public static IEnumerable <DataFrame> TJoin <TKey>(
            IDataFrameView left, IDataFrameView right,
            int[] orderLeft, int[] orderRight,
            TKey[] keysLeft, TKey[] keysRight,
            int[] icolsLeft, int[] icolsRight,
            string leftSuffix, string rightSuffix,
            JoinStrategy joinType,
            Func <TKey, DataFrameGroupKey[]> funcLeft,
            Func <TKey, DataFrameGroupKey[]> funcRight)
            where TKey : IEquatable <TKey>, IComparable <TKey>
        {
            var groupLeft  = DataFrameGrouping.TGroupBy <TKey>(left, orderLeft, keysLeft, icolsLeft, funcLeft);
            var groupRight = DataFrameGrouping.TGroupBy <TKey>(right, orderRight, keysRight, icolsRight, funcRight);

            // The enumerators are disposed when the join completes, fails, or when the
            // consumer stops enumerating early (using == try/finally, legal around yield).
            using (var iterLeft = groupLeft.GetEnumerator())
            using (var iterRight = groupRight.GetEnumerator())
            {
                bool contLeft  = iterLeft.MoveNext();
                bool contRight = iterRight.MoveNext();

                leftSuffix  = string.IsNullOrEmpty(leftSuffix) ? string.Empty : leftSuffix;
                rightSuffix = string.IsNullOrEmpty(rightSuffix) ? string.Empty : rightSuffix;
                var newColsLeft  = left.Columns.Select(c => c + leftSuffix).ToArray();
                var newColsRight = right.Columns.Select(c => c + rightSuffix).ToArray();
                var existsCols   = new HashSet <string>(newColsLeft);

                // A right column name colliding with an already used name gets "_y"
                // appended until it becomes unique.
                for (int i = 0; i < newColsRight.Length; ++i)
                {
                    while (existsCols.Contains(newColsRight[i]))
                    {
                        newColsRight[i] += "_y";
                    }
                    existsCols.Add(newColsRight[i]);
                }

                // Final column order: left columns first, then right columns.
                var newCols = newColsLeft.Concat(newColsRight).ToArray();

                while (contLeft || contRight)
                {
                    // When one side is exhausted, the remaining side is treated as
                    // unmatched: only left remaining => -1, only right remaining => 1.
                    int r = contLeft && contRight
                            ? iterLeft.Current.Key.CompareTo(iterRight.Current.Key)
                            : (contRight ? 1 : -1);

                    if (r < 0)
                    {
                        // Group without a counterpart on the right side.
                        if (joinType == JoinStrategy.Left || joinType == JoinStrategy.Outer)
                        {
                            var df = iterLeft.Current.Value.Copy();
                            if (!string.IsNullOrEmpty(leftSuffix))
                            {
                                df.RenameColumns(newColsLeft);
                            }

                            // Right columns are added and filled with the missing value of their kind.
                            for (int i = 0; i < right.ColumnCount; ++i)
                            {
                                var kind = right.Schema.GetColumnType(i).RawKind;
                                var col  = df.AddColumn(newColsRight[i], kind, df.Length);
                                df.GetColumn(col).Set(DataFrameMissingValue.GetMissingValue(kind));
                            }
                            yield return df;
                        }
                        contLeft = iterLeft.MoveNext();
                    }
                    else if (r > 0)
                    {
                        // Group without a counterpart on the left side.
                        if (joinType == JoinStrategy.Right || joinType == JoinStrategy.Outer)
                        {
                            var df = iterRight.Current.Value.Copy();
                            df.RenameColumns(newColsRight);

                            // Left columns are added and filled with the missing value of their kind.
                            for (int i = 0; i < left.ColumnCount; ++i)
                            {
                                var kind = left.Schema.GetColumnType(i).RawKind;
                                var col  = df.AddColumn(newColsLeft[i], kind, df.Length);
                                df.GetColumn(col).Set(DataFrameMissingValue.GetMissingValue(kind));
                            }

                            // The left columns were appended last; restore left-then-right order.
                            df.OrderColumns(newCols);
                            yield return df;
                        }
                        contRight = iterRight.MoveNext();
                    }
                    else
                    {
                        // Same key on both sides.
                        var dfLeft  = iterLeft.Current.Value.Copy();
                        var dfRight = iterRight.Current.Value.Copy();
                        if (!string.IsNullOrEmpty(leftSuffix))
                        {
                            dfLeft.RenameColumns(newColsLeft);
                        }
                        dfRight.RenameColumns(newColsRight);

                        // NOTE(review): Block on one side and Row on the other presumably
                        // line the rows up as a cross product of both groups - confirm
                        // against Multiply.
                        var vleft  = dfLeft.Multiply(dfRight.Length, MultiplyStrategy.Block).Copy();
                        var vright = dfRight.Multiply(dfLeft.Length, MultiplyStrategy.Row).Copy();
                        for (int i = 0; i < vright.ColumnCount; ++i)
                        {
                            vleft.AddColumn(newColsRight[i], vright.GetColumn(i));
                        }
                        yield return vleft;

                        contLeft  = iterLeft.MoveNext();
                        contRight = iterRight.MoveNext();
                    }
                }
            }
        }