Ejemplo n.º 1
0
        /// <summary>
        /// Joins the specified enumerations of values into a single enumeration of value arrays
        /// by applying the specified join strategy.
        /// </summary>
        /// <remarks>
        /// The argument check runs immediately when the method is called; the join itself is
        /// evaluated lazily while the returned sequence is enumerated.
        /// </remarks>
        /// <param name="joinStrategy">The join strategy to use.</param>
        /// <param name="values">An array of enumerations of values.</param>
        /// <returns>The resulting enumeration; each element is an array holding one value per
        /// input enumeration, in the same order as <paramref name="values"/>.</returns>
        /// <exception cref="ArgumentNullException">Thrown if <paramref name="values"/> is null.</exception>
        /// <seealso cref="Join(IEnumerable[])"/>
        /// <seealso cref="CombinatorialJoinAttribute"/>
        /// <seealso cref="SequentialJoinAttribute"/>
        /// <seealso cref="PairwiseJoinAttribute"/>
        public static IEnumerable <object[]> Join(JoinStrategy joinStrategy, params IEnumerable[] values)
        {
            // Validate here, outside the iterator, so the documented exception is raised
            // at call time instead of a NullReferenceException on first enumeration.
            if (values == null)
            {
                throw new ArgumentNullException(nameof(values));
            }

            return(JoinIterator(joinStrategy, values));
        }

        /// <summary>
        /// Lazy part of <see cref="Join(JoinStrategy, IEnumerable[])"/>; expects a non-null array.
        /// </summary>
        private static IEnumerable <object[]> JoinIterator(JoinStrategy joinStrategy, IEnumerable[] values)
        {
            int size      = values.Length;
            var binding   = new DataBinding(0, null);
            var bindings  = new DataBinding[size][];
            var providers = new List <IDataProvider>(size);

            // Every input sequence becomes its own provider and is read through the same binding.
            for (int i = 0; i < size; i++)
            {
                providers.Add(new ValueSequenceDataSet(values[i], null, false));
                bindings[i] = new[] { binding };
            }

            foreach (var items in map[joinStrategy].Join(providers, bindings, false))
            {
                var array = new object[size];

                for (int i = 0; i < size; i++)
                {
                    array[i] = items[i].GetValue(binding);
                }

                yield return(array);
            }
        }
        /// <summary>
        /// Last dispatch step for a join on three key columns: picks the third key type from the
        /// kind of column <c>icolsLeft[2]</c> of <paramref name="left"/> and forwards to
        /// <c>left.TJoin&lt;T1, T2, T3&gt;</c>.
        /// </summary>
        /// <typeparam name="T1">First key type, chosen by the caller (presumably the type of the first key column).</typeparam>
        /// <typeparam name="T2">Second key type, chosen by the caller (presumably the type of the second key column).</typeparam>
        /// <param name="left">Left view; its <c>Kinds</c> drive the dispatch.</param>
        /// <param name="right">Right view, forwarded unchanged.</param>
        /// <param name="icolsLeft">Key column indexes in the left view; exactly three are supported.</param>
        /// <param name="icolsRight">Key column indexes in the right view, forwarded unchanged.</param>
        /// <param name="leftSuffix">Forwarded unchanged.</param>
        /// <param name="rightSuffix">Forwarded unchanged.</param>
        /// <param name="joinType">Forwarded unchanged.</param>
        /// <param name="sort">Forwarded unchanged.</param>
        /// <returns>The data frame returned by the selected <c>TJoin</c> instantiation.</returns>
        /// <exception cref="NotImplementedException">
        /// Thrown when the number of key columns is not three or the third column kind is not handled.
        /// </exception>
        static DataFrame RecJoin <T1, T2>(IDataFrameView left, IDataFrameView right, int[] icolsLeft, int[] icolsRight,
                                          string leftSuffix     = null, string rightSuffix      = null,
                                          JoinStrategy joinType = JoinStrategy.Inner, bool sort = true)
            where T1 : IEquatable <T1>, IComparable <T1>
            where T2 : IEquatable <T2>, IComparable <T2>
        {
            // Check the column count before reading icolsLeft[2]; otherwise a shorter array
            // fails with an IndexOutOfRangeException and the explicit message is never produced.
            if (icolsLeft.Length != 3)
            {
                throw new NotImplementedException($"Join is not implemented for {icolsLeft.Length} columns.");
            }

            var kind = left.Kinds[icolsLeft[2]];

            switch (kind)
            {
            case DataKind.BL: return(left.TJoin <T1, T2, DvBool>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort));

            case DataKind.I4: return(left.TJoin <T1, T2, DvInt4>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort));

            case DataKind.U4: return(left.TJoin <T1, T2, uint>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort));

            case DataKind.I8: return(left.TJoin <T1, T2, DvInt8>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort));

            case DataKind.R4: return(left.TJoin <T1, T2, float>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort));

            case DataKind.R8: return(left.TJoin <T1, T2, double>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort));

            case DataKind.TX: return(left.TJoin <T1, T2, DvText>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort));

            default:
                throw new NotImplementedException($"Join is not implemented for type '{kind}'.");
            }
        }
        /// <summary>
        /// First dispatch step of a join: selects the key type from the kind of the first left key
        /// column. With a single key column the join is run directly through <c>left.TJoin</c>;
        /// otherwise the generic <c>RecJoin</c> overload is called with the selected type.
        /// </summary>
        /// <param name="left">Left view; its <c>Kinds</c> drive the dispatch.</param>
        /// <param name="right">Right view, forwarded unchanged.</param>
        /// <param name="icolsLeft">Key column indexes in the left view.</param>
        /// <param name="icolsRight">Key column indexes in the right view.</param>
        /// <param name="leftSuffix">Forwarded unchanged.</param>
        /// <param name="rightSuffix">Forwarded unchanged.</param>
        /// <param name="joinType">Forwarded unchanged.</param>
        /// <param name="sort">Forwarded unchanged.</param>
        /// <returns>The data frame produced by the selected join.</returns>
        /// <exception cref="NotImplementedException">Thrown when the first key column kind is not handled.</exception>
        static DataFrame RecJoin(IDataFrameView left, IDataFrameView right, int[] icolsLeft, int[] icolsRight,
                                 string leftSuffix     = null, string rightSuffix      = null,
                                 JoinStrategy joinType = JoinStrategy.Inner, bool sort = true)
        {
            var  firstKind = left.Kinds[icolsLeft[0]];
            bool singleKey = icolsLeft.Length == 1;

            switch (firstKind)
            {
            case DataKind.BL:
                return(singleKey
                       ? left.TJoin <DvBool>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort)
                       : RecJoin <DvBool>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort));

            case DataKind.I4:
                return(singleKey
                       ? left.TJoin <DvInt4>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort)
                       : RecJoin <DvInt4>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort));

            case DataKind.U4:
                return(singleKey
                       ? left.TJoin <uint>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort)
                       : RecJoin <uint>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort));

            case DataKind.I8:
                return(singleKey
                       ? left.TJoin <DvInt8>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort)
                       : RecJoin <DvInt8>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort));

            case DataKind.R4:
                return(singleKey
                       ? left.TJoin <float>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort)
                       : RecJoin <float>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort));

            case DataKind.R8:
                return(singleKey
                       ? left.TJoin <double>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort)
                       : RecJoin <double>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort));

            case DataKind.TX:
                return(singleKey
                       ? left.TJoin <DvText>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort)
                       : RecJoin <DvText>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort));

            default:
                throw new NotImplementedException($"Join is not implemented for type '{firstKind}'.");
            }
        }
 /// <summary>
 /// Joins two views on the given key columns after checking that both sides use the same
 /// number of key columns and that paired key columns have the same column type.
 /// </summary>
 /// <param name="left">Left view.</param>
 /// <param name="right">Right view.</param>
 /// <param name="colsLeft">Key column indexes in the left view.</param>
 /// <param name="colsRight">Key column indexes in the right view.</param>
 /// <param name="leftSuffix">Forwarded unchanged to <c>RecJoin</c>.</param>
 /// <param name="rightSuffix">Forwarded unchanged to <c>RecJoin</c>.</param>
 /// <param name="joinType">Forwarded unchanged to <c>RecJoin</c>.</param>
 /// <param name="sort">Forwarded unchanged to <c>RecJoin</c>.</param>
 /// <returns>The data frame returned by <c>RecJoin</c>.</returns>
 public static DataFrame Join(IDataFrameView left, IDataFrameView right,
                              IEnumerable <int> colsLeft, IEnumerable <int> colsRight,
                              string leftSuffix     = null, string rightSuffix      = null,
                              JoinStrategy joinType = JoinStrategy.Inner, bool sort = true)
 {
     int[] leftKeys  = colsLeft.ToArray();
     int[] rightKeys = colsRight.ToArray();

     if (leftKeys.Length != rightKeys.Length)
     {
         throw new DataValueError("Left and right must be joined with the same number of columns.");
     }

     // Paired key columns are compared position by position; the scan stops at the first mismatch.
     bool typeMismatch = Enumerable.Range(0, leftKeys.Length).Any(
         k => left.SchemaI.GetColumnType(leftKeys[k]) != right.SchemaI.GetColumnType(rightKeys[k]));

     if (typeMismatch)
     {
         throw new DataTypeError("Left and right must be joined with the same number of columns and the same types.");
     }

     return(RecJoin(left, right, leftKeys, rightKeys, leftSuffix, rightSuffix, joinType, sort));
 }
        /// <summary>
        /// Builds the join keys for both sides, optionally sorts each side by key, runs the
        /// group-wise <c>TJoin&lt;TImutKey&gt;</c> iterator and concatenates its pieces.
        /// </summary>
        /// <typeparam name="TMutKey">Key type filled by the multi-getters.</typeparam>
        /// <typeparam name="TImutKey">Comparable key type obtained through <paramref name="conv"/>.</typeparam>
        /// <param name="left">Left view.</param>
        /// <param name="right">Right view.</param>
        /// <param name="rowsLeft">Row selection passed to <c>left.EnumerateItems</c>; may be null.</param>
        /// <param name="rowsRight">Row selection passed to <c>right.EnumerateItems</c>; may be null.</param>
        /// <param name="columnsLeft">Not used by this method.</param>
        /// <param name="columnsRight">Not used by this method.</param>
        /// <param name="colsLeft">Key column indexes in the left view.</param>
        /// <param name="colsRight">Key column indexes in the right view.</param>
        /// <param name="sort">When true, both sides are sorted by key through <c>DataFrameSorting.TSort</c>.</param>
        /// <param name="leftSuffix">Forwarded to the group-wise join.</param>
        /// <param name="rightSuffix">Forwarded to the group-wise join.</param>
        /// <param name="joinType">Forwarded to the group-wise join.</param>
        /// <param name="getterLeft">Getter used to read left keys.</param>
        /// <param name="getterRight">Getter used to read right keys.</param>
        /// <param name="conv">Converts a mutable key into its comparable form.</param>
        /// <param name="convLeft">Converts a key into group keys for the left side.</param>
        /// <param name="convRight">Converts a key into group keys for the right side.</param>
        /// <returns>The concatenation of all frames produced by the group-wise join.</returns>
        public static DataFrame TJoin <TMutKey, TImutKey>(
            IDataFrameView left, IDataFrameView right,
            int[] rowsLeft, int[] rowsRight,
            int[] columnsLeft, int[] columnsRight,
            IEnumerable <int> colsLeft, IEnumerable <int> colsRight,
            bool sort,
            string leftSuffix, string rightSuffix,
            JoinStrategy joinType,
            MultiGetterAt <TMutKey> getterLeft,
            MultiGetterAt <TMutKey> getterRight,
            Func <TMutKey, TImutKey> conv,
            Func <TImutKey, DataFrameGroupKey[]> convLeft,
            Func <TImutKey, DataFrameGroupKey[]> convRight)
            where TMutKey : ITUple, new()
            where TImutKey : IComparable <TImutKey>, IEquatable <TImutKey>
        {
            var icolsLeft  = colsLeft.ToArray();
            var icolsRight = colsRight.ToArray();

            // Positional orderings 0..Length-1, one entry per side. A null row selection no longer
            // fails, and the right-hand ordering no longer depends on rowsLeft.
            // NOTE(review): assumes EnumerateItems yields exactly Length keys whether the row
            // selection is null or not - confirm against its implementation.
            int[] orderLeft  = Enumerable.Range(0, left.Length).ToArray();
            int[] orderRight = Enumerable.Range(0, right.Length).ToArray();

            var keysLeft  = left.EnumerateItems(icolsLeft, true, rowsLeft, getterLeft).Select(c => conv(c)).ToArray();
            var keysRight = right.EnumerateItems(icolsRight, true, rowsRight, getterRight).Select(c => conv(c)).ToArray();

            if (sort)
            {
                DataFrameSorting.TSort(left, ref orderLeft, keysLeft, true);
                DataFrameSorting.TSort(right, ref orderRight, keysRight, true);
            }
            var iter = TJoin <TImutKey>(left, right, orderLeft, orderRight, keysLeft, keysRight,
                                        icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, convLeft, convRight);

            return(DataFrame.Concat(iter));
        }
Ejemplo n.º 6
0
 /// <summary>
 /// Joins this data frame with <paramref name="right"/> on three key columns by delegating to
 /// a <c>DataFrameView</c> created over this frame.
 /// </summary>
 /// <returns>The data frame returned by the view's <c>TJoin</c>.</returns>
 public DataFrame TJoin <T1, T2, T3>(IDataFrameView right, IEnumerable <int> colsLeft, IEnumerable <int> colsRight, string leftSuffix = null, string rightSuffix = null, JoinStrategy joinType = JoinStrategy.Inner, bool sort = true)
     where T1 : IEquatable <T1>, IComparable <T1>
     where T2 : IEquatable <T2>, IComparable <T2>
     where T3 : IEquatable <T3>, IComparable <T3>
 {
     // The view is built with two null arguments (presumably "all rows, all columns" - TODO confirm).
     var selfView = new DataFrameView(this, null, null);

     return(selfView.TJoin <T1, T2, T3>(right, colsLeft, colsRight, leftSuffix, rightSuffix, joinType, sort));
 }
Ejemplo n.º 7
0
 /// <summary>
 /// Joins this data frame with <paramref name="right"/> on the given key column indexes by
 /// delegating to a <c>DataFrameView</c> created over this frame.
 /// </summary>
 /// <returns>The data frame returned by the view's <c>Join</c>.</returns>
 public DataFrame Join(IDataFrameView right, IEnumerable <int> colsLeft, IEnumerable <int> colsRight,
                       string leftSuffix     = null, string rightSuffix      = null,
                       JoinStrategy joinType = JoinStrategy.Inner, bool sort = true)
 {
     // The view is built with two null arguments (presumably "all rows, all columns" - TODO confirm).
     var selfView = new DataFrameView(this, null, null);

     return(selfView.Join(right, colsLeft, colsRight, leftSuffix, rightSuffix, joinType, sort));
 }
Ejemplo n.º 8
0
        /// <summary>
        /// Joins this view with <paramref name="right"/> on three key columns by collecting row
        /// and column selections, key column names and getters, then calling
        /// <c>DataFrameJoining.TJoin</c>.
        /// </summary>
        /// <typeparam name="T1">Type used to read the first key column.</typeparam>
        /// <typeparam name="T2">Type used to read the second key column.</typeparam>
        /// <typeparam name="T3">Type used to read the third key column.</typeparam>
        /// <param name="right">Right operand; a <c>DataFrame</c> or a <c>DataFrameView</c>.</param>
        /// <param name="colsLeft">Key column indexes in this view.</param>
        /// <param name="colsRight">Key column indexes in <paramref name="right"/>.</param>
        /// <param name="leftSuffix">Forwarded unchanged.</param>
        /// <param name="rightSuffix">Forwarded unchanged.</param>
        /// <param name="joinType">Forwarded unchanged.</param>
        /// <param name="sort">Forwarded unchanged.</param>
        /// <returns>The data frame returned by <c>DataFrameJoining.TJoin</c>.</returns>
        public DataFrame TJoin <T1, T2, T3>(IDataFrameView right, IEnumerable <int> colsLeft, IEnumerable <int> colsRight, string leftSuffix = null, string rightSuffix = null, JoinStrategy joinType = JoinStrategy.Inner, bool sort = true)
            where T1 : IEquatable <T1>, IComparable <T1>
            where T2 : IEquatable <T2>, IComparable <T2>
            where T3 : IEquatable <T3>, IComparable <T3>
        {
            int[] leftRows = _rows.Select(row => row).ToArray();

            // A plain DataFrame passes null selections; anything else is read as a
            // DataFrameView and contributes its own rows and columns.
            int[] rightRows;
            int[] rightColumns;
            if (right is DataFrame)
            {
                rightRows    = null;
                rightColumns = null;
            }
            else
            {
                rightRows    = (right as DataFrameView)._rows.Select(row => row).ToArray();
                rightColumns = (right as DataFrameView)._columns;
            }

            var leftKeyIndexes  = colsLeft.ToArray();
            var rightKeyIndexes = colsRight.ToArray();
            var leftKeyNames    = leftKeyIndexes.Select(index => Schema.GetColumnName(index)).ToArray();
            var rightKeyNames   = rightKeyIndexes.Select(index => right.SchemaI.GetColumnName(index)).ToArray();

            return(DataFrameJoining.TJoin(this, right,
                                          leftRows, rightRows,
                                          _columns, rightColumns,
                                          leftKeyIndexes, rightKeyIndexes, sort,
                                          leftSuffix, rightSuffix,
                                          joinType,
                                          GetMultiGetterAt <T1, T2, T3>(leftKeyIndexes),
                                          right.GetMultiGetterAt <T1, T2, T3>(rightKeyIndexes),
                                          key => key.ToImTuple(),
                                          key => DataFrameGroupKey.Create(leftKeyNames, key),
                                          key => DataFrameGroupKey.Create(rightKeyNames, key)));
        }
Ejemplo n.º 9
0
 /// <summary>
 /// Joins with <paramref name="right"/> using left key columns given by name and right key
 /// columns given by index. Names are translated with <c>GetColumnIndex</c> and the call is
 /// forwarded to the index-based <c>Join</c> overload.
 /// </summary>
 /// <returns>The data frame returned by the index-based <c>Join</c>.</returns>
 public DataFrame Join(IDataFrameView right, IEnumerable <string> colsLeft, IEnumerable <int> colsRight,
                       string leftSuffix     = null, string rightSuffix      = null,
                       JoinStrategy joinType = JoinStrategy.Inner, bool sort = true)
 {
     // Left as a lazy projection: the names are resolved when the callee enumerates it.
     var leftIndexes = colsLeft.Select(name => GetColumnIndex(name));

     return(Join(right, leftIndexes, colsRight, leftSuffix, rightSuffix, joinType, sort));
 }
Ejemplo n.º 10
0
        /// <summary>
        /// Second dispatch step of a join: selects the second key type from the kind of column
        /// <c>icolsLeft[1]</c>. With exactly two key columns the join is run through
        /// <c>left.TJoin&lt;T1, T2&gt;</c>; otherwise <c>RecJoin&lt;T1, T2&gt;</c> is called.
        /// </summary>
        /// <typeparam name="T1">First key type, chosen by the caller.</typeparam>
        /// <param name="left">Left view; its <c>Kinds</c> drive the dispatch.</param>
        /// <param name="right">Right view, forwarded unchanged.</param>
        /// <param name="icolsLeft">Key column indexes in the left view.</param>
        /// <param name="icolsRight">Key column indexes in the right view.</param>
        /// <param name="leftSuffix">Forwarded unchanged.</param>
        /// <param name="rightSuffix">Forwarded unchanged.</param>
        /// <param name="joinType">Forwarded unchanged.</param>
        /// <param name="sort">Forwarded unchanged.</param>
        /// <returns>The data frame produced by the selected join.</returns>
        /// <exception cref="NotImplementedException">
        /// Thrown for vector kinds and for raw kinds that are not handled.
        /// </exception>
        static DataFrame RecJoin <T1>(IDataFrameView left, IDataFrameView right, int[] icolsLeft, int[] icolsRight,
                                      string leftSuffix     = null, string rightSuffix      = null,
                                      JoinStrategy joinType = JoinStrategy.Inner, bool sort = true)
            where T1 : IEquatable <T1>, IComparable <T1>
        {
            var  secondKind = left.Kinds[icolsLeft[1]];
            bool lastKey    = icolsLeft.Length == 2;

            // Vector columns are rejected whatever the number of key columns.
            if (secondKind.IsVector())
            {
                throw new NotImplementedException();
            }

            switch (secondKind.RawKind())
            {
            case DataKind.Boolean:
                return(lastKey
                       ? left.TJoin <T1, bool>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort)
                       : RecJoin <T1, bool>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort));

            case DataKind.Int32:
                return(lastKey
                       ? left.TJoin <T1, int>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort)
                       : RecJoin <T1, int>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort));

            case DataKind.UInt32:
                return(lastKey
                       ? left.TJoin <T1, uint>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort)
                       : RecJoin <T1, uint>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort));

            case DataKind.Int64:
                return(lastKey
                       ? left.TJoin <T1, long>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort)
                       : RecJoin <T1, long>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort));

            case DataKind.Single:
                return(lastKey
                       ? left.TJoin <T1, float>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort)
                       : RecJoin <T1, float>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort));

            case DataKind.Double:
                return(lastKey
                       ? left.TJoin <T1, double>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort)
                       : RecJoin <T1, double>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort));

            case DataKind.String:
                return(lastKey
                       ? left.TJoin <T1, DvText>(right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort)
                       : RecJoin <T1, DvText>(left, right, icolsLeft, icolsRight, leftSuffix, rightSuffix, joinType, sort));

            default:
                throw new NotImplementedException($"Join is not implemented for type '{secondKind}'.");
            }
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Lazily joins two views group by group. Both sides are grouped by key with
        /// <c>DataFrameGrouping.TGroupBy</c>, then the two group sequences are walked in parallel:
        /// the side with the smaller current key advances alone, and equal keys produce the
        /// product of the two groups.
        /// </summary>
        /// <remarks>
        /// NOTE(review): advancing the smaller key only pairs groups correctly if both group
        /// sequences arrive in ascending key order - confirm that callers sort beforehand.
        /// </remarks>
        /// <typeparam name="TKey">Comparable key type.</typeparam>
        /// <param name="left">Left view.</param>
        /// <param name="right">Right view.</param>
        /// <param name="orderLeft">Row ordering passed to the left grouping.</param>
        /// <param name="orderRight">Row ordering passed to the right grouping.</param>
        /// <param name="keysLeft">Keys passed to the left grouping.</param>
        /// <param name="keysRight">Keys passed to the right grouping.</param>
        /// <param name="icolsLeft">Key column indexes in the left view.</param>
        /// <param name="icolsRight">Key column indexes in the right view.</param>
        /// <param name="leftSuffix">Suffix appended to every left column name; null means none.</param>
        /// <param name="rightSuffix">Suffix appended to every right column name; null means none.</param>
        /// <param name="joinType">Controls whether unmatched left and/or right groups are emitted.</param>
        /// <param name="funcLeft">Group key builder for the left side.</param>
        /// <param name="funcRight">Group key builder for the right side.</param>
        /// <returns>One data frame per emitted group, in the order the groups are visited.</returns>
        public static IEnumerable <DataFrame> TJoin <TKey>(
            IDataFrameView left, IDataFrameView right,
            int[] orderLeft, int[] orderRight,
            TKey[] keysLeft, TKey[] keysRight,
            int[] icolsLeft, int[] icolsRight,
            string leftSuffix, string rightSuffix,
            JoinStrategy joinType,
            Func <TKey, DataFrameGroupKey[]> funcLeft,
            Func <TKey, DataFrameGroupKey[]> funcRight)
            where TKey : IEquatable <TKey>, IComparable <TKey>
        {
            var groupLeft  = DataFrameGrouping.TGroupBy <TKey>(left, orderLeft, keysLeft, icolsLeft, funcLeft);
            var groupRight = DataFrameGrouping.TGroupBy <TKey>(right, orderRight, keysRight, icolsRight, funcRight);

            // The enumerators are disposable; the using blocks release them when the join
            // finishes or when the consumer abandons the sequence early.
            using (var iterLeft = groupLeft.GetEnumerator())
            using (var iterRight = groupRight.GetEnumerator())
            {
                bool contLeft  = iterLeft.MoveNext();
                bool contRight = iterRight.MoveNext();

                leftSuffix  = string.IsNullOrEmpty(leftSuffix) ? string.Empty : leftSuffix;
                rightSuffix = string.IsNullOrEmpty(rightSuffix) ? string.Empty : rightSuffix;
                var newColsLeft  = left.Columns.Select(c => c + leftSuffix).ToArray();
                var newColsRight = right.Columns.Select(c => c + rightSuffix).ToArray();
                var existsCols   = new HashSet <string>(newColsLeft);

                // A right column name that is already taken gets "_y" appended until it is unique.
                for (int i = 0; i < newColsRight.Length; ++i)
                {
                    while (existsCols.Contains(newColsRight[i]))
                    {
                        newColsRight[i] += "_y";
                    }
                    existsCols.Add(newColsRight[i]);
                }
                var newCols = newColsLeft.Concat(newColsRight).ToArray();

                int r;

                while (contLeft || contRight)
                {
                    // When one side is exhausted, r is forced so that only the other side advances.
                    r = contLeft && contRight
                        ? iterLeft.Current.Key.CompareTo(iterRight.Current.Key)
                        : (contRight ? 1 : -1);

                    if (r < 0)
                    {
                        // Left group without a match: emitted for Left/Outer joins,
                        // with every right column filled with its missing value.
                        if (joinType == JoinStrategy.Left || joinType == JoinStrategy.Outer)
                        {
                            var df = iterLeft.Current.Value.Copy();
                            if (!string.IsNullOrEmpty(leftSuffix))
                            {
                                df.RenameColumns(newColsLeft);
                            }
                            for (int i = 0; i < right.ColumnCount; ++i)
                            {
                                var kind = right.SchemaI.GetColumnType(i);
                                var col  = df.AddColumn(newColsRight[i], kind, df.Length);
                                df.GetColumn(col).Set(DataFrameMissingValue.GetMissingOrDefaultMissingValue(kind));
                            }
                            yield return(df);
                        }
                        contLeft = iterLeft.MoveNext();
                    }
                    else if (r > 0)
                    {
                        // Right group without a match: emitted for Right/Outer joins, with every
                        // left column filled with its missing value and columns reordered left-first.
                        if (joinType == JoinStrategy.Right || joinType == JoinStrategy.Outer)
                        {
                            var df = iterRight.Current.Value.Copy();
                            df.RenameColumns(newColsRight);
                            for (int i = 0; i < left.ColumnCount; ++i)
                            {
                                var kind = left.SchemaI.GetColumnType(i);
                                var col  = df.AddColumn(newColsLeft[i], kind, df.Length);
                                df.GetColumn(col).Set(DataFrameMissingValue.GetMissingOrDefaultMissingValue(kind));
                            }
                            df.OrderColumns(newCols);
                            yield return(df);
                        }
                        contRight = iterRight.MoveNext();
                    }
                    else
                    {
                        // Matching keys: each side is repeated by the other side's row count
                        // (Block vs Row strategy) and the right columns are appended to the left.
                        var dfLeft  = iterLeft.Current.Value.Copy();
                        var dfRight = iterRight.Current.Value.Copy();
                        if (!string.IsNullOrEmpty(leftSuffix))
                        {
                            dfLeft.RenameColumns(newColsLeft);
                        }
                        dfRight.RenameColumns(newColsRight);
                        var vleft  = dfLeft.Multiply(dfRight.Length, MultiplyStrategy.Block).Copy();
                        var vright = dfRight.Multiply(dfLeft.Length, MultiplyStrategy.Row).Copy();
                        for (int i = 0; i < vright.ColumnCount; ++i)
                        {
                            vleft.AddColumn(newColsRight[i], vright.GetColumn(i));
                        }
                        yield return(vleft);

                        contLeft  = iterLeft.MoveNext();
                        contRight = iterRight.MoveNext();
                    }
                }
            }
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Joins the two specified enumerations of values into a single enumeration of paired values
        /// by applying the specified join strategy.
        /// </summary>
        /// <typeparam name="T1">The type of the values in the first enumeration.</typeparam>
        /// <typeparam name="T2">The type of the values in the second enumeration.</typeparam>
        /// <param name="values1">The first enumeration of values.</param>
        /// <param name="values2">The second enumeration of values.</param>
        /// <param name="joinStrategy">The join strategy to use.</param>
        /// <returns>The resulting enumeration of paired values.</returns>
        /// <exception cref="ArgumentNullException">Thrown if <paramref name="values1"/> or <paramref name="values2"/> is null.</exception>
        /// <seealso cref="Join{T1,T2}(IEnumerable{T1},IEnumerable{T2})"/>
        /// <seealso cref="CombinatorialJoinAttribute"/>
        /// <seealso cref="SequentialJoinAttribute"/>
        /// <seealso cref="PairwiseJoinAttribute"/>
        public static IEnumerable <Pair <T1, T2> > Join <T1, T2>(IEnumerable <T1> values1, IEnumerable <T2> values2, JoinStrategy joinStrategy)
        {
            // One provider per input sequence, in argument order.
            var sources = new List <IDataProvider>();
            sources.Add(new ValueSequenceDataSet(values1, null, false));
            sources.Add(new ValueSequenceDataSet(values2, null, false));

            // Both providers are read through the same binding.
            var valueBinding  = new DataBinding(0, null);
            var sourceBinding = new DataBinding[][]
            {
                new[] { valueBinding },
                new[] { valueBinding }
            };

            foreach (var row in map[joinStrategy].Join(sources, sourceBinding, false))
            {
                T1 first  = (T1)row[0].GetValue(valueBinding);
                T2 second = (T2)row[1].GetValue(valueBinding);

                yield return(new Pair <T1, T2>(first, second));
            }
        }