Esempio n. 1
0
        /// <summary>
        /// Joins this DataFrame with <paramref name="other"/> side by side, pairing rows by position
        /// (row i of this DataFrame with row i of <paramref name="other"/>). No key column is involved.
        /// </summary>
        /// <param name="other">The DataFrame whose columns are placed after this DataFrame's columns.</param>
        /// <param name="leftSuffix">Forwarded to SetSuffixForDuplicatedColumnNames each time a column of <paramref name="other"/> is added.</param>
        /// <param name="rightSuffix">Forwarded to SetSuffixForDuplicatedColumnNames each time a column of <paramref name="other"/> is added.</param>
        /// <param name="joinAlgorithm">
        /// Decides the row count of the result:
        /// Left keeps this DataFrame's row count, Right keeps <paramref name="other"/>'s row count,
        /// FullOuter uses the larger of the two and Inner the smaller.
        /// Columns that are too short are padded with nulls; columns that are too long keep only their leading rows.
        /// </param>
        /// <returns>A new DataFrame holding clones of the columns of both inputs.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="other"/> is null.</exception>
        /// <exception cref="NotImplementedException"><paramref name="joinAlgorithm"/> is not one of the supported values.</exception>
        public DataFrame Join(DataFrame other, string leftSuffix = "_left", string rightSuffix = "_right", JoinAlgorithm joinAlgorithm = JoinAlgorithm.Left)
        {
            // Fail before any column is cloned instead of with a NullReferenceException half-way through.
            if (other == null)
            {
                throw new ArgumentNullException(nameof(other));
            }

            // Builds the map 0, 1, ..., length - 1 that is handed to Clone to keep only the first `length` rows.
            PrimitiveColumn<long> CreateLeadingRowsMap(long length)
            {
                PrimitiveColumn<long> map = new PrimitiveColumn<long>("mapIndices", length);
                for (long row = 0; row < length; row++)
                {
                    map[row] = row;
                }
                return map;
            }

            DataFrame ret = new DataFrame();

            if (joinAlgorithm == JoinAlgorithm.Left)
            {
                // Left columns are taken as they are.
                for (int i = 0; i < ColumnCount; i++)
                {
                    BaseColumn newColumn = Column(i).Clone();
                    ret.InsertColumn(ret.ColumnCount, newColumn);
                }

                PrimitiveColumn<long> mapIndices = CreateLeadingRowsMap(Math.Min(RowCount, other.RowCount));
                for (int i = 0; i < other.ColumnCount; i++)
                {
                    BaseColumn newColumn;
                    if (other.RowCount < RowCount)
                    {
                        // Right side is shorter: pad it up to the left row count.
                        newColumn = other.Column(i).Clone(numberOfNullsToAppend: RowCount - other.RowCount);
                    }
                    else
                    {
                        // Right side is as long or longer: keep only its leading rows.
                        newColumn = other.Column(i).Clone(mapIndices);
                    }
                    SetSuffixForDuplicatedColumnNames(ret, newColumn, leftSuffix, rightSuffix);
                    ret.InsertColumn(ret.ColumnCount, newColumn);
                }
            }
            else if (joinAlgorithm == JoinAlgorithm.Right)
            {
                PrimitiveColumn<long> mapIndices = CreateLeadingRowsMap(Math.Min(RowCount, other.RowCount));
                for (int i = 0; i < ColumnCount; i++)
                {
                    BaseColumn newColumn;
                    if (RowCount < other.RowCount)
                    {
                        // Left side is shorter: pad it up to the right row count.
                        newColumn = Column(i).Clone(numberOfNullsToAppend: other.RowCount - RowCount);
                    }
                    else
                    {
                        // Left side is as long or longer: keep only its leading rows.
                        newColumn = Column(i).Clone(mapIndices);
                    }
                    ret.InsertColumn(ret.ColumnCount, newColumn);
                }

                // Right columns are taken as they are.
                for (int i = 0; i < other.ColumnCount; i++)
                {
                    BaseColumn newColumn = other.Column(i).Clone();
                    SetSuffixForDuplicatedColumnNames(ret, newColumn, leftSuffix, rightSuffix);
                    ret.InsertColumn(ret.ColumnCount, newColumn);
                }
            }
            else if (joinAlgorithm == JoinAlgorithm.FullOuter)
            {
                // Both sides are padded with nulls up to the larger row count.
                long newRowCount = Math.Max(RowCount, other.RowCount);

                long numberOfNulls = newRowCount - RowCount;
                for (int i = 0; i < ColumnCount; i++)
                {
                    BaseColumn newColumn = Column(i).Clone(numberOfNullsToAppend: numberOfNulls);
                    ret.InsertColumn(ret.ColumnCount, newColumn);
                }

                numberOfNulls = newRowCount - other.RowCount;
                for (int i = 0; i < other.ColumnCount; i++)
                {
                    BaseColumn newColumn = other.Column(i).Clone(numberOfNullsToAppend: numberOfNulls);
                    SetSuffixForDuplicatedColumnNames(ret, newColumn, leftSuffix, rightSuffix);
                    ret.InsertColumn(ret.ColumnCount, newColumn);
                }
            }
            else if (joinAlgorithm == JoinAlgorithm.Inner)
            {
                // Both sides keep only the rows they have in common by position.
                PrimitiveColumn<long> mapIndices = CreateLeadingRowsMap(Math.Min(RowCount, other.RowCount));
                for (int i = 0; i < ColumnCount; i++)
                {
                    BaseColumn newColumn = Column(i).Clone(mapIndices);
                    ret.InsertColumn(ret.ColumnCount, newColumn);
                }
                for (int i = 0; i < other.ColumnCount; i++)
                {
                    BaseColumn newColumn = other.Column(i).Clone(mapIndices);
                    SetSuffixForDuplicatedColumnNames(ret, newColumn, leftSuffix, rightSuffix);
                    ret.InsertColumn(ret.ColumnCount, newColumn);
                }
            }
            else
            {
                // Same reaction as Merge: an unknown algorithm is an error, not an empty result.
                throw new NotImplementedException(nameof(joinAlgorithm));
            }

            return ret;
        }
Esempio n. 2
0
        /// <summary>
        /// Asserts that every column of <paramref name="join"/> matches the column expected from
        /// <paramref name="left"/> or <paramref name="right"/> for the given positional join algorithm.
        /// </summary>
        /// <param name="join">The joined DataFrame under test; its first left.ColumnCount columns are compared against <paramref name="left"/>, the rest against <paramref name="right"/>.</param>
        /// <param name="left">The DataFrame that was the left input of the join.</param>
        /// <param name="right">The DataFrame that was the right input of the join.</param>
        /// <param name="joinAlgorithm">The algorithm that produced <paramref name="join"/>; any value other than Left, Right or Inner is checked with the null-padding rule.</param>
        private void VerifyJoin(DataFrame join, DataFrame left, DataFrame right, JoinAlgorithm joinAlgorithm)
        {
            long joinedRowCount = join.RowCount;

            // Map 0..joinedRowCount-1: cloning with it keeps only the leading rows of a longer column.
            PrimitiveColumn<long> leadingRows = new PrimitiveColumn<long>("map", joinedRowCount);
            for (long row = 0; row < joinedRowCount; row++)
            {
                leadingRows[row] = row;
            }

            // Brings a source column to the joined row count: pad with nulls when short, cut when long.
            BaseColumn PadOrTruncate(BaseColumn source)
            {
                return source.Length <= joinedRowCount
                    ? source.Clone(numberOfNullsToAppend: joinedRowCount - source.Length)
                    : source.Clone(leadingRows);
            }

            for (int columnIndex = 0; columnIndex < join.ColumnCount; columnIndex++)
            {
                BaseColumn actual = join.Column(columnIndex);

                // Columns of the left frame come first in the join, followed by those of the right frame.
                bool isLeftColumn = columnIndex < left.ColumnCount;
                BaseColumn source = isLeftColumn
                    ? left.Column(columnIndex)
                    : right.Column(columnIndex - left.ColumnCount);

                BaseColumn expected;
                switch (joinAlgorithm)
                {
                    case JoinAlgorithm.Left:
                        // The left side is compared as-is; the right side is resized to the joined row count.
                        expected = isLeftColumn ? source : PadOrTruncate(source);
                        break;

                    case JoinAlgorithm.Right:
                        // Mirror image of Left.
                        expected = isLeftColumn ? PadOrTruncate(source) : source;
                        break;

                    case JoinAlgorithm.Inner:
                        // Both sides keep only their leading rows.
                        expected = source.Clone(leadingRows);
                        break;

                    default:
                        // Remaining algorithms: both sides are padded with nulls.
                        expected = source.Clone(numberOfNullsToAppend: joinedRowCount - source.Length);
                        break;
                }

                BaseColumn isEqual = actual == expected;
                for (int row = 0; row < joinedRowCount; row++)
                {
                    Assert.Equal(true, isEqual[row]);
                }
            }
        }
Esempio n. 3
0
        // TODO: Merge API with an "On" parameter that merges on a column common to 2 dataframes

        /// <summary>
        /// Merge DataFrames with a database style join on one key column per side, implemented as a hash join.
        /// </summary>
        /// <remarks>
        /// A null key only matches another null key, and a non-null key never matches a null key.
        /// The columns of this DataFrame always come first in the result, followed by the columns of
        /// <paramref name="other"/>, whatever the join algorithm and the relative row counts are.
        /// </remarks>
        /// <typeparam name="TKey">The type of the values stored in the two join columns.</typeparam>
        /// <param name="other">The right DataFrame of the join.</param>
        /// <param name="leftJoinColumn">Name of the key column in this DataFrame.</param>
        /// <param name="rightJoinColumn">Name of the key column in <paramref name="other"/>.</param>
        /// <param name="leftSuffix">Forwarded to SetSuffixForDuplicatedColumnNames each time a column of <paramref name="other"/> is added.</param>
        /// <param name="rightSuffix">Forwarded to SetSuffixForDuplicatedColumnNames each time a column of <paramref name="other"/> is added.</param>
        /// <param name="joinAlgorithm">Left, Right, Inner or FullOuter.</param>
        /// <returns>A new DataFrame with one row per matched pair, plus the unmatched rows the algorithm retains (filled with nulls on the missing side).</returns>
        /// <exception cref="ArgumentNullException"><paramref name="other"/> is null.</exception>
        /// <exception cref="NotImplementedException"><paramref name="joinAlgorithm"/> is not one of the supported values.</exception>
        public DataFrame Merge<TKey>(DataFrame other, string leftJoinColumn, string rightJoinColumn, string leftSuffix = "_left", string rightSuffix = "_right", JoinAlgorithm joinAlgorithm = JoinAlgorithm.Left)
        {
            if (other == null)
            {
                throw new ArgumentNullException(nameof(other));
            }

            // The final table size is not known until runtime, so the row pairs are collected first:
            // entry n of leftRowIndices/rightRowIndices names the source rows of result row n (null = no source row).
            PrimitiveColumn<long> leftRowIndices = new PrimitiveColumn<long>("LeftIndices");
            PrimitiveColumn<long> rightRowIndices = new PrimitiveColumn<long>("RightIndices");

            // Scratch list reused for every probed row; filled by FindMatches.
            List<long> matches = new List<long>();

            // Fills `matches` with the rows of hashedColumn that join with row probeRow of probeColumn.
            // NOTE(review): the null checks assume GroupColumnValues files null rows under default(TKey),
            // as the original null filtering implies - confirm against GroupColumnValues.
            void FindMatches(BaseColumn probeColumn, long probeRow, BaseColumn hashedColumn, Dictionary<TKey, ICollection<long>> hashedRows)
            {
                matches.Clear();

                var probeValue = probeColumn[probeRow];
                bool probeIsNull = probeValue == null;
                TKey key = (TKey)(probeIsNull ? default(TKey) : probeValue);
                if (!hashedRows.TryGetValue(key, out ICollection<long> candidates))
                {
                    return;
                }

                foreach (long candidate in candidates)
                {
                    // Nulls pair only with nulls, values only with values.
                    if ((hashedColumn[candidate] == null) == probeIsNull)
                    {
                        matches.Add(candidate);
                    }
                }
            }

            if (joinAlgorithm == JoinAlgorithm.Left)
            {
                // First hash other dataframe on the rightJoinColumn
                BaseColumn otherColumn = other[rightJoinColumn];
                Dictionary<TKey, ICollection<long>> multimap = otherColumn.GroupColumnValues<TKey>();

                // Go over the records in this dataframe and match with the dictionary
                BaseColumn thisColumn = this[leftJoinColumn];
                for (long i = 0; i < thisColumn.Length; i++)
                {
                    FindMatches(thisColumn, i, otherColumn, multimap);
                    if (matches.Count == 0)
                    {
                        // A left join keeps every left row, including one whose bucket held no compatible row.
                        leftRowIndices.Append(i);
                        rightRowIndices.Append(null);
                        continue;
                    }
                    foreach (long row in matches)
                    {
                        leftRowIndices.Append(i);
                        rightRowIndices.Append(row);
                    }
                }
            }
            else if (joinAlgorithm == JoinAlgorithm.Right)
            {
                // Hash this dataframe and probe it with the rows of the other one.
                BaseColumn thisColumn = this[leftJoinColumn];
                Dictionary<TKey, ICollection<long>> multimap = thisColumn.GroupColumnValues<TKey>();

                BaseColumn otherColumn = other[rightJoinColumn];
                for (long i = 0; i < otherColumn.Length; i++)
                {
                    FindMatches(otherColumn, i, thisColumn, multimap);
                    if (matches.Count == 0)
                    {
                        // A right join keeps every right row.
                        leftRowIndices.Append(null);
                        rightRowIndices.Append(i);
                        continue;
                    }
                    foreach (long row in matches)
                    {
                        leftRowIndices.Append(row);
                        rightRowIndices.Append(i);
                    }
                }
            }
            else if (joinAlgorithm == JoinAlgorithm.Inner)
            {
                // Hash the column with the smaller RowCount and probe it with the other one.
                bool hashLeftSide = RowCount < other.RowCount;
                BaseColumn leftColumn = this[leftJoinColumn];
                BaseColumn rightColumn = other[rightJoinColumn];
                BaseColumn hashColumn = hashLeftSide ? leftColumn : rightColumn;
                BaseColumn probeColumn = hashLeftSide ? rightColumn : leftColumn;
                Dictionary<TKey, ICollection<long>> multimap = hashColumn.GroupColumnValues<TKey>();

                for (long i = 0; i < probeColumn.Length; i++)
                {
                    FindMatches(probeColumn, i, hashColumn, multimap);
                    foreach (long row in matches)
                    {
                        // `row` indexes the hashed side and `i` the probed side; route each to the frame it
                        // belongs to so that the left indices always refer to this DataFrame.
                        leftRowIndices.Append(hashLeftSide ? row : i);
                        rightRowIndices.Append(hashLeftSide ? i : row);
                    }
                }
            }
            else if (joinAlgorithm == JoinAlgorithm.FullOuter)
            {
                BaseColumn otherColumn = other[rightJoinColumn];
                Dictionary<TKey, ICollection<long>> multimap = otherColumn.GroupColumnValues<TKey>();

                // Right rows that found a partner; tracked by row so that null keys and default(TKey) keys stay distinct.
                HashSet<long> matchedRightRows = new HashSet<long>();

                // Step 1: a left join that also remembers which right rows were used.
                BaseColumn thisColumn = this[leftJoinColumn];
                for (long i = 0; i < thisColumn.Length; i++)
                {
                    FindMatches(thisColumn, i, otherColumn, multimap);
                    if (matches.Count == 0)
                    {
                        leftRowIndices.Append(i);
                        rightRowIndices.Append(null);
                        continue;
                    }
                    foreach (long row in matches)
                    {
                        leftRowIndices.Append(i);
                        rightRowIndices.Append(row);
                        matchedRightRows.Add(row);
                    }
                }

                // Step 2: add the right rows that no left row matched.
                for (long i = 0; i < otherColumn.Length; i++)
                {
                    if (!matchedRightRows.Contains(i))
                    {
                        leftRowIndices.Append(null);
                        rightRowIndices.Append(i);
                    }
                }
            }
            else
            {
                throw new NotImplementedException(nameof(joinAlgorithm));
            }

            // Materialize the result: gather the selected rows of every column, left frame first.
            DataFrame ret = new DataFrame();
            for (int i = 0; i < ColumnCount; i++)
            {
                ret.InsertColumn(i, Column(i).Clone(leftRowIndices));
            }
            for (int i = 0; i < other.ColumnCount; i++)
            {
                BaseColumn column = other.Column(i).Clone(rightRowIndices);
                SetSuffixForDuplicatedColumnNames(ret, column, leftSuffix, rightSuffix);
                ret.InsertColumn(ret.ColumnCount, column);
            }
            return ret;
        }
        /// <summary>
        /// Merge DataFrames with a database style join on one or more key columns per side.
        /// </summary>
        /// <remarks>
        /// In an outer join the joined DataFrame retains each row of the "retained" DataFrame even if no matching
        /// row exists in the "supplementary" DataFrame. Left joins retain this DataFrame, right joins retain
        /// <paramref name="other"/>, and full outer joins retain both. The row matching itself is delegated to
        /// the private Merge helper; this method only chooses the roles and assembles the result.
        /// </remarks>
        /// <param name="other">The right DataFrame of the join.</param>
        /// <param name="leftJoinColumns">Names of the key columns in this DataFrame.</param>
        /// <param name="rightJoinColumns">Names of the key columns in <paramref name="other"/>.</param>
        /// <param name="leftSuffix">Forwarded to SetSuffixForDuplicatedColumnNames each time a column of <paramref name="other"/> is added.</param>
        /// <param name="rightSuffix">Forwarded to SetSuffixForDuplicatedColumnNames each time a column of <paramref name="other"/> is added.</param>
        /// <param name="joinAlgorithm">Left, Right, Inner or FullOuter.</param>
        /// <returns>A new DataFrame holding the columns of this DataFrame followed by the columns of <paramref name="other"/>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="other"/> is null.</exception>
        /// <exception cref="NotImplementedException"><paramref name="joinAlgorithm"/> is not one of the supported values.</exception>
        public DataFrame Merge(DataFrame other, string[] leftJoinColumns, string[] rightJoinColumns, string leftSuffix = "_left", string rightSuffix = "_right", JoinAlgorithm joinAlgorithm = JoinAlgorithm.Left)
        {
            if (other == null)
            {
                throw new ArgumentNullException(nameof(other));
            }

            PrimitiveDataFrameColumn<long> retainedRowIndices;
            PrimitiveDataFrameColumn<long> supplementaryRowIndices;
            bool isLeftDataFrameRetained;

            if (joinAlgorithm == JoinAlgorithm.Left || joinAlgorithm == JoinAlgorithm.Right)
            {
                isLeftDataFrameRetained = (joinAlgorithm == JoinAlgorithm.Left);

                DataFrame retainedDataFrame = isLeftDataFrameRetained ? this : other;
                string[] retainedJoinColumns = isLeftDataFrameRetained ? leftJoinColumns : rightJoinColumns;
                DataFrame supplementaryDataFrame = isLeftDataFrameRetained ? other : this;
                string[] supplementaryJoinColumns = isLeftDataFrameRetained ? rightJoinColumns : leftJoinColumns;

                Merge(retainedDataFrame, supplementaryDataFrame, retainedJoinColumns, supplementaryJoinColumns, out retainedRowIndices, out supplementaryRowIndices);
            }
            else if (joinAlgorithm == JoinAlgorithm.Inner)
            {
                // Use as supplementary (for hashing) the dataframe with the smaller RowCount.
                isLeftDataFrameRetained = (Rows.Count > other.Rows.Count);

                DataFrame retainedDataFrame = isLeftDataFrameRetained ? this : other;
                string[] retainedJoinColumns = isLeftDataFrameRetained ? leftJoinColumns : rightJoinColumns;
                DataFrame supplementaryDataFrame = isLeftDataFrameRetained ? other : this;
                string[] supplementaryJoinColumns = isLeftDataFrameRetained ? rightJoinColumns : leftJoinColumns;

                Merge(retainedDataFrame, supplementaryDataFrame, retainedJoinColumns, supplementaryJoinColumns, out retainedRowIndices, out supplementaryRowIndices, true);
            }
            else if (joinAlgorithm == JoinAlgorithm.FullOuter)
            {
                // A full outer join retains data from both sides, so it is done in two steps:
                // a LEFT JOIN first, then the rows of the right side that the LEFT JOIN did not use.

                // Step 1: LEFT JOIN (this DataFrame is retained, `other` is supplementary).
                isLeftDataFrameRetained = true;
                var intersection = Merge(this, other, leftJoinColumns, rightJoinColumns, out retainedRowIndices, out supplementaryRowIndices, calculateIntersection: true);

                // The key columns of the right side do not change per row, so resolve them once.
                var columns = rightJoinColumns.Select(name => other.Columns[name]).ToArray();

                // Step 2: add the right rows missing from step 1. Rows for which IsAnyNullValueInColumns
                // reports a null key, and rows already listed in the intersection, are skipped.
                for (long i = 0; i < other.Columns.RowCount; i++)
                {
                    if (!IsAnyNullValueInColumns(columns, i) && !intersection.Contains(i))
                    {
                        retainedRowIndices.Append(null);
                        supplementaryRowIndices.Append(i);
                    }
                }
            }
            else
            {
                throw new NotImplementedException(nameof(joinAlgorithm));
            }

            // Translate retained/supplementary back to left/right so the output column order is always this, then other.
            PrimitiveDataFrameColumn<long> leftRowIndices = isLeftDataFrameRetained ? retainedRowIndices : supplementaryRowIndices;
            PrimitiveDataFrameColumn<long> rightRowIndices = isLeftDataFrameRetained ? supplementaryRowIndices : retainedRowIndices;

            DataFrame ret = new DataFrame();

            // Insert columns from left dataframe (this).
            for (int i = 0; i < this.Columns.Count; i++)
            {
                ret.Columns.Insert(i, this.Columns[i].Clone(leftRowIndices));
            }

            // Insert columns from right dataframe (other).
            for (int i = 0; i < other.Columns.Count; i++)
            {
                DataFrameColumn column = other.Columns[i].Clone(rightRowIndices);
                SetSuffixForDuplicatedColumnNames(ret, column, leftSuffix, rightSuffix);
                ret.Columns.Insert(ret.Columns.Count, column);
            }
            return ret;
        }
 /// <summary>
 /// Merge DataFrames with a database style join on a single key column per side (for backward compatibility).
 /// Wraps each column name in a one-element array and forwards to the multi-column Merge overload.
 /// </summary>
 /// <typeparam name="TKey">Not referenced by this method's body; part of the signature kept for backward compatibility.</typeparam>
 /// <param name="other">The right DataFrame of the join.</param>
 /// <param name="leftJoinColumn">Name of the key column in this DataFrame.</param>
 /// <param name="rightJoinColumn">Name of the key column in <paramref name="other"/>.</param>
 /// <param name="leftSuffix">Forwarded unchanged to the multi-column overload.</param>
 /// <param name="rightSuffix">Forwarded unchanged to the multi-column overload.</param>
 /// <param name="joinAlgorithm">Forwarded unchanged to the multi-column overload.</param>
 /// <returns>The DataFrame produced by the multi-column overload.</returns>
 public DataFrame Merge<TKey>(DataFrame other, string leftJoinColumn, string rightJoinColumn, string leftSuffix = "_left", string rightSuffix = "_right", JoinAlgorithm joinAlgorithm = JoinAlgorithm.Left)
 {
     string[] leftJoinColumns = { leftJoinColumn };
     string[] rightJoinColumns = { rightJoinColumn };

     return Merge(other, leftJoinColumns, rightJoinColumns, leftSuffix, rightSuffix, joinAlgorithm);
 }
Esempio n. 6
0
        // TODO: Merge API with an "On" parameter that merges on a column common to 2 dataframes

        /// <summary>
        /// Merge DataFrames with a database style join
        /// </summary>
        /// <param name="other"></param>
        /// <param name="leftJoinColumn"></param>
        /// <param name="rightJoinColumn"></param>
        /// <param name="leftSuffix"></param>
        /// <param name="rightSuffix"></param>
        /// <param name="joinAlgorithm"></param>
        /// <returns></returns>
        public DataFrame Merge <TKey>(DataFrame other, string leftJoinColumn, string rightJoinColumn, string leftSuffix = "_left", string rightSuffix = "_right", JoinAlgorithm joinAlgorithm = JoinAlgorithm.Left)
        {
            // A simple hash join
            DataFrame ret = new DataFrame();
            PrimitiveColumn <long> emptyMap = new PrimitiveColumn <long>("Empty");

            for (int i = 0; i < ColumnCount; i++)
            {
                // Create empty columns
                BaseColumn column = Column(i).Clone(emptyMap);
                ret.InsertColumn(ret.ColumnCount, column);
            }

            for (int i = 0; i < other.ColumnCount; i++)
            {
                // Create empty columns
                BaseColumn column = other.Column(i).Clone(emptyMap);
                SetSuffixForDuplicatedColumnNames(ret, column, leftSuffix, rightSuffix);
                ret.InsertColumn(ret.ColumnCount, column);
            }

            // The final table size is not known until runtime
            long rowNumber = 0;

            if (joinAlgorithm == JoinAlgorithm.Left)
            {
                // First hash other dataframe on the rightJoinColumn
                BaseColumn otherColumn = other[rightJoinColumn];
                Dictionary <TKey, ICollection <long> > multimap = otherColumn.GroupColumnValues <TKey>();

                // Go over the records in this dataframe and match with the dictionary
                BaseColumn thisColumn = this[leftJoinColumn];
                for (int c = 0; c < ret.ColumnCount; c++)
                {
                    ret.Column(c).Resize(thisColumn.Length);
                }

                for (long i = 0; i < thisColumn.Length; i++)
                {
                    if (rowNumber >= thisColumn.Length)
                    {
                        for (int c = 0; c < ret.ColumnCount; c++)
                        {
                            ret.Column(c).Resize(rowNumber + 1);
                        }
                    }
                    TKey value = (TKey)(thisColumn[i] ?? default(TKey));
                    if (multimap.TryGetValue(value, out ICollection <long> rowNumbers))
                    {
                        foreach (long row in rowNumbers)
                        {
                            if (thisColumn[i] == null)
                            {
                                // Match only with nulls in otherColumn
                                if (otherColumn[row] == null)
                                {
                                    AppendForMerge(ret, rowNumber++, this, other, i, row);
                                }
                            }
                            else
                            {
                                // Cannot match nulls in otherColumn
                                if (otherColumn[row] != null)
                                {
                                    AppendForMerge(ret, rowNumber++, this, other, i, row);
                                }
                            }
                        }
                    }
                    else
                    {
                        AppendForMerge(ret, rowNumber++, this, other, i, -1);
                    }
                }
                ret._table.RowCount = rowNumber;
            }
            else if (joinAlgorithm == JoinAlgorithm.Right)
            {
                BaseColumn thisColumn = this[leftJoinColumn];
                Dictionary <TKey, ICollection <long> > multimap = thisColumn.GroupColumnValues <TKey>();

                BaseColumn otherColumn = other[rightJoinColumn];
                for (int c = 0; c < ret.ColumnCount; c++)
                {
                    ret.Column(c).Resize(otherColumn.Length);
                }

                for (long i = 0; i < otherColumn.Length; i++)
                {
                    if (rowNumber >= otherColumn.Length)
                    {
                        for (int c = 0; c < ret.ColumnCount; c++)
                        {
                            ret.Column(c).Resize(rowNumber + 1);
                        }
                    }
                    TKey value = (TKey)(otherColumn[i] ?? default(TKey));
                    if (multimap.TryGetValue(value, out ICollection <long> rowNumbers))
                    {
                        foreach (long row in rowNumbers)
                        {
                            if (otherColumn[i] == null)
                            {
                                if (thisColumn[row] == null)
                                {
                                    AppendForMerge(ret, rowNumber++, this, other, row, i);
                                }
                            }
                            else
                            {
                                if (thisColumn[row] != null)
                                {
                                    AppendForMerge(ret, rowNumber++, this, other, row, i);
                                }
                            }
                        }
                    }
                    else
                    {
                        AppendForMerge(ret, rowNumber++, this, other, -1, i);
                    }
                }
                ret._table.RowCount = rowNumber;
            }
            else if (joinAlgorithm == JoinAlgorithm.Inner)
            {
                // Hash the column with the smaller RowCount
                long       leftRowCount     = RowCount;
                long       rightRowCount    = other.RowCount;
                DataFrame  longerDataFrame  = leftRowCount < rightRowCount ? other : this;
                DataFrame  shorterDataFrame = ReferenceEquals(longerDataFrame, this) ? other : this;
                BaseColumn hashColumn       = (leftRowCount < rightRowCount) ? this[leftJoinColumn] : other[rightJoinColumn];
                BaseColumn otherColumn      = ReferenceEquals(hashColumn, this[leftJoinColumn]) ? other[rightJoinColumn] : this[leftJoinColumn];
                Dictionary <TKey, ICollection <long> > multimap = hashColumn.GroupColumnValues <TKey>();

                for (int c = 0; c < ret.ColumnCount; c++)
                {
                    ret.Column(c).Resize(1);
                }

                for (long i = 0; i < otherColumn.Length; i++)
                {
                    if (rowNumber >= ret.Column(0).Length)
                    {
                        for (int c = 0; c < ret.ColumnCount; c++)
                        {
                            ret.Column(c).Resize(rowNumber + 1);
                        }
                    }
                    TKey value = (TKey)(otherColumn[i] ?? default(TKey));
                    if (multimap.TryGetValue(value, out ICollection <long> rowNumbers))
                    {
                        foreach (long row in rowNumbers)
                        {
                            if (otherColumn[i] == null)
                            {
                                if (hashColumn[row] == null)
                                {
                                    AppendForMerge(ret, rowNumber++, this, other, ReferenceEquals(this, shorterDataFrame) ? row : i, ReferenceEquals(this, shorterDataFrame) ? i : row);
                                }
                            }
                            else
                            {
                                if (hashColumn[row] != null)
                                {
                                    AppendForMerge(ret, rowNumber++, this, other, ReferenceEquals(this, shorterDataFrame) ? row : i, ReferenceEquals(this, shorterDataFrame) ? i : row);
                                }
                            }
                        }
                    }
                }
                ret._table.RowCount = rowNumber;
            }
            else if (joinAlgorithm == JoinAlgorithm.FullOuter)
            {
                BaseColumn otherColumn = other[rightJoinColumn];
                Dictionary <TKey, ICollection <long> > multimap = otherColumn.GroupColumnValues <TKey>();
                Dictionary <TKey, long> intersection            = new Dictionary <TKey, long>(EqualityComparer <TKey> .Default);

                // Go over the records in this dataframe and match with the dictionary
                BaseColumn thisColumn = this[leftJoinColumn];
                for (int c = 0; c < ret.ColumnCount; c++)
                {
                    ret.Column(c).Resize(thisColumn.Length + 1);
                }

                for (long i = 0; i < thisColumn.Length; i++)
                {
                    if (rowNumber >= thisColumn.Length)
                    {
                        for (int c = 0; c < ret.ColumnCount; c++)
                        {
                            ret.Column(c).Resize(rowNumber + 1);
                        }
                    }
                    TKey value = (TKey)(thisColumn[i] ?? default(TKey));
                    if (multimap.TryGetValue(value, out ICollection <long> rowNumbers))
                    {
                        foreach (long row in rowNumbers)
                        {
                            if (thisColumn[i] == null)
                            {
                                // Has to match only with nulls in otherColumn
                                if (otherColumn[row] == null)
                                {
                                    AppendForMerge(ret, rowNumber++, this, other, i, row);
                                    if (!intersection.ContainsKey(value))
                                    {
                                        intersection.Add(value, rowNumber);
                                    }
                                }
                            }
                            else
                            {
                                // Cannot match to nulls in otherColumn
                                if (otherColumn[row] != null)
                                {
                                    AppendForMerge(ret, rowNumber++, this, other, i, row);
                                    if (!intersection.ContainsKey(value))
                                    {
                                        intersection.Add(value, rowNumber);
                                    }
                                }
                            }
                        }
                    }
                    else
                    {
                        AppendForMerge(ret, rowNumber++, this, other, i, -1);
                    }
                }
                for (long i = 0; i < otherColumn.Length; i++)
                {
                    if (rowNumber >= ret.Column(0).Length)
                    {
                        for (int c = 0; c < ret.ColumnCount; c++)
                        {
                            ret.Column(c).Resize(rowNumber + 1);
                        }
                    }
                    TKey value = (TKey)(otherColumn[i] ?? default(TKey));
                    if (!intersection.ContainsKey(value))
                    {
                        if (rowNumber >= otherColumn.Length)
                        {
                            for (int c = 0; c < ret.ColumnCount; c++)
                            {
                                ret.Column(c).Resize(rowNumber + 1);
                            }
                        }
                        AppendForMerge(ret, rowNumber++, this, other, -1, i);
                    }
                }
                ret._table.RowCount = rowNumber;
            }
            return(ret);
        }
        // TODO: Merge API with an "On" parameter that merges on a column common to 2 dataframes

        /// <summary>
        /// Merges this DataFrame with <paramref name="other"/> using a database-style hash join on one key column from each side.
        /// </summary>
        /// <remarks>
        /// One key column is hashed with <c>GroupColumnValues</c>, the opposite key column is scanned row by row,
        /// and matching row indices are collected into a pair of index columns. The result columns are then
        /// produced by cloning every source column through those index columns; a null index marks a row
        /// with no counterpart on that side.
        /// Null keys are paired with null keys on the opposite side. For the outer joins (Left, Right, FullOuter)
        /// a null-keyed row on the preserved side that has no null-keyed counterpart is still emitted once,
        /// with a null index on the other side.
        /// </remarks>
        /// <typeparam name="TKey">Type the non-null join key values are cast to before hashing and lookup.</typeparam>
        /// <param name="other">The right-hand DataFrame of the join.</param>
        /// <param name="leftJoinColumn">Name of the key column in this DataFrame.</param>
        /// <param name="rightJoinColumn">Name of the key column in <paramref name="other"/>.</param>
        /// <param name="leftSuffix">Suffix forwarded to <c>SetSuffixForDuplicatedColumnNames</c> for columns coming from this DataFrame.</param>
        /// <param name="rightSuffix">Suffix forwarded to <c>SetSuffixForDuplicatedColumnNames</c> for columns coming from <paramref name="other"/>.</param>
        /// <param name="joinAlgorithm">Which join to perform: Left, Right, Inner or FullOuter.</param>
        /// <returns>A new DataFrame holding the columns of this DataFrame followed by the columns of <paramref name="other"/>.</returns>
        /// <exception cref="NotImplementedException">Thrown when <paramref name="joinAlgorithm"/> is not one of the supported values.</exception>
        public DataFrame Merge<TKey>(DataFrame other, string leftJoinColumn, string rightJoinColumn, string leftSuffix = "_left", string rightSuffix = "_right", JoinAlgorithm joinAlgorithm = JoinAlgorithm.Left)
        {
            // A simple hash join. The final table size is not known until the scan is done,
            // so matching row indices are accumulated first and the columns are materialized at the end.
            DataFrame ret = new DataFrame();
            PrimitiveDataFrameColumn<long> leftRowIndices = new PrimitiveDataFrameColumn<long>("LeftIndices");
            PrimitiveDataFrameColumn<long> rightRowIndices = new PrimitiveDataFrameColumn<long>("RightIndices");

            if (joinAlgorithm == JoinAlgorithm.Left)
            {
                // Hash the right key column, then probe it with every left row.
                DataFrameColumn otherColumn = other.Columns[rightJoinColumn];
                Dictionary<TKey, ICollection<long>> multimap = otherColumn.GroupColumnValues<TKey>(out HashSet<long> otherColumnNullIndices);

                DataFrameColumn thisColumn = Columns[leftJoinColumn];

                for (long i = 0; i < thisColumn.Length; i++)
                {
                    var thisColumnValue = thisColumn[i];
                    if (thisColumnValue != null)
                    {
                        if (multimap.TryGetValue((TKey)thisColumnValue, out ICollection<long> rowNumbers))
                        {
                            foreach (long row in rowNumbers)
                            {
                                leftRowIndices.Append(i);
                                rightRowIndices.Append(row);
                            }
                        }
                        else
                        {
                            leftRowIndices.Append(i);
                            rightRowIndices.Append(null);
                        }
                    }
                    else if (otherColumnNullIndices.Count == 0)
                    {
                        // A left join keeps every left row: a null key with no null-keyed
                        // right row is emitted unmatched instead of being dropped.
                        leftRowIndices.Append(i);
                        rightRowIndices.Append(null);
                    }
                    else
                    {
                        // Null keys pair with every null-keyed row on the right.
                        foreach (long row in otherColumnNullIndices)
                        {
                            leftRowIndices.Append(i);
                            rightRowIndices.Append(row);
                        }
                    }
                }
            }
            else if (joinAlgorithm == JoinAlgorithm.Right)
            {
                // Mirror image of the left join: hash the left key column, probe with every right row.
                DataFrameColumn thisColumn = Columns[leftJoinColumn];
                Dictionary<TKey, ICollection<long>> multimap = thisColumn.GroupColumnValues<TKey>(out HashSet<long> thisColumnNullIndices);

                DataFrameColumn otherColumn = other.Columns[rightJoinColumn];
                for (long i = 0; i < otherColumn.Length; i++)
                {
                    var otherColumnValue = otherColumn[i];
                    if (otherColumnValue != null)
                    {
                        if (multimap.TryGetValue((TKey)otherColumnValue, out ICollection<long> rowNumbers))
                        {
                            foreach (long row in rowNumbers)
                            {
                                leftRowIndices.Append(row);
                                rightRowIndices.Append(i);
                            }
                        }
                        else
                        {
                            leftRowIndices.Append(null);
                            rightRowIndices.Append(i);
                        }
                    }
                    else if (thisColumnNullIndices.Count == 0)
                    {
                        // A right join keeps every right row: a null key with no null-keyed
                        // left row is emitted unmatched instead of being dropped.
                        leftRowIndices.Append(null);
                        rightRowIndices.Append(i);
                    }
                    else
                    {
                        // Null keys pair with every null-keyed row on the left.
                        foreach (long thisColumnNullIndex in thisColumnNullIndices)
                        {
                            leftRowIndices.Append(thisColumnNullIndex);
                            rightRowIndices.Append(i);
                        }
                    }
                }
            }
            else if (joinAlgorithm == JoinAlgorithm.Inner)
            {
                // Hash the column with the smaller RowCount and probe it with the larger one.
                long leftRowCount = Rows.Count;
                long rightRowCount = other.Rows.Count;

                bool leftColumnIsSmaller = leftRowCount <= rightRowCount;
                DataFrameColumn hashColumn = leftColumnIsSmaller ? Columns[leftJoinColumn] : other.Columns[rightJoinColumn];
                DataFrameColumn otherColumn = ReferenceEquals(hashColumn, Columns[leftJoinColumn]) ? other.Columns[rightJoinColumn] : Columns[leftJoinColumn];
                Dictionary<TKey, ICollection<long>> multimap = hashColumn.GroupColumnValues<TKey>(out HashSet<long> smallerDataFrameColumnNullIndices);

                for (long i = 0; i < otherColumn.Length; i++)
                {
                    var otherColumnValue = otherColumn[i];
                    if (otherColumnValue != null)
                    {
                        if (multimap.TryGetValue((TKey)otherColumnValue, out ICollection<long> rowNumbers))
                        {
                            foreach (long row in rowNumbers)
                            {
                                // "row" indexes the hashed side, "i" the probed side.
                                leftRowIndices.Append(leftColumnIsSmaller ? row : i);
                                rightRowIndices.Append(leftColumnIsSmaller ? i : row);
                            }
                        }
                    }
                    else
                    {
                        // Null keys only pair with null keys; with none on the hashed side nothing is emitted.
                        foreach (long nullIndex in smallerDataFrameColumnNullIndices)
                        {
                            leftRowIndices.Append(leftColumnIsSmaller ? nullIndex : i);
                            rightRowIndices.Append(leftColumnIsSmaller ? i : nullIndex);
                        }
                    }
                }
            }
            else if (joinAlgorithm == JoinAlgorithm.FullOuter)
            {
                DataFrameColumn otherColumn = other.Columns[rightJoinColumn];
                Dictionary<TKey, ICollection<long>> multimap = otherColumn.GroupColumnValues<TKey>(out HashSet<long> otherColumnNullIndices);

                // Keys that were matched from the left side; right rows carrying one of these
                // keys have already been emitted and must not be emitted again as unmatched.
                HashSet<TKey> intersection = new HashSet<TKey>(EqualityComparer<TKey>.Default);

                // Go over the records in this dataframe and match with the dictionary
                DataFrameColumn thisColumn = Columns[leftJoinColumn];
                Int64DataFrameColumn thisColumnNullIndices = new Int64DataFrameColumn("ThisColumnNullIndices");

                for (long i = 0; i < thisColumn.Length; i++)
                {
                    var thisColumnValue = thisColumn[i];
                    if (thisColumnValue != null)
                    {
                        TKey key = (TKey)thisColumnValue;
                        if (multimap.TryGetValue(key, out ICollection<long> rowNumbers))
                        {
                            foreach (long row in rowNumbers)
                            {
                                leftRowIndices.Append(i);
                                rightRowIndices.Append(row);
                                intersection.Add(key);
                            }
                        }
                        else
                        {
                            leftRowIndices.Append(i);
                            rightRowIndices.Append(null);
                        }
                    }
                    else
                    {
                        // Null-keyed left rows are deferred until all non-null keys are processed.
                        thisColumnNullIndices.Append(i);
                    }
                }

                // Emit the non-null right rows that no left row matched.
                for (long i = 0; i < otherColumn.Length; i++)
                {
                    var value = otherColumn[i];
                    if (value != null && !intersection.Contains((TKey)value))
                    {
                        leftRowIndices.Append(null);
                        rightRowIndices.Append(i);
                    }
                }

                // Now handle the null rows
                foreach (long? thisColumnNullIndex in thisColumnNullIndices)
                {
                    foreach (long otherColumnNullIndex in otherColumnNullIndices)
                    {
                        leftRowIndices.Append(thisColumnNullIndex.Value);
                        rightRowIndices.Append(otherColumnNullIndex);
                    }
                    if (otherColumnNullIndices.Count == 0)
                    {
                        leftRowIndices.Append(thisColumnNullIndex.Value);
                        rightRowIndices.Append(null);
                    }
                }
                if (thisColumnNullIndices.Length == 0)
                {
                    foreach (long otherColumnNullIndex in otherColumnNullIndices)
                    {
                        leftRowIndices.Append(null);
                        rightRowIndices.Append(otherColumnNullIndex);
                    }
                }
            }
            else
            {
                throw new NotImplementedException(nameof(joinAlgorithm));
            }

            // Materialize the result: left columns first, then right columns, each gathered through its index column.
            for (int i = 0; i < Columns.Count; i++)
            {
                ret.Columns.Insert(i, Columns[i].Clone(leftRowIndices));
            }
            for (int i = 0; i < other.Columns.Count; i++)
            {
                DataFrameColumn column = other.Columns[i].Clone(rightRowIndices);
                SetSuffixForDuplicatedColumnNames(ret, column, leftSuffix, rightSuffix);
                ret.Columns.Insert(ret.Columns.Count, column);
            }
            return ret;
        }