Example #1
        /// <summary>
        /// Returns a new DataFrame sorted by the specified column(s).
        /// Reference: sort(self, *cols, **kwargs) in https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py
        /// </summary>
        /// <param name="columns">List of Columns to sort by</param>
        /// <param name="ascending">List of boolean to specify multiple sort orders for <paramref name="columns"/>, TRUE for ascending, FALSE for descending.
        /// if not null, it will overwrite the order specified by Column.Asc() or Column Desc() in <paramref name="columns"/>, </param>
        /// <returns>A new DataFrame sorted by the specified column(s)</returns>
        public DataFrame Sort(Column[] columns, bool[] ascending = null)
        {
            if (columns == null || columns.Length == 0)
            {
                throw new ArgumentException("should sort by at least one column.");
            }
            if (ascending != null)
            {
                if (columns.Length != ascending.Length)
                {
                    throw new ArgumentException("ascending should have the same length with columns");
                }

                var columnsWithOrder = new Column[columns.Length];
                for (var i = 0; i < columns.Length; i++)
                {
                    columnsWithOrder[i] = ascending[i] ? columns[i].Asc() : columns[i].Desc();
                }
                return new DataFrame(dataFrameProxy.Sort(columnsWithOrder.Select(c => c.ColumnProxy).ToArray()), sparkContext);
            }
            return new DataFrame(dataFrameProxy.Sort(columns.Select(c => c.ColumnProxy).ToArray()), sparkContext);
        }
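
A minimal usage sketch of this Sort overload. It assumes a Mobius (SparkCLR) DataFrame named df with "name" and "age" columns, and that columns can be referenced with the df["..."] indexer; both the DataFrame and the column names are hypothetical and only illustrate how the columns and ascending parameters interact:

        // Hypothetical DataFrame df with "name" and "age" columns (assumption, not from the source above).

        // Sort by a single column, descending, using Column.Desc() on the column itself.
        var byAgeDesc = df.Sort(new[] { df["age"].Desc() });

        // Sort by two columns. Because ascending is not null, it overrides any
        // Asc()/Desc() already applied to the columns: "name" ascending, "age" descending.
        var byNameThenAge = df.Sort(new[] { df["name"], df["age"] }, new[] { true, false });

Note that passing an ascending array whose length differs from the columns array, or an empty or null columns array, throws an ArgumentException as shown in the method body above.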