/// <summary>
/// Builds a new DataFrame whose rows are ordered by the given column(s).
/// Mirrors sort(self, *cols, **kwargs) from
/// https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py
/// </summary>
/// <param name="columns">Columns to sort by; at least one is required.</param>
/// <param name="ascending">
/// Optional sort direction for each entry of <paramref name="columns"/>: TRUE for ascending,
/// FALSE for descending. When not null it must have the same length as <paramref name="columns"/>,
/// and it overrides any order already set through Column.Asc() or Column.Desc() because every
/// column is re-wrapped with Asc() or Desc() according to its flag.
/// </param>
/// <returns>A new DataFrame sorted by the specified column(s)</returns>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="columns"/> is null or empty, or when <paramref name="ascending"/>
/// is supplied with a length different from <paramref name="columns"/>.
/// </exception>
public DataFrame Sort(Column[] columns, bool[] ascending = null)
{
    if (columns == null || columns.Length == 0)
    {
        throw new ArgumentException("should sort by at least one column.");
    }

    // Without explicit directions the columns are forwarded exactly as supplied.
    Column[] sortColumns = columns;

    if (ascending != null)
    {
        if (ascending.Length != columns.Length)
        {
            throw new ArgumentException("ascending should have the same length with columns");
        }

        // Pair each column with its flag by position and apply the requested direction.
        sortColumns = columns
            .Select((column, index) => ascending[index] ? column.Asc() : column.Desc())
            .ToArray();
    }

    // The underlying proxy sorts on column proxies, not on Column wrappers.
    var columnProxies = sortColumns.Select(column => column.ColumnProxy).ToArray();
    return new DataFrame(dataFrameProxy.Sort(columnProxies), sparkContext);
}