/// <summary>
/// Returns a new DataFrame that has exactly `numPartitions` partitions.
/// Similar to coalesce defined on an RDD, this operation results in a narrow dependency;
/// e.g. if you go from 1000 partitions to 100 partitions, there will not be a shuffle.
/// Instead, each of the 100 new partitions will claim 10 of the current partitions.
/// </summary>
/// <param name="numPartitions">Number of partitions in the resulting DataFrame</param>
// Python API: https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py coalesce(self, numPartitions)
public DataFrame Coalesce(int numPartitions)
{
    return new DataFrame(dataFrameProxy.Coalesce(numPartitions), sparkContext);
}
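
// Usage sketch (illustrative addition, not part of the original source):
// assuming `df` is an existing DataFrame with, say, 1000 partitions,
//
//     DataFrame narrowed = df.Coalesce(100); // narrow dependency, no shuffle
//
// matches the behavior described in the summary above: each of the 100
// resulting partitions claims 10 of the original ones. Because the
// dependency is narrow, Coalesce is useful for shrinking the partition
// count cheaply, e.g. before writing output.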