/// <summary>
/// Randomly splits this DataFrame with the provided weights.
/// Reference to https://github.com/apache/spark/blob/branch-1.4/python/pyspark/sql/dataframe.py, randomSplit(self, weights, seed=None)
/// </summary>
/// <param name="weights">list of weights with which to split the DataFrame. Weights will be normalized if they don't sum up to 1.0</param>
/// <param name="seed">The seed for sampling; when null, a random seed is generated</param>
/// <returns>One DataFrame per weight, each containing a random sample of this DataFrame's rows</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="weights"/> is null</exception>
/// <exception cref="ArgumentException">Thrown when any weight is negative</exception>
public IEnumerable<DataFrame> RandomSplit(IEnumerable<double> weights, int? seed = null)
{
    if (weights == null)
    {
        throw new ArgumentNullException("weights");
    }

    // Materialize once: a deferred IEnumerable would otherwise be enumerated
    // twice (validation loop below, then the proxy call), and could re-evaluate
    // or change between the two enumerations.
    var weightArray = weights.ToArray();

    foreach (var weight in weightArray)
    {
        if (weight < 0.0)
        {
            throw new ArgumentException(string.Format("Weights must be positive. Found weight value: {0}", weight));
        }
    }

    if (seed == null)
    {
        // No seed supplied - generate one so the proxy always receives a concrete value.
        seed = new Random().Next();
    }

    return dataFrameProxy.RandomSplit(weightArray, seed.Value).Select(dfProxy => new DataFrame(dfProxy, sparkContext));
}