/// <summary>
 /// Builds the union of a sequence of RDDs.
 /// </summary>
 /// <remarks>
 /// Behavior by input size:
 /// <list type="bullet">
 /// <item><description><c>null</c> or empty input: returns the result of <c>EmptyRDD&lt;T&gt;()</c>.</description></item>
 /// <item><description>exactly one RDD: returns that same instance unchanged.</description></item>
 /// <item><description>two or more RDDs: returns a new RDD wrapping <c>SparkContextProxy.Union</c>
 /// applied to each input's <c>RddProxy</c>.</description></item>
 /// </list>
 /// The resulting RDD is created with the <c>serializedMode</c> of the first input.
 /// NOTE(review): the original comment (ported from PySpark's <c>sc.union</c>) stated that inputs
 /// with different serialized formats are reserialized with the default serializer; that is not
 /// visible in this method - confirm it against the proxy implementation.
 /// </remarks>
 /// <typeparam name="T">Element type of the RDDs being combined.</typeparam>
 /// <param name="rdds">The RDDs to combine. May be <c>null</c> or empty. Enumerated exactly once.</param>
 /// <returns>An RDD representing the union of all inputs, as described in the remarks.</returns>
 public RDD<T> Union<T>(IEnumerable<RDD<T>> rdds)
 {
     if (rdds == null)
     {
         return EmptyRDD<T>();
     }

     // Materialize once: the argument may be a deferred or one-shot sequence, and the
     // checks below plus the final projection would otherwise each re-enumerate it.
     var rddList = rdds.ToList();

     if (rddList.Count == 0)
     {
         return EmptyRDD<T>();
     }

     if (rddList.Count == 1)
     {
         return rddList[0];
     }

     // The combined RDD takes its serialization mode from the first input.
     return new RDD<T>(SparkContextProxy.Union(rddList.Select(rdd => rdd.RddProxy)), this, rddList[0].serializedMode);
 }
Exemple #2
0
 /// <summary>
 /// Builds the union of a sequence of RDDs.
 /// </summary>
 /// <remarks>
 /// Behavior by input size:
 /// <list type="bullet">
 /// <item><description><c>null</c> or empty input: returns the result of <c>EmptyRDD&lt;T&gt;()</c>.</description></item>
 /// <item><description>exactly one RDD: returns that same instance unchanged.</description></item>
 /// <item><description>two or more RDDs: returns a new RDD wrapping <c>SparkContextProxy.Union</c>
 /// applied to each input's <c>RddProxy</c>.</description></item>
 /// </list>
 /// The resulting RDD is created with the <c>serializedMode</c> of the first input.
 /// NOTE(review): the original comment (ported from PySpark's <c>sc.union</c>) stated that inputs
 /// with different serialized formats are reserialized with the default serializer; that is not
 /// visible in this method - confirm it against the proxy implementation.
 /// </remarks>
 /// <typeparam name="T">Element type of the RDDs being combined.</typeparam>
 /// <param name="rdds">The RDDs to combine. May be <c>null</c> or empty. Enumerated exactly once.</param>
 /// <returns>An RDD representing the union of all inputs, as described in the remarks.</returns>
 public RDD<T> Union<T>(IEnumerable<RDD<T>> rdds)
 {
     if (rdds == null)
     {
         return EmptyRDD<T>();
     }

     // Materialize once: the argument may be a deferred or one-shot sequence, and calling
     // Count()/First()/Select on it separately would re-enumerate it each time.
     var rddList = rdds.ToList();

     if (rddList.Count == 0)
     {
         return EmptyRDD<T>();
     }

     if (rddList.Count == 1)
     {
         return rddList[0];
     }

     // The combined RDD takes its serialization mode from the first input.
     return new RDD<T>(SparkContextProxy.Union(rddList.Select(rdd => rdd.RddProxy)), this, rddList[0].serializedMode);
 }