/// <summary>
/// Build the union of a list of RDDs.
/// </summary>
/// <remarks>
/// The resulting RDD is created with the <c>serializedMode</c> of the first RDD in
/// <paramref name="rdds"/>.
/// NOTE(review): the original documentation stated that RDDs with different serialized
/// formats are supported and are reserialized using the default serializer; that
/// reserialization is not visible in this method - confirm against the proxy implementation.
///
/// The original documentation illustrated the operation with this Python example:
/// <code>
/// >>> path = os.path.join(tempdir, "union-text.txt")
/// >>> with open(path, "w") as testFile:
/// ...    _ = testFile.write("Hello")
/// >>> textFile = sc.textFile(path)
/// >>> textFile.collect()
/// [u'Hello']
/// >>> parallelized = sc.parallelize(["World!"])
/// >>> sorted(sc.union([textFile, parallelized]).collect())
/// [u'Hello', 'World!']
/// </code>
/// </remarks>
/// <typeparam name="T">Element type of the RDDs being combined.</typeparam>
/// <param name="rdds">The RDDs to combine; may be null or empty.</param>
/// <returns>
/// The result of <c>EmptyRDD</c> when <paramref name="rdds"/> is null or empty, the single
/// RDD itself when exactly one is supplied, otherwise a new RDD wrapping the proxy-level union.
/// </returns>
public RDD<T> Union<T>(IEnumerable<RDD<T>> rdds)
{
    if (rdds == null)
    {
        return EmptyRDD<T>();
    }

    // Take a single snapshot: the input may be a lazy or one-shot sequence, and the
    // emptiness check, the single-element check and the union itself must all observe
    // the same elements.
    List<RDD<T>> rddList = rdds.ToList();

    if (rddList.Count == 0)
    {
        return EmptyRDD<T>();
    }

    if (rddList.Count == 1)
    {
        return rddList[0];
    }

    return new RDD<T>(
        SparkContextProxy.Union(rddList.Select(rdd => rdd.RddProxy)),
        this,
        rddList[0].serializedMode);
}
/// <summary>
/// Build the union of a list of RDDs.
/// </summary>
/// <remarks>
/// The resulting RDD is created with the <c>serializedMode</c> of the first RDD in
/// <paramref name="rdds"/>.
/// NOTE(review): the original documentation stated that RDDs with different serialized
/// formats are supported and are reserialized using the default serializer; that
/// reserialization is not visible in this method - confirm against the proxy implementation.
///
/// The original documentation illustrated the operation with this Python example:
/// <code>
/// >>> path = os.path.join(tempdir, "union-text.txt")
/// >>> with open(path, "w") as testFile:
/// ...    _ = testFile.write("Hello")
/// >>> textFile = sc.textFile(path)
/// >>> textFile.collect()
/// [u'Hello']
/// >>> parallelized = sc.parallelize(["World!"])
/// >>> sorted(sc.union([textFile, parallelized]).collect())
/// [u'Hello', 'World!']
/// </code>
/// </remarks>
/// <typeparam name="T">Element type of the RDDs being combined.</typeparam>
/// <param name="rdds">The RDDs to combine; may be null or empty.</param>
/// <returns>
/// The result of <c>EmptyRDD</c> when <paramref name="rdds"/> is null or empty, the single
/// RDD itself when exactly one is supplied, otherwise a new RDD wrapping the proxy-level union.
/// </returns>
public RDD<T> Union<T>(IEnumerable<RDD<T>> rdds)
{
    // Enumerate the caller's sequence exactly once; repeated Count()/First() calls on a
    // lazy sequence would re-run it and could disagree with each other.
    List<RDD<T>> snapshot = rdds == null ? new List<RDD<T>>() : rdds.ToList();

    switch (snapshot.Count)
    {
        case 0:
            return EmptyRDD<T>();

        case 1:
            // Nothing to combine: hand back the caller's RDD unchanged.
            return snapshot[0];

        default:
            return new RDD<T>(
                SparkContextProxy.Union(snapshot.Select(rdd => rdd.RddProxy)),
                this,
                snapshot[0].serializedMode);
    }
}