/// <summary>
/// Combines the given RDDs by invoking the "union" method on the JVM-side Java context
/// reference, passing the first RDD reference and a Java list of the remaining ones.
/// </summary>
/// <param name="rdds">
/// RDD proxies to combine. Every element must be an <see cref="RDDIpcProxy"/>; the sequence
/// is enumerated exactly once.
/// </param>
/// <returns>A new <see cref="RDDIpcProxy"/> wrapping the reference returned by the "union" call.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="rdds"/> is null.</exception>
/// <exception cref="System.InvalidOperationException"><paramref name="rdds"/> is empty.</exception>
/// <exception cref="System.InvalidCastException">An element is not an <see cref="RDDIpcProxy"/>.</exception>
public IRDDProxy Union(IEnumerable<IRDDProxy> rdds)
{
    if (rdds == null)
    {
        throw new System.ArgumentNullException("rdds");
    }

    // Materialize once: the caller may hand in a lazy query, and calling First() and Skip(1)
    // on it separately would run that query twice. Direct casts are used so that a foreign
    // IRDDProxy implementation fails with InvalidCastException before any JVM call is made,
    // rather than with a NullReferenceException part-way through.
    var jvmRdds = rdds.Select(r => ((RDDIpcProxy)r).JvmRddReference).ToList();

    // First() keeps the InvalidOperationException that an empty sequence produced before.
    var jfirst = jvmRdds.First();
    var jrest = JvmBridgeUtils.GetJavaList<JvmObjectReference>(jvmRdds.Skip(1));

    var jvmRddReference = new JvmObjectReference(
        (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
            jvmJavaContextReference,
            "union",
            new object[] { jfirst, jrest }));

    return new RDDIpcProxy(jvmRddReference);
}
/// <summary>
/// Combines DStreams by invoking the "union" method on the JVM-side Java streaming context
/// reference, passing the first DStream reference and a Java list of the other references.
/// </summary>
/// <param name="firstDStream">First DStream; must be a <see cref="DStreamIpcProxy"/>.</param>
/// <param name="otherDStreams">Remaining DStreams; each must be a <see cref="DStreamIpcProxy"/>.</param>
/// <returns>A new <see cref="DStreamIpcProxy"/> wrapping the reference returned by the "union" call.</returns>
/// <exception cref="System.InvalidCastException">
/// A supplied proxy is not a <see cref="DStreamIpcProxy"/>.
/// </exception>
public IDStreamProxy Union(IDStreamProxy firstDStream, IDStreamProxy[] otherDStreams)
{
    // Direct casts instead of "as": a proxy of the wrong concrete type now reports
    // InvalidCastException naming the types involved, instead of a NullReferenceException.
    var jfirst = ((DStreamIpcProxy)firstDStream).javaDStreamReference;
    var jrest = JvmBridgeUtils.GetJavaList<JvmObjectReference>(
        otherDStreams.Select(x => ((DStreamIpcProxy)x).javaDStreamReference));

    var junion = new JvmObjectReference(
        (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
            jvmJavaStreamingReference,
            "union",
            new object[] { jfirst, jrest }));

    return new DStreamIpcProxy(junion);
}
/// <summary>
/// Creates a JVM-side org.apache.spark.sql.UserDefinedPythonFunction for the given command
/// and wraps it in a <see cref="UDFIpcProxy"/>.
/// </summary>
/// <param name="name">Name passed as the first constructor argument of the JVM function object.</param>
/// <param name="command">Command bytes passed as the second constructor argument.</param>
/// <param name="returnType">
/// Return type name; it is wrapped in double quotes and handed to "parseDataType" on a newly
/// constructed SQLContext. Defaults to "string".
/// </param>
/// <returns>A <see cref="UDFIpcProxy"/> around the constructed JVM object.</returns>
public IUDFProxy CreateUserDefinedCSharpFunction(string name, byte[] command, string returnType = "string")
{
    // A fresh SQLContext is constructed from the Spark context reference on every call.
    var sqlContextRef = SparkCLRIpcProxy.JvmBridge.CallConstructor(
        "org.apache.spark.sql.SQLContext",
        new object[] { jvmSparkContextReference });

    // parseDataType receives the type name surrounded by literal double quotes.
    string quotedReturnType = string.Concat("\"", returnType, "\"");
    var parsedDataType = SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
        sqlContextRef,
        "parseDataType",
        new object[] { quotedReturnType });
    var dataTypeRef = new JvmObjectReference((string)parsedDataType);

    var broadcastListRef = JvmBridgeUtils.GetJavaList<JvmObjectReference>(jvmBroadcastReferences);

    // Empty JVM collections fill the third and fourth constructor slots.
    var emptyHashtableRef = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.Hashtable", new object[0]);
    var emptyArrayListRef = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.ArrayList", new object[0]);

    var udfRef = SparkCLRIpcProxy.JvmBridge.CallConstructor(
        "org.apache.spark.sql.UserDefinedPythonFunction",
        new object[]
        {
            name,
            command,
            emptyHashtableRef,
            emptyArrayListRef,
            SparkCLREnvironment.ConfigurationService.GetCSharpWorkerExePath(),
            "1.0", // NOTE(review): presumably a version string expected by the JVM constructor - confirm
            broadcastListRef,
            jvmAccumulatorReference,
            dataTypeRef
        });

    return new UDFIpcProxy(udfRef);
}
/// <summary>
/// Constructs a JVM-side org.apache.spark.api.csharp.CSharpRDD on top of an existing RDD and
/// returns a proxy for the result of calling "asJavaRDD" on it.
/// </summary>
/// <param name="prevJvmRddReference">Parent RDD; must be an <see cref="RDDIpcProxy"/>.</param>
/// <param name="command">Command bytes passed to the CSharpRDD constructor.</param>
/// <param name="environmentVariables">Not read by this method; an empty java.util.Hashtable is passed instead.</param>
/// <param name="pythonIncludes">Not read by this method; an empty java.util.ArrayList is passed instead.</param>
/// <param name="preservesPartitioning">Forwarded unchanged to the CSharpRDD constructor.</param>
/// <param name="broadcastVariables">Not read by this method; the jvmBroadcastReferences member is used instead.</param>
/// <param name="accumulator">Not read by this method; the jvmAccumulatorReference member is used instead.</param>
/// <returns>A new <see cref="RDDIpcProxy"/> wrapping the reference returned by "asJavaRDD".</returns>
/// <exception cref="System.InvalidCastException">
/// <paramref name="prevJvmRddReference"/> is not an <see cref="RDDIpcProxy"/>.
/// </exception>
public IRDDProxy CreateCSharpRdd(IRDDProxy prevJvmRddReference, byte[] command, Dictionary<string, string> environmentVariables, List<string> pythonIncludes, bool preservesPartitioning, List<Broadcast> broadcastVariables, List<byte[]> accumulator)
{
    // Cast up front with a direct cast: a proxy of the wrong concrete type fails with
    // InvalidCastException before any JVM-side object is created, instead of a
    // NullReferenceException after the collections below have been constructed.
    var prevRddProxy = (RDDIpcProxy)prevJvmRddReference;

    var hashTableReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.Hashtable", new object[] { });
    var arrayListReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.ArrayList", new object[] { });
    var jbroadcastVariables = JvmBridgeUtils.GetJavaList<JvmObjectReference>(jvmBroadcastReferences);

    // The CSharpRDD constructor is given the result of calling "rdd" on the parent reference.
    var rdd = new JvmObjectReference(
        (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(prevRddProxy.JvmRddReference, "rdd"));

    var csRdd = SparkCLRIpcProxy.JvmBridge.CallConstructor(
        "org.apache.spark.api.csharp.CSharpRDD",
        new object[]
        {
            rdd,
            command,
            hashTableReference,
            arrayListReference,
            preservesPartitioning,
            SparkCLREnvironment.ConfigurationService.GetCSharpWorkerExePath(),
            "1.0", // NOTE(review): presumably a version string expected by the JVM constructor - confirm
            jbroadcastVariables,
            jvmAccumulatorReference
        });

    return new RDDIpcProxy(
        new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(csRdd, "asJavaRDD")));
}
/// <summary>
/// Invokes the static "runJob" method of org.apache.spark.api.python.PythonRDD with the Spark
/// context reference, the RDD reference and a Java list of partition indices, and returns the
/// result parsed as an integer.
/// </summary>
/// <param name="rdd">RDD to run the job on; must be an <see cref="RDDIpcProxy"/>.</param>
/// <param name="partitions">Partition indices, converted to a Java list before the call.</param>
/// <returns>
/// The integer parsed from the string form of the value returned by the JVM call.
/// NOTE(review): the meaning of this integer is not visible here - confirm against the JVM side.
/// </returns>
/// <exception cref="System.InvalidCastException">
/// <paramref name="rdd"/> is not an <see cref="RDDIpcProxy"/>.
/// </exception>
public int RunJob(IRDDProxy rdd, IEnumerable<int> partitions)
{
    // Direct cast so a wrong proxy type surfaces as InvalidCastException before the JVM is
    // contacted, rather than as a NullReferenceException.
    var jrdd = ((RDDIpcProxy)rdd).JvmRddReference;
    var jpartitions = JvmBridgeUtils.GetJavaList<int>(partitions);

    var result = SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod(
        "org.apache.spark.api.python.PythonRDD",
        "runJob",
        new object[] { jvmSparkContextReference, jrdd, jpartitions });

    // The value is machine-generated, so parse it with the invariant culture instead of
    // whatever culture the current thread happens to use.
    return int.Parse(result.ToString(), System.Globalization.CultureInfo.InvariantCulture);
}