Пример #1
0
        /// <summary>
        /// Invokes the JVM-side <c>union</c> method on <c>jvmJavaContextReference</c>, passing the
        /// first RDD's JVM reference and the remaining RDDs' references (converted via
        /// <c>JvmBridgeUtils.GetJavaList</c>), and wraps the returned reference in a new proxy.
        /// </summary>
        /// <param name="rdds">RDD proxies to union; every element must be an <c>RDDIpcProxy</c>.</param>
        /// <returns>An <c>RDDIpcProxy</c> built from the value returned by the JVM call.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="rdds"/> is null.</exception>
        /// <exception cref="System.InvalidOperationException"><paramref name="rdds"/> contains no elements.</exception>
        /// <exception cref="System.InvalidCastException">An element is not an <c>RDDIpcProxy</c>.</exception>
        public IRDDProxy Union(IEnumerable <IRDDProxy> rdds)
        {
            // Materialize exactly once: the sequence was previously walked twice (First + Skip),
            // which re-evaluates lazy queries and can misbehave on one-shot enumerables.
            // The direct cast fails with InvalidCastException on a foreign proxy type rather
            // than a NullReferenceException from an unchecked "as".
            var proxies = rdds.Select(r => (RDDIpcProxy)r).ToList();

            // First() keeps the "sequence contains no elements" failure for empty input.
            var jfirst = proxies.First().JvmRddReference;
            var jrest  = JvmBridgeUtils.GetJavaList<JvmObjectReference>(proxies.Skip(1).Select(p => p.JvmRddReference));

            var unionResult     = (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmJavaContextReference, "union", new object[] { jfirst, jrest });
            var jvmRddReference = new JvmObjectReference(unionResult);

            return new RDDIpcProxy(jvmRddReference);
        }
Пример #2
0
 /// <summary>
 /// Invokes the JVM-side <c>union</c> method on <c>jvmJavaStreamingReference</c> with the first
 /// DStream's JVM reference and the other DStreams' references (converted via
 /// <c>JvmBridgeUtils.GetJavaList</c>), and wraps the returned reference in a new proxy.
 /// </summary>
 /// <param name="firstDStream">First stream; accessed as a <c>DStreamIpcProxy</c>.</param>
 /// <param name="otherDStreams">Remaining streams; each is accessed as a <c>DStreamIpcProxy</c>.</param>
 /// <returns>A <c>DStreamIpcProxy</c> built from the value returned by the JVM call.</returns>
 public IDStreamProxy Union(IDStreamProxy firstDStream, IDStreamProxy[] otherDStreams)
 {
     var headReference  = (firstDStream as DStreamIpcProxy).javaDStreamReference;
     var tailReferences = JvmBridgeUtils.GetJavaList<JvmObjectReference>(
         otherDStreams.Select(stream => (stream as DStreamIpcProxy).javaDStreamReference));

     var unionArguments = new object[] { headReference, tailReferences };

     // The bridge result is cast to string and used to construct the JVM object reference.
     var unionResult = (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmJavaStreamingReference, "union", unionArguments);

     return new DStreamIpcProxy(new JvmObjectReference(unionResult));
 }
Пример #3
0
        /// <summary>
        /// Constructs a JVM-side <c>org.apache.spark.sql.UserDefinedPythonFunction</c> for the given
        /// command and wraps the resulting reference in a <c>UDFIpcProxy</c>.
        /// </summary>
        /// <param name="name">Function name forwarded to the JVM constructor.</param>
        /// <param name="command">Command bytes forwarded to the JVM constructor.</param>
        /// <param name="returnType">
        /// Return type name; it is wrapped in double quotes and handed to <c>parseDataType</c>
        /// on a newly constructed <c>SQLContext</c>.
        /// </param>
        /// <returns>A <c>UDFIpcProxy</c> around the constructed JVM object.</returns>
        public IUDFProxy CreateUserDefinedCSharpFunction(string name, byte[] command, string returnType = "string")
        {
            var sqlContext = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.sql.SQLContext", new object[] { jvmSparkContextReference });

            // parseDataType is given the type name surrounded by literal double quotes.
            var quotedReturnType = string.Concat("\"", returnType, "\"");
            var parsedTypeResult = (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(sqlContext, "parseDataType", new object[] { quotedReturnType });
            var dataType         = new JvmObjectReference(parsedTypeResult);

            var broadcastList = JvmBridgeUtils.GetJavaList<JvmObjectReference>(jvmBroadcastReferences);

            // Freshly constructed, unpopulated Java collections are passed for the
            // third and fourth constructor arguments.
            var emptyHashtable = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.Hashtable", new object[] { });
            var emptyArrayList = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.ArrayList", new object[] { });

            var workerExePath = SparkCLREnvironment.ConfigurationService.GetCSharpWorkerExePath();

            // NOTE(review): "1.0" is presumably a version string expected by the JVM constructor - confirm.
            var udfArguments = new object[]
            {
                name,
                command,
                emptyHashtable,
                emptyArrayList,
                workerExePath,
                "1.0",
                broadcastList,
                jvmAccumulatorReference,
                dataType
            };

            var udfReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.sql.UserDefinedPythonFunction", udfArguments);

            return new UDFIpcProxy(udfReference);
        }
Пример #4
0
        /// <summary>
        /// Constructs a JVM-side <c>org.apache.spark.api.csharp.CSharpRDD</c> on top of the given
        /// parent RDD and returns a proxy around the result of its <c>asJavaRDD</c> method.
        /// </summary>
        /// <param name="prevJvmRddReference">Parent RDD; accessed as an <c>RDDIpcProxy</c>.</param>
        /// <param name="command">Command bytes forwarded to the JVM constructor.</param>
        /// <param name="environmentVariables">Not read by this method; a new empty <c>java.util.Hashtable</c> is passed to the JVM instead.</param>
        /// <param name="pythonIncludes">Not read by this method; a new empty <c>java.util.ArrayList</c> is passed to the JVM instead.</param>
        /// <param name="preservesPartitioning">Flag forwarded to the JVM constructor.</param>
        /// <param name="broadcastVariables">Not read by this method; <c>jvmBroadcastReferences</c> is used instead.</param>
        /// <param name="accumulator">Not read by this method; <c>jvmAccumulatorReference</c> is used instead.</param>
        /// <returns>An <c>RDDIpcProxy</c> built from the value returned by <c>asJavaRDD</c>.</returns>
        public IRDDProxy CreateCSharpRdd(IRDDProxy prevJvmRddReference, byte[] command, Dictionary <string, string> environmentVariables, List <string> pythonIncludes, bool preservesPartitioning, List <Broadcast> broadcastVariables, List <byte[]> accumulator)
        {
            var emptyHashtable = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.Hashtable", new object[] { });
            var emptyArrayList = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.ArrayList", new object[] { });
            var broadcastList  = JvmBridgeUtils.GetJavaList<JvmObjectReference>(jvmBroadcastReferences);

            // Call "rdd" on the parent's JVM reference to obtain the object handed to the CSharpRDD constructor.
            var parentReference = (prevJvmRddReference as RDDIpcProxy).JvmRddReference;
            var parentRddResult = (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(parentReference, "rdd");
            var parentRdd       = new JvmObjectReference(parentRddResult);

            var workerExePath = SparkCLREnvironment.ConfigurationService.GetCSharpWorkerExePath();

            // NOTE(review): "1.0" is presumably a version string expected by the JVM constructor - confirm.
            var constructorArguments = new object[]
            {
                parentRdd,
                command,
                emptyHashtable,
                emptyArrayList,
                preservesPartitioning,
                workerExePath,
                "1.0",
                broadcastList,
                jvmAccumulatorReference
            };

            var csharpRdd = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.api.csharp.CSharpRDD", constructorArguments);

            var javaRddResult = (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(csharpRdd, "asJavaRDD");

            return new RDDIpcProxy(new JvmObjectReference(javaRddResult));
        }
Пример #5
0
        /// <summary>
        /// Invokes the static JVM method <c>org.apache.spark.api.python.PythonRDD.runJob</c> with
        /// <c>jvmSparkContextReference</c>, the RDD's JVM reference and the partition list, and
        /// parses the string form of the result as an integer.
        /// </summary>
        /// <param name="rdd">RDD to run the job on; accessed as an <c>RDDIpcProxy</c>.</param>
        /// <param name="partitions">Partition indices, converted via <c>JvmBridgeUtils.GetJavaList</c>.</param>
        /// <returns>The integer parsed from the JVM call's result.</returns>
        public int RunJob(IRDDProxy rdd, IEnumerable <int> partitions)
        {
            var partitionList = JvmBridgeUtils.GetJavaList<int>(partitions);

            var jobArguments = new object[]
            {
                jvmSparkContextReference,
                (rdd as RDDIpcProxy).JvmRddReference,
                partitionList
            };

            var jobResult = SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "runJob", jobArguments);

            // The bridge result is converted through its string representation before parsing.
            return int.Parse(jobResult.ToString());
        }