示例#1
0
        /// <summary>
        /// Unions the given RDD proxies by invoking <c>union</c> on the JVM-side context
        /// reference, passing the first RDD and a Java list built from the remaining ones.
        /// </summary>
        /// <param name="rdds">
        /// RDD proxies to combine. The first element is taken with <c>First()</c>, so an empty
        /// sequence throws. Each element is treated as an <c>RDDIpcProxy</c>.
        /// </param>
        /// <returns>An <c>RDDIpcProxy</c> wrapping the reference id returned by the JVM call.</returns>
        public IRDDProxy Union(IEnumerable<IRDDProxy> rdds)
        {
            // Materialize once: the sequence is consumed twice below (head, then tail). A lazy
            // or one-shot source would otherwise be re-evaluated and could yield different items.
            var rddList = rdds.ToList();

            var jfirst = (rddList.First() as RDDIpcProxy).JvmRddReference;
            var jrest  = JvmBridgeUtils.GetJavaList<JvmObjectReference>(
                rddList.Skip(1).Select(r => (r as RDDIpcProxy).JvmRddReference));

            var unionId = (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
                jvmJavaContextReference, "union", new object[] { jfirst, jrest });

            return new RDDIpcProxy(new JvmObjectReference(unionId));
        }
示例#2
0
        /// <summary>
        /// Creates an RDD by calling the static <c>hadoopRDD</c> method of
        /// <c>org.apache.spark.api.python.PythonRDD</c> through the JVM bridge.
        /// </summary>
        /// <param name="inputFormatClass">Forwarded unchanged to the JVM call.</param>
        /// <param name="keyClass">Forwarded unchanged to the JVM call.</param>
        /// <param name="valueClass">Forwarded unchanged to the JVM call.</param>
        /// <param name="keyConverterClass">Forwarded unchanged to the JVM call.</param>
        /// <param name="valueConverterClass">Forwarded unchanged to the JVM call.</param>
        /// <param name="conf">Key/value pairs converted to a Java hash map before the call.</param>
        /// <param name="batchSize">Forwarded unchanged to the JVM call.</param>
        /// <returns>An <c>RDDIpcProxy</c> wrapping the reference id returned by the JVM call.</returns>
        public IRDDProxy HadoopRDD(string inputFormatClass, string keyClass, string valueClass, string keyConverterClass, string valueConverterClass, IEnumerable<Tuple<string, string>> conf, int batchSize)
        {
            var hadoopConf = JvmBridgeUtils.GetJavaHashMap<string, string>(conf);

            object[] arguments =
            {
                jvmJavaContextReference,
                inputFormatClass,
                keyClass,
                valueClass,
                keyConverterClass,
                valueConverterClass,
                hadoopConf,
                batchSize
            };

            var rddId = (string)SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod(
                "org.apache.spark.api.python.PythonRDD", "hadoopRDD", arguments);

            return new RDDIpcProxy(new JvmObjectReference(rddId));
        }
        /// <summary>
        /// Creates a DStream by constructing
        /// <c>org.apache.spark.streaming.kafka.KafkaUtilsPythonHelper</c> on the JVM and
        /// invoking its <c>createStream</c> method.
        /// </summary>
        /// <param name="topics">Converted to a Java map and passed to <c>createStream</c>.</param>
        /// <param name="kafkaParams">Converted to a Java map and passed to <c>createStream</c>.</param>
        /// <param name="storageLevelType">Converted to a Java storage level via <c>SparkContextIpcProxy.GetJavaStorageLevel</c>.</param>
        /// <returns>A <c>DStreamIpcProxy</c> wrapping the reference returned by <c>createStream</c>.</returns>
        public IDStreamProxy KafkaStream(Dictionary<string, int> topics, Dictionary<string, string> kafkaParams, StorageLevelType storageLevelType)
        {
            var topicMap     = JvmBridgeUtils.GetJavaMap<string, int>(topics);
            var paramMap     = JvmBridgeUtils.GetJavaMap<string, string>(kafkaParams);
            var storageLevel = SparkContextIpcProxy.GetJavaStorageLevel(storageLevelType);

            // KafkaUtilsPythonHelper: external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala
            var helper = SparkCLRIpcProxy.JvmBridge.CallConstructor(
                "org.apache.spark.streaming.kafka.KafkaUtilsPythonHelper", new object[0]);

            var streamResult = SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
                helper,
                "createStream",
                new object[] { jvmJavaStreamingReference, paramMap, topicMap, storageLevel });

            return new DStreamIpcProxy(new JvmObjectReference(streamResult.ToString()));
        }
示例#4
0
 /// <summary>
 /// Invokes the static method <paramref name="name"/> of
 /// <c>org.apache.spark.sql.functions</c> on the JVM with a single argument.
 /// </summary>
 /// <param name="name">Name of the static method to call.</param>
 /// <param name="self">
 /// Argument for the call. A <c>ColumnIpcProxy</c> is replaced by its Scala column
 /// reference; an <c>IColumnProxy[]</c> is replaced by a Java sequence of the elements'
 /// Scala column references; any other value is passed through as-is.
 /// </param>
 /// <returns>A <c>ColumnIpcProxy</c> wrapping the reference id returned by the JVM call.</returns>
 public IColumnProxy CreateFunction(string name, object self)
 {
     // Kept as 'object' so the bridge call receives it exactly as the caller's argument would be.
     object argument = self;

     var singleColumn = self as ColumnIpcProxy;
     if (singleColumn != null)
     {
         argument = singleColumn.ScalaColumnReference;
     }
     else
     {
         var columnArray = self as IColumnProxy[];
         if (columnArray != null)
         {
             argument = JvmBridgeUtils.GetJavaSeq<JvmObjectReference>(
                 columnArray.Select(column => (column as ColumnIpcProxy).ScalaColumnReference));
         }
     }

     var columnId = (string)SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.sql.functions", name, argument);

     return new ColumnIpcProxy(new JvmObjectReference(columnId));
 }
 /// <summary>
 /// Unions one DStream with further DStreams by invoking <c>union</c> on the JVM-side
 /// streaming context reference.
 /// </summary>
 /// <param name="firstDStream">First stream; treated as a <c>DStreamIpcProxy</c>.</param>
 /// <param name="otherDStreams">Remaining streams; each is treated as a <c>DStreamIpcProxy</c> and collected into a Java list.</param>
 /// <returns>A <c>DStreamIpcProxy</c> wrapping the reference id returned by the JVM call.</returns>
 public IDStreamProxy Union(IDStreamProxy firstDStream, IDStreamProxy[] otherDStreams)
 {
     var headReference  = (firstDStream as DStreamIpcProxy).javaDStreamReference;
     var tailReferences = JvmBridgeUtils.GetJavaList<JvmObjectReference>(
         otherDStreams.Select(stream => (stream as DStreamIpcProxy).javaDStreamReference));

     var unionId = (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
         jvmJavaStreamingReference,
         "union",
         new object[] { headReference, tailReferences });

     return new DStreamIpcProxy(new JvmObjectReference(unionId));
 }
        /// <summary>
        /// Constructs an <c>org.apache.spark.streaming.api.csharp.CSharpReducedWindowedDStream</c>
        /// on the JVM and wraps it, together with its <c>asJavaDStream</c> view, in a proxy.
        /// </summary>
        /// <param name="jdstream">Parent stream; treated as a <c>DStreamIpcProxy</c>, whose <c>jvmDStreamReference</c> is passed to the constructor.</param>
        /// <param name="func">Forwarded unchanged to the JVM constructor.</param>
        /// <param name="invFunc">Forwarded unchanged to the JVM constructor.</param>
        /// <param name="windowSeconds">Converted with <c>JvmBridgeUtils.GetJavaDuration</c>.</param>
        /// <param name="slideSeconds">Converted with <c>JvmBridgeUtils.GetJavaDuration</c>.</param>
        /// <param name="serializationMode">Forwarded unchanged to the JVM constructor.</param>
        /// <returns>A <c>DStreamIpcProxy</c> holding both the Java DStream reference and the constructed DStream reference.</returns>
        public IDStreamProxy CreateCSharpReducedWindowedDStream(IDStreamProxy jdstream, byte[] func, byte[] invFunc, int windowSeconds, int slideSeconds, string serializationMode)
        {
            var windowLength  = JvmBridgeUtils.GetJavaDuration(windowSeconds);
            var slideInterval = JvmBridgeUtils.GetJavaDuration(slideSeconds);

            object[] constructorArgs =
            {
                (jdstream as DStreamIpcProxy).jvmDStreamReference,
                func,
                invFunc,
                windowLength,
                slideInterval,
                serializationMode
            };

            var reducedStream = SparkCLRIpcProxy.JvmBridge.CallConstructor(
                "org.apache.spark.streaming.api.csharp.CSharpReducedWindowedDStream", constructorArgs);

            var javaViewId = (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(reducedStream, "asJavaDStream");

            return new DStreamIpcProxy(new JvmObjectReference(javaViewId), reducedStream);
        }
        /// <summary>
        /// Creates a DStream by calling the static <c>createUnionStream</c> method of
        /// <c>org.apache.spark.streaming.api.csharp.EventHubsUtils</c> on the JVM.
        /// </summary>
        /// <param name="eventHubsParams">Converted to a Scala mutable map before the call.</param>
        /// <param name="storageLevelType">Converted to a Java storage level via <c>SparkContextIpcProxy.GetJavaStorageLevel</c>.</param>
        /// <returns>A <c>DStreamIpcProxy</c> wrapping the reference returned by the JVM call.</returns>
        public IDStreamProxy EventHubsUnionStream(Dictionary<string, string> eventHubsParams, StorageLevelType storageLevelType)
        {
            var paramsMap    = JvmBridgeUtils.GetScalaMutableMap<string, string>(eventHubsParams);
            var storageLevel = SparkContextIpcProxy.GetJavaStorageLevel(storageLevelType);

            var streamResult = SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod(
                "org.apache.spark.streaming.api.csharp.EventHubsUtils",
                "createUnionStream",
                new object[] { jvmJavaStreamingReference, paramsMap, storageLevel });

            var streamReference = new JvmObjectReference(streamResult.ToString());

            return new DStreamIpcProxy(streamReference);
        }
        /// <summary>
        /// Builds the proxy: stores the Spark context and its proxy, constructs
        /// <c>org.apache.spark.streaming.StreamingContext</c> and the wrapping
        /// <c>org.apache.spark.streaming.api.java.JavaStreamingContext</c> on the JVM, then
        /// starts the accumulator server and the callback server.
        /// </summary>
        /// <param name="sparkContext">Spark context whose <c>SparkContextProxy</c> is expected to be a <c>SparkContextIpcProxy</c>.</param>
        /// <param name="durationSeconds">Converted with <c>JvmBridgeUtils.GetJavaDuration</c> and passed to the streaming context constructor.</param>
        public StreamingContextIpcProxy(SparkContext sparkContext, int durationSeconds)
        {
            this.sparkContext = sparkContext;
            sparkContextProxy = sparkContext.SparkContextProxy;

            var batchDuration = JvmBridgeUtils.GetJavaDuration(durationSeconds);
            var contextProxy  = sparkContextProxy as SparkContextIpcProxy;
            JvmObjectReference sparkContextReference = contextProxy.JvmSparkContextReference;

            // The Java streaming context is constructed from the Scala one created just before it.
            jvmStreamingContextReference = SparkCLRIpcProxy.JvmBridge.CallConstructor(
                "org.apache.spark.streaming.StreamingContext",
                new object[] { sparkContextReference, batchDuration });
            jvmJavaStreamingReference = SparkCLRIpcProxy.JvmBridge.CallConstructor(
                "org.apache.spark.streaming.api.java.JavaStreamingContext",
                new object[] { jvmStreamingContextReference });

            StartAccumulatorServer(sparkContext);
            StartCallbackServer();
        }
示例#9
0
        /// <summary>
        /// Calls <c>window</c> on the Java DStream reference. When
        /// <paramref name="slideSeconds"/> is positive the call receives both a window and a
        /// slide duration; otherwise it receives the window duration only.
        /// </summary>
        /// <param name="windowSeconds">Window length in seconds; multiplied by 1000 before conversion to a Java duration.</param>
        /// <param name="slideSeconds">Slide interval in seconds; values less than or equal to zero select the single-argument call.</param>
        /// <returns>A <c>DStreamIpcProxy</c> wrapping the reference id returned by the JVM call.</returns>
        public IDStreamProxy Window(int windowSeconds, int slideSeconds = 0)
        {
            var windowLength = JvmBridgeUtils.GetJavaDuration((long)windowSeconds * 1000);

            object[] windowArgs;
            if (slideSeconds > 0)
            {
                var slideInterval = JvmBridgeUtils.GetJavaDuration((long)slideSeconds * 1000);
                windowArgs = new object[] { windowLength, slideInterval };
            }
            else
            {
                windowArgs = new object[] { windowLength };
            }

            var windowedId = (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(javaDStreamReference, "window", windowArgs);

            return new DStreamIpcProxy(new JvmObjectReference(windowedId));
        }
示例#10
0
        /// <summary>
        /// Constructs an <c>org.apache.spark.sql.UserDefinedPythonFunction</c> on the JVM for
        /// the given command and wraps it in a <c>UDFIpcProxy</c>.
        /// </summary>
        /// <param name="name">Forwarded unchanged to the JVM constructor.</param>
        /// <param name="command">Forwarded unchanged to the JVM constructor.</param>
        /// <param name="returnType">
        /// Wrapped in double quotes and passed to <c>parseDataType</c> on a freshly constructed
        /// <c>org.apache.spark.sql.SQLContext</c>; the resulting data type goes to the constructor.
        /// </param>
        /// <returns>A <c>UDFIpcProxy</c> around the constructed JVM object.</returns>
        public IUDFProxy CreateUserDefinedCSharpFunction(string name, byte[] command, string returnType = "string")
        {
            var sqlContext = SparkCLRIpcProxy.JvmBridge.CallConstructor(
                "org.apache.spark.sql.SQLContext", new object[] { jvmSparkContextReference });

            string quotedReturnType = "\"" + returnType + "\"";
            var dataTypeId = (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
                sqlContext, "parseDataType", new object[] { quotedReturnType });
            var dataType = new JvmObjectReference(dataTypeId);

            var broadcasts = JvmBridgeUtils.GetJavaList<JvmObjectReference>(jvmBroadcastReferences);

            // Fresh, empty Java collections are passed in the third and fourth constructor slots.
            var emptyHashtable = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.Hashtable", new object[0]);
            var emptyArrayList = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.ArrayList", new object[0]);

            object[] constructorArgs =
            {
                name,
                command,
                emptyHashtable,
                emptyArrayList,
                SparkCLREnvironment.ConfigurationService.GetCSharpWorkerExePath(),
                "1.0", // NOTE(review): presumably a version string expected by the JVM side - confirm
                broadcasts,
                jvmAccumulatorReference,
                dataType
            };

            var udfReference = SparkCLRIpcProxy.JvmBridge.CallConstructor(
                "org.apache.spark.sql.UserDefinedPythonFunction", constructorArgs);

            return new UDFIpcProxy(udfReference);
        }
示例#11
0
        /// <summary>
        /// Constructs an <c>org.apache.spark.api.csharp.CSharpRDD</c> on the JVM on top of the
        /// given parent RDD and returns a proxy for its <c>asJavaRDD</c> view.
        /// </summary>
        /// <param name="prevJvmRddReference">Parent RDD; treated as an <c>RDDIpcProxy</c>, on whose reference <c>rdd</c> is invoked.</param>
        /// <param name="command">Forwarded unchanged to the JVM constructor.</param>
        /// <param name="environmentVariables">Not read by this method; an empty <c>java.util.Hashtable</c> is passed instead.</param>
        /// <param name="pythonIncludes">Not read by this method; an empty <c>java.util.ArrayList</c> is passed instead.</param>
        /// <param name="preservesPartitioning">Forwarded unchanged to the JVM constructor.</param>
        /// <param name="broadcastVariables">Not read by this method; the instance's <c>jvmBroadcastReferences</c> are passed instead.</param>
        /// <param name="accumulator">Not read by this method; the instance's <c>jvmAccumulatorReference</c> is passed instead.</param>
        /// <returns>An <c>RDDIpcProxy</c> wrapping the reference id returned by <c>asJavaRDD</c>.</returns>
        public IRDDProxy CreateCSharpRdd(IRDDProxy prevJvmRddReference, byte[] command, Dictionary<string, string> environmentVariables, List<string> pythonIncludes, bool preservesPartitioning, List<Broadcast> broadcastVariables, List<byte[]> accumulator)
        {
            var emptyHashtable = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.Hashtable", new object[0]);
            var emptyArrayList = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.ArrayList", new object[0]);
            var broadcasts     = JvmBridgeUtils.GetJavaList<JvmObjectReference>(jvmBroadcastReferences);

            var parentProxy = prevJvmRddReference as RDDIpcProxy;
            var parentRddId = (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(parentProxy.JvmRddReference, "rdd");
            var parentRdd   = new JvmObjectReference(parentRddId);

            object[] constructorArgs =
            {
                parentRdd,
                command,
                emptyHashtable,
                emptyArrayList,
                preservesPartitioning,
                SparkCLREnvironment.ConfigurationService.GetCSharpWorkerExePath(),
                "1.0", // NOTE(review): presumably a version string expected by the JVM side - confirm
                broadcasts,
                jvmAccumulatorReference
            };

            var csharpRdd = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.api.csharp.CSharpRDD", constructorArgs);
            var javaRddId = (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(csharpRdd, "asJavaRDD");

            return new RDDIpcProxy(new JvmObjectReference(javaRddId));
        }
        /// <summary>
        /// Creates a DStream by constructing
        /// <c>org.apache.spark.streaming.kafka.KafkaUtilsPythonHelper</c> on the JVM and
        /// invoking its <c>createDirectStreamWithoutMessageHandler</c> method.
        /// </summary>
        /// <param name="topics">Converted to a Java set and passed to the helper.</param>
        /// <param name="kafkaParams">Converted to a Java map and passed to the helper.</param>
        /// <param name="fromOffsets">
        /// Offsets keyed by a string of the form <c>part0:part1</c>; the text before the first
        /// colon and the integer after it are passed to the
        /// <c>kafka.common.TopicAndPartition</c> constructor. A key without a colon or with a
        /// non-integer second part throws while the map is built.
        /// </param>
        /// <returns>A <c>DStreamIpcProxy</c> wrapping the reference returned by the helper.</returns>
        public IDStreamProxy DirectKafkaStream(List<string> topics, Dictionary<string, string> kafkaParams, Dictionary<string, long> fromOffsets)
        {
            JvmObjectReference jtopics      = JvmBridgeUtils.GetJavaSet<string>(topics);
            JvmObjectReference jkafkaParams = JvmBridgeUtils.GetJavaMap<string, string>(kafkaParams);

            var jTopicAndPartitions = fromOffsets.Select(x =>
            {
                // Split the key once instead of once per component.
                var keyParts = x.Key.Split(':');

                // The partition number is machine-formatted text, so parse it independently
                // of the current thread culture.
                var partition = int.Parse(keyParts[1], System.Globalization.CultureInfo.InvariantCulture);

                var jTopicAndPartition = SparkCLRIpcProxy.JvmBridge.CallConstructor(
                    "kafka.common.TopicAndPartition", new object[] { keyParts[0], partition });

                return new KeyValuePair<JvmObjectReference, long>(jTopicAndPartition, x.Value);
            });

            JvmObjectReference jfromOffsets = JvmBridgeUtils.GetJavaMap<JvmObjectReference, long>(jTopicAndPartitions);
            // KafkaUtilsPythonHelper: external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala
            JvmObjectReference jhelper = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.kafka.KafkaUtilsPythonHelper", new object[] { });
            var jstream = new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jhelper, "createDirectStreamWithoutMessageHandler", new object[] { jvmJavaStreamingReference, jkafkaParams, jtopics, jfromOffsets }).ToString());

            return new DStreamIpcProxy(jstream);
        }
示例#13
0
        /// <summary>
        /// Creates a DStream by constructing
        /// <c>org.apache.spark.streaming.kafka.KafkaUtilsCSharpHelper</c> on the JVM and
        /// invoking its <c>createDirectStreamWithoutMessageHandler</c> method with the extra
        /// partition count, read function and serialization mode.
        /// </summary>
        /// <param name="topics">Converted to a Java set and passed to the helper.</param>
        /// <param name="kafkaParams">Converted to a Java map and passed to the helper.</param>
        /// <param name="fromOffsets">
        /// Offsets whose first item is a string of the form <c>part0:part1</c>; the text before
        /// the first colon and the integer after it are passed to the
        /// <c>kafka.common.TopicAndPartition</c> constructor. An item without a colon or with a
        /// non-integer second part throws while the map is built.
        /// </param>
        /// <param name="numPartitions">Forwarded unchanged to the helper.</param>
        /// <param name="readFunc">Forwarded unchanged to the helper.</param>
        /// <param name="serializationMode">Forwarded unchanged to the helper.</param>
        /// <returns>A <c>DStreamIpcProxy</c> wrapping the reference returned by the helper.</returns>
        public IDStreamProxy DirectKafkaStreamWithRepartition(List<string> topics, IEnumerable<Tuple<string, string>> kafkaParams, IEnumerable<Tuple<string, long>> fromOffsets, int numPartitions, byte[] readFunc, string serializationMode)
        {
            JvmObjectReference jtopics      = JvmBridgeUtils.GetJavaSet<string>(topics);
            JvmObjectReference jkafkaParams = JvmBridgeUtils.GetJavaMap<string, string>(kafkaParams);

            var jTopicAndPartitions = fromOffsets.Select(x =>
            {
                // Split the key once instead of once per component.
                var keyParts = x.Item1.Split(':');

                // The partition number is machine-formatted text, so parse it independently
                // of the current thread culture.
                var partition = int.Parse(keyParts[1], System.Globalization.CultureInfo.InvariantCulture);

                var jTopicAndPartition = SparkCLRIpcProxy.JvmBridge.CallConstructor(
                    "kafka.common.TopicAndPartition", new object[] { keyParts[0], partition });

                return new Tuple<JvmObjectReference, long>(jTopicAndPartition, x.Item2);
            });

            JvmObjectReference jfromOffsets = JvmBridgeUtils.GetJavaMap<JvmObjectReference, long>(jTopicAndPartitions);
            // SparkCLR\scala\src\main\org\apache\spark\streaming\api\kafka\KafkaUtilsCSharpHelper.scala
            JvmObjectReference jhelper = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.kafka.KafkaUtilsCSharpHelper", new object[] { });
            var jstream = new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jhelper, "createDirectStreamWithoutMessageHandler",
                                                                                                    new object[] { jvmJavaStreamingReference, jkafkaParams, jtopics, jfromOffsets, numPartitions, readFunc, serializationMode }).ToString());

            return new DStreamIpcProxy(jstream);
        }
示例#14
0
        /// <summary>
        /// Invokes <c>sampleByKey</c> on the JVM RDD reference and wraps the result.
        /// </summary>
        /// <param name="withReplacement">Forwarded unchanged to the JVM call.</param>
        /// <param name="fractions">Key/fraction pairs converted to a Java map before the call.</param>
        /// <param name="seed">Forwarded unchanged to the JVM call.</param>
        /// <returns>An <c>RDDIpcProxy</c> wrapping the reference id returned by the JVM call.</returns>
        public IRDDProxy SampleByKey(bool withReplacement, IEnumerable<Tuple<string, double>> fractions, long seed)
        {
            var fractionMap = JvmBridgeUtils.GetJavaMap(fractions) as JvmObjectReference;

            object[] arguments = { withReplacement, fractionMap, seed };
            var sampledId = (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmRddReference, "sampleByKey", arguments);

            return new RDDIpcProxy(new JvmObjectReference(sampledId));
        }
示例#15
0
        /// <summary>
        /// Calls the static <c>runJob</c> method of <c>org.apache.spark.api.python.PythonRDD</c>
        /// on the JVM and parses the textual form of its result as an integer.
        /// </summary>
        /// <param name="rdd">RDD to run against; treated as an <c>RDDIpcProxy</c>.</param>
        /// <param name="partitions">Partition indices converted to a Java list before the call.</param>
        /// <returns>The integer parsed from the JVM call's result.</returns>
        public int RunJob(IRDDProxy rdd, IEnumerable<int> partitions)
        {
            var partitionList = JvmBridgeUtils.GetJavaList<int>(partitions);

            object[] arguments = { jvmSparkContextReference, (rdd as RDDIpcProxy).JvmRddReference, partitionList };
            var jobResult = SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "runJob", arguments);

            return int.Parse(jobResult.ToString());
        }
        /// <summary>
        /// Invokes <c>remember</c> on the JVM streaming context reference with the given duration.
        /// </summary>
        /// <param name="durationSeconds">Converted with <c>JvmBridgeUtils.GetJavaDuration</c> before the call.</param>
        public void Remember(int durationSeconds)
        {
            object[] arguments = { JvmBridgeUtils.GetJavaDuration(durationSeconds) };

            SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmStreamingContextReference, "remember", arguments);
        }
示例#17
0
        /// <summary>
        /// Calls the static <c>saveAsHadoopFile</c> method of
        /// <c>org.apache.spark.api.python.PythonRDD</c> on the JVM for this RDD.
        /// </summary>
        /// <param name="path">Forwarded unchanged to the JVM call.</param>
        /// <param name="outputFormatClass">Forwarded unchanged to the JVM call.</param>
        /// <param name="keyClass">Forwarded unchanged to the JVM call.</param>
        /// <param name="valueClass">Forwarded unchanged to the JVM call.</param>
        /// <param name="conf">Key/value pairs converted to a Java map before the call.</param>
        /// <param name="compressionCodecClass">Forwarded unchanged to the JVM call.</param>
        public void SaveAsHadoopFile(string path, string outputFormatClass, string keyClass, string valueClass, IEnumerable<Tuple<string, string>> conf, string compressionCodecClass)
        {
            var hadoopConf = JvmBridgeUtils.GetJavaMap<string, string>(conf);

            // The second, seventh and eighth arguments are fixed by this method: false, null, null.
            var arguments = new object[]
            {
                jvmRddReference,
                false,
                path,
                outputFormatClass,
                keyClass,
                valueClass,
                null,
                null,
                hadoopConf,
                compressionCodecClass
            };

            SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "saveAsHadoopFile", arguments);
        }
示例#18
0
        /// <summary>
        /// Calls the static <c>saveAsHadoopDataset</c> method of
        /// <c>org.apache.spark.api.python.PythonRDD</c> on the JVM for this RDD.
        /// </summary>
        /// <param name="conf">Key/value pairs converted to a Java map before the call.</param>
        public void SaveAsHadoopDataset(IEnumerable<Tuple<string, string>> conf)
        {
            var hadoopConf = JvmBridgeUtils.GetJavaMap<string, string>(conf);

            // Apart from the RDD reference and the configuration map, every argument is a fixed literal.
            var arguments = new object[] { jvmRddReference, false, hadoopConf, null, null, false };

            SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "saveAsHadoopDataset", arguments);
        }
示例#19
0
        /// <summary>
        /// Invokes <c>checkpoint</c> on the JVM DStream reference with the given interval.
        /// </summary>
        /// <param name="intervalMs">Converted with <c>JvmBridgeUtils.GetJavaDuration</c> before the call.</param>
        public void Checkpoint(long intervalMs)
        {
            object[] arguments = { JvmBridgeUtils.GetJavaDuration(intervalMs) };

            SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmDStreamReference, "checkpoint", arguments);
        }