/// <summary>
/// Builds the union of the given RDDs by calling JavaSparkContext.union(first, rest)
/// on the JVM side and wrapping the resulting JavaRDD reference.
/// </summary>
/// <param name="rdds">RDD proxies to union; must contain at least one element.</param>
/// <returns>A proxy for the unioned JVM RDD.</returns>
public IRDDProxy Union(IEnumerable<IRDDProxy> rdds)
{
    // Materialize once: the original code called First() and then Skip(1),
    // enumerating a potentially lazy sequence twice (and re-running any
    // deferred work it carries). A single ToList() avoids that.
    var rddList = rdds.ToList();
    var jfirst = (rddList[0] as RDDIpcProxy).JvmRddReference;
    var jrest = JvmBridgeUtils.GetJavaList<JvmObjectReference>(
        rddList.Skip(1).Select(r => (r as RDDIpcProxy).JvmRddReference));
    var jvmRddReference = new JvmObjectReference(
        (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
            jvmJavaContextReference, "union", new object[] { jfirst, jrest }));
    return new RDDIpcProxy(jvmRddReference);
}
/// <summary>
/// Creates an RDD from an arbitrary Hadoop InputFormat by delegating to the
/// static JVM method PythonRDD.hadoopRDD.
/// </summary>
/// <returns>A proxy for the created JVM RDD.</returns>
public IRDDProxy HadoopRDD(string inputFormatClass, string keyClass, string valueClass, string keyConverterClass, string valueConverterClass, IEnumerable<Tuple<string, string>> conf, int batchSize)
{
    // Convert the configuration pairs into a java.util.HashMap reference.
    var javaConf = JvmBridgeUtils.GetJavaHashMap<string, string>(conf);
    var callArgs = new object[]
    {
        jvmJavaContextReference, inputFormatClass, keyClass, valueClass,
        keyConverterClass, valueConverterClass, javaConf, batchSize
    };
    var rddId = (string)SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod(
        "org.apache.spark.api.python.PythonRDD", "hadoopRDD", callArgs);
    return new RDDIpcProxy(new JvmObjectReference(rddId));
}
/// <summary>
/// Creates a receiver-based Kafka DStream via the JVM-side
/// KafkaUtilsPythonHelper.createStream.
/// </summary>
/// <param name="topics">Map of topic name to number of partitions to consume.</param>
/// <param name="kafkaParams">Kafka consumer configuration parameters.</param>
/// <param name="storageLevelType">Storage level for the received data.</param>
public IDStreamProxy KafkaStream(Dictionary<string, int> topics, Dictionary<string, string> kafkaParams, StorageLevelType storageLevelType)
{
    var javaTopics = JvmBridgeUtils.GetJavaMap<string, int>(topics);
    var javaParams = JvmBridgeUtils.GetJavaMap<string, string>(kafkaParams);
    var javaLevel = SparkContextIpcProxy.GetJavaStorageLevel(storageLevelType);
    // KafkaUtilsPythonHelper lives in
    // external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala
    var helper = SparkCLRIpcProxy.JvmBridge.CallConstructor(
        "org.apache.spark.streaming.kafka.KafkaUtilsPythonHelper", new object[] { });
    var streamId = SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
        helper, "createStream",
        new object[] { jvmJavaStreamingReference, javaParams, javaTopics, javaLevel }).ToString();
    return new DStreamIpcProxy(new JvmObjectReference(streamId));
}
/// <summary>
/// Invokes the static JVM method org.apache.spark.sql.functions.&lt;name&gt; with the
/// given argument, unwrapping C#-side column proxies into raw JVM references first.
/// </summary>
/// <param name="name">Name of the function on org.apache.spark.sql.functions.</param>
/// <param name="self">Either a single column proxy, an array of column proxies, or a plain value.</param>
public IColumnProxy CreateFunction(string name, object self)
{
    // A single column proxy is unwrapped to its Scala column reference;
    // an array of proxies becomes a Scala Seq of references.
    if (self is ColumnIpcProxy)
    {
        self = ((ColumnIpcProxy)self).ScalaColumnReference;
    }
    else if (self is IColumnProxy[])
    {
        var columns = (IColumnProxy[])self;
        self = JvmBridgeUtils.GetJavaSeq<JvmObjectReference>(
            columns.Select(x => (x as ColumnIpcProxy).ScalaColumnReference));
    }
    var columnId = (string)SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod(
        "org.apache.spark.sql.functions", name, self);
    return new ColumnIpcProxy(new JvmObjectReference(columnId));
}
/// <summary>
/// Unions the first DStream with the other DStreams by calling
/// JavaStreamingContext.union(first, rest) on the JVM side.
/// </summary>
public IDStreamProxy Union(IDStreamProxy firstDStream, IDStreamProxy[] otherDStreams)
{
    var first = (firstDStream as DStreamIpcProxy).javaDStreamReference;
    var rest = JvmBridgeUtils.GetJavaList<JvmObjectReference>(
        otherDStreams.Select(x => (x as DStreamIpcProxy).javaDStreamReference));
    var unionId = (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
        jvmJavaStreamingReference, "union", new object[] { first, rest });
    return new DStreamIpcProxy(new JvmObjectReference(unionId));
}
/// <summary>
/// Constructs a JVM-side CSharpReducedWindowedDStream over the given DStream and
/// returns a proxy wrapping both the Scala DStream and its JavaDStream view.
/// </summary>
/// <param name="func">Serialized reduce function.</param>
/// <param name="invFunc">Serialized inverse reduce function.</param>
public IDStreamProxy CreateCSharpReducedWindowedDStream(IDStreamProxy jdstream, byte[] func, byte[] invFunc, int windowSeconds, int slideSeconds, string serializationMode)
{
    // NOTE(review): Window() scales seconds to milliseconds before calling
    // GetJavaDuration, while this method passes seconds directly — presumably
    // a seconds-based overload is used here; confirm against JvmBridgeUtils.
    var windowDuration = JvmBridgeUtils.GetJavaDuration(windowSeconds);
    var slideDuration = JvmBridgeUtils.GetJavaDuration(slideSeconds);
    var scalaDStream = SparkCLRIpcProxy.JvmBridge.CallConstructor(
        "org.apache.spark.streaming.api.csharp.CSharpReducedWindowedDStream",
        new object[]
        {
            (jdstream as DStreamIpcProxy).jvmDStreamReference,
            func, invFunc, windowDuration, slideDuration, serializationMode
        });
    // Expose the Scala DStream as a JavaDStream for the proxy layer.
    var javaDStream = new JvmObjectReference(
        (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(scalaDStream, "asJavaDStream"));
    return new DStreamIpcProxy(javaDStream, scalaDStream);
}
/// <summary>
/// Creates a unioned EventHubs DStream via the static JVM method
/// EventHubsUtils.createUnionStream.
/// </summary>
/// <param name="eventHubsParams">EventHubs connection/configuration parameters.</param>
/// <param name="storageLevelType">Storage level for the received data.</param>
public IDStreamProxy EventHubsUnionStream(Dictionary<string, string> eventHubsParams, StorageLevelType storageLevelType)
{
    // The Scala helper expects a mutable Scala map rather than a java.util.Map.
    var paramsRef = JvmBridgeUtils.GetScalaMutableMap<string, string>(eventHubsParams);
    var levelRef = SparkContextIpcProxy.GetJavaStorageLevel(storageLevelType);
    var streamId = SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod(
        "org.apache.spark.streaming.api.csharp.EventHubsUtils",
        "createUnionStream",
        new object[] { jvmJavaStreamingReference, paramsRef, levelRef }).ToString();
    return new DStreamIpcProxy(new JvmObjectReference(streamId));
}
/// <summary>
/// Creates the JVM-side StreamingContext and its JavaStreamingContext wrapper for
/// the given SparkContext and batch duration, then starts the accumulator and
/// callback servers used for .NET &lt;-&gt; JVM communication.
/// </summary>
/// <param name="sparkContext">The C#-side SparkContext to stream over.</param>
/// <param name="durationSeconds">Batch interval in seconds.</param>
public StreamingContextIpcProxy(SparkContext sparkContext, int durationSeconds)
{
    this.sparkContext = sparkContext;
    sparkContextProxy = sparkContext.SparkContextProxy;

    var batchDuration = JvmBridgeUtils.GetJavaDuration(durationSeconds);
    var scalaContextRef = (sparkContextProxy as SparkContextIpcProxy).JvmSparkContextReference;

    // Build the Scala StreamingContext first, then wrap it in the Java API facade.
    jvmStreamingContextReference = SparkCLRIpcProxy.JvmBridge.CallConstructor(
        "org.apache.spark.streaming.StreamingContext",
        new object[] { scalaContextRef, batchDuration });
    jvmJavaStreamingReference = SparkCLRIpcProxy.JvmBridge.CallConstructor(
        "org.apache.spark.streaming.api.java.JavaStreamingContext",
        new object[] { jvmStreamingContextReference });

    StartAccumulatorServer(sparkContext);
    StartCallbackServer();
}
/// <summary>
/// Returns a windowed view of this DStream by calling DStream.window on the JVM side.
/// </summary>
/// <param name="windowSeconds">Window length in seconds (converted to ms for the JVM Duration).</param>
/// <param name="slideSeconds">Slide interval in seconds; values &lt;= 0 omit the slide
/// argument so the JVM side uses its default.</param>
public IDStreamProxy Window(int windowSeconds, int slideSeconds = 0)
{
    var windowDuration = JvmBridgeUtils.GetJavaDuration((long)windowSeconds * 1000);
    // Choose the one- or two-argument overload of window() up front; both
    // branches of the original code performed the same call and wrapping.
    object[] callArgs = slideSeconds <= 0
        ? new object[] { windowDuration }
        : new object[] { windowDuration, JvmBridgeUtils.GetJavaDuration((long)slideSeconds * 1000) };
    var windowId = (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
        javaDStreamReference, "window", callArgs);
    return new DStreamIpcProxy(new JvmObjectReference(windowId));
}
/// <summary>
/// Registers a user-defined C# function with the JVM by constructing a
/// UserDefinedPythonFunction whose worker is the C# worker executable.
/// </summary>
/// <param name="name">Name of the UDF.</param>
/// <param name="command">Serialized C# function body.</param>
/// <param name="returnType">Simple type name parsed into a Spark SQL DataType (default "string").</param>
public IUDFProxy CreateUserDefinedCSharpFunction(string name, byte[] command, string returnType = "string")
{
    var sqlContext = SparkCLRIpcProxy.JvmBridge.CallConstructor(
        "org.apache.spark.sql.SQLContext", new object[] { jvmSparkContextReference });
    // parseDataType expects a JSON string, hence the added quotes.
    var dataType = new JvmObjectReference(
        (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
            sqlContext, "parseDataType", new object[] { "\"" + returnType + "\"" }));
    var broadcastVars = JvmBridgeUtils.GetJavaList<JvmObjectReference>(jvmBroadcastReferences);
    // Empty env map and includes list — the C# worker does not use them.
    var envMap = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.Hashtable", new object[] { });
    var includes = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.ArrayList", new object[] { });
    var udf = SparkCLRIpcProxy.JvmBridge.CallConstructor(
        "org.apache.spark.sql.UserDefinedPythonFunction",
        new object[]
        {
            name, command, envMap, includes,
            SparkCLREnvironment.ConfigurationService.GetCSharpWorkerExePath(),
            "1.0", broadcastVars, jvmAccumulatorReference, dataType
        });
    return new UDFIpcProxy(udf);
}
/// <summary>
/// Wraps the given RDD in a JVM-side CSharpRDD that pipes partitions through the
/// C# worker executing the serialized <paramref name="command"/>.
/// </summary>
/// <remarks>
/// NOTE(review): environmentVariables, pythonIncludes, broadcastVariables and
/// accumulator are accepted but never used here — the JVM call uses the
/// instance-level jvmBroadcastReferences/jvmAccumulatorReference instead;
/// presumably kept for interface compatibility — confirm.
/// </remarks>
public IRDDProxy CreateCSharpRdd(IRDDProxy prevJvmRddReference, byte[] command, Dictionary<string, string> environmentVariables, List<string> pythonIncludes, bool preservesPartitioning, List<Broadcast> broadcastVariables, List<byte[]> accumulator)
{
    // Empty env map and includes list passed to the CSharpRDD constructor.
    var envMap = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.Hashtable", new object[] { });
    var includes = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.ArrayList", new object[] { });
    var broadcastVars = JvmBridgeUtils.GetJavaList<JvmObjectReference>(jvmBroadcastReferences);
    // Unwrap the JavaRDD to the underlying Scala RDD before constructing CSharpRDD.
    var scalaRdd = new JvmObjectReference(
        (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
            (prevJvmRddReference as RDDIpcProxy).JvmRddReference, "rdd"));
    var csharpRdd = SparkCLRIpcProxy.JvmBridge.CallConstructor(
        "org.apache.spark.api.csharp.CSharpRDD",
        new object[]
        {
            scalaRdd, command, envMap, includes, preservesPartitioning,
            SparkCLREnvironment.ConfigurationService.GetCSharpWorkerExePath(),
            "1.0", broadcastVars, jvmAccumulatorReference
        });
    var javaRddId = (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(csharpRdd, "asJavaRDD");
    return new RDDIpcProxy(new JvmObjectReference(javaRddId));
}
/// <summary>
/// Creates a direct (receiverless) Kafka DStream via the JVM-side
/// KafkaUtilsPythonHelper.createDirectStreamWithoutMessageHandler.
/// </summary>
/// <param name="topics">Topics to consume.</param>
/// <param name="kafkaParams">Kafka consumer configuration parameters.</param>
/// <param name="fromOffsets">Starting offsets keyed by "topic:partition".</param>
public IDStreamProxy DirectKafkaStream(List<string> topics, Dictionary<string, string> kafkaParams, Dictionary<string, long> fromOffsets)
{
    JvmObjectReference jtopics = JvmBridgeUtils.GetJavaSet<string>(topics);
    JvmObjectReference jkafkaParams = JvmBridgeUtils.GetJavaMap<string, string>(kafkaParams);
    // Split each "topic:partition" key once (the original split it twice per entry)
    // and build a JVM TopicAndPartition for it.
    var jTopicAndPartitions = fromOffsets.Select(x =>
    {
        var topicAndPartition = x.Key.Split(':');
        return new KeyValuePair<JvmObjectReference, long>(
            SparkCLRIpcProxy.JvmBridge.CallConstructor(
                "kafka.common.TopicAndPartition",
                new object[] { topicAndPartition[0], int.Parse(topicAndPartition[1]) }),
            x.Value);
    });
    JvmObjectReference jfromOffsets = JvmBridgeUtils.GetJavaMap<JvmObjectReference, long>(jTopicAndPartitions);
    // KafkaUtilsPythonHelper: external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala
    JvmObjectReference jhelper = SparkCLRIpcProxy.JvmBridge.CallConstructor(
        "org.apache.spark.streaming.kafka.KafkaUtilsPythonHelper", new object[] { });
    var jstream = new JvmObjectReference(
        SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
            jhelper, "createDirectStreamWithoutMessageHandler",
            new object[] { jvmJavaStreamingReference, jkafkaParams, jtopics, jfromOffsets }).ToString());
    return new DStreamIpcProxy(jstream);
}
/// <summary>
/// Creates a direct Kafka DStream that is repartitioned to
/// <paramref name="numPartitions"/> on the JVM side, via
/// KafkaUtilsCSharpHelper.createDirectStreamWithoutMessageHandler.
/// </summary>
/// <param name="fromOffsets">Starting offsets as ("topic:partition", offset) tuples.</param>
/// <param name="readFunc">Serialized C# read function.</param>
/// <param name="serializationMode">Serialization mode used by the C# worker.</param>
public IDStreamProxy DirectKafkaStreamWithRepartition(List<string> topics, IEnumerable<Tuple<string, string>> kafkaParams, IEnumerable<Tuple<string, long>> fromOffsets, int numPartitions, byte[] readFunc, string serializationMode)
{
    JvmObjectReference jtopics = JvmBridgeUtils.GetJavaSet<string>(topics);
    JvmObjectReference jkafkaParams = JvmBridgeUtils.GetJavaMap<string, string>(kafkaParams);
    // Split each "topic:partition" string once (the original split it twice per entry)
    // and build a JVM TopicAndPartition for it.
    var jTopicAndPartitions = fromOffsets.Select(x =>
    {
        var topicAndPartition = x.Item1.Split(':');
        return new Tuple<JvmObjectReference, long>(
            SparkCLRIpcProxy.JvmBridge.CallConstructor(
                "kafka.common.TopicAndPartition",
                new object[] { topicAndPartition[0], int.Parse(topicAndPartition[1]) }),
            x.Item2);
    });
    JvmObjectReference jfromOffsets = JvmBridgeUtils.GetJavaMap<JvmObjectReference, long>(jTopicAndPartitions);
    // SparkCLR\scala\src\main\org\apache\spark\streaming\api\kafka\KafkaUtilsCSharpHelper.scala
    JvmObjectReference jhelper = SparkCLRIpcProxy.JvmBridge.CallConstructor(
        "org.apache.spark.streaming.kafka.KafkaUtilsCSharpHelper", new object[] { });
    var jstream = new JvmObjectReference(
        SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
            jhelper, "createDirectStreamWithoutMessageHandler",
            new object[]
            {
                jvmJavaStreamingReference, jkafkaParams, jtopics, jfromOffsets,
                (int)numPartitions, readFunc, serializationMode
            }).ToString());
    return new DStreamIpcProxy(jstream);
}
/// <summary>
/// Returns a sampled subset of this pair RDD by key, delegating to
/// RDD.sampleByKey on the JVM side.
/// </summary>
/// <param name="fractions">Per-key sampling fractions as (key, fraction) tuples.</param>
/// <param name="seed">Random seed for reproducible sampling.</param>
public IRDDProxy SampleByKey(bool withReplacement, IEnumerable<Tuple<string, double>> fractions, long seed)
{
    var jfractions = JvmBridgeUtils.GetJavaMap(fractions) as JvmObjectReference;
    var sampledId = (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
        jvmRddReference, "sampleByKey",
        new object[] { withReplacement, jfractions, seed });
    return new RDDIpcProxy(new JvmObjectReference(sampledId));
}
/// <summary>
/// Runs a job on the given partitions of the RDD via the static JVM method
/// PythonRDD.runJob and returns its integer result.
/// </summary>
/// <param name="partitions">Indices of the partitions to run the job on.</param>
public int RunJob(IRDDProxy rdd, IEnumerable<int> partitions)
{
    var jpartitions = JvmBridgeUtils.GetJavaList<int>(partitions);
    var result = SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod(
        "org.apache.spark.api.python.PythonRDD", "runJob",
        new object[] { jvmSparkContextReference, (rdd as RDDIpcProxy).JvmRddReference, jpartitions });
    return int.Parse(result.ToString());
}
/// <summary>
/// Sets how long the streaming context remembers generated RDDs by calling
/// StreamingContext.remember on the JVM side.
/// </summary>
/// <param name="durationSeconds">Remember duration in seconds.</param>
public void Remember(int durationSeconds)
{
    var rememberDuration = JvmBridgeUtils.GetJavaDuration(durationSeconds);
    SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
        jvmStreamingContextReference, "remember", new object[] { rememberDuration });
}
/// <summary>
/// Saves this RDD to a Hadoop file via the static JVM method
/// PythonRDD.saveAsHadoopFile.
/// </summary>
/// <param name="compressionCodecClass">Fully-qualified codec class name, or null for none.</param>
public void SaveAsHadoopFile(string path, string outputFormatClass, string keyClass, string valueClass, IEnumerable<Tuple<string, string>> conf, string compressionCodecClass)
{
    var javaConf = JvmBridgeUtils.GetJavaMap<string, string>(conf);
    // The boolean/null arguments mirror PythonRDD.saveAsHadoopFile's signature
    // (batchSerialized flag, key/value converters unused from the C# side).
    var callArgs = new object[]
    {
        jvmRddReference, false, path, outputFormatClass, keyClass, valueClass,
        null, null, javaConf, compressionCodecClass
    };
    SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod(
        "org.apache.spark.api.python.PythonRDD", "saveAsHadoopFile", callArgs);
}
/// <summary>
/// Saves this RDD to a Hadoop dataset described entirely by the given job
/// configuration, via the static JVM method PythonRDD.saveAsHadoopDataset.
/// </summary>
/// <param name="conf">Hadoop job configuration as (key, value) tuples.</param>
public void SaveAsHadoopDataset(IEnumerable<Tuple<string, string>> conf)
{
    var javaConf = JvmBridgeUtils.GetJavaMap<string, string>(conf);
    var callArgs = new object[] { jvmRddReference, false, javaConf, null, null, false };
    SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod(
        "org.apache.spark.api.python.PythonRDD", "saveAsHadoopDataset", callArgs);
}
/// <summary>
/// Enables periodic checkpointing of this DStream by calling DStream.checkpoint
/// on the JVM side.
/// </summary>
/// <param name="intervalMs">Checkpoint interval; the parameter name suggests
/// milliseconds — NOTE(review): other call sites pass seconds to GetJavaDuration,
/// confirm the expected unit against JvmBridgeUtils.</param>
public void Checkpoint(long intervalMs)
{
    var checkpointInterval = JvmBridgeUtils.GetJavaDuration(intervalMs);
    SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
        jvmDStreamReference, "checkpoint", new object[] { checkpointInterval });
}