/// <summary>
/// Create a PairwiseRDD.
/// </summary>
/// <param name="jvmReferenceOfByteArrayRdd">Proxy of the RDD of serialized key-value pairs to partition.</param>
/// <param name="numPartitions">Number of partitions in the resulting RDD.</param>
/// <param name="partitionFuncId">Globally unique id of the partitioner, used to compare PythonPartitioners in the JVM.</param>
/// <returns>Proxy of the partitioned RDD.</returns>
public IRDDProxy CreatePairwiseRDD(IRDDProxy jvmReferenceOfByteArrayRdd, int numPartitions, long partitionFuncId)
{
    var rdd = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod((jvmReferenceOfByteArrayRdd as RDDIpcProxy).JvmRddReference, "rdd"));
    var pairwiseRdd = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.api.python.PairwiseRDD", rdd);
    var pairRddJvmReference = new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(pairwiseRdd, "asJavaPairRDD", new object[] { }).ToString());

    var jpartitionerJavaReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.api.python.PythonPartitioner", new object[] { numPartitions, partitionFuncId });
    var partitionedPairRddJvmReference = new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(pairRddJvmReference, "partitionBy", new object[] { jpartitionerJavaReference }).ToString());
    var jvmRddReference = new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "valueOfPair", new object[] { partitionedPairRddJvmReference }).ToString());
    return new RDDIpcProxy(jvmRddReference);
}
private static T WrapAsType<T>(JvmObjectReference reference)
{
    ConstructorInfo constructor = typeof(T)
        .GetConstructors(BindingFlags.NonPublic | BindingFlags.Instance)
        .Single(c =>
        {
            ParameterInfo[] parameters = c.GetParameters();
            return (parameters.Length == 1) &&
                (parameters[0].ParameterType == typeof(JvmObjectReference));
        });

    return (T)constructor.Invoke(new object[] { reference });
}
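// Usage sketch with a hypothetical wrapper type: WrapAsType<T> works for any
// wrapper that exposes a single non-public constructor taking a
// JvmObjectReference, which it locates and invokes via reflection.
internal sealed class ExampleProxy
{
    private readonly JvmObjectReference _reference;

    // The single non-public JvmObjectReference constructor WrapAsType<T> expects.
    private ExampleProxy(JvmObjectReference reference)
    {
        _reference = reference;
    }
}

// ExampleProxy proxy = WrapAsType<ExampleProxy>(jvmReference);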
public IDStreamProxy EventHubsUnionStream(Dictionary<string, string> eventHubsParams, StorageLevelType storageLevelType)
{
    JvmObjectReference eventHubsParamsReference = JvmBridgeUtils.GetScalaMutableMap<string, string>(eventHubsParams);
    JvmObjectReference storageLevelTypeReference = SparkContextIpcProxy.GetJavaStorageLevel(storageLevelType);
    return new DStreamIpcProxy(
        new JvmObjectReference(
            SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod(
                "org.apache.spark.streaming.api.csharp.EventHubsUtils", "createUnionStream",
                new object[] { jvmJavaStreamingReference, eventHubsParamsReference, storageLevelTypeReference })
            .ToString()));
}
public StreamingContextIpcProxy(SparkContext sparkContext, long durationMs)
{
    this.sparkContext = sparkContext;
    sparkContextProxy = sparkContext.SparkContextProxy;
    var jduration = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.Duration", new object[] { durationMs });

    JvmObjectReference jvmSparkContextReference = (sparkContextProxy as SparkContextIpcProxy).JvmSparkContextReference;
    jvmStreamingContextReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.StreamingContext", new object[] { jvmSparkContextReference, jduration });
    jvmJavaStreamingReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.api.java.JavaStreamingContext", new object[] { jvmStreamingContextReference });

    StartAccumulatorServer(sparkContext);
}
public IDataFrameProxy Agg(IGroupedDataProxy scalaGroupedDataReference, Dictionary<string, string> columnNameAggFunctionDictionary)
{
    var mapReference = new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.HashMap").ToString());
    foreach (var key in columnNameAggFunctionDictionary.Keys)
    {
        SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(mapReference, "put", new object[] { key, columnNameAggFunctionDictionary[key] });
    }

    return new DataFrameIpcProxy(
        new JvmObjectReference(
            SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
                (scalaGroupedDataReference as GroupedDataIpcProxy).ScalaGroupedDataReference,
                "agg", new object[] { mapReference }).ToString()),
        sqlContextProxy);
}
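// Usage sketch: the dictionary maps column name to aggregate function name,
// mirroring Spark's GroupedData.agg(Map<String, String>) overload. The
// groupedDataProxy instance is hypothetical, assumed to come from a prior GroupBy.
var aggregations = new Dictionary<string, string>
{
    { "sales", "sum" }, // SUM(sales)
    { "age", "max" }    // MAX(age)
};
// IDataFrameProxy result = dataFrameProxy.Agg(groupedDataProxy, aggregations);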
public StreamingContextIpcProxy(string checkpointPath)
{
    jvmJavaStreamingReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.api.java.JavaStreamingContext", new object[] { checkpointPath });
    jvmStreamingContextReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmJavaStreamingReference, "ssc"));

    JvmObjectReference jvmSparkContextReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmStreamingContextReference, "sc"));
    JvmObjectReference jvmSparkConfReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmStreamingContextReference, "conf"));
    JvmObjectReference jvmJavaContextReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmJavaStreamingReference, "sparkContext"));
    sparkContextProxy = new SparkContextIpcProxy(jvmSparkContextReference, jvmJavaContextReference);
    var sparkConfProxy = new SparkConfIpcProxy(jvmSparkConfReference);
    sparkContext = new SparkContext(sparkContextProxy, new SparkConf(sparkConfProxy));
}
public StorageLevel GetStorageLevel()
{
    var rdd = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmRddReference, "rdd"));
    var storageLevel = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(rdd, "getStorageLevel"));

    return new StorageLevel
    (
        (bool)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(storageLevel, "useDisk"),
        (bool)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(storageLevel, "useMemory"),
        (bool)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(storageLevel, "useOffHeap"),
        (bool)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(storageLevel, "deserialized"),
        (int)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(storageLevel, "replication")
    );
}
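// Illustrative values only, assuming the StorageLevel constructor takes the
// five fields in the order read above
// (useDisk, useMemory, useOffHeap, deserialized, replication). These mirror
// Spark's built-in storage levels:
var memoryOnly = new StorageLevel(false, true, false, true, 1);       // MEMORY_ONLY
var memoryAndDiskSer = new StorageLevel(true, true, false, false, 1); // MEMORY_AND_DISK_SER
var diskOnly2 = new StorageLevel(true, false, false, false, 2);       // DISK_ONLY_2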
/// <summary>
/// Constructor mainly called by <see cref="PipelinedRDD{T}"/>.
/// </summary>
/// <param name="prevRddJvmObjRef">
/// The reference to the RDD JVM object from which the pipeline is created
/// </param>
/// <param name="sparkContext">SparkContext object</param>
/// <param name="serializedMode">Serialization mode for the current RDD</param>
/// <param name="prevSerializedMode">Serialization mode for the previous RDD</param>
internal RDD(
    JvmObjectReference prevRddJvmObjRef,
    SparkContext sparkContext,
    SerializedMode serializedMode,
    SerializedMode prevSerializedMode)
{
    // This constructor is called from the PipelinedRDD constructor,
    // where the _jvmObject is not yet created.
    _prevRddJvmObjRef = prevRddJvmObjRef;
    _sparkContext = sparkContext;
    _serializedMode = serializedMode;
    _prevSerializedMode = prevSerializedMode;
}
public bool CheckpointExists(string checkpointPath)
{
    if (checkpointPath == null)
    {
        return false;
    }

    var path = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", checkpointPath);
    var conf = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.conf.Configuration");
    var fs = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(path, "getFileSystem", conf));

    return (bool)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(fs, "exists", path) &&
        SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(fs, "listStatus", path) != null;
}
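// Usage sketch (the checkpointUtils instance is hypothetical): guard checkpoint
// recovery so a fresh StreamingContext is created when no usable checkpoint
// directory exists yet. Both constructors appear elsewhere in this section.
string checkpointPath = "/tmp/streaming-checkpoint";
StreamingContextIpcProxy contextProxy = checkpointUtils.CheckpointExists(checkpointPath)
    ? new StreamingContextIpcProxy(checkpointPath)       // restore from checkpoint
    : new StreamingContextIpcProxy(sparkContext, 1000L); // fresh context, 1s batches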
public void SaveAsTextFile(string path, string compressionCodecClass)
{
    if (!string.IsNullOrEmpty(compressionCodecClass))
    {
        var codec = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("java.lang.Class", "forName", new object[] { compressionCodecClass }));
        SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmRddReference, "saveAsTextFile", new object[] { path, codec });
    }
    else
    {
        SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmRddReference, "saveAsTextFile", new object[] { path });
    }
}
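// Usage sketch (the rddProxy instance is hypothetical): the codec argument is a
// fully qualified Hadoop compression codec class name, resolved via
// Class.forName above.
rddProxy.SaveAsTextFile("/output/events", "org.apache.hadoop.io.compress.GzipCodec");
// A null or empty codec writes plain, uncompressed text files:
rddProxy.SaveAsTextFile("/output/events-raw", null);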
public IDStreamProxy CreateConstantInputDStream(IRDDProxy rddProxy)
{
    var rddReference = new JvmObjectReference(
        (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(((RDDIpcProxy)rddProxy).JvmRddReference, "rdd"));
    var jvmDStreamReference = SparkCLRIpcProxy.JvmBridge.CallConstructor(
        "org.apache.spark.streaming.api.csharp.CSharpConstantInputDStream", jvmStreamingContextReference, rddReference);
    var javaDStreamReference = new JvmObjectReference(
        (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmDStreamReference, "asJavaDStream"));
    return new DStreamIpcProxy(javaDStreamReference, jvmDStreamReference);
}
public IDataFrameProxy Join(IDataFrameProxy otherScalaDataFrameReference, string[] joinColumnNames)
{
    var stringSequenceReference = new JvmObjectReference(
        SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.sql.api.csharp.SQLUtils", "toSeq", new object[] { joinColumnNames }).ToString());

    return new DataFrameIpcProxy(
        new JvmObjectReference(
            SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
                jvmDataFrameReference, "join",
                new object[]
                {
                    (otherScalaDataFrameReference as DataFrameIpcProxy).jvmDataFrameReference,
                    stringSequenceReference
                }).ToString()),
        sqlContextProxy);
}
public StreamingContextIpcProxy(SparkContext sparkContext, int durationSeconds)
{
    this.sparkContext = sparkContext;
    sparkContextProxy = sparkContext.SparkContextProxy;
    var jduration = JvmBridgeUtils.GetJavaDuration(durationSeconds);

    JvmObjectReference jvmSparkContextReference = (sparkContextProxy as SparkContextIpcProxy).JvmSparkContextReference;
    jvmStreamingContextReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.StreamingContext", new object[] { jvmSparkContextReference, jduration });
    jvmJavaStreamingReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.api.java.JavaStreamingContext", new object[] { jvmStreamingContextReference });

    StartAccumulatorServer(sparkContext);
    StartCallbackServer();
}
/// <summary>
/// Creates the PythonFunction object on the JVM side wrapping the given command bytes.
/// </summary>
/// <param name="jvm">JVM bridge to use</param>
/// <param name="command">Serialized command bytes</param>
/// <returns>JvmObjectReference object to the PythonFunction object</returns>
internal static JvmObjectReference CreatePythonFunction(IJvmBridge jvm, byte[] command)
{
    JvmObjectReference arrayListReference = jvm.CallConstructor("java.util.ArrayList");

    return (JvmObjectReference)jvm.CallStaticJavaMethod(
        "org.apache.spark.sql.api.dotnet.SQLUtils",
        "createPythonFunction",
        command,
        CreateEnvVarsForPythonFunction(jvm),
        arrayListReference, // Python includes
        SparkEnvironment.ConfigurationService.GetWorkerExePath(),
        Versions.CurrentVersion,
        arrayListReference, // Broadcast variables
        null); // Accumulator
}
public StreamingContextIpcProxy(SparkContext sparkContext, long durationMs)
{
    this.sparkContext = sparkContext;
    sparkContextProxy = sparkContext.SparkContextProxy;
    var jduration = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.Duration", new object[] { durationMs });

    JvmObjectReference jvmSparkContextReference = (sparkContextProxy as SparkContextIpcProxy).JvmSparkContextReference;
    jvmStreamingContextReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.StreamingContext", new object[] { jvmSparkContextReference, jduration });
    jvmJavaStreamingReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.api.java.JavaStreamingContext", new object[] { jvmStreamingContextReference });

    int port = StartCallback();
    // className and methodName are hardcoded in CSharpBackendHandler
    SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("SparkCLRHandler", "connectCallback", port);
}
public IStructTypeProxy CreateStructType(List<StructField> fields)
{
    var fieldsReference = fields.Select(s => (s.StructFieldProxy as StructFieldIpcProxy).JvmStructFieldReference).ToList().Cast<JvmObjectReference>();

    var seq = new JvmObjectReference(
        JvmBridge.CallStaticJavaMethod("org.apache.spark.sql.api.csharp.SQLUtils", "toSeq", new object[] { fieldsReference }).ToString());

    return new StructTypeIpcProxy(
        new JvmObjectReference(
            JvmBridge.CallStaticJavaMethod("org.apache.spark.sql.api.csharp.SQLUtils", "createStructType", new object[] { seq }).ToString()));
}
public SparkJobInfo GetJobInfo(int jobId)
{
    var jobInfoId = SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmStatusTrackerReference, "getJobInfo", new object[] { jobId });
    if (jobInfoId == null)
    {
        return null;
    }

    JvmObjectReference jJobInfo = new JvmObjectReference((string)jobInfoId);
    int[] stageIds = (int[])SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jJobInfo, "stageIds");
    string status = SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jJobInfo, "status").ToString();

    return new SparkJobInfo(jobId, stageIds, status);
}
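// Usage sketch; the Status and StageIds member names on SparkJobInfo are
// assumptions mirroring the constructor arguments above, and statusTracker is
// a hypothetical instance of the enclosing proxy.
SparkJobInfo jobInfo = statusTracker.GetJobInfo(jobId);
if (jobInfo != null) // null means the tracker no longer knows this job id
{
    Console.WriteLine($"Job {jobId}: status={jobInfo.Status}, stages={jobInfo.StageIds.Length}");
}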
/// <summary>
/// <see cref="IndexConfig"/> specifies the configuration of an index.
/// </summary>
/// <param name="indexName">Index name.</param>
/// <param name="indexedColumns">Columns from which an index is created.</param>
/// <param name="includedColumns">Columns to be included in the index.</param>
public IndexConfig(
    string indexName,
    IEnumerable<string> indexedColumns,
    IEnumerable<string> includedColumns)
{
    IndexName = indexName;
    IndexedColumns = new List<string>(indexedColumns);
    IncludedColumns = new List<string>(includedColumns);

    _jvmObject = (JvmObjectReference)SparkEnvironment.JvmBridge.CallStaticJavaMethod(
        s_className,
        "apply",
        IndexName,
        IndexedColumns,
        IncludedColumns);
}
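// Usage sketch: indexed columns drive index lookups, while included columns are
// stored alongside so covering queries can avoid extra data scans. Column and
// index names here are illustrative.
var indexConfig = new IndexConfig(
    indexName: "deptIndex",
    indexedColumns: new[] { "deptId" },
    includedColumns: new[] { "deptName" });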
internal SparkConf(JvmObjectReference jvmObject)
{
    _jvmObject = jvmObject;

    // Special handling for debug mode because spark.master and spark.app.name will not
    // be set in debug mode. Driver code may override these values if SetMaster or
    // SetAppName methods are used.
    if (string.IsNullOrWhiteSpace(Get("spark.master", "")))
    {
        SetMaster("local");
    }
    if (string.IsNullOrWhiteSpace(Get("spark.app.name", "")))
    {
        SetAppName("debug app");
    }
}
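// Usage sketch: driver code can override the debug-mode defaults applied above.
// The parameterless constructor is an assumption; SetMaster, SetAppName, and
// Get are the members the snippet itself exercises.
var conf = new SparkConf();
conf.SetMaster("local[2]");    // replaces the "local" fallback
conf.SetAppName("word-count"); // replaces "debug app"
string master = conf.Get("spark.master", "local");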
public void RegisterFunction(string name, byte[] command, string returnType)
{
    var judf = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmSqlContextReference, "udf"));
    var hashTableReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.Hashtable", new object[] { });
    var arrayListReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.ArrayList", new object[] { });

    SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
        judf, "registerPython",
        new object[]
        {
            name, command, hashTableReference, arrayListReference,
            SparkCLREnvironment.ConfigurationService.GetCSharpWorkerExePath(),
            "1.0", arrayListReference, null, "\"" + returnType + "\""
        });
}
public IDStreamProxy CreateCSharpInputDStream(byte[] func, string serializationMode)
{
    var jvmDStreamReference = SparkCLRIpcProxy.JvmBridge.CallConstructor(
        "org.apache.spark.streaming.api.csharp.CSharpInputDStream",
        new object[] { jvmStreamingContextReference, func, serializationMode });
    var javaDStreamReference = new JvmObjectReference(
        (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmDStreamReference, "asJavaDStream"));
    return new DStreamIpcProxy(javaDStreamReference, jvmDStreamReference);
}
public StreamingContextIpcProxy(string checkpointPath)
{
    jvmJavaStreamingReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.api.java.JavaStreamingContext", new object[] { checkpointPath });
    jvmStreamingContextReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmJavaStreamingReference, "ssc"));

    JvmObjectReference jvmSparkContextReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmStreamingContextReference, "sc"));
    JvmObjectReference jvmSparkConfReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmStreamingContextReference, "conf"));
    JvmObjectReference jvmJavaContextReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmJavaStreamingReference, "sparkContext"));
    sparkContextProxy = new SparkContextIpcProxy(jvmSparkContextReference, jvmJavaContextReference);
    var sparkConfProxy = new SparkConfIpcProxy(jvmSparkConfReference);
    sparkContext = new SparkContext(sparkContextProxy, new SparkConf(sparkConfProxy));

    // TODO: We don't know whether an accumulator variable was used before restart.
    // We just start the accumulator server for safety.
    sparkContext.StartAccumulatorServer();
}
private static JvmObjectReference CreateEnvVarsForPythonFunction(IJvmBridge jvm)
{
    JvmObjectReference environmentVars = jvm.CallConstructor("java.util.Hashtable");
    string assemblySearchPath = Environment.GetEnvironmentVariable(
        AssemblySearchPathResolver.AssemblySearchPathsEnvVarName);
    if (!string.IsNullOrEmpty(assemblySearchPath))
    {
        jvm.CallNonStaticJavaMethod(
            environmentVars,
            "put",
            AssemblySearchPathResolver.AssemblySearchPathsEnvVarName,
            assemblySearchPath);
    }

    return environmentVars;
}
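// Usage sketch: set the assembly search path in the driver process before the
// JVM-side PythonFunction is created, so the code above forwards it to workers.
// The directory list is hypothetical; the variable name comes from
// AssemblySearchPathResolver.
Environment.SetEnvironmentVariable(
    AssemblySearchPathResolver.AssemblySearchPathsEnvVarName,
    "/opt/app/udfs,/opt/app/deps");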
public IUDFProxy CreateUserDefinedCSharpFunction(string name, byte[] command, string returnType = "string")
{
    var jSqlContext = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.sql.SQLContext", new object[] { jvmSparkContextReference });
    var jDataType = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jSqlContext, "parseDataType", new object[] { "\"" + returnType + "\"" }));
    var jbroadcastVariables = JvmBridgeUtils.GetJavaList<JvmObjectReference>(jvmBroadcastReferences);

    var hashTableReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.Hashtable", new object[] { });
    var arrayListReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.ArrayList", new object[] { });

    return new UDFIpcProxy(SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.sql.UserDefinedPythonFunction",
        new object[]
        {
            name, command, hashTableReference, arrayListReference,
            SparkCLREnvironment.ConfigurationService.GetCSharpWorkerExePath(),
            "1.0", jbroadcastVariables, jvmAccumulatorReference, jDataType
        }));
}
public IRDDProxy CreateCSharpRdd(IRDDProxy prevJvmRddReference, byte[] command, Dictionary<string, string> environmentVariables, List<string> pythonIncludes, bool preservesPartitioning, List<Broadcast> broadcastVariables, List<byte[]> accumulator)
{
    var hashTableReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.Hashtable", new object[] { });
    var arrayListReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.ArrayList", new object[] { });
    var jbroadcastVariables = JvmBridgeUtils.GetJavaList<JvmObjectReference>(jvmBroadcastReferences);

    var rdd = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod((prevJvmRddReference as RDDIpcProxy).JvmRddReference, "rdd"));

    var csRdd = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.api.csharp.CSharpRDD",
        new object[]
        {
            rdd, command, hashTableReference, arrayListReference, preservesPartitioning,
            SparkCLREnvironment.ConfigurationService.GetCSharpWorkerExePath(),
            "1.0", jbroadcastVariables, jvmAccumulatorReference
        });

    return new RDDIpcProxy(new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(csRdd, "asJavaRDD")));
}
public IDStreamProxy DirectKafkaStream(List<string> topics, Dictionary<string, string> kafkaParams, Dictionary<string, long> fromOffsets)
{
    JvmObjectReference jtopics = SparkContextIpcProxy.GetJavaSet<string>(topics);
    JvmObjectReference jkafkaParams = SparkContextIpcProxy.GetJavaMap<string, string>(kafkaParams);

    var jTopicAndPartitions = fromOffsets.Select(x =>
        new KeyValuePair<JvmObjectReference, long>
        (
            SparkCLRIpcProxy.JvmBridge.CallConstructor("kafka.common.TopicAndPartition", new object[] { x.Key.Split(':')[0], int.Parse(x.Key.Split(':')[1]) }),
            x.Value
        )
    );

    JvmObjectReference jfromOffsets = SparkContextIpcProxy.GetJavaMap<JvmObjectReference, long>(jTopicAndPartitions);
    JvmObjectReference jhelper = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.kafka.KafkaUtilsPythonHelper", new object[] { });
    var jstream = new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jhelper, "createDirectStream", new object[] { jvmJavaStreamingReference, jkafkaParams, jtopics, jfromOffsets }).ToString());
    return new DStreamIpcProxy(jstream);
}
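// Usage sketch: fromOffsets keys are "topic:partition" strings, matching the
// Split(':') parsing above; values are the starting Kafka offsets. The broker
// list is hypothetical.
var fromOffsets = new Dictionary<string, long>
{
    { "events:0", 0L },    // topic "events", partition 0, from the beginning
    { "events:1", 4200L }  // partition 1, resume from offset 4200
};
var kafkaParams = new Dictionary<string, string>
{
    { "metadata.broker.list", "broker1:9092,broker2:9092" }
};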
public SparkStageInfo GetStageInfo(int stageId)
{
    var stageInfoId = SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmStatusTrackerReference, "getStageInfo", new object[] { stageId });
    if (stageInfoId == null)
    {
        return null;
    }

    JvmObjectReference jStageInfo = new JvmObjectReference((string)stageInfoId);
    int currentAttemptId = (int)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jStageInfo, "currentAttemptId");
    int submissionTime = (int)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jStageInfo, "submissionTime");
    string name = (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jStageInfo, "name");
    int numTasks = (int)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jStageInfo, "numTasks");
    int numActiveTasks = (int)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jStageInfo, "numActiveTasks");
    int numCompletedTasks = (int)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jStageInfo, "numCompletedTasks");
    int numFailedTasks = (int)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jStageInfo, "numFailedTasks");

    return new SparkStageInfo(stageId, currentAttemptId, (long)submissionTime, name, numTasks, numActiveTasks, numCompletedTasks, numFailedTasks);
}
/// <summary>
/// List the names of all the files under the given path.
/// </summary>
public IEnumerable<string> EnumerateFiles(string path)
{
    var pathJvmReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", path);
    var statusList = (List<JvmObjectReference>)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmHdfsReference, "listStatus", pathJvmReference);
    if (statusList == null || statusList.Count == 0)
    {
        return new string[0];
    }

    var files = new string[statusList.Count];
    for (var i = 0; i < statusList.Count; i++)
    {
        var subPathJvmReference = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(statusList[i], "getPath"));
        files[i] = (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(subPathJvmReference, "getName");
    }

    return files;
}
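// Usage sketch (the hdfs instance is hypothetical): enumerate file names under
// a directory; missing or empty paths yield an empty sequence rather than null,
// so the loop is safe without a null check.
foreach (string fileName in hdfs.EnumerateFiles("/data/input"))
{
    Console.WriteLine(fileName);
}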
public IDStreamProxy DirectKafkaStreamWithRepartition(List<string> topics, Dictionary<string, string> kafkaParams, Dictionary<string, long> fromOffsets, int numPartitions)
{
    JvmObjectReference jtopics = JvmBridgeUtils.GetJavaSet<string>(topics);
    JvmObjectReference jkafkaParams = JvmBridgeUtils.GetJavaMap<string, string>(kafkaParams);

    var jTopicAndPartitions = fromOffsets.Select(x =>
        new KeyValuePair<JvmObjectReference, long>
        (
            SparkCLRIpcProxy.JvmBridge.CallConstructor("kafka.common.TopicAndPartition", new object[] { x.Key.Split(':')[0], int.Parse(x.Key.Split(':')[1]) }),
            x.Value
        )
    );

    JvmObjectReference jfromOffsets = JvmBridgeUtils.GetJavaMap<JvmObjectReference, long>(jTopicAndPartitions);
    // SparkCLR\scala\src\main\org\apache\spark\streaming\api\kafka\KafkaUtilsCSharpHelper.scala
    JvmObjectReference jhelper = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.kafka.KafkaUtilsCSharpHelper", new object[] { });
    var jstream = new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jhelper, "createDirectStreamWithoutMessageHandler",
        new object[] { jvmJavaStreamingReference, jkafkaParams, jtopics, jfromOffsets, numPartitions }).ToString());
    return new DStreamIpcProxy(jstream);
}
public IDStreamProxy DirectKafkaStream(List<string> topics, IEnumerable<Tuple<string, string>> kafkaParams, IEnumerable<Tuple<string, long>> fromOffsets)
{
    JvmObjectReference jtopics = JvmBridgeUtils.GetJavaSet<string>(topics);
    JvmObjectReference jkafkaParams = JvmBridgeUtils.GetJavaMap<string, string>(kafkaParams);

    var jTopicAndPartitions = fromOffsets.Select(x =>
        new Tuple<JvmObjectReference, long>
        (
            SparkCLRIpcProxy.JvmBridge.CallConstructor("kafka.common.TopicAndPartition", new object[] { x.Item1.Split(':')[0], int.Parse(x.Item1.Split(':')[1]) }),
            x.Item2
        )
    );

    JvmObjectReference jfromOffsets = JvmBridgeUtils.GetJavaMap<JvmObjectReference, long>(jTopicAndPartitions);
    // KafkaUtilsPythonHelper: external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala
    JvmObjectReference jhelper = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.streaming.kafka.KafkaUtilsPythonHelper", new object[] { });
    var jstream = new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jhelper, "createDirectStreamWithoutMessageHandler",
        new object[] { jvmJavaStreamingReference, jkafkaParams, jtopics, jfromOffsets }).ToString());
    return new DStreamIpcProxy(jstream);
}