/// <summary>
/// Wraps an existing JVM DStream in a CSharpDStream that runs the serialized
/// C# function <paramref name="func"/> on each batch.
/// </summary>
/// <param name="jdstream">Proxy of the parent DStream on the JVM side.</param>
/// <param name="func">Serialized C# transformation to apply.</param>
/// <param name="deserializer">Name of the deserializer used for the stream's payload.</param>
/// <returns>Proxy holding both the CSharpDStream and its JavaDStream view.</returns>
public IDStreamProxy CreateCSharpDStream(IDStreamProxy jdstream, byte[] func, string deserializer)
{
    var parentDStream = (jdstream as DStreamIpcProxy).jvmDStreamReference;
    var csharpDStream = SparkCLRIpcProxy.JvmBridge.CallConstructor(
        "org.apache.spark.streaming.api.csharp.CSharpDStream",
        new object[] { parentDStream, func, deserializer });
    // expose the CSharpDStream through the Java API so downstream operators can use it
    var javaDStream = new JvmObjectReference(
        (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(csharpDStream, "asJavaDStream"));
    return new DStreamIpcProxy(javaDStream, csharpDStream);
}
/// <summary>
/// Creates a JVM StreamingContext (plus its JavaStreamingContext wrapper) for the
/// given SparkContext, with the batch interval expressed in milliseconds.
/// </summary>
/// <param name="sparkContext">Existing C#-side SparkContext to attach to.</param>
/// <param name="durationMs">Batch interval in milliseconds.</param>
public StreamingContextIpcProxy(SparkContext sparkContext, long durationMs)
{
    this.sparkContext = sparkContext;
    sparkContextProxy = sparkContext.SparkContextProxy;

    var batchDuration = SparkCLRIpcProxy.JvmBridge.CallConstructor(
        "org.apache.spark.streaming.Duration", new object[] { durationMs });
    var jvmSparkContext = (sparkContextProxy as SparkContextIpcProxy).JvmSparkContextReference;

    jvmStreamingContextReference = SparkCLRIpcProxy.JvmBridge.CallConstructor(
        "org.apache.spark.streaming.StreamingContext",
        new object[] { jvmSparkContext, batchDuration });
    jvmJavaStreamingReference = SparkCLRIpcProxy.JvmBridge.CallConstructor(
        "org.apache.spark.streaming.api.java.JavaStreamingContext",
        new object[] { jvmStreamingContextReference });
}
/// <summary>
/// Converts a byte-array RDD into a DataFrame by first turning it into an
/// Any[]-row RDD on the JVM side and then applying the given schema.
/// </summary>
/// <param name="rddProxy">Proxy of the byte-array RDD to convert.</param>
/// <param name="structTypeProxy">Proxy of the schema to apply.</param>
/// <returns>Proxy of the resulting DataFrame.</returns>
public IDataFrameProxy CreateDataFrame(IRDDProxy rddProxy, IStructTypeProxy structTypeProxy)
{
    var anyArrayRdd = new JvmObjectReference(
        SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod(
            "org.apache.spark.sql.api.csharp.SQLUtils", "byteArrayRDDToAnyArrayRDD",
            new object[] { (rddProxy as RDDIpcProxy).JvmRddReference }).ToString());

    var schema = (structTypeProxy as StructTypeIpcProxy).JvmStructTypeReference;
    var dataFrameReference = new JvmObjectReference(
        SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
            jvmSqlContextReference, "applySchemaToPythonRDD",
            new object[] { anyArrayRdd, schema }).ToString());

    return new DataFrameIpcProxy(dataFrameReference, this);
}
/// <summary>
/// Restores a streaming context from a checkpoint directory and rebuilds the C#-side
/// SparkContext/SparkConf proxies from the recovered JVM objects.
/// </summary>
/// <param name="checkpointPath">Path of the checkpoint directory to restore from.</param>
public StreamingContextIpcProxy(string checkpointPath)
{
    jvmJavaStreamingReference = SparkCLRIpcProxy.JvmBridge.CallConstructor(
        "org.apache.spark.streaming.api.java.JavaStreamingContext", new object[] { checkpointPath });
    jvmStreamingContextReference = new JvmObjectReference(
        (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmJavaStreamingReference, "ssc"));

    // pull the recovered Scala context, its conf and the Java context back out of the JVM
    var jvmSparkContext = new JvmObjectReference(
        (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmStreamingContextReference, "sc"));
    var jvmSparkConf = new JvmObjectReference(
        (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmStreamingContextReference, "conf"));
    var jvmJavaContext = new JvmObjectReference(
        (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmJavaStreamingReference, "sparkContext"));

    sparkContextProxy = new SparkContextIpcProxy(jvmSparkContext, jvmJavaContext);
    var sparkConfProxy = new SparkConfIpcProxy(jvmSparkConf);
    sparkContext = new SparkContext(sparkContextProxy, new SparkConf(sparkConfProxy));
}
/// <summary>
/// Looks up job information by id via the JVM status tracker.
/// </summary>
/// <param name="jobId">Id of the job to query.</param>
/// <returns>The job's stage ids and status, or null when the tracker does not know the job.</returns>
public SparkJobInfo GetJobInfo(int jobId)
{
    var jobInfoRef = SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
        jvmStatusTrackerReference, "getJobInfo", new object[] { jobId });
    if (jobInfoRef == null)
    {
        return null;
    }

    var jJobInfo = new JvmObjectReference((string)jobInfoRef);
    var stageIds = (int[])SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jJobInfo, "stageIds");
    var status = SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jJobInfo, "status").ToString();
    return new SparkJobInfo(jobId, stageIds, status);
}
/// <summary>
/// Returns true when the given checkpoint path exists on its Hadoop file system
/// and can be listed; false for a null path.
/// </summary>
/// <param name="checkpointPath">Checkpoint directory to probe; may be null.</param>
public bool CheckpointExists(string checkpointPath)
{
    if (checkpointPath == null)
    {
        return false;
    }

    var hadoopPath = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", checkpointPath);
    var hadoopConf = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.conf.Configuration");
    var fileSystem = new JvmObjectReference(
        (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(hadoopPath, "getFileSystem", hadoopConf));

    return (bool)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(fileSystem, "exists", hadoopPath)
        && SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(fileSystem, "listStatus", hadoopPath) != null;
}
/// <summary>
/// Creates a JVM StreamingContext (plus its JavaStreamingContext wrapper) for the given
/// SparkContext with a batch interval in seconds, then starts the accumulator and
/// callback servers used for JVM-to-C# communication.
/// </summary>
/// <param name="sparkContext">Existing C#-side SparkContext to attach to.</param>
/// <param name="durationSeconds">Batch interval in seconds.</param>
public StreamingContextIpcProxy(SparkContext sparkContext, int durationSeconds)
{
    this.sparkContext = sparkContext;
    sparkContextProxy = sparkContext.SparkContextProxy;

    var batchDuration = JvmBridgeUtils.GetJavaDuration(durationSeconds);
    var jvmSparkContext = (sparkContextProxy as SparkContextIpcProxy).JvmSparkContextReference;

    jvmStreamingContextReference = SparkCLRIpcProxy.JvmBridge.CallConstructor(
        "org.apache.spark.streaming.StreamingContext",
        new object[] { jvmSparkContext, batchDuration });
    jvmJavaStreamingReference = SparkCLRIpcProxy.JvmBridge.CallConstructor(
        "org.apache.spark.streaming.api.java.JavaStreamingContext",
        new object[] { jvmStreamingContextReference });

    StartAccumulatorServer(sparkContext);
    StartCallbackServer();
}
/// <summary>
/// Creates a JVM StreamingContext (plus its JavaStreamingContext wrapper) for the given
/// SparkContext with a batch interval in milliseconds, then opens the C#-side callback
/// channel and registers its port with the JVM backend.
/// </summary>
/// <param name="sparkContext">Existing C#-side SparkContext to attach to.</param>
/// <param name="durationMs">Batch interval in milliseconds.</param>
public StreamingContextIpcProxy(SparkContext sparkContext, long durationMs)
{
    this.sparkContext = sparkContext;
    sparkContextProxy = sparkContext.SparkContextProxy;

    var batchDuration = SparkCLRIpcProxy.JvmBridge.CallConstructor(
        "org.apache.spark.streaming.Duration", new object[] { durationMs });
    var jvmSparkContext = (sparkContextProxy as SparkContextIpcProxy).JvmSparkContextReference;

    jvmStreamingContextReference = SparkCLRIpcProxy.JvmBridge.CallConstructor(
        "org.apache.spark.streaming.StreamingContext",
        new object[] { jvmSparkContext, batchDuration });
    jvmJavaStreamingReference = SparkCLRIpcProxy.JvmBridge.CallConstructor(
        "org.apache.spark.streaming.api.java.JavaStreamingContext",
        new object[] { jvmStreamingContextReference });

    var callbackPort = StartCallback();
    // className and methodName hardcoded in CSharpBackendHandler
    SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("SparkCLRHandler", "connectCallback", callbackPort);
}
/// <summary>
/// Restores a streaming context from a checkpoint directory, rebuilds the C#-side
/// SparkContext/SparkConf proxies from the recovered JVM objects, and starts the
/// accumulator server.
/// </summary>
/// <param name="checkpointPath">Path of the checkpoint directory to restore from.</param>
public StreamingContextIpcProxy(string checkpointPath)
{
    jvmJavaStreamingReference = SparkCLRIpcProxy.JvmBridge.CallConstructor(
        "org.apache.spark.streaming.api.java.JavaStreamingContext", new object[] { checkpointPath });
    jvmStreamingContextReference = new JvmObjectReference(
        (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmJavaStreamingReference, "ssc"));

    // recover the Scala context, its conf and the Java context from the restored JVM state
    var jvmSparkContext = new JvmObjectReference(
        (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmStreamingContextReference, "sc"));
    var jvmSparkConf = new JvmObjectReference(
        (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmStreamingContextReference, "conf"));
    var jvmJavaContext = new JvmObjectReference(
        (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmJavaStreamingReference, "sparkContext"));

    sparkContextProxy = new SparkContextIpcProxy(jvmSparkContext, jvmJavaContext);
    var sparkConfProxy = new SparkConfIpcProxy(jvmSparkConf);
    sparkContext = new SparkContext(sparkContextProxy, new SparkConf(sparkConfProxy));

    // TODO: We don't know whether accumulator variable is used before restart.
    // We just start accumulator server for safety.
    sparkContext.StartAccumulatorServer();
}
/// <summary>
/// Registers a serialized C# UDF with the JVM SQL context under the given name
/// via the Python UDF registration path.
/// </summary>
/// <param name="name">Name the UDF is registered under.</param>
/// <param name="command">Serialized C# function to execute.</param>
/// <param name="returnType">Return type of the UDF, quoted into a JSON string for the JVM parser.</param>
public void RegisterFunction(string name, byte[] command, string returnType)
{
    var judf = new JvmObjectReference(
        (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmSqlContextReference, "udf"));
    // empty env-var table and include list; the same empty list is reused for both
    // the includes and the broadcast-variables positional arguments
    var emptyEnvVars = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.Hashtable", new object[] { });
    var emptyList = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.ArrayList", new object[] { });

    SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
        judf, "registerPython",
        new object[]
        {
            name, command, emptyEnvVars, emptyList,
            SparkCLREnvironment.ConfigurationService.GetCSharpWorkerExePath(),
            "1.0", emptyList, null, "\"" + returnType + "\""
        });
}
/// <summary>
/// Looks up stage information by id via the JVM status tracker.
/// Returns null when the tracker does not know the stage.
/// </summary>
/// <param name="stageId">Id of the stage to query.</param>
/// <returns>A SparkStageInfo with attempt, timing, name and task counters, or null.</returns>
public SparkStageInfo GetStageInfo(int stageId)
{
    var stageInfoId = SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmStatusTrackerReference, "getStageInfo", new object[] { stageId });
    if (stageInfoId == null) return null;
    JvmObjectReference jStageInfo = new JvmObjectReference((string)stageInfoId);
    int currentAttemptId = (int)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jStageInfo, "currentAttemptId");
    // NOTE(review): submissionTime is unboxed as int here and widened to long only at the
    // SparkStageInfo call below — if the JVM side ever marshals an epoch-millis long this
    // truncates; confirm how the bridge boxes this value.
    int submissionTime = (int)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jStageInfo, "submissionTime");
    string name = (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jStageInfo, "name");
    int numTasks = (int)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jStageInfo, "numTasks");
    int numActiveTasks = (int)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jStageInfo, "numActiveTasks");
    int numCompletedTasks = (int)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jStageInfo, "numCompletedTasks");
    int numFailedTasks = (int)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jStageInfo, "numFailedTasks");
    return new SparkStageInfo(stageId, currentAttemptId, (long)submissionTime, name, numTasks, numActiveTasks, numCompletedTasks, numFailedTasks);
}
/// <summary>
/// Verifies that JvmObjectReference instances registered with the WeakObjectManager are
/// released after garbage collection, and that the count of remaining references matches
/// the count of still-alive objects. Timing-sensitive: relies on the manager's check
/// interval and a fixed wait window.
/// </summary>
public void TestJVMObjectRelease()
{
    //make weak object manager wait for 2 secs for initial validation before start releasing objects
    var weakObjectManager = new WeakObjectManagerImpl(TimeSpan.FromSeconds(2)) { ObjectReleaser = new MockObjectReleaser() };
    //reset WeakObjectManager for validation - this is to avoid side effects *from* other tests
    SparkCLREnvironment.WeakObjectManager = weakObjectManager;
    var waitEndTime = DateTime.Now + TimeSpan.FromSeconds(4);
    //no items added
    Assert.AreEqual(0, weakObjectManager.GetReferencesCount());
    var totalItemCount = 10;
    for (var k = 1; k <= totalItemCount; k++)
    {
        //each object adds itself to WeakObjectManager
        // the local is deliberately never used afterwards so the object becomes
        // collectible as soon as the loop iteration ends
        var obj = new JvmObjectReference(k.ToString());
    }
    //all items added should be available
    Assert.AreEqual(totalItemCount, weakObjectManager.GetReferencesCount());
    //reset check interval to start releasing objects
    weakObjectManager.CheckInterval = TimeSpan.FromMilliseconds(200);
    GC.Collect();
    GC.WaitForPendingFinalizers();
    //reset check interval to default
    weakObjectManager.CheckInterval = WeakObjectManagerImpl.DefaultCheckInterval;
    // wait out the remainder of the 4-second window so the release pass has had time to run
    var remainingTimeToWait = waitEndTime - DateTime.Now;
    if (remainingTimeToWait.TotalMilliseconds > 0)
    {
        Thread.Sleep(remainingTimeToWait);
    }
    var countAfterReleasingObjects = weakObjectManager.GetReferencesCount();
    var aliveCount = weakObjectManager.GetAliveCount();
    //validate that some items are released
    Assert.AreEqual(1, countAfterReleasingObjects);
    Assert.IsTrue(countAfterReleasingObjects < totalItemCount);
    //validate that unreleased items are alive items
    Assert.AreEqual(0, countAfterReleasingObjects - aliveCount);
}
/// <summary>
/// List the names of all the files under the given path.
/// </summary>
/// <param name="path">Directory path to list, interpreted by the Hadoop file system.</param>
/// <returns>File names (not full paths); empty when the path has no entries.</returns>
public IEnumerable<string> EnumerateFiles(string path)
{
    var pathJvmReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.hadoop.fs.Path", path);
    var statusList = (List<JvmObjectReference>)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
        jvmHdfsReference, "listStatus", pathJvmReference);
    if (statusList == null || statusList.Count == 0)
    {
        return new string[0];
    }

    var names = new List<string>(statusList.Count);
    foreach (var status in statusList)
    {
        // FileStatus.getPath -> Path, then Path.getName for the final component
        var subPath = new JvmObjectReference(
            (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(status, "getPath"));
        names.Add((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(subPath, "getName"));
    }
    return names;
}
/// <summary>
/// Restores a streaming context from a checkpoint directory, reusing the active
/// SparkContext when one exists and otherwise rebuilding the C#-side proxies from
/// the recovered JVM objects. Starts the callback and accumulator servers.
/// </summary>
/// <param name="checkpointPath">Path of the checkpoint directory to restore from.</param>
public StreamingContextIpcProxy(string checkpointPath)
{
    sparkContext = SparkContext.GetActiveSparkContext();
    StartCallbackServer();

    jvmJavaStreamingReference = SparkCLRIpcProxy.JvmBridge.CallConstructor(
        "org.apache.spark.streaming.api.java.JavaStreamingContext", new object[] { checkpointPath });
    jvmStreamingContextReference = new JvmObjectReference(
        (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmJavaStreamingReference, "ssc"));

    if (sparkContext == null)
    {
        // no active context on this side: rebuild the proxies from the restored JVM state
        var jvmSparkContext = new JvmObjectReference(
            (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmStreamingContextReference, "sc"));
        var jvmSparkConf = new JvmObjectReference(
            (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmStreamingContextReference, "conf"));
        var jvmJavaContext = new JvmObjectReference(
            (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmJavaStreamingReference, "sparkContext"));

        sparkContextProxy = new SparkContextIpcProxy(jvmSparkContext, jvmJavaContext);
        var sparkConfProxy = new SparkConfIpcProxy(jvmSparkConf);
        sparkContext = new SparkContext(sparkContextProxy, new SparkConf(sparkConfProxy));
    }
    else
    {
        sparkContextProxy = sparkContext.SparkContextProxy;
    }

    StartAccumulatorServer(sparkContext);
}
/// <summary>
/// Creates a JVM broadcast variable from a serialized file via PythonRDD and tracks
/// its JVM reference for later CSharpRDD construction.
/// </summary>
/// <param name="path">File containing the serialized broadcast value.</param>
/// <param name="broadcastId">Receives the JVM-assigned broadcast id.</param>
/// <returns>Proxy of the created broadcast variable.</returns>
public IBroadcastProxy ReadBroadcastFromFile(string path, out long broadcastId)
{
    var jbroadcast = new JvmObjectReference(
        (string)SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod(
            "org.apache.spark.api.python.PythonRDD", "readBroadcastFromFile",
            new object[] { jvmJavaContextReference, path }));
    // the id comes back boxed as a double from the bridge; unbox, then convert to long
    broadcastId = (long)(double)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jbroadcast, "id");
    jvmBroadcastReferences.Add(jbroadcast);
    return new BroadcastIpcProxy(jbroadcast, this);
}
/// <summary>
/// Distributes the given serialized elements as a CSharpRDD across the requested
/// number of slices.
/// </summary>
/// <param name="values">Pre-serialized elements to distribute.</param>
/// <param name="numSlices">Number of partitions for the resulting RDD.</param>
public IRDDProxy Parallelize(IEnumerable<byte[]> values, int numSlices)
{
    var rddReference = new JvmObjectReference(
        (string)SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod(
            "org.apache.spark.api.csharp.CSharpRDD", "createRDDFromArray",
            new object[] { jvmSparkContextReference, values, numSlices }));
    return new RDDIpcProxy(rddReference);
}
/// <summary>
/// Creates an RDD from a new-API Hadoop InputFormat through PythonRDD, converting the
/// configuration pairs into a Java HashMap for the JVM call.
/// </summary>
public IRDDProxy NewAPIHadoopRDD(string inputFormatClass, string keyClass, string valueClass,
    string keyConverterClass, string valueConverterClass,
    IEnumerable<KeyValuePair<string, string>> conf, int batchSize)
{
    var jconf = JvmBridgeUtils.GetJavaHashMap<string, string>(conf);
    var rddReference = new JvmObjectReference(
        (string)SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod(
            "org.apache.spark.api.python.PythonRDD", "newAPIHadoopRDD",
            new object[]
            {
                jvmJavaContextReference, inputFormatClass, keyClass, valueClass,
                keyConverterClass, valueConverterClass, jconf, batchSize
            }));
    return new RDDIpcProxy(rddReference);
}
/// <summary>
/// Creates a broadcast proxy wrapping the given JVM broadcast reference, keeping a
/// handle to the owning SparkContext proxy.
/// </summary>
/// <param name="jvmBroadcastReference">JVM reference of the broadcast variable.</param>
/// <param name="sparkContextIpcProxy">Context proxy that owns this broadcast.</param>
public BroadcastIpcProxy(JvmObjectReference jvmBroadcastReference, SparkContextIpcProxy sparkContextIpcProxy)
{
    this.sparkContextIpcProxy = sparkContextIpcProxy;
    this.jvmBroadcastReference = jvmBroadcastReference;
}
/// <summary>
/// Creates an empty RDD via the JavaSparkContext.
/// </summary>
/// <returns>Proxy of the empty RDD.</returns>
public IRDDProxy EmptyRDD()
{
    var emptyRdd = new JvmObjectReference(
        (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmJavaContextReference, "emptyRDD"));
    return new RDDIpcProxy(emptyRdd);
}
/// <summary>
/// Invokes an instance method on the given JVM object with the supplied parameters.
/// </summary>
/// <param name="objectId">JVM object to invoke the method on.</param>
/// <param name="methodName">Name of the method to call.</param>
/// <param name="parameters">Arguments forwarded to the JVM method.</param>
/// <returns>The marshaled return value of the JVM call.</returns>
public object CallNonStaticJavaMethod(JvmObjectReference objectId, string methodName, params object[] parameters)
{
    return CallJavaMethod(false, objectId, methodName, parameters);
}
/// <summary>
/// Creates a (fileName, content) RDD from the files under the given path via
/// JavaSparkContext.wholeTextFiles.
/// </summary>
/// <param name="filePath">Directory (or glob) of files to read.</param>
/// <param name="minPartitions">Minimum number of partitions for the resulting RDD.</param>
/// <returns>Proxy of the resulting RDD.</returns>
public IRDDProxy WholeTextFiles(string filePath, int minPartitions)
{
    // Cast the bridge result directly to string (instead of the previous .ToString())
    // for consistency with the other jvmJavaContextReference calls in this proxy
    // (e.g. EmptyRDD, CheckpointFile), which all receive a string object id.
    var jvmRddReference = new JvmObjectReference(
        (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
            jvmJavaContextReference, "wholeTextFiles", new object[] { filePath, minPartitions }));
    return new RDDIpcProxy(jvmRddReference);
}
/// <summary>
/// Wraps a byte-array RDD into a JVM PairwiseRDD, repartitions it with a
/// PythonPartitioner into the requested number of partitions, and returns an RDD of
/// the pair values.
/// </summary>
/// <param name="jvmReferenceOfByteArrayRdd">Proxy of the byte-array RDD to partition.</param>
/// <param name="numPartitions">Target number of partitions.</param>
/// <returns>Proxy of the partitioned values RDD.</returns>
public IRDDProxy CreatePairwiseRDD(IRDDProxy jvmReferenceOfByteArrayRdd, int numPartitions)
{
    // unwrap the JavaRDD to the underlying Scala RDD expected by the PairwiseRDD constructor
    var rdd = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod((jvmReferenceOfByteArrayRdd as RDDIpcProxy).JvmRddReference, "rdd"));
    var pairwiseRdd = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.api.python.PairwiseRDD", rdd);
    var pairRddJvmReference = new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(pairwiseRdd, "asJavaPairRDD", new object[] { }).ToString());
    // second constructor argument is a long — presumably the Python partition-function id;
    // confirm against PythonPartitioner before changing
    var jpartitionerJavaReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.api.python.PythonPartitioner", new object[] { numPartitions, (long)0 });
    var partitionedPairRddJvmReference = new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(pairRddJvmReference, "partitionBy", new object[] { jpartitionerJavaReference }).ToString());
    // drop the keys, keeping only the partitioned values
    var jvmRddReference = new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "valueOfPair", new object[] { partitionedPairRddJvmReference }).ToString());
    //var jvmRddReference = new JvmObjectReference(SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(partitionedRddJvmReference, "rdd", new object[] { }).ToString());
    return new RDDIpcProxy(jvmRddReference);
}
/// <summary>
/// Creates a JVM CSharpRDD over the given RDD that executes the serialized C# command
/// in the C# worker process, then exposes it through the Java RDD API.
/// </summary>
/// <remarks>
/// NOTE(review): the environmentVariables, pythonIncludes, broadcastVariables and
/// accumulator parameters are never referenced by this implementation — fresh empty JVM
/// collections and the proxy-level jvmBroadcastReferences / jvmAccumulatorReference
/// fields are passed to the CSharpRDD constructor instead. Confirm whether the unused
/// parameters are intentional (interface compatibility) before relying on them.
/// </remarks>
public IRDDProxy CreateCSharpRdd(IRDDProxy prevJvmRddReference, byte[] command, Dictionary<string, string> environmentVariables, List<string> pythonIncludes, bool preservesPartitioning, List<Broadcast> broadcastVariables, List<byte[]> accumulator)
{
    var hashTableReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.Hashtable", new object[] { });
    var arrayListReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.ArrayList", new object[] { });
    var jbroadcastVariables = JvmBridgeUtils.GetJavaList<JvmObjectReference>(jvmBroadcastReferences);
    // unwrap the JavaRDD to the underlying Scala RDD expected by the CSharpRDD constructor
    var rdd = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod((prevJvmRddReference as RDDIpcProxy).JvmRddReference, "rdd"));
    var csRdd = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.api.csharp.CSharpRDD",
        new object[]
        {
            rdd, command, hashTableReference, arrayListReference, preservesPartitioning,
            SparkCLREnvironment.ConfigurationService.GetCSharpWorkerExePath(),
            "1.0", jbroadcastVariables, jvmAccumulatorReference
        });
    return new RDDIpcProxy(new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(csRdd, "asJavaRDD")));
}
/// <summary>
/// Creates a proxy wrapping the given JVM struct data type reference.
/// </summary>
/// <param name="jvmStructDataTypeReference">JVM reference of the struct data type.</param>
internal StructDataTypeIpcProxy(JvmObjectReference jvmStructDataTypeReference)
{
    this.jvmStructDataTypeReference = jvmStructDataTypeReference;
}
/// <summary>
/// Invokes a parameterless instance method on the given JVM object.
/// </summary>
/// <param name="objectId">JVM object to invoke the method on.</param>
/// <param name="methodName">Name of the method to call.</param>
/// <returns>The marshaled return value of the JVM call.</returns>
public object CallNonStaticJavaMethod(JvmObjectReference objectId, string methodName)
{
    return CallJavaMethod(false, objectId, methodName, new object[] { });
}
/// <summary>
/// Creates an RDD from a Hadoop SequenceFile through PythonRDD with the given key/value
/// classes and optional converters.
/// </summary>
public IRDDProxy SequenceFile(string filePath, string keyClass, string valueClass,
    string keyConverterClass, string valueConverterClass, int minSplits, int batchSize)
{
    var rddReference = new JvmObjectReference(
        (string)SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod(
            "org.apache.spark.api.python.PythonRDD", "sequenceFile",
            new object[]
            {
                jvmJavaContextReference, filePath, keyClass, valueClass,
                keyConverterClass, valueConverterClass, minSplits, batchSize
            }));
    return new RDDIpcProxy(rddReference);
}
/// <summary>
/// Builds the JVM-side union of the given RDDs via JavaSparkContext.union(first, rest).
/// </summary>
/// <param name="rdds">RDD proxies to union; must contain at least one element
/// (an empty sequence throws InvalidOperationException, as before).</param>
/// <returns>Proxy of the union RDD.</returns>
public IRDDProxy Union(IEnumerable<IRDDProxy> rdds)
{
    // Materialize once: the previous implementation enumerated the input twice
    // (First() and Skip(1)), which is incorrect for one-shot sequences.
    var rddList = rdds as IList<IRDDProxy> ?? rdds.ToList();

    var jfirst = (rddList.First() as RDDIpcProxy).JvmRddReference;
    var jrest = JvmBridgeUtils.GetJavaList<JvmObjectReference>(
        rddList.Skip(1).Select(r => (r as RDDIpcProxy).JvmRddReference));

    var jvmRddReference = new JvmObjectReference(
        (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
            jvmJavaContextReference, "union", new object[] { jfirst, jrest }));
    return new RDDIpcProxy(jvmRddReference);
}
/// <summary>
/// Constructs the JVM SparkContext (and its JavaSparkContext wrapper), passing only the
/// non-null arguments so the matching JVM constructor overload is selected.
/// </summary>
/// <param name="master">Cluster master URL; may be null.</param>
/// <param name="appName">Application name; may be null.</param>
/// <param name="sparkHome">Spark home directory; may be null.</param>
/// <param name="conf">Optional SparkConf proxy; may be null.</param>
public void CreateSparkContext(string master, string appName, string sparkHome, ISparkConfProxy conf)
{
    var jvmConf = conf == null ? null : (conf as SparkConfIpcProxy).JvmSparkConfReference;
    var args = new object[] { master, appName, sparkHome, jvmConf }
        .Where(arg => arg != null)
        .ToArray();

    jvmSparkContextReference = SparkCLRIpcProxy.JvmBridge.CallConstructor("org.apache.spark.SparkContext", args);
    jvmJavaContextReference = SparkCLRIpcProxy.JvmBridge.CallConstructor(
        "org.apache.spark.api.java.JavaSparkContext", new object[] { jvmSparkContextReference });
}
/// <summary>
/// Creates a context proxy around existing JVM SparkContext and JavaSparkContext references.
/// </summary>
/// <param name="jvmSparkContextReference">JVM reference of the Scala SparkContext.</param>
/// <param name="jvmJavaContextReference">JVM reference of the JavaSparkContext wrapper.</param>
public SparkContextIpcProxy(JvmObjectReference jvmSparkContextReference, JvmObjectReference jvmJavaContextReference)
{
    this.jvmJavaContextReference = jvmJavaContextReference;
    this.jvmSparkContextReference = jvmSparkContextReference;
}
/// <summary>
/// Builds a JVM UserDefinedPythonFunction that runs the serialized C# command in the
/// C# worker, parsing the declared return type through a fresh SQLContext.
/// </summary>
/// <param name="name">Name of the user-defined function.</param>
/// <param name="command">Serialized C# function to execute.</param>
/// <param name="returnType">Return type, quoted into a JSON string for the JVM parser.</param>
/// <returns>Proxy of the created UDF.</returns>
public IUDFProxy CreateUserDefinedCSharpFunction(string name, byte[] command, string returnType = "string")
{
    var jSqlContext = SparkCLRIpcProxy.JvmBridge.CallConstructor(
        "org.apache.spark.sql.SQLContext", new object[] { jvmSparkContextReference });
    var jDataType = new JvmObjectReference(
        (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
            jSqlContext, "parseDataType", new object[] { "\"" + returnType + "\"" }));

    var jbroadcastVariables = JvmBridgeUtils.GetJavaList<JvmObjectReference>(jvmBroadcastReferences);
    // empty env-var table and include list required by the positional constructor
    var emptyEnvVars = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.Hashtable", new object[] { });
    var emptyIncludes = SparkCLRIpcProxy.JvmBridge.CallConstructor("java.util.ArrayList", new object[] { });

    var judf = SparkCLRIpcProxy.JvmBridge.CallConstructor(
        "org.apache.spark.sql.UserDefinedPythonFunction",
        new object[]
        {
            name, command, emptyEnvVars, emptyIncludes,
            SparkCLREnvironment.ConfigurationService.GetCSharpWorkerExePath(),
            "1.0", jbroadcastVariables, jvmAccumulatorReference, jDataType
        });
    return new UDFIpcProxy(judf);
}
/// <summary>
/// Creates a proxy wrapping the given JVM struct field reference.
/// </summary>
/// <param name="jvmStructFieldReference">JVM reference of the struct field.</param>
internal StructFieldIpcProxy(JvmObjectReference jvmStructFieldReference)
{
    this.jvmStructFieldReference = jvmStructFieldReference;
}
/// <summary>
/// Loads a previously checkpointed RDD from the given path via the JavaSparkContext.
/// </summary>
/// <param name="filePath">Path of the checkpointed RDD data.</param>
/// <returns>Proxy of the restored RDD.</returns>
public IRDDProxy CheckpointFile(string filePath)
{
    var restoredRdd = new JvmObjectReference(
        (string)SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(
            jvmJavaContextReference, "checkpointFile", new object[] { filePath }));
    return new RDDIpcProxy(restoredRdd);
}