Exemple #1
0
        /// <summary>
        /// Builds a partitioned pair RDD on the JVM side: the given RDD is wrapped in a
        /// <c>PairwiseRDD</c>, partitioned with a <c>PythonPartitioner</c>, and the result of
        /// <c>PythonRDD.valueOfPair</c> is returned as a new proxy.
        /// </summary>
        /// <param name="jvmReferenceOfByteArrayRdd">Proxy of the source RDD; it is accessed as an <c>RDDIpcProxy</c>.</param>
        /// <param name="numPartitions">Partition count passed to the <c>PythonPartitioner</c> constructor.</param>
        /// <param name="partitionFuncId">Global unique id of partitioner which is used for comparison PythonPartitioners in JVM.</param>
        /// <returns>An <c>RDDIpcProxy</c> over the JVM reference returned by <c>valueOfPair</c>.</returns>
        public IRDDProxy CreatePairwiseRDD(IRDDProxy jvmReferenceOfByteArrayRdd, int numPartitions, long partitionFuncId)
        {
            var bridge = SparkCLRIpcProxy.JvmBridge;

            // Step 1: take the "rdd" of the incoming proxy and wrap it in a PairwiseRDD.
            var sourceRddId = (string)bridge.CallNonStaticJavaMethod((jvmReferenceOfByteArrayRdd as RDDIpcProxy).JvmRddReference, "rdd");
            var sourceRdd = new JvmObjectReference(sourceRddId);
            var pairwise = bridge.CallConstructor("org.apache.spark.api.python.PairwiseRDD", sourceRdd);
            var javaPairRddId = bridge.CallNonStaticJavaMethod(pairwise, "asJavaPairRDD", new object[] { }).ToString();
            var javaPairRdd = new JvmObjectReference(javaPairRddId);

            // Step 2: partition the pair RDD with a PythonPartitioner built from the caller's arguments.
            var partitioner = bridge.CallConstructor("org.apache.spark.api.python.PythonPartitioner", new object[] { numPartitions, partitionFuncId });
            var partitionedId = bridge.CallNonStaticJavaMethod(javaPairRdd, "partitionBy", new object[] { partitioner }).ToString();
            var partitioned = new JvmObjectReference(partitionedId);

            // Step 3: hand the partitioned pair RDD to PythonRDD.valueOfPair and wrap what comes back.
            var resultId = bridge.CallStaticJavaMethod("org.apache.spark.api.python.PythonRDD", "valueOfPair", new object[] { partitioned }).ToString();

            return new RDDIpcProxy(new JvmObjectReference(resultId));
        }
        /// <summary>
        /// Creates an instance of <c>T</c> by reflection, using its non-public instance
        /// constructor whose only parameter is a <c>JvmObjectReference</c>.
        /// </summary>
        /// <param name="reference">JVM object reference passed to that constructor.</param>
        /// <returns>The constructed object, cast to <c>T</c>.</returns>
        private static T WrapAsType(JvmObjectReference reference)
        {
            ConstructorInfo[] candidates =
                typeof(T).GetConstructors(BindingFlags.NonPublic | BindingFlags.Instance);

            // Single() requires exactly one matching constructor and throws otherwise.
            ConstructorInfo match = candidates.Single(candidate =>
            {
                ParameterInfo[] signature = candidate.GetParameters();
                if (signature.Length != 1)
                {
                    return false;
                }

                return signature[0].ParameterType == typeof(JvmObjectReference);
            });

            object instance = match.Invoke(new object[] { reference });
            return (T)instance;
        }
        /// <summary>
        /// Calls the static JVM method <c>EventHubsUtils.createUnionStream</c> and wraps the
        /// returned reference in a <c>DStreamIpcProxy</c>.
        /// </summary>
        /// <param name="eventHubsParams">Parameters converted to a Scala mutable map before the call.</param>
        /// <param name="storageLevelType">Storage level converted to its Java counterpart before the call.</param>
        /// <returns>Proxy over the stream reference returned by the JVM.</returns>
        public IDStreamProxy EventHubsUnionStream(Dictionary<string, string> eventHubsParams, StorageLevelType storageLevelType)
        {
            JvmObjectReference jparams = JvmBridgeUtils.GetScalaMutableMap<string, string>(eventHubsParams);
            JvmObjectReference jstorageLevel = SparkContextIpcProxy.GetJavaStorageLevel(storageLevelType);

            var streamId = SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod(
                "org.apache.spark.streaming.api.csharp.EventHubsUtils",
                "createUnionStream",
                new object[] { jvmJavaStreamingReference, jparams, jstorageLevel });

            var streamReference = new JvmObjectReference(streamId.ToString());
            return new DStreamIpcProxy(streamReference);
        }
Exemple #4
0
        /// <summary>
        /// Creates the JVM <c>StreamingContext</c> and <c>JavaStreamingContext</c> for an existing
        /// <c>SparkContext</c>, then starts the accumulator server.
        /// </summary>
        /// <param name="sparkContext">Context whose proxy supplies the JVM SparkContext reference.</param>
        /// <param name="durationMs">Value passed to the <c>org.apache.spark.streaming.Duration</c> constructor.</param>
        public StreamingContextIpcProxy(SparkContext sparkContext, long durationMs)
        {
            this.sparkContext = sparkContext;
            sparkContextProxy = sparkContext.SparkContextProxy;

            var bridge = SparkCLRIpcProxy.JvmBridge;
            var batchDuration = bridge.CallConstructor("org.apache.spark.streaming.Duration", new object[] { durationMs });

            // The proxy is accessed as a SparkContextIpcProxy to reach its JVM reference.
            JvmObjectReference jvmContext = (sparkContextProxy as SparkContextIpcProxy).JvmSparkContextReference;

            jvmStreamingContextReference = bridge.CallConstructor(
                "org.apache.spark.streaming.StreamingContext",
                new object[] { jvmContext, batchDuration });
            jvmJavaStreamingReference = bridge.CallConstructor(
                "org.apache.spark.streaming.api.java.JavaStreamingContext",
                new object[] { jvmStreamingContextReference });

            StartAccumulatorServer(sparkContext);
        }
        /// <summary>
        /// Copies the column-name/aggregate-function pairs into a JVM <c>java.util.HashMap</c> and
        /// invokes <c>agg</c> on the grouped data with that map.
        /// </summary>
        /// <param name="scalaGroupedDataReference">Grouped data proxy; it is accessed as a <c>GroupedDataIpcProxy</c>.</param>
        /// <param name="columnNameAggFunctionDictionary">Entries put into the JVM map, key to value.</param>
        /// <returns>A <c>DataFrameIpcProxy</c> over the reference returned by <c>agg</c>.</returns>
        public IDataFrameProxy Agg(IGroupedDataProxy scalaGroupedDataReference, Dictionary<string, string> columnNameAggFunctionDictionary)
        {
            var bridge = SparkCLRIpcProxy.JvmBridge;
            var javaMap = new JvmObjectReference(bridge.CallConstructor("java.util.HashMap").ToString());

            // One "put" call per dictionary entry.
            foreach (var entry in columnNameAggFunctionDictionary)
            {
                bridge.CallNonStaticJavaMethod(javaMap, "put", new object[] { entry.Key, entry.Value });
            }

            var groupedData = (scalaGroupedDataReference as GroupedDataIpcProxy).ScalaGroupedDataReference;
            var aggregatedId = bridge.CallNonStaticJavaMethod(groupedData, "agg", new object[] { javaMap }).ToString();

            return new DataFrameIpcProxy(new JvmObjectReference(aggregatedId), sqlContextProxy);
        }
        /// <summary>
        /// Creates a JVM <c>JavaStreamingContext</c> from a checkpoint path and derives the
        /// streaming context, Spark context and Spark configuration references from it.
        /// </summary>
        /// <param name="checkpointPath">Argument passed to the <c>JavaStreamingContext</c> constructor.</param>
        public StreamingContextIpcProxy(string checkpointPath)
        {
            var bridge = SparkCLRIpcProxy.JvmBridge;

            jvmJavaStreamingReference = bridge.CallConstructor(
                "org.apache.spark.streaming.api.java.JavaStreamingContext",
                new object[] { checkpointPath });

            var streamingContextId = (string)bridge.CallNonStaticJavaMethod(jvmJavaStreamingReference, "ssc");
            jvmStreamingContextReference = new JvmObjectReference(streamingContextId);

            // "sc" and "conf" are read from the streaming context, "sparkContext" from the Java streaming context.
            var scalaContextId = (string)bridge.CallNonStaticJavaMethod(jvmStreamingContextReference, "sc");
            var scalaContext = new JvmObjectReference(scalaContextId);
            var confId = (string)bridge.CallNonStaticJavaMethod(jvmStreamingContextReference, "conf");
            var conf = new JvmObjectReference(confId);
            var javaContextId = (string)bridge.CallNonStaticJavaMethod(jvmJavaStreamingReference, "sparkContext");
            var javaContext = new JvmObjectReference(javaContextId);

            sparkContextProxy = new SparkContextIpcProxy(scalaContext, javaContext);
            var confProxy = new SparkConfIpcProxy(conf);

            sparkContext = new SparkContext(sparkContextProxy, new SparkConf(confProxy));
        }
Exemple #7
0
        /// <summary>
        /// Reads the storage level of the underlying JVM RDD and converts it to a
        /// <c>StorageLevel</c>.
        /// </summary>
        /// <returns>
        /// A <c>StorageLevel</c> built from the JVM object's <c>useDisk</c>, <c>useMemory</c>,
        /// <c>useOffHeap</c>, <c>deserialized</c> and <c>replication</c> values, in that order.
        /// </returns>
        public StorageLevel GetStorageLevel()
        {
            var bridge = SparkCLRIpcProxy.JvmBridge;

            var scalaRdd = new JvmObjectReference((string)bridge.CallNonStaticJavaMethod(jvmRddReference, "rdd"));
            var level = new JvmObjectReference((string)bridge.CallNonStaticJavaMethod(scalaRdd, "getStorageLevel"));

            // Each flag is fetched with its own JVM call, in constructor-argument order.
            var useDisk = (bool)bridge.CallNonStaticJavaMethod(level, "useDisk");
            var useMemory = (bool)bridge.CallNonStaticJavaMethod(level, "useMemory");
            var useOffHeap = (bool)bridge.CallNonStaticJavaMethod(level, "useOffHeap");
            var deserialized = (bool)bridge.CallNonStaticJavaMethod(level, "deserialized");
            var replication = (int)bridge.CallNonStaticJavaMethod(level, "replication");

            return new StorageLevel(useDisk, useMemory, useOffHeap, deserialized, replication);
        }
Exemple #8
0
        /// <summary>
        /// Stores the inputs of a pipelined RDD without creating a JVM object for it.
        /// Mainly called by <see cref="PipelinedRDD{T}"/>.
        /// </summary>
        /// <param name="prevRddJvmObjRef">
        /// The reference to the RDD JVM object from which pipeline is created
        /// </param>
        /// <param name="sparkContext">SparkContext object</param>
        /// <param name="serializedMode">Serialization mode for the current RDD</param>
        /// <param name="prevSerializedMode">Serialization mode for the previous RDD</param>
        internal RDD(
            JvmObjectReference prevRddJvmObjRef,
            SparkContext sparkContext,
            SerializedMode serializedMode,
            SerializedMode prevSerializedMode)
        {
            // Invoked from the PipelinedRDD constructor, at which point _jvmObject
            // has not been created yet; only the inputs are recorded here.
            _sparkContext = sparkContext;
            _prevRddJvmObjRef = prevRddJvmObjRef;
            _prevSerializedMode = prevSerializedMode;
            _serializedMode = serializedMode;
        }
        /// <summary>
        /// Checks through the JVM whether the given checkpoint path exists on its Hadoop
        /// file system and whether <c>listStatus</c> returns a non-null result for it.
        /// </summary>
        /// <param name="checkpointPath">Path to check; <c>null</c> yields <c>false</c>.</param>
        /// <returns><c>true</c> when <c>exists</c> is true and <c>listStatus</c> is not null.</returns>
        public bool CheckpointExists(string checkpointPath)
        {
            if (checkpointPath == null)
            {
                return false;
            }

            var bridge = SparkCLRIpcProxy.JvmBridge;
            var hadoopPath = bridge.CallConstructor("org.apache.hadoop.fs.Path", checkpointPath);
            var hadoopConf = bridge.CallConstructor("org.apache.hadoop.conf.Configuration");
            var fileSystemId = (string)bridge.CallNonStaticJavaMethod(hadoopPath, "getFileSystem", hadoopConf);
            var fileSystem = new JvmObjectReference(fileSystemId);

            // listStatus is only queried when the path exists.
            var pathExists = (bool)bridge.CallNonStaticJavaMethod(fileSystem, "exists", hadoopPath);
            if (!pathExists)
            {
                return false;
            }

            return bridge.CallNonStaticJavaMethod(fileSystem, "listStatus", hadoopPath) != null;
        }
Exemple #10
0
        /// <summary>
        /// Invokes <c>saveAsTextFile</c> on the JVM RDD reference, optionally with a
        /// compression codec class.
        /// </summary>
        /// <param name="path">Output path passed to <c>saveAsTextFile</c>.</param>
        /// <param name="compressionCodecClass">
        /// Class name resolved on the JVM with <c>java.lang.Class.forName</c>; when null or
        /// empty the single-argument overload is called instead.
        /// </param>
        public void SaveAsTextFile(string path, string compressionCodecClass)
        {
            // The previous version also fetched the "rdd" of jvmRddReference into a local that was
            // never used; that extra JVM round trip has been removed. Both branches still call
            // saveAsTextFile on jvmRddReference exactly as before.
            if (string.IsNullOrEmpty(compressionCodecClass))
            {
                SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmRddReference, "saveAsTextFile", new object[] { path });
                return;
            }

            var codec = new JvmObjectReference((string)SparkCLRIpcProxy.JvmBridge.CallStaticJavaMethod("java.lang.Class", "forName", new object[] { compressionCodecClass }));
            SparkCLRIpcProxy.JvmBridge.CallNonStaticJavaMethod(jvmRddReference, "saveAsTextFile", new object[] { path, codec });
        }
        /// <summary>
        /// Constructs a JVM <c>CSharpConstantInputDStream</c> over the given RDD and returns a
        /// proxy holding both its Java DStream view and the DStream itself.
        /// </summary>
        /// <param name="rddProxy">RDD proxy; it is cast to <c>RDDIpcProxy</c>.</param>
        /// <returns>A <c>DStreamIpcProxy</c> built from the <c>asJavaDStream</c> reference and the DStream reference.</returns>
        public IDStreamProxy CreateConstantInputDStream(IRDDProxy rddProxy)
        {
            var bridge = SparkCLRIpcProxy.JvmBridge;

            var scalaRddId = (string)bridge.CallNonStaticJavaMethod(((RDDIpcProxy)rddProxy).JvmRddReference, "rdd");
            var scalaRdd = new JvmObjectReference(scalaRddId);

            var dstream = bridge.CallConstructor(
                "org.apache.spark.streaming.api.csharp.CSharpConstantInputDStream",
                jvmStreamingContextReference,
                scalaRdd);

            var javaDStreamId = (string)bridge.CallNonStaticJavaMethod(dstream, "asJavaDStream");
            var javaDStream = new JvmObjectReference(javaDStreamId);

            return new DStreamIpcProxy(javaDStream, dstream);
        }
Exemple #12
0
        /// <summary>
        /// Joins this data frame with another one on the given column names by calling
        /// <c>join</c> on the JVM data frame.
        /// </summary>
        /// <param name="otherScalaDataFrameReference">The other data frame; it is accessed as a <c>DataFrameIpcProxy</c>.</param>
        /// <param name="joinColumnNames">Column names converted to a sequence with <c>SQLUtils.toSeq</c>.</param>
        /// <returns>A <c>DataFrameIpcProxy</c> over the reference returned by <c>join</c>.</returns>
        public IDataFrameProxy Join(IDataFrameProxy otherScalaDataFrameReference, string[] joinColumnNames)
        {
            var bridge = SparkCLRIpcProxy.JvmBridge;

            // The string[] is wrapped in an object[] so it travels as one argument.
            var columnSeqId = bridge.CallStaticJavaMethod(
                "org.apache.spark.sql.api.csharp.SQLUtils", "toSeq", new object[] { joinColumnNames }).ToString();
            var columnSeq = new JvmObjectReference(columnSeqId);

            var joinArguments = new object[]
            {
                (otherScalaDataFrameReference as DataFrameIpcProxy).jvmDataFrameReference,
                columnSeq
            };
            var joinedId = bridge.CallNonStaticJavaMethod(jvmDataFrameReference, "join", joinArguments).ToString();

            return new DataFrameIpcProxy(new JvmObjectReference(joinedId), sqlContextProxy);
        }
        /// <summary>
        /// Creates the JVM <c>StreamingContext</c> and <c>JavaStreamingContext</c> for an existing
        /// <c>SparkContext</c>, then starts the accumulator server and the callback server.
        /// </summary>
        /// <param name="sparkContext">Context whose proxy supplies the JVM SparkContext reference.</param>
        /// <param name="durationSeconds">Value converted with <c>JvmBridgeUtils.GetJavaDuration</c>.</param>
        public StreamingContextIpcProxy(SparkContext sparkContext, int durationSeconds)
        {
            this.sparkContext = sparkContext;
            sparkContextProxy = sparkContext.SparkContextProxy;

            var batchDuration = JvmBridgeUtils.GetJavaDuration(durationSeconds);

            // The proxy is accessed as a SparkContextIpcProxy to reach its JVM reference.
            JvmObjectReference jvmContext = (sparkContextProxy as SparkContextIpcProxy).JvmSparkContextReference;

            var bridge = SparkCLRIpcProxy.JvmBridge;
            jvmStreamingContextReference = bridge.CallConstructor(
                "org.apache.spark.streaming.StreamingContext",
                new object[] { jvmContext, batchDuration });
            jvmJavaStreamingReference = bridge.CallConstructor(
                "org.apache.spark.streaming.api.java.JavaStreamingContext",
                new object[] { jvmStreamingContextReference });

            StartAccumulatorServer(sparkContext);
            StartCallbackServer();
        }
Exemple #14
0
        /// <summary>
        /// Creates the PythonFunction object on the JVM side by calling
        /// <c>SQLUtils.createPythonFunction</c> with the given command bytes.
        /// </summary>
        /// <param name="jvm">JVM bridge to use</param>
        /// <param name="command">Serialized command bytes</param>
        /// <returns>JvmObjectReference object to the PythonFunction object</returns>
        internal static JvmObjectReference CreatePythonFunction(IJvmBridge jvm, byte[] command)
        {
            // One empty java.util.ArrayList is created and passed for both list arguments.
            JvmObjectReference emptyList = jvm.CallConstructor("java.util.ArrayList");
            var environmentVars = CreateEnvVarsForPythonFunction(jvm);
            var workerExePath = SparkEnvironment.ConfigurationService.GetWorkerExePath();
            var version = Versions.CurrentVersion;

            var pythonFunction = jvm.CallStaticJavaMethod(
                "org.apache.spark.sql.api.dotnet.SQLUtils",
                "createPythonFunction",
                command,
                environmentVars,
                emptyList,      // Python includes
                workerExePath,
                version,
                emptyList,      // Broadcast variables
                null);          // Accumulator

            return (JvmObjectReference)pythonFunction;
        }
        /// <summary>
        /// Creates the JVM <c>StreamingContext</c> and <c>JavaStreamingContext</c> for an existing
        /// <c>SparkContext</c>, starts the callback and tells the JVM which port to connect to.
        /// </summary>
        /// <param name="sparkContext">Context whose proxy supplies the JVM SparkContext reference.</param>
        /// <param name="durationMs">Value passed to the <c>org.apache.spark.streaming.Duration</c> constructor.</param>
        public StreamingContextIpcProxy(SparkContext sparkContext, long durationMs)
        {
            this.sparkContext = sparkContext;
            sparkContextProxy = sparkContext.SparkContextProxy;

            var bridge = SparkCLRIpcProxy.JvmBridge;
            var batchDuration = bridge.CallConstructor("org.apache.spark.streaming.Duration", new object[] { durationMs });

            // The proxy is accessed as a SparkContextIpcProxy to reach its JVM reference.
            JvmObjectReference jvmContext = (sparkContextProxy as SparkContextIpcProxy).JvmSparkContextReference;

            jvmStreamingContextReference = bridge.CallConstructor(
                "org.apache.spark.streaming.StreamingContext",
                new object[] { jvmContext, batchDuration });
            jvmJavaStreamingReference = bridge.CallConstructor(
                "org.apache.spark.streaming.api.java.JavaStreamingContext",
                new object[] { jvmStreamingContextReference });

            int callbackPort = StartCallback();

            // className and methodName hardcoded in CSharpBackendHandler
            bridge.CallStaticJavaMethod("SparkCLRHandler", "connectCallback", callbackPort);
        }
Exemple #16
0
        /// <summary>
        /// Builds a JVM struct type from the given fields: their JVM references are turned into
        /// a sequence with <c>SQLUtils.toSeq</c>, which is then passed to
        /// <c>SQLUtils.createStructType</c>.
        /// </summary>
        /// <param name="fields">Fields whose proxies are accessed as <c>StructFieldIpcProxy</c>.</param>
        /// <returns>A <c>StructTypeIpcProxy</c> over the reference returned by <c>createStructType</c>.</returns>
        public IStructTypeProxy CreateStructType(List<StructField> fields)
        {
            const string sqlUtilsClass = "org.apache.spark.sql.api.csharp.SQLUtils";

            var fieldReferences = fields
                .Select(field => (field.StructFieldProxy as StructFieldIpcProxy).JvmStructFieldReference)
                .ToList()
                .Cast<JvmObjectReference>();

            // The collection is wrapped in an object[] so it travels as one argument.
            var seqId = JvmBridge.CallStaticJavaMethod(sqlUtilsClass, "toSeq", new object[] { fieldReferences }).ToString();
            var fieldSeq = new JvmObjectReference(seqId);

            var structTypeId = JvmBridge.CallStaticJavaMethod(sqlUtilsClass, "createStructType", new object[] { fieldSeq }).ToString();

            return new StructTypeIpcProxy(new JvmObjectReference(structTypeId));
        }
Exemple #17
0
        /// <summary>
        /// Asks the JVM status tracker for a job's info and copies its stage ids and status.
        /// </summary>
        /// <param name="jobId">Id passed to the tracker's <c>getJobInfo</c>.</param>
        /// <returns>
        /// A <c>SparkJobInfo</c> with the job id, stage ids and status text, or <c>null</c> when
        /// the tracker returns null.
        /// </returns>
        public SparkJobInfo GetJobInfo(int jobId)
        {
            var bridge = SparkCLRIpcProxy.JvmBridge;

            var rawJobInfo = bridge.CallNonStaticJavaMethod(jvmStatusTrackerReference, "getJobInfo", new object[] { jobId });
            if (rawJobInfo == null)
            {
                return null;
            }

            var jobInfo = new JvmObjectReference((string)rawJobInfo);

            var stages = (int[])bridge.CallNonStaticJavaMethod(jobInfo, "stageIds");
            var statusText = bridge.CallNonStaticJavaMethod(jobInfo, "status").ToString();

            return new SparkJobInfo(jobId, stages, statusText);
        }
Exemple #18
0
        /// <summary>
        /// Creates an <see cref="IndexConfig"/>, which specifies the configuration of an index,
        /// and the matching JVM object via the static <c>apply</c> method.
        /// </summary>
        /// <param name="indexName">Index name.</param>
        /// <param name="indexedColumns">Columns from which an index is created.</param>
        /// <param name="includedColumns">Columns to be included in the index.</param>
        public IndexConfig(
            string indexName,
            IEnumerable<string> indexedColumns,
            IEnumerable<string> includedColumns)
        {
            IndexName = indexName;

            // Both column sequences are copied into new lists.
            IndexedColumns = new List<string>(indexedColumns);
            IncludedColumns = new List<string>(includedColumns);

            var applyArguments = new object[] { IndexName, IndexedColumns, IncludedColumns };
            var created = SparkEnvironment.JvmBridge.CallStaticJavaMethod(s_className, "apply", applyArguments);

            _jvmObject = (JvmObjectReference)created;
        }
Exemple #19
0
        /// <summary>
        /// Wraps an existing JVM SparkConf reference and fills in defaults for
        /// <c>spark.master</c> and <c>spark.app.name</c> when they are blank.
        /// </summary>
        /// <param name="jvmObject">Reference stored as the backing JVM object.</param>
        internal SparkConf(JvmObjectReference jvmObject)
        {
            _jvmObject = jvmObject;

            // In debug mode spark.master and spark.app.name are not set, so defaults are
            // supplied here. Driver code may still override them through SetMaster or SetAppName.
            var master = Get("spark.master", "");
            if (string.IsNullOrWhiteSpace(master))
            {
                SetMaster("local");
            }

            var appName = Get("spark.app.name", "");
            if (string.IsNullOrWhiteSpace(appName))
            {
                SetAppName("debug app");
            }
        }
Exemple #20
0
        /// <summary>
        /// Registers a function by calling <c>registerPython</c> on the <c>udf</c> object of the
        /// JVM SQL context.
        /// </summary>
        /// <param name="name">Name passed as the first argument.</param>
        /// <param name="command">Command bytes passed as the second argument.</param>
        /// <param name="returnType">Return type; it is sent wrapped in double quotes.</param>
        public void RegisterFunction(string name, byte[] command, string returnType)
        {
            var bridge = SparkCLRIpcProxy.JvmBridge;

            var udfRegistrationId = (string)bridge.CallNonStaticJavaMethod(jvmSqlContextReference, "udf");
            var udfRegistration = new JvmObjectReference(udfRegistrationId);

            var emptyHashtable = bridge.CallConstructor("java.util.Hashtable", new object[] { });
            var emptyList = bridge.CallConstructor("java.util.ArrayList", new object[] { });

            var workerExePath = SparkCLREnvironment.ConfigurationService.GetCSharpWorkerExePath();
            var quotedReturnType = "\"" + returnType + "\"";

            // The same empty ArrayList is passed in both list positions; the eighth argument is null.
            var registerArguments = new object[]
            {
                name,
                command,
                emptyHashtable,
                emptyList,
                workerExePath,
                "1.0",
                emptyList,
                null,
                quotedReturnType
            };

            bridge.CallNonStaticJavaMethod(udfRegistration, "registerPython", registerArguments);
        }
Exemple #21
0
        /// <summary>
        /// Constructs a JVM <c>CSharpInputDStream</c> and returns a proxy holding both its Java
        /// DStream view and the DStream itself.
        /// </summary>
        /// <param name="func">Bytes passed as the second constructor argument.</param>
        /// <param name="serializationMode">Text passed as the third constructor argument.</param>
        /// <returns>A <c>DStreamIpcProxy</c> built from the <c>asJavaDStream</c> reference and the DStream reference.</returns>
        public IDStreamProxy CreateCSharpInputDStream(byte[] func, string serializationMode)
        {
            var bridge = SparkCLRIpcProxy.JvmBridge;

            var constructorArguments = new object[] { jvmStreamingContextReference, func, serializationMode };
            var dstream = bridge.CallConstructor(
                "org.apache.spark.streaming.api.csharp.CSharpInputDStream",
                constructorArguments);

            var javaDStreamId = (string)bridge.CallNonStaticJavaMethod(dstream, "asJavaDStream");
            var javaDStream = new JvmObjectReference(javaDStreamId);

            return new DStreamIpcProxy(javaDStream, dstream);
        }
Exemple #22
0
        /// <summary>
        /// Creates a JVM <c>JavaStreamingContext</c> from a checkpoint path, derives the
        /// streaming context, Spark context and Spark configuration references from it, and
        /// starts the accumulator server.
        /// </summary>
        /// <param name="checkpointPath">Argument passed to the <c>JavaStreamingContext</c> constructor.</param>
        public StreamingContextIpcProxy(string checkpointPath)
        {
            var bridge = SparkCLRIpcProxy.JvmBridge;

            jvmJavaStreamingReference = bridge.CallConstructor(
                "org.apache.spark.streaming.api.java.JavaStreamingContext",
                new object[] { checkpointPath });

            var streamingContextId = (string)bridge.CallNonStaticJavaMethod(jvmJavaStreamingReference, "ssc");
            jvmStreamingContextReference = new JvmObjectReference(streamingContextId);

            // "sc" and "conf" are read from the streaming context, "sparkContext" from the Java streaming context.
            var scalaContextId = (string)bridge.CallNonStaticJavaMethod(jvmStreamingContextReference, "sc");
            var scalaContext = new JvmObjectReference(scalaContextId);
            var confId = (string)bridge.CallNonStaticJavaMethod(jvmStreamingContextReference, "conf");
            var conf = new JvmObjectReference(confId);
            var javaContextId = (string)bridge.CallNonStaticJavaMethod(jvmJavaStreamingReference, "sparkContext");
            var javaContext = new JvmObjectReference(javaContextId);

            sparkContextProxy = new SparkContextIpcProxy(scalaContext, javaContext);
            var confProxy = new SparkConfIpcProxy(conf);

            sparkContext = new SparkContext(sparkContextProxy, new SparkConf(confProxy));

            // TODO: We don't know whether an accumulator variable was used before the restart,
            // so the accumulator server is started for safety.
            sparkContext.StartAccumulatorServer();
        }
Exemple #23
0
        /// <summary>
        /// Creates a JVM <c>java.util.Hashtable</c> and, when the assembly search path
        /// environment variable is set to a non-empty value, stores it in the table under the
        /// variable's own name.
        /// </summary>
        /// <param name="jvm">JVM bridge used for the constructor and <c>put</c> calls.</param>
        /// <returns>Reference to the Hashtable.</returns>
        private static JvmObjectReference CreateEnvVarsForPythonFunction(IJvmBridge jvm)
        {
            JvmObjectReference table = jvm.CallConstructor("java.util.Hashtable");

            string variableName = AssemblySearchPathResolver.AssemblySearchPathsEnvVarName;
            string searchPath = Environment.GetEnvironmentVariable(variableName);

            // Nothing to forward when the variable is unset or empty.
            if (string.IsNullOrEmpty(searchPath))
            {
                return table;
            }

            jvm.CallNonStaticJavaMethod(table, "put", variableName, searchPath);
            return table;
        }
Exemple #24
0
        /// <summary>
        /// Constructs a JVM <c>UserDefinedPythonFunction</c> for the given command and wraps it
        /// in a <c>UDFIpcProxy</c>.
        /// </summary>
        /// <param name="name">Name passed as the first constructor argument.</param>
        /// <param name="command">Command bytes passed as the second constructor argument.</param>
        /// <param name="returnType">
        /// Return type; it is wrapped in double quotes and parsed with <c>parseDataType</c> on a
        /// newly constructed JVM <c>SQLContext</c>.
        /// </param>
        /// <returns>Proxy over the constructed JVM function object.</returns>
        public IUDFProxy CreateUserDefinedCSharpFunction(string name, byte[] command, string returnType = "string")
        {
            var bridge = SparkCLRIpcProxy.JvmBridge;

            var sqlContext = bridge.CallConstructor("org.apache.spark.sql.SQLContext", new object[] { jvmSparkContextReference });
            var dataTypeId = (string)bridge.CallNonStaticJavaMethod(sqlContext, "parseDataType", new object[] { "\"" + returnType + "\"" });
            var dataType = new JvmObjectReference(dataTypeId);
            var broadcasts = JvmBridgeUtils.GetJavaList<JvmObjectReference>(jvmBroadcastReferences);

            var emptyHashtable = bridge.CallConstructor("java.util.Hashtable", new object[] { });
            var emptyList = bridge.CallConstructor("java.util.ArrayList", new object[] { });

            var workerExePath = SparkCLREnvironment.ConfigurationService.GetCSharpWorkerExePath();

            var constructorArguments = new object[]
            {
                name,
                command,
                emptyHashtable,
                emptyList,
                workerExePath,
                "1.0",
                broadcasts,
                jvmAccumulatorReference,
                dataType
            };

            var function = bridge.CallConstructor("org.apache.spark.sql.UserDefinedPythonFunction", constructorArguments);
            return new UDFIpcProxy(function);
        }
Exemple #25
0
        /// <summary>
        /// Constructs a JVM <c>CSharpRDD</c> on top of the previous RDD and returns a proxy over
        /// its <c>asJavaRDD</c> view.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the body never reads <paramref name="environmentVariables"/>,
        /// <paramref name="pythonIncludes"/>, <paramref name="broadcastVariables"/> or
        /// <paramref name="accumulator"/>. It sends an empty Hashtable, an empty ArrayList, and
        /// the <c>jvmBroadcastReferences</c> / <c>jvmAccumulatorReference</c> members instead.
        /// </remarks>
        /// <param name="prevJvmRddReference">Previous RDD proxy; it is accessed as an <c>RDDIpcProxy</c>.</param>
        /// <param name="command">Command bytes passed to the <c>CSharpRDD</c> constructor.</param>
        /// <param name="environmentVariables">Not read by this method.</param>
        /// <param name="pythonIncludes">Not read by this method.</param>
        /// <param name="preservesPartitioning">Flag passed to the <c>CSharpRDD</c> constructor.</param>
        /// <param name="broadcastVariables">Not read by this method.</param>
        /// <param name="accumulator">Not read by this method.</param>
        /// <returns>An <c>RDDIpcProxy</c> over the reference returned by <c>asJavaRDD</c>.</returns>
        public IRDDProxy CreateCSharpRdd(IRDDProxy prevJvmRddReference, byte[] command, Dictionary<string, string> environmentVariables, List<string> pythonIncludes, bool preservesPartitioning, List<Broadcast> broadcastVariables, List<byte[]> accumulator)
        {
            var bridge = SparkCLRIpcProxy.JvmBridge;

            var emptyHashtable = bridge.CallConstructor("java.util.Hashtable", new object[] { });
            var emptyList = bridge.CallConstructor("java.util.ArrayList", new object[] { });
            var broadcasts = JvmBridgeUtils.GetJavaList<JvmObjectReference>(jvmBroadcastReferences);

            var previousRddId = (string)bridge.CallNonStaticJavaMethod((prevJvmRddReference as RDDIpcProxy).JvmRddReference, "rdd");
            var previousRdd = new JvmObjectReference(previousRddId);

            var workerExePath = SparkCLREnvironment.ConfigurationService.GetCSharpWorkerExePath();

            var constructorArguments = new object[]
            {
                previousRdd,
                command,
                emptyHashtable,
                emptyList,
                preservesPartitioning,
                workerExePath,
                "1.0",
                broadcasts,
                jvmAccumulatorReference
            };
            var csharpRdd = bridge.CallConstructor("org.apache.spark.api.csharp.CSharpRDD", constructorArguments);

            var javaRddId = (string)bridge.CallNonStaticJavaMethod(csharpRdd, "asJavaRDD");
            return new RDDIpcProxy(new JvmObjectReference(javaRddId));
        }
Exemple #26
0
        /// <summary>
        /// Creates a direct Kafka stream by calling <c>createDirectStream</c> on a JVM
        /// <c>KafkaUtilsPythonHelper</c>.
        /// </summary>
        /// <param name="topics">Topics converted to a Java set.</param>
        /// <param name="kafkaParams">Kafka parameters converted to a Java map.</param>
        /// <param name="fromOffsets">
        /// Starting offsets. Each key is split on ':'; the first part and the second part parsed
        /// as an integer are passed to the <c>kafka.common.TopicAndPartition</c> constructor.
        /// </param>
        /// <returns>A <c>DStreamIpcProxy</c> over the returned stream reference.</returns>
        public IDStreamProxy DirectKafkaStream(List<string> topics, Dictionary<string, string> kafkaParams, Dictionary<string, long> fromOffsets)
        {
            var bridge = SparkCLRIpcProxy.JvmBridge;

            JvmObjectReference javaTopics = SparkContextIpcProxy.GetJavaSet<string>(topics);
            JvmObjectReference javaKafkaParams = SparkContextIpcProxy.GetJavaMap<string, string>(kafkaParams);

            // Lazily projects every "<first>:<second>" key to a JVM TopicAndPartition.
            var offsetsByPartition = fromOffsets.Select(offset =>
            {
                string[] keyParts = offset.Key.Split(':');
                var topicAndPartition = SparkCLRIpcProxy.JvmBridge.CallConstructor(
                    "kafka.common.TopicAndPartition",
                    new object[] { keyParts[0], int.Parse(keyParts[1]) });
                return new KeyValuePair<JvmObjectReference, long>(topicAndPartition, offset.Value);
            });

            JvmObjectReference javaFromOffsets = SparkContextIpcProxy.GetJavaMap<JvmObjectReference, long>(offsetsByPartition);
            JvmObjectReference helper = bridge.CallConstructor("org.apache.spark.streaming.kafka.KafkaUtilsPythonHelper", new object[] { });

            var streamId = bridge.CallNonStaticJavaMethod(
                helper,
                "createDirectStream",
                new object[] { jvmJavaStreamingReference, javaKafkaParams, javaTopics, javaFromOffsets }).ToString();

            return new DStreamIpcProxy(new JvmObjectReference(streamId));
        }
Exemple #27
0
        /// <summary>
        /// Asks the JVM status tracker for a stage's info and copies its fields into a
        /// <c>SparkStageInfo</c>.
        /// </summary>
        /// <param name="stageId">Id passed to the tracker's <c>getStageInfo</c>.</param>
        /// <returns>
        /// The populated <c>SparkStageInfo</c>, or <c>null</c> when the tracker returns null.
        /// </returns>
        public SparkStageInfo GetStageInfo(int stageId)
        {
            var bridge = SparkCLRIpcProxy.JvmBridge;

            var stageInfoId = bridge.CallNonStaticJavaMethod(jvmStatusTrackerReference, "getStageInfo", new object[] { stageId });
            if (stageInfoId == null)
            {
                return null;
            }

            JvmObjectReference jStageInfo = new JvmObjectReference((string)stageInfoId);

            int currentAttemptId = (int)bridge.CallNonStaticJavaMethod(jStageInfo, "currentAttemptId");

            // submissionTime is a Java long. Unboxing it with (int) fails for any boxed value that is
            // not an Int32, and an int could not hold such a value anyway, so convert whatever numeric
            // box the bridge returns straight to Int64.
            long submissionTime = System.Convert.ToInt64(
                bridge.CallNonStaticJavaMethod(jStageInfo, "submissionTime"),
                System.Globalization.CultureInfo.InvariantCulture);

            string name = (string)bridge.CallNonStaticJavaMethod(jStageInfo, "name");
            int numTasks = (int)bridge.CallNonStaticJavaMethod(jStageInfo, "numTasks");
            int numActiveTasks = (int)bridge.CallNonStaticJavaMethod(jStageInfo, "numActiveTasks");
            int numCompletedTasks = (int)bridge.CallNonStaticJavaMethod(jStageInfo, "numCompletedTasks");
            int numFailedTasks = (int)bridge.CallNonStaticJavaMethod(jStageInfo, "numFailedTasks");

            return new SparkStageInfo(stageId, currentAttemptId, submissionTime, name, numTasks, numActiveTasks, numCompletedTasks, numFailedTasks);
        }
        /// <summary>
        /// Lists the names of the entries that <c>listStatus</c> returns for the given path.
        /// </summary>
        /// <param name="path">Path passed to the <c>org.apache.hadoop.fs.Path</c> constructor.</param>
        /// <returns>
        /// One name per status entry (from <c>getPath</c> then <c>getName</c>), or an empty array
        /// when the listing is null or empty.
        /// </returns>
        public IEnumerable<string> EnumerateFiles(string path)
        {
            var bridge = SparkCLRIpcProxy.JvmBridge;

            var directory = bridge.CallConstructor("org.apache.hadoop.fs.Path", path);
            var statuses = (List<JvmObjectReference>)bridge.CallNonStaticJavaMethod(jvmHdfsReference, "listStatus", directory);

            if (statuses == null || statuses.Count == 0)
            {
                return new string[0];
            }

            var names = new string[statuses.Count];
            var next = 0;

            foreach (var status in statuses)
            {
                // Two JVM calls per entry: the status' path, then that path's name.
                var entryPathId = (string)bridge.CallNonStaticJavaMethod(status, "getPath");
                var entryPath = new JvmObjectReference(entryPathId);
                names[next] = (string)bridge.CallNonStaticJavaMethod(entryPath, "getName");
                next++;
            }

            return names;
        }
        /// <summary>
        /// Creates a direct Kafka stream by calling <c>createDirectStreamWithoutMessageHandler</c>
        /// on a JVM <c>KafkaUtilsCSharpHelper</c>, passing the partition count as the last argument.
        /// </summary>
        /// <param name="topics">Topics converted to a Java set.</param>
        /// <param name="kafkaParams">Kafka parameters converted to a Java map.</param>
        /// <param name="fromOffsets">
        /// Starting offsets. Each key is split on ':'; the first part and the second part parsed
        /// as an integer are passed to the <c>kafka.common.TopicAndPartition</c> constructor.
        /// </param>
        /// <param name="numPartitions">Value forwarded as the final helper argument.</param>
        /// <returns>A <c>DStreamIpcProxy</c> over the returned stream reference.</returns>
        public IDStreamProxy DirectKafkaStreamWithRepartition(List<string> topics, Dictionary<string, string> kafkaParams, Dictionary<string, long> fromOffsets, int numPartitions)
        {
            var bridge = SparkCLRIpcProxy.JvmBridge;

            JvmObjectReference javaTopics = JvmBridgeUtils.GetJavaSet<string>(topics);
            JvmObjectReference javaKafkaParams = JvmBridgeUtils.GetJavaMap<string, string>(kafkaParams);

            // Lazily projects every "<first>:<second>" key to a JVM TopicAndPartition.
            var offsetsByPartition = fromOffsets.Select(offset =>
            {
                string[] keyParts = offset.Key.Split(':');
                var topicAndPartition = SparkCLRIpcProxy.JvmBridge.CallConstructor(
                    "kafka.common.TopicAndPartition",
                    new object[] { keyParts[0], int.Parse(keyParts[1]) });
                return new KeyValuePair<JvmObjectReference, long>(topicAndPartition, offset.Value);
            });

            JvmObjectReference javaFromOffsets = JvmBridgeUtils.GetJavaMap<JvmObjectReference, long>(offsetsByPartition);

            // SparkCLR\scala\src\main\org\apache\spark\streaming\api\kafka\KafkaUtilsCSharpHelper.scala
            JvmObjectReference helper = bridge.CallConstructor("org.apache.spark.streaming.kafka.KafkaUtilsCSharpHelper", new object[] { });

            var streamId = bridge.CallNonStaticJavaMethod(
                helper,
                "createDirectStreamWithoutMessageHandler",
                new object[] { jvmJavaStreamingReference, javaKafkaParams, javaTopics, javaFromOffsets, numPartitions }).ToString();

            return new DStreamIpcProxy(new JvmObjectReference(streamId));
        }
Exemple #30
0
        /// <summary>
        /// Creates a direct Kafka stream by calling <c>createDirectStreamWithoutMessageHandler</c>
        /// on a JVM <c>KafkaUtilsPythonHelper</c>.
        /// </summary>
        /// <param name="topics">Topics converted to a Java set.</param>
        /// <param name="kafkaParams">Kafka parameter pairs converted to a Java map.</param>
        /// <param name="fromOffsets">
        /// Starting offsets. Each first item is split on ':'; the first part and the second part
        /// parsed as an integer are passed to the <c>kafka.common.TopicAndPartition</c> constructor.
        /// </param>
        /// <returns>A <c>DStreamIpcProxy</c> over the returned stream reference.</returns>
        public IDStreamProxy DirectKafkaStream(List<string> topics, IEnumerable<Tuple<string, string>> kafkaParams, IEnumerable<Tuple<string, long>> fromOffsets)
        {
            var bridge = SparkCLRIpcProxy.JvmBridge;

            JvmObjectReference javaTopics = JvmBridgeUtils.GetJavaSet<string>(topics);
            JvmObjectReference javaKafkaParams = JvmBridgeUtils.GetJavaMap<string, string>(kafkaParams);

            // Lazily projects every "<first>:<second>" key to a JVM TopicAndPartition.
            var offsetsByPartition = fromOffsets.Select(offset =>
            {
                string[] keyParts = offset.Item1.Split(':');
                var topicAndPartition = SparkCLRIpcProxy.JvmBridge.CallConstructor(
                    "kafka.common.TopicAndPartition",
                    new object[] { keyParts[0], int.Parse(keyParts[1]) });
                return new Tuple<JvmObjectReference, long>(topicAndPartition, offset.Item2);
            });

            JvmObjectReference javaFromOffsets = JvmBridgeUtils.GetJavaMap<JvmObjectReference, long>(offsetsByPartition);

            // KafkaUtilsPythonHelper: external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala
            JvmObjectReference helper = bridge.CallConstructor("org.apache.spark.streaming.kafka.KafkaUtilsPythonHelper", new object[] { });

            var streamId = bridge.CallNonStaticJavaMethod(
                helper,
                "createDirectStreamWithoutMessageHandler",
                new object[] { jvmJavaStreamingReference, javaKafkaParams, javaTopics, javaFromOffsets }).ToString();

            return new DStreamIpcProxy(new JvmObjectReference(streamId));
        }