コード例 #1
0
 /// <summary>
 /// Turns a JSON string into a <see cref="JvmObjectReference"/> by calling the static
 /// <c>fromJson</c> method of <c>org.apache.spark.sql.types.DataType</c> on the JVM.
 /// The returned reference points at a <see cref="StructType"/> on the JVM side.
 /// </summary>
 /// <param name="jvm">Bridge through which the JVM call is made</param>
 /// <param name="json">JSON text forwarded to <c>fromJson</c></param>
 /// <returns>Reference to the JVM object produced from <paramref name="json"/></returns>
 internal static JvmObjectReference FromJson(IJvmBridge jvm, string json)
 {
     var parsed = jvm.CallStaticJavaMethod(
         "org.apache.spark.sql.types.DataType",
         "fromJson",
         json);

     return (JvmObjectReference)parsed;
 }
コード例 #2
0
ファイル: UdfUtils.cs プロジェクト: zwhrenxiaoxue/spark
        /// <summary>
        /// Builds the JVM-side table of environment variables that accompanies a Python function.
        /// </summary>
        /// <param name="jvm">JVM bridge used to create the table</param>
        /// <returns>The populated table</returns>
        private static IJvmObjectReferenceProvider CreateEnvVarsForPythonFunction(IJvmBridge jvm)
        {
            var envTable = new Hashtable(jvm);

            // Forward the assembly search path only when the current process has one set.
            string searchPath = Environment.GetEnvironmentVariable(
                AssemblySearchPathResolver.AssemblySearchPathsEnvVarName);
            bool hasSearchPath = !string.IsNullOrEmpty(searchPath);
            if (hasSearchPath)
            {
                envTable.Put(AssemblySearchPathResolver.AssemblySearchPathsEnvVarName, searchPath);
            }

            // The worker uses DOTNET_WORKER_SPARK_VERSION to handle different versions of Spark.
            var sparkVersionText = SparkEnvironment.SparkVersion.ToString();
            envTable.Put("DOTNET_WORKER_SPARK_VERSION", sparkVersionText);

            // Propagate the REPL flag only when it evaluates to true in this process.
            bool runningRepl =
                EnvironmentUtils.GetEnvironmentVariableAsBool(Constants.RunningREPLEnvVar);
            if (runningRepl)
            {
                envTable.Put(Constants.RunningREPLEnvVar, "true");
            }

            return envTable;
        }
コード例 #3
0
ファイル: CallbackServer.cs プロジェクト: zwhrenxiaoxue/spark
        /// <summary>
        /// Creates the callback server, registers <c>Shutdown</c> to be invoked on process exit,
        /// and optionally starts the server right away.
        /// </summary>
        /// <param name="jvm">JVM bridge stored by this instance</param>
        /// <param name="run">When true, <c>Run</c> is called before the constructor returns</param>
        internal CallbackServer(IJvmBridge jvm, bool run = true)
        {
            AppDomain.CurrentDomain.ProcessExit += (sender, args) => Shutdown();
            _jvm = jvm;

            if (!run)
            {
                return;
            }

            Run();
        }
コード例 #4
0
        /// <summary>
        /// Initializes a JvmThreadPoolGC with its collaborators and an empty thread map.
        /// The GC timer field is left null by this constructor.
        /// </summary>
        /// <param name="loggerService">Logger service.</param>
        /// <param name="jvmBridge">The JvmBridge used to call JVM methods.</param>
        /// <param name="threadGCInterval">The interval to GC finished threads.</param>
        public JvmThreadPoolGC(
            ILoggerService loggerService,
            IJvmBridge jvmBridge,
            TimeSpan threadGCInterval)
        {
            // Injected dependencies and configuration.
            _threadGCInterval = threadGCInterval;
            _jvmBridge = jvmBridge;
            _loggerService = loggerService;

            // Internal state: nothing tracked yet, timer not created.
            _activeThreadGCTimer = null;
            _activeThreadGCTimerLock = new object();
            _activeThreads = new ConcurrentDictionary<int, Thread>();
        }
コード例 #5
0
        /// <summary>
        /// Gets the existing SparkContext or instantiates a new one, registering it as a
        /// singleton object. Only one SparkContext can be active per JVM, so this is the
        /// entry point to use when applications want to share a SparkContext.
        /// </summary>
        /// <param name="conf"><see cref="SparkConf"/> used when a SparkContext has to be created
        /// </param>
        /// <returns>
        /// The current SparkContext, or a new one if none existed before this call
        /// </returns>
        public static SparkContext GetOrCreate(SparkConf conf)
        {
            // The bridge is taken from the configuration object's own JVM reference.
            var confProvider = (IJvmObjectReferenceProvider)conf;
            IJvmBridge bridge = confProvider.Reference.Jvm;

            var contextReference = (JvmObjectReference)bridge.CallStaticJavaMethod(
                "org.apache.spark.SparkContext",
                "getOrCreate",
                conf);

            return new SparkContext(contextReference);
        }
コード例 #6
0
        /// <summary>
        /// Constructor for the JvmObjectReference class.
        /// </summary>
        /// <param name="id">Id for the JVM object</param>
        /// <param name="jvm">IJvmBridge instance that created the JVM object</param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="id"/> is null.
        /// </exception>
        internal JvmObjectReference(string id, IJvmBridge jvm)
        {
            if (id is null)
            {
                // The single-string ArgumentNullException constructor treats its argument as the
                // parameter name, so the name and the message are passed separately.
                throw new ArgumentNullException(nameof(id), "JvmReferenceId cannot be null.");
            }

            Id = new JvmObjectId(id, jvm);

            // Record when this reference was created (UTC).
            _creationTime = DateTime.UtcNow;
        }
コード例 #7
0
ファイル: Broadcast.cs プロジェクト: zwhrenxiaoxue/spark
        /// <summary>
        /// Asks the JVM (<c>org.apache.spark.util.Utils.getLocalDir</c>) for a local directory,
        /// makes sure a <c>sparkdotnet</c> subdirectory exists inside it, and returns the path
        /// of a randomly named file in that subdirectory.
        /// </summary>
        /// <param name="conf">SparkConf object</param>
        /// <returns>Path composed of the subdirectory and a random file name</returns>
        private string CreateTempFilePath(SparkConf conf)
        {
            IJvmBridge bridge = ((IJvmObjectReferenceProvider)conf).Reference.Jvm;

            var sparkLocalDir = (string)bridge.CallStaticJavaMethod(
                "org.apache.spark.util.Utils",
                "getLocalDir",
                conf);

            string workDir = Path.Combine(sparkLocalDir, "sparkdotnet");
            Directory.CreateDirectory(workDir);

            string randomName = Path.GetRandomFileName();
            return Path.Combine(workDir, randomName);
        }
コード例 #8
0
        /// <summary>
        /// Wraps the given command bytes in a PythonFunction object created on the JVM side
        /// through <c>SQLUtils.createPythonFunction</c>.
        /// </summary>
        /// <param name="jvm">JVM bridge to use</param>
        /// <param name="command">Serialized command bytes</param>
        /// <returns>JvmObjectReference object to the PythonFunction object</returns>
        internal static JvmObjectReference CreatePythonFunction(IJvmBridge jvm, byte[] command)
        {
            // A single empty JVM list is passed for both list-typed arguments.
            var emptyList = new ArrayList(jvm);
            var envVars = CreateEnvVarsForPythonFunction(jvm);
            var workerExePath = SparkEnvironment.ConfigurationService.GetWorkerExePath();

            var pythonFunction = jvm.CallStaticJavaMethod(
                "org.apache.spark.sql.api.dotnet.SQLUtils",
                "createPythonFunction",
                command,
                envVars,
                emptyList, // Python includes
                workerExePath,
                Versions.CurrentVersion,
                emptyList, // Broadcast variables
                null);     // Accumulator

            return (JvmObjectReference)pythonFunction;
        }
コード例 #9
0
ファイル: Broadcast.cs プロジェクト: viskumar-microsoft/spark
        /// <summary>
        /// Function to create the Broadcast variable (org.apache.spark.broadcast.Broadcast)
        /// </summary>
        /// <param name="sc">SparkContext object of type <see cref="SparkContext"/></param>
        /// <param name="value">Broadcast value of type object</param>
        /// <returns>Returns broadcast variable of type <see cref="JvmObjectReference"/></returns>
        private JvmObjectReference CreateBroadcast(SparkContext sc, T value)
        {
            IJvmBridge jvm = ((IJvmObjectReferenceProvider)sc).Reference.Jvm;
            var        javaSparkContext = (JvmObjectReference)jvm.CallStaticJavaMethod(
                "org.apache.spark.api.java.JavaSparkContext",
                "fromSparkContext",
                sc);

            Version version = SparkEnvironment.SparkVersion;

            return((version.Major, version.Minor) switch
            {
                (2, 4) => CreateBroadcast_V2_4_X(javaSparkContext, sc, value),
                (3, _) => CreateBroadcast_V2_4_X(javaSparkContext, sc, value),
                _ => throw new NotSupportedException($"Spark {version} not supported.")
            });
コード例 #10
0
ファイル: UdfUtils.cs プロジェクト: imback82/spark-3
        /// <summary>
        /// Wraps the given command bytes in a PythonFunction object created on the JVM side
        /// through <c>SQLUtils.createPythonFunction</c>, using an empty environment table and
        /// an empty list for the includes and broadcast variables.
        /// </summary>
        /// <param name="jvm">JVM bridge to use</param>
        /// <param name="command">Serialized command bytes</param>
        /// <returns>JvmObjectReference object to the PythonFunction object</returns>
        internal static JvmObjectReference CreatePythonFunction(IJvmBridge jvm, byte[] command)
        {
            JvmObjectReference emptyEnvVars = jvm.CallConstructor("java.util.Hashtable");
            JvmObjectReference emptyList = jvm.CallConstructor("java.util.ArrayList");
            var workerExePath = SparkEnvironment.ConfigurationService.GetWorkerExePath();

            var pythonFunction = jvm.CallStaticJavaMethod(
                "org.apache.spark.sql.api.dotnet.SQLUtils",
                "createPythonFunction",
                command,
                emptyEnvVars, // Environment variables
                emptyList,    // Python includes
                workerExePath,
                "1.0",
                emptyList,    // Broadcast variables
                null);        // Accumulator

            return (JvmObjectReference)pythonFunction;
        }
コード例 #11
0
ファイル: UdfUtils.cs プロジェクト: yannify/spark
        /// <summary>
        /// Creates a <c>java.util.Hashtable</c> on the JVM and, when the assembly search path
        /// environment variable is set in this process, stores it in that table.
        /// </summary>
        /// <param name="jvm">JVM bridge used to create and fill the table</param>
        /// <returns>Reference to the JVM table</returns>
        private static JvmObjectReference CreateEnvVarsForPythonFunction(IJvmBridge jvm)
        {
            JvmObjectReference envTable = jvm.CallConstructor("java.util.Hashtable");

            string searchPath = Environment.GetEnvironmentVariable(
                AssemblySearchPathResolver.AssemblySearchPathsEnvVarName);

            // Nothing to forward: hand back the empty table.
            if (string.IsNullOrEmpty(searchPath))
            {
                return envTable;
            }

            jvm.CallNonStaticJavaMethod(
                envTable,
                "put",
                AssemblySearchPathResolver.AssemblySearchPathsEnvVarName,
                searchPath);

            return envTable;
        }
コード例 #12
0
 /// <summary>
 /// Creates a <see cref="UserDefinedFunction"/> wrapping a JVM
 /// <c>UserDefinedPythonFunction</c> constructed from the given command.
 /// </summary>
 /// <param name="jvm">JVM bridge to use</param>
 /// <param name="name">Name passed to the JVM constructor</param>
 /// <param name="command">Serialized command bytes</param>
 /// <param name="evalType">Evaluation type, passed to the JVM as an int</param>
 /// <param name="returnType">Return type, parsed via <c>DataType.FromJson</c></param>
 /// <returns>The UDF wrapping the newly constructed JVM object</returns>
 internal static UserDefinedFunction Create(
     IJvmBridge jvm,
     string name,
     byte[] command,
     UdfUtils.PythonEvalType evalType,
     string returnType)
 {
     var pythonFunction = UdfUtils.CreatePythonFunction(jvm, command);
     var returnDataType = DataType.FromJson(jvm, returnType);

     var jvmUdf = jvm.CallConstructor(
         "org.apache.spark.sql.execution.python.UserDefinedPythonFunction",
         name,
         pythonFunction,
         returnDataType,
         (int)evalType,
         true); // udfDeterministic

     return new UserDefinedFunction(jvmUdf);
 }
コード例 #13
0
ファイル: UdfUtils.cs プロジェクト: ScriptBox21/dotnet-spark
        /// <summary>
        /// Wraps the given command bytes in a PythonFunction object created on the JVM side
        /// through <c>SQLUtils.createPythonFunction</c>. Everything currently held by
        /// <c>JvmBroadcastRegistry</c> is passed along as broadcast variables, and the
        /// registry is cleared afterwards.
        /// </summary>
        /// <param name="jvm">JVM bridge to use</param>
        /// <param name="command">Serialized command bytes</param>
        /// <returns>JvmObjectReference object to the PythonFunction object</returns>
        internal static JvmObjectReference CreatePythonFunction(IJvmBridge jvm, byte[] command)
        {
            var pythonIncludes = new ArrayList(jvm);
            var broadcasts = new ArrayList(jvm);

            // Move the registered broadcast variables into the JVM list, then reset the registry.
            broadcasts.AddAll(JvmBroadcastRegistry.GetAll());
            JvmBroadcastRegistry.Clear();

            var envVars = CreateEnvVarsForPythonFunction(jvm);
            var workerExePath = SparkEnvironment.ConfigurationService.GetWorkerExePath();

            // Used to check the compatibility of UDFs between the driver and worker.
            var assemblyVersion = AssemblyInfoProvider.MicrosoftSparkAssemblyInfo().AssemblyVersion;

            var pythonFunction = jvm.CallStaticJavaMethod(
                "org.apache.spark.sql.api.dotnet.SQLUtils",
                "createPythonFunction",
                command,
                envVars,
                pythonIncludes,
                workerExePath,
                assemblyVersion,
                broadcasts,
                null); // Accumulator

            return (JvmObjectReference)pythonFunction;
        }
コード例 #14
0
        /// <summary>
        /// Builds the JVM-side table of environment variables that accompanies a Python function.
        /// The assembly search path entry is the comma-joined combination of the current
        /// process's search path variable and the SparkFiles root directory, skipping blanks.
        /// </summary>
        /// <param name="jvm">JVM bridge used to create the table</param>
        /// <returns>The populated table</returns>
        private static IJvmObjectReferenceProvider CreateEnvVarsForPythonFunction(IJvmBridge jvm)
        {
            var envTable = new Hashtable(jvm);

            var candidates = new[]
            {
                Environment.GetEnvironmentVariable(
                    AssemblySearchPathResolver.AssemblySearchPathsEnvVarName),
                SparkFiles.GetRootDirectory()
            };
            var usable = candidates.Where(path => !string.IsNullOrWhiteSpace(path));
            string searchPath = string.Join(",", usable);

            // An empty result means neither source supplied a path; add no entry then.
            if (string.IsNullOrEmpty(searchPath))
            {
                return envTable;
            }

            envTable.Put(AssemblySearchPathResolver.AssemblySearchPathsEnvVarName, searchPath);
            return envTable;
        }
コード例 #15
0
        /// <summary>
        /// Builds the JVM-side table of environment variables that accompanies a Python function:
        /// an optional assembly search path entry plus the Spark version entry.
        /// </summary>
        /// <param name="jvm">JVM bridge used to create the table</param>
        /// <returns>The populated table</returns>
        private static IJvmObjectReferenceProvider CreateEnvVarsForPythonFunction(IJvmBridge jvm)
        {
            var envTable = new Hashtable(jvm);

            // Combine the process-level search path with the SparkFiles root, skipping blanks.
            var pathSources = new[]
            {
                Environment.GetEnvironmentVariable(
                    AssemblySearchPathResolver.AssemblySearchPathsEnvVarName),
                SparkFiles.GetRootDirectory()
            };
            string searchPath = string.Join(
                ",",
                pathSources.Where(source => !string.IsNullOrWhiteSpace(source)));

            bool hasSearchPath = !string.IsNullOrEmpty(searchPath);
            if (hasSearchPath)
            {
                envTable.Put(AssemblySearchPathResolver.AssemblySearchPathsEnvVarName, searchPath);
            }

            // DOTNET_WORKER_SPARK_VERSION is used to handle different versions of Spark on the worker.
            var sparkVersionText = SparkEnvironment.SparkVersion.ToString();
            envTable.Put("DOTNET_WORKER_SPARK_VERSION", sparkVersionText);

            return envTable;
        }
コード例 #16
0
        /// <summary>
        /// Creates a <see cref="UserDefinedFunction"/> backed by a JVM
        /// <c>UserDefinedPythonFunction</c> built from the given serialized command, using the
        /// bridge exposed by <c>SparkEnvironment.JvmBridge</c>.
        /// </summary>
        /// <param name="name">Name passed to the JVM UDF constructor</param>
        /// <param name="command">Serialized command bytes</param>
        /// <param name="evalType">Evaluation type, passed to the JVM as an int</param>
        /// <param name="returnType">Return type; it is wrapped in double quotes before being
        /// handed to <c>DataType.fromJson</c></param>
        /// <returns>The UDF wrapping the newly constructed JVM object</returns>
        internal static UserDefinedFunction Create(
            string name,
            byte[] command,
            UdfUtils.PythonEvalType evalType,
            string returnType)
        {
            IJvmBridge bridge = SparkEnvironment.JvmBridge;

            JvmObjectReference emptyEnvVars = bridge.CallConstructor("java.util.Hashtable");
            JvmObjectReference emptyList = bridge.CallConstructor("java.util.ArrayList");

            // fromJson receives the type as a JSON string literal, hence the surrounding quotes.
            string quotedReturnType = "\"" + returnType + "\"";
            var returnDataType = (JvmObjectReference)bridge.CallStaticJavaMethod(
                "org.apache.spark.sql.types.DataType",
                "fromJson",
                quotedReturnType);

            var workerExePath = SparkEnvironment.ConfigurationService.GetWorkerExePath();
            var pythonFunction = (JvmObjectReference)bridge.CallStaticJavaMethod(
                "org.apache.spark.sql.api.dotnet.SQLUtils",
                "createPythonFunction",
                command,
                emptyEnvVars, // Environment variables
                emptyList,    // Python includes
                workerExePath,
                "1.0",
                emptyList,    // Broadcast variables
                null);        // Accumulator

            JvmObjectReference jvmUdf = bridge.CallConstructor(
                "org.apache.spark.sql.execution.python.UserDefinedPythonFunction",
                name,
                pythonFunction,
                returnDataType,
                (int)evalType,
                true); // udfDeterministic

            return new UserDefinedFunction(jvmUdf);
        }
コード例 #17
0
        /// <summary>
        /// Creates a <see cref="UserDefinedFunction"/> backed by a JVM
        /// <c>UserDefinedPythonFunction</c> built from the given serialized command.
        /// </summary>
        /// <param name="jvm">JVM bridge to use</param>
        /// <param name="name">Name passed to the JVM UDF constructor</param>
        /// <param name="command">Serialized command bytes</param>
        /// <param name="evalType">Evaluation type, passed to the JVM as an int</param>
        /// <param name="returnType">Return type text handed to <c>DataType.fromJson</c></param>
        /// <returns>The UDF wrapping the newly constructed JVM object</returns>
        internal static UserDefinedFunction Create(
            IJvmBridge jvm,
            string name,
            byte[] command,
            UdfUtils.PythonEvalType evalType,
            string returnType)
        {
            var wrappedCommand = UdfUtils.CreatePythonFunction(jvm, command);

            string returnTypeText = $"{returnType}";
            var returnDataType = (JvmObjectReference)jvm.CallStaticJavaMethod(
                "org.apache.spark.sql.types.DataType",
                "fromJson",
                returnTypeText);

            var jvmUdf = jvm.CallConstructor(
                "org.apache.spark.sql.execution.python.UserDefinedPythonFunction",
                name,
                wrappedCommand,
                returnDataType,
                (int)evalType,
                true); // udfDeterministic

            return new UserDefinedFunction(jvmUdf);
        }
コード例 #18
0
 /// <summary>
 /// Create a <c>java.util.Properties</c> JVM object
 /// </summary>
 /// <param name="jvm">JVM bridge to use</param>
 internal Properties(IJvmBridge jvm) =>
コード例 #19
0
 /// <summary>
 /// Creates the Hyperspace wrapper for a session by constructing the JVM object named by
 /// <c>s_hyperspaceClassName</c>, using the bridge taken from the session's JVM reference.
 /// </summary>
 /// <param name="spark">Session passed to the JVM constructor</param>
 public Hyperspace(SparkSession spark)
 {
     _spark = spark;

     var sessionProvider = (IJvmObjectReferenceProvider)spark;
     _jvmBridge = sessionProvider.Reference.Jvm;

     _jvmObject = _jvmBridge.CallConstructor(s_hyperspaceClassName, spark);
 }
コード例 #20
0
 /// <summary>
 /// Sets up the test class with a logger for this type, the fixture's Spark session, and
 /// the JVM bridge taken from that session's reference.
 /// </summary>
 /// <param name="fixture">Fixture supplying the Spark session</param>
 public JvmThreadPoolGCTests(SparkFixture fixture)
 {
     Type testType = typeof(JvmThreadPoolGCTests);
     _loggerService = LoggerServiceFactory.GetLogger(testType);

     _spark = fixture.Spark;
     _jvmBridge = _spark.Reference.Jvm;
 }
コード例 #21
0
 /// <summary>
 /// Create a <c>java.util.Hashtable</c> JVM object
 /// </summary>
 /// <param name="jvm">JVM bridge to use</param>
 internal Hashtable(IJvmBridge jvm) =>
コード例 #22
0
 /// <summary>
 /// Sets up the test class with a logger for this type, the fixture's Spark session, and
 /// the JVM bridge taken from that session's reference.
 /// </summary>
 /// <param name="fixture">Fixture supplying the Spark session</param>
 public JvmThreadPoolGCTests(SparkFixture fixture)
 {
     Type testType = typeof(JvmThreadPoolGCTests);
     _loggerService = LoggerServiceFactory.GetLogger(testType);

     _spark = fixture.Spark;

     var sessionProvider = (IJvmObjectReferenceProvider)_spark;
     _jvmBridge = sessionProvider.Reference.Jvm;
 }
コード例 #23
0
 /// <summary>
 /// Stores the JVM bridge and the user callback for later use by this handler.
 /// </summary>
 /// <param name="jvm">JVM bridge kept by the handler</param>
 /// <param name="func">Callback taking a <see cref="DataFrame"/> and a long value</param>
 internal ForeachBatchCallbackHandler(
     IJvmBridge jvm,
     Action<DataFrame, long> func)
 {
     _func = func;
     _jvm = jvm;
 }
コード例 #24
0
 /// <summary>
 /// Initializes a JvmObjectId from an identifier and the bridge it is associated with.
 /// </summary>
 /// <param name="id">Unique identifier</param>
 /// <param name="jvm">JVM bridge object</param>
 internal JvmObjectId(
     string id,
     IJvmBridge jvm)
 {
     Jvm = jvm;
     Id = id;
 }
コード例 #25
0
 /// <summary>
 /// Creates the Hyperspace wrapper for a session by constructing the JVM object named by
 /// <c>s_hyperspaceClassName</c> and exposing it through <c>Reference</c>.
 /// </summary>
 /// <param name="spark">Session passed to the JVM constructor</param>
 public Hyperspace(SparkSession spark)
 {
     _spark = spark;

     // The bridge comes from the session's own JVM reference.
     _jvmBridge = spark.Reference.Jvm;

     Reference = _jvmBridge.CallConstructor(
         s_hyperspaceClassName,
         spark);
 }
コード例 #26
0
ファイル: ArrayList.cs プロジェクト: ScriptBox21/dotnet-spark
 /// <summary>
 /// Instantiates a <c>java.util.ArrayList</c> on the JVM and keeps its reference in
 /// <c>Reference</c>.
 /// </summary>
 /// <param name="jvm">JVM bridge to use</param>
 internal ArrayList(IJvmBridge jvm) =>
     Reference = jvm.CallConstructor("java.util.ArrayList");
コード例 #27
0
ファイル: HashMap.cs プロジェクト: dotnet/spark
 /// <summary>
 /// Create a <c>java.util.HashMap</c> JVM object
 /// </summary>
 /// <param name="jvm">JVM bridge to use</param>
 internal HashMap(IJvmBridge jvm) =>
コード例 #28
0
        /// <summary>
        /// Launches the Spark backend process, waits until its standard output reports that the
        /// backend is running, and then creates the Spark session exposed through
        /// <c>Spark</c> and the bridge exposed through <c>Jvm</c>.
        /// </summary>
        /// <exception cref="Exception">
        /// Thrown when the worker directory environment variable is not set, or when the
        /// launched process exits before reporting readiness.
        /// </exception>
        public SparkFixture()
        {
            // The worker directory must be set for the Microsoft.Spark.Worker executable.
            if (string.IsNullOrEmpty(
                    Environment.GetEnvironmentVariable(EnvironmentVariableNames.WorkerDir)))
            {
                throw new Exception(
                          $"Environment variable '{EnvironmentVariableNames.WorkerDir}' must be set.");
            }

            BuildSparkCmd(out var filename, out var args);

            // Configure the process using the StartInfo properties.
            _process.StartInfo.FileName  = filename;
            _process.StartInfo.Arguments = args;
            // UseShellExecute defaults to true in .NET Framework,
            // but defaults to false in .NET Core. To support both, set it
            // to false which is required for stream redirection.
            _process.StartInfo.UseShellExecute        = false;
            _process.StartInfo.RedirectStandardInput  = true;
            _process.StartInfo.RedirectStandardOutput = true;
            _process.StartInfo.RedirectStandardError  = true;

            bool isSparkReady = false;

            _process.OutputDataReceived += (sender, arguments) =>
            {
                // Scala-side driver for .NET emits the following message after it is
                // launched and ready to accept connections.
                // Data is null when the redirected stream is closed, so guard against it
                // to avoid a NullReferenceException on the reader thread.
                if (!isSparkReady &&
                    arguments.Data != null &&
                    arguments.Data.Contains("Backend running debug mode"))
                {
                    isSparkReady = true;
                }
            };

            _process.Start();
            _process.BeginErrorReadLine();
            _process.BeginOutputReadLine();

            bool processExited = false;

            // Poll every 500 ms until the backend reports readiness or the process dies.
            while (!isSparkReady && !processExited)
            {
                processExited = _process.WaitForExit(500);
            }

            if (processExited)
            {
                _process.Dispose();

                // The process should not have been exited.
                throw new Exception(
                          $"Process exited prematurely with '{filename} {args}'.");
            }

            Spark = SparkSession
                    .Builder()
                    // Lower the shuffle partitions to speed up groupBy() operations.
                    .Config("spark.sql.shuffle.partitions", "3")
                    .Config("spark.ui.enabled", false)
                    .Config("spark.ui.showConsoleProgress", false)
                    .AppName("Microsoft.Spark.E2ETest")
                    .GetOrCreate();

            Spark.SparkContext.SetLogLevel(DefaultLogLevel);

            Jvm = Spark.Reference.Jvm;
        }
コード例 #29
0
 /// <summary>
 /// Instantiates a <c>java.util.ArrayList</c> on the JVM and keeps its reference in
 /// <c>_jvmObject</c>.
 /// </summary>
 /// <param name="jvm">JVM bridge to use</param>
 internal ArrayList(IJvmBridge jvm) =>
     _jvmObject = jvm.CallConstructor("java.util.ArrayList");