Beispiel #1
0
        /// <summary>
        /// Returns the paths to search when loading assemblies in the following order of
        /// precedence:
        /// 1) Comma-separated paths specified in DOTNET_ASSEMBLY_SEARCH_PATHS environment
        /// variable. Note that if a path starts with ".", the working directory will be prepended.
        /// 2) The path of the files added through
        /// <see cref="SparkContext.AddFile(string, bool)"/>.
        /// 3) The working directory.
        /// 4) The directory of the application.
        /// </summary>
        /// <remarks>
        /// The reason that the working directory has higher precedence than the directory
        /// of the application is for cases when spark is launched on YARN. The executors are run
        /// inside 'containers' and files that are passed via 'spark-submit --files' will be pushed
        /// to these 'containers'. This path is the working directory and the 1st probing path that
        /// will be checked.
        /// </remarks>
        /// <returns>Assembly search paths</returns>
        internal static string[] GetAssemblySearchPaths()
        {
            var searchPaths = new List<string>();
            string searchPathsStr =
                Environment.GetEnvironmentVariable(AssemblySearchPathsEnvVarName);

            if (!string.IsNullOrEmpty(searchPathsStr))
            {
                foreach (string searchPath in searchPathsStr.Split(','))
                {
                    string trimmedSearchPath = searchPath.Trim();

                    // Skip empty entries produced by stray commas (e.g. "a,,b") so that
                    // an empty string is never handed to the assembly loader as a path.
                    if (trimmedSearchPath.Length == 0)
                    {
                        continue;
                    }

                    // Ordinal comparison: the "." prefix check is not linguistic and must
                    // not vary with the current culture (CA1310).
                    if (trimmedSearchPath.StartsWith(".", StringComparison.Ordinal))
                    {
                        searchPaths.Add(
                            Path.Combine(Directory.GetCurrentDirectory(), trimmedSearchPath));
                    }
                    else
                    {
                        searchPaths.Add(trimmedSearchPath);
                    }
                }
            }

            string sparkFilesPath = SparkFiles.GetRootDirectory();
            if (!string.IsNullOrWhiteSpace(sparkFilesPath))
            {
                searchPaths.Add(sparkFilesPath);
            }

            searchPaths.Add(Directory.GetCurrentDirectory());
            searchPaths.Add(AppDomain.CurrentDomain.BaseDirectory);

            return searchPaths.ToArray();
        }
        /// <summary>
        /// Verifies <see cref="AssemblySearchPathResolver.GetAssemblySearchPaths"/> both in the
        /// default scenario and when the DOTNET_ASSEMBLY_SEARCH_PATHS environment variable is set.
        /// </summary>
        public void TestAssemblySearchPathResolver()
        {
            string sparkFilesDir = SparkFiles.GetRootDirectory();
            string curDir = Directory.GetCurrentDirectory();
            string appDir = AppDomain.CurrentDomain.BaseDirectory;

            // Test the default scenario.
            string[] searchPaths = AssemblySearchPathResolver.GetAssemblySearchPaths();
            Assert.Equal(new[] { sparkFilesDir, curDir, appDir }, searchPaths);

            // Test the case where DOTNET_ASSEMBLY_SEARCH_PATHS is defined.
            // Use the directory separator here, not Path.PathSeparator: PathSeparator
            // (';' on Windows, ':' on Unix) separates entries in environment path
            // lists, whereas "./mydir3" needs the separator used inside a path.
            char sep = Path.DirectorySeparatorChar;

            Environment.SetEnvironmentVariable(
                AssemblySearchPathResolver.AssemblySearchPathsEnvVarName,
                $"mydir1, mydir2, .{sep}mydir3,.{sep}mydir4");

            try
            {
                searchPaths = AssemblySearchPathResolver.GetAssemblySearchPaths();
                Assert.Equal(
                    new[]
                    {
                        "mydir1",
                        "mydir2",
                        Path.Combine(curDir, $".{sep}mydir3"),
                        Path.Combine(curDir, $".{sep}mydir4"),
                        sparkFilesDir,
                        curDir,
                        appDir
                    },
                    searchPaths);
            }
            finally
            {
                // Always clear the variable, even on assertion failure, so that
                // other tests in the run are not polluted by it.
                Environment.SetEnvironmentVariable(
                    AssemblySearchPathResolver.AssemblySearchPathsEnvVarName,
                    null);
            }
        }
        /// <summary>
        /// In a dotnet-interactive REPL session (driver), nuget dependencies will be
        /// systematically added using <see cref="SparkContext.AddFile(string, bool)"/>.
        ///
        /// These files include:
        /// - "{packagename}.{version}.nupkg"
        ///   The nuget packages
        /// - <see cref="DependencyProviderUtils.CreateFileName(long)"/>
        ///   Serialized <see cref="DependencyProviderUtils.Metadata"/> object.
        ///
        /// On the Worker, in order to resolve the nuget dependencies referenced by
        /// the dotnet-interactive session, we instantiate a
        /// <see cref="DotNet.DependencyManager.DependencyProvider"/>.
        /// This provider will register an event handler to the Assembly Load Resolving event.
        /// By using <see cref="SparkFiles.GetRootDirectory"/>, we can access the
        /// required files added to the <see cref="SparkContext"/>.
        /// </summary>
        internal static void RegisterAssemblyHandler()
        {
            // Nothing to do unless this worker serves a REPL session.
            if (!s_runningREPL)
            {
                return;
            }

            string sparkFilesPath = SparkFiles.GetRootDirectory();

            foreach (string metadataFile in
                DependencyProviderUtils.GetMetadataFiles(sparkFilesPath))
            {
                // GetOrAdd may invoke the value factory more than once and create several
                // Lazy wrappers, but only a single one wins a slot in the
                // ConcurrentDictionary. Reading .Value afterwards materializes the
                // DependencyProvider exactly once per metadata file.
                Lazy<DependencyProvider> dependencyProvider = s_dependencyProviders.GetOrAdd(
                    metadataFile,
                    mdf => new Lazy<DependencyProvider>(() =>
                    {
                        s_logger.LogInfo($"Creating {nameof(DependencyProvider)} using {mdf}");
                        return new DependencyProvider(
                            mdf,
                            sparkFilesPath,
                            Directory.GetCurrentDirectory());
                    }));
                _ = dependencyProvider.Value;
            }
        }
Beispiel #4
0
        /// <summary>
        /// Processes the given stream to construct a Payload object.
        /// </summary>
        /// <param name="stream">The stream to read from</param>
        /// <returns>
        /// Returns a valid payload object if the stream contains all the necessary data.
        /// Returns null if the stream is already closed at the beginning of the read.
        /// </returns>
        /// <remarks>
        /// The reads below are strictly positional: each SerDe call consumes the next
        /// field of the wire protocol, so the statement order must not change.
        /// </remarks>
        internal Payload Process(Stream stream)
        {
            var payload = new Payload();

            byte[] splitIndexBytes;
            try
            {
                splitIndexBytes = SerDe.ReadBytes(stream, sizeof(int));
                // For socket stream, read on the stream returns 0, which
                // SerDe.ReadBytes() returns as null to denote the stream is closed.
                if (splitIndexBytes == null)
                {
                    return(null);
                }
            }
            catch (ObjectDisposedException)
            {
                // For stream implementation such as MemoryStream will throw
                // ObjectDisposedException if the stream is already closed.
                return(null);
            }

            // The split index is transmitted as a big-endian 32-bit integer.
            payload.SplitIndex = BinaryPrimitives.ReadInt32BigEndian(splitIndexBytes);
            payload.Version    = SerDe.ReadString(stream);

            payload.TaskContext = new TaskContextProcessor(_version).Process(stream);
            TaskContextHolder.Set(payload.TaskContext);

            payload.SparkFilesDir = SerDe.ReadString(stream);
            SparkFiles.SetRootDirectory(payload.SparkFilesDir);

            // Register additional assembly handlers after SparkFilesDir has been set
            // and before any deserialization occurs. BroadcastVariableProcessor may
            // deserialize objects from assemblies that are not currently loaded within
            // our current context.
            AssemblyLoaderHelper.RegisterAssemblyHandler();

            if (ConfigurationService.IsDatabricks)
            {
                // Databricks sends two extra string fields; read and discard them so the
                // stream stays aligned. NOTE(review): their meaning is not visible
                // here — confirm against the Databricks worker protocol.
                SerDe.ReadString(stream);
                SerDe.ReadString(stream);
            }

            payload.IncludeItems       = ReadIncludeItems(stream);
            payload.BroadcastVariables = new BroadcastVariableProcessor(_version).Process(stream);

            // TODO: Accumulate registration should be done here.

            payload.Command = new CommandProcessor(_version).Process(stream);

            return(payload);
        }
Beispiel #5
0
        /// <summary>
        /// Builds the environment variables handed to the JVM side for a python function,
        /// forwarding the assembly search paths when any are available.
        /// </summary>
        /// <param name="jvm">The JVM bridge used to allocate the Hashtable.</param>
        /// <returns>A JVM Hashtable holding the environment variables.</returns>
        private static IJvmObjectReferenceProvider CreateEnvVarsForPythonFunction(IJvmBridge jvm)
        {
            var environmentVars = new Hashtable(jvm);

            // Gather candidate search paths and drop blank entries before joining.
            var candidates = new[]
            {
                Environment.GetEnvironmentVariable(
                    AssemblySearchPathResolver.AssemblySearchPathsEnvVarName),
                SparkFiles.GetRootDirectory()
            };
            string assemblySearchPath =
                string.Join(",", candidates.Where(s => !string.IsNullOrWhiteSpace(s)));

            // string.Join never returns null, so a length check suffices here.
            if (assemblySearchPath.Length > 0)
            {
                environmentVars.Put(
                    AssemblySearchPathResolver.AssemblySearchPathsEnvVarName,
                    assemblySearchPath);
            }

            return environmentVars;
        }
Beispiel #6
0
        /// <summary>
        /// Builds the environment variables handed to the JVM side for a python function:
        /// the assembly search paths (when any are available) and the worker Spark version.
        /// </summary>
        /// <param name="jvm">The JVM bridge used to allocate the Hashtable.</param>
        /// <returns>A JVM Hashtable holding the environment variables.</returns>
        private static IJvmObjectReferenceProvider CreateEnvVarsForPythonFunction(IJvmBridge jvm)
        {
            var environmentVars = new Hashtable(jvm);

            // Gather candidate search paths and drop blank entries before joining.
            var candidates = new[]
            {
                Environment.GetEnvironmentVariable(
                    AssemblySearchPathResolver.AssemblySearchPathsEnvVarName),
                SparkFiles.GetRootDirectory()
            };
            string assemblySearchPath =
                string.Join(",", candidates.Where(s => !string.IsNullOrWhiteSpace(s)));

            // string.Join never returns null, so a length check suffices here.
            if (assemblySearchPath.Length > 0)
            {
                environmentVars.Put(
                    AssemblySearchPathResolver.AssemblySearchPathsEnvVarName,
                    assemblySearchPath);
            }

            // DOTNET_WORKER_SPARK_VERSION is used to handle different versions of Spark on the worker.
            environmentVars.Put("DOTNET_WORKER_SPARK_VERSION", SparkEnvironment.SparkVersion.ToString());

            return environmentVars;
        }
 /// <summary>
 /// Sanity-checks that the SparkFiles path accessors return string values.
 /// </summary>
 public void TestSparkFiles()
 {
     string filePath = SparkFiles.Get("people.json");
     string rootDirectory = SparkFiles.GetRootDirectory();

     Assert.IsType<string>(filePath);
     Assert.IsType<string>(rootDirectory);
 }