/// <summary>
/// Returns the paths to search when loading assemblies, in the following order of
/// precedence:
/// 1) Comma-separated paths specified in the DOTNET_ASSEMBLY_SEARCH_PATHS environment
///    variable. Note that if a path starts with ".", the working directory will be
///    prepended.
/// 2) The path of the files added through
///    <see cref="SparkContext.AddFile(string, bool)"/>.
/// 3) The working directory.
/// 4) The directory of the application.
/// </summary>
/// <remarks>
/// The working directory has higher precedence than the directory of the application
/// to handle the case when Spark is launched on YARN. The executors run inside
/// 'containers', and files passed via 'spark-submit --files' are pushed to those
/// 'containers'. That path is the working directory, so it is the first probing path
/// checked.
/// </remarks>
/// <returns>Assembly search paths.</returns>
internal static string[] GetAssemblySearchPaths()
{
    var searchPaths = new List<string>();
    string searchPathsStr =
        Environment.GetEnvironmentVariable(AssemblySearchPathsEnvVarName);

    if (!string.IsNullOrEmpty(searchPathsStr))
    {
        foreach (string searchPath in searchPathsStr.Split(','))
        {
            string trimmedSearchPath = searchPath.Trim();
            if (trimmedSearchPath.StartsWith("."))
            {
                searchPaths.Add(
                    Path.Combine(Directory.GetCurrentDirectory(), trimmedSearchPath));
            }
            else
            {
                searchPaths.Add(trimmedSearchPath);
            }
        }
    }

    string sparkFilesPath = SparkFiles.GetRootDirectory();
    if (!string.IsNullOrWhiteSpace(sparkFilesPath))
    {
        searchPaths.Add(sparkFilesPath);
    }

    searchPaths.Add(Directory.GetCurrentDirectory());
    searchPaths.Add(AppDomain.CurrentDomain.BaseDirectory);

    return searchPaths.ToArray();
}
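A minimal sketch, not part of the source, showing how a caller could consume these search paths to locate and load an assembly. The AssemblyProbe class, the ResolveAssembly helper, and the convention of probing for the exact file name are assumptions for illustration only.

// Hypothetical illustration: probe each search path in precedence order and
// load the first matching assembly file found.
using System.IO;
using System.Reflection;

internal static class AssemblyProbe
{
    // 'assemblyFileName' is expected to include the extension, e.g. "MyUdfs.dll".
    internal static Assembly ResolveAssembly(string assemblyFileName)
    {
        foreach (string searchPath in AssemblySearchPathResolver.GetAssemblySearchPaths())
        {
            string candidate = Path.Combine(searchPath, assemblyFileName);
            if (File.Exists(candidate))
            {
                return Assembly.LoadFrom(candidate);
            }
        }

        // No probing path contained the requested assembly.
        return null;
    }
}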
public void TestAssemblySearchPathResolver()
{
    string sparkFilesDir = SparkFiles.GetRootDirectory();
    string curDir = Directory.GetCurrentDirectory();
    string appDir = AppDomain.CurrentDomain.BaseDirectory;

    // Test the default scenario.
    string[] searchPaths = AssemblySearchPathResolver.GetAssemblySearchPaths();
    Assert.Equal(new[] { sparkFilesDir, curDir, appDir }, searchPaths);

    // Test the case where DOTNET_ASSEMBLY_SEARCH_PATHS is defined. The relative
    // entries are built with the directory separator (not Path.PathSeparator,
    // which separates entries in a PATH-style list).
    char sep = Path.DirectorySeparatorChar;
    Environment.SetEnvironmentVariable(
        AssemblySearchPathResolver.AssemblySearchPathsEnvVarName,
        $"mydir1, mydir2, .{sep}mydir3,.{sep}mydir4");
    searchPaths = AssemblySearchPathResolver.GetAssemblySearchPaths();
    Assert.Equal(
        new[]
        {
            "mydir1",
            "mydir2",
            Path.Combine(curDir, $".{sep}mydir3"),
            Path.Combine(curDir, $".{sep}mydir4"),
            sparkFilesDir,
            curDir,
            appDir
        },
        searchPaths);
    Environment.SetEnvironmentVariable(
        AssemblySearchPathResolver.AssemblySearchPathsEnvVarName,
        null);
}
/// <summary>
/// In a dotnet-interactive REPL session (driver), nuget dependencies are
/// systematically added using <see cref="SparkContext.AddFile(string, bool)"/>.
///
/// These files include:
/// - "{packagename}.{version}.nupkg"
///   The nuget packages.
/// - <see cref="DependencyProviderUtils.CreateFileName(long)"/>
///   A serialized <see cref="DependencyProviderUtils.Metadata"/> object.
///
/// On the worker, in order to resolve the nuget dependencies referenced by
/// the dotnet-interactive session, we instantiate a
/// <see cref="DotNet.DependencyManager.DependencyProvider"/>.
/// This provider registers an event handler for the assembly load Resolving event.
/// By using <see cref="SparkFiles.GetRootDirectory"/>, we can access the
/// required files added to the <see cref="SparkContext"/>.
/// </summary>
internal static void RegisterAssemblyHandler()
{
    if (!s_runningREPL)
    {
        return;
    }

    string sparkFilesPath = SparkFiles.GetRootDirectory();
    string[] metadataFiles = DependencyProviderUtils.GetMetadataFiles(sparkFilesPath);
    foreach (string metadataFile in metadataFiles)
    {
        // The delegate passed to GetOrAdd is not guaranteed to run only once.
        // Multiple Lazy objects may be created, but only one of them will be added
        // to the ConcurrentDictionary. The Lazy value is retrieved to materialize
        // the DependencyProvider object if it hasn't already been created.
        Lazy<DependencyProvider> dependencyProvider = s_dependencyProviders.GetOrAdd(
            metadataFile,
            mdf => new Lazy<DependencyProvider>(
                () =>
                {
                    s_logger.LogInfo($"Creating {nameof(DependencyProvider)} using {mdf}");
                    return new DependencyProvider(
                        mdf,
                        sparkFilesPath,
                        Directory.GetCurrentDirectory());
                }));
        _ = dependencyProvider.Value;
    }
}
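A hedged sketch, not the actual DependencyProvider implementation, showing the general shape of a handler attached to the .NET AssemblyLoadContext.Resolving event. The ResolvingHandlerSketch class and the 'unpackedNugetDir' parameter are assumptions; the real provider resolves against the files shipped via SparkContext.AddFile.

// Hypothetical sketch: register a Resolving handler that probes a directory
// of assemblies unpacked from nuget packages.
using System.IO;
using System.Reflection;
using System.Runtime.Loader;

internal static class ResolvingHandlerSketch
{
    // 'unpackedNugetDir' is an assumed absolute path to the unpacked packages.
    internal static void Register(string unpackedNugetDir)
    {
        AssemblyLoadContext.Default.Resolving +=
            (AssemblyLoadContext context, AssemblyName name) =>
            {
                // Probe for "{AssemblyName}.dll" among the unpacked dependencies.
                string candidate = Path.Combine(unpackedNugetDir, $"{name.Name}.dll");
                return File.Exists(candidate)
                    ? context.LoadFromAssemblyPath(candidate)
                    : null;
            };
    }
}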
/// <summary>
/// Processes the given stream to construct a Payload object.
/// </summary>
/// <param name="stream">The stream to read from.</param>
/// <returns>
/// Returns a valid payload object if the stream contains all the necessary data.
/// Returns null if the stream is already closed at the beginning of the read.
/// </returns>
internal Payload Process(Stream stream)
{
    var payload = new Payload();

    byte[] splitIndexBytes;
    try
    {
        splitIndexBytes = SerDe.ReadBytes(stream, sizeof(int));
        // For a socket stream, a read on a closed stream returns 0 bytes, which
        // SerDe.ReadBytes() surfaces as null to denote that the stream is closed.
        if (splitIndexBytes == null)
        {
            return null;
        }
    }
    catch (ObjectDisposedException)
    {
        // Stream implementations such as MemoryStream throw ObjectDisposedException
        // if the stream is already closed.
        return null;
    }

    payload.SplitIndex = BinaryPrimitives.ReadInt32BigEndian(splitIndexBytes);
    payload.Version = SerDe.ReadString(stream);

    payload.TaskContext = new TaskContextProcessor(_version).Process(stream);
    TaskContextHolder.Set(payload.TaskContext);

    payload.SparkFilesDir = SerDe.ReadString(stream);
    SparkFiles.SetRootDirectory(payload.SparkFilesDir);

    // Register additional assembly handlers after SparkFilesDir has been set
    // and before any deserialization occurs. BroadcastVariableProcessor may
    // deserialize objects from assemblies that are not currently loaded within
    // our current context.
    AssemblyLoaderHelper.RegisterAssemblyHandler();

    if (ConfigurationService.IsDatabricks)
    {
        SerDe.ReadString(stream);
        SerDe.ReadString(stream);
    }

    payload.IncludeItems = ReadIncludeItems(stream);
    payload.BroadcastVariables = new BroadcastVariableProcessor(_version).Process(stream);

    // TODO: Accumulator registration should be done here.
    payload.Command = new CommandProcessor(_version).Process(stream);

    return payload;
}
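A standalone illustration of the wire framing used for the first field above: the split index arrives as a 4-byte big-endian integer. This snippet only demonstrates that framing with an in-memory stream; it is not the full payload protocol.

// Sketch: write a big-endian int the way the JVM side would, then read it back.
using System;
using System.Buffers.Binary;
using System.IO;

byte[] buffer = new byte[sizeof(int)];
BinaryPrimitives.WriteInt32BigEndian(buffer, 42); // simulated JVM-side write

using var stream = new MemoryStream(buffer);
byte[] splitIndexBytes = new byte[sizeof(int)];
int bytesRead = stream.Read(splitIndexBytes, 0, splitIndexBytes.Length);
int splitIndex = BinaryPrimitives.ReadInt32BigEndian(splitIndexBytes);
Console.WriteLine(splitIndex); // prints 42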
private static IJvmObjectReferenceProvider CreateEnvVarsForPythonFunction(IJvmBridge jvm)
{
    var environmentVars = new Hashtable(jvm);
    string assemblySearchPath = string.Join(",",
        new[]
        {
            Environment.GetEnvironmentVariable(
                AssemblySearchPathResolver.AssemblySearchPathsEnvVarName),
            SparkFiles.GetRootDirectory()
        }.Where(s => !string.IsNullOrWhiteSpace(s)));
    if (!string.IsNullOrEmpty(assemblySearchPath))
    {
        environmentVars.Put(
            AssemblySearchPathResolver.AssemblySearchPathsEnvVarName,
            assemblySearchPath);
    }

    return environmentVars;
}
private static IJvmObjectReferenceProvider CreateEnvVarsForPythonFunction(IJvmBridge jvm)
{
    var environmentVars = new Hashtable(jvm);
    string assemblySearchPath = string.Join(",",
        new[]
        {
            Environment.GetEnvironmentVariable(
                AssemblySearchPathResolver.AssemblySearchPathsEnvVarName),
            SparkFiles.GetRootDirectory()
        }.Where(s => !string.IsNullOrWhiteSpace(s)));
    if (!string.IsNullOrEmpty(assemblySearchPath))
    {
        environmentVars.Put(
            AssemblySearchPathResolver.AssemblySearchPathsEnvVarName,
            assemblySearchPath);
    }

    // DOTNET_WORKER_SPARK_VERSION is used to handle different versions
    // of Spark on the worker.
    environmentVars.Put(
        "DOTNET_WORKER_SPARK_VERSION",
        SparkEnvironment.SparkVersion.ToString());

    return environmentVars;
}
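A hypothetical driver-side usage note: any value present in DOTNET_ASSEMBLY_SEARCH_PATHS when the environment variables are built is joined (comma-separated) with the SparkFiles root directory and forwarded to the worker. The paths below are made up for illustration.

// Sketch: set the variable on the driver before UDFs are created so the
// worker's probing paths include an application-specific directory.
Environment.SetEnvironmentVariable(
    AssemblySearchPathResolver.AssemblySearchPathsEnvVarName,
    "/opt/myapp/udf-assemblies,./extraProbe"); // "./..." is resolved against the worker's working directory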
public void TestSparkFiles()
{
    Assert.IsType<string>(SparkFiles.Get("people.json"));
    Assert.IsType<string>(SparkFiles.GetRootDirectory());
}