/// <summary>
/// Sweeps and processes the shared cache area to clean up stale and orphaned
/// files.
/// </summary>
/// <remarks>
/// Globs the cache root for entries at the configured nesting level and hands
/// each directory to <c>ProcessSingleResource</c>, optionally sleeping between
/// entries. The method does not throw: I/O and unexpected failures are logged,
/// and an interruption re-sets the current thread's interrupt flag so the
/// caller can observe it.
/// </remarks>
internal virtual void Process()
{
    // mark the beginning of the run in the metrics
    metrics.ReportCleaningStart();
    try
    {
        // now traverse individual directories and process them
        // the directory structure is specified by the nested level parameter
        // (e.g. 9/c/d/<checksum>)
        string pattern = SharedCacheUtil.GetCacheEntryGlobPattern(nestedLevel);
        FileStatus[] resources = fs.GlobStatus(new Path(root, pattern));
        int numResources = resources == null ? 0 : resources.Length;
        Log.Info("Processing " + numResources + " resources in the shared cache");
        long beginMs = Runtime.CurrentTimeMillis();
        if (resources != null)
        {
            foreach (FileStatus resource in resources)
            {
                // check for interruption so it can abort in a timely manner in case
                // of shutdown
                if (Sharpen.Thread.CurrentThread().IsInterrupted())
                {
                    Log.Warn("The cleaner task was interrupted. Aborting.");
                    break;
                }
                if (resource.IsDirectory())
                {
                    ProcessSingleResource(resource);
                }
                else
                {
                    Log.Warn("Invalid file at path " + resource.GetPath().ToString() + " when a directory was expected"
                        );
                }
                // add sleep time between cleaning each directory if it is non-zero
                if (sleepTime > 0)
                {
                    Sharpen.Thread.Sleep(sleepTime);
                }
            }
        }
        long endMs = Runtime.CurrentTimeMillis();
        long durationMs = endMs - beginMs;
        Log.Info("Processed " + numResources + " resource(s) in " + durationMs + " ms.");
    }
    catch (IOException e1)
    {
        Log.Error("Unable to complete the cleaner task", e1);
    }
    catch (System.Threading.ThreadInterruptedException)
    {
        // restore the interrupt so the owner of this thread can react to it
        // NOTE(review): assumes Sharpen.Thread.Sleep surfaces interruption as
        // System.Threading.ThreadInterruptedException - confirm against the
        // Sharpen runtime in use.
        Sharpen.Thread.CurrentThread().Interrupt();
    }
    catch (Exception e2)
    {
        // Previously every non-IO exception was swallowed silently and the
        // thread was flagged as interrupted. Keep the task non-throwing, but
        // record the failure instead of hiding it behind a bogus interrupt.
        Log.Error("Unable to complete the cleaner task", e2);
    }
}
/// <summary>
/// Builds the initial mapping of cached resources by scanning the shared
/// cache root for files one level below the cache entry directories.
/// </summary>
/// <param name="fs">File system used to check the root and glob for entries.</param>
/// <param name="conf">
/// Configuration supplying the shared cache root location and the cache depth.
/// </param>
/// <returns>
/// A dictionary keyed by the name of each file's immediate parent directory
/// (the checksum), with the file name as the value. Only the first file
/// encountered per key is kept; later ones are logged and skipped.
/// </returns>
/// <exception cref="IOException">
/// Thrown when the shared cache root directory does not exist.
/// </exception>
internal virtual IDictionary<string, string> GetInitialCachedResources(FileSystem fs, Configuration conf)
{
    // get the root directory for the shared cache
    string location = conf.Get(YarnConfiguration.SharedCacheRoot, YarnConfiguration.DefaultSharedCacheRoot
        );
    Path root = new Path(location);
    if (!fs.Exists(root))
    {
        string message = "The shared cache root directory " + location + " was not found";
        Log.Error(message);
        throw new IOException(message);
    }
    int nestedLevel = SharedCacheUtil.GetCacheDepth(conf);
    // now traverse individual directories and process them
    // the directory structure is specified by the nested level parameter
    // (e.g. 9/c/d/<checksum>/file)
    string pattern = SharedCacheUtil.GetCacheEntryGlobPattern(nestedLevel + 1);
    Log.Info("Querying for all individual cached resource files");
    FileStatus[] entries = fs.GlobStatus(new Path(root, pattern));
    int numEntries = entries == null ? 0 : entries.Length;
    Log.Info("Found " + numEntries + " files: processing for one resource per " + "key"
        );
    IDictionary<string, string> initialCachedEntries = new Dictionary<string, string>();
    if (entries != null)
    {
        foreach (FileStatus entry in entries)
        {
            // only regular files are candidates; anything else is ignored
            if (!entry.IsFile())
            {
                continue;
            }
            Path file = entry.GetPath();
            // get the parent to get the checksum
            Path parent = file.GetParent();
            if (parent == null)
            {
                continue;
            }
            string fileName = file.GetName();
            // the name of the immediate parent directory is the checksum
            string key = parent.GetName();
            // make sure we insert only one file per checksum whichever comes
            // first; TryGetValue does the membership test and the read of the
            // existing value in a single lookup
            string existingFile;
            if (initialCachedEntries.TryGetValue(key, out existingFile))
            {
                Log.Warn("Key " + key + " is already mapped to file " + existingFile + "; file "
                     + fileName + " will not be added");
            }
            else
            {
                initialCachedEntries[key] = fileName;
            }
        }
    }
    Log.Info("A total of " + initialCachedEntries.Count + " files are now mapped");
    return initialCachedEntries;
}