/// <summary>List input directories.</summary>
/// <remarks>
/// List input directories.
/// Subclasses may override to, e.g., select only files matching a regular
/// expression.
/// </remarks>
/// <param name="job">the job to list input paths for</param>
/// <returns>array of FileStatus objects</returns>
/// <exception cref="System.IO.IOException">
/// if no input paths are specified in the job, or if fetching the file
/// statuses fails.
/// </exception>
protected internal virtual FileStatus[] ListStatus(JobConf job)
{
    Path[] dirs = GetInputPaths(job);
    if (dirs.Length == 0)
    {
        throw new IOException("No input paths specified in job");
    }

    // get tokens for all the required FileSystems..
    TokenCache.ObtainTokensForNamenodes(job.GetCredentials(), dirs, job);

    // Whether we need to recursive look into the directory structure
    bool recursive = job.GetBoolean(InputDirRecursive, false);

    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user provided one (if any).
    IList<PathFilter> filters = new AList<PathFilter>();
    filters.AddItem(hiddenFileFilter);
    PathFilter jobFilter = GetInputPathFilter(job);
    if (jobFilter != null)
    {
        filters.AddItem(jobFilter);
    }
    PathFilter inputFilter = new FileInputFormat.MultiPathFilter(filters);

    FileStatus[] result;
    int numThreads = job.GetInt(FileInputFormat.ListStatusNumThreads,
        FileInputFormat.DefaultListStatusNumThreads);

    StopWatch sw = new StopWatch().Start();
    if (numThreads == 1)
    {
        // Exactly one thread configured: list the directories inline.
        IList<FileStatus> locatedFiles = SingleThreadedListStatus(job, dirs, inputFilter, recursive);
        result = Sharpen.Collections.ToArray(locatedFiles, new FileStatus[locatedFiles.Count]);
    }
    else
    {
        // Any other thread count: delegate the listing to LocatedFileStatusFetcher.
        IEnumerable<FileStatus> locatedFiles = null;
        try
        {
            LocatedFileStatusFetcher locatedFileStatusFetcher = new LocatedFileStatusFetcher(
                job, dirs, recursive, inputFilter, false);
            locatedFiles = locatedFileStatusFetcher.GetFileStatuses();
        }
        catch (IOException)
        {
            // An I/O failure already has the right type and an accurate message;
            // relabelling it as "Interrupted" would hide the real cause.
            throw;
        }
        catch (Exception e)
        {
            // Callers only expect IOException from this method, so wrap anything
            // else, but keep the original exception as the inner cause.
            throw new IOException("Interrupted while getting file statuses", e);
        }
        result = Iterables.ToArray<FileStatus>(locatedFiles);
    }
    sw.Stop();

    if (Log.IsDebugEnabled())
    {
        Log.Debug("Time taken to get FileStatuses: " + sw.Now(TimeUnit.Milliseconds));
    }
    Log.Info("Total input paths to process : " + result.Length);
    return result;
}
/// <summary>
/// Creates the instance and stores the supplied fetcher in the
/// <c>_enclosing</c> field.
/// </summary>
/// <param name="_enclosing">the LocatedFileStatusFetcher to keep a reference to</param>
internal ProcessInitialInputPathCallback(LocatedFileStatusFetcher _enclosing)
{
    this._enclosing = _enclosing;
}