Exemplo n.º 1
0
        /// <summary>Lists the files found under the job's input paths.</summary>
        /// <remarks>
        /// Reads the input paths from the job, obtains tokens for them, combines the
        /// hidden-file filter with the optional user-supplied filter, and then lists
        /// the matching entries either through <c>SingleThreadedListStatus</c> (when
        /// the configured thread count is 1) or through a
        /// <c>LocatedFileStatusFetcher</c> (for any other thread count).
        /// Subclasses may override to, e.g., select only files matching a regular
        /// expression.
        /// </remarks>
        /// <param name="job">the job to list input paths for</param>
        /// <returns>array of FileStatus objects</returns>
        /// <exception cref="System.IO.IOException">
        /// if the job specifies no input paths, or if fetching the file statuses
        /// fails (a non-I/O failure is wrapped and exposed as the inner exception).
        /// </exception>
        protected internal virtual FileStatus[] ListStatus(JobConf job)
        {
            Path[] dirs = GetInputPaths(job);
            if (dirs.Length == 0)
            {
                throw new IOException("No input paths specified in job");
            }

            // Get tokens for all the required FileSystems.
            TokenCache.ObtainTokensForNamenodes(job.GetCredentials(), dirs, job);

            // Whether the directory structure should be walked recursively.
            bool recursive = job.GetBoolean(InputDirRecursive, false);

            // Build a MultiPathFilter from the hiddenFileFilter and the
            // user-provided filter (if any).
            IList<PathFilter> filters = new AList<PathFilter>();
            filters.AddItem(hiddenFileFilter);
            PathFilter jobFilter = GetInputPathFilter(job);
            if (jobFilter != null)
            {
                filters.AddItem(jobFilter);
            }
            PathFilter inputFilter = new FileInputFormat.MultiPathFilter(filters);

            FileStatus[] result;
            int numThreads = job.GetInt(FileInputFormat.ListStatusNumThreads,
                                        FileInputFormat.DefaultListStatusNumThreads);
            StopWatch sw = new StopWatch().Start();

            if (numThreads == 1)
            {
                IList<FileStatus> locatedFiles = SingleThreadedListStatus(job, dirs, inputFilter,
                                                                          recursive);
                result = Sharpen.Collections.ToArray(locatedFiles, new FileStatus[locatedFiles.Count]);
            }
            else
            {
                IEnumerable<FileStatus> locatedFiles = null;
                try
                {
                    LocatedFileStatusFetcher locatedFileStatusFetcher = new LocatedFileStatusFetcher(
                        job, dirs, recursive, inputFilter, false);
                    locatedFiles = locatedFileStatusFetcher.GetFileStatuses();
                }
                catch (IOException)
                {
                    // Already the exception type this method reports: let it through
                    // unchanged instead of relabelling a real I/O failure as an
                    // "interrupted" one.
                    throw;
                }
                catch (Exception e)
                {
                    // Keep the documented exception type and message, but preserve the
                    // underlying failure as the inner exception so it is not lost.
                    throw new IOException("Interrupted while getting file statuses", e);
                }
                result = Iterables.ToArray<FileStatus>(locatedFiles);
            }

            sw.Stop();
            if (Log.IsDebugEnabled())
            {
                Log.Debug("Time taken to get FileStatuses: " + sw.Now(TimeUnit.Milliseconds));
            }
            Log.Info("Total input paths to process : " + result.Length);
            return result;
        }
 /// <summary>
 /// Creates the callback and stores the supplied fetcher in <c>_enclosing</c>.
 /// </summary>
 /// <remarks>
 /// NOTE(review): this looks like the explicit outer-instance reference of a
 /// translated Java inner class of LocatedFileStatusFetcher; the class
 /// declaration is not visible here, so confirm.
 /// </remarks>
 /// <param name="_enclosing">the LocatedFileStatusFetcher instance to keep a reference to</param>
 internal ProcessInitialInputPathCallback(LocatedFileStatusFetcher _enclosing)
 {
     this._enclosing = _enclosing;
 }