/// <exception cref="System.IO.IOException"/> private void CheckExpansion(string filePattern, params string[] expectedExpansions ) { IList <string> actualExpansions = GlobExpander.Expand(filePattern); Assert.Equal("Different number of expansions", expectedExpansions .Length, actualExpansions.Count); for (int i = 0; i < expectedExpansions.Length; i++) { Assert.Equal("Expansion of " + filePattern, expectedExpansions [i], actualExpansions[i]); } }
/// <exception cref="System.IO.IOException"/> public virtual FileStatus[] Glob() { // First we get the scheme and authority of the pattern that was passed // in. string scheme = SchemeFromPath(pathPattern); string authority = AuthorityFromPath(pathPattern); // Next we strip off everything except the pathname itself, and expand all // globs. Expansion is a process which turns "grouping" clauses, // expressed as brackets, into separate path patterns. string pathPatternString = pathPattern.ToUri().GetPath(); IList <string> flattenedPatterns = GlobExpander.Expand(pathPatternString); // Now loop over all flattened patterns. In every case, we'll be trying to // match them to entries in the filesystem. AList <FileStatus> results = new AList <FileStatus>(flattenedPatterns.Count); bool sawWildcard = false; foreach (string flatPattern in flattenedPatterns) { // Get the absolute path for this flattened pattern. We couldn't do // this prior to flattening because of patterns like {/,a}, where which // path you go down influences how the path must be made absolute. Path absPattern = FixRelativePart(new Path(flatPattern.IsEmpty() ? Path.CurDir : flatPattern)); // Now we break the flattened, absolute pattern into path components. // For example, /a/*/c would be broken into the list [a, *, c] IList <string> components = GetPathComponents(absPattern.ToUri().GetPath()); // Starting out at the root of the filesystem, we try to match // filesystem entries against pattern components. AList <FileStatus> candidates = new AList <FileStatus>(1); // To get the "real" FileStatus of root, we'd have to do an expensive // RPC to the NameNode. So we create a placeholder FileStatus which has // the correct path, but defaults for the rest of the information. // Later, if it turns out we actually want the FileStatus of root, we'll // replace the placeholder with a real FileStatus obtained from the // NameNode. FileStatus rootPlaceholder; if (Path.Windows && !components.IsEmpty() && Path.IsWindowsAbsolutePath(absPattern .ToUri().GetPath(), true)) { // On Windows the path could begin with a drive letter, e.g. /E:/foo. // We will skip matching the drive letter and start from listing the // root of the filesystem on that drive. string driveLetter = components.Remove(0); rootPlaceholder = new FileStatus(0, true, 0, 0, 0, new Path(scheme, authority, Path .Separator + driveLetter + Path.Separator)); } else { rootPlaceholder = new FileStatus(0, true, 0, 0, 0, new Path(scheme, authority, Path .Separator)); } candidates.AddItem(rootPlaceholder); for (int componentIdx = 0; componentIdx < components.Count; componentIdx++) { AList <FileStatus> newCandidates = new AList <FileStatus>(candidates.Count); GlobFilter globFilter = new GlobFilter(components[componentIdx]); string component = UnescapePathComponent(components[componentIdx]); if (globFilter.HasPattern()) { sawWildcard = true; } if (candidates.IsEmpty() && sawWildcard) { // Optimization: if there are no more candidates left, stop examining // the path components. We can only do this if we've already seen // a wildcard component-- otherwise, we still need to visit all path // components in case one of them is a wildcard. break; } if ((componentIdx < components.Count - 1) && (!globFilter.HasPattern())) { // Optimization: if this is not the terminal path component, and we // are not matching against a glob, assume that it exists. If it // doesn't exist, we'll find out later when resolving a later glob // or the terminal path component. foreach (FileStatus candidate in candidates) { candidate.SetPath(new Path(candidate.GetPath(), component)); } continue; } foreach (FileStatus candidate_1 in candidates) { if (globFilter.HasPattern()) { FileStatus[] children = ListStatus(candidate_1.GetPath()); if (children.Length == 1) { // If we get back only one result, this could be either a listing // of a directory with one entry, or it could reflect the fact // that what we listed resolved to a file. // // Unfortunately, we can't just compare the returned paths to // figure this out. Consider the case where you have /a/b, where // b is a symlink to "..". In that case, listing /a/b will give // back "/a/b" again. If we just went by returned pathname, we'd // incorrectly conclude that /a/b was a file and should not match // /a/*/*. So we use getFileStatus of the path we just listed to // disambiguate. if (!GetFileStatus(candidate_1.GetPath()).IsDirectory()) { continue; } } foreach (FileStatus child in children) { if (componentIdx < components.Count - 1) { // Don't try to recurse into non-directories. See HADOOP-10957. if (!child.IsDirectory()) { continue; } } // Set the child path based on the parent path. child.SetPath(new Path(candidate_1.GetPath(), child.GetPath().GetName())); if (globFilter.Accept(child.GetPath())) { newCandidates.AddItem(child); } } } else { // When dealing with non-glob components, use getFileStatus // instead of listStatus. This is an optimization, but it also // is necessary for correctness in HDFS, since there are some // special HDFS directories like .reserved and .snapshot that are // not visible to listStatus, but which do exist. (See HADOOP-9877) FileStatus childStatus = GetFileStatus(new Path(candidate_1.GetPath(), component) ); if (childStatus != null) { newCandidates.AddItem(childStatus); } } } candidates = newCandidates; } foreach (FileStatus status in candidates) { // Use object equality to see if this status is the root placeholder. // See the explanation for rootPlaceholder above for more information. if (status == rootPlaceholder) { status = GetFileStatus(rootPlaceholder.GetPath()); if (status == null) { continue; } } // HADOOP-3497 semantics: the user-defined filter is applied at the // end, once the full path is built up. if (filter.Accept(status.GetPath())) { results.AddItem(status); } } } /* * When the input pattern "looks" like just a simple filename, and we * can't find it, we return null rather than an empty array. * This is a special case which the shell relies on. * * To be more precise: if there were no results, AND there were no * groupings (aka brackets), and no wildcards in the input (aka stars), * we return null. */ if ((!sawWildcard) && results.IsEmpty() && (flattenedPatterns.Count <= 1)) { return(null); } return(Collections.ToArray(results, new FileStatus[0])); }