/// <summary>
/// Parses one path portion of an expanded glob pattern and translates it into
/// regular-expression source wrapped in a <see cref="ParseItem"/>.
/// </summary>
/// <remarks>
/// At this point no pattern may contain "/": the pattern is expected to have been
/// split on path separators already, and any "/" (even an escaped one) makes this
/// method return <c>null</c>.
///
/// Following the lead of Bash 4.1, "**" only has special meaning when it is the
/// *only* thing in a path portion; otherwise any series of * is equivalent to a
/// single *. Globstar handling is on unless <c>options.NoGlobStar</c> is set.
/// </remarks>
/// <param name="pattern">A single path portion of a glob pattern.</param>
/// <param name="isSub">
/// True when parsing a fragment of a larger pattern (used by the recursive call that
/// re-translates the contents of an unterminated character class). In that case the
/// raw regex source is returned as a literal item together with the magic flag.
/// </param>
/// <returns>
/// <c>null</c> if the pattern contains "/". Otherwise a tuple whose first item is:
/// <c>GlobStar.Instance</c> for "**" (when globstar is enabled), <c>ParseItem.Empty</c>
/// for "", a literal of the regex source when <paramref name="isSub"/> is true, a literal
/// of the unescaped pattern when nothing magical was found, or a <c>MagicItem</c> built
/// from the regex source. The second item is the "has magic" flag when
/// <paramref name="isSub"/> is true and <c>false</c> in every other case.
/// </returns>
private Tuple<ParseItem, bool> Parse(string pattern, bool isSub)
{
    // shortcuts
    if (!options.NoGlobStar && pattern == "**")
    {
        return Tuple.Create(GlobStar.Instance, false);
    }
    if (pattern == "")
    {
        return Tuple.Create(ParseItem.Empty, false);
    }

    string re = "";
    bool hasMagic = options.NoCase, escaping = false, inClass = false;
    var patternListStack = new Stack<PatternListEntry>();
    char plType;
    // Pending extglob/wildcard character (? * + @ !) that has not been emitted yet.
    char? stateChar = null;
    int reClassStart = -1, classStart = -1;

    // . and .. never match anything that doesn't start with .,
    // even when options.Dot is set.
    string patternStart = pattern[0] == '.'
        ? "" // anything
        // not (start or / followed by . or .. followed by / or end)
        : options.Dot ? "(?!(?:^|\\/)\\.{1,2}(?:$|\\/))"
        : "(?!\\.)";

    Action clearStateChar = () =>
    {
        if (stateChar != null)
        {
            // we had some state-tracking character
            // that wasn't consumed by this pass.
            switch (stateChar)
            {
                case '*':
                    re += star;
                    hasMagic = true;
                    break;
                case '?':
                    re += qmark;
                    hasMagic = true;
                    break;
                default:
                    re += "\\" + stateChar;
                    break;
            }
            stateChar = null;
        }
    };

    for (var i = 0; i < pattern.Length; i++)
    {
        var c = pattern[i];

        // skip over any that are escaped.
        if (escaping && reSpecials.Contains(c))
        {
            re += "\\" + c;
            escaping = false;
            continue;
        }

        switch (c)
        {
            case '/':
                // completely not allowed, even escaped.
                // Should already be path-split by now.
                return null;

            case '\\':
                clearStateChar();
                escaping = true;
                continue;

            // the various stateChar values
            // for the 'extglob' stuff.
            case '?':
            case '*':
            case '+':
            case '@':
            case '!':
                // all of those are literals inside a class, except that
                // the glob [!a] means [^a] in regexp
                if (inClass)
                {
                    if (c == '!' && i == classStart + 1)
                    {
                        c = '^';
                    }
                    re += c;
                    continue;
                }

                // if we already have a stateChar, then it means
                // that there was something like ** or +? in there.
                // Handle the stateChar, then proceed with this one.
                clearStateChar();
                stateChar = c;
                // if extglob is disabled, then +(asdf|foo) isn't a thing.
                // just clear the statechar *now*, rather than even diving into
                // the patternList stuff.
                if (options.NoExt)
                {
                    clearStateChar();
                }
                continue;

            case '(':
                if (inClass)
                {
                    re += "(";
                    continue;
                }

                if (stateChar == null)
                {
                    re += "\\(";
                    continue;
                }

                plType = stateChar.Value;
                patternListStack.Push(new PatternListEntry { Type = plType, Start = i - 1, ReStart = re.Length });
                // negation is (?:(?!js)[^/]*)
                re += stateChar == '!' ? "(?:(?!" : "(?:";
                stateChar = null;
                continue;

            case ')':
                if (inClass || !patternListStack.Any())
                {
                    re += "\\)";
                    continue;
                }

                hasMagic = true;
                re += ')';
                plType = patternListStack.Pop().Type;
                // negation is (?:(?!js)[^/]*)
                // The others are (?:<pattern>)<type>
                switch (plType)
                {
                    case '!':
                        re += "[^/]*?)";
                        break;
                    case '?':
                    case '+':
                    case '*':
                        re += plType;
                        break;
                    case '@':
                        break; // the default anyway
                }
                continue;

            case '|':
                if (inClass || !patternListStack.Any() || escaping)
                {
                    re += "\\|";
                    escaping = false;
                    continue;
                }

                re += "|";
                continue;

            // these are mostly the same in regexp and glob
            case '[':
                // swallow any state-tracking char before the [
                clearStateChar();

                if (inClass)
                {
                    re += "\\" + c;
                    continue;
                }

                inClass = true;
                classStart = i;
                reClassStart = re.Length;
                re += c;
                continue;

            case ']':
                // a right bracket shall lose its special
                // meaning and represent itself in
                // a bracket expression if it occurs
                // first in the list.  -- POSIX.2 2.8.3.2
                if (i == classStart + 1 || !inClass)
                {
                    re += "\\" + c;
                    escaping = false;
                    continue;
                }

                // finish up the class.
                hasMagic = true;
                inClass = false;
                re += c;
                continue;

            default:
                // swallow any state char that wasn't consumed
                clearStateChar();

                if (escaping)
                {
                    // no need
                    escaping = false;
                }
                else if (reSpecials.Contains(c) && !(c == '^' && inClass))
                {
                    re += "\\";
                }

                re += c;
                break;
        } // switch
    } // for

    // handle the case where we left a class open.
    // "[abc" is valid, equivalent to "\[abc"
    if (inClass)
    {
        // split where the last [ was, and escape it
        // this is a huge pita.  We now have to re-walk
        // the contents of the would-be class to re-translate
        // any characters that were passed through as-is
        string cs = pattern.Substring(classStart + 1);
        var sp = this.Parse(cs, true);
        re = re.Substring(0, reClassStart) + "\\[" + sp.Item1.Source;
        hasMagic = hasMagic || sp.Item2;
    }

    // handle the case where we had a +( thing at the *end*
    // of the pattern.
    // Each open pattern list added an opening group to the regexp, and we need
    // to go through and escape any | chars that were passed through as-is.
    // Go through and escape them, taking care not to double-escape any
    // | chars that were already escaped.
    while (patternListStack.Any())
    {
        var pl = patternListStack.Pop();

        // The '(' case emits "(?:(?!" for a negated list and "(?:" for every
        // other type, so the number of characters to skip depends on the type.
        // Skipping a fixed 3 would leave a dangling "(?!" in the tail for "!(".
        int openLength = pl.Type == '!' ? "(?:(?!".Length : "(?:".Length;
        var tail = re.Substring(pl.ReStart + openLength);

        // maybe some even number of \, then maybe 1 \, followed by a |
        tail = escapeCheck.Replace(tail, m =>
        {
            string escape = m.Groups[2].Value;
            // the | isn't already escaped, so escape it.
            if (String.IsNullOrEmpty(escape))
            {
                escape = "\\";
            }

            // need to escape all those slashes *again*, without escaping the
            // one that we need for escaping the | character.  As it works out,
            // escaping an even number of slashes can be done by simply repeating
            // it exactly after itself.  That's why this trick works.
            return m.Groups[1].Value + m.Groups[1].Value + escape + "|";
        });

        // Re-emit the list opener as literal text: the type character
        // (as a wildcard for * and ?, escaped otherwise) followed by "\(".
        var t = pl.Type == '*' ? star
            : pl.Type == '?' ? qmark
            : "\\" + pl.Type;

        hasMagic = true;
        re = re.Remove(pl.ReStart) + t + "\\(" + tail;
    }

    // handle trailing things that only matter at the very end.
    clearStateChar();
    if (escaping)
    {
        // trailing \\
        re += "\\\\";
    }

    // only need to apply the nodot start if the re starts with
    // something that could conceivably capture a dot.
    // The length check is defensive: indexing an empty string would throw.
    var addPatternStart = false;
    if (re.Length > 0)
    {
        switch (re[0])
        {
            case '.':
            case '[':
            case '(':
                addPatternStart = true;
                break;
        }
    }

    // if the re is not "" at this point, then we need to make sure
    // it doesn't match against an empty path part.
    // Otherwise a/* will match a/, which it should not.
    if (re != "" && hasMagic)
    {
        re = "(?=.)" + re;
    }

    if (addPatternStart)
    {
        re = patternStart + re;
    }

    // parsing just a piece of a larger pattern.
    if (isSub)
    {
        return Tuple.Create(ParseItem.Literal(re), hasMagic);
    }

    // skip the regexp for non-magical patterns
    // unescape anything in it, though, so that it'll be
    // an exact match against a file etc.
    if (!hasMagic)
    {
        return Tuple.Create(ParseItem.Literal(GlobUnescape(pattern)), false);
    }

    return new Tuple<ParseItem, bool>(new MagicItem(re, options), false);
}