Beispiel #1
0
            // parse a component of the expanded set.
            // At this point, no pattern may contain "/" in it
            // so we're going to return a 2d array, where each entry is the full
            // pattern, split on '/', and then turned into a regular expression.
            // A regexp is made at the end which joins each array with an
            // escaped /, and another full one which joins each regexp with |.
            //
            // Following the lead of Bash 4.1, note that "**" only has special meaning
            // when it is the *only* thing in a path portion.  Otherwise, any series
            // of * is equivalent to a single *.  Globstar behavior is enabled by
            // default, and can be disabled by setting options.noglobstar.
            /// <summary>
            /// Translates a single path portion of a glob pattern (no '/' allowed) into a
            /// parse item, building the equivalent regular-expression source on the way.
            /// </summary>
            /// <param name="pattern">One '/'-free portion of a glob pattern.</param>
            /// <param name="isSub">
            /// True when parsing a fragment of a larger portion (used below to re-parse the
            /// contents of an unterminated "[" class).  In that mode the generated regex
            /// source is returned as a literal item together with the magic flag.
            /// </param>
            /// <returns>
            /// <c>null</c> if the pattern contains '/'.  Otherwise a tuple whose first item is:
            /// the globstar item for "**" (unless NoGlobStar is set), the empty item for "",
            /// a literal holding the regex source when <paramref name="isSub"/> is true,
            /// a literal holding the unescaped pattern when nothing magical was found,
            /// or a MagicItem built from the regex source.  The second item carries the
            /// "has magic" flag only for sub-parses; every other return path sets it to false.
            /// </returns>
            private Tuple <ParseItem, bool> Parse(string pattern, bool isSub)
            {
                // Regex text emitted when an extglob group is opened.  The negated form is
                // longer than the plain one, which matters when an unterminated group has
                // to be unwound after the main loop.
                const string groupOpen        = "(?:";
                const string negatedGroupOpen = "(?:(?!";

                // shortcuts
                if (!options.NoGlobStar && pattern == "**")
                {
                    return Tuple.Create(GlobStar.Instance, false);
                }
                if (pattern == "")
                {
                    return Tuple.Create(ParseItem.Empty, false);
                }

                string re = "";
                // case-insensitive matching always needs a regex, so it counts as magic
                bool hasMagic = options.NoCase, escaping = false, inClass = false;
                // stack of extglob groups that have been opened but not yet closed
                var  patternListStack = new Stack <PatternListEntry>();
                char plType;
                // pending ? * + @ ! that may turn out to be an extglob prefix
                char? stateChar = null;

                // position of the last unclosed '[' in the pattern and in the regex text
                int reClassStart = -1, classStart = -1;
                // . and .. never match anything that doesn't start with .,
                // even when options.dot is set.
                string patternStart = pattern[0] == '.' ? "" // anything
                                                             // not (start or / followed by . or .. followed by / or end)
                  : options.Dot ? "(?!(?:^|\\/)\\.{1,2}(?:$|\\/))"
                  : "(?!\\.)";

                // Flushes a pending state character that did not become an extglob prefix.
                Action clearStateChar = () =>
                {
                    if (stateChar != null)
                    {
                        // we had some state-tracking character
                        // that wasn't consumed by this pass.
                        switch (stateChar)
                        {
                        case '*':
                            re      += star;
                            hasMagic = true;
                            break;

                        case '?':
                            re      += qmark;
                            hasMagic = true;
                            break;

                        default:
                            // + @ ! on their own are plain characters
                            re += "\\" + stateChar;
                            break;
                        }
                        stateChar = null;
                    }
                };

                for (var i = 0; i < pattern.Length; i++)
                {
                    var c = pattern[i];

                    // skip over any that are escaped.
                    if (escaping && reSpecials.Contains(c))
                    {
                        re      += "\\" + c;
                        escaping = false;
                        continue;
                    }

                    switch (c)
                    {
                    case '/':
                        // completely not allowed, even escaped.
                        // Should already be path-split by now.
                        return null;

                    case '\\':
                        clearStateChar();
                        escaping = true;
                        continue;

                    // the various stateChar values
                    // for the 'extglob' stuff.
                    case '?':
                    case '*':
                    case '+':
                    case '@':
                    case '!':
                        // all of those are literals inside a class, except that
                        // the glob [!a] means [^a] in regexp
                        if (inClass)
                        {
                            if (c == '!' && i == classStart + 1)
                            {
                                c = '^';
                            }
                            re += c;
                            continue;
                        }

                        // if we already have a stateChar, then it means
                        // that there was something like ** or +? in there.
                        // Handle the stateChar, then proceed with this one.
                        clearStateChar();
                        stateChar = c;
                        // if extglob is disabled, then +(asdf|foo) isn't a thing.
                        // just clear the statechar *now*, rather than even diving into
                        // the patternList stuff.
                        if (options.NoExt)
                        {
                            clearStateChar();
                        }
                        continue;

                    case '(':
                        if (inClass)
                        {
                            re += "(";
                            continue;
                        }

                        // no extglob prefix in front of it: a literal parenthesis
                        if (stateChar == null)
                        {
                            re += "\\(";
                            continue;
                        }

                        plType = stateChar.Value;
                        patternListStack.Push(new PatternListEntry {
                            Type = plType, Start = i - 1, ReStart = re.Length
                        });
                        // negation is (?:(?!js)[^/]*)
                        re       += plType == '!' ? negatedGroupOpen : groupOpen;
                        stateChar = null;
                        continue;

                    case ')':
                        if (inClass || !patternListStack.Any())
                        {
                            re += "\\)";
                            continue;
                        }

                        hasMagic = true;
                        re      += ')';
                        plType   = patternListStack.Pop().Type;
                        // negation is (?:(?!js)[^/]*)
                        // The others are (?:<pattern>)<type>
                        switch (plType)
                        {
                        case '!':
                            re += "[^/]*?)";
                            break;

                        case '?':
                        case '+':
                        case '*':
                            re += plType;
                            break;

                        case '@':
                            break;         // the default anyway
                        }
                        continue;

                    case '|':
                        // only an alternation separator inside an open extglob group
                        if (inClass || !patternListStack.Any() || escaping)
                        {
                            re      += "\\|";
                            escaping = false;
                            continue;
                        }

                        re += "|";
                        continue;

                    // these are mostly the same in regexp and glob
                    case '[':
                        // swallow any state-tracking char before the [
                        clearStateChar();

                        if (inClass)
                        {
                            re += "\\" + c;
                            continue;
                        }

                        inClass      = true;
                        classStart   = i;
                        reClassStart = re.Length;
                        re          += c;
                        continue;

                    case ']':
                        //  a right bracket shall lose its special
                        //  meaning and represent itself in
                        //  a bracket expression if it occurs
                        //  first in the list.  -- POSIX.2 2.8.3.2
                        if (i == classStart + 1 || !inClass)
                        {
                            re      += "\\" + c;
                            escaping = false;
                            continue;
                        }

                        // finish up the class.
                        hasMagic = true;
                        inClass  = false;
                        re      += c;
                        continue;

                    default:
                        // swallow any state char that wasn't consumed
                        clearStateChar();

                        if (escaping)
                        {
                            // the backslash preceded an ordinary character: drop it
                            escaping = false;
                        }
                        else if (reSpecials.Contains(c) && !(c == '^' && inClass))
                        {
                            re += "\\";
                        }

                        re += c;
                        break;
                    } // switch
                }     // for


                // handle the case where we left a class open.
                // "[abc" is valid, equivalent to "\[abc"
                if (inClass)
                {
                    // split where the last [ was, and escape it.
                    // We now have to re-walk the contents of the would-be class
                    // to re-translate any characters that were passed through as-is.
                    // NOTE(review): when the class body is exactly "**" the globstar
                    // shortcut at the top also applies to this sub-parse - confirm that
                    // the returned item's Source is usable here.
                    string cs = pattern.Substring(classStart + 1);
                    var    sp = this.Parse(cs, true);
                    re       = re.Substring(0, reClassStart) + "\\[" + sp.Item1.Source;
                    hasMagic = hasMagic || sp.Item2;
                }

                // handle the case where we had a +( thing at the *end*
                // of the pattern: the group was never closed, so its prefix and "("
                // are literals.  Strip the group opener that was emitted, and escape any
                // | chars that were passed through as-is for the regexp, taking care
                // not to double-escape any | chars that were already escaped.
                while (patternListStack.Any())
                {
                    var pl = patternListStack.Pop();
                    // Skip exactly the opener that was written for this group; a negated
                    // group opens with more characters than the other kinds, and leaving
                    // part of it behind would produce an unbalanced regex.
                    int openLength = pl.Type == '!' ? negatedGroupOpen.Length : groupOpen.Length;
                    var tail       = re.Substring(pl.ReStart + openLength);
                    // maybe some even number of \, then maybe 1 \, followed by a |
                    tail = escapeCheck.Replace(tail, m =>
                    {
                        string escape = m.Groups[2].Value;
                        // the | isn't already escaped, so escape it.
                        if (String.IsNullOrEmpty(escape))
                        {
                            escape = "\\";
                        }

                        // need to escape all those slashes *again*, without escaping the
                        // one that we need for escaping the | character.  As it works out,
                        // escaping an even number of slashes can be done by simply repeating
                        // it exactly after itself.  That's why this trick works.
                        return m.Groups[1].Value + m.Groups[1].Value + escape + "|";
                    });

                    // the prefix character becomes what it would mean without a group
                    var t = pl.Type == '*' ? star
                          : pl.Type == '?' ? qmark
                          : "\\" + pl.Type;

                    hasMagic = true;
                    re       = re.Remove(pl.ReStart)
                               + t + "\\("
                               + tail;
                }

                // handle trailing things that only matter at the very end.
                clearStateChar();
                if (escaping)
                {
                    // trailing \\
                    re += "\\\\";
                }

                // only need to apply the nodot start if the re starts with
                // something that could conceivably capture a dot
                var addPatternStart = false;

                // guard the indexer so an empty regex can never throw here
                if (re.Length > 0)
                {
                    switch (re[0])
                    {
                    case '.':
                    case '[':
                    case '(':
                        addPatternStart = true;
                        break;
                    }
                }

                // if the re is not "" at this point, then we need to make sure
                // it doesn't match against an empty path part.
                // Otherwise a/* will match a/, which it should not.
                if (re != "" && hasMagic)
                {
                    re = "(?=.)" + re;
                }

                if (addPatternStart)
                {
                    re = patternStart + re;
                }

                // parsing just a piece of a larger pattern.
                if (isSub)
                {
                    return Tuple.Create(ParseItem.Literal(re), hasMagic);
                }

                // skip the regexp for non-magical patterns
                // unescape anything in it, though, so that it'll be
                // an exact match against a file etc.
                if (!hasMagic)
                {
                    return Tuple.Create(ParseItem.Literal(GlobUnescape(pattern)), false);
                }
                return new Tuple <ParseItem, bool>(new MagicItem(re, options), false);
            }