コード例 #1
0
ファイル: parser.cs プロジェクト: brass9/RegexStringBuilder
        // Assigns an index to every capturing group in 'caps' and then binds each
        // pending entry in 'refs' to the group it names.
        //
        // Numbering makes two passes over 'caps': unnamed groups first, then named
        // ones. Groups whose name is a complete decimal number keep that number as
        // their index; any of those that do not coincide with the running counter
        // are collected and passed to HandleExplicitNumericGroups afterwards.
        //
        // Throws the exception built by NewParseException when a reference names a
        // group that is not known and cannot be resolved another way.
        private void ResolveReferences()
        {
            int       nextId          = 1;
            Hashtable groupsByKey     = new Hashtable();
            ArrayList explicitNumbers = null;   // created lazily, only when needed

            // Pass 1: unnamed groups take consecutive numbers starting at 1.

            foreach (CapturingGroup cap in caps)
            {
                if (cap.Name == null)
                {
                    groupsByKey.Add(nextId.ToString(), cap);
                    cap.Index = nextId;
                    nextId++;
                    num_groups++;
                }
            }

            // Pass 2: named groups.

            foreach (CapturingGroup cap in caps)
            {
                string capName = cap.Name;
                if (capName == null)
                {
                    continue;
                }

                // The key is already registered: reuse the index of the group that
                // registered it. num_groups is not incremented on this path.
                if (groupsByKey.Contains(capName))
                {
                    CapturingGroup earlier = (CapturingGroup)groupsByKey[capName];
                    cap.Index = earlier.Index;

                    if (cap.Index > nextId)
                    {
                        explicitNumbers.Add(cap);
                    }
                    else if (cap.Index == nextId)
                    {
                        nextId++;
                    }
                    continue;
                }

                // A name that parses completely as a decimal number is an explicit
                // group number rather than a symbolic name.
                if (Char.IsDigit(capName[0]))
                {
                    int consumed  = 0;
                    int requested = ParseDecimal(capName, ref consumed);
                    if (consumed == capName.Length)
                    {
                        cap.Index = requested;
                        groupsByKey.Add(capName, cap);
                        num_groups++;

                        if (requested != nextId)
                        {
                            // The requested number does not line up with the counter;
                            // remember the group for HandleExplicitNumericGroups.
                            if (explicitNumbers == null)
                            {
                                explicitNumbers = new ArrayList(4);
                            }
                            explicitNumbers.Add(cap);
                        }
                        else
                        {
                            nextId++;
                        }

                        continue;
                    }
                }

                // Symbolic name: advance the counter past numbers that are already
                // taken, then register the group under both the number and the name.
                while (groupsByKey.Contains(nextId.ToString()))
                {
                    nextId++;
                }

                groupsByKey.Add(nextId.ToString(), cap);
                groupsByKey.Add(capName, cap);
                cap.Index = nextId;
                nextId++;
                num_groups++;
            }

            gap = nextId;          // == 1 + num_groups, if explicitNumbers == null

            if (explicitNumbers != null)
            {
                HandleExplicitNumericGroups(explicitNumbers);
            }

            // Pass 3: bind every recorded reference to its capturing group.

            foreach (Expression expr in refs.Keys)
            {
                string name = (string)refs[expr];

                if (!groupsByKey.Contains(name))
                {
                    // A capture assertion with a non-numeric name is left unbound.
                    bool tolerated = expr is CaptureAssertion && !Char.IsDigit(name[0]);

                    if (!tolerated)
                    {
                        // A backslash-number reference gets a chance to resolve itself.
                        BackslashNumber numbered = expr as BackslashNumber;
                        tolerated = numbered != null && numbered.ResolveReference(name, groupsByKey);
                    }

                    if (tolerated)
                    {
                        continue;
                    }

                    string kind = Char.IsDigit(name[0]) ? "number " : "name ";
                    throw NewParseException("Reference to undefined group " + kind + name);
                }

                CapturingGroup target = (CapturingGroup)groupsByKey[name];

                Reference asReference = expr as Reference;
                if (asReference != null)
                {
                    asReference.CapturingGroup = target;
                    continue;
                }

                CaptureAssertion asAssertion = expr as CaptureAssertion;
                if (asAssertion != null)
                {
                    asAssertion.CapturingGroup = target;
                    continue;
                }

                BalancingGroup asBalancing = expr as BalancingGroup;
                if (asBalancing != null)
                {
                    asBalancing.Balance = target;
                }
            }
        }
コード例 #2
0
ファイル: parser.cs プロジェクト: brass9/RegexStringBuilder
        // Parses the construct identified by the character at 'ptr' (category
        // class, position assertion, numbered backreference or \k named
        // backreference) and returns the expression built for it.
        //
        // Returns null, with 'ptr' restored to its value on entry, when the
        // character is not one of the handled constructs or when ParseNumber
        // reports a negative value. Throws the exception built by
        // NewParseException for a malformed \k reference.
        private Expression ParseSpecial(RegexOptions options)
        {
            int  start      = ptr;
            bool ecmaScript = IsECMAScript(options);
            char code       = pattern[ptr++];

            switch (code)
            {
            // categories: the upper-case letter is the negated form

            case 'd':
            case 'D':
                return new CharacterClass(ecmaScript ? Category.EcmaDigit : Category.Digit, code == 'D');

            case 'w':
            case 'W':
                return new CharacterClass(ecmaScript ? Category.EcmaWord : Category.Word, code == 'W');

            case 's':
            case 'S':
                return new CharacterClass(ecmaScript ? Category.EcmaWhiteSpace : Category.WhiteSpace, code == 'S');

            case 'p':
            case 'P':
                // ECMAScript is not supposed to support Unicode, yet \p{..}
                // compiles and runs under the MS implementation exactly as in
                // canonical mode, so the ECMAScript flag is ignored here.
                return new CharacterClass(ParseUnicodeCategory(), code == 'P');

            // positions

            case 'A':
                return new PositionAssertion(Position.StartOfString);

            case 'Z':
                return new PositionAssertion(Position.End);

            case 'z':
                return new PositionAssertion(Position.EndOfString);

            case 'G':
                return new PositionAssertion(Position.StartOfScan);

            case 'b':
                return new PositionAssertion(Position.Boundary);

            case 'B':
                return new PositionAssertion(Position.NonBoundary);

            // references

            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
            {
                // Step back onto the digit so ParseNumber reads the whole number.
                ptr = start;
                int number = ParseNumber(10, 1, 0);
                if (number < 0)
                {
                    ptr = start;
                    return null;
                }

                // FIXME test if number is within number of assigned groups
                // this may present a problem for right-to-left matching

                Reference numbered = new BackslashNumber(IsIgnoreCase(options), ecmaScript);
                refs.Add(numbered, number.ToString());
                return numbered;
            }

            case 'k':
            {
                // The name is wrapped either in <...> or in '...'.
                char opening = pattern[ptr++];
                char closing;
                if (opening == '<')
                {
                    closing = '>';
                }
                else if (opening == '\'')
                {
                    closing = '\'';
                }
                else
                {
                    throw NewParseException("Malformed \\k<...> named backreference.");
                }

                string name       = ParseName();
                bool   terminated = name != null && pattern[ptr] == closing;
                if (!terminated)
                {
                    throw NewParseException("Malformed \\k<...> named backreference.");
                }

                ++ptr;   // consume the closing delimiter
                Reference named = new Reference(IsIgnoreCase(options));
                refs.Add(named, name);
                return named;
            }

            default:
                // Not handled here: rewind so the caller can try something else.
                ptr = start;
                return null;
            }
        }