/// <summary>
        /// Builds, compiles and runs the regex lexer state machine, writing the time spent
        /// in each phase (and a readable dump of the machine) to the test output.
        /// </summary>
        public void RegexStateMachine()
        {
            var log = this.output;

            // Touch the Unicode-related classes up front so their initialization cost
            // is not attributed to the phases timed further down.
            var warmupTimer = Stopwatch.StartNew();
            var charSets    = new UnicodeCharSetProvider();

            foreach (var charSetClass in new[] { CharSetClass.Digit, CharSetClass.Dot, CharSetClass.Space, CharSetClass.Word })
            {
                charSets.GetClassSet(charSetClass);
            }
            UnicodeRanges.FromUnicodeName("InCombining_Diacritical_Marks");
            warmupTimer.Stop();
            log.WriteLine("Unicode init ms: " + warmupTimer.ElapsedMilliseconds);

            // A single running stopwatch; each phase is the difference between two consecutive readings.
            var phaseTimer = Stopwatch.StartNew();

            RegexLexer.CreateStateMachine(out var stateMachine, out var startStateId);
            var ticksAfterBuild = phaseTimer.ElapsedTicks;

            var step = stateMachine.Compile();
            var ticksAfterCompile = phaseTimer.ElapsedTicks;

            var currentState = new Id<DfaState<LetterId>>(startStateId);

            // The first invocation is reported as the "JIT" phase.
            step(ref currentState, ' ');
            var ticksAfterJit = phaseTimer.ElapsedTicks;

            // The second invocation is reported as the "Exec" phase.
            step(ref currentState, ' ');
            phaseTimer.Stop();

            var compileTicks = ticksAfterCompile - ticksAfterBuild;
            var jitTicks     = ticksAfterJit - ticksAfterCompile;
            var execTicks    = phaseTimer.ElapsedTicks - ticksAfterJit;

            log.WriteLine("Build ticks: " + ticksAfterBuild);
            log.WriteLine("Compile ticks: " + compileTicks);
            log.WriteLine("JIT ticks: " + jitTicks);
            log.WriteLine("Exec ticks: " + execTicks);
            log.WriteLine("Total ms: " + phaseTimer.ElapsedMilliseconds);
            log.WriteLine(stateMachine.ToReadableString());
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Asserts whether a character is contained in the Unicode range set identified by
        /// <paramref name="unicodeName"/>.
        /// </summary>
        /// <param name="ch">
        /// Either an escape sequence starting with a backslash, which is resolved through
        /// <c>RegexMatchSet.ParseEscape</c>, or a string holding exactly one <c>char</c>.
        /// </param>
        /// <param name="inSet">The expected result of the containment check.</param>
        /// <param name="unicodeName">The name handed to <c>UnicodeRanges.FromUnicodeName</c>.</param>
        public void CharacterInSet(string ch, bool inSet, string unicodeName)
        {
            Codepoint parsedChar;

            // The backslash is a syntactic marker, so compare ordinally: culture-specific
            // collation rules must not decide whether the input is treated as an escape.
            if (ch.StartsWith("\\", System.StringComparison.Ordinal))
            {
                // The escape is expected to resolve to a static handle holding a single codepoint;
                // anything else fails the cast or the assertion below.
                var handle = (RangeSetHandle.Static)RegexMatchSet.ParseEscape(ch);

                Assert.True(handle.TryGetSingle(out parsedChar));
            }
            else
            {
                // Single() throws unless the string consists of exactly one char.
                parsedChar = ch.Single();
            }
            Assert.Equal(inSet, UnicodeRanges.FromUnicodeName(unicodeName).Contains(parsedChar));
        }
        /// <summary>
        /// Converts a regex escape sequence into the range set handle it denotes.
        /// </summary>
        /// <param name="escape">The escape sequence text; it must be matched by <c>rxEscape</c>.</param>
        /// <returns>
        /// A <c>RangeSetHandle.Static</c> for named Unicode sets, hex codepoints, control-character
        /// escapes and escaped literal characters; a <c>RangeSetHandle.Class</c> for the
        /// <c>d</c>/<c>D</c>, <c>w</c>/<c>W</c> and <c>s</c>/<c>S</c> character classes.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="escape"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentException"><paramref name="escape"/> is not matched by <c>rxEscape</c>.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The escaped character is a letter or digit that has no meaning as an escape.
        /// </exception>
        public static RangeSetHandle ParseEscape(string escape)
        {
            // Fail with the caller's parameter name instead of letting the matcher reject the null input.
            if (escape == null)
            {
                throw new ArgumentNullException(nameof(escape));
            }

            var match = rxEscape.Match(escape);

            if (!match.Success)
            {
                throw new ArgumentException("Escape is invalid", nameof(escape));
            }

            // Named Unicode set; the second constructor argument is true when the escape letter is an
            // upper-case "P" (presumably the negated form - confirm against RangeSetHandle.Static).
            if (match.Groups["name"].Success)
            {
                return new RangeSetHandle.Static(UnicodeRanges.FromUnicodeName(match.Groups["name"].Value), match.Groups["c"].Value == "P");
            }

            // Codepoint given in hexadecimal notation.
            if (match.Groups["hex"].Success)
            {
                return new RangeSetHandle.Static(Codepoint.Parse(match.Groups["hex"].Value));
            }

            // NOTE(review): assumes rxEscape always captures at least one character in group "c"
            // when neither "name" nor "hex" matched - verify against the pattern.
            var c = match.Groups["c"].Value[0];

            switch (c)
            {
            // Control-character escapes.
            case '0':
                return new RangeSetHandle.Static('\0');

            case 'r':
                return new RangeSetHandle.Static('\r');

            case 'n':
                return new RangeSetHandle.Static('\n');

            case 't':
                return new RangeSetHandle.Static('\t');

            case 'a':
                return new RangeSetHandle.Static('\x07');

            case 'e':
                return new RangeSetHandle.Static('\x1B');

            case 'f':
                return new RangeSetHandle.Static('\x0C');

            case 'v':
                return new RangeSetHandle.Static('\x0B');

            // Character classes; the upper-case letter passes true as the second argument.
            case 'd':
                return new RangeSetHandle.Class(CharSetClass.Digit, false);

            case 'D':
                return new RangeSetHandle.Class(CharSetClass.Digit, true);

            case 'w':
                return new RangeSetHandle.Class(CharSetClass.Word, false);

            case 'W':
                return new RangeSetHandle.Class(CharSetClass.Word, true);

            case 's':
                return new RangeSetHandle.Class(CharSetClass.Space, false);

            case 'S':
                return new RangeSetHandle.Class(CharSetClass.Space, true);

            default:
                // Letters and digits without an assigned meaning are rejected;
                // any other character stands for itself.
                if (char.IsLetterOrDigit(c))
                {
                    throw new ArgumentOutOfRangeException(nameof(escape), "Invalid escape character " + c);
                }
                return new RangeSetHandle.Static(c);
            }
        }
 /// <summary>
 /// Creates a match set for the Unicode range set with the given name.
 /// </summary>
 /// <param name="name">The name handed to <c>UnicodeRanges.FromUnicodeName</c>.</param>
 /// <param name="negate">
 /// When <c>true</c>, the textual form uses an upper-case <c>P</c> and the flag is forwarded to the handle.
 /// </param>
 /// <returns>A <c>RegexMatchSet</c> built from the escape text and a static range set handle.</returns>
 public static RegexMatchSet FromUnicode(string name, bool negate = false)
 {
     // Textual form of the set: a backslash, 'p' or 'P', then the name in braces.
     var escapeText = $@"\{(negate ? 'P' : 'p')}{{{name}}}";
     var ranges     = UnicodeRanges.FromUnicodeName(name);
     var handle     = new RangeSetHandle.Static(ranges, negate);

     return new RegexMatchSet(escapeText, handle);
 }