Exemple #1
0
        /// <summary>
        /// Appends a fiber to the fiber list and then follows the instruction it points at:
        /// a Jmp enqueues one fiber per jump target, a Save enqueues a fiber for the next
        /// instruction with the current input position recorded in the requested slot.
        /// Any other instruction leaves the fiber in the list for the caller to execute.
        /// </summary>
        /// <param name="lcount">The number of fibers currently in <paramref name="l"/>; incremented for each fiber added</param>
        /// <param name="l">The fiber list; replaced by a larger array if it is full</param>
        /// <param name="t">The fiber to add</param>
        /// <param name="sp">The input position to record for Save instructions</param>
        static void _EnqueueFiber(ref int lcount, ref _Fiber[] l, _Fiber t, int sp)
        {
            // really shouldn't happen, but maybe it might
            if (l.Length <= lcount)
            {
                // double the capacity, but always make room for at least one more
                // entry so that an empty list can grow as well
                Array.Resize(ref l, Math.Max(lcount + 1, l.Length * 2));
            }
            l[lcount] = t;
            ++lcount;

            var pc = t.Program[t.Index];

            switch (pc[0])
            {
            case Compiler.Jmp:
                // pc[1..] are the jump targets; each one gets its own fiber
                for (var j = 1; j < pc.Length; j++)
                {
                    _EnqueueFiber(ref lcount, ref l, new _Fiber(t.Program, pc[j], t.Saved), sp);
                }
                break;

            case Compiler.Save:
                var slot  = pc[1];
                // the copy must be able to hold index "slot", which takes slot + 1 entries
                var max   = slot >= t.Saved.Length ? slot + 1 : t.Saved.Length;
                // copy rather than mutate, since the source array is shared with other fibers
                var saved = new int[max];
                Array.Copy(t.Saved, saved, t.Saved.Length);
                saved[slot] = sp;
                _EnqueueFiber(ref lcount, ref l, new _Fiber(t, t.Index + 1, saved), sp);
                break;
            }
        }
Exemple #2
0
 /// <summary>
 /// Creates a fiber that shares the program of <paramref name="fiber"/> but has its own
 /// instruction index and saved positions.
 /// </summary>
 /// <param name="fiber">The fiber whose program is reused</param>
 /// <param name="index">The index of the instruction this fiber points at</param>
 /// <param name="saved">The saved positions carried by this fiber</param>
 public _Fiber(_Fiber fiber, int index, int[] saved)
 {
     this.Program = fiber.Program;
     this.Index   = index;
     this.Saved   = saved;
 }
Exemple #3
0
        /// <summary>
        /// Runs the specified program over the specified input, logging the run to <paramref name="log"/>
        /// and gathering statistics about it
        /// </summary>
        /// <param name="prog">The program to run</param>
        /// <param name="input">The input to match</param>
        /// <param name="log">The log to output to</param>
        /// <param name="result">Receives the id of the match, or -1 for an error. <see cref="LexContext.CaptureBuffer"/> contains the captured value.</param>
        /// <returns>A <see cref="LexStatistics"/> built from the largest number of fibers queued for a single pass and the number of logged instructions divided by the final input position plus one</returns>
        public static LexStatistics RunWithLoggingAndStatistics(int[][] prog, LexContext input, TextWriter log, out int result)
        {
            // this is a separate copy of the run loop so that the main routine
            // does not carry the overhead of logging
            input.EnsureStarted();
            int i, match = -1;
            // number of logged instructions executed so far
            int passes        = 0;
            // largest number of fibers queued for a single pass
            int maxFiberCount = 0;

            _Fiber[] currentFibers, nextFibers, tmp;
            int      currentFiberCount = 0, nextFiberCount = 0;

            int[] pc;
            // position in input
            int sp = 0;
            // stores our captured input
            var sb = new StringBuilder(64);

            int[] saved, matched;
            saved         = new int[2];
            currentFibers = new _Fiber[prog.Length];
            nextFibers    = new _Fiber[prog.Length];
            _EnqueueFiber(ref currentFiberCount, ref currentFibers, new _Fiber(prog, 0, saved), 0);
            if (currentFiberCount > maxFiberCount)
            {
                maxFiberCount = currentFiberCount;
            }
            matched = null;
            // the current codepoint, or -1 at the end of the input
            int cur;

            // prime "cur" with the first codepoint, combining a surrogate pair if necessary
            if (LexContext.EndOfInput != input.Current)
            {
                var ch1 = unchecked ((char)input.Current);
                if (char.IsHighSurrogate(ch1))
                {
                    if (-1 == input.Advance())
                    {
                        throw new ExpectingException("Expecting low surrogate in unicode stream. The input source is corrupt or not valid Unicode", input.Line, input.Column, input.Position, input.FileOrUrl);
                    }
                    // a surrogate pair occupies two positions
                    ++sp;
                    var ch2 = unchecked ((char)input.Current);
                    cur = char.ConvertToUtf32(ch1, ch2);
                }
                else
                {
                    cur = ch1;
                }
            }
            else
            {
                cur = -1;
            }

            while (0 < currentFiberCount)
            {
                // set when at least one fiber consumed the current codepoint
                bool passed = false;
                for (i = 0; i < currentFiberCount; ++i)
                {
                    var lpassed   = false;
                    var shouldLog = false;
                    var t         = currentFibers[i];
                    pc    = t.Program[t.Index];
                    saved = t.Saved;
                    switch (pc[0])
                    {
                    case Compiler.Switch:
                        var idx = 1;
                        shouldLog = true;
                        // walk the range groups until the default marker (-2) or the end
                        while (idx < pc.Length && -2 < pc[idx])
                        {
                            if (_InRanges(pc, ref idx, cur))
                            {
                                // skip the rest of the ranges up to the -1 terminator
                                while (-1 != pc[idx])
                                {
                                    ++idx;
                                }

                                ++idx;
                                lpassed = true;
                                passed  = true;
                                _EnqueueFiber(ref nextFiberCount, ref nextFibers, new _Fiber(t, pc[idx], saved), sp + 1);
                                // prevents the default branch below from running
                                idx = pc.Length;
                                break;
                            }
                            else
                            {
                                while (-1 != pc[idx])
                                {
                                    ++idx;
                                }
                                ++idx;
                            }
                            ++idx;
                        }
                        if (idx < pc.Length && -2 == pc[idx])
                        {
                            // default branch: the remaining entries are targets that consume no input
                            ++idx;
                            while (pc.Length > idx)
                            {
                                _EnqueueFiber(ref nextFiberCount, ref nextFibers, new _Fiber(t, pc[idx], saved), sp);
                                ++idx;
                            }
                        }
                        break;

                    case Compiler.Char:
                        shouldLog = true;
                        if (cur == pc[1])
                        {
                            goto case Compiler.Any;
                        }
                        break;

                    case Compiler.Set:
                        shouldLog = true;
                        idx       = 1;
                        if (_InRanges(pc, ref idx, cur))
                        {
                            goto case Compiler.Any;
                        }
                        break;

                    case Compiler.NSet:
                        shouldLog = true;
                        idx       = 1;
                        if (!_InRanges(pc, ref idx, cur))
                        {
                            goto case Compiler.Any;
                        }
                        break;

                    case Compiler.UCode:
                        shouldLog = true;
                        // char.ConvertFromUtf32 throws for -1, so the end of input is never a match
                        if (-1 != cur && unchecked ((int)char.GetUnicodeCategory(char.ConvertFromUtf32(cur), 0)) == pc[1])
                        {
                            goto case Compiler.Any;
                        }
                        break;

                    case Compiler.NUCode:
                        shouldLog = true;
                        // char.ConvertFromUtf32 throws for -1, so the end of input is never a match
                        if (-1 != cur && unchecked ((int)char.GetUnicodeCategory(char.ConvertFromUtf32(cur), 0)) != pc[1])
                        {
                            goto case Compiler.Any;
                        }
                        break;

                    case Compiler.Any:
                        shouldLog = true;
                        if (LexContext.EndOfInput != input.Current)
                        {
                            passed  = true;
                            lpassed = true;
                            _EnqueueFiber(ref nextFiberCount, ref nextFibers, new _Fiber(t, t.Index + 1, saved), sp + 1);
                        }
                        break;

                    case Compiler.Match:
                        matched = saved;
                        match   = pc[1];

                        // break the for loop:
                        i = currentFiberCount;
                        break;
                    }

                    if (shouldLog)
                    {
                        ++passes;
                        _LogInstruction(input, pc, cur, sp, lpassed, log);
                    }
                }

                if (passed)
                {
                    // capture the consumed codepoint and fetch the next one
                    sb.Append(char.ConvertFromUtf32(cur));
                    input.Advance();
                    if (LexContext.EndOfInput != input.Current)
                    {
                        var ch1 = unchecked ((char)input.Current);
                        if (char.IsHighSurrogate(ch1))
                        {
                            if (-1 == input.Advance())
                            {
                                throw new ExpectingException("Expecting low surrogate in unicode stream. The input source is corrupt or not valid Unicode", input.Line, input.Column, input.Position, input.FileOrUrl);
                            }
                            ++sp;
                            var ch2 = unchecked ((char)input.Current);
                            cur = char.ConvertToUtf32(ch1, ch2);
                        }
                        else
                        {
                            cur = ch1;
                        }
                    }
                    else
                    {
                        cur = -1;
                    }
                    ++sp;
                }

                // the fibers queued during this pass become the work list for the next pass
                tmp               = currentFibers;
                currentFibers     = nextFibers;
                nextFibers        = tmp;
                currentFiberCount = nextFiberCount;
                nextFiberCount    = 0;

                // track the peak on every pass, not just when a switch default branch runs
                if (currentFiberCount > maxFiberCount)
                {
                    maxFiberCount = currentFiberCount;
                }
            }

            result = -1;             // error symbol
            if (null != matched)
            {
                var start = matched[0];
                // this is actually the point just past the end
                // of the match, but we can treat it as the length
                var len = matched[1];
                input.CaptureBuffer.Append(sb.ToString(start, len - start));
                result = match;
            }
            return(new LexStatistics(maxFiberCount, passes / (sp + 1f)));
        }
Exemple #4
0
        /// <summary>
        /// Runs the specified program over the specified input
        /// </summary>
        /// <param name="prog">The program to run</param>
        /// <param name="input">The input to match</param>
        /// <returns>The id of the match, or -1 for an error. <see cref="LexContext.CaptureBuffer"/> contains the captured value.</returns>
        public static int Run(int[][] prog, LexContext input)
        {
            input.EnsureStarted();
            int i, match = -1;

            _Fiber[] currentFibers, nextFibers, tmp;
            int      currentFiberCount = 0, nextFiberCount = 0;

            int[] pc;
            // position in input
            int sp = 0;
            // stores our captured input
            var sb = new StringBuilder(64);

            int[] saved, matched;
            saved         = new int[2];
            currentFibers = new _Fiber[prog.Length];
            nextFibers    = new _Fiber[prog.Length];
            _EnqueueFiber(ref currentFiberCount, ref currentFibers, new _Fiber(prog, 0, saved), 0);
            matched = null;
            // the current codepoint, or -1 at the end of the input
            var cur = -1;

            // prime "cur" with the first codepoint, combining a surrogate pair if necessary
            if (LexContext.EndOfInput != input.Current)
            {
                var ch1 = unchecked ((char)input.Current);
                if (char.IsHighSurrogate(ch1))
                {
                    if (-1 == input.Advance())
                    {
                        throw new ExpectingException("Expecting low surrogate in unicode stream. The input source is corrupt or not valid Unicode", input.Line, input.Column, input.Position, input.FileOrUrl);
                    }
                    // a surrogate pair occupies two positions
                    ++sp;
                    var ch2 = unchecked ((char)input.Current);
                    cur = char.ConvertToUtf32(ch1, ch2);
                }
                else
                {
                    cur = ch1;
                }
            }

            while (0 < currentFiberCount)
            {
                // set when at least one fiber consumed the current codepoint
                bool passed = false;

                for (i = 0; i < currentFiberCount; ++i)
                {
                    var t = currentFibers[i];
                    pc    = t.Program[t.Index];
                    saved = t.Saved;
                    switch (pc[0])
                    {
                    case Compiler.Switch:
                        var idx = 1;
                        // walk the range groups until the default marker (-2) or the end
                        while (idx < pc.Length && -2 < pc[idx])
                        {
                            if (_InRanges(pc, ref idx, cur))
                            {
                                // skip the rest of the ranges up to the -1 terminator
                                while (-1 != pc[idx])
                                {
                                    ++idx;
                                }

                                ++idx;
                                passed = true;
                                _EnqueueFiber(ref nextFiberCount, ref nextFibers, new _Fiber(t, pc[idx], saved), sp + 1);
                                // prevents the default branch below from running
                                idx = pc.Length;
                                break;
                            }
                            else
                            {
                                while (-1 != pc[idx])
                                {
                                    ++idx;
                                }
                                ++idx;
                            }
                            ++idx;
                        }
                        if (idx < pc.Length && -2 == pc[idx])
                        {
                            // default branch: the remaining entries are targets that consume no input
                            ++idx;
                            while (idx < pc.Length)
                            {
                                _EnqueueFiber(ref nextFiberCount, ref nextFibers, new _Fiber(t, pc[idx], saved), sp);
                                ++idx;
                            }
                        }
                        break;

                    case Compiler.Char:
                        if (cur != pc[1])
                        {
                            break;
                        }
                        goto case Compiler.Any;

                    case Compiler.Set:
                        idx = 1;
                        if (!_InRanges(pc, ref idx, cur))
                        {
                            break;
                        }
                        goto case Compiler.Any;

                    case Compiler.NSet:
                        idx = 1;
                        if (_InRanges(pc, ref idx, cur))
                        {
                            break;
                        }
                        goto case Compiler.Any;

                    case Compiler.UCode:
                        // char.ConvertFromUtf32 throws for -1, so the end of input is never a match
                        if (-1 == cur || unchecked ((int)char.GetUnicodeCategory(char.ConvertFromUtf32(cur), 0)) != pc[1])
                        {
                            break;
                        }
                        goto case Compiler.Any;

                    case Compiler.NUCode:
                        // char.ConvertFromUtf32 throws for -1, so the end of input is never a match
                        if (-1 == cur || unchecked ((int)char.GetUnicodeCategory(char.ConvertFromUtf32(cur), 0)) == pc[1])
                        {
                            break;
                        }
                        goto case Compiler.Any;

                    case Compiler.Any:
                        if (LexContext.EndOfInput == input.Current)
                        {
                            break;
                        }
                        passed = true;
                        _EnqueueFiber(ref nextFiberCount, ref nextFibers, new _Fiber(t, t.Index + 1, saved), sp + 1);

                        break;

                    case Compiler.Match:
                        matched = saved;
                        match   = pc[1];

                        // break the for loop:
                        i = currentFiberCount;
                        break;
                    }
                }
                if (passed)
                {
                    // capture the consumed codepoint and fetch the next one
                    sb.Append(char.ConvertFromUtf32(cur));
                    input.Advance();
                    if (LexContext.EndOfInput != input.Current)
                    {
                        var ch1 = unchecked ((char)input.Current);
                        if (char.IsHighSurrogate(ch1))
                        {
                            // advance exactly once so that Current is the low surrogate;
                            // advancing twice would skip over it
                            if (-1 == input.Advance())
                            {
                                throw new ExpectingException("Expecting low surrogate in unicode stream. The input source is corrupt or not valid Unicode", input.Line, input.Column, input.Position, input.FileOrUrl);
                            }
                            ++sp;
                            var ch2 = unchecked ((char)input.Current);
                            cur = char.ConvertToUtf32(ch1, ch2);
                        }
                        else
                        {
                            cur = ch1;
                        }
                    }
                    else
                    {
                        cur = -1;
                    }
                    ++sp;
                }

                // the fibers queued during this pass become the work list for the next pass
                tmp               = currentFibers;
                currentFibers     = nextFibers;
                nextFibers        = tmp;
                currentFiberCount = nextFiberCount;
                nextFiberCount    = 0;
            }

            if (null != matched)
            {
                var start = matched[0];
                // this is actually the point just past the end
                // of the match, but we can treat it as the length
                var len = matched[1];
                input.CaptureBuffer.Append(sb.ToString(start, len - start));
                return(match);
            }
            return(-1);            // error symbol
        }