/// <summary>
/// Appends a fiber to a fiber list and then follows the instruction at the fiber's
/// position when it is a jump or a save, enqueuing the resulting fibers recursively.
/// </summary>
/// <param name="lcount">The number of fibers in <paramref name="l"/>; incremented for every fiber added</param>
/// <param name="l">The fiber list; replaced with a larger array when it is full</param>
/// <param name="t">The fiber to add</param>
/// <param name="sp">The input position written into the save slot by a save instruction</param>
static void _EnqueueFiber(ref int lcount, ref _Fiber[] l, _Fiber t, int sp)
{
    // really shouldn't happen, but maybe it might
    if (l.Length <= lcount)
    {
        // doubling a zero-length list would yield another zero-length list,
        // so always make room for at least the fiber being added
        var newarr = new _Fiber[Math.Max(l.Length * 2, lcount + 1)];
        Array.Copy(l, 0, newarr, 0, l.Length);
        l = newarr;
    }
    l[lcount] = t;
    ++lcount;
    var pc = t.Program[t.Index];
    switch (pc[0])
    {
        case Compiler.Jmp:
            // every operand after the opcode is a jump target; fork one fiber per target
            for (var j = 1; j < pc.Length; j++)
            {
                _EnqueueFiber(ref lcount, ref l, new _Fiber(t.Program, pc[j], t.Saved), sp);
            }
            break;
        case Compiler.Save:
            var slot = pc[1];
            // the copy must be able to hold index "slot" itself, so it needs at
            // least slot + 1 entries (sizing it to "slot" overruns the array)
            var saved = new int[Math.Max(slot + 1, t.Saved.Length)];
            // copy rather than mutate: other fibers may share t.Saved
            Array.Copy(t.Saved, 0, saved, 0, t.Saved.Length);
            saved[slot] = sp;
            _EnqueueFiber(ref lcount, ref l, new _Fiber(t, t.Index + 1, saved), sp);
            break;
    }
}
/// <summary>
/// Creates a fiber that shares the program of <paramref name="fiber"/> but has its own
/// instruction index and saved-position array.
/// </summary>
/// <param name="fiber">The fiber whose program is reused</param>
/// <param name="index">The instruction index for the new fiber</param>
/// <param name="saved">The saved-position array for the new fiber</param>
public _Fiber(_Fiber fiber, int index, int[] saved)
{
    var program = fiber.Program;
    this.Program = program;
    this.Saved = saved;
    this.Index = index;
}
/// <summary>
/// Runs the specified program over the specified input, logging the run to <paramref name="log"/>
/// </summary>
/// <param name="prog">The program to run</param>
/// <param name="input">The input to match</param>
/// <param name="log">The log to output to</param>
/// <param name="result">Receives the id of the match, or -1 for an error. <see cref="LexContext.CaptureBuffer"/> contains the captured value.</param>
/// <returns>The statistics for the run, built from the largest fiber count seen and the number of logged instructions divided by the final input position plus one.</returns>
/// <exception cref="ExpectingException">The input ends immediately after a high surrogate</exception>
public static LexStatistics RunWithLoggingAndStatistics(int[][] prog, LexContext input, TextWriter log, out int result)
{
    // for speed we rewrite this routine so we don't have the overhead of
    // logging in the main routine
    input.EnsureStarted();
    int i, match = -1;
    int passes = 0;
    int maxFiberCount = 0;
    _Fiber[] currentFibers, nextFibers, tmp;
    int currentFiberCount = 0, nextFiberCount = 0;
    int[] pc;
    // position in input (advanced by two for a surrogate pair)
    int sp = 0;
    // stores our captured input
    var sb = new StringBuilder(64);
    int[] saved, matched;
    saved = new int[2];
    currentFibers = new _Fiber[prog.Length];
    nextFibers = new _Fiber[prog.Length];
    _EnqueueFiber(ref currentFiberCount, ref currentFibers, new _Fiber(prog, 0, saved), 0);
    if (currentFiberCount > maxFiberCount)
    {
        maxFiberCount = currentFiberCount;
    }
    matched = null;
    // decode the first code point; cur is -1 when there is no input left
    int cur;
    if (LexContext.EndOfInput != input.Current)
    {
        var ch1 = unchecked((char)input.Current);
        if (char.IsHighSurrogate(ch1))
        {
            if (-1 == input.Advance())
            {
                throw new ExpectingException("Expecting low surrogate in unicode stream. The input source is corrupt or not valid Unicode", input.Line, input.Column, input.Position, input.FileOrUrl);
            }
            ++sp;
            var ch2 = unchecked((char)input.Current);
            cur = char.ConvertToUtf32(ch1, ch2);
        }
        else
        {
            cur = ch1;
        }
    }
    else
    {
        cur = -1;
    }
    while (0 < currentFiberCount)
    {
        // set when at least one fiber consumed the current code point
        bool passed = false;
        for (i = 0; i < currentFiberCount; ++i)
        {
            var lpassed = false;
            var shouldLog = false;
            var t = currentFibers[i];
            pc = t.Program[t.Index];
            saved = t.Saved;
            switch (pc[0])
            {
                case Compiler.Switch:
                    var idx = 1;
                    shouldLog = true;
                    // each case is a run of ranges ended by -1 and followed by its
                    // destination; a -2 marks the start of the default destinations
                    while (idx < pc.Length && -2 < pc[idx])
                    {
                        if (_InRanges(pc, ref idx, cur))
                        {
                            // skip the rest of the ranges to reach the destination
                            while (-1 != pc[idx])
                            {
                                ++idx;
                            }
                            ++idx;
                            lpassed = true;
                            passed = true;
                            _EnqueueFiber(ref nextFiberCount, ref nextFibers, new _Fiber(t, pc[idx], saved), sp + 1);
                            // a case matched: make sure the default branch below is skipped
                            idx = pc.Length;
                            break;
                        }
                        else
                        {
                            while (-1 != pc[idx])
                            {
                                ++idx;
                            }
                            ++idx;
                        }
                        // step over the destination of the case that did not match
                        ++idx;
                    }
                    if (idx < pc.Length && -2 == pc[idx])
                    {
                        ++idx;
                        // NOTE(review): the default targets are queued for the next pass at the
                        // current position (sp, not sp + 1). If another fiber consumes input in
                        // this pass they will be tested against the following code point -
                        // confirm this is intended.
                        while (pc.Length > idx)
                        {
                            _EnqueueFiber(ref nextFiberCount, ref nextFibers, new _Fiber(t, pc[idx], saved), sp);
                            ++idx;
                        }
                    }
                    break;
                case Compiler.Char:
                    shouldLog = true;
                    if (cur == pc[1])
                    {
                        goto case Compiler.Any;
                    }
                    break;
                case Compiler.Set:
                    shouldLog = true;
                    idx = 1;
                    if (_InRanges(pc, ref idx, cur))
                    {
                        goto case Compiler.Any;
                    }
                    break;
                case Compiler.NSet:
                    shouldLog = true;
                    idx = 1;
                    if (!_InRanges(pc, ref idx, cur))
                    {
                        goto case Compiler.Any;
                    }
                    break;
                case Compiler.UCode:
                    shouldLog = true;
                    // cur is -1 at end of input, which ConvertFromUtf32 rejects with an
                    // exception; there is nothing to match there, so the fiber just dies
                    if (0 > cur)
                    {
                        break;
                    }
                    var str = char.ConvertFromUtf32(cur);
                    if (unchecked((int)char.GetUnicodeCategory(str, 0)) == pc[1])
                    {
                        goto case Compiler.Any;
                    }
                    break;
                case Compiler.NUCode:
                    shouldLog = true;
                    // same end-of-input guard as UCode
                    if (0 > cur)
                    {
                        break;
                    }
                    str = char.ConvertFromUtf32(cur);
                    if (unchecked((int)char.GetUnicodeCategory(str, 0)) != pc[1])
                    {
                        goto case Compiler.Any;
                    }
                    break;
                case Compiler.Any:
                    shouldLog = true;
                    if (LexContext.EndOfInput != input.Current)
                    {
                        passed = true;
                        lpassed = true;
                        _EnqueueFiber(ref nextFiberCount, ref nextFibers, new _Fiber(t, t.Index + 1, saved), sp + 1);
                    }
                    break;
                case Compiler.Match:
                    matched = saved;
                    match = pc[1];
                    // break the for loop:
                    i = currentFiberCount;
                    break;
            }
            if (shouldLog)
            {
                ++passes;
                _LogInstruction(input, pc, cur, sp, lpassed, log);
            }
        }
        if (passed)
        {
            // keep the consumed code point and decode the next one
            sb.Append(char.ConvertFromUtf32(cur));
            input.Advance();
            if (LexContext.EndOfInput != input.Current)
            {
                var ch1 = unchecked((char)input.Current);
                if (char.IsHighSurrogate(ch1))
                {
                    if (-1 == input.Advance())
                    {
                        throw new ExpectingException("Expecting low surrogate in unicode stream. The input source is corrupt or not valid Unicode", input.Line, input.Column, input.Position, input.FileOrUrl);
                    }
                    ++sp;
                    var ch2 = unchecked((char)input.Current);
                    cur = char.ConvertToUtf32(ch1, ch2);
                }
                else
                {
                    cur = ch1;
                }
            }
            else
            {
                cur = -1;
            }
            ++sp;
        }
        // the fibers queued during this pass become the current list
        tmp = currentFibers;
        currentFibers = nextFibers;
        nextFibers = tmp;
        currentFiberCount = nextFiberCount;
        nextFiberCount = 0;
        // sample the statistic once per pass; the current list only changes size here
        if (currentFiberCount > maxFiberCount)
        {
            maxFiberCount = currentFiberCount;
        }
    }
    if (null != matched)
    {
        var start = matched[0];
        // this is actually the point just past the end
        // of the match, but we can treat it as the length
        var len = matched[1];
        input.CaptureBuffer.Append(sb.ToString(start, len - start));
        result = match;
        return new LexStatistics(maxFiberCount, passes / (sp + 1f));
    }
    result = -1; // error symbol
    return new LexStatistics(maxFiberCount, passes / (sp + 1f));
}
/// <summary>
/// Runs the specified program over the specified input
/// </summary>
/// <param name="prog">The program to run</param>
/// <param name="input">The input to match</param>
/// <returns>The id of the match, or -1 for an error. <see cref="LexContext.CaptureBuffer"/> contains the captured value.</returns>
/// <exception cref="ExpectingException">The input ends immediately after a high surrogate</exception>
public static int Run(int[][] prog, LexContext input)
{
    input.EnsureStarted();
    int i, match = -1;
    _Fiber[] currentFibers, nextFibers, tmp;
    int currentFiberCount = 0, nextFiberCount = 0;
    int[] pc;
    // position in input (advanced by two for a surrogate pair)
    int sp = 0;
    // stores our captured input
    var sb = new StringBuilder(64);
    int[] saved, matched;
    saved = new int[2];
    currentFibers = new _Fiber[prog.Length];
    nextFibers = new _Fiber[prog.Length];
    _EnqueueFiber(ref currentFiberCount, ref currentFibers, new _Fiber(prog, 0, saved), 0);
    matched = null;
    // decode the first code point; cur stays -1 when there is no input left
    var cur = -1;
    if (LexContext.EndOfInput != input.Current)
    {
        var ch1 = unchecked((char)input.Current);
        if (char.IsHighSurrogate(ch1))
        {
            if (-1 == input.Advance())
            {
                throw new ExpectingException("Expecting low surrogate in unicode stream. The input source is corrupt or not valid Unicode", input.Line, input.Column, input.Position, input.FileOrUrl);
            }
            ++sp;
            var ch2 = unchecked((char)input.Current);
            cur = char.ConvertToUtf32(ch1, ch2);
        }
        else
        {
            cur = ch1;
        }
    }
    while (0 < currentFiberCount)
    {
        // set when at least one fiber consumed the current code point
        bool passed = false;
        for (i = 0; i < currentFiberCount; ++i)
        {
            var t = currentFibers[i];
            pc = t.Program[t.Index];
            saved = t.Saved;
            switch (pc[0])
            {
                case Compiler.Switch:
                    var idx = 1;
                    // each case is a run of ranges ended by -1 and followed by its
                    // destination; a -2 marks the start of the default destinations
                    while (idx < pc.Length && -2 < pc[idx])
                    {
                        if (_InRanges(pc, ref idx, cur))
                        {
                            // skip the rest of the ranges to reach the destination
                            while (-1 != pc[idx])
                            {
                                ++idx;
                            }
                            ++idx;
                            passed = true;
                            _EnqueueFiber(ref nextFiberCount, ref nextFibers, new _Fiber(t, pc[idx], saved), sp + 1);
                            // a case matched: make sure the default branch below is skipped
                            idx = pc.Length;
                            break;
                        }
                        else
                        {
                            while (-1 != pc[idx])
                            {
                                ++idx;
                            }
                            ++idx;
                        }
                        // step over the destination of the case that did not match
                        ++idx;
                    }
                    if (idx < pc.Length && -2 == pc[idx])
                    {
                        ++idx;
                        // NOTE(review): the default targets are queued for the next pass at the
                        // current position (sp, not sp + 1). If another fiber consumes input in
                        // this pass they will be tested against the following code point -
                        // confirm this is intended.
                        while (idx < pc.Length)
                        {
                            _EnqueueFiber(ref nextFiberCount, ref nextFibers, new _Fiber(t, pc[idx], saved), sp);
                            ++idx;
                        }
                    }
                    break;
                case Compiler.Char:
                    if (cur != pc[1])
                    {
                        break;
                    }
                    goto case Compiler.Any;
                case Compiler.Set:
                    idx = 1;
                    if (!_InRanges(pc, ref idx, cur))
                    {
                        break;
                    }
                    goto case Compiler.Any;
                case Compiler.NSet:
                    idx = 1;
                    if (_InRanges(pc, ref idx, cur))
                    {
                        break;
                    }
                    goto case Compiler.Any;
                case Compiler.UCode:
                    // cur is -1 at end of input, which ConvertFromUtf32 rejects with an
                    // exception; there is nothing to match there, so the fiber just dies
                    if (0 > cur)
                    {
                        break;
                    }
                    var str = char.ConvertFromUtf32(cur);
                    if (unchecked((int)char.GetUnicodeCategory(str, 0)) != pc[1])
                    {
                        break;
                    }
                    goto case Compiler.Any;
                case Compiler.NUCode:
                    // same end-of-input guard as UCode
                    if (0 > cur)
                    {
                        break;
                    }
                    str = char.ConvertFromUtf32(cur);
                    if (unchecked((int)char.GetUnicodeCategory(str, 0)) == pc[1])
                    {
                        break;
                    }
                    goto case Compiler.Any;
                case Compiler.Any:
                    if (LexContext.EndOfInput == input.Current)
                    {
                        break;
                    }
                    passed = true;
                    _EnqueueFiber(ref nextFiberCount, ref nextFibers, new _Fiber(t, t.Index + 1, saved), sp + 1);
                    break;
                case Compiler.Match:
                    matched = saved;
                    match = pc[1];
                    // break the for loop:
                    i = currentFiberCount;
                    break;
            }
        }
        if (passed)
        {
            // keep the consumed code point and decode the next one
            sb.Append(char.ConvertFromUtf32(cur));
            input.Advance();
            if (LexContext.EndOfInput != input.Current)
            {
                var ch1 = unchecked((char)input.Current);
                if (char.IsHighSurrogate(ch1))
                {
                    // advance exactly once so that Current is the low surrogate;
                    // advancing twice would skip it and pair ch1 with the wrong unit
                    if (-1 == input.Advance())
                    {
                        throw new ExpectingException("Expecting low surrogate in unicode stream. The input source is corrupt or not valid Unicode", input.Line, input.Column, input.Position, input.FileOrUrl);
                    }
                    ++sp;
                    var ch2 = unchecked((char)input.Current);
                    cur = char.ConvertToUtf32(ch1, ch2);
                }
                else
                {
                    cur = ch1;
                }
            }
            else
            {
                cur = -1;
            }
            ++sp;
        }
        // the fibers queued during this pass become the current list
        tmp = currentFibers;
        currentFibers = nextFibers;
        nextFibers = tmp;
        currentFiberCount = nextFiberCount;
        nextFiberCount = 0;
    }
    if (null != matched)
    {
        var start = matched[0];
        // this is actually the point just past the end
        // of the match, but we can treat it as the length
        var len = matched[1];
        input.CaptureBuffer.Append(sb.ToString(start, len - start));
        return match;
    }
    return -1; // error symbol
}