/// <summary>
/// Adds the transitions for the "end" edge of a range, handling one byte of
/// <paramref name="utf8"/> per recursion level.
/// </summary>
/// <param name="start">State the new transitions leave from.</param>
/// <param name="end">State the transition for the last byte arrives at.</param>
/// <param name="utf8">Byte sequence being walked.</param>
/// <param name="upto">Index of the byte handled at this level.</param>
/// <param name="doAll">
/// When true (and the current byte differs from the computed start code),
/// <c>All</c> is also invoked for the byte values from the start code up to
/// one below the current byte.
/// </param>
private void End(State start, State end, UTF8Sequence utf8, int upto, bool doAll)
{
    // Both values are only read here; nothing in this method writes to utf8.
    int current = utf8.ByteAt(upto);
    int bits = utf8.NumBits(upto);

    if (upto == utf8.len - 1)
    {
        // Done recursing: one ranged edge from (current & ~mask) up to current.
        start.AddTransition(new Transition(current & (~MASKS[bits - 1]), current, end)); // type=end
        return;
    }

    // Special case -- avoid creating unused edges (utf8 doesn't accept
    // certain byte sequences) -- there are other cases we could optimize too.
    int startCode = (bits == 5) ? 194 : (current & (~MASKS[bits - 1]));

    if (doAll && current != startCode)
    {
        All(start, end, startCode, current - 1, utf8.len - upto - 1);
    }

    // Exact match on the current byte leads to a fresh state; the remaining
    // bytes are handled by the recursive call, always with doAll = true.
    State next = NewUTF8State();
    start.AddTransition(new Transition(current, next)); // type=end
    End(next, end, utf8, 1 + upto, true);
}
/// <summary>
/// Adds the transitions for the "start" edge of a range, handling one byte of
/// <paramref name="utf8"/> per recursion level.
/// </summary>
/// <param name="start">State the new transitions leave from.</param>
/// <param name="end">State the transition for the last byte arrives at.</param>
/// <param name="utf8">Byte sequence being walked.</param>
/// <param name="upto">Index of the byte handled at this level.</param>
/// <param name="doAll">
/// When true (and the current byte differs from the computed end code),
/// <c>All</c> is also invoked for the byte values from one above the current
/// byte up to the end code.
/// </param>
private void Start(State start, State end, UTF8Sequence utf8, int upto, bool doAll)
{
    // Read once; nothing in this method writes to utf8.
    int current = utf8.ByteAt(upto);

    if (upto == utf8.len - 1)
    {
        // Done recursing: one ranged edge from current up to (current | mask).
        start.AddTransition(new Transition(current, current | MASKS[utf8.NumBits(upto) - 1], end)); // type=start
        return;
    }

    // Exact match on the current byte leads to a fresh state; the remaining
    // bytes are handled first, always with doAll = true.
    State next = NewUTF8State();
    start.AddTransition(new Transition(current, next)); // type=start
    Start(next, end, utf8, 1 + upto, true);

    // Only after the recursion are the byte values above the current one added.
    int endCode = current | MASKS[utf8.NumBits(upto) - 1];
    if (doAll && current != endCode)
    {
        All(start, end, current + 1, endCode, utf8.len - upto - 1);
    }
}
/// <summary>
/// Adds transitions between <paramref name="start"/> and <paramref name="end"/>
/// for the range bounded by <paramref name="startUTF8"/> and
/// <paramref name="endUTF8"/>, beginning at byte index <paramref name="upto"/>.
/// The work is broken into a start part, an optional middle part and an end part.
/// </summary>
/// <param name="start">State the new transitions leave from.</param>
/// <param name="end">State the completed sequences arrive at.</param>
/// <param name="startUTF8">Byte sequence of the lower bound.</param>
/// <param name="endUTF8">Byte sequence of the upper bound.</param>
/// <param name="upto">Index of the byte handled at this level.</param>
private void Build(State start, State end, UTF8Sequence startUTF8, UTF8Sequence endUTF8, int upto)
{
    // Leading bytes of both bounds at this depth; read before anything is modified.
    int lowByte = startUTF8.ByteAt(upto);
    int highByte = endUTF8.ByteAt(upto);

    // Degenerate case: both bounds lead with the same byte.
    if (lowByte == highByte)
    {
        if (upto == startUTF8.len - 1 && upto == endUTF8.len - 1)
        {
            // Super degenerate: just a single edge, one UTF8 byte.
            start.AddTransition(new Transition(lowByte, highByte, end));
            return;
        }

        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(startUTF8.len > upto + 1);
            Debugging.Assert(endUTF8.len > upto + 1);
        }

        // Single-value leading edge, then recurse for the rest.
        State shared = NewUTF8State();
        start.AddTransition(new Transition(lowByte, shared)); // type=single
        Build(shared, end, startUTF8, endUTF8, 1 + upto);
        return;
    }

    // Different leading byte, same number of bytes in both bounds.
    if (startUTF8.len == endUTF8.len)
    {
        if (upto == startUTF8.len - 1)
        {
            start.AddTransition(new Transition(lowByte, highByte, end)); // type=startend
            return;
        }

        Start(start, end, startUTF8, upto, false);
        if (highByte - lowByte > 1)
        {
            // There is a middle: the byte values strictly between the two leads.
            All(start, end, lowByte + 1, highByte - 1, startUTF8.len - upto - 1);
        }
        End(start, end, endUTF8, upto, false);
        return;
    }

    // Bounds differ in length: start part first.
    Start(start, end, startUTF8, upto, true);

    // Possibly a middle part, spanning multiple byte counts.
    int limit = endUTF8.len - upto;
    for (int byteCount = 1 + startUTF8.len - upto; byteCount < limit; byteCount++)
    {
        // Wasteful: we only need the first byte, and we should
        // statically encode this first byte.
        tmpUTF8a.Set(startCodes[byteCount - 1]);
        tmpUTF8b.Set(endCodes[byteCount - 1]);
        All(start, end, tmpUTF8a.ByteAt(0), tmpUTF8b.ByteAt(0), tmpUTF8a.len - 1);
    }

    // End part last.
    End(start, end, endUTF8, upto, true);
}
/// <summary>
/// Adds the transitions for the "end" edge of a range, handling one byte of
/// <paramref name="utf8"/> per recursion level.
/// </summary>
/// <param name="start">State the new transitions leave from.</param>
/// <param name="end">State the transition for the last byte arrives at.</param>
/// <param name="utf8">Byte sequence being walked.</param>
/// <param name="upto">Index of the byte handled at this level.</param>
/// <param name="doAll">
/// When true (and the current byte differs from the computed start code),
/// <c>All</c> is also invoked for the byte values from the start code up to
/// one below the current byte.
/// </param>
private void End(State start, State end, UTF8Sequence utf8, int upto, bool doAll)
{
    // Both values are only read here; nothing in this method writes to utf8.
    int b = utf8.ByteAt(upto);
    int numBits = utf8.NumBits(upto);
    bool isLastByte = upto == utf8.Len - 1;

    if (isLastByte)
    {
        // Done recursing: one ranged edge from (b & ~mask) up to b.
        int rangeMin = b & (~MASKS[numBits - 1]);
        start.AddTransition(new Transition(rangeMin, b, end)); // type=end
        return;
    }

    int startCode;
    if (numBits != 5)
    {
        startCode = b & (~MASKS[numBits - 1]);
    }
    else
    {
        // Special case -- avoid creating unused edges (utf8 doesn't accept
        // certain byte sequences) -- there are other cases we could optimize too.
        startCode = 194;
    }

    if (doAll && b != startCode)
    {
        All(start, end, startCode, b - 1, utf8.Len - upto - 1);
    }

    // Exact match on this byte leads to a fresh state; the remaining bytes
    // are handled by the recursive call, always with doAll = true.
    State tail = NewUTF8State();
    start.AddTransition(new Transition(b, tail)); // type=end
    End(tail, end, utf8, 1 + upto, true);
}
/// <summary>
/// Adds the transitions for the "start" edge of a range, handling one byte of
/// <paramref name="utf8"/> per recursion level.
/// </summary>
/// <param name="start">State the new transitions leave from.</param>
/// <param name="end">State the transition for the last byte arrives at.</param>
/// <param name="utf8">Byte sequence being walked.</param>
/// <param name="upto">Index of the byte handled at this level.</param>
/// <param name="doAll">
/// When true (and the current byte differs from the computed end code),
/// <c>All</c> is also invoked for the byte values from one above the current
/// byte up to the end code.
/// </param>
private void Start(State start, State end, UTF8Sequence utf8, int upto, bool doAll)
{
    // Read once; nothing in this method writes to utf8.
    int b = utf8.ByteAt(upto);
    bool isLastByte = upto == utf8.Len - 1;

    if (isLastByte)
    {
        // Done recursing: one ranged edge from b up to (b | mask).
        int rangeMax = b | MASKS[utf8.NumBits(upto) - 1];
        start.AddTransition(new Transition(b, rangeMax, end)); // type=start
        return;
    }

    // Exact match on this byte leads to a fresh state; the remaining bytes
    // are handled first, always with doAll = true.
    State tail = NewUTF8State();
    start.AddTransition(new Transition(b, tail)); // type=start
    Start(tail, end, utf8, 1 + upto, true);

    // Only after the recursion are the byte values above this one added.
    int endCode = b | MASKS[utf8.NumBits(upto) - 1];
    if (doAll && b != endCode)
    {
        All(start, end, b + 1, endCode, utf8.Len - upto - 1);
    }
}
/// <summary>
/// Adds transitions between <paramref name="start"/> and <paramref name="end"/>
/// for the range bounded by <paramref name="startUTF8"/> and
/// <paramref name="endUTF8"/>, beginning at byte index <paramref name="upto"/>.
/// The work is broken into a start part, an optional middle part and an end part.
/// </summary>
/// <param name="start">State the new transitions leave from.</param>
/// <param name="end">State the completed sequences arrive at.</param>
/// <param name="startUTF8">Byte sequence of the lower bound.</param>
/// <param name="endUTF8">Byte sequence of the upper bound.</param>
/// <param name="upto">Index of the byte handled at this level.</param>
private void Build(State start, State end, UTF8Sequence startUTF8, UTF8Sequence endUTF8, int upto)
{
    // Leading bytes of both bounds at this depth; read before anything is modified.
    int first = startUTF8.ByteAt(upto);
    int last = endUTF8.ByteAt(upto);

    // Degenerate case: both bounds lead with the same byte.
    if (first == last)
    {
        if (upto == startUTF8.Len - 1 && upto == endUTF8.Len - 1)
        {
            // Super degenerate: just a single edge, one UTF8 byte.
            start.AddTransition(new Transition(first, last, end));
            return;
        }

        Debug.Assert(startUTF8.Len > upto + 1);
        Debug.Assert(endUTF8.Len > upto + 1);

        // Single-value leading edge, then recurse for the rest.
        State common = NewUTF8State();
        start.AddTransition(new Transition(first, common)); // type=single
        Build(common, end, startUTF8, endUTF8, 1 + upto);
        return;
    }

    // Different leading byte, same number of bytes in both bounds.
    if (startUTF8.Len == endUTF8.Len)
    {
        if (upto == startUTF8.Len - 1)
        {
            start.AddTransition(new Transition(first, last, end)); // type=startend
            return;
        }

        Start(start, end, startUTF8, upto, false);
        if (last - first > 1)
        {
            // There is a middle: the byte values strictly between the two leads.
            All(start, end, first + 1, last - 1, startUTF8.Len - upto - 1);
        }
        End(start, end, endUTF8, upto, false);
        return;
    }

    // Bounds differ in length: start part first.
    Start(start, end, startUTF8, upto, true);

    // Possibly a middle part, spanning multiple byte counts.
    int limit = endUTF8.Len - upto;
    for (int byteCount = 1 + startUTF8.Len - upto; byteCount < limit; byteCount++)
    {
        // Wasteful: we only need the first byte, and we should
        // statically encode this first byte.
        TmpUTF8a.Set(StartCodes[byteCount - 1]);
        TmpUTF8b.Set(EndCodes[byteCount - 1]);
        All(start, end, TmpUTF8a.ByteAt(0), TmpUTF8b.ByteAt(0), TmpUTF8a.Len - 1);
    }

    // End part last.
    End(start, end, endUTF8, upto, true);
}