Example #1
0
 /// <summary>
 /// Adds transitions leaving <paramref name="start"/> for the bytes of
 /// <paramref name="utf8"/> from index <paramref name="upto"/> onward, where the
 /// byte at each position is the largest value accepted at that position.
 /// </summary>
 /// <param name="start">State the new transitions are added to.</param>
 /// <param name="end">State targeted once the last byte of the sequence is handled.</param>
 /// <param name="utf8">Byte sequence being walked.</param>
 /// <param name="upto">Index of the byte handled by this call.</param>
 /// <param name="doAll">When true, values below the current byte are also covered through <c>All</c>.</param>
 private void End(State start, State end, UTF8Sequence utf8, int upto, bool doAll)
 {
     if (upto == utf8.len - 1)
     {
         // Last byte: nothing left to recurse into. Emit one ranged edge that
         // runs from the byte with its MASKS-selected bits cleared up to the byte.
         int top = utf8.ByteAt(upto);
         int bottom = top & ~MASKS[utf8.NumBits(upto) - 1];
         start.AddTransition(new Transition(bottom, top, end)); // type=end
         return;
     }

     // Special case for 5 bits: begin at 194 so that no unused edges are
     // created (UTF-8 rejects certain byte sequences). Other cases could be
     // optimized in the same way.
     int bits = utf8.NumBits(upto);
     int startCode = (bits == 5)
         ? 194
         : (utf8.ByteAt(upto) & ~MASKS[bits - 1]);

     if (doAll)
     {
         int lead = utf8.ByteAt(upto);
         if (lead != startCode)
         {
             // Everything strictly below the current byte is accepted wholesale.
             All(start, end, startCode, lead - 1, utf8.len - upto - 1);
         }
     }

     // The current byte itself leads into a fresh state that handles the
     // remaining bytes; nested levels always cover their lower values.
     State next = NewUTF8State();
     start.AddTransition(new Transition(utf8.ByteAt(upto), next)); // type=end
     End(next, end, utf8, upto + 1, true);
 }
Example #2
0
 /// <summary>
 /// Adds transitions leaving <paramref name="start"/> for the bytes of
 /// <paramref name="utf8"/> from index <paramref name="upto"/> onward, where the
 /// byte at each position is the smallest value accepted at that position.
 /// </summary>
 /// <param name="start">State the new transitions are added to.</param>
 /// <param name="end">State targeted once the last byte of the sequence is handled.</param>
 /// <param name="utf8">Byte sequence being walked.</param>
 /// <param name="upto">Index of the byte handled by this call.</param>
 /// <param name="doAll">When true, values above the current byte are also covered through <c>All</c>.</param>
 private void Start(State start, State end, UTF8Sequence utf8, int upto, bool doAll)
 {
     if (upto == utf8.len - 1)
     {
         // Last byte: nothing left to recurse into. Emit one ranged edge that
         // runs from the byte up to the byte with its MASKS-selected bits set.
         int bottom = utf8.ByteAt(upto);
         int top = bottom | MASKS[utf8.NumBits(upto) - 1];
         start.AddTransition(new Transition(bottom, top, end)); // type=start
         return;
     }

     // The current byte leads into a fresh state that handles the remaining
     // bytes; nested levels always cover their higher values.
     State next = NewUTF8State();
     start.AddTransition(new Transition(utf8.ByteAt(upto), next)); // type=start
     Start(next, end, utf8, upto + 1, true);

     // Only after the recursion: cover everything strictly above the current byte.
     int lead = utf8.ByteAt(upto);
     int endCode = lead | MASKS[utf8.NumBits(upto) - 1];
     if (doAll && lead != endCode)
     {
         All(start, end, lead + 1, endCode, utf8.len - upto - 1);
     }
 }
Example #3
0
        /// <summary>
        /// Adds the transitions between <paramref name="start"/> and <paramref name="end"/>
        /// for the pair of sequences <paramref name="startUTF8"/> and <paramref name="endUTF8"/>,
        /// beginning at byte index <paramref name="upto"/>. The work is broken into a
        /// start part, an optional middle part and an end part.
        /// </summary>
        /// <param name="start">State the new transitions are added to.</param>
        /// <param name="end">State targeted once a full sequence is handled.</param>
        /// <param name="startUTF8">Sequence supplying the lower bytes.</param>
        /// <param name="endUTF8">Sequence supplying the upper bytes.</param>
        /// <param name="upto">Index of the byte handled by this call.</param>
        private void Build(State start, State end, UTF8Sequence startUTF8, UTF8Sequence endUTF8, int upto)
        {
            int lowByte = startUTF8.ByteAt(upto);
            int highByte = endUTF8.ByteAt(upto);

            if (lowByte == highByte)
            {
                // Degenerate case: both sequences lead with the same byte.
                if (upto == startUTF8.len - 1 && upto == endUTF8.len - 1)
                {
                    // Fully degenerate: one edge for a single UTF8 byte.
                    start.AddTransition(new Transition(lowByte, highByte, end));
                    return;
                }

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(startUTF8.len > upto + 1);
                    Debugging.Assert(endUTF8.len > upto + 1);
                }

                // One single-valued leading edge, then recurse on the remainder.
                State next = NewUTF8State();
                start.AddTransition(new Transition(startUTF8.ByteAt(upto), next)); // type=single
                Build(next, end, startUTF8, endUTF8, upto + 1);
                return;
            }

            if (startUTF8.len != endUTF8.len)
            {
                // Start part.
                Start(start, end, startUTF8, upto, true);

                // Possible middle part, spanning the byte counts in between.
                int firstCount = 1 + startUTF8.len - upto;
                int limit = endUTF8.len - upto;
                for (int byteCount = firstCount; byteCount < limit; byteCount++)
                {
                    // Wasteful: only the first byte is needed, and that first
                    // byte should really be encoded statically.
                    tmpUTF8a.Set(startCodes[byteCount - 1]);
                    tmpUTF8b.Set(endCodes[byteCount - 1]);
                    All(start, end, tmpUTF8a.ByteAt(0), tmpUTF8b.ByteAt(0), tmpUTF8a.len - 1);
                }

                // End part.
                End(start, end, endUTF8, upto, true);
                return;
            }

            // From here on both sequences have the same length.
            if (upto == startUTF8.len - 1)
            {
                start.AddTransition(new Transition(lowByte, highByte, end)); // type=startend
                return;
            }

            Start(start, end, startUTF8, upto, false);

            int upperLead = endUTF8.ByteAt(upto);
            int lowerLead = startUTF8.ByteAt(upto);
            if (upperLead - lowerLead > 1)
            {
                // There is a middle: the values strictly between the two bytes.
                All(start, end, lowerLead + 1, upperLead - 1, startUTF8.len - upto - 1);
            }

            End(start, end, endUTF8, upto, false);
        }
Example #4
0
 /// <summary>
 /// Walks <paramref name="utf8"/> from byte index <paramref name="upto"/> and adds
 /// transitions to <paramref name="start"/> in which the byte at each position is
 /// the highest value accepted there.
 /// </summary>
 /// <param name="start">State receiving the new transitions.</param>
 /// <param name="end">State targeted when the final byte is handled.</param>
 /// <param name="utf8">Byte sequence being walked.</param>
 /// <param name="upto">Index of the byte handled by this call.</param>
 /// <param name="doAll">When true, values below the current byte are also covered through <c>All</c>.</param>
 private void End(State start, State end, UTF8Sequence utf8, int upto, bool doAll)
 {
     if (upto == utf8.Len - 1)
     {
         // Recursion bottoms out here: a single ranged edge from the byte with
         // its MASKS-selected bits cleared up to the byte itself.
         int high = utf8.ByteAt(upto);
         int low = high & ~MASKS[utf8.NumBits(upto) - 1];
         start.AddTransition(new Transition(low, high, end)); // type=end
         return;
     }

     // Special case for 5 bits: begin at 194 so that no unused edges are
     // created (UTF-8 rejects certain byte sequences). Other cases could be
     // optimized in the same way.
     int bitCount = utf8.NumBits(upto);
     int startCode = (bitCount == 5)
         ? 194
         : (utf8.ByteAt(upto) & ~MASKS[bitCount - 1]);

     if (doAll)
     {
         int lead = utf8.ByteAt(upto);
         if (lead != startCode)
         {
             // Cover every value strictly below the current byte in one go.
             All(start, end, startCode, lead - 1, utf8.Len - upto - 1);
         }
     }

     // Chain the current byte to a new state and continue with the next byte;
     // deeper levels always cover their lower values.
     State next = NewUTF8State();
     start.AddTransition(new Transition(utf8.ByteAt(upto), next)); // type=end
     End(next, end, utf8, upto + 1, true);
 }
Example #5
0
 /// <summary>
 /// Walks <paramref name="utf8"/> from byte index <paramref name="upto"/> and adds
 /// transitions to <paramref name="start"/> in which the byte at each position is
 /// the lowest value accepted there.
 /// </summary>
 /// <param name="start">State receiving the new transitions.</param>
 /// <param name="end">State targeted when the final byte is handled.</param>
 /// <param name="utf8">Byte sequence being walked.</param>
 /// <param name="upto">Index of the byte handled by this call.</param>
 /// <param name="doAll">When true, values above the current byte are also covered through <c>All</c>.</param>
 private void Start(State start, State end, UTF8Sequence utf8, int upto, bool doAll)
 {
     if (upto == utf8.Len - 1)
     {
         // Recursion bottoms out here: a single ranged edge from the byte up to
         // the byte with its MASKS-selected bits set.
         int low = utf8.ByteAt(upto);
         int high = low | MASKS[utf8.NumBits(upto) - 1];
         start.AddTransition(new Transition(low, high, end)); // type=start
         return;
     }

     // Chain the current byte to a new state and continue with the next byte;
     // deeper levels always cover their higher values.
     State next = NewUTF8State();
     start.AddTransition(new Transition(utf8.ByteAt(upto), next)); // type=start
     Start(next, end, utf8, upto + 1, true);

     // After the recursion, cover every value strictly above the current byte.
     int lead = utf8.ByteAt(upto);
     int endCode = lead | MASKS[utf8.NumBits(upto) - 1];
     if (doAll && lead != endCode)
     {
         All(start, end, lead + 1, endCode, utf8.Len - upto - 1);
     }
 }
Example #6
0
        /// <summary>
        /// Adds the transitions between <paramref name="start"/> and <paramref name="end"/>
        /// for the pair of sequences <paramref name="startUTF8"/> and <paramref name="endUTF8"/>,
        /// beginning at byte index <paramref name="upto"/>. The work is broken into a
        /// start part, an optional middle part and an end part.
        /// </summary>
        /// <param name="start">State receiving the new transitions.</param>
        /// <param name="end">State targeted once a full sequence is handled.</param>
        /// <param name="startUTF8">Sequence supplying the lower bytes.</param>
        /// <param name="endUTF8">Sequence supplying the upper bytes.</param>
        /// <param name="upto">Index of the byte handled by this call.</param>
        private void Build(State start, State end, UTF8Sequence startUTF8, UTF8Sequence endUTF8, int upto)
        {
            int lowByte = startUTF8.ByteAt(upto);
            int highByte = endUTF8.ByteAt(upto);

            if (lowByte == highByte)
            {
                // Degenerate case: both sequences lead with the same byte.
                if (upto == startUTF8.Len - 1 && upto == endUTF8.Len - 1)
                {
                    // Fully degenerate: one edge for a single UTF8 byte.
                    start.AddTransition(new Transition(lowByte, highByte, end));
                    return;
                }

                Debug.Assert(startUTF8.Len > upto + 1);
                Debug.Assert(endUTF8.Len > upto + 1);

                // One single-valued leading edge, then recurse on the remainder.
                State next = NewUTF8State();
                start.AddTransition(new Transition(startUTF8.ByteAt(upto), next)); // type=single
                Build(next, end, startUTF8, endUTF8, upto + 1);
                return;
            }

            if (startUTF8.Len != endUTF8.Len)
            {
                // Start part.
                Start(start, end, startUTF8, upto, true);

                // Possible middle part, spanning the byte counts in between.
                int firstCount = 1 + startUTF8.Len - upto;
                int limit = endUTF8.Len - upto;
                for (int byteCount = firstCount; byteCount < limit; byteCount++)
                {
                    // Wasteful: only the first byte is needed, and that first
                    // byte should really be encoded statically.
                    TmpUTF8a.Set(StartCodes[byteCount - 1]);
                    TmpUTF8b.Set(EndCodes[byteCount - 1]);
                    All(start, end, TmpUTF8a.ByteAt(0), TmpUTF8b.ByteAt(0), TmpUTF8a.Len - 1);
                }

                // End part.
                End(start, end, endUTF8, upto, true);
                return;
            }

            // From here on both sequences have the same length.
            if (upto == startUTF8.Len - 1)
            {
                start.AddTransition(new Transition(lowByte, highByte, end)); // type=startend
                return;
            }

            Start(start, end, startUTF8, upto, false);

            int upperLead = endUTF8.ByteAt(upto);
            int lowerLead = startUTF8.ByteAt(upto);
            if (upperLead - lowerLead > 1)
            {
                // There is a middle: the values strictly between the two bytes.
                All(start, end, lowerLead + 1, upperLead - 1, startUTF8.Len - upto - 1);
            }

            End(start, end, endUTF8, upto, false);
        }