示例#1
0
        /// <summary>
        /// Decodes <paramref name="input"/> through the array-based <c>Decode</c> overload,
        /// enlarging the output array and retrying for as long as that overload reports
        /// <c>punycode_big_output</c>.
        /// </summary>
        /// <param name="input">Buffer holding the text to decode.</param>
        /// <param name="case_flags">Passed through unchanged to the array-based overload.</param>
        /// <returns>
        /// A new buffer containing the decoded code points. When the final status is not
        /// <c>punycode_success</c>, <c>GetException</c> is called with that status (presumably
        /// it throws - confirm against its definition); if it returns, the buffer is empty.
        /// </returns>
        public static StringBuffer Decode(StringBuffer input, char[] case_flags)
        {
            char[] source = input.ToString().ToCharArray();
            int[] length = new int[] { MAX_BUFFER_SIZE };
            int[] codePoints = new int[length[0]];
            StringBuffer decoded = new StringBuffer();

            int status = Decode(input.Length, source, length, codePoints, case_flags);
            while (status == punycode_big_output)
            {
                // Output did not fit: quadruple the capacity and try again.
                length[0] = codePoints.Length * 4;
                codePoints = new int[length[0]];
                status = Decode(input.Length, source, length, codePoints, case_flags);
            }

            if (status != punycode_success)
            {
                GetException(status);
                return decoded;
            }

            // length[0] now holds the number of code points actually produced.
            for (int k = 0; k < length[0]; k++)
            {
                UTF16.Append(decoded, codePoints[k]);
            }
            return decoded;
        }
示例#2
0
        /// <summary>
        /// Reads the code point at the iterator's current position, combining a lead
        /// surrogate with a following trail surrogate when both are present.
        /// The iterator is moved one step forward and back again while peeking at the
        /// trail unit.
        /// </summary>
        /// <param name="ci">Iterator to read from.</param>
        /// <returns>
        /// The code point at the current position, or <c>Done32</c> when the current unit
        /// equals <c>CharacterIterator.Done</c> and the index is at or past the end index.
        /// </returns>
        public static int Current32(CharacterIterator ci)
        {
            char first = ci.Current;
            int codePoint = first;

            // Everything below the surrogate range is already a complete code point.
            if (codePoint < UTF16.LeadSurrogateMinValue)
            {
                return codePoint;
            }

            if (UTF16.IsLeadSurrogate(first))
            {
                // Peek at the following unit, then step back to where we started.
                char second = ci.Next();
                ci.Previous();
                if (UTF16.IsTrailSurrogate(second))
                {
                    codePoint = ((first - UTF16.LeadSurrogateMinValue) << 10) +
                                (second - UTF16.TrailSurrogateMinValue) +
                                UTF16.SupplementaryMinValue;
                }
            }
            else if (first == CharacterIterator.Done && ci.Index >= ci.EndIndex)
            {
                // The Done marker only means "end of text" when the index agrees.
                codePoint = Done32;
            }
            return codePoint;
        }
示例#3
0
        /// <summary>
        /// Moves the iterator back by one code point and returns it. A trail surrogate
        /// preceded by a lead surrogate is combined into a supplementary code point.
        /// </summary>
        /// <param name="ci">Iterator to move backwards.</param>
        /// <returns>
        /// The code point before the original position, or <c>Done32</c> when the iterator
        /// is already at (or before) its begin index.
        /// </returns>
        public static int Previous32(CharacterIterator ci)
        {
            if (ci.Index <= ci.BeginIndex)
            {
                return Done32;
            }

            char low = ci.Previous();

            // Not a trail surrogate, or nothing in front of it: return the unit as is.
            if (!UTF16.IsTrailSurrogate(low) || ci.Index <= ci.BeginIndex)
            {
                return low;
            }

            char high = ci.Previous();
            if (!UTF16.IsLeadSurrogate(high))
            {
                // Unpaired trail surrogate: undo the extra step backwards.
                ci.Next();
                return low;
            }

            return ((high - UTF16.LeadSurrogateMinValue) << 10) +
                   (low - UTF16.TrailSurrogateMinValue) +
                   UTF16.SupplementaryMinValue;
        }
示例#4
0
        /// <summary>
        /// Moves the iterator back by one code point and returns it. A trail surrogate
        /// preceded by a lead surrogate is combined into a supplementary code point.
        /// </summary>
        /// <param name="ci">Iterator to move backwards.</param>
        /// <returns>
        /// The code point before the original position, or <c>DONE32</c> when the iterator
        /// is already at (or before) its begin index.
        /// </returns>
        public static int Previous32(CharacterIterator ci)
        {
            if (ci.Index <= ci.BeginIndex)
            {
                return DONE32;
            }

            char low = ci.Previous();

            // Not a trail surrogate, or nothing in front of it: return the unit as is.
            if (!UTF16.IsTrailSurrogate(low) || ci.Index <= ci.BeginIndex)
            {
                return low;
            }

            char high = ci.Previous();
            if (!UTF16.IsLeadSurrogate(high))
            {
                // Unpaired trail surrogate: undo the extra step backwards.
                ci.Next();
                return low;
            }

            return ((high - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10) +
                   (low - UTF16.TRAIL_SURROGATE_MIN_VALUE) +
                   UTF16.SUPPLEMENTARY_MIN_VALUE;
        }
示例#5
0
        /// <summary>
        /// Normalizes <paramref name="str"/> with the iterative normalizer API, walking the
        /// text forwards or backwards, and collects the result in <paramref name="buf"/>.
        /// </summary>
        /// <param name="str">Text to normalize.</param>
        /// <param name="mode">Normalization mode set on the shared normalizer.</param>
        /// <param name="buf">Scratch buffer; it is cleared first and then filled with the output.</param>
        /// <param name="dir">Positive to iterate first-to-last; any other value iterates last-to-first.</param>
        /// <param name="options">Options switched on after all options have been reset.</param>
        /// <returns>The contents of <paramref name="buf"/> after the iteration.</returns>
        private String iterativeNorm(StringCharacterIterator str, Normalizer.Mode mode,
                                     StringBuffer buf, int dir, int options)
        {
            normalizer.SetText(str);
            normalizer.SetMode(mode);
            buf.Length = 0;
            normalizer.SetOption(-1, false);      // clear every option bit
            normalizer.SetOption(options, true);  // then enable the requested ones

            if (dir > 0)
            {
                // Forward pass: append each code point in order.
                int cp = normalizer.First();
                while (cp != Normalizer.DONE)
                {
                    buf.Append(UTF16.ValueOf(cp));
                    cp = normalizer.Next();
                }
            }
            else
            {
                // Backward pass: prepend so the text still reads front to back.
                int cp = normalizer.Last();
                while (cp != Normalizer.DONE)
                {
                    buf.Insert(0, UTF16.ValueOf(cp));
                    cp = normalizer.Previous();
                }
            }
            return buf.ToString();
        }
示例#6
0
文件: Trie.cs 项目: introfog/ICU4N
 /// <summary>
 /// Internal trie getter that maps a code point to the offset of its data.
 /// Could be faster(?) but longer with
 /// <code>
 ///     if((c32)&lt;=0xd7ff) { (result)=_TRIE_GET_RAW(trie, data, 0, c32); }
 /// </code>
 /// </summary>
 /// <param name="ch">Codepoint.</param>
 /// <returns>
 /// Offset to data, or -1 when <paramref name="ch"/> is negative or greater than
 /// <c>UChar.MaxValue</c>.
 /// </returns>
 protected int GetCodePointOffset(int ch)
 {
     // if ((ch >> 16) == 0) slower
     if (ch < 0)
     {
         return -1;
     }

     if (ch < UTF16.LeadSurrogateMinValue)
     {
         // Fast path: BMP below the surrogates (D800), where GetRawOffset() works.
         return GetRawOffset(0, (char)ch);
     }

     if (ch < UTF16.SupplementaryMinValue)
     {
         // Remaining BMP code points.
         return GetBMPOffset((char)ch);
     }

     if (ch > UChar.MaxValue)
     {
         // Beyond the last code point: report an error.
         return -1;
     }

     // Supplementary code point: look it up via its lead surrogate and the
     // masked low bits that form the trail part.
     return GetSurrogateOffset(UTF16.GetLeadSurrogate(ch),
                               (char)(ch & SurrogateMask));
 }
示例#7
0
        /// <summary>
        /// Reads the code point at the iterator's current position, combining a lead
        /// surrogate with a following trail surrogate when both are present.
        /// The iterator is moved one step forward and back again while peeking at the
        /// trail unit.
        /// </summary>
        /// <param name="ci">Iterator to read from.</param>
        /// <returns>
        /// The code point at the current position, or <c>DONE32</c> when the current unit
        /// equals <c>CharacterIterator.DONE</c> and the index is at or past the end index.
        /// </returns>
        public static int Current32(CharacterIterator ci)
        {
            char first = ci.Current;
            int codePoint = first;

            // Everything below the surrogate range is already a complete code point.
            if (codePoint < UTF16.LEAD_SURROGATE_MIN_VALUE)
            {
                return codePoint;
            }

            if (UTF16.IsLeadSurrogate(first))
            {
                // Peek at the following unit, then step back to where we started.
                char second = ci.Next();
                ci.Previous();
                if (UTF16.IsTrailSurrogate(second))
                {
                    codePoint = ((first - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10) +
                                (second - UTF16.TRAIL_SURROGATE_MIN_VALUE) +
                                UTF16.SUPPLEMENTARY_MIN_VALUE;
                }
            }
            else if (first == CharacterIterator.DONE && ci.Index >= ci.EndIndex)
            {
                // The DONE marker only means "end of text" when the index agrees.
                codePoint = DONE32;
            }
            return codePoint;
        }
示例#8
0
 /// <summary>
 /// Checks that combining a surrogate pair with <c>UCharacter.ToCodePoint</c> and then
 /// splitting the result with <c>UTF16.GetLeadSurrogate</c> / <c>UTF16.GetTrailSurrogate</c>
 /// yields the original pair, for pairs at both ends of the surrogate ranges.
 /// Reporting stops at the first mismatch.
 /// </summary>
 public void TestToCodePoint()
 {
     // Laid out as (high, low) pairs, one pair per line.
     char[] surrogates =
     {
         (char)(UCharacter.MIN_HIGH_SURROGATE + 0), (char)(UCharacter.MIN_LOW_SURROGATE + 0),
         (char)(UCharacter.MIN_HIGH_SURROGATE + 1), (char)(UCharacter.MIN_LOW_SURROGATE + 1),
         (char)(UCharacter.MIN_HIGH_SURROGATE + 2), (char)(UCharacter.MIN_LOW_SURROGATE + 2),
         (char)(UCharacter.MAX_HIGH_SURROGATE - 2), (char)(UCharacter.MAX_LOW_SURROGATE - 2),
         (char)(UCharacter.MAX_HIGH_SURROGATE - 1), (char)(UCharacter.MAX_LOW_SURROGATE - 1),
         (char)(UCharacter.MAX_HIGH_SURROGATE - 0), (char)(UCharacter.MAX_LOW_SURROGATE - 0),
     };

     for (int k = 0; k < surrogates.Length; k += 2)
     {
         char high = surrogates[k];
         char low = surrogates[k + 1];
         int combined = UCharacter.ToCodePoint(high, low);

         bool roundTrips = high == UTF16.GetLeadSurrogate(combined) &&
                           low == UTF16.GetTrailSurrogate(combined);
         if (roundTrips)
         {
             continue;
         }

         Errln(high.ToHexString() + ", " + low);
         break;
     }
 }
            /// <summary>
            /// Derives a rule status for the text between two break positions by scanning
            /// its code points: the first digit found yields <c>WordNumber</c>, the first
            /// letter found yields <c>WordLetter</c>.
            /// </summary>
            /// <param name="current">Start of the run, relative to <c>m_start</c>.</param>
            /// <param name="next">End of the run (exclusive), relative to <c>m_start</c>.</param>
            /// <returns>
            /// <c>WordNone</c> when either position is <c>BreakIterator.Done</c> or the run
            /// contains neither a digit nor a letter; otherwise the status of the first
            /// digit or letter encountered.
            /// </returns>
            private RuleStatus CalcStatus(int current, int next)
            {
                if (current == BreakIterator.Done || next == BreakIterator.Done)
                {
                    return ICU4N.Text.RuleStatus.WordNone;
                }

                int begin = m_start + current;
                int end   = m_start + next;

                int codepoint;

                for (int i = begin; i < end; i += UTF16.GetCharCount(codepoint))
                {
                    // Read the code point at the loop position. The previous code always
                    // read at 'begin', so only the first code point of the run was ever
                    // examined, however far the loop advanced.
                    codepoint = UTF16.CharAt(m_text, 0, end, i);

                    if (UChar.IsDigit(codepoint))
                    {
                        return ICU4N.Text.RuleStatus.WordNumber;
                    }
                    else if (UChar.IsLetter(codepoint))
                    {
                        // TODO: try to separately specify ideographic, kana?
                        // [currently all bundled as letter for this case]
                        return ICU4N.Text.RuleStatus.WordLetter;
                    }
                }

                return ICU4N.Text.RuleStatus.WordNone;
            }
示例#10
0
        /// <summary>
        /// Runs <c>CharacterTest</c> for every code point from 0 up to (but not including)
        /// 0x10FFFF whose category is not unassigned, private use or surrogate - once for
        /// the code point alone and once followed by U+0345. Progress is logged every
        /// 5000 tested code points.
        /// </summary>
        public void TestExhaustive()
        {
            CanonicalIterator it = new CanonicalIterator("");
            int tested = 0;

            for (int cp = 0; cp < 0x10FFFF; ++cp)
            {
                // Skip characters we know don't have decompositions.
                UCharacterCategory category = UCharacter.GetType(cp);
                bool knownWithoutDecomp = category == UCharacterCategory.OtherNotAssigned ||
                                          category == UCharacterCategory.PrivateUse ||
                                          category == UCharacterCategory.Surrogate;
                if (knownWithoutDecomp)
                {
                    continue;
                }

                tested++;
                if (tested % 5000 == 0)
                {
                    Logln("Testing " + Utility.Hex(cp, 0));
                }

                string single = UTF16.ValueOf(cp);
                CharacterTest(single, cp, it);

                CharacterTest(single + "\u0345", cp, it);
            }
        }
示例#11
0
 /// <summary>
 /// Checks that combining a surrogate pair with <c>UChar.ToCodePoint</c> and then
 /// splitting the result with <c>UTF16.GetLeadSurrogate</c> / <c>UTF16.GetTrailSurrogate</c>
 /// yields the original pair, for pairs at both ends of the surrogate ranges.
 /// Reporting stops at the first mismatch.
 /// </summary>
 public void TestToCodePoint()
 {
     // Laid out as (high, low) pairs, one pair per line.
     char[] surrogates =
     {
         (char)(UChar.MinHighSurrogate + 0), (char)(UChar.MinLowSurrogate + 0),
         (char)(UChar.MinHighSurrogate + 1), (char)(UChar.MinLowSurrogate + 1),
         (char)(UChar.MinHighSurrogate + 2), (char)(UChar.MinLowSurrogate + 2),
         (char)(UChar.MaxHighSurrogate - 2), (char)(UChar.MaxLowSurrogate - 2),
         (char)(UChar.MaxHighSurrogate - 1), (char)(UChar.MaxLowSurrogate - 1),
         (char)(UChar.MaxHighSurrogate - 0), (char)(UChar.MaxLowSurrogate - 0),
     };

     for (int k = 0; k < surrogates.Length; k += 2)
     {
         char high = surrogates[k];
         char low = surrogates[k + 1];
         int combined = UChar.ToCodePoint(high, low);

         bool roundTrips = high == UTF16.GetLeadSurrogate(combined) &&
                           low == UTF16.GetTrailSurrogate(combined);
         if (roundTrips)
         {
             continue;
         }

         Errln(high.ToHexString() + ", " + low);
         break;
     }
 }
示例#12
0
 // Two canonically equivalent strings must compare equal under a canonical
 // caseless match (see UAX #21 Case Mappings, Jitterbug 2021 and Unicode
 // Technical Committee meeting consensus 92-C31).
 // Picks the Normalizer.Compare overload that fits the lengths of the two
 // strings and reports an error when the result is non-zero. Length
 // combinations not listed below are not checked.
 private void compare(String s1, String s2)
 {
     if (s1.Length == 1)
     {
         if (s2.Length == 1)
         {
             // single code unit vs single code unit -> (int, int) overload
             int result = Normalizer.Compare(UTF16.CharAt(s1, 0), UTF16.CharAt(s2, 0), Normalizer.COMPARE_IGNORE_CASE);
             if (result != 0)
             {
                 Errln("Normalizer.compare(int,int) failed for s1: "
                       + Utility.Hex(s1) + " s2: " + Utility.Hex(s2));
             }
         }
         else if (s2.Length > 1)
         {
             // single code unit vs longer string -> (int, String) overload
             int result = Normalizer.Compare(UTF16.CharAt(s1, 0), s2, Normalizer.COMPARE_IGNORE_CASE);
             if (result != 0)
             {
                 Errln("Normalizer.compare(int,String) failed for s1: "
                       + Utility.Hex(s1) + " s2: " + Utility.Hex(s2));
             }
         }
     }
     else if (s1.Length > 1 && s2.Length > 1)
     {
         // TODO: Re-enable this tests after UTC fixes UAX 21
         int result = Normalizer.Compare(s1.ToCharArray(), s2.ToCharArray(), Normalizer.COMPARE_IGNORE_CASE);
         if (result != 0)
         {
             Errln("Normalizer.compare(char[],char[]) failed for s1: "
                   + Utility.Hex(s1) + " s2: " + Utility.Hex(s2));
         }
     }
 }
示例#13
0
        /// <summary>
        /// Reads the next character according to <paramref name="options"/> and moves the
        /// position past it. Returns <see cref="DONE"/> when no characters remain.
        /// </summary>
        /// <param name="options">One or more of the following options, bitwise-OR-ed
        /// together: <see cref="RuleCharacterIteratorOptions.ParseVariables"/>,
        /// <see cref="RuleCharacterIteratorOptions.ParseEscapes"/>,
        /// <see cref="RuleCharacterIteratorOptions.SkipWhitespace"/>.</param>
        /// <returns>The current 32-bit code point, or <see cref="DONE"/>.</returns>
        /// <exception cref="ArgumentException">A referenced variable is undefined, or an
        /// escape sequence could not be parsed.</exception>
        public virtual int Next(RuleCharacterIteratorOptions options)
        {
            bool parseVariables = (options & RuleCharacterIteratorOptions.ParseVariables) != 0;
            bool skipWhitespace = (options & RuleCharacterIteratorOptions.SkipWhitespace) != 0;
            bool parseEscapes = (options & RuleCharacterIteratorOptions.ParseEscapes) != 0;

            int c = DONE;
            isEscaped = false;

            while (true)
            {
                c = Current();
                Advance(UTF16.GetCharCount(c));

                // A variable reference is only expanded when we are not already
                // reading from a variable's value and a symbol table is available.
                if (c == SymbolTable.SYMBOL_REF && buf == null && parseVariables && sym != null)
                {
                    string name = sym.ParseReference(text, pos, text.Length);
                    if (name == null)
                    {
                        // Isolated SYMBOL_REF: hand it back; the caller must be
                        // prepared for this.
                        break;
                    }

                    bufPos = 0;
                    buf = sym.Lookup(name);
                    if (buf == null)
                    {
                        throw new ArgumentException(
                                  "Undefined variable: " + name);
                    }
                    if (buf.Length == 0)
                    {
                        // Empty variable value: nothing to read from it.
                        buf = null;
                    }
                    continue;
                }

                if (skipWhitespace && PatternProps.IsWhiteSpace(c))
                {
                    continue;
                }

                if (c == '\\' && parseEscapes)
                {
                    int[] consumed = new int[] { 0 };
                    c = Utility.UnescapeAt(Lookahead(), consumed);
                    Jumpahead(consumed[0]);
                    isEscaped = true;
                    if (c < 0)
                    {
                        throw new ArgumentException("Invalid escape");
                    }
                }

                break;
            }

            return c;
        }
示例#14
0
 /// <summary>
 /// Advances the trie from its current state by the one or two UTF-16 code units
 /// that encode the given code point.
 /// </summary>
 /// <param name="cp">A Unicode code point 0..0x10ffff.</param>
 /// <returns>The match/value <see cref="Result"/>.</returns>
 /// <stable>ICU 4.8</stable>
 public Result NextForCodePoint(int cp)
 {
     // BMP code points are a single unit.
     if (cp <= 0xffff)
     {
         return Next(cp);
     }

     // Supplementary: feed the lead surrogate first; only continue with the
     // trail surrogate when the trie can still advance.
     if (!Next(UTF16.GetLeadSurrogate(cp)).HasNext())
     {
         return Result.NoMatch;
     }
     return Next(UTF16.GetTrailSurrogate(cp));
 }
示例#15
0
 /// <summary>
 /// Gets the current string from the iterator. Only use after calling next(), not nextRange().
 /// </summary>
 /// <returns>
 /// The <c>String</c> member when <c>Codepoint</c> equals <c>IS_STRING</c>; otherwise
 /// the string form of <c>Codepoint</c>.
 /// </returns>
 public string GetString()
 {
     if (Codepoint == IS_STRING)
     {
         return String;
     }
     return UTF16.ValueOf(Codepoint);
 }
示例#16
0
            /// <summary>
            /// Stores <paramref name="s"/> at a new offset placed after the entry with the
            /// highest existing offset. The new offset is that highest offset plus
            /// <c>UTF16.GetByteCount</c> of the string stored there plus 2.
            /// </summary>
            /// <param name="s">String to store.</param>
            /// <returns>The offset under which <paramref name="s"/> was stored.</returns>
            /// <remarks>Requires at least one existing entry; indexing the empty key list throws.</remarks>
            public int AddString(string s)
            {
                // Sort the known offsets so that the last element is the largest one.
                List<int> sortedOffsets = new List<int>(strings.Keys);
                sortedOffsets.Sort();

                int lastOffset = sortedOffsets[sortedOffsets.Count - 1];
                int newOffset = lastOffset + UTF16.GetByteCount(strings[lastOffset]) + 2;

                strings[newOffset] = s;
                return newOffset;
            }
示例#17
0
 /// <summary>
 /// Reads the current 32-bit code point as is: no escape parsing, no variable
 /// parsing and no whitespace skipping.
 /// </summary>
 /// <returns>
 /// The code point at <c>bufPos</c> in <c>buf</c> when <c>buf</c> is set; otherwise the
 /// code point at the current index in <c>text</c>, or <c>DONE</c> once that index
 /// reaches the end of <c>text</c>.
 /// </returns>
 private int Current()
 {
     if (buf == null)
     {
         int index = pos.Index;
         if (index < text.Length)
         {
             return UTF16.CharAt(text, index);
         }
         return DONE;
     }

     return UTF16.CharAt(buf, 0, buf.Length, bufPos);
 }
示例#18
0
 /// <summary>
 /// Moves past any characters that the given options say to ignore. Intended for
 /// use together with the <see cref="Lookahead()"/> method.
 /// <para/>
 /// Only <see cref="RuleCharacterIteratorOptions.SkipWhitespace"/> has an effect here.
 /// </summary>
 /// <param name="options">One or more of the following options, bitwise-OR-ed
 /// together: <see cref="RuleCharacterIteratorOptions.ParseVariables"/>,
 /// <see cref="RuleCharacterIteratorOptions.ParseEscapes"/>,
 /// <see cref="RuleCharacterIteratorOptions.SkipWhitespace"/>.</param>
 public virtual void SkipIgnored(RuleCharacterIteratorOptions options)
 {
     if ((options & RuleCharacterIteratorOptions.SkipWhitespace) == 0)
     {
         return;
     }

     // Consume white space until the current code point is something else.
     int c = Current();
     while (PatternProps.IsWhiteSpace(c))
     {
         Advance(UTF16.GetCharCount(c));
         c = Current();
     }
 }
示例#19
0
        /// <summary>
        /// Replaces every <c>&lt;XXXX&gt;</c> sequence (hexadecimal code point between angle
        /// brackets, e.g. <c>&lt;00AD&gt;</c>) with the character(s) for that code point.
        /// All other characters are copied through, except that any <c>'&gt;'</c> is dropped.
        /// </summary>
        /// <param name="str">String including &lt;*&gt; style unicode.</param>
        /// <returns>The string with the bracketed code points expanded.</returns>
        /// <remarks>A <c>'&lt;'</c> without a later <c>'&gt;'</c> runs off the end of the input and throws.</remarks>
        private static String StringReplace(String str)
        {
            StringBuffer output = new StringBuffer();
            char[] input = str.ToCharArray();

            int i = 0;
            while (i < input.Length)
            {
                char c = input[i];
                if (c == '>')
                {
                    // Closing bracket (or a stray '>'): contributes nothing.
                }
                else if (c != '<')
                {
                    output.Append(c);
                }
                else
                {
                    // Collect the hex digits up to, but not including, the '>'.
                    StringBuffer hex = new StringBuffer();
                    while (input[i + 1] != '>')
                    {
                        i++;
                        hex.Append(input[i]);
                    }

                    int codePoint = int.Parse(hex.ToString(), NumberStyles.HexNumber, CultureInfo.InvariantCulture);
                    if ((codePoint >> 16) == 0)
                    {
                        output.Append((char)codePoint);
                    }
                    else
                    {
                        // Above \uFFFF: go through UTF16 and append each code unit.
                        foreach (char unit in UTF16.ValueOf(codePoint).ToCharArray())
                        {
                            output.Append(unit);
                        }
                    }
                }
                i++;
            }
            return output.ToString();
        }
示例#20
0
        /// <summary>
        /// Advances to the next script run, if there is one.
        /// </summary>
        /// <returns>true if there is another script run, false otherwise.</returns>
        public bool Next()
        {
            if (scriptLimit >= limit)
            {
                return false;
            }

            // The new run starts where the previous one ended.
            scriptCode = UScript.Common;
            scriptStart = scriptLimit;

            while (index < limit)
            {
                int cp = UTF16.CharAt(text, start, limit, index - start);
                int cpScript = GetScript(cp);

                // From UTR #24: implementations that determine the boundaries between
                // characters of given scripts should never break between a non-spacing
                // mark and its base character. So a non-spacing mark, whatever its own
                // script value, stays in the run of its base character.
                bool staysInRun = IsSameScript(scriptCode, cpScript) ||
                                  UChar.GetUnicodeCategory(cp) == UUnicodeCategory.NonSpacingMark;
                if (!staysInRun)
                {
                    break;
                }

                index += UTF16.GetCharCount(cp);

                // Inherited or Common takes on the script code of the surrounding text.
                if (scriptCode <= UScript.Inherited && cpScript > UScript.Inherited)
                {
                    scriptCode = cpScript;
                }
            }

            scriptLimit = index;
            return true;
        }
示例#21
0
        /// <summary>
        /// Looks up the value stored for a surrogate pair.
        /// </summary>
        /// <param name="lead">A lead surrogate.</param>
        /// <param name="trail">A trail surrogate.</param>
        /// <returns>
        /// The data at the pair's offset, or <c>m_initialValue_</c> when the computed
        /// offset is not positive.
        /// </returns>
        /// <exception cref="ArgumentException">The two characters are not a lead
        /// surrogate followed by a trail surrogate.</exception>
        public int GetSurrogateValue(char lead, char trail)
        {
            bool formsPair = UTF16.IsLeadSurrogate(lead) && UTF16.IsTrailSurrogate(trail);
            if (!formsPair)
            {
                throw new ArgumentException(
                          "Argument characters do not form a supplementary character");
            }

            // Fold position for the trail surrogate following this lead.
            int offset = GetSurrogateOffset(lead, trail);
            if (offset <= 0)
            {
                // No usable offset: fall back to the initial value.
                return m_initialValue_;
            }

            // Real data from the folded lead/trail units.
            return m_data_[offset];
        }
示例#22
0
        /// <summary>
        /// Builds a copy of <paramref name="source"/> in which every code point contained
        /// in <paramref name="set"/> is replaced by <paramref name="replacement"/>.
        /// </summary>
        /// <param name="source">Text to scan, code point by code point.</param>
        /// <param name="set">Code points to replace.</param>
        /// <param name="replacement">Text inserted for each matching code point.</param>
        /// <returns>The resulting string.</returns>
        public static String ReplaceAll(String source, UnicodeSet set, String replacement)
        {
            StringBuffer output = new StringBuffer();

            int position = 0;
            while (position < source.Length)
            {
                int codePoint = UTF16.CharAt(source, position);
                if (!set.Contains(codePoint))
                {
                    UTF16.Append(output, codePoint);
                }
                else
                {
                    output.Append(replacement);
                }

                // Step over one or two code units, depending on the code point.
                position += UTF16.GetCharCount(codePoint);
            }
            return output.ToString();
        }
示例#23
0
        /// <summary>
        /// Tests, via the <c>lcccIndex</c> / <c>lcccBits</c> tables, whether the given code
        /// point may have a non-zero lccc. Supplementary code points are looked up by
        /// their lead surrogate.
        /// </summary>
        /// <param name="c">Code point; may be negative, e.g. Collation.SENTINEL_CP.</param>
        /// <returns>false for anything below U+0300; otherwise the result of the table lookup.</returns>
        internal static bool MayHaveLccc(int c)
        {
            // Handles all of Unicode 0..10FFFF.
            // U+0300 is the first character with lccc!=0, so everything below
            // (including negative values) is rejected right away.
            if (c < 0x300)
            {
                return false;
            }

            int unit = c > 0xffff ? UTF16.GetLeadSurrogate(c) : c;

            // One index entry per block of 32 code units; entry 0 means "no bits set".
            int bitsIndex = lcccIndex[unit >> 5];
            if (bitsIndex == 0)
            {
                return false;
            }
            return (lcccBits[bitsIndex] & (1 << (unit & 0x1f))) != 0;
        }
示例#24
0
        /// <summary>
        /// Produces a random key string chosen by drawing a number in [0, 30):
        /// 0 gives U+0000, 1-9 give a single letter starting at 'A', 10-19 give a
        /// single code point counting down from U+10FFFF, and 20-29 give "a" followed
        /// by one more character.
        /// </summary>
        /// <param name="rand">Source of randomness; exactly one value is drawn.</param>
        /// <returns>The generated key.</returns>
        private String GetRandomKey(Random rand)
        {
            int r = rand.Next(30);

            if (r == 0)
            {
                return UTF16.ValueOf(r);
            }
            if (r < 10)
            {
                int letter = 'A' - 1 + r;
                return UTF16.ValueOf(letter);
            }
            if (r < 20)
            {
                int supplementary = 0x10FFFF - (r - 10);
                return UTF16.ValueOf(supplementary);
            }

            int suffix = r + 'a' - 1;
            return "a" + UTF16.ValueOf(suffix);
        }
示例#25
0
        /// <summary>
        /// Returns <c>true</c> if the current text represents emoji character or sequence.
        /// </summary>
        /// <param name="current">Start of the text, relative to <c>start</c>.</param>
        /// <param name="next">End of the text (exclusive), relative to <c>start</c>.</param>
        private bool IsEmoji(int current, int next)
        {
            int begin = start + current;
            int end = start + next;
            int first = UTF16.CharAt(text, 0, end, begin);

            if (!EMOJI.Contains(first))
            {
                return false;
            }
            if (!EMOJI_RK.Contains(first))
            {
                return true;
            }

            // Members of EmojiRK only count as emoji when there is evidence of an emoji
            // sequence: an emoji presentation selector or a keycap must follow.
            int following = begin + Character.CharCount(first);
            if (following >= end)
            {
                return false;
            }
            var marker = text[following];
            return marker == 0xFE0F || marker == 0x20E3;
        }
示例#26
0
        /// <summary>
        /// Builds a random test string out of assigned code points below
        /// <c>maxCodePoint</c>. The random generator is created on first use.
        /// </summary>
        /// <returns>The generated string; it always contains at least one code point.</returns>
        /// <remarks>
        /// The length limit is drawn anew (1..<c>maxCharCount</c>) before every iteration,
        /// so the final length is not a single up-front random choice.
        /// </remarks>
        internal String GetTestSource()
        {
            if (random == null)
            {
                random = CreateRandom(); // use test framework's random seed
            }

            String text = "";
            for (int count = 0; count < random.Next(maxCharCount) + 1; count++)
            {
                // Keep drawing until the code point is an assigned character.
                int candidate;
                do
                {
                    candidate = random.Next(maxCodePoint);
                }
                while (UCharacter.GetType(candidate) == UCharacterCategory.OtherNotAssigned);

                text += UTF16.ValueOf(candidate);
            }
            return text;
        }
示例#27
0
 /// <summary>
 /// Checks whether we are at the start of an initial block. If so, the remaining
 /// code points of that block share the same value, and
 /// <see cref="m_nextCodepoint_"/> plus the related members are moved forward.
 /// Used only for supplementary code points, because the offset to the trail
 /// indexes could be 0.
 /// </summary>
 /// <returns>true if we are at the start of a initial block.</returns>
 /// <exception cref="InvalidOperationException">The trie has no DataManipulate.</exception>
 private bool CheckNullNextTrailIndex()
 {
     if (m_nextIndex_ > 0)
     {
         return false;
     }

     // Jump to the last code point covered by the current lead surrogate.
     m_nextCodepoint_ += TRAIL_SURROGATE_COUNT_ - 1;

     int lead = UTF16.GetLeadSurrogate(m_nextCodepoint_);
     int stage1 = m_trie_.m_index_[lead >> Trie.INDEX_STAGE_1_SHIFT_];
     int leadBlock = stage1 << Trie.INDEX_STAGE_2_SHIFT_;

     if (m_trie_.m_dataManipulate_ == null)
     {
         throw new InvalidOperationException(
                   "The field DataManipulate in this Trie is null");   // ICU4N: This was originally NullPointerException
     }

     int leadValue = m_trie_[leadBlock + (lead & Trie.INDEX_STAGE_3_MASK_)];
     m_nextIndex_ = m_trie_.m_dataManipulate_.GetFoldingOffset(leadValue) - 1;
     m_nextBlockIndex_ = DATA_BLOCK_LENGTH_;
     return true;
 }
示例#28
0
        /// <summary>
        /// Out-of-line part of the in-line <see cref="Next32(CharacterIterator)"/> code.
        /// The call site has already done an initial ci.Next() and calls this method
        /// when the 16 bit value it got is >= <see cref="UTF16.LeadSurrogateMinValue"/>.
        /// </summary>
        /// <param name="ci">Iterator positioned after the unit passed as <paramref name="lead"/>.</param>
        /// <param name="lead">The 16 bit value the caller just read.</param>
        /// <returns>
        /// <c>Done32</c> at the end of the text, the combined code point when
        /// <paramref name="lead"/> is followed by a trail surrogate, otherwise
        /// <paramref name="lead"/> itself.
        /// </returns>
        // NOTE:  the underlying char iterator is left positioned in the middle of a
        //        surrogate pair.  ci.next() will work correctly from there, but
        //        ci.getIndex() will be wrong and needs adjustment.
        public static int NextTrail32(CharacterIterator ci, int lead)
        {
            if (lead == CharacterIterator.Done && ci.Index >= ci.EndIndex)
            {
                return Done32;
            }

            // Values above the lead surrogate range cannot start a pair.
            if (lead > UTF16.LeadSurrogateMaxValue)
            {
                return lead;
            }

            char following = ci.Next();
            if (!UTF16.IsTrailSurrogate(following))
            {
                // No pair after all: give the unit back.
                ci.Previous();
                return lead;
            }

            return ((lead - UTF16.LeadSurrogateMinValue) << 10) +
                   (following - UTF16.TrailSurrogateMinValue) +
                   UTF16.SupplementaryMinValue;
        }
示例#29
0
        /// <summary>
        /// Out-of-line part of the in-line <see cref="Next32(CharacterIterator)"/> code.
        /// The call site has already done an initial ci.Next() and calls this method
        /// when the 16 bit value it got is >= <see cref="UTF16.LEAD_SURROGATE_MIN_VALUE"/>.
        /// </summary>
        /// <param name="ci">Iterator positioned after the unit passed as <paramref name="lead"/>.</param>
        /// <param name="lead">The 16 bit value the caller just read.</param>
        /// <returns>
        /// <c>DONE32</c> at the end of the text, the combined code point when
        /// <paramref name="lead"/> is followed by a trail surrogate, otherwise
        /// <paramref name="lead"/> itself.
        /// </returns>
        // NOTE:  the underlying char iterator is left positioned in the middle of a
        //        surrogate pair.  ci.next() will work correctly from there, but
        //        ci.getIndex() will be wrong and needs adjustment.
        public static int NextTrail32(CharacterIterator ci, int lead)
        {
            if (lead == CharacterIterator.DONE && ci.Index >= ci.EndIndex)
            {
                return DONE32;
            }

            // Values above the lead surrogate range cannot start a pair.
            if (lead > UTF16.LEAD_SURROGATE_MAX_VALUE)
            {
                return lead;
            }

            char following = ci.Next();
            if (!UTF16.IsTrailSurrogate(following))
            {
                // No pair after all: give the unit back.
                ci.Previous();
                return lead;
            }

            return ((lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10) +
                   (following - UTF16.TRAIL_SURROGATE_MIN_VALUE) +
                   UTF16.SUPPLEMENTARY_MIN_VALUE;
        }
示例#30
0
        /// <summary>
        /// Builds an <see cref="Int32TrieBuilder"/> from <paramref name="setRanges"/>, serializes it
        /// to an <see cref="Int32Trie"/>, and reports through <c>Errln</c> every value that does not
        /// match <paramref name="checkRanges"/>.
        /// </summary>
        /// <param name="setRanges">Ranges written into the builder. A range spanning exactly one code
        /// point with <c>overwrite</c> set is written with <c>SetValue</c>; every other range is
        /// written with <c>SetRange</c>.</param>
        /// <param name="countSetRanges">Number of leading entries of <paramref name="setRanges"/> to apply.</param>
        /// <param name="checkRanges">Expected contents. Entry <c>i</c> supplies the value expected for
        /// the code points from the previous entry's <c>Limit</c> (0 before the first entry) up to,
        /// but not including, its own <c>Limit</c>. Entry 0's <c>Value</c> is also passed twice to
        /// the builder constructor, so the array must not be empty.</param>
        /// <param name="countCheckRanges">Number of leading entries of <paramref name="checkRanges"/> to verify.</param>
        /// <param name="latin1Linear">Passed to the builder constructor; the serialized trie's
        /// <c>IsLatin1Linear</c> is expected to report the same value.</param>
        private void _testTrieRanges(SetRange[] setRanges, int countSetRanges,
                                     CheckRange[] checkRanges, int countCheckRanges,
                                     bool latin1Linear)
        {
            Int32TrieBuilder newTrie = new Int32TrieBuilder(null, 2000,
                                                            checkRanges[0].Value,
                                                            checkRanges[0].Value,
                                                            latin1Linear);

            // Set values from setRanges[]. The non-short-circuiting "&=" is deliberate:
            // every range is applied even after an earlier one has failed.
            bool ok = true;
            for (int i = 0; i < countSetRanges; ++i)
            {
                int  setStart  = setRanges[i].start;
                int  setLimit  = setRanges[i].limit;
                int  setValue  = setRanges[i].value;
                bool overwrite = setRanges[i].overwrite;
                if ((setLimit - setStart) == 1 && overwrite)
                {
                    ok &= newTrie.SetValue(setStart, setValue);
                }
                else
                {
                    ok &= newTrie.SetRange(setStart, setLimit, setValue, overwrite);
                }
            }
            if (!ok)
            {
                Errln("setting values into a trie failed");
                return;
            }

            // Verify that all these values are in the new (unserialized) trie.
            // "start" carries over from one check range to the next.
            int start = 0;
            for (int i = 0; i < countCheckRanges; ++i)
            {
                int limit = checkRanges[i].Limit;
                int value = checkRanges[i].Value;

                while (start < limit)
                {
                    int actual = newTrie.GetValue(start);
                    if (value != actual)
                    {
                        Errln("newTrie [U+"
                              + start.ToHexString() + "]==0x"
                              + actual.ToHexString()
                              + " instead of 0x" + value.ToHexString());
                    }
                    ++start;
                }
            }

            Int32Trie trie = newTrie.Serialize(new _testFoldedValue(newTrie),
                                               new _testFoldingOffset());

            // Test the linear Latin-1 range (U+0000..U+00FF) of the serialized trie.
            if (latin1Linear)
            {
                start = 0;
                for (int i = 0; i < countCheckRanges && start <= 0xff; ++i)
                {
                    int limit = checkRanges[i].Limit;
                    int value = checkRanges[i].Value;

                    while (start < limit && start <= 0xff)
                    {
                        int latin1Value = trie.GetLatin1LinearValue((char)start);
                        if (value != latin1Value)
                        {
                            Errln("IntTrie.getLatin1LinearValue[U+"
                                  + start.ToHexString() + "]==0x"
                                  + latin1Value.ToHexString()
                                  + " instead of 0x" + value.ToHexString());
                        }
                        ++start;
                    }
                }
            }

            if (latin1Linear != trie.IsLatin1Linear)
            {
                Errln("trie serialization did not preserve "
                      + "Latin-1-linearity");
            }

            // Verify that all these values are in the serialized trie.
            start = 0;
            for (int i = 0; i < countCheckRanges; ++i)
            {
                int limit = checkRanges[i].Limit;
                int value = checkRanges[i].Value;

                if (start == 0xd800)
                {
                    // Skip surrogates: a check range that begins exactly at U+D800
                    // is not verified at all.
                    start = limit;
                    continue;
                }

                while (start < limit)
                {
                    if (start <= 0xffff)
                    {
                        // BMP code points are additionally checked through the
                        // char-based accessors.
                        int bmpValue = trie.GetBMPValue((char)start);
                        if (value != bmpValue)
                        {
                            Errln("serialized trie.getBMPValue(U+"
                                  + start.ToHexString() + ") == 0x"
                                  + bmpValue.ToHexString() + " instead of 0x"
                                  + value.ToHexString());
                        }
                        if (!UTF16.IsLeadSurrogate((char)start))
                        {
                            int leadValue = trie.GetLeadValue((char)start);
                            if (value != leadValue)
                            {
                                Errln("serialized trie.getLeadValue(U+"
                                      + start.ToHexString() + ") == 0x"
                                      + leadValue.ToHexString() + " instead of 0x"
                                      + value.ToHexString());
                            }
                        }
                    }

                    int codePointValue = trie.GetCodePointValue(start);
                    if (value != codePointValue)
                    {
                        Errln("serialized trie.getCodePointValue(U+"
                              + start.ToHexString() + ")==0x"
                              + codePointValue.ToHexString() + " instead of 0x"
                              + value.ToHexString());
                    }
                    ++start;
                }
            }

            // Enumerate and verify all ranges. The n-th enumerated range is expected to
            // span [checkRanges[n - 1].Limit, checkRanges[n].Limit) and, after XOR with
            // 0x5555, to carry checkRanges[n].Value.
            // NOTE(review): presumably _testEnumValue applies the matching XOR when it
            // extracts values - confirm against its definition.
            int            enumRanges = 1;
            TrieEnumerator iter       = new _testEnumValue(trie);
            while (iter.MoveNext())
            {
                RangeValueEnumeratorElement result = iter.Current;
                if (enumRanges >= countCheckRanges)
                {
                    // More ranges than expected: report it instead of indexing past
                    // the entries the caller asked to have verified.
                    Errln("utrie_enum() delivers unexpected extra range [U+"
                          + (result.Start).ToHexString() + "..U+"
                          + (result.Limit).ToHexString() + "].0x"
                          + (result.Value ^ 0x5555).ToHexString()
                          + " beyond the " + countCheckRanges + " check ranges");
                    break;
                }
                if (result.Start != checkRanges[enumRanges - 1].Limit ||
                    result.Limit != checkRanges[enumRanges].Limit ||
                    (result.Value ^ 0x5555) != checkRanges[enumRanges].Value)
                {
                    Errln("utrie_enum() delivers wrong range [U+"
                          + (result.Start).ToHexString() + "..U+"
                          + (result.Limit).ToHexString() + "].0x"
                          + (result.Value ^ 0x5555).ToHexString()
                          + " instead of [U+"
                          + (checkRanges[enumRanges - 1].Limit).ToHexString()
                          + "..U+"
                          + (checkRanges[enumRanges].Limit).ToHexString()
                          + "].0x"
                          + (checkRanges[enumRanges].Value).ToHexString());
                }
                enumRanges++;
            }

            // Test the linear Latin-1 range: the Latin-1 accessor must agree with the
            // lead-value accessor for every code point below U+0100.
            if (trie.IsLatin1Linear)
            {
                for (start = 0; start < 0x100; ++start)
                {
                    int latin1Value = trie.GetLatin1LinearValue((char)start);
                    int leadValue   = trie.GetLeadValue((char)start);
                    if (latin1Value != leadValue)
                    {
                        Errln("trie.getLatin1LinearValue[U+"
                              + start.ToHexString() + "]=0x"
                              + latin1Value.ToHexString()
                              + " instead of 0x"
                              + leadValue.ToHexString());
                    }
                }
            }

            _testTrieIteration(trie, checkRanges, countCheckRanges);
        }