Exemple #1
0
        public void Test54TruncatingIteratorFromLinearMatchLong()
        {
            StringAndValue[] data =
            {
                new StringAndValue("abcdef",   10),
                new StringAndValue("abcdepq", 200),
                new StringAndValue("abcdeyz", 3000)
            };
            CharsTrie trie = buildTrie(data, data.Length, StringTrieBuilder.Option.FAST);

            // Go into a linear-match node.
            trie.Next('a');
            trie.Next('b');
            trie.Next('c');
            // Truncate after the linear-match node.
            CharsTrie.Enumerator iter     = trie.GetEnumerator(3);
            StringAndValue[]     expected =
            {
                new StringAndValue("def", 10),
                new StringAndValue("dep", -1),
                new StringAndValue("dey", -1)
            };
            checkIterator(iter, expected);
            // Reset, and we should get the same result.
            Logln("after iter.Reset()");
            checkIterator(iter.Reset(), expected);
        }
Exemple #2
0
        public void Test50IteratorFromBranch()
        {
            CharsTrie trie = buildMonthsTrie(StringTrieBuilder.Option.FAST);

            // Go to a branch node.
            trie.Next('j');
            trie.Next('a');
            trie.Next('n');
            CharsTrie.Enumerator iter = trie.GetEnumerator();
            // Expected data: Same as in buildMonthsTrie(), except only the suffixes
            // following "jan".
            StringAndValue[] data =
            {
                new StringAndValue("",                             1),
                new StringAndValue(".",                            1),
                new StringAndValue("a",                            1),
                new StringAndValue("bb",                           1),
                new StringAndValue("c",                            1),
                new StringAndValue("ddd",                          1),
                new StringAndValue("ee",                           1),
                new StringAndValue("ef",                           1),
                new StringAndValue("f",                            1),
                new StringAndValue("gg",                           1),
                new StringAndValue("h",                            1),
                new StringAndValue("iiii",                         1),
                new StringAndValue("j",                            1),
                new StringAndValue("kk",                           1),
                new StringAndValue("kl",                           1),
                new StringAndValue("kmm",                          1),
                new StringAndValue("l",                            1),
                new StringAndValue("m",                            1),
                new StringAndValue("nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1),
                new StringAndValue("o",                            1),
                new StringAndValue("pp",                           1),
                new StringAndValue("qqq",                          1),
                new StringAndValue("r",                            1),
                new StringAndValue("uar",                          1),
                new StringAndValue("uary", 1)
            };
            checkIterator(iter, data);
            // Reset, and we should get the same result.
            Logln("after iter.Reset()");
            checkIterator(iter.Reset(), data);
        }
Exemple #3
0
        public void Test52TruncatingIteratorFromRoot()
        {
            CharsTrie trie = buildMonthsTrie(StringTrieBuilder.Option.FAST);

            CharsTrie.Enumerator iter = trie.GetEnumerator(4);
            // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
            // of each string, and no string duplicates from the truncation.
            StringAndValue[] data =
            {
                new StringAndValue("augu", -1),
                new StringAndValue("jan",   1),
                new StringAndValue("jan.",  1),
                new StringAndValue("jana",  1),
                new StringAndValue("janb", -1),
                new StringAndValue("janc",  1),
                new StringAndValue("jand", -1),
                new StringAndValue("jane", -1),
                new StringAndValue("janf",  1),
                new StringAndValue("jang", -1),
                new StringAndValue("janh",  1),
                new StringAndValue("jani", -1),
                new StringAndValue("janj",  1),
                new StringAndValue("jank", -1),
                new StringAndValue("janl",  1),
                new StringAndValue("janm",  1),
                new StringAndValue("jann", -1),
                new StringAndValue("jano",  1),
                new StringAndValue("janp", -1),
                new StringAndValue("janq", -1),
                new StringAndValue("janr",  1),
                new StringAndValue("janu", -1),
                new StringAndValue("july",  7),
                new StringAndValue("jun",   6),
                new StringAndValue("jun.",  6),
                new StringAndValue("june", 6)
            };
            checkIterator(iter, data);
            // Reset, and we should get the same result.
            Logln("after iter.Reset()");
            checkIterator(iter.Reset(), data);
        }
Exemple #4
0
 private void checkIterator(CharsTrie.Enumerator iter, StringAndValue[] data, int dataLength)
 {
     for (int i = 0; i < dataLength; ++i)
     {
         if (!iter.MoveNext())
         {
             Errln("trie iterator hasNext()=false for item " + i + ": " + data[i].s);
             break;
         }
         CharsTrie.Entry entry          = iter.Current;
         String          expectedString = data[i].s;
         if (!expectedString.ContentEquals(entry.Chars))
         {
             Errln(String.Format("trie iterator next().getString()={0} but expected {1} for item {2}",
                                 entry.Chars, data[i].s, i));
         }
         if (entry.Value != data[i].value)
         {
             Errln(String.Format("trie iterator next().getValue()={0}=0x{1:x} but expected {2}=0x{3:x} for item {4}: {5}",
                                 entry.Value, entry.Value,
                                 data[i].value, data[i].value,
                                 i, data[i].s));
         }
     }
     if (iter.MoveNext())
     {
         Errln("trie iterator hasNext()=true after all items");
     }
     //try // ICU4N specific - not applicable in .NET
     //{
     //    iter.Next();
     //    Errln("trie iterator next() did not throw NoSuchElementException after all items");
     //}
     //catch (NoSuchElementException e)
     //{
     //    // good
     //}
 }
Exemple #5
0
        public void Test51IteratorFromLinearMatch()
        {
            CharsTrie trie = buildMonthsTrie(StringTrieBuilder.Option.SMALL);

            // Go into a linear-match node.
            trie.Next('j');
            trie.Next('a');
            trie.Next('n');
            trie.Next('u');
            trie.Next('a');
            CharsTrie.Enumerator iter = trie.GetEnumerator();
            // Expected data: Same as in buildMonthsTrie(), except only the suffixes
            // following "janua".
            StringAndValue[] data =
            {
                new StringAndValue("r",  1),
                new StringAndValue("ry", 1)
            };
            checkIterator(iter, data);
            // Reset, and we should get the same result.
            Logln("after iter.Reset()");
            checkIterator(iter.Reset(), data);
        }
Exemple #6
0
        private bool GetCEsFromContractionCE32(CollationData data, int ce32)
        {
            int trieIndex = Collation.IndexFromCE32(ce32);

            ce32 = data.GetCE32FromContexts(trieIndex);  // Default if no suffix match.
                                                         // Since the original ce32 is not a prefix mapping,
                                                         // the default ce32 must not be another contraction.
            Debug.Assert(!Collation.IsContractionCE32(ce32));
            int contractionIndex = contractionCEs.Count;

            if (GetCEsFromCE32(data, Collation.SENTINEL_CP, ce32))
            {
                AddContractionEntry(CollationFastLatin.CONTR_CHAR_MASK, ce0, ce1);
            }
            else
            {
                // Bail out for c-without-contraction.
                AddContractionEntry(CollationFastLatin.CONTR_CHAR_MASK, Collation.NO_CE, 0);
            }
            // Handle an encodable contraction unless the next contraction is too long
            // and starts with the same character.
            int  prevX          = -1;
            bool addContraction = false;

            using (CharsTrie.Enumerator suffixes = CharsTrie.GetEnumerator(data.contexts, trieIndex + 2, 0))
            {
                while (suffixes.MoveNext())
                {
                    CharsTrie.Entry entry  = suffixes.Current;
                    ICharSequence   suffix = entry.Chars;
                    int             x      = CollationFastLatin.GetCharIndex(suffix[0]);
                    if (x < 0)
                    {
                        continue;
                    }                         // ignore anything but fast Latin text
                    if (x == prevX)
                    {
                        if (addContraction)
                        {
                            // Bail out for all contractions starting with this character.
                            AddContractionEntry(x, Collation.NO_CE, 0);
                            addContraction = false;
                        }
                        continue;
                    }
                    if (addContraction)
                    {
                        AddContractionEntry(prevX, ce0, ce1);
                    }
                    ce32 = entry.Value;
                    if (suffix.Length == 1 && GetCEsFromCE32(data, Collation.SENTINEL_CP, ce32))
                    {
                        addContraction = true;
                    }
                    else
                    {
                        AddContractionEntry(x, Collation.NO_CE, 0);
                        addContraction = false;
                    }
                    prevX = x;
                }
            }
            if (addContraction)
            {
                AddContractionEntry(prevX, ce0, ce1);
            }
            // Note: There might not be any fast Latin contractions, but
            // we need to enter contraction handling anyway so that we can bail out
            // when there is a non-fast-Latin character following.
            // For example: Danish &Y<<u+umlaut, when we compare Y vs. u\u0308 we need to see the
            // following umlaut and bail out, rather than return the difference of Y vs. u.
            ce0 = (Collation.NO_CE_PRIMARY << 32) | CONTRACTION_FLAG | contractionIndex;
            ce1 = 0;
            return(true);
        }
Exemple #7
0
 private void checkIterator(CharsTrie.Enumerator iter, StringAndValue[] data)
 {
     checkIterator(iter, data, data.Length);
 }