Example #1
0
        public void Test54TruncatingIteratorFromLinearMatchLong()
        {
            StringAndValue[] data =
            {
                new StringAndValue("abcdef",   10),
                new StringAndValue("abcdepq", 200),
                new StringAndValue("abcdeyz", 3000)
            };
            CharsTrie trie = buildTrie(data, data.Length, TrieBuilderOption.Fast);

            // Go into a linear-match node.
            trie.Next('a');
            trie.Next('b');
            trie.Next('c');
            // Truncate after the linear-match node.
            CharsTrieEnumerator iter = trie.GetEnumerator(3);

            StringAndValue[] expected =
            {
                new StringAndValue("def", 10),
                new StringAndValue("dep", -1),
                new StringAndValue("dey", -1)
            };
            checkIterator(iter, expected);
            // Reset, and we should get the same result.
            Logln("after iter.Reset()");
            checkIterator(iter.Reset(), expected);
        }
Example #2
0
        public void Test59IteratorFromChars()
        {
            StringAndValue[] data =
            {
                new StringAndValue("mm",     3),
                new StringAndValue("mmm",   33),
                new StringAndValue("mmnop", 333)
            };
            builder_.Clear();
            foreach (StringAndValue item in data)
            {
                builder_.Add(item.s, item.value);
            }
            ICharSequence trieChars = builder_.BuildCharSequence(TrieBuilderOption.Fast);

            checkIterator(CharsTrie.GetEnumerator(trieChars, 0, 0), data);
        }
Example #3
0
        public void Test50IteratorFromBranch()
        {
            CharsTrie trie = buildMonthsTrie(TrieBuilderOption.Fast);

            // Go to a branch node.
            trie.Next('j');
            trie.Next('a');
            trie.Next('n');
            CharsTrieEnumerator iter = trie.GetEnumerator();

            // Expected data: Same as in buildMonthsTrie(), except only the suffixes
            // following "jan".
            StringAndValue[] data =
            {
                new StringAndValue("",                             1),
                new StringAndValue(".",                            1),
                new StringAndValue("a",                            1),
                new StringAndValue("bb",                           1),
                new StringAndValue("c",                            1),
                new StringAndValue("ddd",                          1),
                new StringAndValue("ee",                           1),
                new StringAndValue("ef",                           1),
                new StringAndValue("f",                            1),
                new StringAndValue("gg",                           1),
                new StringAndValue("h",                            1),
                new StringAndValue("iiii",                         1),
                new StringAndValue("j",                            1),
                new StringAndValue("kk",                           1),
                new StringAndValue("kl",                           1),
                new StringAndValue("kmm",                          1),
                new StringAndValue("l",                            1),
                new StringAndValue("m",                            1),
                new StringAndValue("nnnnnnnnnnnnnnnnnnnnnnnnnnnn", 1),
                new StringAndValue("o",                            1),
                new StringAndValue("pp",                           1),
                new StringAndValue("qqq",                          1),
                new StringAndValue("r",                            1),
                new StringAndValue("uar",                          1),
                new StringAndValue("uary", 1)
            };
            checkIterator(iter, data);
            // Reset, and we should get the same result.
            Logln("after iter.Reset()");
            checkIterator(iter.Reset(), data);
        }
Example #4
0
        public void Test52TruncatingIteratorFromRoot()
        {
            CharsTrie           trie = buildMonthsTrie(TrieBuilderOption.Fast);
            CharsTrieEnumerator iter = trie.GetEnumerator(4);

            // Expected data: Same as in buildMonthsTrie(), except only the first 4 characters
            // of each string, and no string duplicates from the truncation.
            StringAndValue[] data =
            {
                new StringAndValue("augu", -1),
                new StringAndValue("jan",   1),
                new StringAndValue("jan.",  1),
                new StringAndValue("jana",  1),
                new StringAndValue("janb", -1),
                new StringAndValue("janc",  1),
                new StringAndValue("jand", -1),
                new StringAndValue("jane", -1),
                new StringAndValue("janf",  1),
                new StringAndValue("jang", -1),
                new StringAndValue("janh",  1),
                new StringAndValue("jani", -1),
                new StringAndValue("janj",  1),
                new StringAndValue("jank", -1),
                new StringAndValue("janl",  1),
                new StringAndValue("janm",  1),
                new StringAndValue("jann", -1),
                new StringAndValue("jano",  1),
                new StringAndValue("janp", -1),
                new StringAndValue("janq", -1),
                new StringAndValue("janr",  1),
                new StringAndValue("janu", -1),
                new StringAndValue("july",  7),
                new StringAndValue("jun",   6),
                new StringAndValue("jun.",  6),
                new StringAndValue("june", 6)
            };
            checkIterator(iter, data);
            // Reset, and we should get the same result.
            Logln("after iter.Reset()");
            checkIterator(iter.Reset(), data);
        }
Example #5
0
        public void Test51IteratorFromLinearMatch()
        {
            CharsTrie trie = buildMonthsTrie(StringTrieBuilder.Option.SMALL);

            // Go into a linear-match node.
            trie.Next('j');
            trie.Next('a');
            trie.Next('n');
            trie.Next('u');
            trie.Next('a');
            CharsTrie.Enumerator iter = trie.GetEnumerator();
            // Expected data: Same as in buildMonthsTrie(), except only the suffixes
            // following "janua".
            StringAndValue[] data =
            {
                new StringAndValue("r",  1),
                new StringAndValue("ry", 1)
            };
            checkIterator(iter, data);
            // Reset, and we should get the same result.
            Logln("after iter.Reset()");
            checkIterator(iter.Reset(), data);
        }
        private bool GetCEsFromContractionCE32(CollationData data, int ce32)
        {
            int trieIndex = Collation.IndexFromCE32(ce32);

            ce32 = data.GetCE32FromContexts(trieIndex);  // Default if no suffix match.
                                                         // Since the original ce32 is not a prefix mapping,
                                                         // the default ce32 must not be another contraction.
            Debug.Assert(!Collation.IsContractionCE32(ce32));
            int contractionIndex = contractionCEs.Count;

            if (GetCEsFromCE32(data, Collation.SentinelCodePoint, ce32))
            {
                AddContractionEntry(CollationFastLatin.CONTR_CHAR_MASK, ce0, ce1);
            }
            else
            {
                // Bail out for c-without-contraction.
                AddContractionEntry(CollationFastLatin.CONTR_CHAR_MASK, Collation.NoCE, 0);
            }
            // Handle an encodable contraction unless the next contraction is too long
            // and starts with the same character.
            int  prevX          = -1;
            bool addContraction = false;

            using (CharsTrieEnumerator suffixes = CharsTrie.GetEnumerator(data.contexts, trieIndex + 2, 0))
            {
                while (suffixes.MoveNext())
                {
                    CharsTrieEntry entry  = suffixes.Current;
                    ICharSequence  suffix = entry.Chars;
                    int            x      = CollationFastLatin.GetCharIndex(suffix[0]);
                    if (x < 0)
                    {
                        continue;
                    }                         // ignore anything but fast Latin text
                    if (x == prevX)
                    {
                        if (addContraction)
                        {
                            // Bail out for all contractions starting with this character.
                            AddContractionEntry(x, Collation.NoCE, 0);
                            addContraction = false;
                        }
                        continue;
                    }
                    if (addContraction)
                    {
                        AddContractionEntry(prevX, ce0, ce1);
                    }
                    ce32 = entry.Value;
                    if (suffix.Length == 1 && GetCEsFromCE32(data, Collation.SentinelCodePoint, ce32))
                    {
                        addContraction = true;
                    }
                    else
                    {
                        AddContractionEntry(x, Collation.NoCE, 0);
                        addContraction = false;
                    }
                    prevX = x;
                }
            }
            if (addContraction)
            {
                AddContractionEntry(prevX, ce0, ce1);
            }
            // Note: There might not be any fast Latin contractions, but
            // we need to enter contraction handling anyway so that we can bail out
            // when there is a non-fast-Latin character following.
            // For example: Danish &Y<<u+umlaut, when we compare Y vs. u\u0308 we need to see the
            // following umlaut and bail out, rather than return the difference of Y vs. u.
            ce0 = (Collation.NO_CE_PRIMARY << 32) | CONTRACTION_FLAG | (uint)contractionIndex;
            ce1 = 0;
            return(true);
        }
Example #7
0
 private void checkIterator(CharsTrie trie, StringAndValue[] data, int dataLength)
 {
     checkIterator(trie.GetEnumerator(), data, dataLength);
 }