/// <summary>
/// Round-trips a randomly generated set of terms through
/// <see cref="PrefixCodedTerms"/> and checks that enumerating the result
/// yields the same terms, in the same order, as the sorted source set.
/// </summary>
public virtual void TestRandom()
        {
            JCG.SortedSet<Term> terms = new JCG.SortedSet<Term>();
            int nterms = AtLeast(10000);

            // Duplicates produced by the random generator collapse in the set,
            // so the set may end up holding fewer than nterms entries.
            for (int i = 0; i < nterms; i++)
            {
                Term term = new Term(TestUtil.RandomUnicodeString(Random, 2), TestUtil.RandomUnicodeString(Random));
                terms.Add(term);
            }

            // Feed the builder in the sorted set's iteration order.
            PrefixCodedTerms.Builder b = new PrefixCodedTerms.Builder();
            foreach (Term @ref in terms)
            {
                b.Add(@ref);
            }
            PrefixCodedTerms pb = b.Finish();

            // IEnumerator<T> is IDisposable; scope it so it is released even
            // when one of the assertions below throws.
            using (IEnumerator<Term> expected = terms.GetEnumerator())
            {
                foreach (Term t in pb)
                {
                    Assert.IsTrue(expected.MoveNext());
                    Assert.AreEqual(expected.Current, t);
                }

                // The encoded terms must not stop short of the source set.
                Assert.IsFalse(expected.MoveNext());
            }
        }
示例#2
0
        /// <summary>
        /// Expert: builds the automata helper for <paramref name="word"/> with a
        /// caller-specified maximum possible symbol (<paramref name="alphaMax"/>);
        /// the default is <see cref="Character.MaxCodePoint"/>.
        /// </summary>
        /// <param name="word">The symbols of the input word.</param>
        /// <param name="alphaMax">Largest symbol value permitted in <paramref name="word"/>.</param>
        /// <param name="withTranspositions">
        /// When <c>true</c> the <c>Lev1T</c>/<c>Lev2T</c> parametric descriptions are used,
        /// otherwise the <c>Lev1</c>/<c>Lev2</c> ones.
        /// </param>
        /// <exception cref="ArgumentException">
        /// A symbol in <paramref name="word"/> is greater than <paramref name="alphaMax"/>.
        /// </exception>
        public LevenshteinAutomata(int[] word, int alphaMax, bool withTranspositions)
        {
            this.word = word;
            this.alphaMax = alphaMax;

            // Collect the distinct symbols of the word into a sorted set,
            // rejecting any symbol above alphaMax.
            ISet<int> symbols = new JCG.SortedSet<int>();
            foreach (int symbol in word)
            {
                if (symbol > alphaMax)
                {
                    throw new ArgumentException("alphaMax exceeded by symbol " + symbol + " in word");
                }
                symbols.Add(symbol);
            }

            // Flatten the set into the alphabet array, in the set's iteration order.
            alphabet = new int[symbols.Count];
            int slot = 0;
            foreach (int symbol in symbols)
            {
                alphabet[slot++] = symbol;
            }

            int capacity = alphabet.Length + 2;
            rangeLower = new int[capacity];
            rangeUpper = new int[capacity];

            // Record the intervals of symbols that are NOT in the alphabet:
            // every gap between consecutive alphabet members becomes one range.
            int gapStart = 0;
            foreach (int member in alphabet)
            {
                if (gapStart < member)
                {
                    rangeLower[numRanges] = gapStart;
                    rangeUpper[numRanges] = member - 1;
                    numRanges++;
                }
                gapStart = member + 1;
            }

            // Trailing range from just past the last alphabet member up to alphaMax.
            if (alphaMax >= gapStart)
            {
                rangeLower[numRanges] = gapStart;
                rangeUpper[numRanges] = alphaMax;
                numRanges++;
            }

            // Slot 0 is left null; slots 1 and 2 hold the Lev1*/Lev2* descriptions.
            int length = word.Length;
            ParametricDescription first;
            ParametricDescription second;
            if (withTranspositions)
            {
                first = new Lev1TParametricDescription(length);
                second = new Lev2TParametricDescription(length);
            }
            else
            {
                first = new Lev1ParametricDescription(length);
                second = new Lev2ParametricDescription(length);
            }
            descriptions = new ParametricDescription[] { null, first, second };
        }
        /// <summary>
        /// Builds several <see cref="PrefixCodedTerms"/> instances from random term
        /// sets, merges their enumerators with <c>MergedEnumerator</c>, and checks
        /// that the merged sequence matches the sorted union of all source sets.
        /// </summary>
        public virtual void TestMergeRandom()
        {
            PrefixCodedTerms[] pb = new PrefixCodedTerms[TestUtil.NextInt32(Random, 2, 10)];
            JCG.SortedSet<Term> superSet = new JCG.SortedSet<Term>();

            for (int i = 0; i < pb.Length; i++)
            {
                JCG.SortedSet<Term> terms = new JCG.SortedSet<Term>();
                int nterms = TestUtil.NextInt32(Random, 0, 10000);
                for (int j = 0; j < nterms; j++)
                {
                    Term term = new Term(TestUtil.RandomUnicodeString(Random, 2), TestUtil.RandomUnicodeString(Random, 4));
                    terms.Add(term);
                }

                // The union of every per-instance set is the expected merge result.
                superSet.UnionWith(terms);

                PrefixCodedTerms.Builder b = new PrefixCodedTerms.Builder();
                foreach (Term @ref in terms)
                {
                    b.Add(@ref);
                }
                pb[i] = b.Finish();
            }

            // One enumerator per encoded instance; the count is known up front,
            // so an array is filled directly instead of going through a list.
            IEnumerator<Term>[] subs = new IEnumerator<Term>[pb.Length];
            for (int i = 0; i < pb.Length; i++)
            {
                subs[i] = pb[i].GetEnumerator();
            }

            // Both enumerators are IDisposable; scope them so they are released
            // even when an assertion below throws.
            using (IEnumerator<Term> expected = superSet.GetEnumerator())
            using (IEnumerator<Term> actual = new MergedEnumerator<Term>(subs))
            {
                while (actual.MoveNext())
                {
                    Assert.IsTrue(expected.MoveNext());
                    Assert.AreEqual(expected.Current, actual.Current);
                }

                // The merged sequence must not stop short of the expected union.
                Assert.IsFalse(expected.MoveNext());
            }
        }