/// <summary>
/// Builds a <see cref="PrefixCodedTerms"/> from a randomly generated set of terms and
/// verifies that enumerating it yields exactly the terms of the source set, in the
/// source set's iteration order.
/// </summary>
public virtual void TestRandom()
{
    // Collect the random terms in a set first; the builder is then fed in the set's iteration order.
    JCG.SortedSet<Term> terms = new JCG.SortedSet<Term>();
    int nterms = AtLeast(10000);
    for (int i = 0; i < nterms; i++)
    {
        Term term = new Term(TestUtil.RandomUnicodeString(Random, 2), TestUtil.RandomUnicodeString(Random));
        terms.Add(term);
    }

    PrefixCodedTerms.Builder b = new PrefixCodedTerms.Builder();
    foreach (Term @ref in terms)
    {
        b.Add(@ref);
    }
    PrefixCodedTerms pb = b.Finish();

    // IEnumerator<T> is IDisposable: scope the manually driven enumerator so it is
    // released even when one of the assertions below throws.
    using (IEnumerator<Term> expected = terms.GetEnumerator())
    {
        foreach (Term t in pb)
        {
            // The encoded terms must not outnumber the source terms...
            Assert.IsTrue(expected.MoveNext());
            Assert.AreEqual(expected.Current, t);
        }

        // ...and the source set must be fully consumed as well.
        Assert.IsFalse(expected.MoveNext());
    }
}
/// <summary>
/// Expert: specify a custom maximum possible symbol
/// (alphaMax); default is <see cref="Character.MaxCodePoint"/>.
/// </summary>
/// <param name="word">The input word, as an array of integer symbols.</param>
/// <param name="alphaMax">The largest symbol value allowed; every symbol of <paramref name="word"/> must be less than or equal to it.</param>
/// <param name="withTranspositions">Selects the transposition-aware parametric descriptions
/// (<c>Lev1T</c>/<c>Lev2T</c>) instead of the plain ones (<c>Lev1</c>/<c>Lev2</c>).</param>
/// <exception cref="ArgumentException">A symbol in <paramref name="word"/> is greater than <paramref name="alphaMax"/>.</exception>
public LevenshteinAutomata(int[] word, int alphaMax, bool withTranspositions)
{
    this.word = word;
    this.alphaMax = alphaMax;

    // Gather the distinct symbols of the word, validating each one against alphaMax.
    ISet<int> distinctSymbols = new JCG.SortedSet<int>();
    foreach (int symbol in word)
    {
        if (symbol > alphaMax)
        {
            throw new ArgumentException("alphaMax exceeded by symbol " + symbol + " in word");
        }
        distinctSymbols.Add(symbol);
    }

    // Copy the symbols into the alphabet array in the set's iteration order.
    alphabet = new int[distinctSymbols.Count];
    int slot = 0;
    foreach (int symbol in distinctSymbols)
    {
        alphabet[slot] = symbol;
        slot++;
    }

    int rangeCapacity = alphabet.Length + 2;
    rangeLower = new int[rangeCapacity];
    rangeUpper = new int[rangeCapacity];

    // Compute the intervals of symbols that are NOT part of the alphabet:
    // each gap between consecutive alphabet symbols becomes one [lower, upper] range.
    int gapStart = 0;
    foreach (int symbol in alphabet)
    {
        if (symbol > gapStart)
        {
            rangeLower[numRanges] = gapStart;
            rangeUpper[numRanges] = symbol - 1;
            numRanges++;
        }
        gapStart = symbol + 1;
    }

    // Close with the interval that reaches up to alphaMax, if anything is left above the last symbol.
    if (gapStart <= alphaMax)
    {
        rangeLower[numRanges] = gapStart;
        rangeUpper[numRanges] = alphaMax;
        numRanges++;
    }

    // Pick the description pair matching the transposition setting.
    ParametricDescription firstDescription;
    ParametricDescription secondDescription;
    if (withTranspositions)
    {
        firstDescription = new Lev1TParametricDescription(word.Length);
        secondDescription = new Lev2TParametricDescription(word.Length);
    }
    else
    {
        firstDescription = new Lev1ParametricDescription(word.Length);
        secondDescription = new Lev2ParametricDescription(word.Length);
    }

    // Slot 0 is left null; slots 1 and 2 hold the Lev1* and Lev2* descriptions respectively.
    descriptions = new ParametricDescription[] { null, firstDescription, secondDescription };
}
/// <summary>
/// Builds several <see cref="PrefixCodedTerms"/> instances from random term sets and
/// verifies that merging their enumerators through <c>MergedEnumerator</c> yields
/// exactly the union of all the sets, in the union set's iteration order.
/// </summary>
public virtual void TestMergeRandom()
{
    PrefixCodedTerms[] pb = new PrefixCodedTerms[TestUtil.NextInt32(Random, 2, 10)];

    // Union of every per-instance term set; this is what the merged enumeration must reproduce.
    JCG.SortedSet<Term> superSet = new JCG.SortedSet<Term>();

    for (int i = 0; i < pb.Length; i++)
    {
        JCG.SortedSet<Term> terms = new JCG.SortedSet<Term>();
        int nterms = TestUtil.NextInt32(Random, 0, 10000);
        for (int j = 0; j < nterms; j++)
        {
            Term term = new Term(TestUtil.RandomUnicodeString(Random, 2), TestUtil.RandomUnicodeString(Random, 4));
            terms.Add(term);
        }
        superSet.UnionWith(terms);

        PrefixCodedTerms.Builder b = new PrefixCodedTerms.Builder();
        foreach (Term @ref in terms)
        {
            b.Add(@ref);
        }
        pb[i] = b.Finish();
    }

    // One enumerator per encoded instance, handed over to the merging enumerator below.
    // NOTE(review): whether MergedEnumerator disposes these sub-enumerators is not visible here - confirm.
    List<IEnumerator<Term>> subs = new List<IEnumerator<Term>>();
    for (int i = 0; i < pb.Length; i++)
    {
        subs.Add(pb[i].GetEnumerator());
    }

    // IEnumerator<T> is IDisposable: scope both manually driven enumerators so they are
    // released even when one of the assertions below throws.
    using (IEnumerator<Term> expected = superSet.GetEnumerator())
    using (IEnumerator<Term> actual = new MergedEnumerator<Term>(subs.ToArray()))
    {
        while (actual.MoveNext())
        {
            // The merged stream must not outnumber the union...
            Assert.IsTrue(expected.MoveNext());
            Assert.AreEqual(expected.Current, actual.Current);
        }

        // ...and the union must be fully consumed as well.
        Assert.IsFalse(expected.MoveNext());
    }
}