/// <summary>
/// Builds the shared test index: one keyword-analyzed "field" per document,
/// filled with random unicode strings, and records the distinct terms both as
/// a sorted set and as a string-union automaton.
/// </summary>
public override void SetUp()
{
    base.SetUp();

    // The randomly generated regexps are awful, which makes them good test input.
    // With the preflex (Lucene3x) codec the test can be very slow, so run fewer iterations.
    bool isPreflexCodec = Codec.Default.Name.Equals("Lucene3x", StringComparison.Ordinal);
    if (isPreflexCodec)
    {
        numIterations = 10 * RandomMultiplier;
    }
    else
    {
        numIterations = AtLeast(50);
    }

    dir = NewDirectory();

    // Statements are kept in the original evaluation order so that the sequence
    // of draws from Random is unchanged.
    var analyzer = new MockAnalyzer(Random, MockTokenizer.KEYWORD, false);
    var baseConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    int maxBufferedDocs = TestUtil.NextInt32(Random, 50, 1000);
    IndexWriterConfig config = (IndexWriterConfig)baseConfig.SetMaxBufferedDocs(maxBufferedDocs);
    RandomIndexWriter writer = new RandomIndexWriter(Random, dir, config);

    // A single Document/Field pair is reused; only the field value changes per document.
    Document doc = new Document();
    Field field = NewStringField("field", "", Field.Store.YES);
    doc.Add(field);

    terms = new JCG.SortedSet<BytesRef>();
    int docCount = AtLeast(200);
    int added = 0;
    while (added < docCount)
    {
        string value = TestUtil.RandomUnicodeString(Random);
        field.SetStringValue(value);
        terms.Add(new BytesRef(value));
        writer.AddDocument(doc);
        added++;
    }

    termsAutomaton = BasicAutomata.MakeStringUnion(terms);
    reader = writer.GetReader();
    searcher = NewSearcher(reader);
    writer.Dispose();
}
/// <summary>
/// Tests that a query matches an expected set of documents using Hits.
///
/// <para>Note that when using the Hits API, documents will only be returned
/// if they have a positive normalized score.
/// </para>
/// </summary>
/// <param name="luceneTestCase"> The current test instance. </param>
/// <param name="random"> The random source; passed on to <c>QueryUtils.Check</c> and <c>LuceneTestCase.Rarely</c>. </param>
/// <param name="query"> The query to test. </param>
/// <param name="searcher"> The searcher to test the query against. </param>
/// <param name="defaultFieldName"> Used for displaying the query in assertion messages. </param>
/// <param name="results"> A list of documentIds that must match the query. </param>
/// <seealso cref="CheckHitCollector(LuceneTestCase, Random, Query, string, IndexSearcher, int[])"/>
// LUCENENET specific
// Removes dependency on <see cref="LuceneTestCase.ClassEnv.Similarity"/>
public static void DoCheckHits(LuceneTestCase luceneTestCase, Random random, Query query, string defaultFieldName, IndexSearcher searcher, int[] results)
// NOTE(review): the #if that this #endif closes is not visible in this excerpt.
#endif
{
    // Only the top 1000 hits are requested from the searcher.
    ScoreDoc[] hits = searcher.Search(query, 1000).ScoreDocs;

    // Expected document ids, as a sorted set.
    JCG.SortedSet <int> correct = new JCG.SortedSet <int>();
    for (int i = 0; i < results.Length; i++)
    {
        correct.Add(Convert.ToInt32(results[i], CultureInfo.InvariantCulture));
    }

    // Document ids actually returned by the search.
    JCG.SortedSet <int> actual = new JCG.SortedSet <int>();
    for (int i = 0; i < hits.Length; i++)
    {
        actual.Add(Convert.ToInt32(hits[i].Doc, CultureInfo.InvariantCulture));
    }

    // The message is supplied as a lambda rather than a pre-built string.
    Assert.AreEqual(correct, actual, aggressive: false, () => query.ToString(defaultFieldName));

    QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        luceneTestCase,
#endif
        random, query, searcher, LuceneTestCase.Rarely(random));
}
/// <summary>
/// Collects every type defined in <paramref name="assemblies"/> to which
/// <paramref name="baseClass"/> is assignable, ordered by ordinal comparison of
/// <see cref="Type.FullName"/>.
/// </summary>
/// <param name="baseClass">The type that candidates must be assignable to.</param>
/// <param name="assemblies">The assemblies whose types are scanned.</param>
/// <returns>A sorted set of the matching types.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="baseClass"/> or <paramref name="assemblies"/> is <c>null</c>.
/// </exception>
public static ISet<Type> LoadTypesSubclassing(Type baseClass, params Assembly[] assemblies)
{
    if (baseClass is null)
    {
        throw new ArgumentNullException(nameof(baseClass));
    }

    if (assemblies is null)
    {
        throw new ArgumentNullException(nameof(assemblies));
    }

    Comparer<Type> byFullName = Comparer<Type>.Create(
        (left, right) => left.FullName.CompareToOrdinal(right.FullName));
    var matches = new JCG.SortedSet<Type>(byFullName);

    foreach (var candidateAssembly in assemblies)
    {
        foreach (var candidate in candidateAssembly.GetTypes())
        {
            if (!baseClass.IsAssignableFrom(candidate))
            {
                continue;
            }

            matches.Add(candidate);
        }
    }

    return matches;
}
/// <summary>
/// Tests that a query matches an expected set of documents using a
/// HitCollector.
/// <para>
/// Note that when using the HitCollector API, documents will be collected
/// if they "match" regardless of what their score is.
/// </para>
/// </summary>
/// <param name="luceneTestCase"> The current test instance. </param>
/// <param name="random"> The random source; passed on to <c>QueryUtils.Check</c> and <c>QueryUtils.WrapUnderlyingReader</c>. </param>
/// <param name="query"> The query to test. </param>
/// <param name="searcher"> The searcher to test the query against. </param>
/// <param name="defaultFieldName"> Used for displaying the query in assertion messages. </param>
/// <param name="results"> A list of documentIds that must match the query. </param>
/// <seealso cref="DoCheckHits(LuceneTestCase, Random, Query, string, IndexSearcher, int[])"/>
// LUCENENET specific
// Removes dependency on <see cref="LuceneTestCase.ClassEnv.Similarity"/>
public static void CheckHitCollector(LuceneTestCase luceneTestCase, Random random, Query query, string defaultFieldName, IndexSearcher searcher, int[] results)
// NOTE(review): the #if that this #endif closes is not visible in this excerpt.
#endif
{
    QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        luceneTestCase,
#endif
        random, query, searcher);

    Trace.TraceInformation("Checked");

    // Expected document ids, as a sorted set.
    JCG.SortedSet <int> correct = new JCG.SortedSet <int>();
    for (int i = 0; i < results.Length; i++)
    {
        correct.Add(Convert.ToInt32(results[i], CultureInfo.InvariantCulture));
    }

    // The collector is constructed around "actual"; the set is cleared and the
    // same collector reused for every search below.
    JCG.SortedSet <int> actual = new JCG.SortedSet <int>();
    ICollector c = new SetCollector(actual);

    searcher.Search(query, c);
    Assert.AreEqual(correct, actual, aggressive: false, () => "Simple: " + query.ToString(defaultFieldName));

    // Repeat the search against wrapped searchers for i = -1, 0 and 1.
    for (int i = -1; i < 2; i++)
    {
        actual.Clear();
        IndexSearcher s = QueryUtils.WrapUnderlyingReader(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
            luceneTestCase,
#endif
            random, searcher, i);
        s.Search(query, c);
        Assert.AreEqual(correct, actual, aggressive: false, () => "Wrap Reader " + i + ": " + query.ToString(defaultFieldName));
    }
}
/// <summary>
/// Initializes the extension from the given attributes and keywords and, when
/// at least one of them is present, builds the separator-joined string value.
/// </summary>
/// <param name="attributes">Attributes to adopt; ignored when <c>null</c> or empty.</param>
/// <param name="keywords">Keywords to adopt; ignored when <c>null</c> or empty.</param>
internal UnicodeLocaleExtension(JCG.SortedSet<string> attributes, JCG.SortedDictionary<string, string> keywords)
    : this()
{
    // Only non-empty arguments replace what the chained constructor left in place.
    if (attributes != null && attributes.Count > 0)
    {
        _attributes = attributes;
    }

    if (keywords != null && keywords.Count > 0)
    {
        _keywords = keywords;
    }

    if (_attributes.Count == 0 && _keywords.Count == 0)
    {
        return;
    }

    var text = new StringBuilder();

    foreach (string attr in _attributes)
    {
        text.Append(LanguageTag.Separator);
        text.Append(attr);
    }

    foreach (var entry in _keywords)
    {
        text.Append(LanguageTag.Separator);
        text.Append(entry.Key);

        // A keyword with an empty value contributes only its key.
        if (entry.Value.Length > 0)
        {
            text.Append(LanguageTag.Separator);
            text.Append(entry.Value);
        }
    }

    // Every item was prefixed with the separator; drop the leading '-'.
    m_value = text.ToString(1, text.Length - 1);
}
/// <summary>
/// Creates the weight for <paramref name="query"/>: extracts its terms, builds a
/// <see cref="TermContext"/> and term statistics for each of them, and, when the
/// query has a field, computes the similarity weight.
/// </summary>
/// <param name="query">The span query being weighted.</param>
/// <param name="searcher">Supplies the similarity, top reader context and statistics.</param>
public SpanWeight(SpanQuery query, IndexSearcher searcher)
{
    m_similarity = searcher.Similarity;
    m_query = query;
    m_termContexts = new Dictionary<Term, TermContext>();

    ISet<Term> extracted = new JCG.SortedSet<Term>();
    query.ExtractTerms(extracted);

    IndexReaderContext topContext = searcher.TopReaderContext;
    TermStatistics[] termStats = new TermStatistics[extracted.Count];

    int slot = 0;
    foreach (Term term in extracted)
    {
        TermContext termState = TermContext.Build(topContext, term);
        termStats[slot] = searcher.TermStatistics(term, termState);
        m_termContexts[term] = termState;
        slot++;
    }

    // Without a field no weight is computed and m_stats is left unassigned here.
    string field = query.Field;
    if (field == null)
    {
        return;
    }

    m_stats = m_similarity.ComputeWeight(query.Boost, searcher.CollectionStatistics(query.Field), termStats);
}
/// <summary>
/// Adds a large random set of terms to a <see cref="PrefixCodedTerms.Builder"/> in
/// sorted order and verifies that enumerating the finished
/// <see cref="PrefixCodedTerms"/> yields exactly the same terms in the same order.
/// </summary>
public virtual void TestRandom()
{
    JCG.SortedSet<Term> terms = new JCG.SortedSet<Term>();
    int nterms = AtLeast(10000);
    for (int i = 0; i < nterms; i++)
    {
        Term term = new Term(TestUtil.RandomUnicodeString(Random, 2), TestUtil.RandomUnicodeString(Random));
        terms.Add(term);
    }

    PrefixCodedTerms.Builder b = new PrefixCodedTerms.Builder();
    foreach (Term @ref in terms)
    {
        b.Add(@ref);
    }

    PrefixCodedTerms pb = b.Finish();

    // IEnumerator<T> is IDisposable; dispose it deterministically, including
    // when one of the assertions below throws.
    using (IEnumerator<Term> expected = terms.GetEnumerator())
    {
        foreach (Term t in pb)
        {
            Assert.IsTrue(expected.MoveNext());
            Assert.AreEqual(expected.Current, t);
        }

        // The sorted set must be exhausted too, i.e. no terms were dropped.
        Assert.IsFalse(expected.MoveNext());
    }
}
/// <summary>
/// Runs <paramref name="query"/> over every leaf of <c>context</c> and appends each
/// payload found on a matching span to <paramref name="payloads"/>.
/// </summary>
/// <param name="payloads">Receives the payload byte arrays.</param>
/// <param name="query">The span query whose spans are enumerated.</param>
private void GetPayloads(ICollection<byte[]> payloads, SpanQuery query)
{
    var extracted = new JCG.SortedSet<Term>();
    query.ExtractTerms(extracted);

    IDictionary<Term, TermContext> termContexts = new Dictionary<Term, TermContext>();
    foreach (Term term in extracted)
    {
        termContexts[term] = TermContext.Build(context, term);
    }

    foreach (AtomicReaderContext leaf in context.Leaves)
    {
        Spans spans = query.GetSpans(leaf, leaf.AtomicReader.LiveDocs, termContexts);
        while (spans.MoveNext())
        {
            if (!spans.IsPayloadAvailable)
            {
                continue;
            }

            foreach (var bytes in spans.GetPayload())
            {
                payloads.Add(bytes);
            }
        }
    }
}
/// <summary>
/// Initializes an empty result whose entries are kept in a sorted set, ordered
/// either by count-and-value or by value alone.
/// </summary>
/// <param name="size">Stored as <c>maxSize</c>.</param>
/// <param name="minCount">Stored as the initial <c>currentMin</c>.</param>
/// <param name="orderByCount">Selects <c>orderByCountAndValue</c> when <c>true</c>, otherwise <c>orderByValue</c>.</param>
/// <param name="totalCount">Stored as <c>totalCount</c>.</param>
/// <param name="totalMissingCount">Stored as <c>totalMissingCount</c>.</param>
public GroupedFacetResult(int size, int minCount, bool orderByCount, int totalCount, int totalMissingCount)
{
    maxSize = size;
    currentMin = minCount;
    this.totalCount = totalCount;
    this.totalMissingCount = totalMissingCount;

    var ordering = orderByCount ? orderByCountAndValue : orderByValue;
    this.facetEntries = new JCG.SortedSet<FacetEntry>(ordering);
}
/// <summary>
/// Expert: specify a custom maximum possible symbol
/// (alphaMax); default is <see cref="Character.MaxCodePoint"/>.
/// </summary>
/// <param name="word">The symbols of the word.</param>
/// <param name="alphaMax">The largest symbol allowed in <paramref name="word"/>.</param>
/// <param name="withTranspositions">Selects the transposition-aware parametric descriptions.</param>
/// <exception cref="ArgumentException">A symbol in <paramref name="word"/> exceeds <paramref name="alphaMax"/>.</exception>
public LevenshteinAutomata(int[] word, int alphaMax, bool withTranspositions)
{
    this.word = word;
    this.alphaMax = alphaMax;

    // The alphabet is the set of distinct symbols of the word, in sorted order.
    ISet<int> symbols = new JCG.SortedSet<int>();
    foreach (int symbol in word)
    {
        if (symbol > alphaMax)
        {
            throw new ArgumentException("alphaMax exceeded by symbol " + symbol + " in word");
        }

        symbols.Add(symbol);
    }

    alphabet = new int[symbols.Count];
    int next = 0;
    foreach (int symbol in symbols)
    {
        alphabet[next] = symbol;
        next++;
    }

    rangeLower = new int[alphabet.Length + 2];
    rangeUpper = new int[alphabet.Length + 2];

    // Compute the intervals of symbols that are NOT in the alphabet: the gaps
    // before and between alphabet members...
    int lower = 0;
    foreach (int higher in alphabet)
    {
        if (higher > lower)
        {
            rangeLower[numRanges] = lower;
            rangeUpper[numRanges] = higher - 1;
            numRanges++;
        }

        lower = higher + 1;
    }

    // ...plus the final interval up to and including alphaMax.
    if (lower <= alphaMax)
    {
        rangeLower[numRanges] = lower;
        rangeUpper[numRanges] = alphaMax;
        numRanges++;
    }

    // Index 0 is unused (null); indices 1 and 2 hold the Lev1/Lev2 descriptions.
    ParametricDescription distanceOne;
    ParametricDescription distanceTwo;
    if (withTranspositions)
    {
        distanceOne = new Lev1TParametricDescription(word.Length);
        distanceTwo = new Lev2TParametricDescription(word.Length);
    }
    else
    {
        distanceOne = new Lev1ParametricDescription(word.Length);
        distanceTwo = new Lev2ParametricDescription(word.Length);
    }

    descriptions = new ParametricDescription[] { null, distanceOne, distanceTwo };
}
// Transitions out of this state, indexed by input character.
// Each entry is the int index of the destination state in
// RBBITableBuilder.fDStates.
internal int[] fDtran;

/// <summary>
/// Creates a state descriptor with empty tag-value and position sets and a
/// transition table sized for the given input-symbol range.
/// </summary>
/// <param name="maxInputSymbol">The largest input symbol; the table gets one more slot than this.</param>
internal RBBIStateDescriptor(int maxInputSymbol)
{
    // fDtran needs to be pre-sized: it is indexed by input symbol and will
    // hold the next state number for each symbol.
    fDtran = new int[maxInputSymbol + 1];

    fPositions = new JCG.HashSet<RBBINode>();
    fTagVals = new JCG.SortedSet<int>();
}
//-----------------------------------------------------------------------------
//
//  MergeRuleStatusVals
//
//      Allocate positions in the global array of rule status {tag} values.
//
//      The RBBI runtime uses an array of {sets of status values} that can
//      be returned for boundaries. Each accepting state that has non-zero
//      status includes an index into this array. The format of the array is
//
//           Num of status values in group 1
//              status val
//              status val
//              ...
//           Num of status vals in group 2
//              status val
//              status val
//              ...
//           etc.
//
//-----------------------------------------------------------------------------
internal virtual void MergeRuleStatusVals()
{
    //
    //  The basic outline of what happens here is this...
    //
    //    for each state in this state table
    //       if the status tag list for this state is in the global statuses list
    //           record where and
    //           continue with the next state
    //       else
    //           add the tag list for this state to the global list.
    //
    int n;

    // Pre-load a single tag of {0} into the table.
    // We will need this as a default, for rule sets with no explicit tagging,
    // or with explicit tagging of {0}.
    if (fRB.fRuleStatusVals.Count == 0)
    {
        fRB.fRuleStatusVals.Add(1); // Num of statuses in group
        fRB.fRuleStatusVals.Add(0); // and our single status of zero

        int izero = 0;

        // No explicit tagging: the empty set maps to the default group at index 0.
        ISet<int> s0 = new JCG.SortedSet<int>();
        fRB.fStatusSets[s0] = izero;

        // Explicit tagging of {0} maps to the same default group.
        // FIX: this used to store under s0 a second time, so {0} was never
        // registered and a state tagged {0} got a duplicate group appended.
        // NOTE(review): relies on fStatusSets comparing keys by set content - confirm.
        ISet<int> s1 = new JCG.SortedSet<int>();
        s1.Add(izero);
        fRB.fStatusSets[s1] = izero;
    }

    // For each state, check whether the state's status tag values are
    // already entered into the status values array, and add them if not.
    for (n = 0; n < fDStates.Count; n++)
    {
        RBBIStateDescriptor sd = fDStates[n];
        ISet<int> statusVals = sd.fTagVals;
        int? arrayIndexI = fRB.fStatusSets.Get(statusVals);
        if (arrayIndexI == null)
        {
            // This is the first encounter of this set of status values.
            // Add them to the statusSets map. This map associates
            // the set of status values with an index in the runtime status
            // values array.
            arrayIndexI = fRB.fRuleStatusVals.Count;
            fRB.fStatusSets[statusVals] = arrayIndexI;

            // Add the new set of status values to the vector of values that
            // will eventually become the array used by the runtime engine.
            fRB.fRuleStatusVals.Add(statusVals.Count);
            fRB.fRuleStatusVals.AddRange(statusVals);
        }

        // Save the runtime array index back into the state descriptor.
        sd.fTagsIdx = arrayIndexI.Value; // ICU4N NOTE: At this point the value cannot be null
    }
}
/// <summary>
/// Creates a TopNSearcher over the given FST. </summary>
/// <param name="fst"> The <see cref="Lucene.Net.Util.Fst.FST{T}"/> to search on. </param>
/// <param name="topN"> The number of top scoring entries to retrieve. </param>
/// <param name="maxQueueDepth"> The maximum size of the queue of possible top entries. </param>
/// <param name="comparer"> The comparer used to select the top N. </param>
public TopNSearcher(FST<T> fst, int topN, int maxQueueDepth, IComparer<T> comparer)
{
    this.fst = fst;
    this.topN = topN;
    this.maxQueueDepth = maxQueueDepth;
    this.comparer = comparer;

    bytesReader = fst.GetBytesReader();

    // Queue order is given by a TieBreakByInputComparer constructed from the caller's comparer.
    var pathOrder = new TieBreakByInputComparer<T>(comparer);
    queue = new JCG.SortedSet<FSTPath<T>>(pathOrder);
}
/// <summary>
/// Rebuilds <c>m_orderedGroups</c> as a sorted set of every group in
/// <c>groupMap</c> and passes the comparer slot of the last (bottom) group to
/// each field comparer.
/// </summary>
private void BuildSortedSet()
{
    var comparer = new BuildSortedSetComparer(this);
    m_orderedGroups = new JCG.SortedSet<CollectedSearchGroup<TGroupValue>>(comparer);
    m_orderedGroups.UnionWith(groupMap.Values);
    Debug.Assert(m_orderedGroups.Count > 0);

    // The bottom group does not change while the comparers are updated, so look
    // it up once instead of once per comparer.
    var bottomSlot = m_orderedGroups.Last().ComparerSlot;
    foreach (FieldComparer fc in comparers)
    {
        fc.SetBottom(bottomSlot);
    }
}
/// <summary>
/// Writes a fixed-length, dereferenced bytes field: the distinct values go to
/// <paramref name="data"/> in sorted order, and one packed ordinal per entry of
/// <paramref name="values"/> goes to <paramref name="index"/>.
/// </summary>
/// <param name="field">The field; tagged with the legacy BYTES_FIXED_DEREF type.</param>
/// <param name="data">Receives the header, the value length and the distinct values.</param>
/// <param name="index">Receives the header, the distinct-value count and the packed ordinals.</param>
/// <param name="values">The values to write; a <c>null</c> entry is treated as an empty <see cref="BytesRef"/>.</param>
/// <param name="length">Written to <paramref name="data"/> as the value length.</param>
private void AddFixedDerefBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values, int length)
{
    field.PutAttribute(legacyKey, LegacyDocValuesType.BYTES_FIXED_DEREF.ToString());
    CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_VERSION_CURRENT);
    CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_FIXED_DEREF_VERSION_CURRENT);

    // deduplicate
    JCG.SortedSet<BytesRef> dictionary = new JCG.SortedSet<BytesRef>();
    foreach (BytesRef v in values)
    {
        dictionary.Add(v == null ? new BytesRef() : BytesRef.DeepCopyOf(v));
    }

    /* values */
    data.WriteInt32(length);
    foreach (BytesRef v in dictionary)
    {
        data.WriteBytes(v.Bytes, v.Offset, v.Length);
    }

    /* ordinals */
    int valueCount = dictionary.Count;
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(valueCount > 0);
    }

    index.WriteInt32(valueCount);
    int maxDoc = state.SegmentInfo.DocCount;
    PackedInt32s.Writer w = PackedInt32s.GetWriter(index, maxDoc, PackedInt32s.BitsRequired(valueCount - 1), PackedInt32s.DEFAULT);

    // A value's ordinal is its position in the sorted dictionary, i.e. the number
    // of entries that compare less than it. Record each position once instead of
    // re-counting the whole dictionary for every document.
    var ordinals = new Dictionary<BytesRef, int>(valueCount);
    int nextOrd = 0;
    foreach (BytesRef v in dictionary)
    {
        ordinals[v] = nextOrd;
        nextOrd++;
    }

    foreach (BytesRef v in values)
    {
        // null entries were stored as an empty BytesRef during deduplication.
        w.Add(ordinals[v ?? new BytesRef()]);
    }

    w.Finish();
}
/// <summary>
/// Asserts that <paramref name="me"/> and <paramref name="stayWithMe"/> hold the
/// same entries and answer key, value and lookup queries identically.
/// </summary>
/// <param name="me">The map under test.</param>
/// <param name="stayWithMe">The reference map it is compared against.</param>
private void checkEquals(UnicodeMap<Integer> me, JCG.SortedDictionary<String, Integer> stayWithMe)
{
    // Snapshot the entries of the map under test into the shared "temp" collection.
    temp.Clear();
    foreach (var entry in me.EntrySet())
    {
        temp.Add(entry);
    }

    ISet<KeyValuePair<String, Integer>> entrySet = new JCG.HashSet<KeyValuePair<string, Integer>>(stayWithMe);
    bool inParallel = entrySet.SetEquals(temp);
    if (!inParallel)
    {
        Logln(me.EntrySet().ToString());
        Logln(me.ToString());
        assertEquals("are in parallel", entrySet, temp);

        // we failed. Reset and start again
        entrySet.Clear();
        temp.Clear();
        return;
    }

    // ICU4N: looping through pairs instead of doing explicit table lookups is much faster
    foreach (var expectedEntry in stayWithMe)
    {
        var key = expectedEntry.Key;
        Integer value = expectedEntry.Value;

        assertEquals("containsKey", stayWithMe.ContainsKey(key), me.ContainsKey(key));
        assertEquals("get", value, me.Get(key));
        assertEquals("containsValue", stayWithMe.ContainsValue(value), me.ContainsValue(value));

        // When GetSingleCodePoint yields something other than int.MaxValue,
        // the lookup by code point is checked as well.
        int cp = UnicodeSet.GetSingleCodePoint(key);
        if (cp != int.MaxValue)
        {
            assertEquals("get", value, me.Get(cp));
        }
    }

    // ICU4N TODO: complete implementation
    //ISet<String> nonCodePointStrings = stayWithMe.tailMap("").keySet();
    //if (nonCodePointStrings.Count == 0) nonCodePointStrings = null; // for parallel api
    //assertEquals("getNonRangeStrings", nonCodePointStrings, me.GetNonRangeStrings());

    ISet<Integer> values = new JCG.SortedSet<Integer>(stayWithMe.Values);
    ISet<Integer> myValues = new JCG.SortedSet<Integer>(me.Values());
    assertEquals("values", myValues, values);

    foreach (String key in stayWithMe.Keys)
    {
        assertEquals("containsKey", stayWithMe.ContainsKey(key), me.ContainsKey(key));
    }
}
/// <summary>
/// Appends to <paramref name="buffer"/> a titled listing of the entries that are
/// in <paramref name="m1entries"/> but not in <paramref name="m2entries"/>,
/// ordered by <c>ENTRY_COMPARATOR</c>.
/// </summary>
/// <param name="title">Heading written before the entry count.</param>
/// <param name="m1entries">Entries to start from.</param>
/// <param name="m2entries">Entries to subtract.</param>
/// <param name="buffer">Receives the output.</param>
/// <param name="limit">Bounds the listing: at most <c>limit + 1</c> entries are written when non-negative, none when negative.</param>
private void getEntries(String title, ICollection<KeyValuePair<Integer, String>> m1entries, ICollection<KeyValuePair<Integer, String>> m2entries, StringBuilder buffer, int limit)
{
    ISet<KeyValuePair<Integer, String>> onlyInFirst = new JCG.SortedSet<KeyValuePair<Integer, String>>(ENTRY_COMPARATOR);
    onlyInFirst.UnionWith(m1entries);
    onlyInFirst.ExceptWith(m2entries);

    buffer.Append(title + ": " + onlyInFirst.Count + "\r\n");

    int remaining = limit;
    foreach (var entry in onlyInFirst)
    {
        // Stop once the budget has gone negative.
        if (remaining < 0)
        {
            return;
        }

        remaining--;

        buffer.Append(entry.Key);
        buffer.Append(" => ");
        buffer.Append(entry.Value);
        buffer.Append("\r\n");
    }
}
/// <summary>
/// Returns spans for <paramref name="query"/> over the whole reader context: the
/// query's own spans when there is exactly one leaf, otherwise a
/// <see cref="MultiSpansWrapper"/> over all leaves.
/// </summary>
/// <param name="topLevelReaderContext">The top-level context whose leaves are searched.</param>
/// <param name="query">The span query to evaluate.</param>
public static Spans Wrap(IndexReaderContext topLevelReaderContext, SpanQuery query)
{
    JCG.SortedSet<Term> extracted = new JCG.SortedSet<Term>();
    query.ExtractTerms(extracted);

    IDictionary<Term, TermContext> termContexts = new Dictionary<Term, TermContext>();
    foreach (Term term in extracted)
    {
        termContexts[term] = TermContext.Build(topLevelReaderContext, term);
    }

    IList<AtomicReaderContext> leaves = topLevelReaderContext.Leaves;
    if (leaves.Count != 1)
    {
        return new MultiSpansWrapper(leaves, query, termContexts);
    }

    // Single leaf: no wrapper needed.
    AtomicReaderContext onlyLeaf = leaves[0];
    AtomicReader leafReader = (AtomicReader)onlyLeaf.Reader;
    return query.GetSpans(onlyLeaf, leafReader.LiveDocs, termContexts);
}
/// <summary>
/// Compares two sets and, when they differ, reports both one-sided differences
/// through <c>TestFmwk.Errln</c>.
/// </summary>
/// <param name="here">The calling test log (not used by this method).</param>
/// <param name="values1">First set; reported under the "UnicodeMap" label.</param>
/// <param name="values2">Second set; reported under the "HashMap" label.</param>
/// <returns><c>true</c> when the sets are equal; otherwise <c>false</c>.</returns>
public static bool VerifySetsIdentical(AbstractTestLog here, ISet<T> values1, ISet<T> values2)
{
    bool identical = SetEqualityComparer<T>.Aggressive.Equals(values1, values2);
    if (!identical)
    {
        TestFmwk.Errln("Values differ:");

        TestFmwk.Errln("UnicodeMap - HashMap");
        ISet<T> onlyInFirst = new JCG.SortedSet<T>(values1, JCG.Comparer<T>.Default);
        onlyInFirst.ExceptWith(values2);
        TestFmwk.Errln(Show(onlyInFirst));

        TestFmwk.Errln("HashMap - UnicodeMap");
        ISet<T> onlyInSecond = new JCG.SortedSet<T>(values2, JCG.Comparer<T>.Default);
        onlyInSecond.ExceptWith(values1);
        TestFmwk.Errln(Show(onlyInSecond));
    }

    return identical;
}
/// <summary>
/// Fills a <c>UnicodeMap</c> and a reference sorted dictionary with the long
/// property value name of every code point up to <c>SET_LIMIT</c>, then checks that
/// both agree on entries, on the set of distinct values and on the key set of
/// each value.
/// </summary>
[Timeout(120000)] // ICU4N: This test can take awhile because of the slowness of adding items to SortedSet
public void TestUnicodeMapGeneralCategory()
{
    Logln("Setting General Category");

    // Each map is created once; the earlier throwaway instances that were
    // immediately overwritten have been removed.
    UnicodeMap<String> map1 = new UnicodeMap<String>();
    IDictionary<Integer, String> map2 = new JCG.SortedDictionary<Integer, String>();

    for (int cp = 0; cp <= SET_LIMIT; ++cp)
    {
        int enumValue = UChar.GetIntPropertyValue(cp, propEnum);
        //if (enumValue <= 0) continue; // for smaller set
        String value = UChar.GetPropertyValueName(propEnum, enumValue, NameChoice.Long);
        map1.Put(cp, value);
        map2[new Integer(cp)] = value;
    }

    checkNext(map1, map2, int.MaxValue);

    Logln("Comparing General Category");
    check(map1, map2, -1);

    Logln("Comparing Values");
    ISet<String> values1 = new JCG.SortedSet<String>(StringComparer.Ordinal);
    map1.GetAvailableValues(values1);
    ISet<String> values2 = new JCG.SortedSet<String>(map2.Values.Distinct(), StringComparer.Ordinal); // ICU4N NOTE: Added Distinct()
    if (!TestBoilerplate<string>.VerifySetsIdentical(this, values1, values2))
    {
        throw new ArgumentException("Halting");
    }

    Logln("Comparing Sets");
    foreach (string value in values1)
    {
        Logln(value ?? "null");
        UnicodeSet set1 = map1.KeySet(value);
        UnicodeSet set2 = TestBoilerplate<string>.GetSet(map2, value);
        if (!TestBoilerplate<string>.VerifySetsIdentical(this, set1, set2))
        {
            throw new ArgumentException("Halting");
        }
    }
}
/// <summary>
/// For a number of random regexps, checks that the terms returned by
/// <c>Terms.Intersect</c> on "field" accept exactly the language of the indexed-terms
/// automaton intersected with the regexp automaton.
/// </summary>
public virtual void TestIntersect()
{
    for (int iteration = 0; iteration < numIterations; iteration++)
    {
        string regexp = AutomatonTestUtil.RandomRegexp(Random);
        Automaton automaton = new RegExp(regexp, RegExpSyntax.NONE).ToAutomaton();

        CompiledAutomaton compiled = new CompiledAutomaton(automaton, SpecialOperations.IsFinite(automaton), false);
        TermsEnum termsEnum = MultiFields.GetTerms(reader, "field").Intersect(compiled, null);

        Automaton expected = BasicOperations.Intersection(termsAutomaton, automaton);

        // Each term is deep-copied before being stored.
        JCG.SortedSet<BytesRef> found = new JCG.SortedSet<BytesRef>();
        while (termsEnum.Next() != null)
        {
            BytesRef copy = BytesRef.DeepCopyOf(termsEnum.Term);
            found.Add(copy);
        }

        Automaton actual = BasicAutomata.MakeStringUnion(found);
        Assert.IsTrue(BasicOperations.SameLanguage(expected, actual));
    }
}
/// <summary>
/// Writes a variable-length, dereferenced bytes field: the distinct values go to
/// <paramref name="data"/> in sorted order, each prefixed with its length, and one
/// packed address per entry of <paramref name="values"/> goes to
/// <paramref name="index"/>.
/// </summary>
/// <param name="field">The field; tagged with the legacy BYTES_VAR_DEREF type.</param>
/// <param name="data">Receives the header and the length-prefixed distinct values.</param>
/// <param name="index">Receives the header, the total byte count and the packed addresses.</param>
/// <param name="values">The values to write; a <c>null</c> entry is treated as an empty <see cref="BytesRef"/>.</param>
private void AddVarDerefBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values)
{
    field.PutAttribute(legacyKey, LegacyDocValuesType.BYTES_VAR_DEREF.ToString());
    CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_VAR_DEREF_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_CURRENT);
    CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_VAR_DEREF_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_DEREF_VERSION_CURRENT);

    // deduplicate
    JCG.SortedSet<BytesRef> uniqueValues = new JCG.SortedSet<BytesRef>();
    foreach (BytesRef value in values)
    {
        if (value == null)
        {
            uniqueValues.Add(new BytesRef());
        }
        else
        {
            uniqueValues.Add(BytesRef.DeepCopyOf(value));
        }
    }

    /* values */
    long startPosition = data.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
    long lastAddress = 0;
    Dictionary<BytesRef, long> addressByValue = new Dictionary<BytesRef, long>();
    foreach (BytesRef unique in uniqueValues)
    {
        // An address is the offset of the value's length prefix from the first value.
        lastAddress = data.Position - startPosition; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
        addressByValue[unique] = lastAddress;
        WriteVInt16(data, unique.Length);
        data.WriteBytes(unique.Bytes, unique.Offset, unique.Length);
    }

    /* ordinals */
    long totalBytes = data.Position - startPosition; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
    index.WriteInt64(totalBytes);
    int maxDoc = state.SegmentInfo.DocCount;

    // lastAddress is the address of the final value written, i.e. the largest one.
    PackedInt32s.Writer addressWriter = PackedInt32s.GetWriter(index, maxDoc, PackedInt32s.BitsRequired(lastAddress), PackedInt32s.DEFAULT);
    foreach (BytesRef value in values)
    {
        BytesRef key = value ?? new BytesRef();
        addressWriter.Add(addressByValue[key]);
    }

    addressWriter.Finish();
}
/// <summary>
/// Helper that creates a SortedSet (using the comparer constructor) holding
/// exactly <paramref name="count"/> elements, of which
/// <paramref name="numberOfMatchingElements"/> are taken from
/// <paramref name="enumerableToMatchTo"/>. The matching elements are added first,
/// followed by deterministic generated elements until the count is reached.
/// </summary>
/// <param name="enumerableToMatchTo">Source of the matching elements; may be <c>null</c>.</param>
/// <param name="count">The size of the resulting set.</param>
/// <param name="numberOfMatchingElements">How many leading elements of <paramref name="enumerableToMatchTo"/> to include.</param>
protected IEnumerable<T> CreateSortedSet(IEnumerable<T> enumerableToMatchTo, int count, int numberOfMatchingElements)
{
    JCG.SortedSet<T> result = new JCG.SortedSet<T>(GetIComparer());
    int seed = 528;
    JCG.List<T> match = null;

    // Add matching elements
    if (enumerableToMatchTo != null)
    {
        match = enumerableToMatchTo.ToList();
        int taken = 0;
        while (taken < numberOfMatchingElements)
        {
            result.Add(match[taken]);
            taken++;
        }
    }

    // Add generated elements to reach the desired count
    while (result.Count < count)
    {
        T candidate;
        do
        {
            candidate = CreateT(seed++);
        }
        // Don't want any unexpectedly duplicate values
        while (result.Contains(candidate) || (match != null && match.Contains(candidate, GetIEqualityComparer())));

        result.Add(candidate);
    }

    // Validate that the set fits the guidelines as expected
    Debug.Assert(result.Count == count);
    if (match != null)
    {
        int actualMatchingCount = 0;
        foreach (T lookingFor in match)
        {
            if (result.Contains(lookingFor))
            {
                actualMatchingCount++;
            }
        }

        Assert.Equal(numberOfMatchingElements, actualMatchingCount);
    }

    return result;
}
/// <summary>
/// Builds several <see cref="PrefixCodedTerms"/> from random term sets and
/// verifies that merging their enumerators yields exactly the sorted union of
/// all terms.
/// </summary>
public virtual void TestMergeRandom()
{
    PrefixCodedTerms[] pb = new PrefixCodedTerms[TestUtil.NextInt32(Random, 2, 10)];
    JCG.SortedSet<Term> superSet = new JCG.SortedSet<Term>();

    for (int i = 0; i < pb.Length; i++)
    {
        JCG.SortedSet<Term> terms = new JCG.SortedSet<Term>();
        int nterms = TestUtil.NextInt32(Random, 0, 10000);
        for (int j = 0; j < nterms; j++)
        {
            Term term = new Term(TestUtil.RandomUnicodeString(Random, 2), TestUtil.RandomUnicodeString(Random, 4));
            terms.Add(term);
        }

        superSet.UnionWith(terms);

        PrefixCodedTerms.Builder b = new PrefixCodedTerms.Builder();
        foreach (Term @ref in terms)
        {
            b.Add(@ref);
        }

        pb[i] = b.Finish();
    }

    List<IEnumerator<Term>> subs = new List<IEnumerator<Term>>();
    for (int i = 0; i < pb.Length; i++)
    {
        subs.Add(pb[i].GetEnumerator());
    }

    // IEnumerator<T> is IDisposable; dispose both enumerators deterministically,
    // including when one of the assertions below throws.
    using (IEnumerator<Term> expected = superSet.GetEnumerator())
    using (IEnumerator<Term> actual = new MergedEnumerator<Term>(subs.ToArray()))
    {
        while (actual.MoveNext())
        {
            Assert.IsTrue(expected.MoveNext());
            Assert.AreEqual(expected.Current, actual.Current);
        }

        // The union must be exhausted too, i.e. the merge dropped nothing.
        Assert.IsFalse(expected.MoveNext());
    }
}
/// <summary>
/// Writes the distinct field numbers of all pending documents to
/// <c>vectorsStream</c> and returns them as a sorted array. </summary>
/// <returns> A sorted array containing unique field numbers. </returns>
private int[] FlushFieldNums()
{
    JCG.SortedSet<int> fieldNums = new JCG.SortedSet<int>();
    foreach (DocData pendingDoc in pendingDocs)
    {
        foreach (FieldData fieldData in pendingDoc.fields)
        {
            fieldNums.Add(fieldData.fieldNum);
        }
    }

    int numDistinctFields = fieldNums.Count;
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(numDistinctFields > 0);
    }

    // Token byte: (count - 1, capped at 7) in the top three bits, bits-per-value below.
    int bitsRequired = PackedInt32s.BitsRequired(fieldNums.Max);
    int cappedCount = Math.Min(numDistinctFields - 1, 0x07);
    int token = (cappedCount << 5) | bitsRequired;
    vectorsStream.WriteByte((byte)token);

    // When the capped count saturates, the remainder follows as a VInt.
    if (numDistinctFields - 1 >= 0x07)
    {
        vectorsStream.WriteVInt32(numDistinctFields - 1 - 0x07);
    }

    PackedInt32s.Writer writer = PackedInt32s.GetWriterNoHeader(vectorsStream, PackedInt32s.Format.PACKED, numDistinctFields, bitsRequired, 1);

    // A single pass both writes each field number and copies it into the result.
    int[] sortedFieldNums = new int[numDistinctFields];
    int position = 0;
    foreach (int fieldNum in fieldNums)
    {
        writer.Add(fieldNum);
        sortedFieldNums[position] = fieldNum;
        position++;
    }

    writer.Finish();
    return sortedFieldNums;
}
/// <summary>
/// Adds random non-empty strings to the hash and checks that sorting the hash
/// with the UTF-16 comparer yields the same order as an ordinal-sorted set of
/// the same strings.
/// </summary>
public virtual void TestSort()
{
    BytesRef @ref = new BytesRef();
    int rounds = AtLeast(2);
    for (int round = 0; round < rounds; round++)
    {
        // LUCENENET specific - to ensure sorting strings works the same in the SortedSet,
        // we need to use StringComparer.Ordinal, which compares strings the same
        // way they are done in Java.
        var strings = new JCG.SortedSet<string>(StringComparer.Ordinal);
        for (int added = 0; added < 797; added++)
        {
            // Keep drawing until a non-empty string comes back.
            string str = TestUtil.RandomRealisticUnicodeString(Random, 1000);
            while (str.Length == 0)
            {
                str = TestUtil.RandomRealisticUnicodeString(Random, 1000);
            }

            @ref.CopyChars(str);
            Hash.Add(@ref);
            strings.Add(str);
        }

        // We use the UTF-16 comparer here, because we need to be able to
        // compare to native String.CompareTo() [UTF-16]:
#pragma warning disable 612, 618
        int[] sort = Hash.Sort(BytesRef.UTF8SortedAsUTF16Comparer);
#pragma warning restore 612, 618
        Assert.IsTrue(strings.Count < sort.Length);

        BytesRef scratch = new BytesRef();
        int position = 0;
        foreach (string expected in strings)
        {
            @ref.CopyChars(expected);
            Assert.AreEqual(@ref, Hash.Get(sort[position], scratch));
            position++;
        }

        Hash.Clear();
        Assert.AreEqual(0, Hash.Count);
        Hash.Reinit();
    }
}
/// <summary>
/// Applies the final rules to convert from a language-specific phonetic representation to a
/// language-independent representation.
/// </summary>
/// <param name="phonemeBuilder">The current phonemes.</param>
/// <param name="finalRules">The final rules to apply.</param>
/// <returns>
/// The resulting phonemes; <paramref name="phonemeBuilder"/> itself when
/// <paramref name="finalRules"/> is empty.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="finalRules"/> is <c>null</c>.</exception>
private PhonemeBuilder ApplyFinalRules(PhonemeBuilder phonemeBuilder, IDictionary<string, IList<Rule>> finalRules)
{
    if (finalRules == null)
    {
        // The single-string constructor takes the parameter NAME, not a message;
        // pass both so the exception reports the parameter and the text correctly.
        throw new ArgumentNullException(nameof(finalRules), "finalRules can not be null");
    }

    if (finalRules.Count == 0)
    {
        return phonemeBuilder;
    }

    // The sorted set, ordered by Phoneme.COMPARER, also removes duplicate phonemes.
    ISet<Phoneme> phonemes = new JCG.SortedSet<Phoneme>(Phoneme.COMPARER);

    foreach (Phoneme phoneme in phonemeBuilder.Phonemes)
    {
        PhonemeBuilder subBuilder = PhonemeBuilder.Empty(phoneme.Languages);
        string phonemeText = phoneme.GetPhonemeText();

        // The next index comes from the rules application, so there is no increment here.
        for (int i = 0; i < phonemeText.Length;)
        {
            RulesApplication rulesApplication = new RulesApplication(finalRules, phonemeText, subBuilder, i, maxPhonemes).Invoke();
            bool found = rulesApplication.IsFound;
            subBuilder = rulesApplication.PhonemeBuilder;

            if (!found)
            {
                // not found, appending as-is
                subBuilder.Append(phonemeText.Substring(i, 1));
            }

            i = rulesApplication.I;
        }

        phonemes.UnionWith(subBuilder.Phonemes);
    }

    return new PhonemeBuilder(phonemes.ToList());
}
/// <summary>
/// Tests that a query matches an expected set of documents using a
/// HitCollector.
/// <para>
/// Note that when using the HitCollector API, documents will be collected
/// if they "match" regardless of what their score is.
/// </para>
/// </summary>
/// <param name="random"> The random source; passed on to <c>QueryUtils.Check</c> and <c>QueryUtils.WrapUnderlyingReader</c>. </param>
/// <param name="query"> The query to test. </param>
/// <param name="searcher"> The searcher to test the query against. </param>
/// <param name="defaultFieldName"> Used for displaying the query in assertion messages. </param>
/// <param name="results"> A list of documentIds that must match the query. </param>
/// <seealso cref="DoCheckHits(Random, Query, string, IndexSearcher, int[])"/>
public static void CheckHitCollector(Random random, Query query, string defaultFieldName, IndexSearcher searcher, int[] results)
{
    QueryUtils.Check(random, query, searcher);

    Trace.TraceInformation("Checked");

    JCG.SortedSet<int> expectedDocs = new JCG.SortedSet<int>(results);

    // The collector is constructed around "collectedDocs"; the set is cleared and
    // the same collector reused for every search below.
    JCG.SortedSet<int> collectedDocs = new JCG.SortedSet<int>();
    ICollector collector = new SetCollector(collectedDocs);

    searcher.Search(query, collector);
    Assert.AreEqual(expectedDocs, collectedDocs, aggressive: false, () => "Simple: " + query.ToString(defaultFieldName));

    // Repeat the search against wrapped searchers for -1, 0 and 1.
    foreach (int wrapMode in new[] { -1, 0, 1 })
    {
        collectedDocs.Clear();
        IndexSearcher wrapped = QueryUtils.WrapUnderlyingReader(random, searcher, wrapMode);
        wrapped.Search(query, collector);
        Assert.AreEqual(expectedDocs, collectedDocs, aggressive: false, () => "Wrap Reader " + wrapMode + ": " + query.ToString(defaultFieldName));
    }
}
/// <summary>
/// Advances the stream by one token and asserts that its term text equals
/// <paramref name="term"/> and that its morphosyntactic tags, compared as a set,
/// equal <paramref name="tags"/>.
/// </summary>
/// <param name="ts">The token stream to advance.</param>
/// <param name="term">The expected term text.</param>
/// <param name="tags">The expected tags, in any order.</param>
private void assertPOSToken(TokenStream ts, String term, params String[] tags)
{
    ts.IncrementToken();
    assertEquals(term, ts.GetAttribute<ICharTermAttribute>().ToString());

    SCG.ISet<String> actual = new JCG.SortedSet<String>(StringComparer.Ordinal);
    SCG.ISet<String> expected = new JCG.SortedSet<String>(StringComparer.Ordinal);

    var tagsAttribute = ts.GetAttribute<IMorphosyntacticTagsAttribute>();
    foreach (StringBuilder tagBuilder in tagsAttribute.Tags)
    {
        actual.add(tagBuilder.toString());
    }

    foreach (String expectedTag in tags)
    {
        expected.add(expectedTag);
    }

    if (expected.equals(actual))
    {
        return;
    }

    // Print both sets before asserting.
    Console.WriteLine("Expected:\n" + expected);
    Console.WriteLine("Actual:\n" + actual);
    assertEquals(expected, actual);
}
/// <summary>
/// Tests that all documents up to maxDoc which are *not* in the
/// expected result set, have an explanation which indicates that
/// the document does not match.
/// </summary>
/// <param name="q"> The query to explain. </param>
/// <param name="defaultFieldName"> Used for displaying the query in assertion messages. </param>
/// <param name="searcher"> The searcher to explain the query against. </param>
/// <param name="results"> The documentIds that are expected to match and are therefore skipped. </param>
public static void CheckNoMatchExplanations(Query q, string defaultFieldName, IndexSearcher searcher, int[] results)
{
    string d = q.ToString(defaultFieldName);

    // Document ids are plain ints: no nullable element type and no
    // Convert.ToInt32(object, IFormatProvider) round-trip, which boxed every id.
    JCG.SortedSet<int> ignore = new JCG.SortedSet<int>();
    foreach (int expectedDoc in results)
    {
        ignore.Add(expectedDoc);
    }

    int maxDoc = searcher.IndexReader.MaxDoc;
    for (int doc = 0; doc < maxDoc; doc++)
    {
        if (ignore.Contains(doc))
        {
            continue;
        }

        Explanation exp = searcher.Explain(q, doc);
        Assert.IsNotNull(exp, "Explanation of [[" + d + "]] for #" + doc + " is null");
        Assert.IsFalse(exp.IsMatch, "Explanation of [[" + d + "]] for #" + doc + " doesn't indicate non-match: " + exp.ToString());
    }
}