/// <summary>
/// Randomly decides whether to merge; when it does, it selects a random-sized prefix
/// of the shuffled segments that are not already being merged.
/// </summary>
/// <param name="mergeTrigger">The event that prompted this call; not consulted here.</param>
/// <param name="segmentInfos">The segments currently known to the writer.</param>
/// <returns>A specification with exactly one merge, or <c>null</c> when no merge was chosen.</returns>
public override MergeSpecification FindMerges(MergeTrigger? mergeTrigger, SegmentInfos segmentInfos)
{
    //System.out.println("MRMP: findMerges sis=" + segmentInfos);

    // Segments that are already part of a running merge are not candidates.
    ICollection<SegmentCommitInfo> alreadyMerging = Writer.Get().MergingSegments;
    IList<SegmentCommitInfo> candidates = new List<SegmentCommitInfo>();
    foreach (SegmentCommitInfo info in segmentInfos.Segments)
    {
        if (alreadyMerging.Contains(info))
        {
            continue;
        }
        candidates.Add(info);
    }

    int candidateCount = candidates.Count;
    if (candidateCount <= 1)
    {
        return null;
    }

    // With more than 30 candidates always merge; otherwise merge one time in five.
    if (candidateCount <= 30 && Random.Next(5) != 3)
    {
        return null;
    }

    candidates = CollectionsHelper.Shuffle(candidates);

    // TODO: sometimes make more than 1 merge?
    MergeSpecification spec = new MergeSpecification();
    int howMany = TestUtil.NextInt(Random, 1, candidateCount);
    spec.Add(new OneMerge(candidates.SubList(0, howMany)));
    return spec;
}
/// <summary>
/// Checks that two <c>TermsFilter</c> instances built from the same terms (one from the
/// de-duplicated set, one from a shuffled list that may contain duplicates) are equal and
/// share a hash code, and that a filter missing one term is not equal to either.
/// </summary>
public void TestHashCodeAndEquals()
{
    int num = AtLeast(100);
    bool singleField = Random().NextBoolean();
    IList<Term> terms = new List<Term>();
    var uniqueTerms = new HashSet<Term>();
    for (int i = 0; i < num; i++)
    {
        string field = "field" + (singleField ? "1" : Random().Next(100).ToString());
        string @string = TestUtil.RandomRealisticUnicodeString(Random());
        terms.Add(new Term(field, @string));
        uniqueTerms.Add(new Term(field, @string));

        TermsFilter left = TermsFilter(singleField && Random().NextBoolean(), uniqueTerms);

        // The shuffled ordering is the value RETURNED by Shuffle (every other call site in
        // this code base reassigns it); the previous code discarded it, so the "right"
        // filter was always built from insertion order. A separate local is used so that
        // 'terms' itself stays an appendable list for the next iteration.
        IList<Term> shuffledTerms = CollectionsHelper.Shuffle(terms);
        TermsFilter right = TermsFilter(singleField && Random().NextBoolean(), shuffledTerms);

        assertEquals(right, left);
        assertEquals(right.GetHashCode(), left.GetHashCode());

        if (uniqueTerms.Count > 1)
        {
            // Drop one unique term: the resulting filter must differ from both.
            IList<Term> asList = new List<Term>(uniqueTerms);
            asList.RemoveAt(0);
            TermsFilter notEqual = TermsFilter(singleField && Random().NextBoolean(), asList);
            assertFalse(left.Equals(notEqual));
            assertFalse(right.Equals(notEqual));
        }
    }
}
/// <summary>
/// Collects at most <paramref name="size"/> terms from every field of
/// <paramref name="reader"/>. Once the sample is full, each further term overwrites a
/// randomly chosen slot. The result is shuffled before it is returned.
/// </summary>
/// <param name="random">Source of the replacement positions.</param>
/// <param name="reader">Reader whose fields and terms are enumerated.</param>
/// <param name="size">Maximum number of terms to keep.</param>
/// <returns>The shuffled sample.</returns>
private static IList<Term> Sample(Random random, IndexReader reader, int size)
{
    IList<Term> picked = new List<Term>();
    Fields allFields = MultiFields.GetFields(reader);

    foreach (string fieldName in allFields)
    {
        Terms fieldTerms = allFields.Terms(fieldName);
        Assert.IsNotNull(fieldTerms);

        TermsEnum cursor = fieldTerms.Iterator(null);
        while (cursor.Next() != null)
        {
            if (picked.Count < size)
            {
                picked.Add(new Term(fieldName, cursor.Term()));
                continue;
            }

            // Sample is full: replace a random existing entry.
            int slot = random.Next(size);
            picked[slot] = new Term(fieldName, cursor.Term());
        }
    }

    return CollectionsHelper.Shuffle(picked);
}
/// <summary>
/// Picks random categories from the A and B dimensions without repeating a category
/// (each dimension is shuffled and a prefix is taken), then appends one random entry
/// from each of the NO_PARENT dimensions C and D.
/// </summary>
/// <param name="random">Decides how many A and B categories are taken.</param>
/// <returns>The selected categories: A entries, then B entries, then one C and one D.</returns>
private static IList<FacetField> RandomCategories(Random random)
{
    int numFacetsA = random.Next(3) + 1; // 1-3
    int numFacetsB = random.Next(2) + 1; // 1-2

    List<FacetField> poolA = new List<FacetField>(Arrays.AsList(CATEGORIES_A));
    List<FacetField> poolB = new List<FacetField>(Arrays.AsList(CATEGORIES_B));

    // Shuffling and taking a prefix guarantees no category is selected twice.
    poolA = CollectionsHelper.Shuffle(poolA).ToList();
    poolB = CollectionsHelper.Shuffle(poolB).ToList();

    List<FacetField> result = new List<FacetField>();
    result.AddRange(poolA.SubList(0, numFacetsA));
    result.AddRange(poolB.SubList(0, numFacetsB));

    // add the NO_PARENT categories
    FacetField fromC = CATEGORIES_C[Random().Next(NUM_CHILDREN_CP_C)];
    result.Add(fromC);
    FacetField fromD = CATEGORIES_D[Random().Next(NUM_CHILDREN_CP_D)];
    result.Add(fromD);

    return result;
}
/// <summary>
/// For a number of random regular expressions, visits the indexed terms in shuffled
/// order and, for every term the automaton accepts, verifies that the terms enum can
/// seek to it (randomly via an exact seek or a ceiling seek).
/// </summary>
public virtual void TestSeeking()
{
    for (int iteration = 0; iteration < NumIterations; iteration++)
    {
        string pattern = AutomatonTestUtil.RandomRegexp(Random());
        RegExp parsed = new RegExp(pattern, RegExp.NONE);
        Automaton acceptor = parsed.ToAutomaton();
        TermsEnum termsEnum = MultiFields.GetTerms(Reader, "field").Iterator(null);

        IList<BytesRef> visitOrder = new List<BytesRef>(Terms);
        visitOrder = CollectionsHelper.Shuffle(visitOrder);

        foreach (BytesRef candidate in visitOrder)
        {
            if (!BasicOperations.Run(acceptor, candidate.Utf8ToString()))
            {
                // Term is rejected by the automaton: nothing to verify.
                continue;
            }

            bool useExactSeek = Random().NextBoolean();
            if (useExactSeek)
            {
                // seek exact
                Assert.IsTrue(termsEnum.SeekExact(candidate));
                continue;
            }

            // seek ceil
            Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(candidate));
            Assert.AreEqual(candidate, termsEnum.Term());
        }
    }
}
/// <summary>
/// Thread body: waits for the starting gun, then 20 times re-runs every recorded term
/// query in shuffled order and checks that hit counts, doc IDs and scores match the
/// previously recorded answers. Any failure is rethrown wrapped in an <c>Exception</c>.
/// </summary>
public override void Run()
{
    try
    {
        StartingGun.Wait();

        for (int round = 0; round < 20; round++)
        {
            IList<KeyValuePair<BytesRef, TopDocs>> entries = new List<KeyValuePair<BytesRef, TopDocs>>(Answers.EntrySet());
            entries = CollectionsHelper.Shuffle(entries);

            foreach (KeyValuePair<BytesRef, TopDocs> entry in entries)
            {
                TermQuery query = new TermQuery(new Term("body", entry.Key));
                TopDocs actual = s.Search(query, 100);
                TopDocs expected = entry.Value;

                Assert.AreEqual(expected.TotalHits, actual.TotalHits);
                Assert.AreEqual(expected.ScoreDocs.Length, actual.ScoreDocs.Length, "query=" + entry.Key.Utf8ToString());

                int hitCount = expected.ScoreDocs.Length;
                for (int hit = 0; hit != hitCount; hit++)
                {
                    Assert.AreEqual(expected.ScoreDocs[hit].Doc, actual.ScoreDocs[hit].Doc);
                    // Floats really should be identical:
                    Assert.IsTrue(expected.ScoreDocs[hit].Score == actual.ScoreDocs[hit].Score);
                }
            }
        }
    }
    catch (Exception e)
    {
        throw new Exception(e.Message, e);
    }
}
/// <summary>
/// Draws the per-field seed, the coord type (0-2) and the query-norm flag from
/// <paramref name="random"/>, then stores a shuffled copy of <c>AllSims</c>.
/// </summary>
/// <param name="random">Source of the three random settings.</param>
public RandomSimilarityProvider(Random random)
{
    // The three draws stay in this order so a given seed yields the same settings.
    this.PerFieldSeed = random.Next();
    this.CoordType = random.Next(3);
    this.ShouldQueryNorm = random.NextBoolean();

    // Java original: Collections.shuffle(KnownSims, random).
    // Note that 'random' is not handed to the shuffle helper here.
    IList<Similarity> copyOfAllSims = new List<Similarity>(AllSims);
    this.KnownSims = CollectionsHelper.Shuffle(copyOfAllSims);
}
/// <summary>
/// Loads terms and frequencies from Wikipedia (cached). The dictionary input and the
/// benchmark input are two successive shuffles of that data.
/// </summary>
public override void SetUp()
{
    // Deliberately fails when assertions are active: benchmarks must run without them.
    Debug.Assert(false, "disable assertions before running benchmarks!");

    IList<Input> wikiTerms = ReadTop50KWiki();

    IList<Input> forDictionary = CollectionsHelper.Shuffle(wikiTerms);
    dictionaryInput = forDictionary.ToArray();

    IList<Input> forBenchmark = CollectionsHelper.Shuffle(forDictionary);
    benchmarkInput = forBenchmark;
}
/// <summary>
/// Adds a random, non-empty selection of <paramref name="values"/> to
/// <paramref name="doc"/>; each selected value is added both as an unstored string
/// field ("field") and as a sorted-set doc-values field ("dv").
/// </summary>
/// <param name="doc">Document that receives the fields.</param>
/// <param name="values">Candidate values; a shuffled prefix of them is used.</param>
private static void AddSome(Document doc, string[] values)
{
    IList<string> candidates = Arrays.AsList(values);
    IList<string> shuffled = CollectionsHelper.Shuffle(candidates);

    int remaining = TestUtil.NextInt(Random(), 1, shuffled.Count);
    int index = 0;
    while (remaining > 0)
    {
        string value = shuffled[index];
        doc.Add(new StringField("field", value, Field.Store.NO));
        doc.Add(new SortedSetDocValuesField("dv", new BytesRef(value)));
        index++;
        remaining--;
    }
}
/// <summary>
/// Indexes random single-term documents, then repeatedly compares a constant-score
/// boolean SHOULD query over a random subset of terms against a match-all query
/// restricted by a <c>TermsFilter</c> over the same subset: hit counts and doc IDs must match.
/// </summary>
public void TestRandom()
{
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    int num = AtLeast(100);
    bool singleField = Random().NextBoolean();
    IList<Term> terms = new List<Term>();
    for (int i = 0; i < num; i++)
    {
        string field = "field" + (singleField ? "1" : Random().Next(100).ToString());
        string @string = TestUtil.RandomRealisticUnicodeString(Random());
        terms.Add(new Term(field, @string));
        Document doc = new Document();
        doc.Add(NewStringField(field, @string, Field.Store.YES));
        w.AddDocument(doc);
    }
    IndexReader reader = w.Reader;
    w.Dispose();

    IndexSearcher searcher = NewSearcher(reader);

    int numQueries = AtLeast(10);
    for (int i = 0; i < numQueries; i++)
    {
        // Shuffle returns the shuffled list; the result must be kept, otherwise every
        // query would use the same leading terms. The previous code discarded it.
        terms = CollectionsHelper.Shuffle(terms);

        int numTerms = 1 + Random().Next(Math.Min(BooleanQuery.MaxClauseCount, terms.Count));
        BooleanQuery bq = new BooleanQuery();
        for (int j = 0; j < numTerms; j++)
        {
            bq.Add(new BooleanClause(new TermQuery(terms[j]), BooleanClause.Occur.SHOULD));
        }
        TopDocs queryResult = searcher.Search(new ConstantScoreQuery(bq), reader.MaxDoc);

        // Same subset of terms, expressed as a filter over all documents.
        MatchAllDocsQuery matchAll = new MatchAllDocsQuery();
        TermsFilter filter = TermsFilter(singleField, terms.SubList(0, numTerms));
        TopDocs filterResult = searcher.Search(matchAll, filter, reader.MaxDoc);

        assertEquals(filterResult.TotalHits, queryResult.TotalHits);
        ScoreDoc[] scoreDocs = filterResult.ScoreDocs;
        for (int j = 0; j < scoreDocs.Length; j++)
        {
            assertEquals(scoreDocs[j].Doc, queryResult.ScoreDocs[j].Doc);
        }
    }

    reader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Unions the given automata (in shuffled order), determinizes the result, and asserts
/// that it is finite and accepts every term, both as a string and as UTF-8 bytes
/// through a <c>ByteRunAutomaton</c>.
/// </summary>
/// <param name="a">Automata to union.</param>
/// <param name="terms">Terms the union must accept.</param>
public void AssertLexicon(List<Automaton> a, List<string> terms)
{
    var shuffled = CollectionsHelper.Shuffle(a);
    var union = BasicOperations.Union(shuffled);
    union.Determinize();
    Assert.IsTrue(SpecialOperations.IsFinite(union));

    // String-level acceptance.
    for (int i = 0; i < terms.Count; i++)
    {
        Assert.IsTrue(BasicOperations.Run(union, terms[i]));
    }

    // Byte-level acceptance on the UTF-8 form of each term.
    var byteRunner = new ByteRunAutomaton(union);
    for (int i = 0; i < terms.Count; i++)
    {
        sbyte[] utf8 = terms[i].GetBytes(Encoding.UTF8);
        Assert.IsTrue(byteRunner.Run(utf8, 0, utf8.Length));
    }
}
/// <summary>
/// Registers the available postings and doc-values formats (skipping those named in
/// <paramref name="avoidCodecs"/>), shuffles both lists and keeps at most four of each.
/// </summary>
/// <param name="random">Source of the per-field seed and the block-size parameters.</param>
/// <param name="avoidCodecs">Passed through to <c>Add</c> / <c>AddDocValues</c>.</param>
public RandomCodec(Random random, ISet<string> avoidCodecs)
{
    this.PerFieldSeed = random.Next();

    // TODO: make it possible to specify min/max items per block via CL:
    int minItemsPerBlock = TestUtil.NextInt(random, 2, 100);
    int maxItemsPerBlock = 2 * (Math.Max(2, minItemsPerBlock - 1)) + random.Next(100);
    // Only referenced by the disabled DirectPostingsFormat below; the draw is kept.
    int lowFreqCutoff = TestUtil.NextInt(random, 2, 100);

    // Postings formats currently disabled here (see the Java original for their arguments):
    // FSTPostingsFormat, FSTOrdPostingsFormat, FSTPulsing41PostingsFormat,
    // FSTOrdPulsing41PostingsFormat, DirectPostingsFormat, Pulsing41PostingsFormat (twice,
    // "add pulsing again with (usually) different parameters"), TestBloomFilteredLucene41Postings,
    // MockSepPostingsFormat, MockFixedIntBlockPostingsFormat, MockVariableIntBlockPostingsFormat,
    // MockRandomPostingsFormat, NestedPulsingPostingsFormat, Lucene41WithOrds,
    // SimpleTextPostingsFormat, MemoryPostingsFormat (true / false).
    //
    // TODO as a PostingsFormat which wraps others, we should allow TestBloomFilteredLucene41Postings
    // to be constructed with a choice of concrete PostingsFormats. Maybe useful to have a generic
    // means of marking and dealing with such "wrapper" classes?
    Add(avoidCodecs,
        new Lucene41PostingsFormat(minItemsPerBlock, maxItemsPerBlock),
        new AssertingPostingsFormat());

    // Doc-values formats currently disabled here:
    // DiskDocValuesFormat, MemoryDocValuesFormat, SimpleTextDocValuesFormat.
    AddDocValues(avoidCodecs,
        new Lucene45DocValuesFormat(),
        new AssertingDocValuesFormat());

    Formats = CollectionsHelper.Shuffle(Formats);
    DvFormats = CollectionsHelper.Shuffle(DvFormats);

    // Avoid too many open files:
    const int maxFormats = 4;
    if (Formats.Count > maxFormats)
    {
        Formats = Formats.SubList(0, maxFormats);
    }
    if (DvFormats.Count > maxFormats)
    {
        DvFormats = DvFormats.SubList(0, maxFormats);
    }
}
/// <summary>
/// Creates an index for sorting: documents with IDs 0, 10, 20, ... are added in
/// shuffled order, a commit is made, and then roughly 20% of them are deleted.
/// </summary>
/// <param name="dir">Directory that receives the index.</param>
/// <param name="numDocs">Number of documents to add.</param>
/// <param name="random">Drives the analyzer, the writer and the deletions.</param>
public void CreateIndex(Directory dir, int numDocs, Random random)
{
    IList<int> docIds = new List<int>();
    for (int n = 0; n < numDocs; n++)
    {
        docIds.Add(n * 10);
    }

    // shuffle them for indexing
    // LUCENENET NOTE: Using LINQ, so we need to reassign the variable with the result
    docIds = CollectionsHelper.Shuffle(docIds);

    if (VERBOSE)
    {
        Console.WriteLine("Shuffled IDs for indexing: " + Arrays.ToString(docIds.ToArray()));
    }

    PositionsTokenStream positions = new PositionsTokenStream();
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
    conf.SetMaxBufferedDocs(4); // create some segments
    conf.SetSimilarity(new NormsSimilarity(conf.Similarity)); // for testing norms field

    using (RandomIndexWriter writer = new RandomIndexWriter(random, dir, conf))
    {
        writer.RandomForceMerge = (false);

        foreach (int docId in docIds)
        {
            writer.AddDocument(Doc(docId, positions));
        }

        // delete some documents
        writer.Commit();
        foreach (int docId in docIds)
        {
            if (random.NextDouble() >= 0.2)
            {
                continue;
            }

            if (VERBOSE)
            {
                Console.WriteLine("delete doc_id " + docId);
            }
            writer.DeleteDocuments(new Term(ID_FIELD, docId.ToString()));
        }
    }
}
/// <summary>
/// Splits the eligible segments (those present in <paramref name="segmentsToMerge"/>)
/// into randomly sized merges of at most 10 segments each.
/// </summary>
/// <param name="segmentInfos">All current segments.</param>
/// <param name="maxSegmentCount">Requested segment count; not consulted here.</param>
/// <param name="segmentsToMerge">Segments that may take part in a forced merge.</param>
/// <returns>
/// The merges to run, or <c>null</c> when there is no eligible segment, or only a single
/// one without deletions.
/// </returns>
public override MergeSpecification FindForcedMerges(SegmentInfos segmentInfos, int maxSegmentCount, IDictionary<SegmentCommitInfo, bool?> segmentsToMerge)
{
    IList<SegmentCommitInfo> eligible = new List<SegmentCommitInfo>();
    foreach (SegmentCommitInfo candidate in segmentInfos.Segments)
    {
        if (!segmentsToMerge.ContainsKey(candidate))
        {
            continue;
        }
        eligible.Add(candidate);
    }

    //System.out.println("MRMP: findMerges sis=" + segmentInfos + " eligible=" + eligibleSegments);

    bool shouldMerge = eligible.Count > 1 || (eligible.Count == 1 && eligible[0].HasDeletions());
    if (!shouldMerge)
    {
        return null;
    }

    MergeSpecification spec = new MergeSpecification();

    // Already shuffled having come out of a set but
    // shuffle again for good measure:
    eligible = CollectionsHelper.Shuffle(eligible);

    for (int start = 0; start < eligible.Count;)
    {
        int max = Math.Min(10, eligible.Count - start);
        int take;
        if (max <= 2)
        {
            take = max;
        }
        else
        {
            take = TestUtil.NextInt(Random, 2, max);
        }

        spec.Add(new OneMerge(eligible.SubList(start, start + take)));
        start += take;
    }

    // Sanity check: every segment placed in a merge must have been eligible.
    foreach (OneMerge merge in spec.Merges)
    {
        foreach (SegmentCommitInfo merged in merge.Segments)
        {
            Debug.Assert(segmentsToMerge.ContainsKey(merged));
        }
    }

    return spec;
}
/// <summary>
/// Builds a bag of single-letter tokens ('a'..'z'), each repeated a random number of
/// times (never more than 255), shuffles it and returns it formatted with
/// <c>Arrays.ToString</c>. This works because a letter tokenizer is used.
/// The highest repeat count is appended to <c>Expected</c> so it can be checked
/// against the norm.
/// </summary>
/// <returns>The shuffled tokens as a single string.</returns>
private string AddValue()
{
    IList<string> tokens = new List<string>();
    int ceiling = TestUtil.NextInt(Random(), 0, 255);
    int highestFreq = 0;

    for (char letter = 'a'; letter <= 'z'; letter++)
    {
        int freq = TestUtil.NextInt(Random(), 0, ceiling);
        for (int repeat = 0; repeat < freq; repeat++)
        {
            tokens.Add(char.ToString(letter));
        }

        if (freq > highestFreq)
        {
            highestFreq = freq;
        }
    }

    Expected.Add(highestFreq);

    IList<string> shuffled = CollectionsHelper.Shuffle(tokens);
    return Arrays.ToString(shuffled.ToArray());
}
/// <summary>
/// Shuffles the common, medium and rare terms together and, for every prefix of at
/// least two terms and every possible minimum-should-match value, compares the two
/// scorers produced by <c>Scorer(..., true)</c> and <c>Scorer(..., false)</c> via <c>AssertNext</c>.
/// </summary>
public virtual void TestNextVaryingNumberOfTerms()
{
    IList<string> pool = new List<string>();
    pool.AddRange(Arrays.AsList(CommonTerms));
    pool.AddRange(Arrays.AsList(MediumTerms));
    pool.AddRange(Arrays.AsList(RareTerms));
    pool = CollectionsHelper.Shuffle(pool);

    for (int prefixLength = 2; prefixLength <= pool.Count; prefixLength++)
    {
        string[] terms = pool.SubList(0, prefixLength).ToArray(/*new string[0]*/);

        int minShouldMatch = 1;
        while (minShouldMatch <= terms.Length)
        {
            Scorer expected = Scorer(terms, minShouldMatch, true);
            Scorer actual = Scorer(terms, minShouldMatch, false);
            AssertNext(expected, actual);
            minShouldMatch++;
        }
    }
}
/// <summary>
/// Picks random entries from <paramref name="terms"/> (10 picks per term) and checks
/// that each one matches at least one document through a term query and can be found
/// by a ceiling seek on the "field" terms enum. Progress and timings are written to the
/// console; the assertion fails at the end if any lookup failed.
/// </summary>
/// <param name="r">Reader to search.</param>
/// <param name="terms">Terms expected to exist in the reader.</param>
private void TestSavedTerms(IndexReader r, IList<BytesRef> terms)
{
    Console.WriteLine("TEST: run " + terms.Count + " terms on reader=" + r);
    IndexSearcher searcher = NewSearcher(r);
    IList<BytesRef> shuffled = CollectionsHelper.Shuffle(terms);
    TermsEnum termsEnum = MultiFields.GetTerms(r, "field").Iterator(null);

    bool anyFailure = false;
    for (int iter = 0; iter < 10 * shuffled.Count; iter++)
    {
        BytesRef term = shuffled[Random().Next(shuffled.Count)];
        Console.WriteLine("TEST: search " + term);

        long startTicks = Environment.TickCount;
        int hitCount = searcher.Search(new TermQuery(new Term("field", term)), 1).TotalHits;
        if (hitCount <= 0)
        {
            Console.WriteLine(" FAILED: count=" + hitCount);
            anyFailure = true;
        }
        long endTicks = Environment.TickCount;
        Console.WriteLine(" took " + (endTicks - startTicks) + " millis");

        TermsEnum.SeekStatus status = termsEnum.SeekCeil(term);
        if (status == TermsEnum.SeekStatus.FOUND)
        {
            continue;
        }

        if (status == TermsEnum.SeekStatus.END)
        {
            Console.WriteLine(" FAILED: got END");
        }
        else
        {
            Console.WriteLine(" FAILED: wrong term: got " + termsEnum.Term());
        }
        anyFailure = true;
    }

    Assert.IsFalse(anyFailure);
}
/// <summary>
/// Builds a field value from a random suffix of <c>Terms</c>: every term from a random
/// start index to the end is repeated between 1 and <paramref name="maxTF"/> times, the
/// occurrences are shuffled, and each is written followed by a single space.
/// </summary>
/// <param name="maxTF">Upper bound for the number of repeats of one term.</param>
/// <returns>The space-terminated, space-separated terms.</returns>
internal virtual string FieldValue(int maxTF)
{
    IList<string> occurrences = new List<string>();

    for (int termIndex = Random().Next(Terms.Length); termIndex < Terms.Length; termIndex++)
    {
        int repeats = TestUtil.NextInt(Random(), 1, maxTF);
        for (int n = 0; n < repeats; n++)
        {
            occurrences.Add(Terms[termIndex]);
        }
    }

    occurrences = CollectionsHelper.Shuffle(occurrences);

    StringBuilder text = new StringBuilder();
    foreach (string occurrence in occurrences)
    {
        text.Append(occurrence).Append(' ');
    }
    return text.ToString();
}
/// <summary>
/// Runs random fresh and follow-on queries against a random node's shard searcher until
/// <c>EndTimeNanos</c> is reached, comparing each result (via <c>AssertSame</c>) with a
/// mock searcher built from a <c>MultiReader</c> over all nodes' readers. Expired
/// searchers are expected and handled by dropping the affected prior search.
/// </summary>
public virtual void TestSimple()
{
    int numNodes = TestUtil.NextInt(Random(), 1, 10);

    double runTimeSec = AtLeast(3);

    int minDocsToMakeTerms = TestUtil.NextInt(Random(), 5, 20);

    int maxSearcherAgeSeconds = TestUtil.NextInt(Random(), 1, 3);

    if (VERBOSE)
    {
        Console.WriteLine("TEST: numNodes=" + numNodes + " runTimeSec=" + runTimeSec + " maxSearcherAgeSeconds=" + maxSearcherAgeSeconds);
    }

    Start(numNodes, runTimeSec, maxSearcherAgeSeconds);

    List<PreviousSearchState> priorSearches = new List<PreviousSearchState>();
    List<BytesRef> terms = null;
    while (TimeHelper.NanoTime() < EndTimeNanos)
    {
        bool doFollowon = priorSearches.Count > 0 && Random().Next(7) == 1;

        // Pick a random node; we will run the query on this node:
        int myNodeID = Random().Next(numNodes);

        NodeState.ShardIndexSearcher localShardSearcher;

        PreviousSearchState prevSearchState;

        if (doFollowon)
        {
            // Pretend user issued a followon query:
            prevSearchState = priorSearches[Random().Next(priorSearches.Count)];

            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: follow-on query age=" + ((TimeHelper.NanoTime() - prevSearchState.SearchTimeNanos) / 1000000000.0));
            }

            try
            {
                localShardSearcher = Nodes[myNodeID].Acquire(prevSearchState.Versions);
            }
            catch (SearcherExpiredException see)
            {
                // Expected, sometimes; in a "real" app we would
                // either forward this error to the user ("too
                // much time has passed; please re-run your
                // search") or sneakily just switch to newest
                // searcher w/o telling them...
                if (VERBOSE)
                {
                    Console.WriteLine(" searcher expired during local shard searcher init: " + see);
                }
                priorSearches.Remove(prevSearchState);
                continue;
            }
        }
        else
        {
            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: fresh query");
            }
            // Do fresh query:
            localShardSearcher = Nodes[myNodeID].Acquire();
            prevSearchState = null;
        }

        IndexReader[] subs = new IndexReader[numNodes];

        PreviousSearchState searchState = null;

        try
        {
            // Mock: now make a single reader (MultiReader) from all node
            // searchers. In a real shard env you can't do this... we
            // do it to confirm results from the shard searcher
            // are correct:
            int docCount = 0;
            try
            {
                for (int nodeID = 0; nodeID < numNodes; nodeID++)
                {
                    long subVersion = localShardSearcher.NodeVersions[nodeID];
                    IndexSearcher sub = Nodes[nodeID].Searchers.Acquire(subVersion);
                    if (sub == null)
                    {
                        // Remember which node expired before the unwind loop below
                        // drives nodeID down to -1, so the message names the real node.
                        int expiredNodeID = nodeID;

                        // Release the readers acquired so far.
                        nodeID--;
                        while (nodeID >= 0)
                        {
                            subs[nodeID].DecRef();
                            subs[nodeID] = null;
                            nodeID--;
                        }
                        throw new SearcherExpiredException("nodeID=" + expiredNodeID + " version=" + subVersion);
                    }
                    subs[nodeID] = sub.IndexReader;
                    docCount += subs[nodeID].MaxDoc;
                }
            }
            catch (SearcherExpiredException see)
            {
                // Expected
                if (VERBOSE)
                {
                    Console.WriteLine(" searcher expired during mock reader init: " + see);
                }
                continue;
            }

            IndexReader mockReader = new MultiReader(subs);
            IndexSearcher mockSearcher = new IndexSearcher(mockReader);

            Query query;
            Sort sort;

            if (prevSearchState != null)
            {
                query = prevSearchState.Query;
                sort = prevSearchState.Sort;
            }
            else
            {
                if (terms == null && docCount > minDocsToMakeTerms)
                {
                    // TODO: try to "focus" on high freq terms sometimes too
                    // TODO: maybe also periodically reset the terms...?
                    TermsEnum termsEnum = MultiFields.GetTerms(mockReader, "body").Iterator(null);
                    terms = new List<BytesRef>();
                    while (termsEnum.Next() != null)
                    {
                        terms.Add(BytesRef.DeepCopyOf(termsEnum.Term()));
                    }
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: init terms: " + terms.Count + " terms");
                    }
                    if (terms.Count == 0)
                    {
                        terms = null;
                    }
                }

                if (VERBOSE)
                {
                    Console.WriteLine(" maxDoc=" + mockReader.MaxDoc);
                }

                if (terms != null)
                {
                    if (Random().NextBoolean())
                    {
                        query = new TermQuery(new Term("body", terms[Random().Next(terms.Count)]));
                    }
                    else
                    {
                        string t = terms[Random().Next(terms.Count)].Utf8ToString();
                        string prefix;
                        if (t.Length <= 1)
                        {
                            prefix = t;
                        }
                        else
                        {
                            prefix = t.Substring(0, TestUtil.NextInt(Random(), 1, 2));
                        }
                        query = new PrefixQuery(new Term("body", prefix));
                    }

                    if (Random().NextBoolean())
                    {
                        sort = null;
                    }
                    else
                    {
                        // TODO: sort by more than 1 field
                        // NOTE(review): Next(3) yields 0..2, so the final "title" branch
                        // below is never taken; left as-is to keep the test's behavior.
                        int what = Random().Next(3);

                        if (what == 0)
                        {
                            sort = new Sort(SortField.FIELD_SCORE);
                        }
                        else if (what == 1)
                        {
                            // TODO: this sort doesn't merge
                            // correctly... it's tricky because you
                            // could have > 2.1B docs across all shards:
                            //sort = new Sort(SortField.FIELD_DOC);
                            sort = null;
                        }
                        else if (what == 2)
                        {
                            sort = new Sort(new SortField[] { new SortField("docid", SortField.Type_e.INT, Random().NextBoolean()) });
                        }
                        else
                        {
                            sort = new Sort(new SortField[] { new SortField("title", SortField.Type_e.STRING, Random().NextBoolean()) });
                        }
                    }
                }
                else
                {
                    query = null;
                    sort = null;
                }
            }

            if (query != null)
            {
                try
                {
                    searchState = AssertSame(mockSearcher, localShardSearcher, query, sort, prevSearchState);
                }
                catch (SearcherExpiredException see)
                {
                    // Expected; in a "real" app we would
                    // either forward this error to the user ("too
                    // much time has passed; please re-run your
                    // search") or sneakily just switch to newest
                    // searcher w/o telling them...
                    if (VERBOSE)
                    {
                        Console.WriteLine(" searcher expired during search: " + see);
                        Console.Out.Write(see.StackTrace);
                    }
                    // We can't do this in general: on a very slow
                    // computer it's possible the local searcher
                    // expires before we can finish our search:
                    // assert prevSearchState != null;
                    if (prevSearchState != null)
                    {
                        priorSearches.Remove(prevSearchState);
                    }
                }
            }
        }
        finally
        {
            Nodes[myNodeID].Release(localShardSearcher);
            foreach (IndexReader sub in subs)
            {
                if (sub != null)
                {
                    sub.DecRef();
                }
            }
        }

        if (searchState != null && searchState.SearchAfterLocal != null && Random().Next(5) == 3)
        {
            priorSearches.Add(searchState);
            if (priorSearches.Count > 200)
            {
                // Keep a random 100 of the prior searches. Copying into a new List avoids
                // depending on the concrete type Shuffle returns, and RemoveRange trims
                // this list itself; Clear() on a SubList result only does so if SubList
                // is a live view, as in Java.
                priorSearches = new List<PreviousSearchState>(CollectionsHelper.Shuffle(priorSearches));
                priorSearches.RemoveRange(100, priorSearches.Count - 100);
            }
        }
    }

    Finish();
}
/// <summary>
/// Builds a shuffled bag in which term <c>i</c> occurs exactly <c>i</c> times, lets several
/// threads index it concurrently, force-merges to one segment, and checks that every
/// term's total term frequency equals its numeric value.
/// </summary>
public virtual void Test()
{
    IList<string> bag = new List<string>();
    int numTerms = AtLeast(300);
    int maxTermsPerDoc = TestUtil.NextInt(Random(), 10, 20);
    bool isSimpleText = "SimpleText".Equals(TestUtil.GetPostingsFormat("field"));

    IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

    bool slowConfiguration = isSimpleText || iwc.MergePolicy is MockRandomMergePolicy;
    if (slowConfiguration && (TEST_NIGHTLY || RANDOM_MULTIPLIER > 1))
    {
        // Otherwise test can take way too long (> 2 hours)
        numTerms /= 2;
    }

    if (VERBOSE)
    {
        Console.WriteLine("maxTermsPerDoc=" + maxTermsPerDoc);
        Console.WriteLine("numTerms=" + numTerms);
    }

    // Term "i" is added i times, so term "0" never occurs.
    for (int termValue = 0; termValue < numTerms; termValue++)
    {
        string termText = Convert.ToString(termValue);
        for (int repeat = 0; repeat < termValue; repeat++)
        {
            bag.Add(termText);
        }
    }
    bag = CollectionsHelper.Shuffle(bag);

    ConcurrentQueue<string> postings = new ConcurrentQueue<string>(bag);

    Directory dir = NewFSDirectory(CreateTempDir(GetFullMethodName()));

    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

    int threadCount = TestUtil.NextInt(Random(), 1, 5);
    if (VERBOSE)
    {
        Console.WriteLine("config: " + iw.w.Config);
        Console.WriteLine("threadCount=" + threadCount);
    }

    Field prototype = NewTextField("field", "", Field.Store.NO);
    FieldType fieldType = new FieldType((FieldType)prototype.FieldType);
    if (Random().NextBoolean())
    {
        fieldType.OmitNorms = true;
    }

    int options = Random().Next(3);
    switch (options)
    {
        case 0:
            fieldType.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS; // we dont actually need positions
            fieldType.StoreTermVectors = true; // but enforce term vectors when we do this so we check SOMETHING
            break;

        case 1:
            if (!DoesntSupportOffsets.Contains(TestUtil.GetPostingsFormat("field")))
            {
                fieldType.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            }
            break;

        default:
            // else just positions
            break;
    }

    ThreadClass[] workers = new ThreadClass[threadCount];
    CountdownEvent startingGun = new CountdownEvent(1);

    for (int workerId = 0; workerId < threadCount; workerId++)
    {
        Random threadRandom = new Random(Random().Next());
        Document document = new Document();
        Field field = new Field("field", "", fieldType);
        document.Add(field);

        ThreadClass worker = new ThreadAnonymousInnerClassHelper(this, numTerms, maxTermsPerDoc, postings, iw, startingGun, threadRandom, document, field);
        workers[workerId] = worker;
        worker.Start();
    }

    // Release all workers at once, then wait for them.
    startingGun.Signal();
    foreach (ThreadClass worker in workers)
    {
        worker.Join();
    }

    iw.ForceMerge(1);
    DirectoryReader ir = iw.Reader;
    Assert.AreEqual(1, ir.Leaves.Count);
    AtomicReader air = (AtomicReader)ir.Leaves[0].Reader;
    Terms terms = air.Terms("field");
    // numTerms-1 because there cannot be a term 0 with 0 postings:
    Assert.AreEqual(numTerms - 1, terms.Size());

    TermsEnum termsEnum = terms.Iterator(null);
    for (BytesRef current = termsEnum.Next(); current != null; current = termsEnum.Next())
    {
        int value = Convert.ToInt32(current.Utf8ToString());
        Assert.AreEqual(value, termsEnum.TotalTermFreq());
        // don't really need to check more than this, as CheckIndex
        // will verify that totalTermFreq == total number of positions seen
        // from a docsAndPositionsEnum.
    }

    ir.Dispose();
    iw.Dispose();
    dir.Dispose();
}
/// <summary>
/// Indexes documents with a random subset of stored fields "f0".."fN" (occasionally
/// re-shuffling the field order and deleting earlier documents), then, across two
/// reader cycles (the second after a force-merge), looks up random surviving documents
/// by id and checks that every stored field value matches what was indexed.
/// </summary>
public virtual void TestRandomStoredFields()
{
    Directory dir = NewDirectory();
    Random rand = Random();
    RandomIndexWriter w = new RandomIndexWriter(rand, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(rand, 5, 20)));
    //w.w.setNoCFSRatio(0.0);
    int docCount = AtLeast(200);
    int fieldCount = TestUtil.NextInt(rand, 1, 5);

    IList<int?> fieldIDs = new List<int?>();

    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.Tokenized = false;
    Field idField = NewField("id", "", customType);

    for (int i = 0; i < fieldCount; i++)
    {
        fieldIDs.Add(i);
    }

    // Expected content, keyed by document id.
    IDictionary<string, Document> docs = new Dictionary<string, Document>();

    if (VERBOSE)
    {
        Console.WriteLine("TEST: build index docCount=" + docCount);
    }

    FieldType customType2 = new FieldType();
    customType2.Stored = true;
    for (int i = 0; i < docCount; i++)
    {
        Document doc = new Document();
        doc.Add(idField);
        string id = "" + i;
        idField.StringValue = id;
        docs[id] = doc;
        if (VERBOSE)
        {
            Console.WriteLine("TEST: add doc id=" + id);
        }

        foreach (int field in fieldIDs)
        {
            // Three times out of four the field gets a random value; otherwise it is omitted.
            if (rand.Next(4) != 3)
            {
                string value = TestUtil.RandomUnicodeString(rand, 1000);
                doc.Add(NewField("f" + field, value, customType2));
            }
        }
        w.AddDocument(doc);
        if (rand.Next(50) == 17)
        {
            // mixup binding of field name -> Number every so often
            fieldIDs = CollectionsHelper.Shuffle(fieldIDs);
        }
        if (rand.Next(5) == 3 && i > 0)
        {
            string delID = "" + rand.Next(i);
            if (VERBOSE)
            {
                Console.WriteLine("TEST: delete doc id=" + delID);
            }
            w.DeleteDocuments(new Term("id", delID));
            docs.Remove(delID);
        }
    }

    if (VERBOSE)
    {
        Console.WriteLine("TEST: " + docs.Count + " docs in index; now load fields");
    }
    if (docs.Count > 0)
    {
        string[] idsList = docs.Keys.ToArray(/*new string[docs.Count]*/);

        for (int x = 0; x < 2; x++)
        {
            IndexReader r = w.Reader;
            IndexSearcher s = NewSearcher(r);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: cycle x=" + x + " r=" + r);
            }

            int num = AtLeast(1000);
            for (int iter = 0; iter < num; iter++)
            {
                string testID = idsList[rand.Next(idsList.Length)];
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: test id=" + testID);
                }
                TopDocs hits = s.Search(new TermQuery(new Term("id", testID)), 1);
                Assert.AreEqual(1, hits.TotalHits);
                Document doc = r.Document(hits.ScoreDocs[0].Doc);
                Document docExp = docs[testID];
                for (int i = 0; i < fieldCount; i++)
                {
                    // (expected, actual, message): the ported call kept JUnit's message-first
                    // order, which compared the message text against the stored value.
                    // The message now names the field being checked (i), not fieldCount.
                    Assert.AreEqual(docExp.Get("f" + i), doc.Get("f" + i), "doc " + testID + ", field f" + i + " is wrong");
                }
            }
            r.Dispose();
            w.ForceMerge(1);
        }
    }
    w.Dispose();
    dir.Dispose();
}
/// <summary>
/// Builds a shuffled bag in which term <c>i</c> occurs exactly <c>i</c> times, lets several
/// threads index it concurrently, force-merges to one segment, and checks that every
/// term's document frequency equals its numeric value.
/// </summary>
public virtual void Test()
{
    IList<string> bag = new List<string>();
    int numTerms = AtLeast(300);
    int maxTermsPerDoc = TestUtil.NextInt(Random(), 10, 20);

    bool isSimpleText = "SimpleText".Equals(TestUtil.GetPostingsFormat("field"));

    IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

    bool slowConfiguration = isSimpleText || iwc.MergePolicy is MockRandomMergePolicy;
    if (slowConfiguration && (TEST_NIGHTLY || RANDOM_MULTIPLIER > 1))
    {
        // Otherwise test can take way too long (> 2 hours)
        numTerms /= 2;
    }

    if (VERBOSE)
    {
        Console.WriteLine("maxTermsPerDoc=" + maxTermsPerDoc);
        Console.WriteLine("numTerms=" + numTerms);
    }

    // Term "i" is added i times, so term "0" never occurs.
    for (int termValue = 0; termValue < numTerms; termValue++)
    {
        string termText = Convert.ToString(termValue);
        for (int repeat = 0; repeat < termValue; repeat++)
        {
            bag.Add(termText);
        }
    }
    bag = CollectionsHelper.Shuffle(bag);

    ConcurrentQueue<string> postings = new ConcurrentQueue<string>(bag);

    Directory dir = NewFSDirectory(CreateTempDir("bagofpostings"));
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

    int threadCount = TestUtil.NextInt(Random(), 1, 5);
    if (VERBOSE)
    {
        Console.WriteLine("config: " + iw.w.Config);
        Console.WriteLine("threadCount=" + threadCount);
    }

    ThreadClass[] workers = new ThreadClass[threadCount];
    CountdownEvent startingGun = new CountdownEvent(1);

    for (int workerId = 0; workerId < threadCount; workerId++)
    {
        ThreadClass worker = new ThreadAnonymousInnerClassHelper(this, maxTermsPerDoc, postings, iw, startingGun);
        workers[workerId] = worker;
        worker.Start();
    }

    // Release all workers at once, then wait for them.
    startingGun.Signal();
    foreach (ThreadClass worker in workers)
    {
        worker.Join();
    }

    iw.ForceMerge(1);
    DirectoryReader ir = iw.Reader;
    Assert.AreEqual(1, ir.Leaves.Count);
    AtomicReader air = (AtomicReader)ir.Leaves[0].Reader;
    Terms terms = air.Terms("field");
    // numTerms-1 because there cannot be a term 0 with 0 postings:
    Assert.AreEqual(numTerms - 1, air.Fields.UniqueTermCount);
    if (!(iwc.Codec is Lucene3xCodec))
    {
        Assert.AreEqual(numTerms - 1, terms.Size());
    }

    TermsEnum termsEnum = terms.Iterator(null);
    for (BytesRef current = termsEnum.Next(); current != null; current = termsEnum.Next())
    {
        int value = Convert.ToInt32(current.Utf8ToString());
        Assert.AreEqual(value, termsEnum.DocFreq());
        // don't really need to check more than this, as CheckIndex
        // will verify that docFreq == actual number of documents seen
        // from a docsAndPositionsEnum.
    }

    ir.Dispose();
    iw.Dispose();
    dir.Dispose();
}
/// <summary>
/// Collects seek targets derived from the terms of <paramref name="leftTerms"/> (exact
/// copies, truncated copies, and copies stored at a non-zero offset), then checks in
/// shuffled order that both sides agree on <c>SeekExact</c> and <c>SeekCeil</c>, each
/// performed twice in a row.
/// </summary>
/// <param name="leftTerms">Terms that supply the seek targets and one side of the comparison.</param>
/// <param name="rightTerms">Terms expected to behave identically.</param>
private void AssertTermsSeeking(Terms leftTerms, Terms rightTerms)
{
    TermsEnum leftEnum = null;
    TermsEnum rightEnum = null;

    // just an upper bound
    int numTests = AtLeast(20);
    Random random = Random();

    // collect this number of terms from the left side
    HashSet<BytesRef> tests = new HashSet<BytesRef>();
    int numPasses = 0;
    while (numPasses < 10 && tests.Count < numTests)
    {
        leftEnum = leftTerms.Iterator(leftEnum);
        BytesRef term = null;
        while ((term = leftEnum.Next()) != null)
        {
            int code = random.Next(10);
            if (code == 0)
            {
                // the term
                tests.Add(BytesRef.DeepCopyOf(term));
            }
            else if (code == 1)
            {
                // truncated subsequence of term
                BytesRef truncated = BytesRef.DeepCopyOf(term);
                if (truncated.Length > 0)
                {
                    // truncate it
                    truncated.Length = random.Next(truncated.Length);
                }
                // The truncated copy used to be built and then dropped, so this
                // case was never exercised; register it as a seek target.
                tests.Add(truncated);
            }
            else if (code == 2)
            {
                // term, but ensure a non-zero offset
                var newbytes = new byte[term.Length + 5];
                Array.Copy(term.Bytes, term.Offset, newbytes, 5, term.Length);
                tests.Add(new BytesRef(newbytes, 5, term.Length));
            }
        }
        numPasses++;
    }

    // Copy rather than downcast: does not depend on the concrete list type Shuffle returns.
    List<BytesRef> shuffledTests = new List<BytesRef>(CollectionsHelper.Shuffle(new List<BytesRef>(tests)));

    foreach (BytesRef b in shuffledTests)
    {
        leftEnum = leftTerms.Iterator(leftEnum);
        rightEnum = rightTerms.Iterator(rightEnum);

        // Each seek is issued twice on purpose: seeking again to the same target must agree too.
        Assert.AreEqual(leftEnum.SeekExact(b), rightEnum.SeekExact(b));
        Assert.AreEqual(leftEnum.SeekExact(b), rightEnum.SeekExact(b));

        SeekStatus leftStatus;
        SeekStatus rightStatus;

        leftStatus = leftEnum.SeekCeil(b);
        rightStatus = rightEnum.SeekCeil(b);
        Assert.AreEqual(leftStatus, rightStatus);
        if (leftStatus != SeekStatus.END)
        {
            Assert.AreEqual(leftEnum.Term(), rightEnum.Term());
        }

        leftStatus = leftEnum.SeekCeil(b);
        rightStatus = rightEnum.SeekCeil(b);
        Assert.AreEqual(leftStatus, rightStatus);
        if (leftStatus != SeekStatus.END)
        {
            Assert.AreEqual(leftEnum.Term(), rightEnum.Term());
        }
    }
}