public virtual void TestAddBinaryTwice()
{
    Analyzer analyzer = new MockAnalyzer(Random());
    Directory directory = NewDirectory();
    // we don't use RandomIndexWriter because it might add more docvalues than we expect
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    iwc.SetMergePolicy(NewLogMergePolicy());
    IndexWriter iwriter = new IndexWriter(directory, iwc);
    Document doc = new Document();
    doc.Add(new BinaryDocValuesField("dv", new BytesRef("foo!")));
    doc.Add(new BinaryDocValuesField("dv", new BytesRef("bar!")));
    try
    {
        iwriter.AddDocument(doc);
        Assert.Fail("didn't hit expected exception");
    }
    catch (System.ArgumentException)
    {
        // expected
    }
    iwriter.Dispose();
    directory.Dispose();
}
public virtual void TestStartPositions()
{
    Directory dir = NewDirectory();

    // mimic StopAnalyzer
    CharacterRunAutomaton stopSet = new CharacterRunAutomaton((new RegExp("the|a|of")).ToAutomaton());
    Analyzer analyzer = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, stopSet);

    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, analyzer);
    Document doc = new Document();
    doc.Add(NewTextField("field", "the quick brown fox", Field.Store.NO));
    writer.AddDocument(doc);
    Document doc2 = new Document();
    doc2.Add(NewTextField("field", "quick brown fox", Field.Store.NO));
    writer.AddDocument(doc2);

    IndexReader reader = writer.Reader;
    IndexSearcher searcher = NewSearcher(reader);

    // user queries on "starts-with quick"
    SpanQuery sfq = new SpanFirstQuery(new SpanTermQuery(new Term("field", "quick")), 1);
    Assert.AreEqual(1, searcher.Search(sfq, 10).TotalHits);

    // user queries on "starts-with the quick"
    SpanQuery include = new SpanFirstQuery(new SpanTermQuery(new Term("field", "quick")), 2);
    sfq = new SpanNotQuery(include, sfq);
    Assert.AreEqual(1, searcher.Search(sfq, 10).TotalHits);

    writer.Dispose();
    reader.Dispose();
    dir.Dispose();
}
// TODO: create a testNormsNotPresent ourselves by adding/deleting/merging docs
public virtual void BuildIndex(Directory dir)
{
    Random random = Random();
    MockAnalyzer analyzer = new MockAnalyzer(Random());
    analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    Similarity provider = new MySimProvider(this);
    config.SetSimilarity(provider);
    RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
    LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues());
    int num = AtLeast(100);
    for (int i = 0; i < num; i++)
    {
        Document doc = docs.NextDoc();
        int boost = Random().Next(255);
        Field f = new TextField(ByteTestField, "" + boost, Field.Store.YES);
        f.Boost = boost;
        doc.Add(f);
        writer.AddDocument(doc);
        doc.RemoveField(ByteTestField);
        if (Rarely())
        {
            writer.Commit();
        }
    }
    writer.Commit();
    writer.Dispose();
    docs.Dispose();
}
public virtual void TestMixedMerge()
{
    Directory ram = NewDirectory();
    Analyzer analyzer = new MockAnalyzer(Random());
    IndexWriter writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(3).SetMergePolicy(NewLogMergePolicy(2)));
    Document d = new Document();

    // this field will have norms
    Field f1 = NewTextField("f1", "this field has norms", Field.Store.NO);
    d.Add(f1);

    // this field will NOT have norms
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.OmitNorms = true;
    Field f2 = NewField("f2", "this field has NO norms in all docs", customType);
    d.Add(f2);

    for (int i = 0; i < 30; i++)
    {
        writer.AddDocument(d);
    }

    // now we add another document which has norms for field f2 and not for f1
    // and verify that the SegmentMerger keeps things constant
    d = new Document();
    // Reverse
    d.Add(NewField("f1", "this field has norms", customType));
    d.Add(NewTextField("f2", "this field has NO norms in all docs", Field.Store.NO));
    for (int i = 0; i < 30; i++)
    {
        writer.AddDocument(d);
    }

    // force merge
    writer.ForceMerge(1);
    // flush
    writer.Dispose();

    SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(ram));
    FieldInfos fi = reader.FieldInfos;
    Assert.IsTrue(fi.FieldInfo("f1").OmitsNorms(), "OmitNorms field bit should be set.");
    Assert.IsTrue(fi.FieldInfo("f2").OmitsNorms(), "OmitNorms field bit should be set.");

    reader.Dispose();
    ram.Dispose();
}
public virtual void Test()
{
    Directory d = NewDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(Random());
    analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
    MyIndexWriter w = new MyIndexWriter(d, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));

    // Try to make an index that requires merging:
    w.Config.SetMaxBufferedDocs(TestUtil.NextInt(Random(), 2, 11));
    int numStartDocs = AtLeast(20);
    LineFileDocs docs = new LineFileDocs(Random(), DefaultCodecSupportsDocValues());
    for (int docIDX = 0; docIDX < numStartDocs; docIDX++)
    {
        w.AddDocument(docs.NextDoc());
    }
    MergePolicy mp = w.Config.MergePolicy;
    int mergeAtOnce = 1 + w.GetSegmentInfosSize_Nunit();
    if (mp is TieredMergePolicy)
    {
        ((TieredMergePolicy)mp).MaxMergeAtOnce = mergeAtOnce;
    }
    else if (mp is LogMergePolicy)
    {
        ((LogMergePolicy)mp).MergeFactor = mergeAtOnce;
    }
    else
    {
        // skip test
        w.Dispose();
        d.Dispose();
        return;
    }
    AtomicBoolean doStop = new AtomicBoolean();
    w.Config.SetMaxBufferedDocs(2);
    ThreadClass t = new ThreadAnonymousInnerClassHelper(this, w, numStartDocs, docs, doStop);
    t.Start();
    w.ForceMerge(1);
    doStop.Set(true);
    t.Join();
    Assert.IsTrue(w.MergeCount.Get() <= 1, "merge count is " + w.MergeCount.Get());
    w.Dispose();
    d.Dispose();
    docs.Dispose();
}
public virtual void TestFloatNorms()
{
    Directory dir = NewDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(Random());
    analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    Similarity provider = new MySimProvider(this);
    config.SetSimilarity(provider);
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, config);
    LineFileDocs docs = new LineFileDocs(Random());
    int num = AtLeast(100);
    for (int i = 0; i < num; i++)
    {
        Document doc = docs.NextDoc();
        float nextFloat = (float)Random().NextDouble();
        // Cast to a double to get more precision output to the string.
        Field f = new TextField(FloatTestField, "" + (double)nextFloat, Field.Store.YES);
        f.Boost = nextFloat;
        doc.Add(f);
        writer.AddDocument(doc);
        doc.RemoveField(FloatTestField);
        if (Rarely())
        {
            writer.Commit();
        }
    }
    writer.Commit();
    writer.Dispose();

    AtomicReader open = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(dir));
    NumericDocValues norms = open.GetNormValues(FloatTestField);
    Assert.IsNotNull(norms);
    for (int i = 0; i < open.MaxDoc; i++)
    {
        Document document = open.Document(i);
        float expected = Convert.ToSingle(document.Get(FloatTestField));
        Assert.AreEqual(expected, Number.IntBitsToFloat((int)norms.Get(i)), 0.0f);
    }
    open.Dispose();
    dir.Dispose();
    docs.Dispose();
}
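// ByteTestField (used by BuildIndex above) and FloatTestField are class-level
// constants that are not shown in this excerpt. A minimal sketch, assuming they
// are simply the names of the fields under test (the exact values here are
// illustrative assumptions, not taken from the original source):
private const string ByteTestField = "normsTestByte";
private const string FloatTestField = "normsTestFloat";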
public virtual void TestDemo()
{
    Analyzer analyzer = new MockAnalyzer(Random());

    // Store the index in memory:
    Directory directory = NewDirectory();
    // To store an index on disk, use this instead:
    // Directory directory = FSDirectory.Open(new DirectoryInfo("/tmp/testindex"));
    RandomIndexWriter iwriter = new RandomIndexWriter(Random(), directory, analyzer);
    Document doc = new Document();
    string longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
    string text = "this is the text to be indexed. " + longTerm;
    doc.Add(NewTextField("fieldname", text, Field.Store.YES));
    iwriter.AddDocument(doc);
    iwriter.Dispose();

    // Now search the index:
    IndexReader ireader = DirectoryReader.Open(directory); // read-only=true
    IndexSearcher isearcher = NewSearcher(ireader);

    Assert.AreEqual(1, isearcher.Search(new TermQuery(new Term("fieldname", longTerm)), 1).TotalHits);
    Query query = new TermQuery(new Term("fieldname", "text"));
    TopDocs hits = isearcher.Search(query, null, 1);
    Assert.AreEqual(1, hits.TotalHits);

    // Iterate through the results:
    for (int i = 0; i < hits.ScoreDocs.Length; i++)
    {
        Document hitDoc = isearcher.Doc(hits.ScoreDocs[i].Doc);
        Assert.AreEqual(text, hitDoc.Get("fieldname"));
    }

    // Test simple phrase query
    PhraseQuery phraseQuery = new PhraseQuery();
    phraseQuery.Add(new Term("fieldname", "to"));
    phraseQuery.Add(new Term("fieldname", "be"));
    Assert.AreEqual(1, isearcher.Search(phraseQuery, null, 1).TotalHits);

    ireader.Dispose();
    directory.Dispose();
}
public virtual void TestMmapIndex()
{
    // sometimes the directory is not cleaned by rmDir, because on Windows it
    // may take some time until the files are finally dereferenced. So clean the
    // directory up front, or otherwise new IndexWriter will fail.
    var dirPath = CreateTempDir("testLuceneMmap");
    RmDir(dirPath.FullName);
    var dir = new MMapDirectory(dirPath, null);

    // plan to add a set of useful stopwords, consider changing some of the
    // interior filters.
    using (var analyzer = new MockAnalyzer(Random()))
    {
        // TODO: something about lock timeouts and leftover locks.
        using (var writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE)))
        {
            writer.Commit();
            using (IndexReader reader = DirectoryReader.Open(dir))
            {
                var searcher = NewSearcher(reader);
                var num = AtLeast(1000);
                for (int dx = 0; dx < num; dx++)
                {
                    var f = RandomField();
                    var doc = new Document();
                    doc.Add(NewTextField("data", f, Field.Store.YES));
                    writer.AddDocument(doc);
                }
            }
        }
        RmDir(dirPath.FullName);
    }
}
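// RmDir and RandomField are helpers referenced above but not defined in this
// excerpt. A minimal sketch, assuming RmDir deletes the directory tree if it
// exists and RandomField produces a random text value via the test framework;
// both bodies are assumptions, not the original implementations:
private static void RmDir(string path)
{
    if (System.IO.Directory.Exists(path))
    {
        System.IO.Directory.Delete(path, true); // recursive delete
    }
}

private static string RandomField()
{
    // assumption: any random unicode string works as field content here
    return TestUtil.RandomUnicodeString(Random());
}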
public virtual void Test()
{
    Directory dir = NewDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(Random());
    analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
    RandomIndexWriter w = new RandomIndexWriter(Random(), dir, analyzer, Similarity, TimeZone);
    LineFileDocs docs = new LineFileDocs(Random(), DefaultCodecSupportsDocValues());
    int charsToIndex = AtLeast(100000);
    int charsIndexed = 0;
    //System.out.println("bytesToIndex=" + charsToIndex);
    while (charsIndexed < charsToIndex)
    {
        Document doc = docs.NextDoc();
        charsIndexed += doc.Get("body").Length;
        w.AddDocument(doc);
        //System.out.println("  bytes=" + charsIndexed + " add: " + doc);
    }
    IndexReader r = w.Reader;
    //System.out.println("numDocs=" + r.NumDocs);
    w.Dispose();

    IndexSearcher s = NewSearcher(r);
    Terms terms = MultiFields.GetFields(r).Terms("body");
    int termCount = 0;
    TermsEnum termsEnum = terms.Iterator(null);
    while (termsEnum.Next() != null)
    {
        termCount++;
    }
    Assert.IsTrue(termCount > 0);

    // Target ~10 terms to search:
    double chance = 10.0 / termCount;
    termsEnum = terms.Iterator(termsEnum);
    IDictionary<BytesRef, TopDocs> answers = new Dictionary<BytesRef, TopDocs>();
    while (termsEnum.Next() != null)
    {
        if (Random().NextDouble() <= chance)
        {
            BytesRef term = BytesRef.DeepCopyOf(termsEnum.Term());
            answers[term] = s.Search(new TermQuery(new Term("body", term)), 100);
        }
    }

    if (answers.Count > 0)
    {
        CountdownEvent startingGun = new CountdownEvent(1);
        int numThreads = TestUtil.NextInt(Random(), 2, 5);
        ThreadClass[] threads = new ThreadClass[numThreads];
        for (int threadID = 0; threadID < numThreads; threadID++)
        {
            ThreadClass thread = new ThreadAnonymousInnerClassHelper(this, s, answers, startingGun);
            threads[threadID] = thread;
            thread.Start();
        }
        startingGun.Signal();
        foreach (ThreadClass thread in threads)
        {
            thread.Join();
        }
    }
    r.Dispose();
    dir.Dispose();
}
public virtual void TestOmitTermFreqAndPositions()
{
    Directory ram = NewDirectory();
    Analyzer analyzer = new MockAnalyzer(Random());
    IndexWriter writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
    Document d = new Document();

    // this field will have Tf
    Field f1 = NewField("f1", "this field has term freqs", NormalType);
    d.Add(f1);

    // this field will NOT have Tf
    Field f2 = NewField("f2", "this field has NO Tf in all docs", OmitType);
    d.Add(f2);

    writer.AddDocument(d);
    writer.ForceMerge(1);

    // now we add another document which has term freq for field f2 and not for f1
    // and verify that the SegmentMerger keeps things constant
    d = new Document();
    // Reverse
    f1 = NewField("f1", "this field has term freqs", OmitType);
    d.Add(f1);
    f2 = NewField("f2", "this field has NO Tf in all docs", NormalType);
    d.Add(f2);
    writer.AddDocument(d);

    // force merge
    writer.ForceMerge(1);
    // flush
    writer.Dispose();

    SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(ram));
    FieldInfos fi = reader.FieldInfos;
    Assert.AreEqual(FieldInfo.IndexOptions.DOCS_ONLY, fi.FieldInfo("f1").FieldIndexOptions, "OmitTermFreqAndPositions field bit should be set.");
    Assert.AreEqual(FieldInfo.IndexOptions.DOCS_ONLY, fi.FieldInfo("f2").FieldIndexOptions, "OmitTermFreqAndPositions field bit should be set.");

    reader.Dispose();
    ram.Dispose();
}
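// NormalType and OmitType are class-level FieldTypes not shown in this excerpt.
// A minimal sketch of their presumed setup: both derive from an unstored
// TextField, with OmitType restricted to docs-only indexing (no term freqs or
// positions), which is what the assertions above rely on. The CreateOmitType
// helper is hypothetical:
private static readonly FieldType NormalType = new FieldType(TextField.TYPE_NOT_STORED);
private static readonly FieldType OmitType = CreateOmitType();

private static FieldType CreateOmitType()
{
    var ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY; // drop term freqs and positions
    return ft;
}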
public virtual void TestNoNrmFile()
{
    Directory ram = NewDirectory();
    Analyzer analyzer = new MockAnalyzer(Random());
    IndexWriter writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(3).SetMergePolicy(NewLogMergePolicy()));
    LogMergePolicy lmp = (LogMergePolicy)writer.Config.MergePolicy;
    lmp.MergeFactor = 2;
    lmp.NoCFSRatio = 0.0;
    Document d = new Document();

    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.OmitNorms = true;
    Field f1 = NewField("f1", "this field has no norms", customType);
    d.Add(f1);

    for (int i = 0; i < 30; i++)
    {
        writer.AddDocument(d);
    }

    writer.Commit();
    AssertNoNrm(ram);

    // force merge
    writer.ForceMerge(1);
    // flush
    writer.Dispose();

    AssertNoNrm(ram);
    ram.Dispose();
}
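// AssertNoNrm is referenced above but not defined in this excerpt. A plausible
// sketch, assuming it scans the directory and fails if any norms files
// (".nrm"/".len" extensions) exist; the body is an assumption, not the
// original implementation:
private void AssertNoNrm(Directory dir)
{
    foreach (string file in dir.ListAll())
    {
        Assert.IsFalse(file.EndsWith(".nrm") || file.EndsWith(".len"), "unexpected norms file: " + file);
    }
}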
public virtual void TestRollbackAndCommitWithThreads()
{
    BaseDirectoryWrapper d = NewDirectory();
    if (d is MockDirectoryWrapper)
    {
        ((MockDirectoryWrapper)d).PreventDoubleWrite = false;
    }

    int threadCount = TestUtil.NextInt(Random(), 2, 6);
    AtomicReference<IndexWriter> writerRef = new AtomicReference<IndexWriter>();
    MockAnalyzer analyzer = new MockAnalyzer(Random());
    analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);

    writerRef.Value = new IndexWriter(d, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
    LineFileDocs docs = new LineFileDocs(Random());
    ThreadClass[] threads = new ThreadClass[threadCount];
    int iters = AtLeast(100);
    AtomicBoolean failed = new AtomicBoolean();
    ReentrantLock rollbackLock = new ReentrantLock();
    ReentrantLock commitLock = new ReentrantLock();
    for (int threadID = 0; threadID < threadCount; threadID++)
    {
        threads[threadID] = new ThreadAnonymousInnerClassHelper(this, d, writerRef, docs, iters, failed, rollbackLock, commitLock);
        threads[threadID].Start();
    }

    for (int threadID = 0; threadID < threadCount; threadID++)
    {
        threads[threadID].Join();
    }

    Assert.IsTrue(!failed.Get());
    writerRef.Value.Dispose();
    d.Dispose();
}
public virtual void TestReuseDocsEnumDifferentReader()
{
    Directory dir = NewDirectory();
    Codec cp = TestUtil.AlwaysPostingsFormat(new Lucene40RWPostingsFormat());
    MockAnalyzer analyzer = new MockAnalyzer(Random());
    analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetCodec(cp));
    int numdocs = AtLeast(20);
    CreateRandomIndex(numdocs, writer, Random());
    writer.Commit();

    DirectoryReader firstReader = DirectoryReader.Open(dir);
    DirectoryReader secondReader = DirectoryReader.Open(dir);
    IList<AtomicReaderContext> leaves = firstReader.Leaves;
    IList<AtomicReaderContext> leaves2 = secondReader.Leaves;

    foreach (AtomicReaderContext ctx in leaves)
    {
        Terms terms = ((AtomicReader)ctx.Reader).Terms("body");
        TermsEnum iterator = terms.Iterator(null);
        IdentityHashMap<DocsEnum, bool?> enums = new IdentityHashMap<DocsEnum, bool?>();
        MatchNoBits bits = new MatchNoBits(firstReader.MaxDoc);
        iterator = terms.Iterator(null);
        DocsEnum docs = null;
        BytesRef term = null;
        while ((term = iterator.Next()) != null)
        {
            docs = iterator.Docs(null, RandomDocsEnum("body", term, leaves2, bits), Random().NextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
            enums[docs] = true;
        }
        Assert.AreEqual(terms.Size(), enums.Count);

        iterator = terms.Iterator(null);
        enums.Clear();
        docs = null;
        while ((term = iterator.Next()) != null)
        {
            docs = iterator.Docs(bits, RandomDocsEnum("body", term, leaves2, bits), Random().NextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
            enums[docs] = true;
        }
        Assert.AreEqual(terms.Size(), enums.Count);
    }
    IOUtils.Close(writer, firstReader, secondReader, dir);
}
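// CreateRandomIndex is a helper not included in this excerpt; presumably it
// feeds numdocs random LineFileDocs documents (which carry the "body" field
// iterated above) into the writer, roughly like this sketch:
public static void CreateRandomIndex(int numdocs, RandomIndexWriter writer, Random random)
{
    LineFileDocs lineFileDocs = new LineFileDocs(random);
    for (int i = 0; i < numdocs; i++)
    {
        writer.AddDocument(lineFileDocs.NextDoc());
    }
    lineFileDocs.Dispose();
}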
public virtual void TestPhraseQueryPositionIncrements()
{
    PhraseQuery expected = new PhraseQuery();
    expected.Add(new Term("field", "1"));
    expected.Add(new Term("field", "2"), 2);

    CharacterRunAutomaton stopList = new CharacterRunAutomaton((new RegExp("[sS][tT][oO][pP]")).ToAutomaton());

    Analyzer analyzer = new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false, stopList);

    QueryBuilder builder = new QueryBuilder(analyzer);
    Assert.AreEqual(expected, builder.CreatePhraseQuery("field", "1 stop 2"));
}
public virtual void TestBasic()
{
    Directory dir = NewDirectory();
    Analyzer analyzer = new MockAnalyzer(Random());
    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(2).SetSimilarity(new SimpleSimilarity()).SetMergePolicy(NewLogMergePolicy(2)));

    StringBuilder sb = new StringBuilder(265);
    string term = "term";
    for (int i = 0; i < 30; i++)
    {
        Document doc = new Document();
        sb.Append(term).Append(" ");
        string content = sb.ToString();
        Field noTf = NewField("noTf", content + (i % 2 == 0 ? "" : " notf"), OmitType);
        doc.Add(noTf);

        Field tf = NewField("tf", content + (i % 2 == 0 ? " tf" : ""), NormalType);
        doc.Add(tf);

        writer.AddDocument(doc);
        //System.out.println(d);
    }

    writer.ForceMerge(1);
    // flush
    writer.Dispose();

    /*
     * Verify the index
     */
    IndexReader reader = DirectoryReader.Open(dir);
    IndexSearcher searcher = NewSearcher(reader);
    searcher.Similarity = new SimpleSimilarity();

    Term a = new Term("noTf", term);
    Term b = new Term("tf", term);
    Term c = new Term("noTf", "notf");
    Term d = new Term("tf", "tf");
    TermQuery q1 = new TermQuery(a);
    TermQuery q2 = new TermQuery(b);
    TermQuery q3 = new TermQuery(c);
    TermQuery q4 = new TermQuery(d);

    PhraseQuery pq = new PhraseQuery();
    pq.Add(a);
    pq.Add(c);
    try
    {
        searcher.Search(pq, 10);
        Assert.Fail("did not hit expected exception");
    }
    catch (Exception e)
    {
        Exception cause = e;
        // If the searcher uses an executor service, the expected exception is
        // wrapped into other exceptions
        while (cause.InnerException != null)
        {
            cause = cause.InnerException;
        }
        if (!(cause is InvalidOperationException))
        {
            throw new InvalidOperationException("Expected an InvalidOperationException", e);
        }
        // else OK because positions are not indexed
    }

    searcher.Search(q1, new CountingHitCollectorAnonymousInnerClassHelper(this));
    //System.out.println(CountingHitCollector.getCount());

    searcher.Search(q2, new CountingHitCollectorAnonymousInnerClassHelper2(this));
    //System.out.println(CountingHitCollector.getCount());

    searcher.Search(q3, new CountingHitCollectorAnonymousInnerClassHelper3(this));
    //System.out.println(CountingHitCollector.getCount());

    searcher.Search(q4, new CountingHitCollectorAnonymousInnerClassHelper4(this));
    //System.out.println(CountingHitCollector.getCount());

    BooleanQuery bq = new BooleanQuery();
    bq.Add(q1, Occur.MUST);
    bq.Add(q4, Occur.MUST);

    searcher.Search(bq, new CountingHitCollectorAnonymousInnerClassHelper5(this));
    Assert.AreEqual(15, CountingHitCollector.Count);

    reader.Dispose();
    dir.Dispose();
}
public virtual void TestNoPrxFile()
{
    Directory ram = NewDirectory();
    Analyzer analyzer = new MockAnalyzer(Random());
    IndexWriter writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMaxBufferedDocs(3).SetMergePolicy(NewLogMergePolicy()));
    LogMergePolicy lmp = (LogMergePolicy)writer.Config.MergePolicy;
    lmp.MergeFactor = 2;
    lmp.NoCFSRatio = 0.0;
    Document d = new Document();

    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;
    Field f1 = NewField("f1", "this field has term freqs", ft);
    d.Add(f1);

    for (int i = 0; i < 30; i++)
    {
        writer.AddDocument(d);
    }

    writer.Commit();
    AssertNoPrx(ram);

    // now add some documents with positions, and check there is no prox after optimization
    d = new Document();
    f1 = NewTextField("f1", "this field has positions", Field.Store.NO);
    d.Add(f1);

    for (int i = 0; i < 30; i++)
    {
        writer.AddDocument(d);
    }

    // force merge
    writer.ForceMerge(1);
    // flush
    writer.Dispose();

    AssertNoPrx(ram);
    ram.Dispose();
}
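// AssertNoPrx, like AssertNoNrm above, is not defined in this excerpt. A
// plausible sketch, assuming it fails if any positions files (".prx"/".pos"
// extensions) remain in the directory; the body is an assumption:
private void AssertNoPrx(Directory dir)
{
    foreach (string file in dir.ListAll())
    {
        Assert.IsFalse(file.EndsWith(".prx") || file.EndsWith(".pos"), "unexpected positions file: " + file);
    }
}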
public virtual void TestBooleanSpanQuery()
{
    bool failed = false;
    int hits = 0;
    Directory directory = NewDirectory();
    Analyzer indexerAnalyzer = new MockAnalyzer(Random());

    IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, indexerAnalyzer);
    IndexWriter writer = new IndexWriter(directory, config);
    string FIELD = "content";
    Document d = new Document();
    d.Add(new TextField(FIELD, "clockwork orange", Field.Store.YES));
    writer.AddDocument(d);
    writer.Dispose();

    IndexReader indexReader = DirectoryReader.Open(directory);
    IndexSearcher searcher = NewSearcher(indexReader);

    BooleanQuery query = new BooleanQuery();
    SpanQuery sq1 = new SpanTermQuery(new Term(FIELD, "clockwork"));
    SpanQuery sq2 = new SpanTermQuery(new Term(FIELD, "clckwork")); // deliberately misspelled; should not match
    query.Add(sq1, BooleanClause.Occur.SHOULD);
    query.Add(sq2, BooleanClause.Occur.SHOULD);
    TopScoreDocCollector collector = TopScoreDocCollector.Create(1000, true);
    searcher.Search(query, collector);
    hits = collector.TopDocs().ScoreDocs.Length;
    foreach (ScoreDoc scoreDoc in collector.TopDocs().ScoreDocs)
    {
        Console.WriteLine(scoreDoc.Doc);
    }
    indexReader.Dispose();
    Assert.AreEqual(false, failed, "Bug in boolean query composed of span queries");
    Assert.AreEqual(1, hits, "Bug in boolean query composed of span queries");
    directory.Dispose();
}
public virtual void TestTwoFieldsTwoFormats()
{
    Analyzer analyzer = new MockAnalyzer(Random());

    Directory directory = NewDirectory();
    // we don't use RandomIndexWriter because it might add more docvalues than we expect
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    DocValuesFormat fast = DocValuesFormat.ForName("Lucene45");
    DocValuesFormat slow = DocValuesFormat.ForName("Lucene45");
    iwc.SetCodec(new Lucene46CodecAnonymousInnerClassHelper(this, fast, slow));
    IndexWriter iwriter = new IndexWriter(directory, iwc);
    Document doc = new Document();
    string longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm";
    string text = "this is the text to be indexed. " + longTerm;
    doc.Add(NewTextField("fieldname", text, Field.Store.YES));
    doc.Add(new NumericDocValuesField("dv1", 5));
    doc.Add(new BinaryDocValuesField("dv2", new BytesRef("hello world")));
    iwriter.AddDocument(doc);
    iwriter.Dispose();

    // Now search the index:
    IndexReader ireader = DirectoryReader.Open(directory); // read-only=true
    IndexSearcher isearcher = NewSearcher(ireader);

    Assert.AreEqual(1, isearcher.Search(new TermQuery(new Term("fieldname", longTerm)), 1).TotalHits);
    Query query = new TermQuery(new Term("fieldname", "text"));
    TopDocs hits = isearcher.Search(query, null, 1);
    Assert.AreEqual(1, hits.TotalHits);
    BytesRef scratch = new BytesRef();

    // Iterate through the results:
    for (int i = 0; i < hits.ScoreDocs.Length; i++)
    {
        Document hitDoc = isearcher.Doc(hits.ScoreDocs[i].Doc);
        Assert.AreEqual(text, hitDoc.Get("fieldname"));
        Debug.Assert(ireader.Leaves.Count == 1);
        NumericDocValues dv = ((AtomicReader)ireader.Leaves[0].Reader).GetNumericDocValues("dv1");
        Assert.AreEqual(5, dv.Get(hits.ScoreDocs[i].Doc));
        BinaryDocValues dv2 = ((AtomicReader)ireader.Leaves[0].Reader).GetBinaryDocValues("dv2");
        dv2.Get(hits.ScoreDocs[i].Doc, scratch);
        Assert.AreEqual(new BytesRef("hello world"), scratch);
    }

    ireader.Dispose();
    directory.Dispose();
}
public virtual void TestSortedTermsEnum()
{
    Directory directory = NewDirectory();
    Analyzer analyzer = new MockAnalyzer(Random());
    IndexWriterConfig iwconfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    iwconfig.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter iwriter = new RandomIndexWriter(Random(), directory, iwconfig);

    Document doc = new Document();
    doc.Add(new StringField("field", "hello", Field.Store.NO));
    iwriter.AddDocument(doc);

    doc = new Document();
    doc.Add(new StringField("field", "world", Field.Store.NO));
    iwriter.AddDocument(doc);

    doc = new Document();
    doc.Add(new StringField("field", "beer", Field.Store.NO));
    iwriter.AddDocument(doc);
    iwriter.ForceMerge(1);

    DirectoryReader ireader = iwriter.Reader;
    iwriter.Dispose();

    AtomicReader ar = GetOnlySegmentReader(ireader);
    SortedSetDocValues dv = FieldCache.DEFAULT.GetDocTermOrds(ar, "field");
    Assert.AreEqual(3, dv.ValueCount);

    TermsEnum termsEnum = dv.TermsEnum();

    // next()
    Assert.AreEqual("beer", termsEnum.Next().Utf8ToString());
    Assert.AreEqual(0, termsEnum.Ord());
    Assert.AreEqual("hello", termsEnum.Next().Utf8ToString());
    Assert.AreEqual(1, termsEnum.Ord());
    Assert.AreEqual("world", termsEnum.Next().Utf8ToString());
    Assert.AreEqual(2, termsEnum.Ord());

    // seekCeil()
    Assert.AreEqual(SeekStatus.NOT_FOUND, termsEnum.SeekCeil(new BytesRef("ha!")));
    Assert.AreEqual("hello", termsEnum.Term().Utf8ToString());
    Assert.AreEqual(1, termsEnum.Ord());
    Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef("beer")));
    Assert.AreEqual("beer", termsEnum.Term().Utf8ToString());
    Assert.AreEqual(0, termsEnum.Ord());
    Assert.AreEqual(SeekStatus.END, termsEnum.SeekCeil(new BytesRef("zzz")));

    // seekExact()
    Assert.IsTrue(termsEnum.SeekExact(new BytesRef("beer")));
    Assert.AreEqual("beer", termsEnum.Term().Utf8ToString());
    Assert.AreEqual(0, termsEnum.Ord());
    Assert.IsTrue(termsEnum.SeekExact(new BytesRef("hello")));
    Assert.AreEqual("hello", termsEnum.Term().Utf8ToString());
    Assert.AreEqual(1, termsEnum.Ord());
    Assert.IsTrue(termsEnum.SeekExact(new BytesRef("world")));
    Assert.AreEqual("world", termsEnum.Term().Utf8ToString());
    Assert.AreEqual(2, termsEnum.Ord());
    Assert.IsFalse(termsEnum.SeekExact(new BytesRef("bogus")));

    // seek(ord)
    termsEnum.SeekExact(0);
    Assert.AreEqual("beer", termsEnum.Term().Utf8ToString());
    Assert.AreEqual(0, termsEnum.Ord());
    termsEnum.SeekExact(1);
    Assert.AreEqual("hello", termsEnum.Term().Utf8ToString());
    Assert.AreEqual(1, termsEnum.Ord());
    termsEnum.SeekExact(2);
    Assert.AreEqual("world", termsEnum.Term().Utf8ToString());
    Assert.AreEqual(2, termsEnum.Ord());

    ireader.Dispose();
    directory.Dispose();
}
public virtual void TestEndOffsetPositionWithCachingTokenFilter()
{
    Directory dir = NewDirectory();
    Analyzer analyzer = new MockAnalyzer(Random());
    IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
    Document doc = new Document();
    IOException priorException = null;
    // four trailing spaces so the second field instance starts at offset 8,
    // matching the assertions below (the whitespace was collapsed in the
    // flattened source):
    TokenStream stream = analyzer.TokenStream("field", new StringReader("abcd    "));
    try
    {
        stream.Reset(); // TODO: weird to reset before wrapping with CachingTokenFilter... correct?
        TokenStream cachedStream = new CachingTokenFilter(stream);
        FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
        customType.StoreTermVectors = true;
        customType.StoreTermVectorPositions = true;
        customType.StoreTermVectorOffsets = true;
        Field f = new Field("field", cachedStream, customType);
        doc.Add(f);
        doc.Add(f);
        w.AddDocument(doc);
    }
    catch (IOException e)
    {
        priorException = e;
    }
    finally
    {
        IOUtils.CloseWhileHandlingException(priorException, stream);
    }
    w.Dispose();

    IndexReader r = DirectoryReader.Open(dir);
    TermsEnum termsEnum = r.GetTermVectors(0).Terms("field").Iterator(null);
    Assert.IsNotNull(termsEnum.Next());
    DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null);
    Assert.AreEqual(2, termsEnum.TotalTermFreq());

    Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    dpEnum.NextPosition();
    Assert.AreEqual(0, dpEnum.StartOffset());
    Assert.AreEqual(4, dpEnum.EndOffset());

    dpEnum.NextPosition();
    Assert.AreEqual(8, dpEnum.StartOffset());
    Assert.AreEqual(12, dpEnum.EndOffset());

    Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc());

    r.Dispose();
    dir.Dispose();
}
public virtual void RunTest(string testName)
{
    Failed.Set(false);
    AddCount.Set(0);
    DelCount.Set(0);
    PackCount.Set(0);

    DateTime t0 = DateTime.UtcNow;

    Random random = new Random(Random().Next());
    LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues());
    DirectoryInfo tempDir = CreateTempDir(testName);
    Dir = GetDirectory(NewMockFSDirectory(tempDir)); // some subclasses rely on this being MDW
    if (Dir is BaseDirectoryWrapper)
    {
        ((BaseDirectoryWrapper)Dir).CheckIndexOnClose = false; // don't double-checkIndex, we do it ourselves.
    }
    MockAnalyzer analyzer = new MockAnalyzer(Random());
    analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetInfoStream(new FailOnNonBulkMergesInfoStream());

    if (LuceneTestCase.TEST_NIGHTLY)
    {
        // newIWConfig makes smallish max seg size, which
        // results in tons and tons of segments for this test
        // when run nightly:
        MergePolicy mp = conf.MergePolicy;
        if (mp is TieredMergePolicy)
        {
            ((TieredMergePolicy)mp).MaxMergedSegmentMB = 5000.0;
        }
        else if (mp is LogByteSizeMergePolicy)
        {
            ((LogByteSizeMergePolicy)mp).MaxMergeMB = 1000.0;
        }
        else if (mp is LogMergePolicy)
        {
            ((LogMergePolicy)mp).MaxMergeDocs = 100000;
        }
    }

    conf.SetMergedSegmentWarmer(new IndexReaderWarmerAnonymousInnerClassHelper(this));

    if (VERBOSE)
    {
        conf.InfoStream = new PrintStreamInfoStreamAnonymousInnerClassHelper(this, Console.Out);
    }

    Writer = new IndexWriter(Dir, conf);
    TestUtil.ReduceOpenFiles(Writer);

    //TaskScheduler es = Random().NextBoolean() ? null : Executors.newCachedThreadPool(new NamedThreadFactory(testName));
    TaskScheduler es = null;

    DoAfterWriter(es);

    int NUM_INDEX_THREADS = TestUtil.NextInt(Random(), 2, 4);
    int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 300 : RANDOM_MULTIPLIER;

    ISet<string> delIDs = new ConcurrentHashSet<string>(new HashSet<string>());
    ISet<string> delPackIDs = new ConcurrentHashSet<string>(new HashSet<string>());
    IList<SubDocs> allSubDocs = new SynchronizedCollection<SubDocs>();

    DateTime stopTime = DateTime.UtcNow.AddSeconds(RUN_TIME_SEC);

    ThreadClass[] indexThreads = LaunchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs);

    if (VERBOSE)
    {
        Console.WriteLine("TEST: DONE start " + NUM_INDEX_THREADS + " indexing threads [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
    }

    // Let index build up a bit
    Thread.Sleep(100);

    DoSearching(es, stopTime);

    if (VERBOSE)
    {
        Console.WriteLine("TEST: all searching done [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
    }

    for (int thread = 0; thread < indexThreads.Length; thread++)
    {
        indexThreads[thread].Join();
    }

    if (VERBOSE)
    {
        Console.WriteLine("TEST: done join indexing threads [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]; addCount=" + AddCount + " delCount=" + DelCount);
    }

    IndexSearcher s = FinalSearcher;
    if (VERBOSE)
    {
        Console.WriteLine("TEST: finalSearcher=" + s);
    }

    Assert.IsFalse(Failed.Get());

    bool doFail = false;

    // Verify: make sure delIDs are in fact deleted:
    foreach (string id in delIDs)
    {
        TopDocs hits = s.Search(new TermQuery(new Term("docid", id)), 1);
        if (hits.TotalHits != 0)
        {
            Console.WriteLine("doc id=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " hits; first docID=" + hits.ScoreDocs[0].Doc);
            doFail = true;
        }
    }

    // Verify: make sure delPackIDs are in fact deleted:
    foreach (string id in delPackIDs)
    {
        TopDocs hits = s.Search(new TermQuery(new Term("packID", id)), 1);
        if (hits.TotalHits != 0)
        {
            Console.WriteLine("packID=" + id + " is supposed to be deleted, but got " + hits.TotalHits + " matches");
            doFail = true;
        }
    }

    // Verify: make sure each group of sub-docs are still in docID order:
    foreach (SubDocs subDocs in allSubDocs.ToList())
    {
        TopDocs hits = s.Search(new TermQuery(new Term("packID", subDocs.PackID)), 20);
        if (!subDocs.Deleted)
        {
            // We sort by relevance but the scores should be identical so sort falls back to by docID:
            if (hits.TotalHits != subDocs.SubIDs.Count)
            {
                Console.WriteLine("packID=" + subDocs.PackID + ": expected " + subDocs.SubIDs.Count + " hits but got " + hits.TotalHits);
                doFail = true;
            }
            else
            {
                int lastDocID = -1;
                int startDocID = -1;
                foreach (ScoreDoc scoreDoc in hits.ScoreDocs)
                {
                    int docID = scoreDoc.Doc;
                    if (lastDocID != -1)
                    {
                        Assert.AreEqual(1 + lastDocID, docID);
                    }
                    else
                    {
                        startDocID = docID;
                    }
                    lastDocID = docID;
                    Document doc = s.Doc(docID);
                    Assert.AreEqual(subDocs.PackID, doc.Get("packID"));
                }

                lastDocID = startDocID - 1;
                foreach (string subID in subDocs.SubIDs)
                {
                    hits = s.Search(new TermQuery(new Term("docid", subID)), 1);
                    Assert.AreEqual(1, hits.TotalHits);
                    int docID = hits.ScoreDocs[0].Doc;
                    if (lastDocID != -1)
                    {
                        Assert.AreEqual(1 + lastDocID, docID);
                    }
                    lastDocID = docID;
                }
            }
        }
        else
        {
            // Pack was deleted -- make sure its docs are deleted. We can't
            // verify packID is deleted because we can re-use packID for update:
            foreach (string subID in subDocs.SubIDs)
            {
                Assert.AreEqual(0, s.Search(new TermQuery(new Term("docid", subID)), 1).TotalHits);
            }
        }
    }

    // Verify: make sure all not-deleted docs are in fact not deleted:
    int endID = Convert.ToInt32(docs.NextDoc().Get("docid"));
    docs.Dispose();

    for (int id = 0; id < endID; id++)
    {
        string stringID = "" + id;
        if (!delIDs.Contains(stringID))
        {
            TopDocs hits = s.Search(new TermQuery(new Term("docid", stringID)), 1);
            if (hits.TotalHits != 1)
            {
                Console.WriteLine("doc id=" + stringID + " is not supposed to be deleted, but got hitCount=" + hits.TotalHits + "; delIDs=" + string.Join(",", delIDs.ToArray()));
                doFail = true;
            }
        }
    }
    Assert.IsFalse(doFail);

    Assert.AreEqual(AddCount.Get() - DelCount.Get(), s.IndexReader.NumDocs, "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount);
    ReleaseSearcher(s);

    Writer.Commit();

    Assert.AreEqual(AddCount.Get() - DelCount.Get(), Writer.NumDocs(), "index=" + Writer.SegString() + " addCount=" + AddCount + " delCount=" + DelCount);

    DoClose();
    Writer.Dispose(false);

    // Cannot shutdown until after writer is closed because
    // writer has merged segment warmer that uses IS to run
    // searches, and that IS may be using this es!
    /*if (es != null)
    {
        es.shutdown();
        es.awaitTermination(1, TimeUnit.SECONDS);
    }*/

    TestUtil.CheckIndex(Dir);
    Dir.Dispose();
    System.IO.Directory.Delete(tempDir.FullName, true);

    if (VERBOSE)
    {
        Console.WriteLine("TEST: done [" + (DateTime.UtcNow - t0).TotalMilliseconds + " ms]");
    }
}
// Produces a realistic unicode random string that
// survives MockAnalyzer unchanged:
private string GetRandomTerm(string other)
{
    Analyzer a = new MockAnalyzer(Random());
    while (true)
    {
        string s = TestUtil.RandomRealisticUnicodeString(Random());
        if (other != null && s.Equals(other))
        {
            continue;
        }
        IOException priorException = null;
        TokenStream ts = a.TokenStream("foo", new StringReader(s));
        try
        {
            ITermToBytesRefAttribute termAtt = ts.GetAttribute<ITermToBytesRefAttribute>();
            BytesRef termBytes = termAtt.BytesRef;
            ts.Reset();
            int count = 0;
            bool changed = false;
            while (ts.IncrementToken())
            {
                termAtt.FillBytesRef();
                if (count == 0 && !termBytes.Utf8ToString().Equals(s))
                {
                    // The value was changed during analysis. Keep iterating so the
                    // tokenStream is exhausted.
                    changed = true;
                }
                count++;
            }
            ts.End();
            // Did we iterate just once and the value was unchanged?
            if (!changed && count == 1)
            {
                return s;
            }
        }
        catch (IOException e)
        {
            priorException = e;
        }
        finally
        {
            IOUtils.CloseWhileHandlingException(priorException, ts);
        }
    }
}
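// Illustrative usage of GetRandomTerm (not part of the original test): pick
// two distinct terms that MockAnalyzer will index verbatim:
private void ExampleGetRandomTermUsage()
{
    string t1 = GetRandomTerm(null);
    string t2 = GetRandomTerm(t1); // guaranteed to differ from t1
    Assert.AreNotEqual(t1, t2);
}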
public virtual void TestGiga()
{
    MockAnalyzer analyzer = new MockAnalyzer(Random());
    Directory index = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), index);

    AddDoc("Lucene in Action", w);
    AddDoc("Lucene for Dummies", w);

    //addDoc("Giga", w);
    AddDoc("Giga byte", w);

    AddDoc("ManagingGigabytesManagingGigabyte", w);
    AddDoc("ManagingGigabytesManagingGigabytes", w);

    AddDoc("The Art of Computer Science", w);
    AddDoc("J. K. Rowling", w);
    AddDoc("JK Rowling", w);
    AddDoc("Joanne K Roling", w);
    AddDoc("Bruce Willis", w);
    AddDoc("Willis bruce", w);
    AddDoc("Brute willis", w);
    AddDoc("B. willis", w);
    IndexReader r = w.Reader;
    w.Dispose();

    Query q = new FuzzyQuery(new Term("field", "giga"), 0);

    // 3. search
    IndexSearcher searcher = NewSearcher(r);
    ScoreDoc[] hits = searcher.Search(q, 10).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
    Assert.AreEqual("Giga byte", searcher.Doc(hits[0].Doc).Get("field"));
    r.Dispose();
    index.Dispose();
}
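// AddDoc is a small helper not shown in this excerpt; presumably it indexes a
// single document with the given value in the "field" text field that the
// FuzzyQuery above searches:
private void AddDoc(string text, RandomIndexWriter writer)
{
    Document doc = new Document();
    doc.Add(NewTextField("field", text, Field.Store.YES));
    writer.AddDocument(doc);
}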
public virtual void TestPositions()
{
    Directory ram = NewDirectory();
    Analyzer analyzer = new MockAnalyzer(Random());
    IndexWriter writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
    Document d = new Document();

    // f1,f2,f3: docs only
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY;

    Field f1 = NewField("f1", "this field has docs only", ft);
    d.Add(f1);

    Field f2 = NewField("f2", "this field has docs only", ft);
    d.Add(f2);

    Field f3 = NewField("f3", "this field has docs only", ft);
    d.Add(f3);

    FieldType ft2 = new FieldType(TextField.TYPE_NOT_STORED);
    ft2.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;

    // f4,f5,f6 docs and freqs
    Field f4 = NewField("f4", "this field has docs and freqs", ft2);
    d.Add(f4);

    Field f5 = NewField("f5", "this field has docs and freqs", ft2);
    d.Add(f5);

    Field f6 = NewField("f6", "this field has docs and freqs", ft2);
    d.Add(f6);

    FieldType ft3 = new FieldType(TextField.TYPE_NOT_STORED);
    ft3.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;

    // f7,f8,f9 docs/freqs/positions
    Field f7 = NewField("f7", "this field has docs and freqs and positions", ft3);
    d.Add(f7);

    Field f8 = NewField("f8", "this field has docs and freqs and positions", ft3);
    d.Add(f8);

    Field f9 = NewField("f9", "this field has docs and freqs and positions", ft3);
    d.Add(f9);

    writer.AddDocument(d);
    writer.ForceMerge(1);

    // now we add another document which has docs-only for f1, f4, f7, docs/freqs for f2, f5, f8,
    // and docs/freqs/positions for f3, f6, f9
    d = new Document();

    // f1,f4,f7: docs only
    f1 = NewField("f1", "this field has docs only", ft);
    d.Add(f1);

    f4 = NewField("f4", "this field has docs only", ft);
    d.Add(f4);

    f7 = NewField("f7", "this field has docs only", ft);
    d.Add(f7);

    // f2, f5, f8: docs and freqs
    f2 = NewField("f2", "this field has docs and freqs", ft2);
    d.Add(f2);

    f5 = NewField("f5", "this field has docs and freqs", ft2);
    d.Add(f5);

    f8 = NewField("f8", "this field has docs and freqs", ft2);
    d.Add(f8);

    // f3, f6, f9: docs and freqs and positions
    f3 = NewField("f3", "this field has docs and freqs and positions", ft3);
    d.Add(f3);

    f6 = NewField("f6", "this field has docs and freqs and positions", ft3);
    d.Add(f6);

    f9 = NewField("f9", "this field has docs and freqs and positions", ft3);
    d.Add(f9);

    writer.AddDocument(d);

    // force merge
    writer.ForceMerge(1);
    // flush
    writer.Dispose();

    SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(ram));
    FieldInfos fi = reader.FieldInfos;
    // docs + docs = docs
    Assert.AreEqual(FieldInfo.IndexOptions.DOCS_ONLY, fi.FieldInfo("f1").FieldIndexOptions);
    // docs + docs/freqs = docs
    Assert.AreEqual(FieldInfo.IndexOptions.DOCS_ONLY, fi.FieldInfo("f2").FieldIndexOptions);
    // docs + docs/freqs/pos = docs
    Assert.AreEqual(FieldInfo.IndexOptions.DOCS_ONLY, fi.FieldInfo("f3").FieldIndexOptions);
    // docs/freqs + docs = docs
    Assert.AreEqual(FieldInfo.IndexOptions.DOCS_ONLY, fi.FieldInfo("f4").FieldIndexOptions);
    // docs/freqs + docs/freqs = docs/freqs
    Assert.AreEqual(FieldInfo.IndexOptions.DOCS_AND_FREQS, fi.FieldInfo("f5").FieldIndexOptions);
    // docs/freqs + docs/freqs/pos = docs/freqs
    Assert.AreEqual(FieldInfo.IndexOptions.DOCS_AND_FREQS, fi.FieldInfo("f6").FieldIndexOptions);
    // docs/freqs/pos + docs = docs
    Assert.AreEqual(FieldInfo.IndexOptions.DOCS_ONLY, fi.FieldInfo("f7").FieldIndexOptions);
    // docs/freqs/pos + docs/freqs = docs/freqs
    Assert.AreEqual(FieldInfo.IndexOptions.DOCS_AND_FREQS, fi.FieldInfo("f8").FieldIndexOptions);
    // docs/freqs/pos + docs/freqs/pos = docs/freqs/pos
    Assert.AreEqual(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, fi.FieldInfo("f9").FieldIndexOptions);

    reader.Dispose();
    ram.Dispose();
}
public virtual void TestCustomEncoder()
{
    Directory dir = NewDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(Random());

    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    config.SetSimilarity(new CustomNormEncodingSimilarity(this));
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, config);
    Document doc = new Document();
    Field foo = NewTextField("foo", "", Field.Store.NO);
    Field bar = NewTextField("bar", "", Field.Store.NO);
    doc.Add(foo);
    doc.Add(bar);

    for (int i = 0; i < 100; i++)
    {
        bar.StringValue = "singleton";
        writer.AddDocument(doc);
    }

    IndexReader reader = writer.Reader;
    writer.Dispose();

    NumericDocValues fooNorms = MultiDocValues.GetNormValues(reader, "foo");
    for (int i = 0; i < reader.MaxDoc; i++)
    {
        Assert.AreEqual(0, fooNorms.Get(i));
    }

    NumericDocValues barNorms = MultiDocValues.GetNormValues(reader, "bar");
    for (int i = 0; i < reader.MaxDoc; i++)
    {
        Assert.AreEqual(1, barNorms.Get(i));
    }

    reader.Dispose();
    dir.Dispose();
}
public virtual void Test()
{
    MockDirectoryWrapper dir = NewMockFSDirectory(CreateTempDir("TestIndexWriterOutOfFileDescriptors"));
    dir.PreventDoubleWrite = false;
    double rate = Random().NextDouble() * 0.01;
    //System.out.println("rate=" + rate);
    dir.RandomIOExceptionRateOnOpen = rate;
    int iters = AtLeast(20);
    LineFileDocs docs = new LineFileDocs(Random(), DefaultCodecSupportsDocValues());
    IndexReader r = null;
    DirectoryReader r2 = null;
    bool any = false;
    MockDirectoryWrapper dirCopy = null;
    int lastNumDocs = 0;
    for (int iter = 0; iter < iters; iter++)
    {
        IndexWriter w = null;
        if (VERBOSE)
        {
            Console.WriteLine("TEST: iter=" + iter);
        }
        try
        {
            MockAnalyzer analyzer = new MockAnalyzer(Random());
            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);

            if (VERBOSE)
            {
                // Do this ourselves instead of relying on LTC so
                // we see incrementing messageID:
                iwc.InfoStream = new PrintStreamInfoStream(Console.Out);
            }
            var ms = iwc.MergeScheduler;
            if (ms is IConcurrentMergeScheduler)
            {
                ((IConcurrentMergeScheduler)ms).SetSuppressExceptions();
            }
            w = new IndexWriter(dir, iwc);
            if (r != null && Random().Next(5) == 3)
            {
                if (Random().NextBoolean())
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: addIndexes IR[]");
                    }
                    w.AddIndexes(new IndexReader[] { r });
                }
                else
                {
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: addIndexes Directory[]");
                    }
                    w.AddIndexes(new Directory[] { dirCopy });
                }
            }
            else
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: addDocument");
                }
                w.AddDocument(docs.NextDoc());
            }
            dir.RandomIOExceptionRateOnOpen = 0.0;
            w.Dispose();
            w = null;

            // NOTE: this is O(N^2)! Only enable for temporary debugging:
            //dir.setRandomIOExceptionRateOnOpen(0.0);
            //TestUtil.CheckIndex(dir);
            //dir.setRandomIOExceptionRateOnOpen(rate);

            // Verify numDocs only increases, to catch IndexWriter
            // accidentally deleting the index:
            dir.RandomIOExceptionRateOnOpen = 0.0;
            Assert.IsTrue(DirectoryReader.IndexExists(dir));
            if (r2 == null)
            {
                r2 = DirectoryReader.Open(dir);
            }
            else
            {
                DirectoryReader r3 = DirectoryReader.OpenIfChanged(r2);
                if (r3 != null)
                {
                    r2.Dispose();
                    r2 = r3;
                }
            }
            Assert.IsTrue(r2.NumDocs >= lastNumDocs, "before=" + lastNumDocs + " after=" + r2.NumDocs);
            lastNumDocs = r2.NumDocs;
            //System.out.println("numDocs=" + lastNumDocs);
            dir.RandomIOExceptionRateOnOpen = rate;

            any = true;
            if (VERBOSE)
            {
                Console.WriteLine("TEST: iter=" + iter + ": success");
            }
        }
        catch (IOException ioe)
        {
            if (VERBOSE)
            {
                Console.WriteLine("TEST: iter=" + iter + ": exception");
                Console.WriteLine(ioe.ToString());
                Console.Write(ioe.StackTrace);
            }
            if (w != null)
            {
                // NOTE: leave random IO exceptions enabled here,
                // to verify that rollback does not try to write
                // anything:
                w.Rollback();
            }
        }

        if (any && r == null && Random().NextBoolean())
        {
            // Make a copy of a non-empty index so we can use
            // it to addIndexes later:
            dir.RandomIOExceptionRateOnOpen = 0.0;
            r = DirectoryReader.Open(dir);
            dirCopy = NewMockFSDirectory(CreateTempDir("TestIndexWriterOutOfFileDescriptors.copy"));
            HashSet<string> files = new HashSet<string>();
            foreach (string file in dir.ListAll())
            {
                dir.Copy(dirCopy, file, file, IOContext.DEFAULT);
                files.Add(file);
            }
            dirCopy.Sync(files);
            // Have IW kiss the dir so we remove any leftover
            // files ... we can easily have leftover files at
            // the time we take a copy because we are holding
            // open a reader:
            (new IndexWriter(dirCopy, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())))).Dispose();
            dirCopy.RandomIOExceptionRate = rate;
            dir.RandomIOExceptionRateOnOpen = rate;
        }
    }

    if (r2 != null)
    {
        r2.Dispose();
    }
    if (r != null)
    {
        r.Dispose();
        dirCopy.Dispose();
    }
    dir.Dispose();
}
public NodeState(ShardSearchingTestBase outerInstance, Random random, int nodeID, int numNodes)
{
    this.OuterInstance = outerInstance;
    MyNodeID = nodeID;
    Dir = NewFSDirectory(CreateTempDir("ShardSearchingTestBase"));
    // TODO: set warmer
    MockAnalyzer analyzer = new MockAnalyzer(Random());
    analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
    IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    iwc.SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE);
    if (VERBOSE)
    {
        iwc.InfoStream = new PrintStreamInfoStream(Console.Out);
    }
    Writer = new IndexWriter(Dir, iwc);
    Mgr = new SearcherManager(Writer, true, null);
    Searchers = new SearcherLifetimeManager();

    // Init w/ 0s... caller above will do initial
    // "broadcast" by calling initSearcher:
    CurrentNodeVersions = new long[numNodes];
}
public virtual void TestRollingUpdates_Mem()
{
    Random random = new Random(Random().Next());
    BaseDirectoryWrapper dir = NewDirectory();
    LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues());

    //provider.register(new MemoryCodec());
    // LUCENE TODO: uncomment this out once MemoryPostingsFormat is brought over
    //if ((!"Lucene3x".Equals(Codec.Default.Name)) && Random().NextBoolean())
    //{
    //    Codec.Default =
    //        TestUtil.AlwaysPostingsFormat(new MemoryPostingsFormat(random().nextBoolean(), random.NextFloat()));
    //}

    MockAnalyzer analyzer = new MockAnalyzer(Random());
    analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);

    IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
    int SIZE = AtLeast(20);
    int id = 0;
    IndexReader r = null;
    IndexSearcher s = null;
    int numUpdates = (int)(SIZE * (2 + (TEST_NIGHTLY ? 200 * Random().NextDouble() : 5 * Random().NextDouble())));
    if (VERBOSE)
    {
        Console.WriteLine("TEST: numUpdates=" + numUpdates);
    }
    int updateCount = 0;

    // TODO: sometimes update ids not in order...
    for (int docIter = 0; docIter < numUpdates; docIter++)
    {
        Documents.Document doc = docs.NextDoc();
        string myID = "" + id;
        if (id == SIZE - 1)
        {
            id = 0;
        }
        else
        {
            id++;
        }
        if (VERBOSE)
        {
            Console.WriteLine("  docIter=" + docIter + " id=" + id);
        }
        ((Field)doc.GetField("docid")).StringValue = myID;

        Term idTerm = new Term("docid", myID);

        bool doUpdate;
        if (s != null && updateCount < SIZE)
        {
            TopDocs hits = s.Search(new TermQuery(idTerm), 1);
            Assert.AreEqual(1, hits.TotalHits);
            doUpdate = !w.TryDeleteDocument(r, hits.ScoreDocs[0].Doc);
            if (VERBOSE)
            {
                if (doUpdate)
                {
                    Console.WriteLine("  tryDeleteDocument failed");
                }
                else
                {
                    Console.WriteLine("  tryDeleteDocument succeeded");
                }
            }
        }
        else
        {
            doUpdate = true;
            if (VERBOSE)
            {
                Console.WriteLine("  no searcher: doUpdate=true");
            }
        }

        updateCount++;

        if (doUpdate)
        {
            w.UpdateDocument(idTerm, doc);
        }
        else
        {
            w.AddDocument(doc);
        }

        if (docIter >= SIZE && Random().Next(50) == 17)
        {
            if (r != null)
            {
                r.Dispose();
            }

            bool applyDeletions = Random().NextBoolean();

            if (VERBOSE)
            {
                Console.WriteLine("TEST: reopen applyDeletions=" + applyDeletions);
            }

            r = w.GetReader(applyDeletions);
            if (applyDeletions)
            {
                s = NewSearcher(r);
            }
            else
            {
                s = null;
            }
            Assert.IsTrue(!applyDeletions || r.NumDocs == SIZE, "applyDeletions=" + applyDeletions + " r.NumDocs=" + r.NumDocs + " vs SIZE=" + SIZE);
            updateCount = 0;
        }
    }

    if (r != null)
    {
        r.Dispose();
    }

    w.Commit();
    Assert.AreEqual(SIZE, w.NumDocs());

    w.Dispose();

    TestIndexWriter.AssertNoUnreferencedFiles(dir, "leftover files after rolling updates");

    docs.Dispose();

    // LUCENE-4455:
    SegmentInfos infos = new SegmentInfos();
    infos.Read(dir);
    long totalBytes = 0;
    foreach (SegmentCommitInfo sipc in infos.Segments)
    {
        totalBytes += sipc.SizeInBytes();
    }
    long totalBytes2 = 0;
    foreach (string fileName in dir.ListAll())
    {
        if (!fileName.StartsWith(IndexFileNames.SEGMENTS))
        {
            totalBytes2 += dir.FileLength(fileName);
        }
    }
    Assert.AreEqual(totalBytes2, totalBytes);
    dir.Dispose();
}
public virtual void TestTooLargeTermSortedSetBytes()
{
    AssumeTrue("codec does not support SORTED_SET", DefaultCodecSupportsSortedSet());
    Analyzer analyzer = new MockAnalyzer(Random());

    Directory directory = NewDirectory();
    // we don't use RandomIndexWriter because it might add more docvalues than we expect
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    iwc.SetMergePolicy(NewLogMergePolicy());
    IndexWriter iwriter = new IndexWriter(directory, iwc);
    Document doc = new Document();
    byte[] bytes = new byte[100000];
    BytesRef b = new BytesRef(bytes);
    Random().NextBytes(bytes);
    doc.Add(new SortedSetDocValuesField("dv", b));
    try
    {
        iwriter.AddDocument(doc);
        Assert.Fail("did not get expected exception");
    }
    catch (System.ArgumentException)
    {
        // expected
    }
    iwriter.Dispose();
    directory.Dispose();
}
public virtual void Test()
{
    Random random = new Random(Random().Next());
    LineFileDocs docs = new LineFileDocs(random, DefaultCodecSupportsDocValues());
    Directory d = NewDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(Random());
    analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
    RandomIndexWriter w = new RandomIndexWriter(Random(), d, analyzer);
    int numDocs = AtLeast(10);
    for (int docCount = 0; docCount < numDocs; docCount++)
    {
        w.AddDocument(docs.NextDoc());
    }
    IndexReader r = w.Reader;
    w.Dispose();

    List<BytesRef> terms = new List<BytesRef>();
    TermsEnum termsEnum = MultiFields.GetTerms(r, "body").Iterator(null);
    BytesRef term;
    while ((term = termsEnum.Next()) != null)
    {
        terms.Add(BytesRef.DeepCopyOf(term));
    }
    if (VERBOSE)
    {
        Console.WriteLine("TEST: " + terms.Count + " terms");
    }

    int upto = -1;
    int iters = AtLeast(200);
    for (int iter = 0; iter < iters; iter++)
    {
        bool isEnd;
        if (upto != -1 && Random().NextBoolean())
        {
            // next
            if (VERBOSE)
            {
                Console.WriteLine("TEST: iter next");
            }
            isEnd = termsEnum.Next() == null;
            upto++;
            if (isEnd)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("  end");
                }
                Assert.AreEqual(upto, terms.Count);
                upto = -1;
            }
            else
            {
                if (VERBOSE)
                {
                    Console.WriteLine("  got term=" + termsEnum.Term().Utf8ToString() + " expected=" + terms[upto].Utf8ToString());
                }
                Assert.IsTrue(upto < terms.Count);
                Assert.AreEqual(terms[upto], termsEnum.Term());
            }
        }
        else
        {
            BytesRef target;
            string exists;
            if (Random().NextBoolean())
            {
                // likely fake term
                if (Random().NextBoolean())
                {
                    target = new BytesRef(TestUtil.RandomSimpleString(Random()));
                }
                else
                {
                    target = new BytesRef(TestUtil.RandomRealisticUnicodeString(Random()));
                }
                exists = "likely not";
            }
            else
            {
                // real term
                target = terms[Random().Next(terms.Count)];
                exists = "yes";
            }

            upto = terms.BinarySearch(target);

            if (Random().NextBoolean())
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: iter seekCeil target=" + target.Utf8ToString() + " exists=" + exists);
                }
                // seekCeil
                TermsEnum.SeekStatus status = termsEnum.SeekCeil(target);
                if (VERBOSE)
                {
                    Console.WriteLine("  got " + status);
                }

                if (upto < 0)
                {
                    upto = -(upto + 1);
                    if (upto >= terms.Count)
                    {
                        Assert.AreEqual(TermsEnum.SeekStatus.END, status);
                        upto = -1;
                    }
                    else
                    {
                        Assert.AreEqual(TermsEnum.SeekStatus.NOT_FOUND, status);
                        Assert.AreEqual(terms[upto], termsEnum.Term());
                    }
                }
                else
                {
                    Assert.AreEqual(TermsEnum.SeekStatus.FOUND, status);
                    Assert.AreEqual(terms[upto], termsEnum.Term());
                }
            }
            else
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: iter seekExact target=" + target.Utf8ToString() + " exists=" + exists);
                }
                // seekExact
                bool result = termsEnum.SeekExact(target);
                if (VERBOSE)
                {
                    Console.WriteLine("  got " + result);
                }
                if (upto < 0)
                {
                    Assert.IsFalse(result);
                    upto = -1;
                }
                else
                {
                    Assert.IsTrue(result);
                    Assert.AreEqual(target, termsEnum.Term());
                }
            }
        }
    }

    r.Dispose();
    d.Dispose();
    docs.Dispose();
}