public virtual void TestRareVectors()
{
    IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    for (int i = 0; i < 100; i++)
    {
        Document doc = new Document();
        doc.Add(new Field("field", English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
        writer.AddDocument(doc);
    }
    for (int i = 0; i < 10; i++)
    {
        Document doc = new Document();
        doc.Add(new Field("field", English.IntToEnglish(100 + i), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
        writer.AddDocument(doc);
    }
    writer.Close();

    searcher = new IndexSearcher(directory);
    Query query = new TermQuery(new Term("field", "hundred"));
    ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;
    Assert.AreEqual(10, hits.Length);
    for (int i = 0; i < hits.Length; i++)
    {
        TermFreqVector[] vector = searcher.reader_ForNUnit.GetTermFreqVectors(hits[i].doc);
        Assert.IsTrue(vector != null);
        Assert.IsTrue(vector.Length == 1);
    }
}
public override void SetUp()
{
    base.SetUp();
    RAMDirectory directory = new RAMDirectory();
    PayloadAnalyzer analyzer = new PayloadAnalyzer(this);
    IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetSimilarity(similarity);
    //writer.infoStream = System.out;
    for (int i = 0; i < 1000; i++)
    {
        Document doc = new Document();
        Field noPayloadField = new Field(PayloadHelper.NO_PAYLOAD_FIELD, English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED);
        //noPayloadField.setBoost(0);
        doc.Add(noPayloadField);
        doc.Add(new Field("field", English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
        doc.Add(new Field("multiField", English.IntToEnglish(i) + " " + English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(doc);
    }
    writer.Optimize();
    writer.Close();

    searcher = new IndexSearcher(directory);
    searcher.SetSimilarity(similarity);
}
public override void SetUp()
{
    base.SetUp();
    System.String tempDir = System.IO.Path.GetTempPath();
    if (tempDir == null)
    {
        throw new System.IO.IOException("java.io.tmpdir undefined, cannot run test");
    }
    indexDir = new System.IO.DirectoryInfo(Path.Combine(tempDir, "RAMDirIndex"));

    Directory dir = FSDirectory.Open(indexDir);
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
    // add some documents
    Document doc = null;
    for (int i = 0; i < docsToAdd; i++)
    {
        doc = new Document();
        doc.Add(new Field("content", English.IntToEnglish(i).Trim(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.AddDocument(doc, null);
    }
    Assert.AreEqual(docsToAdd, writer.MaxDoc());
    writer.Close();
    dir.Close();
}
override public void Run()
{
    try
    {
        for (int j = 0; j < Lucene.Net.Index.TestThreadedOptimize.NUM_ITER2; j++)
        {
            writerFinal.Optimize(false, null);
            for (int k = 0; k < 17 * (1 + iFinal); k++)
            {
                Document d = new Document();
                d.Add(new Field("id", iterFinal + "_" + iFinal + "_" + j + "_" + k, Field.Store.YES, Field.Index.NOT_ANALYZED));
                d.Add(new Field("contents", English.IntToEnglish(iFinal + k), Field.Store.NO, Field.Index.ANALYZED));
                writerFinal.AddDocument(d, null);
            }
            for (int k = 0; k < 9 * (1 + iFinal); k++)
            {
                writerFinal.DeleteDocuments(null, new Term("id", iterFinal + "_" + iFinal + "_" + j + "_" + k));
            }
            writerFinal.Optimize(null);
        }
    }
    catch (System.Exception t)
    {
        Enclosing_Instance.setFailed();
        System.Console.Out.WriteLine(ThreadClass.Current().Name + ": hit exception");
        System.Console.Out.WriteLine(t.StackTrace);
    }
}
public virtual void Test()
{
    PayloadNearQuery query;
    TopDocs hits;

    query = NewPhraseQuery("field", "twenty two", true);
    QueryUtils.Check(query);

    // all 10 hits should have score = 3 because adjacent terms have payloads of 2,4
    // and all the similarity factors are set to 1
    hits = searcher.Search(query, null, 100);
    Assert.IsTrue(hits != null, "hits is null and it shouldn't be");
    Assert.IsTrue(hits.TotalHits == 10, "should be 10 hits");
    for (int j = 0; j < hits.ScoreDocs.Length; j++)
    {
        ScoreDoc doc = hits.ScoreDocs[j];
        Assert.IsTrue(doc.Score == 3, doc.Score + " does not equal: " + 3);
    }

    for (int i = 1; i < 10; i++)
    {
        query = NewPhraseQuery("field", English.IntToEnglish(i) + " hundred", true);
        // all should have score = 3 because adjacent terms have payloads of 2,4
        // and all the similarity factors are set to 1
        hits = searcher.Search(query, null, 100);
        Assert.IsTrue(hits != null, "hits is null and it shouldn't be");
        Assert.IsTrue(hits.TotalHits == 100, "should be 100 hits");
        for (int j = 0; j < hits.ScoreDocs.Length; j++)
        {
            ScoreDoc doc = hits.ScoreDocs[j];
            // System.out.println("Doc: " + doc.toString());
            // System.out.println("Explain: " + searcher.explain(query, doc.doc));
            Assert.IsTrue(doc.Score == 3, doc.Score + " does not equal: " + 3);
        }
    }
}
public override void SetUp()
{
    base.SetUp();
    IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    //writer.setUseCompoundFile(true);
    //writer.infoStream = System.out;
    for (int i = 0; i < 1000; i++)
    {
        Document doc = new Document();
        Field.TermVector termVector;
        int mod3 = i % 3;
        int mod2 = i % 2;
        if (mod2 == 0 && mod3 == 0)
        {
            termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
        }
        else if (mod2 == 0)
        {
            termVector = Field.TermVector.WITH_POSITIONS;
        }
        else if (mod3 == 0)
        {
            termVector = Field.TermVector.WITH_OFFSETS;
        }
        else
        {
            termVector = Field.TermVector.YES;
        }
        doc.Add(new Field("field", English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED, termVector));
        writer.AddDocument(doc);
    }
    writer.Close();
    searcher = new IndexSearcher(directory);
}
public override void SetUp()
{
    base.SetUp();
    IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    //writer.setUseCompoundFile(false);
    //writer.infoStream = System.out;
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();
        Fieldable fld = new Field("field", English.IntToEnglish(i), Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.YES);
        doc.Add(fld);
        writer.AddDocument(doc);
    }
    writer.Close();
}
public virtual void SetUp()
{
    IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true);
    //writer.setUseCompoundFile(true);
    //writer.infoStream = System.out;
    System.Text.StringBuilder buffer = new System.Text.StringBuilder();
    for (int i = 0; i < 1000; i++)
    {
        Document doc = new Document();
        doc.Add(Field.Text("Field", English.IntToEnglish(i), true));
        writer.AddDocument(doc);
    }
    writer.Close();
    searcher = new IndexSearcher(directory);
}
override public void Run()
{
    for (int j = 1; j < Enclosing_Instance.docsPerThread; j++)
    {
        Document doc = new Document();
        doc.Add(new Field("sizeContent", English.IntToEnglish(num * Enclosing_Instance.docsPerThread + j).Trim(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        try
        {
            writer.AddDocument(doc);
        }
        catch (System.IO.IOException e)
        {
            throw new System.SystemException("", e);
        }
    }
}
public virtual void SetUp()
{
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true);
    //writer.infoStream = System.out;
    for (int i = 0; i < 1000; i++)
    {
        Document doc = new Document();
        doc.Add(Field.Text("Field", English.IntToEnglish(i)));
        writer.AddDocument(doc);
    }
    writer.Close();
    searcher = new IndexSearcher(directory);
}
private void DoTestStopPositons(StopFilter stpf, bool enableIcrements)
{
    Log("---> test with enable-increments-" + (enableIcrements ? "enabled" : "disabled"));
    stpf.EnablePositionIncrements = enableIcrements;
    ITermAttribute termAtt = stpf.GetAttribute<ITermAttribute>();
    IPositionIncrementAttribute posIncrAtt = stpf.GetAttribute<IPositionIncrementAttribute>();
    for (int i = 0; i < 20; i += 3)
    {
        Assert.IsTrue(stpf.IncrementToken());
        Log("Token " + i + ": " + stpf);
        System.String w = English.IntToEnglish(i).Trim();
        Assert.AreEqual(w, termAtt.Term, "expecting token " + i + " to be " + w);
        Assert.AreEqual(enableIcrements ? (i == 0 ? 1 : 3) : 1, posIncrAtt.PositionIncrement, "all but first token must have position increment of 3");
    }
    Assert.IsFalse(stpf.IncrementToken());
}
public override void SetUp()
{
    base.SetUp();
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
    //writer.infoStream = System.out;
    for (int i = 0; i < 1000; i++)
    {
        Document doc = new Document();
        doc.Add(new Field("field", English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(doc, null);
    }
    writer.Close();
    searcher = new IndexSearcher(directory, true, null);
}
private void VerifyVectors(TermFreqVector[] vectors, int num)
{
    System.Text.StringBuilder temp = new System.Text.StringBuilder();
    System.String[] terms = null;
    for (int i = 0; i < vectors.Length; i++)
    {
        terms = vectors[i].GetTerms();
        for (int z = 0; z < terms.Length; z++)
        {
            temp.Append(terms[z]);
        }
    }
    if (!English.IntToEnglish(num).Trim().Equals(temp.ToString().Trim()))
    {
        System.Console.Out.WriteLine("wrong term result");
    }
}
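For reference, a minimal sketch of how a helper like VerifyVectors can be driven. The reader variable, the numDocs loop bound, and the reliance on the NOT_ANALYZED single-term "field" setup shown earlier are assumptions for illustration, not part of the original tests.

// Hypothetical driver for VerifyVectors (reader, numDocs and the field setup are assumed).
// With a NOT_ANALYZED field holding English.IntToEnglish(i), each document's term vector
// contains a single term that should spell out exactly that number.
for (int docId = 0; docId < numDocs; docId++)
{
    TermFreqVector[] vectors = reader.GetTermFreqVectors(docId);
    if (vectors != null)
    {
        VerifyVectors(vectors, docId);
    }
}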
public virtual void TestFilterWorks()
{
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
    for (int i = 0; i < 500; i++)
    {
        Document document = new Document();
        document.Add(new Field("field", English.IntToEnglish(i) + " equals " + English.IntToEnglish(i), Field.Store.NO, Field.Index.ANALYZED));
        writer.AddDocument(document, null);
    }
    writer.Close();

    IndexReader reader = IndexReader.Open(dir, true, null);

    SpanTermQuery query = new SpanTermQuery(new Term("field", English.IntToEnglish(10).Trim()));
    SpanQueryFilter filter = new SpanQueryFilter(query);
    SpanFilterResult result = filter.BitSpans(reader, null);
    DocIdSet docIdSet = result.DocIdSet;
    Assert.IsTrue(docIdSet != null, "docIdSet is null and it shouldn't be");
    AssertContainsDocId("docIdSet doesn't contain docId 10", docIdSet, 10);
    var spans = result.Positions;
    Assert.IsTrue(spans != null, "spans is null and it shouldn't be");
    int size = GetDocIdSetSize(docIdSet);
    Assert.IsTrue(spans.Count == size, "spans Size: " + spans.Count + " is not: " + size);
    for (System.Collections.IEnumerator iterator = spans.GetEnumerator(); iterator.MoveNext();)
    {
        SpanFilterResult.PositionInfo info = (SpanFilterResult.PositionInfo) iterator.Current;
        Assert.IsTrue(info != null, "info is null and it shouldn't be");
        //The doc should indicate the bit is on
        AssertContainsDocId("docIdSet doesn't contain docId " + info.Doc, docIdSet, info.Doc);
        //There should be two positions in each
        Assert.IsTrue(info.Positions.Count == 2, "info.getPositions() Size: " + info.Positions.Count + " is not: " + 2);
    }
    reader.Close();
}
public override void SetUp()
{
    base.SetUp();
    RAMDirectory directory = new RAMDirectory();
    PayloadAnalyzer analyzer = new PayloadAnalyzer(this);
    IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetSimilarity(similarity);
    //writer.infoStream = System.out;
    for (int i = 0; i < 1000; i++)
    {
        Document doc = new Document();
        doc.Add(new Field("field", English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
        System.String txt = English.IntToEnglish(i) + ' ' + English.IntToEnglish(i + 1);
        doc.Add(new Field("field2", txt, Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(doc);
    }
    writer.Optimize();
    writer.Close();

    searcher = new IndexSearcher(directory, true);
    searcher.Similarity = similarity;
}
/// <summary> Sets up a RAMDirectory and adds documents (using English.IntToEnglish()) with three fields:
/// FIELD, MULTI_FIELD and NO_PAYLOAD_FIELD, analyzing them with the PayloadAnalyzer
/// </summary>
/// <param name="similarity">The Similarity class to use in the Searcher
/// </param>
/// <param name="numDocs">The num docs to add
/// </param>
/// <returns> An IndexSearcher
/// </returns>
/// <throws> IOException </throws>
public virtual IndexSearcher SetUp(Similarity similarity, int numDocs)
{
    RAMDirectory directory = new RAMDirectory();
    PayloadAnalyzer analyzer = new PayloadAnalyzer(this);
    IndexWriter writer = new IndexWriter(directory, analyzer, true);
    writer.SetSimilarity(similarity);
    //writer.infoStream = System.out;
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();
        doc.Add(new Field(FIELD, English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
        doc.Add(new Field(MULTI_FIELD, English.IntToEnglish(i) + " " + English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
        doc.Add(new Field(NO_PAYLOAD_FIELD, English.IntToEnglish(i), Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(doc);
    }
    //writer.optimize();
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(directory);
    searcher.SetSimilarity(similarity);
    return searcher;
}
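A short usage sketch for the helper above. The PayloadHelper instance, the choice of DefaultSimilarity, and the query term are assumptions for illustration, not part of the original class; the search call mirrors the Search(query, null, n) pattern used in the other tests in this listing.

// Hypothetical caller of the SetUp(Similarity, int) helper above.
PayloadHelper helper = new PayloadHelper();                             // assumed construction
IndexSearcher searcher = helper.SetUp(new DefaultSimilarity(), 1000);   // assumed Similarity choice

// Every document holds English.IntToEnglish(i), so a common term such as "one" matches many docs.
Query query = new TermQuery(new Term(PayloadHelper.FIELD, "one"));      // FIELD assumed accessible as a constant
TopDocs hits = searcher.Search(query, null, 100);
System.Console.Out.WriteLine("hits: " + hits.TotalHits);
searcher.Close();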
/// <summary> Not an explicit test, just useful to print out some info on performance
///
/// </summary>
/// <throws> Exception </throws>
public virtual void Performance()
{
    int[] tokCount = new int[] { 100, 500, 1000, 2000, 5000, 10000 };
    int[] modCounts = new int[] { 1, 2, 5, 10, 20, 50, 100, 200, 500 };
    for (int k = 0; k < tokCount.Length; k++)
    {
        System.Text.StringBuilder buffer = new System.Text.StringBuilder();
        System.Console.Out.WriteLine("-----Tokens: " + tokCount[k] + "-----");
        for (int i = 0; i < tokCount[k]; i++)
        {
            buffer.Append(English.IntToEnglish(i).ToUpper()).Append(' ');
        }

        //make sure we produce the same tokens
        TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new System.IO.StringReader(buffer.ToString()))));
        TokenStream sink = teeStream.NewSinkTokenStream(new ModuloSinkFilter(this, 100));
        teeStream.ConsumeAllTokens();
        TokenStream stream = new ModuloTokenFilter(this, new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new System.IO.StringReader(buffer.ToString()))), 100);
        ITermAttribute tfTok = stream.AddAttribute<ITermAttribute>();
        ITermAttribute sinkTok = sink.AddAttribute<ITermAttribute>();
        for (int i = 0; stream.IncrementToken(); i++)
        {
            Assert.IsTrue(sink.IncrementToken());
            Assert.IsTrue(tfTok.Equals(sinkTok) == true, tfTok + " is not equal to " + sinkTok + " at token: " + i);
        }

        //simulate two fields, each being analyzed once, for 20 documents
        for (int j = 0; j < modCounts.Length; j++)
        {
            int tfPos = 0;
            long start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
            for (int i = 0; i < 20; i++)
            {
                stream = new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new System.IO.StringReader(buffer.ToString())));
                IPositionIncrementAttribute posIncrAtt = stream.GetAttribute<IPositionIncrementAttribute>();
                while (stream.IncrementToken())
                {
                    tfPos += posIncrAtt.PositionIncrement;
                }
                stream = new ModuloTokenFilter(this, new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new System.IO.StringReader(buffer.ToString()))), modCounts[j]);
                posIncrAtt = stream.GetAttribute<IPositionIncrementAttribute>();
                while (stream.IncrementToken())
                {
                    tfPos += posIncrAtt.PositionIncrement;
                }
            }
            long finish = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
            System.Console.Out.WriteLine("ModCount: " + modCounts[j] + " Two fields took " + (finish - start) + " ms");

            int sinkPos = 0;
            //simulate one field with one sink
            start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
            for (int i = 0; i < 20; i++)
            {
                teeStream = new TeeSinkTokenFilter(new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new System.IO.StringReader(buffer.ToString()))));
                sink = teeStream.NewSinkTokenStream(new ModuloSinkFilter(this, modCounts[j]));
                IPositionIncrementAttribute posIncrAtt = teeStream.GetAttribute<IPositionIncrementAttribute>();
                while (teeStream.IncrementToken())
                {
                    sinkPos += posIncrAtt.PositionIncrement;
                }
                //System.out.println("Modulo--------");
                posIncrAtt = sink.GetAttribute<IPositionIncrementAttribute>();
                while (sink.IncrementToken())
                {
                    sinkPos += posIncrAtt.PositionIncrement;
                }
            }
            finish = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
            System.Console.Out.WriteLine("ModCount: " + modCounts[j] + " Tee fields took " + (finish - start) + " ms");
            Assert.IsTrue(sinkPos == tfPos, sinkPos + " does not equal: " + tfPos);
        }
        System.Console.Out.WriteLine("- End Tokens: " + tokCount[k] + "-----");
    }
}
/// <summary> Not an explicit test, just useful to print out some info on performance
///
/// </summary>
/// <throws> Exception </throws>
public virtual void Performance()
{
    int[] tokCount = new int[] { 100, 500, 1000, 2000, 5000, 10000 };
    int[] modCounts = new int[] { 1, 2, 5, 10, 20, 50, 100, 200, 500 };
    for (int k = 0; k < tokCount.Length; k++)
    {
        System.Text.StringBuilder buffer = new System.Text.StringBuilder();
        System.Console.Out.WriteLine("-----Tokens: " + tokCount[k] + "-----");
        for (int i = 0; i < tokCount[k]; i++)
        {
            buffer.Append(English.IntToEnglish(i).ToUpper()).Append(' ');
        }

        //make sure we produce the same tokens
        ModuloSinkTokenizer sink = new ModuloSinkTokenizer(this, tokCount[k], 100);
        Token reusableToken = new Token();
        TokenStream stream = new TeeTokenFilter(new StandardFilter(new StandardTokenizer(new System.IO.StringReader(buffer.ToString()))), sink);
        while (stream.Next(reusableToken) != null)
        {
        }
        stream = new ModuloTokenFilter(this, new StandardFilter(new StandardTokenizer(new System.IO.StringReader(buffer.ToString()))), 100);
        System.Collections.IList tmp = new System.Collections.ArrayList();
        for (Token nextToken = stream.Next(reusableToken); nextToken != null; nextToken = stream.Next(reusableToken))
        {
            tmp.Add(nextToken.Clone());
        }
        System.Collections.IList sinkList = sink.GetTokens();
        Assert.IsTrue(tmp.Count == sinkList.Count, "tmp Size: " + tmp.Count + " is not: " + sinkList.Count);
        for (int i = 0; i < tmp.Count; i++)
        {
            Token tfTok = (Token) tmp[i];
            Token sinkTok = (Token) sinkList[i];
            Assert.IsTrue(tfTok.Term().Equals(sinkTok.Term()) == true, tfTok.Term() + " is not equal to " + sinkTok.Term() + " at token: " + i);
        }

        //simulate two fields, each being analyzed once, for 20 documents
        for (int j = 0; j < modCounts.Length; j++)
        {
            int tfPos = 0;
            long start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
            for (int i = 0; i < 20; i++)
            {
                stream = new StandardFilter(new StandardTokenizer(new System.IO.StringReader(buffer.ToString())));
                for (Token nextToken = stream.Next(reusableToken); nextToken != null; nextToken = stream.Next(reusableToken))
                {
                    tfPos += nextToken.GetPositionIncrement();
                }
                stream = new ModuloTokenFilter(this, new StandardFilter(new StandardTokenizer(new System.IO.StringReader(buffer.ToString()))), modCounts[j]);
                for (Token nextToken = stream.Next(reusableToken); nextToken != null; nextToken = stream.Next(reusableToken))
                {
                    tfPos += nextToken.GetPositionIncrement();
                }
            }
            long finish = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
            System.Console.Out.WriteLine("ModCount: " + modCounts[j] + " Two fields took " + (finish - start) + " ms");

            int sinkPos = 0;
            //simulate one field with one sink
            start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
            for (int i = 0; i < 20; i++)
            {
                sink = new ModuloSinkTokenizer(this, tokCount[k], modCounts[j]);
                stream = new TeeTokenFilter(new StandardFilter(new StandardTokenizer(new System.IO.StringReader(buffer.ToString()))), sink);
                for (Token nextToken = stream.Next(reusableToken); nextToken != null; nextToken = stream.Next(reusableToken))
                {
                    sinkPos += nextToken.GetPositionIncrement();
                }
                //System.out.println("Modulo--------");
                stream = sink;
                for (Token nextToken = stream.Next(reusableToken); nextToken != null; nextToken = stream.Next(reusableToken))
                {
                    sinkPos += nextToken.GetPositionIncrement();
                }
            }
            finish = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
            System.Console.Out.WriteLine("ModCount: " + modCounts[j] + " Tee fields took " + (finish - start) + " ms");
            Assert.IsTrue(sinkPos == tfPos, sinkPos + " does not equal: " + tfPos);
        }
        System.Console.Out.WriteLine("- End Tokens: " + tokCount[k] + "-----");
    }
}
public virtual void TestStopPositons()
{
    System.Text.StringBuilder sb = new System.Text.StringBuilder();
    System.Collections.Generic.List<string> a = new System.Collections.Generic.List<string>();
    for (int i = 0; i < 20; i++)
    {
        System.String w = English.IntToEnglish(i).Trim();
        sb.Append(w).Append(" ");
        if (i % 3 != 0)
        {
            a.Add(w);
        }
    }
    Log(sb.ToString());
    System.String[] stopWords = (System.String[]) a.ToArray();
    for (int i = 0; i < a.Count; i++)
    {
        Log("Stop: " + stopWords[i]);
    }
    var stopSet = StopFilter.MakeStopSet(stopWords);

    // with increments
    System.IO.StringReader reader = new System.IO.StringReader(sb.ToString());
    StopFilter stpf = new StopFilter(false, new WhitespaceTokenizer(reader), stopSet);
    DoTestStopPositons(stpf, true);

    // without increments
    reader = new System.IO.StringReader(sb.ToString());
    stpf = new StopFilter(false, new WhitespaceTokenizer(reader), stopSet);
    DoTestStopPositons(stpf, false);

    // with increments, concatenating two stop filters
    System.Collections.Generic.List<System.String> a0 = new System.Collections.Generic.List<System.String>();
    System.Collections.Generic.List<System.String> a1 = new System.Collections.Generic.List<System.String>();
    for (int i = 0; i < a.Count; i++)
    {
        if (i % 2 == 0)
        {
            a0.Add(a[i]);
        }
        else
        {
            a1.Add(a[i]);
        }
    }
    System.String[] stopWords0 = (System.String[]) a0.ToArray();
    for (int i = 0; i < a0.Count; i++)
    {
        Log("Stop0: " + stopWords0[i]);
    }
    System.String[] stopWords1 = (System.String[]) a1.ToArray();
    for (int i = 0; i < a1.Count; i++)
    {
        Log("Stop1: " + stopWords1[i]);
    }
    var stopSet0 = StopFilter.MakeStopSet(stopWords0);
    var stopSet1 = StopFilter.MakeStopSet(stopWords1);
    reader = new System.IO.StringReader(sb.ToString());
    StopFilter stpf0 = new StopFilter(false, new WhitespaceTokenizer(reader), stopSet0); // first part of the set
    stpf0.EnablePositionIncrements = true;
    StopFilter stpf01 = new StopFilter(false, stpf0, stopSet1); // two stop filters concatenated!
    DoTestStopPositons(stpf01, true);
}
public virtual void runTest(Directory directory, MergeScheduler merger)
{
    IndexWriter writer = new IndexWriter(directory, ANALYZER, true, IndexWriter.MaxFieldLength.UNLIMITED, null);
    writer.SetMaxBufferedDocs(2);
    if (merger != null)
    {
        writer.SetMergeScheduler(merger, null);
    }

    for (int iter = 0; iter < NUM_ITER; iter++)
    {
        int iterFinal = iter;

        writer.MergeFactor = 1000;

        for (int i = 0; i < 200; i++)
        {
            Document d = new Document();
            d.Add(new Field("id", System.Convert.ToString(i), Field.Store.YES, Field.Index.NOT_ANALYZED));
            d.Add(new Field("contents", English.IntToEnglish(i), Field.Store.NO, Field.Index.ANALYZED));
            writer.AddDocument(d, null);
        }

        writer.MergeFactor = 4;
        //writer.setInfoStream(System.out);

        ThreadClass[] threads = new ThreadClass[NUM_THREADS];

        for (int i = 0; i < NUM_THREADS; i++)
        {
            int iFinal = i;
            IndexWriter writerFinal = writer;
            threads[i] = new AnonymousClassThread(writerFinal, iFinal, iterFinal, this);
        }

        for (int i = 0; i < NUM_THREADS; i++)
        {
            threads[i].Start();
        }

        for (int i = 0; i < NUM_THREADS; i++)
        {
            threads[i].Join();
        }

        Assert.IsTrue(!failed);

        int expectedDocCount = (int) ((1 + iter) * (200 + 8 * NUM_ITER2 * (NUM_THREADS / 2.0) * (1 + NUM_THREADS)));

        // System.out.println("TEST: now index=" + writer.segString());

        Assert.AreEqual(expectedDocCount, writer.MaxDoc());

        writer.Close();
        writer = new IndexWriter(directory, ANALYZER, false, IndexWriter.MaxFieldLength.UNLIMITED, null);
        writer.SetMaxBufferedDocs(2);

        IndexReader reader = IndexReader.Open(directory, true, null);
        Assert.IsTrue(reader.IsOptimized());
        Assert.AreEqual(expectedDocCount, reader.NumDocs());
        reader.Close();
    }
    writer.Close();
}