private void SetUpDirs(Directory dir, Directory aux) { IndexWriter writer = null; writer = NewWriter(dir, true); writer.SetMaxBufferedDocs(1000); // add 1000 documents in 1 segment AddDocs(writer, 1000); Assert.AreEqual(1000, writer.DocCount()); Assert.AreEqual(1, writer.GetSegmentCount()); writer.Close(); writer = NewWriter(aux, true); writer.SetUseCompoundFile(false); // use one without a compound file writer.SetMaxBufferedDocs(100); writer.SetMergeFactor(10); // add 30 documents in 3 segments for (int i = 0; i < 3; i++) { AddDocs(writer, 10); writer.Close(); writer = NewWriter(aux, false); writer.SetUseCompoundFile(false); // use one without a compound file writer.SetMaxBufferedDocs(100); writer.SetMergeFactor(10); } Assert.AreEqual(30, writer.DocCount()); Assert.AreEqual(3, writer.GetSegmentCount()); writer.Close(); }
public void IndexFile(string filePath) { PropertyDescriptors descriptors = new PropertyDescriptors(); descriptors.LoadData(System.Windows.Forms.Application.StartupPath + "\\PropertyDescriptors.xml"); Analyzer a = new Lucene.Net.Analysis.Standard.StandardAnalyzer(); bool create = !(System.IO.Directory.Exists(_idxDir) && IndexReader.IndexExists(_idxDir)); IndexWriter iw = new IndexWriter(_idxDir, a, create); iw.SetUseCompoundFile(true); AdDataStream adStream = new AdDataStream(filePath); adStream.LoadData(); foreach (Advert ad in adStream.FetchAd()) { Document doc = new Document(); foreach (string s in ad.GetDictionary().Keys) { string temp = descriptors.GetIndexableFormat(descriptors[s], ad[s]); doc.Add(Field.Text(s, temp)); } iw.AddDocument(doc); if (_updateCallback != null) { _updateCallback("Added Document: " + ad["Title"]); } } iw.Optimize(); iw.Close(); }
private Directory MakeIndex() { Directory dir = new RAMDirectory(); try { System.Random r = NewRandom(); Analyzer analyzer = new SimpleAnalyzer(); IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED); writer.SetUseCompoundFile(false); for (int d = 1; d <= NUM_DOCS; d++) { Document doc = new Document(); for (int f = 1; f <= NUM_FIELDS; f++) { doc.Add(new Field("f" + f, data[f % data.Length] + '#' + data[r.Next(data.Length)], Field.Store.YES, Field.Index.ANALYZED)); } writer.AddDocument(doc); } writer.Close(); } catch (System.Exception e) { throw new System.SystemException("", e); } return dir; }
public virtual void TestNoPrxFile() { Directory ram = new MockRAMDirectory(); Analyzer analyzer = new StandardAnalyzer(); IndexWriter writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED); writer.SetMaxBufferedDocs(3); writer.SetMergeFactor(2); writer.SetUseCompoundFile(false); Document d = new Document(); Field f1 = new Field("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED); f1.SetOmitTermFreqAndPositions(true); d.Add(f1); for (int i = 0; i < 30; i++) { writer.AddDocument(d); } writer.Commit(); AssertNoPrx(ram); // force merge writer.Optimize(); // flush writer.Close(); AssertNoPrx(ram); _TestUtil.CheckIndex(ram); ram.Close(); }
private static Directory MakeIndex() { Directory dir = new RAMDirectory(); try { System.Random r = new System.Random((System.Int32) (BASE_SEED + 42)); Analyzer analyzer = new SimpleAnalyzer(); IndexWriter writer = new IndexWriter(dir, analyzer, true); writer.SetUseCompoundFile(false); for (int d = 1; d <= NUM_DOCS; d++) { Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document(); for (int f = 1; f <= NUM_FIELDS; f++) { doc.Add(new Field("f" + f, data[f % data.Length] + '#' + data[r.Next(data.Length)], Field.Store.YES, Field.Index.TOKENIZED)); } writer.AddDocument(doc); } writer.Close(); } catch (System.Exception e) { throw new System.SystemException("", e); } return dir; }
public virtual void TestTargetCFS() { Directory dir = new RAMDirectory(); IndexWriter writer = NewWriter(dir, true); writer.SetUseCompoundFile(false); AddDocs(writer, 1); writer.Close(); Directory other = new RAMDirectory(); writer = NewWriter(other, true); writer.SetUseCompoundFile(true); writer.AddIndexesNoOptimize(new Directory[] { dir }); Assert.IsTrue(writer.NewestSegment().GetUseCompoundFile()); writer.Close(); }
public virtual void TestAddSelf() { // main directory Directory dir = new RAMDirectory(); // auxiliary directory Directory aux = new RAMDirectory(); IndexWriter writer = null; writer = NewWriter(dir, true); // add 100 documents AddDocs(writer, 100); Assert.AreEqual(100, writer.DocCount()); writer.Close(); writer = NewWriter(aux, true); writer.SetUseCompoundFile(false); // use one without a compound file writer.SetMaxBufferedDocs(1000); // add 140 documents in separate files AddDocs(writer, 40); writer.Close(); writer = NewWriter(aux, true); writer.SetUseCompoundFile(false); // use one without a compound file writer.SetMaxBufferedDocs(1000); AddDocs(writer, 100); writer.Close(); writer = NewWriter(dir, false); try { // cannot add self writer.AddIndexesNoOptimize(new Directory[] { aux, dir }); Assert.IsTrue(false); } catch (System.ArgumentException e) { Assert.AreEqual(100, writer.DocCount()); } writer.Close(); // make sure the index is correct VerifyNumDocs(dir, 100); }
public void CreateIndex(string databaseFileName) { IndexWriter writer = new IndexWriter(indexFolderName, new StandardAnalyzer(), true); writer.SetUseCompoundFile(false); IndexDatabase(writer, Container.GetContainer(databaseFileName)); writer.Optimize(); writer.Close(); }
private void CreateIndex(Directory dir) { IndexWriter iw = new IndexWriter(dir, anlzr, true, IndexWriter.MaxFieldLength.LIMITED); iw.SetMaxBufferedDocs(5); iw.SetMergeFactor(3); iw.SetSimilarity(similarityOne); iw.SetUseCompoundFile(true); iw.Close(); }
public static IndexWriter GetAzureIndexWriter(this LuceneIndexer indexer) { indexer.EnsureIndex(false); var writer = new IndexWriter(indexer.GetLuceneDirectory(), indexer.IndexingAnalyzer, false, IndexWriter.MaxFieldLength.UNLIMITED); writer.SetRAMBufferSizeMB(10.0); writer.SetUseCompoundFile(false); writer.SetMaxMergeDocs(10000); writer.SetMergeFactor(100); return writer; }
public override void SetUp() { base.SetUp(); fieldInfos = new FieldInfos(); DocHelper.SetupDoc(testDoc); fieldInfos.Add(testDoc); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); writer.SetUseCompoundFile(false); writer.AddDocument(testDoc); writer.Close(); segmentName = writer.NewestSegment().name; }
/// <summary> Setting to turn on usage of a compound file. When on, multiple files /// for each segment are merged into a single file once the segment creation /// is finished. This is done regardless of what directory is in use. /// </summary> /// <seealso cref="IndexWriter.SetUseCompoundFile(bool)"> /// </seealso> /// <throws> IllegalStateException if the index is closed </throws> public virtual void SetUseCompoundFile(bool useCompoundFile) { lock (directory) { AssureOpen(); if (indexWriter != null) { indexWriter.SetUseCompoundFile(useCompoundFile); } this.useCompoundFile = useCompoundFile; } }
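Since this setter simply forwards to the wrapped IndexWriter, its effect is the same as calling SetUseCompoundFile on a writer directly. Below is a minimal standalone sketch of the pattern several examples in this list follow (compound files off during bulk indexing, back on before the final optimize); the RAMDirectory, analyzer, and field values are illustrative assumptions, not code from any snippet here.

using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Store;

class CompoundFileToggleSketch
{
    static void Main()
    {
        // Hypothetical in-memory index; any Lucene.Net Directory would do.
        Directory dir = new RAMDirectory();
        IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);

        // Keep per-segment files separate while adding documents in bulk.
        writer.SetUseCompoundFile(false);
        Document doc = new Document();
        doc.Add(new Field("content", "compound file example", Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(doc);

        // Switch back to compound format so the final merge writes .cfs files.
        writer.SetUseCompoundFile(true);
        writer.Optimize();
        writer.Close();
    }
}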
public override void SetUp() { base.SetUp(); fieldInfos = new FieldInfos(); DocHelper.SetupDoc(testDoc); fieldInfos.Add(testDoc); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.SetUseCompoundFile(false); writer.AddDocument(testDoc); writer.Close(); }
private void AddDocs(Directory dir, int ndocs, bool compound) { IndexWriter iw = new IndexWriter(dir, anlzr, false, IndexWriter.MaxFieldLength.LIMITED); iw.SetMaxBufferedDocs(5); iw.SetMergeFactor(3); iw.SetSimilarity(similarityOne); iw.SetUseCompoundFile(compound); for (int i = 0; i < ndocs; i++) { iw.AddDocument(NewDoc()); } iw.Close(); }
private void DoTest(System.IO.StringWriter out_Renamed, bool useCompoundFiles) { Directory directory = new RAMDirectory(); Analyzer analyzer = new SimpleAnalyzer(); IndexWriter writer = new IndexWriter(directory, analyzer, true); writer.SetUseCompoundFile(useCompoundFiles); int MAX_DOCS = 225; for (int j = 0; j < MAX_DOCS; j++) { Document d = new Document(); d.Add(Field.Text(PRIORITY_FIELD, HIGH_PRIORITY)); d.Add(Field.Text(ID_FIELD, System.Convert.ToString(j))); writer.AddDocument(d); } writer.Close(); // try a search without OR Searcher searcher = new IndexSearcher(directory); Hits hits = null; QueryParsers.QueryParser parser = new QueryParsers.QueryParser(PRIORITY_FIELD, analyzer); Query query = parser.Parse(HIGH_PRIORITY); out_Renamed.WriteLine("Query: " + query.ToString(PRIORITY_FIELD)); hits = searcher.Search(query); PrintHits(out_Renamed, hits); CheckHits(hits, MAX_DOCS); searcher.Close(); // try a new search with OR searcher = new IndexSearcher(directory); hits = null; parser = new QueryParsers.QueryParser(PRIORITY_FIELD, analyzer); query = parser.Parse(HIGH_PRIORITY + " OR " + MED_PRIORITY); out_Renamed.WriteLine("Query: " + query.ToString(PRIORITY_FIELD)); hits = searcher.Search(query); PrintHits(out_Renamed, hits); CheckHits(hits, MAX_DOCS); searcher.Close(); }
static void Main() { _indexWriter = new IndexWriter(FSDirectory.Open(new DirectoryInfo(_indexPath)), new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED); _indexWriter.SetUseCompoundFile(false); // TODO: we can do this in batches, and trigger another thread to do that var sw = ParseDisks(disk => disks.Add(disk)); Console.WriteLine("Elapsed: " + sw.Elapsed); sw.Restart(); foreach (var disk in disks) { AddDocShortVersion(disk); } _indexWriter.SetUseCompoundFile(true); _indexWriter.Optimize(); _indexWriter.Close(true); Console.WriteLine("Elapsed: " + sw.Elapsed); }
private void button1_Click(object sender, EventArgs e) { //FilterData.PrepareCharMap(); int total = this.databaseDataSet.trans.Count; int counter = 1; string fsPath = indexpath; if (!System.IO.Directory.Exists(fsPath)) System.IO.Directory.CreateDirectory(fsPath); if (IndexReader.IndexExists(fsPath)) return; RAMDirectory dir = new RAMDirectory(); IndexWriter ramWriter = new IndexWriter(dir, new DiacriticAnalyzer(FilterData.stopWords), true); IndexWriter fsWriter = new IndexWriter(fsPath, new DiacriticAnalyzer(FilterData.stopWords), !IndexReader.IndexExists(fsPath)); ramWriter.SetUseCompoundFile(false); fsWriter.SetUseCompoundFile(false); foreach (DataRow row in this.databaseDataSet.trans.Rows) { Document doc = new Document(); string pid = row[this.databaseDataSet.trans.pidColumn].ToString(); string sid = row[this.databaseDataSet.trans.sidColumn].ToString(); string ayatno = row[this.databaseDataSet.trans.ayatnoColumn].ToString(); string arabic = row[this.databaseDataSet.trans.ayat_arabicColumn].ToString(); string urdu = row[this.databaseDataSet.trans.ayat_urduColumn].ToString(); string english = row[this.databaseDataSet.trans.ayat_descColumn].ToString(); doc.Add(Field.Keyword("pid", pid)); doc.Add(Field.Keyword("sid", sid)); doc.Add(Field.Keyword("ayatno", ayatno)); doc.Add(Field.Text("ayat_desc", english)); doc.Add(Field.Text("ayat_arabic", arabic)); doc.Add(Field.Text("ayat_urdu", urdu)); doc.Add(Field.Text("contents", arabic + Environment.NewLine + urdu + Environment.NewLine + english)); ramWriter.AddDocument(doc); int percent = counter * 100 / total; this.progressBar1.Value = percent; label1.Text = percent.ToString() + "%"; counter++; Application.DoEvents(); } ramWriter.Optimize(); fsWriter.AddIndexes(new Lucene.Net.Store.Directory[] { dir }); ramWriter.Close(); fsWriter.Close(); MessageBox.Show("Done Indexing!"); }
/// <summary> /// Constructor /// </summary> /// <param name="indexPath">Path where you wish to create the index</param> /// <param name="mode">One of Create, Append or Search</param> public Indexer(string indexPath, IndexMode mode) { m_indexMode = mode; m_bSucess = false; m_analyzer = new StandardAnalyzer(); if (mode == IndexMode.CREATE) { try { m_indexWriter = new IndexWriter(indexPath, m_analyzer, true); m_indexWriter.SetUseCompoundFile(true); m_bSucess = true; } catch (Exception e) { Logger.Instance.LogException(e); m_bSucess = false; } } else if (mode == IndexMode.APPEND) { try { m_indexWriter = new IndexWriter(indexPath, m_analyzer, false); m_indexWriter.SetUseCompoundFile(true); m_bSucess = true; } catch (Exception e) { Logger.Instance.LogException(e); m_bSucess = false; } } else if (mode == IndexMode.SEARCH) { try { m_indexSearcher = new IndexSearcher(indexPath); m_bSucess = true; } catch (Exception e) { Logger.Instance.LogException(e); m_bSucess = false; } } }
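Going only by the constructor signature above, a hypothetical caller picks one of the three modes up front; the path below is illustrative and the indexing methods the class exposes are not shown in the snippet.

// Build a fresh index (hypothetical path), then reopen the same path later.
var creator = new Indexer(@"C:\indexes\docs", IndexMode.CREATE);
// ... add documents through the class's own indexing methods ...

var appender = new Indexer(@"C:\indexes\docs", IndexMode.APPEND);   // reuse an existing index
var searcher = new Indexer(@"C:\indexes\docs", IndexMode.SEARCH);   // read-only searching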
/// <summary> Close the IndexReader and open an IndexWriter.</summary> /// <throws> IOException </throws> protected internal virtual void CreateIndexWriter() { if (indexWriter == null) { if (indexReader != null) { indexReader.Close(); indexReader = null; } indexWriter = new IndexWriter(directory, analyzer, false); indexWriter.SetInfoStream(infoStream); indexWriter.SetUseCompoundFile(useCompoundFile); indexWriter.SetMaxBufferedDocs(maxBufferedDocs); indexWriter.SetMaxFieldLength(maxFieldLength); indexWriter.SetMergeFactor(mergeFactor); } }
public virtual void TestHangOnClose() { Directory dir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.SetMergePolicy(new LogByteSizeMergePolicy(writer)); writer.SetMaxBufferedDocs(5); writer.SetUseCompoundFile(false); writer.SetMergeFactor(100); Document doc = new Document(); doc.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); for (int i = 0; i < 60; i++) { writer.AddDocument(doc); } writer.SetMaxBufferedDocs(200); Document doc2 = new Document(); doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO)); doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO)); doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO)); doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO)); for (int i = 0; i < 10; i++) { writer.AddDocument(doc2); } writer.Close(); Directory dir2 = new MockRAMDirectory(); writer = new IndexWriter(dir2, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy(writer); lmp.SetMinMergeMB(0.0001); writer.SetMergePolicy(lmp); writer.SetMergeFactor(4); writer.SetUseCompoundFile(false); writer.SetMergeScheduler(new SerialMergeScheduler()); writer.AddIndexesNoOptimize(new Directory[] { dir }); writer.Close(); dir.Close(); dir2.Close(); }
private void DoTestSearch(System.IO.StreamWriter out_Renamed, bool useCompoundFile) { Directory directory = new RAMDirectory(); Analyzer analyzer = new SimpleAnalyzer(); IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED); writer.SetUseCompoundFile(useCompoundFile); System.String[] docs = new System.String[]{"a b c d e", "a b c d e a b c d e", "a b c d e f g h i j", "a c e", "e c a", "a c e a c e", "a c e a b c"}; for (int j = 0; j < docs.Length; j++) { Document d = new Document(); d.Add(new Field("contents", docs[j], Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(d); } writer.Close(); Searcher searcher = new IndexSearcher(directory); System.String[] queries = new System.String[]{"a b", "\"a b\"", "\"a b c\"", "a c", "\"a c\"", "\"a c e\""}; ScoreDoc[] hits = null; QueryParser parser = new QueryParser("contents", analyzer); parser.SetPhraseSlop(4); for (int j = 0; j < queries.Length; j++) { Query query = parser.Parse(queries[j]); out_Renamed.WriteLine("Query: " + query.ToString("contents")); //DateFilter filter = // new DateFilter("modified", Time(1997,0,1), Time(1998,0,1)); //DateFilter filter = DateFilter.Before("modified", Time(1997,00,01)); //System.out.println(filter); hits = searcher.Search(query, null, 1000).scoreDocs; out_Renamed.WriteLine(hits.Length + " total results"); for (int i = 0; i < hits.Length && i < 10; i++) { Document d = searcher.Doc(hits[i].doc); out_Renamed.WriteLine(i + " " + hits[i].score + " " + d.Get("contents")); } } searcher.Close(); }
public virtual void TestCloseStoredFields() { Directory dir = new MockRAMDirectory(); IndexWriter w = new IndexWriter(dir, new SimpleAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); w.SetUseCompoundFile(false); Document doc = new Document(); doc.Add(new Field("field", "yes it's stored", Field.Store.YES, Field.Index.ANALYZED)); w.AddDocument(doc); w.Close(); IndexReader r1 = IndexReader.Open(dir); IndexReader r2 = r1.Clone(false); r1.Close(); r2.Close(); dir.Close(); }
public virtual void TestIndexing() { Directory mainDir = new MockRAMDirectory(); IndexWriter writer = new IndexWriter(mainDir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); writer.SetUseCompoundFile(false); IndexReader reader = writer.GetReader(); // start pooling readers reader.Close(); writer.SetMergeFactor(2); writer.SetMaxBufferedDocs(10); RunThread[] indexThreads = new RunThread[4]; for (int x = 0; x < indexThreads.Length; x++) { indexThreads[x] = new RunThread(this, x % 2, writer); indexThreads[x].Name = "Thread " + x; indexThreads[x].Start(); } long startTime = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond); long duration = 5 * 1000; while (((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) - startTime) < duration) { System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 100)); } int delCount = 0; int addCount = 0; for (int x = 0; x < indexThreads.Length; x++) { indexThreads[x].run_Renamed_Field = false; Assert.IsTrue(indexThreads[x].ex == null); addCount += indexThreads[x].addCount; delCount += indexThreads[x].delCount; } for (int x = 0; x < indexThreads.Length; x++) { indexThreads[x].Join(); } //System.out.println("addCount:"+addCount); //System.out.println("delCount:"+delCount); writer.Close(); mainDir.Close(); }
public virtual void TestKeepNoneOnInitDeletionPolicy() { for (int pass = 0; pass < 4; pass++) { bool autoCommit = pass < 2; bool useCompoundFile = (pass % 2) > 0; KeepNoneOnInitDeletionPolicy policy = new KeepNoneOnInitDeletionPolicy(this); Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy); writer.SetMaxBufferedDocs(10); writer.SetUseCompoundFile(useCompoundFile); for (int i = 0; i < 107; i++) { AddDoc(writer); } writer.Close(); writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy); writer.SetUseCompoundFile(useCompoundFile); writer.Optimize(); writer.Close(); Assert.AreEqual(2, policy.numOnInit); if (!autoCommit) { // If we are not auto committing then there should // be exactly 2 commits (one per close above): Assert.AreEqual(2, policy.numOnCommit); } // Simplistic check: just verify the index is in fact // readable: IndexReader reader = IndexReader.Open(dir); reader.Close(); dir.Close(); } }
private void CreateIndex(int numHits) { int numDocs = 500; Directory directory = new SeekCountingDirectory(this); IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.SetUseCompoundFile(false); writer.SetMaxBufferedDocs(10); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); System.String content; if (i % (numDocs / numHits) == 0) { // add a document that matches the query "term1 term2" content = this.term1 + " " + this.term2; } else if (i % 15 == 0) { // add a document that only contains term1 content = this.term1 + " " + this.term1; } else { // add a document that contains term2 but not term 1 content = this.term3 + " " + this.term2; } doc.Add(new Field(this.field, content, Field.Store.YES, Field.Index.ANALYZED)); writer.AddDocument(doc); } // make sure the index has only a single segment writer.Optimize(); writer.Close(); SegmentReader reader = SegmentReader.GetOnlySegmentReader(directory); this.searcher = new IndexSearcher(reader); }
public virtual void TestSimpleCase() { System.String[] keywords = new System.String[] { "1", "2" }; System.String[] unindexed = new System.String[] { "Netherlands", "Italy" }; System.String[] unstored = new System.String[] { "Amsterdam has lots of bridges", "Venice has lots of canals" }; System.String[] text = new System.String[] { "Amsterdam", "Venice" }; for (int pass = 0; pass < 2; pass++) { bool autoCommit = (0 == pass); Directory dir = new MockRAMDirectory(); IndexWriter modifier = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true); modifier.SetUseCompoundFile(true); modifier.SetMaxBufferedDeleteTerms(1); for (int i = 0; i < keywords.Length; i++) { Document doc = new Document(); doc.Add(new Field("id", keywords[i], Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.Add(new Field("country", unindexed[i], Field.Store.YES, Field.Index.NO)); doc.Add(new Field("contents", unstored[i], Field.Store.NO, Field.Index.ANALYZED)); doc.Add(new Field("city", text[i], Field.Store.YES, Field.Index.ANALYZED)); modifier.AddDocument(doc); } modifier.Optimize(); modifier.Commit(); Term term = new Term("city", "Amsterdam"); int hitCount = GetHitCount(dir, term); Assert.AreEqual(1, hitCount); modifier.DeleteDocuments(term); modifier.Commit(); hitCount = GetHitCount(dir, term); Assert.AreEqual(0, hitCount); modifier.Close(); dir.Close(); } }
public virtual void CreateIndex(System.String dirName, bool doCFS) { RmDir(dirName); dirName = FullDir(dirName); Directory dir = FSDirectory.Open(new System.IO.FileInfo(dirName)); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.SetUseCompoundFile(doCFS); writer.SetMaxBufferedDocs(10); for (int i = 0; i < 35; i++) { AddDoc(writer, i); } Assert.AreEqual(35, writer.DocCount(), "wrong doc count"); writer.Close(); // open fresh writer so we get no prx file in the added segment writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); writer.SetUseCompoundFile(doCFS); writer.SetMaxBufferedDocs(10); AddNoProxDoc(writer); writer.Close(); // Delete one doc so we get a .del file: IndexReader reader = IndexReader.Open(dir); Term searchTerm = new Term("id", "7"); int delCount = reader.DeleteDocuments(searchTerm); Assert.AreEqual(1, delCount, "didn't delete the right number of documents"); // Set one norm so we get a .s0 file: reader.SetNorm(21, "content", (float)1.5); reader.Close(); }
/// <summary> /// Creates an IndexWriter that can be used to apply updates to an index /// </summary> /// <param name="indexPath">File system path to the target index</param> /// <param name="oAnalyzer">Lucene Analyzer to be used by the underlying IndexWriter</param> /// <param name="bCompoundFile">Setting to dictate if the index should use compound format</param> /// <returns>An IndexWriter opened against the given path; the index is created if it does not already exist</returns> private IndexWriter GetIndexWriter(string indexPath, Analyzer oAnalyzer, bool bCompoundFile) { bool bExists = System.IO.Directory.Exists(indexPath); if (!bExists) System.IO.Directory.CreateDirectory(indexPath); bExists = IndexReader.IndexExists(FSDirectory.GetDirectory(indexPath, false)); IndexWriter idxWriter = new IndexWriter(indexPath, oAnalyzer, !bExists); idxWriter.SetUseCompoundFile(bCompoundFile); return idxWriter; }
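A hypothetical call site for this helper; the index path is made up, and StandardAnalyzer simply mirrors the analyzers used elsewhere in these examples.

// Open (or create) the index at an illustrative path, with compound format enabled.
IndexWriter idxWriter = GetIndexWriter(@"C:\indexes\catalog", new StandardAnalyzer(), true);
// ... add or update documents ...
idxWriter.Optimize();
idxWriter.Close();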
public virtual void TestVariableSchema() { MockRAMDirectory dir = new MockRAMDirectory(); int delID = 0; for (int i = 0; i < 20; i++) { IndexWriter writer = new IndexWriter(dir, false, new WhitespaceAnalyzer()); writer.SetMaxBufferedDocs(2); writer.SetMergeFactor(2); writer.SetUseCompoundFile(false); Document doc = new Document(); System.String contents = "aa bb cc dd ee ff gg hh ii jj kk"; if (i == 7) { // Add empty docs here doc.Add(new Field("content3", "", Field.Store.NO, Field.Index.TOKENIZED)); } else { Field.Store storeVal; if (i % 2 == 0) { doc.Add(new Field("content4", contents, Field.Store.YES, Field.Index.TOKENIZED)); storeVal = Field.Store.YES; } else storeVal = Field.Store.NO; doc.Add(new Field("content1", contents, storeVal, Field.Index.TOKENIZED)); doc.Add(new Field("content3", "", Field.Store.YES, Field.Index.TOKENIZED)); doc.Add(new Field("content5", "", storeVal, Field.Index.TOKENIZED)); } for (int j = 0; j < 4; j++) writer.AddDocument(doc); writer.Close(); IndexReader reader = IndexReader.Open(dir); reader.DeleteDocument(delID++); reader.Close(); if (0 == i % 4) { writer = new IndexWriter(dir, false, new WhitespaceAnalyzer()); writer.SetUseCompoundFile(false); writer.Optimize(); writer.Close(); } } }
public override void SetUp() { base.SetUp(); /* * for (int i = 0; i < testFields.length; i++) { * fieldInfos.add(testFields[i], true, true, testFieldsStorePos[i], testFieldsStoreOff[i]); * } */ System.Array.Sort(testTerms); int tokenUpto = 0; for (int i = 0; i < testTerms.Length; i++) { positions[i] = new int[TERM_FREQ]; offsets[i] = new TermVectorOffsetInfo[TERM_FREQ]; // first position must be 0 for (int j = 0; j < TERM_FREQ; j++) { // positions are always sorted in increasing order positions[i][j] = (int)(j * 10 + (new System.Random().NextDouble()) * 10); // offsets are always sorted in increasing order offsets[i][j] = new TermVectorOffsetInfo(j * 10, j * 10 + testTerms[i].Length); TestToken token = tokens[tokenUpto++] = new TestToken(this); token.text = testTerms[i]; token.pos = positions[i][j]; token.startOffset = offsets[i][j].GetStartOffset(); token.endOffset = offsets[i][j].GetEndOffset(); } } System.Array.Sort(tokens); IndexWriter writer = new IndexWriter(dir, new MyAnalyzer(this), true, IndexWriter.MaxFieldLength.LIMITED); writer.SetUseCompoundFile(false); Document doc = new Document(); for (int i = 0; i < testFields.Length; i++) { Field.TermVector tv; if (testFieldsStorePos[i] && testFieldsStoreOff[i]) { tv = Field.TermVector.WITH_POSITIONS_OFFSETS; } else if (testFieldsStorePos[i] && !testFieldsStoreOff[i]) { tv = Field.TermVector.WITH_POSITIONS; } else if (!testFieldsStorePos[i] && testFieldsStoreOff[i]) { tv = Field.TermVector.WITH_OFFSETS; } else { tv = Field.TermVector.YES; } doc.Add(new Field(testFields[i], "", Field.Store.NO, Field.Index.ANALYZED, tv)); } //Create 5 documents for testing, they all have the same //terms for (int j = 0; j < 5; j++) { writer.AddDocument(doc); } writer.Flush(); seg = writer.NewestSegment().name; writer.Close(); fieldInfos = new FieldInfos(dir, seg + "." + IndexFileNames.FIELD_INFOS_EXTENSION); }
/// <summary> /// Builds an in-memory index. /// </summary> /// <param name="ramdir">Directory backing the in-memory index</param> public IntranetIndexer(Lucene.Net.Store.Directory ramdir) { writer = new IndexWriter(ramdir, new StandardAnalyzer(), true); writer.SetUseCompoundFile(true); }
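A hypothetical caller of this overload; RAMDirectory is the natural choice for the ramdir parameter, and the usage is illustrative rather than taken from the source above.

// Build the index entirely in memory.
var ramDir = new Lucene.Net.Store.RAMDirectory();
var indexer = new IntranetIndexer(ramDir);   // compound-file format is enabled by the constructor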
public virtual void TestKeepAllDeletionPolicy() { for (int pass = 0; pass < 4; pass++) { bool autoCommit = pass < 2; bool useCompoundFile = (pass % 2) > 0; KeepAllDeletionPolicy policy = new KeepAllDeletionPolicy(this); Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy); writer.SetMaxBufferedDocs(10); writer.SetUseCompoundFile(useCompoundFile); for (int i = 0; i < 107; i++) { AddDoc(writer); } writer.Close(); writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy); writer.SetUseCompoundFile(useCompoundFile); writer.Optimize(); writer.Close(); Assert.AreEqual(2, policy.numOnInit); if (autoCommit) { Assert.IsTrue(policy.numOnCommit > 2); } else { // If we are not auto committing then there should // be exactly 2 commits (one per close above): Assert.AreEqual(2, policy.numOnCommit); } // Simplistic check: just verify all segments_N's still // exist, and, I can open a reader on each: dir.DeleteFile(IndexFileNames.SEGMENTS_GEN); long gen = SegmentInfos.GetCurrentSegmentGeneration(dir); while (gen > 0) { IndexReader reader = IndexReader.Open(dir); reader.Close(); dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); gen--; if (gen > 0) { // Now that we've removed a commit point, which // should have orphan'd at least one index file. // Open & close a writer and assert that it // actually removed something: int preCount = dir.List().Length; writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), false, policy); writer.Close(); int postCount = dir.List().Length; Assert.IsTrue(postCount < preCount); } } dir.Close(); } }
public virtual void TestErrorAfterApplyDeletes() { MockRAMDirectory.Failure failure = new AnonymousClassFailure(this); // create a couple of files System.String[] keywords = new System.String[] { "1", "2" }; System.String[] unindexed = new System.String[] { "Netherlands", "Italy" }; System.String[] unstored = new System.String[] { "Amsterdam has lots of bridges", "Venice has lots of canals" }; System.String[] text = new System.String[] { "Amsterdam", "Venice" }; for (int pass = 0; pass < 2; pass++) { bool autoCommit = (0 == pass); MockRAMDirectory dir = new MockRAMDirectory(); IndexWriter modifier = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true); modifier.SetUseCompoundFile(true); modifier.SetMaxBufferedDeleteTerms(2); dir.FailOn(failure.Reset()); for (int i = 0; i < keywords.Length; i++) { Document doc = new Document(); doc.Add(new Field("id", keywords[i], Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.Add(new Field("country", unindexed[i], Field.Store.YES, Field.Index.NO)); doc.Add(new Field("contents", unstored[i], Field.Store.NO, Field.Index.ANALYZED)); doc.Add(new Field("city", text[i], Field.Store.YES, Field.Index.ANALYZED)); modifier.AddDocument(doc); } // flush (and commit if ac) modifier.Optimize(); modifier.Commit(); // one of the two files hits Term term = new Term("city", "Amsterdam"); int hitCount = GetHitCount(dir, term); Assert.AreEqual(1, hitCount); // open the writer again (closed above) // delete the doc // max buf del terms is two, so this is buffered modifier.DeleteDocuments(term); // add a doc (needed for the !ac case; see below) // doc remains buffered Document doc2 = new Document(); modifier.AddDocument(doc2); // commit the changes, the buffered deletes, and the new doc // The failure object will fail on the first write after the del // file gets created when processing the buffered delete // in the ac case, this will be when writing the new segments // files so we really don't need the new doc, but it's harmless // in the !ac case, a new segments file won't be created but in // this case, creation of the cfs file happens next so we need // the doc (to test that it's okay that we don't lose deletes if // failing while creating the cfs file) bool failed = false; try { modifier.Commit(); } catch (System.IO.IOException ioe) { failed = true; } Assert.IsTrue(failed); // The commit above failed, so we need to retry it (which will // succeed, because the failure is a one-shot) modifier.Commit(); hitCount = GetHitCount(dir, term); // Make sure the delete was successfully flushed: Assert.AreEqual(0, hitCount); modifier.Close(); dir.Close(); } }
private void menuItemOptimize_Click(object sender, System.EventArgs e) { if (indexReader == null) { ShowStatus(resources.GetString("NoIndex")); return; } if (_readOnly) { ShowStatus(resources.GetString("Readonly")); return; } try { indexReader.Close(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED); writer.SetUseCompoundFile(useCompound); long startSize = FilesTabPage.CalcTotalFileSize(dir); DateTime startTime = DateTime.Now; writer.Optimize(); DateTime endTime = DateTime.Now; long endSize = FilesTabPage.CalcTotalFileSize(dir); long deltaSize = startSize - endSize; String sign = deltaSize < 0 ? " Increased " : " Reduced "; String sizeMsg = sign + FilesTabPage.NormalizeSize(Math.Abs(deltaSize)) + FilesTabPage.NormalizeUnit(Math.Abs(deltaSize)); String timeMsg = ((TimeSpan)(endTime - startTime)).TotalMilliseconds + " ms"; ShowStatus(sizeMsg + " in " + timeMsg); tabFiles.ShowFiles(dir); writer.Close(); indexReader = IndexReader.Open(dir, true); InitOverview(); } catch (Exception exc) { ShowStatus(exc.Message); } }
public virtual void TestExpirationTimeDeletionPolicy() { double SECONDS = 2.0; bool autoCommit = false; bool useCompoundFile = true; Directory dir = new RAMDirectory(); ExpirationTimeDeletionPolicy policy = new ExpirationTimeDeletionPolicy(this, dir, SECONDS); IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy); writer.SetUseCompoundFile(useCompoundFile); writer.Close(); long lastDeleteTime = 0; for (int i = 0; i < 7; i++) { // Record last time when writer performed deletes of // past commits lastDeleteTime = (System.DateTime.Now.Ticks - 621355968000000000) / 10000; writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy); writer.SetUseCompoundFile(useCompoundFile); for (int j = 0; j < 17; j++) { AddDoc(writer); } writer.Close(); // Make sure to sleep long enough so that some commit // points will be deleted: System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * (int) (1000.0 * (SECONDS / 5.0)))); } // First, make sure the policy in fact deleted something: Assert.IsTrue(policy.numDelete > 0, "no commits were deleted"); // Then simplistic check: just verify that the // segments_N's that still exist are in fact within SECONDS // seconds of the last one's mod time, and, that I can // open a reader on each: long gen = SegmentInfos.GetCurrentSegmentGeneration(dir); System.String fileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen); dir.DeleteFile(IndexFileNames.SEGMENTS_GEN); while (gen > 0) { try { IndexReader reader = IndexReader.Open(dir); reader.Close(); fileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen); long modTime = dir.FileModified(fileName); Assert.IsTrue(lastDeleteTime - modTime <= (SECONDS * 1000), "commit point was older than " + SECONDS + " seconds (" + (lastDeleteTime - modTime) + " msec) but did not get deleted"); } catch (System.IO.IOException) { // OK break; } dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); gen--; } dir.Close(); }
public virtual void TestLazyPerformance() { System.String tmpIODir = SupportClass.AppSettings.Get("tempDir", ""); System.String userName = System.Environment.UserName; System.String path = tmpIODir + System.IO.Path.DirectorySeparatorChar.ToString() + "lazyDir" + userName; System.IO.FileInfo file = new System.IO.FileInfo(path); _TestUtil.RmDir(file); FSDirectory tmpDir = FSDirectory.Open(file); Assert.IsTrue(tmpDir != null); IndexWriter writer = new IndexWriter(tmpDir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); writer.SetUseCompoundFile(false); writer.AddDocument(testDoc); writer.Close(); Assert.IsTrue(fieldInfos != null); FieldsReader reader; long lazyTime = 0; long regularTime = 0; int length = 50; System.Collections.Hashtable lazyFieldNames = new System.Collections.Hashtable(); SupportClass.CollectionsHelper.AddIfNotContains(lazyFieldNames, DocHelper.LARGE_LAZY_FIELD_KEY); SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(new System.Collections.Hashtable(), lazyFieldNames); for (int i = 0; i < length; i++) { reader = new FieldsReader(tmpDir, TEST_SEGMENT_NAME, fieldInfos); Assert.IsTrue(reader != null); Assert.IsTrue(reader.Size() == 1); Document doc; doc = reader.Doc(0, null); //Load all of them Assert.IsTrue(doc != null, "doc is null and it shouldn't be"); Fieldable field = doc.GetFieldable(DocHelper.LARGE_LAZY_FIELD_KEY); Assert.IsTrue(field.IsLazy() == false, "field is lazy"); System.String value_Renamed; long start; long finish; start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond); //On my machine this was always 0ms. value_Renamed = field.StringValue(); finish = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond); Assert.IsTrue(value_Renamed != null, "value is null and it shouldn't be"); Assert.IsTrue(field != null, "field is null and it shouldn't be"); regularTime += (finish - start); reader.Close(); reader = null; doc = null; //Hmmm, are we still in cache??? System.GC.Collect(); reader = new FieldsReader(tmpDir, TEST_SEGMENT_NAME, fieldInfos); doc = reader.Doc(0, fieldSelector); field = doc.GetFieldable(DocHelper.LARGE_LAZY_FIELD_KEY); Assert.IsTrue(field.IsLazy() == true, "field is not lazy"); start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond); //On my machine this took around 50 - 70ms value_Renamed = field.StringValue(); finish = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond); Assert.IsTrue(value_Renamed != null, "value is null and it shouldn't be"); lazyTime += (finish - start); reader.Close(); } System.Console.Out.WriteLine("Average Non-lazy time (should be very close to zero): " + regularTime / length + " ms for " + length + " reads"); System.Console.Out.WriteLine("Average Lazy Time (should be greater than zero): " + lazyTime / length + " ms for " + length + " reads"); }
public virtual void TestKeepLastNDeletionPolicy() { int N = 5; for (int pass = 0; pass < 4; pass++) { bool autoCommit = pass < 2; bool useCompoundFile = (pass % 2) > 0; Directory dir = new RAMDirectory(); KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N); for (int j = 0; j < N + 1; j++) { IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy); writer.SetMaxBufferedDocs(10); writer.SetUseCompoundFile(useCompoundFile); for (int i = 0; i < 17; i++) { AddDoc(writer); } writer.Optimize(); writer.Close(); } Assert.IsTrue(policy.numDelete > 0); Assert.AreEqual(N + 1, policy.numOnInit); if (autoCommit) { Assert.IsTrue(policy.numOnCommit > 1); } else { Assert.AreEqual(N + 1, policy.numOnCommit); } // Simplistic check: just verify only the past N segments_N's still // exist, and, I can open a reader on each: dir.DeleteFile(IndexFileNames.SEGMENTS_GEN); long gen = SegmentInfos.GetCurrentSegmentGeneration(dir); for (int i = 0; i < N + 1; i++) { try { IndexReader reader = IndexReader.Open(dir); reader.Close(); if (i == N) { Assert.Fail("should have failed on commits prior to last " + N); } } catch (System.IO.IOException e) { if (i != N) { throw e; } } if (i < N) { dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); } gen--; } dir.Close(); } }
public virtual void TestDeleteLeftoverFiles() { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true); writer.SetMaxBufferedDocs(10); int i; for (i = 0; i < 35; i++) { AddDoc(writer, i); } writer.SetUseCompoundFile(false); for (; i < 45; i++) { AddDoc(writer, i); } writer.Close(); // Delete one doc so we get a .del file: IndexReader reader = IndexReader.Open(dir); Term searchTerm = new Term("id", "7"); int delCount = reader.DeleteDocuments(searchTerm); Assert.AreEqual(1, delCount, "didn't delete the right number of documents"); // Set one norm so we get a .s0 file: reader.SetNorm(21, "content", (float) 1.5); reader.Close(); // Now, artificially create an extra .del file & extra // .s0 file: System.String[] files = dir.List(); /* for(int i=0;i<files.length;i++) { System.out.println(i + ": " + files[i]); } */ // The numbering of fields can vary depending on which // JRE is in use. On some JREs we see content bound to // field 0; on others, field 1. So, here we have to // figure out which field number corresponds to // "content", and then set our expected file names below // accordingly: CompoundFileReader cfsReader = new CompoundFileReader(dir, "_2.cfs"); FieldInfos fieldInfos = new FieldInfos(cfsReader, "_2.fnm"); int contentFieldIndex = - 1; for (i = 0; i < fieldInfos.Size(); i++) { FieldInfo fi = fieldInfos.FieldInfo(i); if (fi.Name_ForNUnitTest.Equals("content")) { contentFieldIndex = i; break; } } cfsReader.Close(); Assert.IsTrue(contentFieldIndex != - 1, "could not locate the 'content' field number in the _2.cfs segment"); System.String normSuffix = "s" + contentFieldIndex; // Create a bogus separate norms file for a // segment/field that actually has a separate norms file // already: CopyFile(dir, "_2_1." + normSuffix, "_2_2." + normSuffix); // Create a bogus separate norms file for a // segment/field that actually has a separate norms file // already, using the "not compound file" extension: CopyFile(dir, "_2_1." + normSuffix, "_2_2.f" + contentFieldIndex); // Create a bogus separate norms file for a // segment/field that does not have a separate norms // file already: CopyFile(dir, "_2_1." + normSuffix, "_1_1." + normSuffix); // Create a bogus separate norms file for a // segment/field that does not have a separate norms // file already using the "not compound file" extension: CopyFile(dir, "_2_1." + normSuffix, "_1_1.f" + contentFieldIndex); // Create a bogus separate del file for a // segment that already has a separate del file: CopyFile(dir, "_0_1.del", "_0_2.del"); // Create a bogus separate del file for a // segment that does not yet have a separate del file: CopyFile(dir, "_0_1.del", "_1_1.del"); // Create a bogus separate del file for a // non-existent segment: CopyFile(dir, "_0_1.del", "_188_1.del"); // Create a bogus segment file: CopyFile(dir, "_0.cfs", "_188.cfs"); // Create a bogus fnm file when the CFS already exists: CopyFile(dir, "_0.cfs", "_0.fnm"); // Create a deletable file: CopyFile(dir, "_0.cfs", "deletable"); // Create some old segments file: CopyFile(dir, "segments_a", "segments"); CopyFile(dir, "segments_a", "segments_2"); // Create a bogus cfs file shadowing a non-cfs segment: CopyFile(dir, "_2.cfs", "_3.cfs"); System.String[] filesPre = dir.List(); // Open & close a writer: it should delete the above 4 // files and nothing more: writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false); writer.Close(); System.String[] files2 = dir.List(); dir.Close(); System.Array.Sort(files); System.Array.Sort(files2); if (!ArrayEquals(files, files2)) { Assert.Fail("IndexFileDeleter failed to delete unreferenced extra files: should have deleted " + (filesPre.Length - files.Length) + " files but only deleted " + (filesPre.Length - files2.Length) + "; expected files:\n " + AsString(files) + "\n actual files:\n " + AsString(files2)); } }
public virtual void TestKeepLastNDeletionPolicyWithCreates() { int N = 10; for (int pass = 0; pass < 4; pass++) { bool autoCommit = pass < 2; bool useCompoundFile = (pass % 2) > 0; KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N); Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy); writer.SetMaxBufferedDocs(10); writer.SetUseCompoundFile(useCompoundFile); writer.Close(); Term searchTerm = new Term("content", "aaa"); Query query = new TermQuery(searchTerm); for (int i = 0; i < N + 1; i++) { writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy); writer.SetMaxBufferedDocs(10); writer.SetUseCompoundFile(useCompoundFile); for (int j = 0; j < 17; j++) { AddDoc(writer); } // this is a commit when autoCommit=false: writer.Close(); IndexReader reader = IndexReader.Open(dir, policy); reader.DeleteDocument(3); reader.SetNorm(5, "content", 2.0F); IndexSearcher searcher = new IndexSearcher(reader); ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs; Assert.AreEqual(16, hits.Length); // this is a commit when autoCommit=false: reader.Close(); searcher.Close(); writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy); // This will not commit: there are no changes // pending because we opened for "create": writer.Close(); } Assert.AreEqual(1 + 3 * (N + 1), policy.numOnInit); if (!autoCommit) { Assert.AreEqual(3 * (N + 1), policy.numOnCommit); } IndexSearcher searcher2 = new IndexSearcher(dir); ScoreDoc[] hits2 = searcher2.Search(query, null, 1000).scoreDocs; Assert.AreEqual(0, hits2.Length); // Simplistic check: just verify only the past N segments_N's still // exist, and, I can open a reader on each: long gen = SegmentInfos.GetCurrentSegmentGeneration(dir); dir.DeleteFile(IndexFileNames.SEGMENTS_GEN); int expectedCount = 0; for (int i = 0; i < N + 1; i++) { try { IndexReader reader = IndexReader.Open(dir); // Work backwards in commits on what the expected // count should be. Only check this in the // autoCommit false case: if (!autoCommit) { searcher2 = new IndexSearcher(reader); hits2 = searcher2.Search(query, null, 1000).scoreDocs; Assert.AreEqual(expectedCount, hits2.Length); searcher2.Close(); if (expectedCount == 0) { expectedCount = 16; } else if (expectedCount == 16) { expectedCount = 17; } else if (expectedCount == 17) { expectedCount = 0; } } reader.Close(); if (i == N) { Assert.Fail("should have failed on commits before last " + N); } } catch (System.IO.IOException e) { if (i != N) { throw e; } } if (i < N) { dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); } gen--; } dir.Close(); } }
public virtual void TestKeepLastNDeletionPolicyWithCreates() { int N = 10; for (int pass = 0; pass < 4; pass++) { bool autoCommit = pass < 2; bool useCompoundFile = (pass % 2) > 0; KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N); Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy); writer.SetMaxBufferedDocs(10); writer.SetUseCompoundFile(useCompoundFile); writer.Close(); Term searchTerm = new Term("content", "aaa"); Query query = new TermQuery(searchTerm); for (int i = 0; i < N + 1; i++) { writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy); writer.SetMaxBufferedDocs(10); writer.SetUseCompoundFile(useCompoundFile); for (int j = 0; j < 17; j++) { AddDoc(writer); } // this is a commit when autoCommit=false: writer.Close(); IndexReader reader = IndexReader.Open(dir, policy); reader.DeleteDocument(3); reader.SetNorm(5, "content", 2.0F); IndexSearcher searcher = new IndexSearcher(reader); Hits hits = searcher.Search(query); Assert.AreEqual(16, hits.Length()); // this is a commit when autoCommit=false: reader.Close(); searcher.Close(); writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy); // This will not commit: there are no changes // pending because we opened for "create": writer.Close(); } Assert.AreEqual(1 + 3 * (N + 1), policy.numOnInit); if (autoCommit) { Assert.IsTrue(policy.numOnCommit > 3 * (N + 1) - 1); } else { Assert.AreEqual(2 * (N + 1), policy.numOnCommit); } IndexSearcher searcher2 = new IndexSearcher(dir); Hits hits2 = searcher2.Search(query); Assert.AreEqual(0, hits2.Length()); // Simplistic check: just verify only the past N segments_N's still // exist, and, I can open a reader on each: long gen = SegmentInfos.GetCurrentSegmentGeneration(dir); dir.DeleteFile(IndexFileNames.SEGMENTS_GEN); int expectedCount = 0; for (int i = 0; i < N + 1; i++) { try { IndexReader reader = IndexReader.Open(dir); // Work backwards in commits on what the expected // count should be. Only check this in the // autoCommit false case: if (!autoCommit) { searcher2 = new IndexSearcher(reader); hits2 = searcher2.Search(query); Assert.AreEqual(expectedCount, hits2.Length()); searcher2.Close(); if (expectedCount == 0) { expectedCount = 16; } else if (expectedCount == 16) { expectedCount = 17; } else if (expectedCount == 17) { expectedCount = 0; } } reader.Close(); if (i == N) { Assert.Fail("should have failed on commits before last " + N); } } catch (System.IO.IOException e) { if (i != N) { throw e; } } if (i < N) { dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen)); } gen--; } dir.Close(); } }
/// <summary> /// Creates a new index in <c>directory</c>. Overwrites the existing index in that directory. /// </summary> /// <param name="directory">Path to index (will be created if not existing).</param> public IntranetIndexer(string directory) { writer = new IndexWriter(directory, new StandardAnalyzer(), true); writer.SetUseCompoundFile(true); }
/// <summary> Close the IndexReader and open an IndexWriter.</summary> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> LockObtainFailedException if another writer </throws> /// <summary> has this index open (<c>write.lock</c> could not /// be obtained) /// </summary> /// <throws> IOException if there is a low-level IO error </throws> protected internal virtual void CreateIndexWriter() { if (indexWriter == null) { if (indexReader != null) { indexReader.Close(); indexReader = null; } indexWriter = new IndexWriter(directory, analyzer, false, new IndexWriter.MaxFieldLength(maxFieldLength)); // IndexModifier cannot use ConcurrentMergeScheduler // because it synchronizes on the directory which can // cause deadlock indexWriter.SetMergeScheduler(new SerialMergeScheduler()); indexWriter.SetInfoStream(infoStream); indexWriter.SetUseCompoundFile(useCompoundFile); if (maxBufferedDocs != IndexWriter.DISABLE_AUTO_FLUSH) indexWriter.SetMaxBufferedDocs(maxBufferedDocs); indexWriter.SetMergeFactor(mergeFactor); } }
/// <summary> /// Creates the index in the specified path, using the corpusReader object /// as the documents feed /// </summary> /// <param name="corpusReader"></param> /// <param name="indexPath"></param> public void CreateIndex(WikiDumpReader corpusReader, string indexPath) { cr = corpusReader; var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29); writer = new IndexWriter(FSDirectory.Open(new DirectoryInfo(indexPath)), analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED); writer.SetUseCompoundFile(false); // This will be called whenever a document is read by the provided ICorpusReader corpusReader.OnDocument += corpusDoc => { if (corpusReader.AbortReading) return; // Blaaaah that's ugly. Make sure parsing doesn't stick us in an infinite loop var t = Task.Factory.StartNew(() => corpusDoc.AsHtml()); var timeout = t.Wait(TimeSpan.FromMinutes(2)); var content = timeout ? t.Result : string.Empty; // skip blank documents, they are worthless to us (even though they have a title we could index) if (string.IsNullOrEmpty(content)) return; // Create a new index document var doc = new Document(); doc.Add(new Field("Id", corpusDoc.Id, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); // Add title field var titleField = new Field("Title", corpusDoc.Title, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); titleField.SetBoost(3.0f); doc.Add(titleField); doc.Add(new Field("Content", content, Field.Store.COMPRESS, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); writer.AddDocument(doc); }; // Progress reporting corpusReader.OnProgress += (percentage, status, isRunning) => { var pi = new ProgressInfo { IsStillRunning = true, Status = string.Format("{0} ({1}%)", status, percentage) }; Invoke(new ProgressChangedDelegate(UpdateProgress), null, new ProgressChangedEventArgs(percentage, pi)); }; // Execute corpus reading, which will trigger indexing for each document found corpusReader.Read(); cr = null; // Clean up and close writer.SetUseCompoundFile(true); writer.Optimize(); writer.Close(); writer = null; var pi1 = new ProgressInfo { IsStillRunning = false, Status = "Ready" }; Invoke(new ProgressChangedDelegate(UpdateProgress), null, new ProgressChangedEventArgs(100, pi1)); }
public virtual void TestExpirationTimeDeletionPolicy()
{
    double SECONDS = 2.0;

    bool autoCommit = false;
    bool useCompoundFile = true;

    Directory dir = new RAMDirectory();
    ExpirationTimeDeletionPolicy policy = new ExpirationTimeDeletionPolicy(this, dir, SECONDS);
    IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
    writer.SetUseCompoundFile(useCompoundFile);
    writer.Close();

    long lastDeleteTime = 0;
    for (int i = 0; i < 7; i++)
    {
        // Record last time when writer performed deletes of past commits
        lastDeleteTime = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
        writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
        writer.SetUseCompoundFile(useCompoundFile);
        for (int j = 0; j < 17; j++)
        {
            AddDoc(writer);
        }
        writer.Close();

        // Make sure to sleep long enough so that some commit
        // points will be deleted:
        System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * (int)(1000.0 * (SECONDS / 5.0))));
    }

    // First, make sure the policy in fact deleted something:
    Assert.IsTrue(policy.numDelete > 0, "no commits were deleted");

    // Then simplistic check: just verify that the segments_N's that still
    // exist are in fact within SECONDS seconds of the last one's mod time,
    // and, that I can open a reader on each:
    long gen = SegmentInfos.GetCurrentSegmentGeneration(dir);
    System.String fileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen);
    dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
    while (gen > 0)
    {
        try
        {
            IndexReader reader = IndexReader.Open(dir);
            reader.Close();
            fileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen);
            long modTime = dir.FileModified(fileName);
            Assert.IsTrue(lastDeleteTime - modTime <= (SECONDS * 1000),
                "commit point was older than " + SECONDS + " seconds (" +
                (lastDeleteTime - modTime) + " msec) but did not get deleted");
        }
        catch (System.IO.IOException)
        {
            // OK
            break;
        }

        dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
        gen--;
    }

    dir.Close();
}
public virtual void TestSimpleCase()
{
    // main directory
    Directory dir = new RAMDirectory();
    // two auxiliary directories
    Directory aux = new RAMDirectory();
    Directory aux2 = new RAMDirectory();

    IndexWriter writer = null;

    writer = NewWriter(dir, true);
    // add 100 documents
    AddDocs(writer, 100);
    Assert.AreEqual(100, writer.DocCount());
    writer.Close();

    writer = NewWriter(aux, true);
    writer.SetUseCompoundFile(false); // use one without a compound file
    // add 40 documents in separate files
    AddDocs(writer, 40);
    Assert.AreEqual(40, writer.DocCount());
    writer.Close();

    writer = NewWriter(aux2, true);
    // add 50 documents in compound files
    AddDocs2(writer, 50);
    Assert.AreEqual(50, writer.DocCount());
    writer.Close();

    // test doc count before segments are merged
    writer = NewWriter(dir, false);
    Assert.AreEqual(100, writer.DocCount());
    writer.AddIndexesNoOptimize(new Directory[] { aux, aux2 });
    Assert.AreEqual(190, writer.DocCount());
    writer.Close();

    // make sure the old index is correct
    VerifyNumDocs(aux, 40);

    // make sure the new index is correct
    VerifyNumDocs(dir, 190);

    // now add another set in.
    Directory aux3 = new RAMDirectory();
    writer = NewWriter(aux3, true);
    // add 40 documents
    AddDocs(writer, 40);
    Assert.AreEqual(40, writer.DocCount());
    writer.Close();

    // test doc count before segments are merged/index is optimized
    writer = NewWriter(dir, false);
    Assert.AreEqual(190, writer.DocCount());
    writer.AddIndexesNoOptimize(new Directory[] { aux3 });
    Assert.AreEqual(230, writer.DocCount());
    writer.Close();

    // make sure the new index is correct
    VerifyNumDocs(dir, 230);
    VerifyTermDocs(dir, new Term("content", "aaa"), 180);
    VerifyTermDocs(dir, new Term("content", "bbb"), 50);

    // now optimize it.
    writer = NewWriter(dir, false);
    writer.Optimize();
    writer.Close();

    // make sure the new index is correct
    VerifyNumDocs(dir, 230);
    VerifyTermDocs(dir, new Term("content", "aaa"), 180);
    VerifyTermDocs(dir, new Term("content", "bbb"), 50);

    // now add a single document
    Directory aux4 = new RAMDirectory();
    writer = NewWriter(aux4, true);
    AddDocs2(writer, 1);
    writer.Close();

    writer = NewWriter(dir, false);
    Assert.AreEqual(230, writer.DocCount());
    writer.AddIndexesNoOptimize(new Directory[] { aux4 });
    Assert.AreEqual(231, writer.DocCount());
    writer.Close();

    VerifyNumDocs(dir, 231);
    VerifyTermDocs(dir, new Term("content", "bbb"), 51);
}
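TestSimpleCase leans on several fixture helpers that are not part of the snippet. Judging from the assertions (AddDocs contributes content:aaa terms, AddDocs2 contributes content:bbb), they plausibly look like the sketch below; the analyzer choice and exact field flags are assumptions.

// Plausible shapes of the helpers used by the AddIndexesNoOptimize tests above.
private IndexWriter NewWriter(Directory dir, bool create)
{
    return new IndexWriter(dir, new WhitespaceAnalyzer(), create, IndexWriter.MaxFieldLength.UNLIMITED);
}

private void AddDocs(IndexWriter writer, int numDocs)    // adds "content: aaa" documents
{
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();
        doc.Add(new Field("content", "aaa", Field.Store.NO, Field.Index.ANALYZED));
        writer.AddDocument(doc);
    }
}

private void VerifyNumDocs(Directory dir, int numDocs)
{
    IndexReader reader = IndexReader.Open(dir);
    Assert.AreEqual(numDocs, reader.NumDocs());
    reader.Close();
}

private void VerifyTermDocs(Directory dir, Term term, int numDocs)
{
    IndexReader reader = IndexReader.Open(dir);
    TermDocs termDocs = reader.TermDocs(term);
    int count = 0;
    while (termDocs.Next())
        count++;
    Assert.AreEqual(numDocs, count);
    reader.Close();
}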
public virtual void TestKeepAllDeletionPolicy()
{
    for (int pass = 0; pass < 4; pass++)
    {
        bool autoCommit = pass < 2;
        bool useCompoundFile = (pass % 2) > 0;

        // Never deletes a commit
        KeepAllDeletionPolicy policy = new KeepAllDeletionPolicy(this);

        Directory dir = new RAMDirectory();
        policy.dir = dir;

        IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
        writer.SetMaxBufferedDocs(10);
        writer.SetUseCompoundFile(useCompoundFile);
        writer.SetMergeScheduler(new SerialMergeScheduler());
        for (int i = 0; i < 107; i++)
        {
            AddDoc(writer);
            if (autoCommit && i % 10 == 0)
            {
                writer.Commit();
            }
        }
        writer.Close();

        writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
        writer.SetUseCompoundFile(useCompoundFile);
        writer.Optimize();
        writer.Close();

        Assert.AreEqual(2, policy.numOnInit);
        if (!autoCommit)
        {
            // If we are not auto committing then there should
            // be exactly 2 commits (one per close above):
            Assert.AreEqual(2, policy.numOnCommit);
        }

        // Test listCommits
        System.Collections.ICollection commits = IndexReader.ListCommits(dir);
        if (!autoCommit)
        {
            // 1 from opening writer + 2 from closing writer
            Assert.AreEqual(3, commits.Count);
        }
        else
        {
            // 1 from opening writer + 2 from closing writer +
            // 11 from calling writer.Commit() explicitly above
            Assert.AreEqual(14, commits.Count);
        }

        System.Collections.IEnumerator it = commits.GetEnumerator();
        // Make sure we can open a reader on each commit:
        while (it.MoveNext())
        {
            IndexCommit commit = (IndexCommit) it.Current;
            IndexReader r = IndexReader.Open(commit, null);
            r.Close();
        }

        // Simplistic check: just verify all segments_N's still
        // exist, and, I can open a reader on each:
        dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
        long gen = SegmentInfos.GetCurrentSegmentGeneration(dir);
        while (gen > 0)
        {
            IndexReader reader = IndexReader.Open(dir);
            reader.Close();
            dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
            gen--;

            if (gen > 0)
            {
                // Removing a commit point should have orphaned at least one
                // index file. Open & close a writer and assert that it
                // actually removed something:
                int preCount = dir.ListAll().Length;
                writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, policy,
                                         IndexWriter.MaxFieldLength.LIMITED);
                writer.Close();
                int postCount = dir.ListAll().Length;
                Assert.IsTrue(postCount < preCount);
            }
        }

        dir.Close();
    }
}
public virtual void TestKeepNoneOnInitDeletionPolicy()
{
    for (int pass = 0; pass < 4; pass++)
    {
        bool autoCommit = pass < 2;
        bool useCompoundFile = (pass % 2) > 0;

        KeepNoneOnInitDeletionPolicy policy = new KeepNoneOnInitDeletionPolicy(this);

        Directory dir = new RAMDirectory();

        IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
        writer.SetMaxBufferedDocs(10);
        writer.SetUseCompoundFile(useCompoundFile);
        for (int i = 0; i < 107; i++)
        {
            AddDoc(writer);
        }
        writer.Close();

        writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
        writer.SetUseCompoundFile(useCompoundFile);
        writer.Optimize();
        writer.Close();

        Assert.AreEqual(2, policy.numOnInit);
        if (autoCommit)
        {
            Assert.IsTrue(policy.numOnCommit > 2);
        }
        else
        {
            // If we are not auto committing then there should
            // be exactly 2 commits (one per close above):
            Assert.AreEqual(2, policy.numOnCommit);
        }

        // Simplistic check: just verify the index is in fact readable:
        IndexReader reader = IndexReader.Open(dir);
        reader.Close();

        dir.Close();
    }
}
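The deletion-policy tests above all call an AddDoc(writer) helper that is not shown. It is plausibly just a one-field document, along the lines of this sketch; the field name and value are assumptions.

// Plausible AddDoc helper shared by the deletion-policy tests above.
private void AddDoc(IndexWriter writer)
{
    Document doc = new Document();
    doc.Add(new Field("content", "aaa", Field.Store.NO, Field.Index.ANALYZED));
    writer.AddDocument(doc);
}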
public virtual void TestDeleteLeftoverFiles()
{
    Directory dir = new RAMDirectory();

    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    writer.SetMaxBufferedDocs(10);
    int i;
    for (i = 0; i < 35; i++)
    {
        AddDoc(writer, i);
    }
    writer.SetUseCompoundFile(false);
    for (; i < 45; i++)
    {
        AddDoc(writer, i);
    }
    writer.Close();

    // Delete one doc so we get a .del file:
    IndexReader reader = IndexReader.Open(dir);
    Term searchTerm = new Term("id", "7");
    int delCount = reader.DeleteDocuments(searchTerm);
    Assert.AreEqual(1, delCount, "didn't delete the right number of documents");

    // Set one norm so we get a .s0 file:
    reader.SetNorm(21, "content", (float) 1.5);
    reader.Close();

    // Now, artificially create an extra .del file & extra .s0 file:
    System.String[] files = dir.ListAll();

    /*
     * for (int j = 0; j < files.length; j++) {
     *     System.out.println(j + ": " + files[j]);
     * }
     */

    // The numbering of fields can vary depending on which JRE is in use. On
    // some JREs we see content bound to field 0; on others, field 1. So, here
    // we have to figure out which field number corresponds to "content", and
    // then set our expected file names below accordingly:
    CompoundFileReader cfsReader = new CompoundFileReader(dir, "_2.cfs");
    FieldInfos fieldInfos = new FieldInfos(cfsReader, "_2.fnm");
    int contentFieldIndex = -1;
    for (i = 0; i < fieldInfos.Size(); i++)
    {
        FieldInfo fi = fieldInfos.FieldInfo(i);
        if (fi.name_ForNUnit.Equals("content"))
        {
            contentFieldIndex = i;
            break;
        }
    }
    cfsReader.Close();
    Assert.IsTrue(contentFieldIndex != -1, "could not locate the 'content' field number in the _2.cfs segment");

    System.String normSuffix = "s" + contentFieldIndex;

    // Create a bogus separate norms file for a segment/field that actually
    // has a separate norms file already:
    CopyFile(dir, "_2_1." + normSuffix, "_2_2." + normSuffix);

    // Create a bogus separate norms file for a segment/field that actually
    // has a separate norms file already, using the "not compound file" extension:
    CopyFile(dir, "_2_1." + normSuffix, "_2_2.f" + contentFieldIndex);

    // Create a bogus separate norms file for a segment/field that does not
    // have a separate norms file already:
    CopyFile(dir, "_2_1." + normSuffix, "_1_1." + normSuffix);

    // Create a bogus separate norms file for a segment/field that does not
    // have a separate norms file already, using the "not compound file" extension:
    CopyFile(dir, "_2_1." + normSuffix, "_1_1.f" + contentFieldIndex);

    // Create a bogus separate del file for a segment that already has a
    // separate del file:
    CopyFile(dir, "_0_1.del", "_0_2.del");

    // Create a bogus separate del file for a segment that does not yet have a
    // separate del file:
    CopyFile(dir, "_0_1.del", "_1_1.del");

    // Create a bogus separate del file for a non-existent segment:
    CopyFile(dir, "_0_1.del", "_188_1.del");

    // Create a bogus segment file:
    CopyFile(dir, "_0.cfs", "_188.cfs");

    // Create a bogus fnm file when the CFS already exists:
    CopyFile(dir, "_0.cfs", "_0.fnm");

    // Create a deletable file:
    CopyFile(dir, "_0.cfs", "deletable");

    // Create some old segments files:
    CopyFile(dir, "segments_3", "segments");
    CopyFile(dir, "segments_3", "segments_2");

    // Create a bogus cfs file shadowing a non-cfs segment:
    CopyFile(dir, "_2.cfs", "_3.cfs");

    System.String[] filesPre = dir.ListAll();

    // Open & close a writer: it should delete the above extra files and nothing more:
    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED);
    writer.Close();

    System.String[] files2 = dir.ListAll();
    dir.Close();

    System.Array.Sort(files);
    System.Array.Sort(files2);

    System.Collections.Hashtable dif = DifFiles(files, files2);

    if (!SupportClass.CollectionsHelper.Equals(files, files2))
    {
        Assert.Fail("IndexFileDeleter failed to delete unreferenced extra files: should have deleted " +
                    (filesPre.Length - files.Length) + " files but only deleted " +
                    (filesPre.Length - files2.Length) + "; expected files:\n    " + AsString(files) +
                    "\n  actual files:\n    " + AsString(files2) + "\ndif: " +
                    SupportClass.CollectionsHelper.CollectionToString(dif));
    }
}
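The CopyFile helper that TestDeleteLeftoverFiles calls repeatedly is not shown; a straightforward sketch using the Directory stream API would be something like this (buffer size and structure are assumptions).

// Sketch of a CopyFile helper: byte-for-byte copy of one file in a Directory
// to a new name, using Lucene's own IndexInput/IndexOutput streams.
private void CopyFile(Directory dir, string src, string dest)
{
    IndexInput input = dir.OpenInput(src);
    IndexOutput output = dir.CreateOutput(dest);
    byte[] buffer = new byte[1024];
    long remaining = input.Length();
    while (remaining > 0)
    {
        int chunk = (int) System.Math.Min(buffer.Length, remaining);
        input.ReadBytes(buffer, 0, chunk);
        output.WriteBytes(buffer, chunk);
        remaining -= chunk;
    }
    input.Close();
    output.Close();
}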