/// <summary>
/// Command-line entry point: deletes from the "index" directory every
/// document whose "path" field matches the single term supplied in args[0].
/// </summary>
/// <param name="args">args[0] is the unique term identifying documents to delete.</param>
public static void Main(System.String[] args)
{
    System.String usage = typeof(DeleteFiles) + " <unique_term>";
    if (args.Length == 0)
    {
        System.Console.Error.WriteLine("Usage: " + usage);
        System.Environment.Exit(1);
    }
    try
    {
        Directory dir = FSDirectory.Open(new System.IO.FileInfo("index"));
        // Open a writable (non-read-only) reader, since we are about to delete.
        IndexReader reader = IndexReader.Open(dir, false);

        Term term = new Term("path", args[0]);
        int numDeleted = reader.DeleteDocuments(term);
        System.Console.Out.WriteLine("deleted " + numDeleted + " documents containing " + term);

        // Documents can also be deleted by their internal id, e.g.:
        //   for (int i = 0; i < reader.MaxDoc(); i++) reader.DeleteDocument(i);

        reader.Close();
        dir.Close();
    }
    catch (System.Exception e)
    {
        System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
    }
}
/// <summary>
/// Walks the directory hierarchy in uid order, while keeping the uid iterator
/// from the existing index in sync.  Mismatches indicate one of: (a) old
/// documents to be deleted; (b) unchanged documents, to be left alone; or
/// (c) new documents, to be indexed.
/// </summary>
/// <param name="file">Root file or directory to index.</param>
/// <param name="index">Location of the existing index on disk.</param>
/// <param name="create">True when building a brand-new index; no sync pass is needed.</param>
private static void IndexDocs(System.IO.FileInfo file, System.IO.FileInfo index, bool create)
{
    if (!create)
    {
        // Incremental update: open the existing index writable so stale docs can be removed.
        reader = IndexReader.Open(FSDirectory.Open(index), false);
        uidIter = reader.Terms(new Term("uid", "")); // position iterator at the first "uid" term
        IndexDocs(file);
        if (deleting)
        {
            // Delete the remaining stale docs (uids sorted after the last file walked).
            // Compare the field name by value: the original cast both sides to object
            // and used reference equality, which silently depends on string interning.
            while (uidIter.Term() != null && String.CompareOrdinal(uidIter.Term().Field(), "uid") == 0)
            {
                System.Console.Out.WriteLine("deleting " + HTMLDocument.Uid2url(uidIter.Term().Text()));
                reader.DeleteDocuments(uidIter.Term());
                uidIter.Next();
            }
            deleting = false;
        }
        uidIter.Close(); // close uid iterator
        reader.Close(); // close existing index
    }
    else
    {
        // No existing index: index every document unconditionally.
        IndexDocs(file);
    }
}
/// <summary>
/// Indexes <paramref name="file"/>: recurses into directories (in sorted, i.e.
/// uid, order), and for .html/.htm/.txt files either adds them unconditionally
/// (fresh index) or syncs against the existing index via <c>uidIter</c> —
/// deleting stale entries and adding only documents not already present.
/// </summary>
private static void IndexDocs(System.IO.FileInfo file)
{
    if (System.IO.Directory.Exists(file.FullName))
    {
        // A directory: list, sort, and recursively index its entries.
        System.String[] files = System.IO.Directory.GetFileSystemEntries(file.FullName);
        System.Array.Sort(files);
        for (int i = 0; i < files.Length; i++)
        {
            IndexDocs(new System.IO.FileInfo(System.IO.Path.Combine(file.FullName, files[i])));
        }
    }
    else if (file.FullName.EndsWith(".html") || file.FullName.EndsWith(".htm") || file.FullName.EndsWith(".txt"))
    {
        // Index .html, .htm and .txt files.
        if (uidIter != null)
        {
            System.String uid = HTMLDocument.Uid(file); // construct uid for doc

            // Advance past (and optionally delete) entries whose uid sorts before
            // this file's uid: those documents no longer exist on disk.
            // Field names are compared by value; the original cast to object and
            // used reference equality, which silently depends on string interning.
            while (uidIter.Term() != null && String.CompareOrdinal(uidIter.Term().Field(), "uid") == 0 && String.CompareOrdinal(uidIter.Term().Text(), uid) < 0)
            {
                if (deleting)
                {
                    // delete stale docs
                    System.Console.Out.WriteLine("deleting " + HTMLDocument.Uid2url(uidIter.Term().Text()));
                    reader.DeleteDocuments(uidIter.Term());
                }
                uidIter.Next();
            }

            if (uidIter.Term() != null && String.CompareOrdinal(uidIter.Term().Field(), "uid") == 0 && String.CompareOrdinal(uidIter.Term().Text(), uid) == 0)
            {
                uidIter.Next(); // document unchanged: keep the existing entry
            }
            else if (!deleting)
            {
                // New document: add it to the index.
                Document doc = HTMLDocument.Document(file);
                System.Console.Out.WriteLine("adding " + doc.Get("path"));
                writer.AddDocument(doc);
            }
        }
        else
        {
            // Creating a new index: add docs unconditionally.
            Document doc = HTMLDocument.Document(file);
            System.Console.Out.WriteLine("adding " + doc.Get("path"));
            writer.AddDocument(doc);
        }
    }
}
/// <summary>
/// Walks the directory hierarchy in uid order, while keeping the uid iterator
/// from the existing index in sync.  Mismatches indicate one of: (a) old
/// documents to be deleted; (b) unchanged documents, to be left alone; or
/// (c) new documents, to be indexed.
/// </summary>
/// <param name="file">Root directory to index.</param>
/// <param name="index">Location of the existing index on disk.</param>
/// <param name="create">True when building a brand-new index; no sync pass is needed.</param>
private static void IndexDocs(System.IO.DirectoryInfo file, System.IO.DirectoryInfo index, bool create)
{
    if (!create)
    {
        // Incremental update: open the existing index writable so stale docs can be removed.
        reader = IndexReader.Open(FSDirectory.Open(index), false);
        uidIter = reader.Terms(new Term("uid", "")); // position iterator at the first "uid" term
        IndexDocs(file);
        if (deleting)
        {
            // Delete the remaining stale docs (uids sorted after the last file walked).
            // Compare the field name by value: the original cast both sides to object
            // and used reference equality, which silently depends on string interning.
            while (uidIter.Term() != null && System.String.CompareOrdinal(uidIter.Term().Field, "uid") == 0)
            {
                System.Console.Out.WriteLine("deleting " + HTMLDocument.Uid2url(uidIter.Term().Text));
                reader.DeleteDocuments(uidIter.Term());
                uidIter.Next();
            }
            deleting = false;
        }
        uidIter.Close(); // close uid iterator
        reader.Close(); // close existing index
    }
    else
    {
        // No existing index: index every document unconditionally.
        IndexDocs(file);
    }
}
/// <summary>
/// Regression test for searching across a MultiSearcher that includes an
/// empty index.  Three scenarios are exercised in sequence against two
/// MockRAMDirectory-backed indices: (1) one index is populated with three
/// docs while the other stays empty; (2) one doc is added to the previously
/// empty index and sub-searcher routing is checked via SubSearcher(); (3)
/// that doc is deleted again and the index optimized, then the search is
/// repeated.  No scenario should throw, and hit counts must match exactly.
/// NOTE(review): the empty/smaller index is deliberately placed FIRST in the
/// searcher array ("VITAL STEP" below) — the historical bug depended on that
/// ordering.
/// </summary>
public virtual void TestEmptyIndex() { // creating two directories for indices Directory indexStoreA = new MockRAMDirectory(); Directory indexStoreB = new MockRAMDirectory(); // creating a document to store Document lDoc = new Document(); lDoc.Add(new Field("fulltext", "Once upon a time.....", Field.Store.YES, Field.Index.ANALYZED)); lDoc.Add(new Field("id", "doc1", Field.Store.YES, Field.Index.NOT_ANALYZED)); lDoc.Add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED)); // creating a document to store Document lDoc2 = new Document(); lDoc2.Add(new Field("fulltext", "in a galaxy far far away.....", Field.Store.YES, Field.Index.ANALYZED)); lDoc2.Add(new Field("id", "doc2", Field.Store.YES, Field.Index.NOT_ANALYZED)); lDoc2.Add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED)); // creating a document to store Document lDoc3 = new Document(); lDoc3.Add(new Field("fulltext", "a bizarre bug manifested itself....", Field.Store.YES, Field.Index.ANALYZED)); lDoc3.Add(new Field("id", "doc3", Field.Store.YES, Field.Index.NOT_ANALYZED)); lDoc3.Add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED)); // creating an index writer for the first index IndexWriter writerA = new IndexWriter(indexStoreA, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED); // creating an index writer for the second index, but writing nothing IndexWriter writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED); //-------------------------------------------------------------------- // scenario 1 //-------------------------------------------------------------------- // writing the documents to the first index writerA.AddDocument(lDoc); writerA.AddDocument(lDoc2); writerA.AddDocument(lDoc3); writerA.Optimize(); writerA.Close(); // closing the second index writerB.Close(); // creating the query QueryParser parser = new 
QueryParser(Util.Version.LUCENE_CURRENT, "fulltext", new StandardAnalyzer(Util.Version.LUCENE_CURRENT)); Query query = parser.Parse("handle:1"); // building the searchables Searcher[] searchers = new Searcher[2]; // VITAL STEP:adding the searcher for the empty index first, before the searcher for the populated index searchers[0] = new IndexSearcher(indexStoreB, true); searchers[1] = new IndexSearcher(indexStoreA, true); // creating the multiSearcher Searcher mSearcher = GetMultiSearcherInstance(searchers); // performing the search ScoreDoc[] hits = mSearcher.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(3, hits.Length); // iterating over the hit documents for (int i = 0; i < hits.Length; i++) { mSearcher.Doc(hits[i].Doc); } mSearcher.Close(); //-------------------------------------------------------------------- // scenario 2 //-------------------------------------------------------------------- // adding one document to the empty index writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), false, IndexWriter.MaxFieldLength.LIMITED); writerB.AddDocument(lDoc); writerB.Optimize(); writerB.Close(); // building the searchables Searcher[] searchers2 = new Searcher[2]; // VITAL STEP:adding the searcher for the empty index first, before the searcher for the populated index searchers2[0] = new IndexSearcher(indexStoreB, true); searchers2[1] = new IndexSearcher(indexStoreA, true); // creating the mulitSearcher MultiSearcher mSearcher2 = GetMultiSearcherInstance(searchers2); // performing the same search ScoreDoc[] hits2 = mSearcher2.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(4, hits2.Length); // iterating over the hit documents for (int i = 0; i < hits2.Length; i++) { // no exception should happen at this point mSearcher2.Doc(hits2[i].Doc); } // test the subSearcher() method: Query subSearcherQuery = parser.Parse("id:doc1"); hits2 = mSearcher2.Search(subSearcherQuery, null, 1000).ScoreDocs; Assert.AreEqual(2, 
hits2.Length); Assert.AreEqual(0, mSearcher2.SubSearcher(hits2[0].Doc)); // hit from searchers2[0] Assert.AreEqual(1, mSearcher2.SubSearcher(hits2[1].Doc)); // hit from searchers2[1] subSearcherQuery = parser.Parse("id:doc2"); hits2 = mSearcher2.Search(subSearcherQuery, null, 1000).ScoreDocs; Assert.AreEqual(1, hits2.Length); Assert.AreEqual(1, mSearcher2.SubSearcher(hits2[0].Doc)); // hit from searchers2[1] mSearcher2.Close(); //-------------------------------------------------------------------- // scenario 3 //-------------------------------------------------------------------- // deleting the document just added, this will cause a different exception to take place Term term = new Term("id", "doc1"); IndexReader readerB = IndexReader.Open(indexStoreB, false); readerB.DeleteDocuments(term); readerB.Close(); // optimizing the index with the writer writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), false, IndexWriter.MaxFieldLength.LIMITED); writerB.Optimize(); writerB.Close(); // building the searchables Searcher[] searchers3 = new Searcher[2]; searchers3[0] = new IndexSearcher(indexStoreB, true); searchers3[1] = new IndexSearcher(indexStoreA, true); // creating the mulitSearcher Searcher mSearcher3 = GetMultiSearcherInstance(searchers3); // performing the same search ScoreDoc[] hits3 = mSearcher3.Search(query, null, 1000).ScoreDocs; Assert.AreEqual(3, hits3.Length); // iterating over the hit documents for (int i = 0; i < hits3.Length; i++) { mSearcher3.Doc(hits3[i].Doc); } mSearcher3.Close(); indexStoreA.Close(); indexStoreB.Close(); }
/// <summary>
/// Removes from the article search index every document whose "ArticleId"
/// field equals <paramref name="articleId"/>.  The index path is resolved
/// from the "IndexingArticle" app setting relative to the web root.
/// </summary>
/// <param name="articleId">Id of the article whose index entries are deleted.</param>
public static void DeleteArticleIndexing(Guid articleId)
{
    Lucene.Net.Index.IndexReader reader = Lucene.Net.Index.IndexReader.Open(HttpContext.Current.Server.MapPath(ConfigurationManager.AppSettings["IndexingArticle"]));
    try
    {
        reader.DeleteDocuments(new Lucene.Net.Index.Term("ArticleId", articleId.ToString()));
    }
    finally
    {
        // Always close the reader, even when the delete throws; otherwise the
        // reader leaks and the index write lock stays held, blocking all
        // subsequent writers.
        reader.Close();
    }
}