/// <summary>
/// Opens the index stored under "demo index" and deletes every document
/// number from 0 up to (but not including) <c>reader.MaxDoc()</c>.
/// Any exception is reported on standard output instead of being propagated.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
public static void Main(System.String[] args)
{
    try
    {
        // NOTE(review): the second argument is presumably the "create" flag
        // (false = open an existing directory) - confirm against FSDirectory.
        Directory directory = FSDirectory.GetDirectory("demo index", false);
        try
        {
            IndexReader reader = IndexReader.Open(directory);
            try
            {
                // Alternative left over from the original sample - delete by
                // term instead of by document number:
                //   Term term = new Term("path", "pizza");
                //   int deleted = reader.Delete(term);
                for (int i = 0; i < reader.MaxDoc(); i++)
                {
                    reader.Delete(i);
                }
            }
            finally
            {
                // Close the reader even when a delete fails.
                reader.Close();
            }
        }
        finally
        {
            // Close the directory even when the reader could not be opened.
            directory.Close();
        }
    }
    catch (System.Exception e)
    {
        System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
    }
}
/// <summary>
/// Walks the directory hierarchy in uid order while keeping the uid iterator
/// from the existing index in sync. Mismatches indicate one of:
/// (a) old documents to be deleted; (b) unchanged documents, to be left
/// alone; or (c) new documents, to be indexed.
/// </summary>
/// <param name="file">Root file or directory to walk.</param>
/// <param name="index">Location of the existing index, passed to <c>IndexReader.Open</c>.</param>
/// <param name="create">
/// When true, no existing index is opened and <paramref name="file"/> is
/// simply handed to <c>IndexDocs(file)</c>; when false the existing index is
/// opened and updated incrementally.
/// </param>
private static void IndexDocs(System.IO.FileInfo file, System.String index, bool create)
{
    if (create)
    {
        // No existing index to reconcile against.
        IndexDocs(file);
        return;
    }

    // Incremental update: open the existing index.
    reader = IndexReader.Open(index);
    try
    {
        // Initialise the uid iterator.
        uidIter = reader.Terms(new Term("uid", ""));
        try
        {
            IndexDocs(file);

            if (deleting)
            {
                // Delete the rest of the stale docs: every "uid" term the walk did not consume.
                // NOTE(review): the field test is a reference comparison; it presumably
                // relies on field names being interned - confirm before changing it.
                while (uidIter.Term() != null && (System.Object) uidIter.Term().Field() == (System.Object) "uid")
                {
                    System.Console.Out.WriteLine("deleting " + HTMLDocument.UID2URL(uidIter.Term().Text()));
                    reader.Delete(uidIter.Term());
                    uidIter.Next();
                }
                deleting = false;
            }
        }
        finally
        {
            // Close the uid iterator even when the walk or a delete throws.
            uidIter.Close();
        }
    }
    finally
    {
        // Close the existing index even when the walk or a delete throws.
        reader.Close();
    }
}
/// <summary>
/// Recursively processes <paramref name="file"/>. A directory has its entries
/// sorted and processed one by one. A file whose name ends in ".html", ".htm"
/// or ".txt" is reconciled against the uid iterator when one is set, or added
/// unconditionally when no iterator is set. Any other file is ignored.
/// </summary>
/// <param name="file">File or directory to process.</param>
private static void IndexDocs(System.IO.FileInfo file)
{
    System.String path = file.FullName;

    // Directory: list, sort and recurse into every entry.
    if (System.IO.Directory.Exists(path))
    {
        System.String[] entries = System.IO.Directory.GetFileSystemEntries(path);
        System.Array.Sort(entries);
        foreach (System.String entry in entries)
        {
            IndexDocs(new System.IO.FileInfo(entry));
        }
        return;
    }

    // Only .html, .htm and .txt files are indexed.
    if (!(path.EndsWith(".html") || path.EndsWith(".htm") || path.EndsWith(".txt")))
    {
        return;
    }

    // With no uid iterator a new index is being created: add unconditionally.
    bool addDocument = true;

    if (uidIter != null)
    {
        System.String uid = HTMLDocument.UID(file); // uid for this document
        bool alreadyIndexed = false;

        // Advance through "uid" terms that sort before this document's uid.
        while (true)
        {
            Term current = uidIter.Term();
            if (current == null || (System.Object) current.Field() != (System.Object) "uid")
            {
                break; // iterator exhausted or past the uid field
            }

            int order = System.String.CompareOrdinal(current.Text(), uid);
            if (order == 0)
            {
                alreadyIndexed = true;
                break;
            }
            if (order > 0)
            {
                break;
            }

            // Term sorts before uid: it is stale.
            if (deleting)
            {
                System.Console.Out.WriteLine("deleting " + HTMLDocument.UID2URL(current.Text()));
                reader.Delete(current);
            }
            uidIter.Next();
        }

        if (alreadyIndexed)
        {
            uidIter.Next(); // keep the matching doc, step past its term
            addDocument = false;
        }
        else
        {
            // New docs are added only outside the deleting pass.
            addDocument = !deleting;
        }
    }

    if (addDocument)
    {
        Document doc = HTMLDocument.Document(file);
        System.Console.Out.WriteLine("adding " + doc.Get("url"));
        writer.AddDocument(doc);
    }
}
/// <summary>
/// Walks the directory hierarchy in uid order while keeping the uid iterator
/// from the existing index in sync. Mismatches indicate one of:
/// (a) old documents to be deleted; (b) unchanged documents, to be left
/// alone; or (c) new documents, to be indexed.
/// </summary>
/// <param name="file">Root file or directory to walk.</param>
/// <param name="index">Location of the existing index, passed to <c>IndexReader.Open</c>.</param>
/// <param name="create">
/// False to open the existing index and update it incrementally; true to
/// skip the existing index and only call <c>IndexDocs(file)</c>.
/// </param>
private static void IndexDocs(System.IO.FileInfo file, System.String index, bool create)
{
    if (!create)
    {
        // Incrementally update: open the existing index.
        reader = IndexReader.Open(index);
        try
        {
            // Init the uid iterator.
            uidIter = reader.Terms(new Term("uid", ""));
            try
            {
                IndexDocs(file);

                if (deleting)
                {
                    // Delete the rest of the stale docs.
                    // NOTE(review): reference comparison on the field name; presumably
                    // depends on interned field names - confirm before changing it.
                    while (uidIter.Term() != null && (System.Object) uidIter.Term().Field() == (System.Object) "uid")
                    {
                        System.Console.Out.WriteLine("deleting " + HTMLDocument.UID2URL(uidIter.Term().Text()));
                        reader.Delete(uidIter.Term());
                        uidIter.Next();
                    }
                    deleting = false;
                }
            }
            finally
            {
                // Always close the uid iterator, including on failure.
                uidIter.Close();
            }
        }
        finally
        {
            // Always close the existing index, including on failure.
            reader.Close();
        }
    }
    else
    {
        // No existing index to reconcile against.
        IndexDocs(file);
    }
}
/// <summary>
/// Searches across two indices through the searcher returned by
/// <c>GetMultiSearcherInstance</c>, always listing the searcher for index B
/// first, and checks the hit count and that every hit can be loaded.
/// Scenario 1: A holds three documents and B is empty (3 hits expected).
/// Scenario 2: one document is added to B (4 hits expected).
/// Scenario 3: that document is deleted from B and B is optimized
/// (3 hits expected).
/// </summary>
public virtual void TestEmptyIndex()
{
    // creating two directories for indices
    Directory indexStoreA = new RAMDirectory();
    Directory indexStoreB = new RAMDirectory();

    // creating a document to store
    Document lDoc = new Document();
    lDoc.Add(Field.Text("fulltext", "Once upon a time....."));
    lDoc.Add(Field.Keyword("id", "doc1"));
    lDoc.Add(Field.Keyword("handle", "1"));

    // creating a document to store
    Document lDoc2 = new Document();
    lDoc2.Add(Field.Text("fulltext", "in a galaxy far far away....."));
    lDoc2.Add(Field.Keyword("id", "doc2"));
    lDoc2.Add(Field.Keyword("handle", "1"));

    // creating a document to store
    Document lDoc3 = new Document();
    lDoc3.Add(Field.Text("fulltext", "a bizarre bug manifested itself...."));
    lDoc3.Add(Field.Keyword("id", "doc3"));
    lDoc3.Add(Field.Keyword("handle", "1"));

    // creating an index writer for the first index
    IndexWriter writerA = new IndexWriter(indexStoreA, new StandardAnalyzer(), true);
    // creating an index writer for the second index, but writing nothing
    IndexWriter writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(), true);

    //--------------------------------------------------------------------
    // scenario 1
    //--------------------------------------------------------------------

    // writing the documents to the first index
    writerA.AddDocument(lDoc);
    writerA.AddDocument(lDoc2);
    writerA.AddDocument(lDoc3);
    writerA.Optimize();
    writerA.Close();

    // closing the second index
    writerB.Close();

    // creating the query
    Query query = Lucene.Net.QueryParsers.QueryParser.Parse("handle:1", "fulltext", new StandardAnalyzer());

    // building the searchables
    Searcher[] searchers = new Searcher[2];
    // VITAL STEP: adding the searcher for the empty index first, before the searcher for the populated index
    searchers[0] = new IndexSearcher(indexStoreB);
    searchers[1] = new IndexSearcher(indexStoreA);
    // creating the multiSearcher
    Searcher mSearcher = GetMultiSearcherInstance(searchers);
    // performing the search
    Hits hits = mSearcher.Search(query);

    Assert.AreEqual(3, hits.Length());

    try
    {
        // iterating over the hit documents
        for (int i = 0; i < hits.Length(); i++)
        {
            hits.Doc(i);
        }
    }
    catch (System.IndexOutOfRangeException e)
    {
        // Log before failing: Assert.Fail throws, so nothing after it would run.
        System.Console.Error.WriteLine(e.Source);
        Assert.Fail("ArrayIndexOutOfBoundsException thrown: " + e.Message);
    }
    finally
    {
        mSearcher.Close();
    }

    //--------------------------------------------------------------------
    // scenario 2
    //--------------------------------------------------------------------

    // adding one document to the empty index
    writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(), false);
    writerB.AddDocument(lDoc);
    writerB.Optimize();
    writerB.Close();

    // building the searchables
    Searcher[] searchers2 = new Searcher[2];
    // VITAL STEP: adding the searcher for the empty index first, before the searcher for the populated index
    searchers2[0] = new IndexSearcher(indexStoreB);
    searchers2[1] = new IndexSearcher(indexStoreA);
    // creating the multiSearcher
    Searcher mSearcher2 = GetMultiSearcherInstance(searchers2);
    // performing the same search
    Hits hits2 = mSearcher2.Search(query);

    Assert.AreEqual(4, hits2.Length());

    try
    {
        // iterating over the hit documents
        for (int i = 0; i < hits2.Length(); i++)
        {
            // no exception should happen at this point
            hits2.Doc(i);
        }
    }
    catch (System.Exception e)
    {
        // Log before failing: Assert.Fail throws, so nothing after it would run.
        System.Console.Error.WriteLine(e.Source);
        Assert.Fail("Exception thrown: " + e.Message);
    }
    finally
    {
        mSearcher2.Close();
    }

    //--------------------------------------------------------------------
    // scenario 3
    //--------------------------------------------------------------------

    // deleting the document just added, this will cause a different exception to take place
    Term term = new Term("id", "doc1");
    IndexReader readerB = IndexReader.Open(indexStoreB);
    readerB.Delete(term);
    readerB.Close();

    // optimizing the index with the writer
    writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(), false);
    writerB.Optimize();
    writerB.Close();

    // building the searchables
    Searcher[] searchers3 = new Searcher[2];
    searchers3[0] = new IndexSearcher(indexStoreB);
    searchers3[1] = new IndexSearcher(indexStoreA);
    // creating the multiSearcher
    Searcher mSearcher3 = GetMultiSearcherInstance(searchers3);
    // performing the same search
    Hits hits3 = mSearcher3.Search(query);

    Assert.AreEqual(3, hits3.Length());

    try
    {
        // iterating over the hit documents
        for (int i = 0; i < hits3.Length(); i++)
        {
            hits3.Doc(i);
        }
    }
    catch (System.IO.IOException e)
    {
        // Log before failing: Assert.Fail throws, so nothing after it would run.
        System.Console.Error.WriteLine(e.Source);
        Assert.Fail("IOException thrown: " + e.Message);
    }
    finally
    {
        mSearcher3.Close();
    }
}