Beispiel #1
0
        /// <summary>
        /// Deletes from the index stored in the "index" directory every document
        /// whose "path" field matches the term given as the first command-line
        /// argument, and prints how many documents were deleted.
        /// </summary>
        /// <param name="args">
        /// Command-line arguments; <c>args[0]</c> is the term text to delete by.
        /// When no argument is given, a usage message is written to standard error
        /// and the process exits with code 1.
        /// </param>
        public static void  Main(System.String[] args)
        {
            System.String usage = typeof(DeleteFiles) + " <unique_term>";
            if (args.Length == 0)
            {
                System.Console.Error.WriteLine("Usage: " + usage);
                System.Environment.Exit(1);
            }
            try
            {
                Directory directory = FSDirectory.Open(new System.IO.FileInfo("index"));
                try
                {
                    IndexReader reader = IndexReader.Open(directory, false);             // we don't want read-only because we are about to delete
                    try
                    {
                        Term term    = new Term("path", args[0]);
                        int  deleted = reader.DeleteDocuments(term);

                        System.Console.Out.WriteLine("deleted " + deleted + " documents containing " + term);

                        // one can also delete documents by their internal id:

                        /*
                         * for (int i = 0; i < reader.maxDoc(); i++) {
                         * System.out.println("Deleting document with id " + i);
                         * reader.delete(i);
                         * }*/
                    }
                    finally
                    {
                        // always release the reader, even when the delete failed
                        reader.Close();
                    }
                }
                finally
                {
                    // always release the directory, even when the reader could not be opened
                    directory.Close();
                }
            }
            catch (System.Exception e)
            {
                // the cleanup above sits inside this try, so failures from Close() are reported here as well
                System.Console.Out.WriteLine(" caught a " + e.GetType() + "\n with message: " + e.Message);
            }
        }
Beispiel #2
0
        /* Walk directory hierarchy in uid order, while keeping uid iterator from
         * existing index in sync.  Mismatches indicate one of: (a) old documents to
         * be deleted; (b) unchanged documents, to be left alone; or (c) new
         * documents, to be indexed.
         */

        /// <summary>
        /// Runs one pass over <paramref name="file"/>. When <paramref name="create"/>
        /// is false the existing index is opened for modification and its "uid" terms
        /// are iterated while the files are walked; otherwise the files are walked
        /// without consulting any existing index.
        /// </summary>
        /// <param name="file">File or directory passed on to the single-argument IndexDocs.</param>
        /// <param name="index">Location of the existing index; only read when <paramref name="create"/> is false.</param>
        /// <param name="create">True when there is no existing index to compare against.</param>
        private static void  IndexDocs(System.IO.FileInfo file, System.IO.FileInfo index, bool create)
        {
            if (create)
            {
                // don't have an existing index: just walk the files
                IndexDocs(file);
                return;
            }

            // incremental update: open the existing index (not read-only) and
            // start the uid iterator at the first "uid" term
            reader  = IndexReader.Open(FSDirectory.Open(index), false);
            uidIter = reader.Terms(new Term("uid", ""));

            IndexDocs(file);

            if (deleting)
            {
                // every "uid" term the walk did not consume is deleted here
                Term leftover = uidIter.Term();
                while (leftover != null && System.Object.ReferenceEquals(leftover.Field(), "uid"))
                {
                    System.Console.Out.WriteLine("deleting " + HTMLDocument.Uid2url(leftover.Text()));
                    reader.DeleteDocuments(leftover);
                    uidIter.Next();
                    leftover = uidIter.Term();
                }
                deleting = false;
            }

            uidIter.Close();    // done with the uid iterator
            reader.Close();     // done with the existing index
        }
Beispiel #3
0
        /// <summary>
        /// Recursively walks <paramref name="file"/>. Directories are descended into
        /// with their entries visited in ordinal order; files ending in ".html",
        /// ".htm" or ".txt" are reconciled against the uid iterator (when one is
        /// active) or added to the index unconditionally (when none is).
        /// </summary>
        /// <param name="file">The file or directory to process.</param>
        private static void  IndexDocs(System.IO.FileInfo file)
        {
            if (System.IO.Directory.Exists(file.FullName))
            {
                // a directory: list its entries and recurse into each of them
                System.String[] files = System.IO.Directory.GetFileSystemEntries(file.FullName);

                // Sort ordinally. The default string sort is culture-sensitive, whereas the
                // uid iterator below is compared with String.CompareOrdinal; using two
                // different orderings can make the walk fall out of step with the iterator.
                // NOTE(review): assumes HTMLDocument.Uid preserves path ordering - confirm.
                System.Array.Sort(files, System.StringComparer.Ordinal);

                for (int i = 0; i < files.Length; i++)
                {
                    IndexDocs(new System.IO.FileInfo(System.IO.Path.Combine(file.FullName, files[i])));
                }
                return;
            }

            // only .html, .htm and .txt files are indexed; the extension check is a
            // plain character comparison, so make it explicitly ordinal
            System.String path = file.FullName;
            bool indexable = path.EndsWith(".html", System.StringComparison.Ordinal)
                             || path.EndsWith(".htm", System.StringComparison.Ordinal)
                             || path.EndsWith(".txt", System.StringComparison.Ordinal);
            if (!indexable)
            {
                return;
            }

            if (uidIter != null)
            {
                System.String uid = HTMLDocument.Uid(file);                     // construct uid for doc

                // Advance past every indexed uid that sorts before this file's uid.
                // The field name is compared by reference, as in the original code.
                // NOTE(review): this presumably relies on field names being interned - confirm.
                while (uidIter.Term() != null && (System.Object)uidIter.Term().Field() == (System.Object) "uid" && String.CompareOrdinal(uidIter.Term().Text(), uid) < 0)
                {
                    if (deleting)
                    {
                        // delete stale docs
                        System.Console.Out.WriteLine("deleting " + HTMLDocument.Uid2url(uidIter.Term().Text()));
                        reader.DeleteDocuments(uidIter.Term());
                    }
                    uidIter.Next();
                }

                if (uidIter.Term() != null && (System.Object)uidIter.Term().Field() == (System.Object) "uid" && String.CompareOrdinal(uidIter.Term().Text(), uid) == 0)
                {
                    uidIter.Next();                         // keep matching docs
                    return;
                }

                if (deleting)
                {
                    // the deleting pass never adds documents
                    return;
                }
            }

            // reached when no uid iterator is active (new index), or when the file has
            // no matching uid in the index and this is not the deleting pass
            Document doc = HTMLDocument.Document(file);
            System.Console.Out.WriteLine("adding " + doc.Get("path"));
            writer.AddDocument(doc);
        }
Beispiel #4
0
		/* Walk directory hierarchy in uid order, while keeping uid iterator from
		 * existing index in sync.  Mismatches indicate one of: (a) old documents to
		 * be deleted; (b) unchanged documents, to be left alone; or (c) new
		 * documents, to be indexed.
		 */

        /// <summary>
        /// Runs one pass over <paramref name="file"/>. When <paramref name="create"/>
        /// is false the existing index is opened for modification and its "uid" terms
        /// are iterated while the directory is walked; otherwise the directory is
        /// walked without consulting any existing index.
        /// </summary>
        /// <param name="file">Directory passed on to the single-argument IndexDocs.</param>
        /// <param name="index">Location of the existing index; only read when <paramref name="create"/> is false.</param>
        /// <param name="create">True when there is no existing index to compare against.</param>
        private static void IndexDocs(System.IO.DirectoryInfo file, System.IO.DirectoryInfo index, bool create)
		{
			if (create)
			{
				// don't have an existing index: just walk the directory
				IndexDocs(file);
				return;
			}

			// incremental update: open the existing index (not read-only) and
			// start the uid iterator at the first "uid" term
			reader = IndexReader.Open(FSDirectory.Open(index), false);
			uidIter = reader.Terms(new Term("uid", ""));

			IndexDocs(file);

			if (deleting)
			{
				// every "uid" term the walk did not consume is deleted here
				Term leftover = uidIter.Term();
				while (leftover != null && System.Object.ReferenceEquals(leftover.Field, "uid"))
				{
					System.Console.Out.WriteLine("deleting " + HTMLDocument.Uid2url(leftover.Text));
					reader.DeleteDocuments(leftover);
					uidIter.Next();
					leftover = uidIter.Term();
				}
				deleting = false;
			}

			uidIter.Close(); // done with the uid iterator
			reader.Close(); // done with the existing index
		}
        /// <summary>
        /// Searches across two indices through a multi-searcher, with the second
        /// index (B) listed first, in three states: B empty (scenario 1), B holding
        /// one document (scenario 2), and B after that document has been deleted
        /// and the index optimized (scenario 3).
        /// </summary>
        public virtual void  TestEmptyIndex()
        {
            Util.Version matchVersion = Util.Version.LUCENE_CURRENT;

            // two in-memory directories, one per index
            Directory storeA = new MockRAMDirectory();
            Directory storeB = new MockRAMDirectory();

            // three documents that all carry handle "1"
            Document docOne = new Document();
            docOne.Add(new Field("fulltext", "Once upon a time.....", Field.Store.YES, Field.Index.ANALYZED));
            docOne.Add(new Field("id", "doc1", Field.Store.YES, Field.Index.NOT_ANALYZED));
            docOne.Add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));

            Document docTwo = new Document();
            docTwo.Add(new Field("fulltext", "in a galaxy far far away.....", Field.Store.YES, Field.Index.ANALYZED));
            docTwo.Add(new Field("id", "doc2", Field.Store.YES, Field.Index.NOT_ANALYZED));
            docTwo.Add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));

            Document docThree = new Document();
            docThree.Add(new Field("fulltext", "a bizarre bug manifested itself....", Field.Store.YES, Field.Index.ANALYZED));
            docThree.Add(new Field("id", "doc3", Field.Store.YES, Field.Index.NOT_ANALYZED));
            docThree.Add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));

            // one writer per index; nothing is written through the second one yet
            IndexWriter writerForA = new IndexWriter(storeA, new StandardAnalyzer(matchVersion), true, IndexWriter.MaxFieldLength.LIMITED);
            IndexWriter writerForB = new IndexWriter(storeB, new StandardAnalyzer(matchVersion), true, IndexWriter.MaxFieldLength.LIMITED);

            //--------------------------------------------------------------------
            // scenario 1: index A holds three documents, index B is empty
            //--------------------------------------------------------------------

            writerForA.AddDocument(docOne);
            writerForA.AddDocument(docTwo);
            writerForA.AddDocument(docThree);
            writerForA.Optimize();
            writerForA.Close();

            // index B is closed without any document in it
            writerForB.Close();

            // the query shared by all three scenarios
            QueryParser parser      = new QueryParser(matchVersion, "fulltext", new StandardAnalyzer(matchVersion));
            Query       handleQuery = parser.Parse("handle:1");

            // VITAL STEP: the searcher over the empty index comes first, ahead of the populated one
            Searcher[] firstPair = new Searcher[]
            {
                new IndexSearcher(storeB, true),
                new IndexSearcher(storeA, true)
            };
            Searcher firstMulti = GetMultiSearcherInstance(firstPair);

            ScoreDoc[] firstHits = firstMulti.Search(handleQuery, null, 1000).ScoreDocs;

            Assert.AreEqual(3, firstHits.Length);

            // load every hit document
            foreach (ScoreDoc hit in firstHits)
            {
                firstMulti.Doc(hit.Doc);
            }
            firstMulti.Close();


            //--------------------------------------------------------------------
            // scenario 2: index B receives one document
            //--------------------------------------------------------------------

            IndexWriter appendingWriter = new IndexWriter(storeB, new StandardAnalyzer(matchVersion), false, IndexWriter.MaxFieldLength.LIMITED);
            appendingWriter.AddDocument(docOne);
            appendingWriter.Optimize();
            appendingWriter.Close();

            // VITAL STEP: again the searcher over index B comes first
            Searcher[] secondPair = new Searcher[]
            {
                new IndexSearcher(storeB, true),
                new IndexSearcher(storeA, true)
            };
            MultiSearcher secondMulti = GetMultiSearcherInstance(secondPair);

            // same query as before
            ScoreDoc[] secondHits = secondMulti.Search(handleQuery, null, 1000).ScoreDocs;

            Assert.AreEqual(4, secondHits.Length);

            // loading the hit documents must not throw
            foreach (ScoreDoc hit in secondHits)
            {
                secondMulti.Doc(hit.Doc);
            }

            // SubSearcher(): doc1 lives in both indices, doc2 only in index A
            Query      idQuery  = parser.Parse("id:doc1");
            ScoreDoc[] doc1Hits = secondMulti.Search(idQuery, null, 1000).ScoreDocs;

            Assert.AreEqual(2, doc1Hits.Length);
            Assert.AreEqual(0, secondMulti.SubSearcher(doc1Hits[0].Doc)); // hit from secondPair[0]
            Assert.AreEqual(1, secondMulti.SubSearcher(doc1Hits[1].Doc)); // hit from secondPair[1]

            idQuery = parser.Parse("id:doc2");
            ScoreDoc[] doc2Hits = secondMulti.Search(idQuery, null, 1000).ScoreDocs;

            Assert.AreEqual(1, doc2Hits.Length);
            Assert.AreEqual(1, secondMulti.SubSearcher(doc2Hits[0].Doc)); // hit from secondPair[1]
            secondMulti.Close();

            //--------------------------------------------------------------------
            // scenario 3: the document just added to index B is deleted again
            //--------------------------------------------------------------------

            // per the original test notes, this state used to provoke a different exception
            Term        doc1Term      = new Term("id", "doc1");
            IndexReader deletingReader = IndexReader.Open(storeB, false);

            deletingReader.DeleteDocuments(doc1Term);
            deletingReader.Close();

            // optimize index B through a writer
            IndexWriter optimizingWriter = new IndexWriter(storeB, new StandardAnalyzer(matchVersion), false, IndexWriter.MaxFieldLength.LIMITED);
            optimizingWriter.Optimize();
            optimizingWriter.Close();

            // index B first once more
            Searcher[] thirdPair = new Searcher[]
            {
                new IndexSearcher(storeB, true),
                new IndexSearcher(storeA, true)
            };
            Searcher thirdMulti = GetMultiSearcherInstance(thirdPair);

            // same query as before
            ScoreDoc[] thirdHits = thirdMulti.Search(handleQuery, null, 1000).ScoreDocs;

            Assert.AreEqual(3, thirdHits.Length);

            // load every hit document
            foreach (ScoreDoc hit in thirdHits)
            {
                thirdMulti.Doc(hit.Doc);
            }
            thirdMulti.Close();
            storeA.Close();
            storeB.Close();
        }
Beispiel #6
0
 /// <summary>
 /// Deletes from the article index every document whose "ArticleId" field
 /// equals <paramref name="articleId"/>. The index location is the
 /// "IndexingArticle" application setting, mapped to a physical path through
 /// the current HTTP context.
 /// </summary>
 /// <param name="articleId">Identifier of the article whose index entries are removed.</param>
 public static void DeleteArticleIndexing(Guid articleId)
 {
     string indexPath = HttpContext.Current.Server.MapPath(ConfigurationManager.AppSettings["IndexingArticle"]);
     Lucene.Net.Index.IndexReader reader = Lucene.Net.Index.IndexReader.Open(indexPath);
     try
     {
         reader.DeleteDocuments(new Lucene.Net.Index.Term("ArticleId", articleId.ToString()));
     }
     finally
     {
         // release the reader even when the delete throws
         reader.Close();
     }
 }