DeleteDocuments() public method

Deletes all documents that have a given term indexed. This is useful if one uses a document field to hold a unique ID string for the document. Then to delete such a document, one merely constructs a term with the appropriate field and the unique ID string as its text and passes it to this method. See DeleteDocument(int) for information about when this deletion will become effective. since this reader was opened has this index open (write.lock could not be obtained)
public DeleteDocuments ( Lucene.Net.Index.Term term ) : int
term Lucene.Net.Index.Term
return int
Example #1
0
 /// <summary> Deletes all documents containing <code>term</code>.
 /// This is useful if one uses a document field to hold a unique ID string for
 /// the document.  Then to delete such a document, one merely constructs a
 /// term with the appropriate field and the unique ID string as its text and
 /// passes it to this method.  Returns the number of documents deleted.
 /// </summary>
 /// <returns> the number of documents deleted
 /// </returns>
 /// <seealso cref="IndexReader#DeleteDocuments(Term)">
 /// </seealso>
 /// <throws>  IllegalStateException if the index is closed </throws>
 public virtual int DeleteDocuments(Term term)
 {
     lock (directory)
     {
         AssureOpen();
         CreateIndexReader();
         return(indexReader.DeleteDocuments(term));
     }
 }
Example #2
0
        /* Open pre-lockless index, add docs, do a delete &
         * setNorm, and search */
        public virtual void  ChangeIndexNoAdds(System.String dirName)
        {
            dirName = FullDir(dirName);

            Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(dirName));

            // make sure searching sees right # hits
            IndexSearcher searcher = new IndexSearcher(dir, true);

            ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
            Assert.AreEqual(34, hits.Length, "wrong number of hits");
            Document d = searcher.Doc(hits[0].Doc);

            Assert.AreEqual("21", d.Get("id"), "wrong first document");
            searcher.Close();

            // make sure we can do a delete & setNorm against this
            // pre-lockless segment:
            IndexReader reader     = IndexReader.Open(dir, false);
            Term        searchTerm = new Term("id", "6");
            int         delCount   = reader.DeleteDocuments(searchTerm);

            Assert.AreEqual(1, delCount, "wrong delete count");
            reader.SetNorm(22, "content", (float)2.0);
            reader.Close();

            // make sure they "took":
            searcher = new IndexSearcher(dir, true);
            hits     = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
            Assert.AreEqual(33, hits.Length, "wrong number of hits");
            d = searcher.Doc(hits[0].Doc);
            Assert.AreEqual("22", d.Get("id"), "wrong first document");
            TestHits(hits, 33, searcher.IndexReader);
            searcher.Close();

            // optimize
            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED);

            writer.Optimize();
            writer.Close();

            searcher = new IndexSearcher(dir, true);
            hits     = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
            Assert.AreEqual(33, hits.Length, "wrong number of hits");
            d = searcher.Doc(hits[0].Doc);
            Assert.AreEqual("22", d.Get("id"), "wrong first document");
            TestHits(hits, 33, searcher.IndexReader);
            searcher.Close();

            dir.Close();
        }
Example #3
0
        public virtual void  TestMergeDocCount0()
        {
            Directory dir = new RAMDirectory();

            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED, null);

            writer.SetMergePolicy(new LogDocMergePolicy(writer));
            writer.SetMaxBufferedDocs(10);
            writer.MergeFactor = 100;

            for (int i = 0; i < 250; i++)
            {
                AddDoc(writer);
                CheckInvariants(writer);
            }
            writer.Close();

            IndexReader reader = IndexReader.Open(dir, false, null);

            reader.DeleteDocuments(new Term("content", "aaa"), null);
            reader.Close();

            writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED, null);
            writer.SetMergePolicy(new LogDocMergePolicy(writer));
            writer.SetMaxBufferedDocs(10);
            writer.MergeFactor = 5;

            // merge factor is changed, so check invariants after all adds
            for (int i = 0; i < 10; i++)
            {
                AddDoc(writer);
            }
            writer.Commit(null);
            ((ConcurrentMergeScheduler)writer.MergeScheduler).Sync();
            writer.Commit(null);
            CheckInvariants(writer);
            Assert.AreEqual(10, writer.MaxDoc());

            writer.Close();
        }
Example #4
0
        public virtual void  CreateIndex(System.String dirName, bool doCFS)
        {
            RmDir(dirName);

            dirName = FullDir(dirName);

            Directory   dir    = FSDirectory.Open(new System.IO.DirectoryInfo(dirName));
            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            writer.UseCompoundFile = doCFS;
            writer.SetMaxBufferedDocs(10);

            for (int i = 0; i < 35; i++)
            {
                AddDoc(writer, i);
            }
            Assert.AreEqual(35, writer.MaxDoc(), "wrong doc count");
            writer.Close();

            // open fresh writer so we get no prx file in the added segment
            writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
            writer.UseCompoundFile = doCFS;
            writer.SetMaxBufferedDocs(10);
            AddNoProxDoc(writer);
            writer.Close();

            // Delete one doc so we get a .del file:
            IndexReader reader     = IndexReader.Open(dir, false);
            Term        searchTerm = new Term("id", "7");
            int         delCount   = reader.DeleteDocuments(searchTerm);

            Assert.AreEqual(1, delCount, "didn't delete the right number of documents");

            // Set one norm so we get a .s0 file:
            reader.SetNorm(21, "content", (float)1.5);
            reader.Close();
        }
        public virtual void  TestMergeDocCount0()
        {
            Directory dir = new RAMDirectory();

            IndexWriter writer = new IndexWriter(dir, true, new WhitespaceAnalyzer(), true);

            writer.SetMergePolicy(new LogDocMergePolicy(writer));
            writer.SetMaxBufferedDocs(10);
            writer.SetMergeFactor(100);

            for (int i = 0; i < 250; i++)
            {
                AddDoc(writer);
                CheckInvariants(writer);
            }
            writer.Close();

            IndexReader reader = IndexReader.Open(dir);

            reader.DeleteDocuments(new Term("content", "aaa"));
            reader.Close();

            writer = new IndexWriter(dir, true, new WhitespaceAnalyzer(), false);
            writer.SetMergePolicy(new LogDocMergePolicy(writer));
            writer.SetMaxBufferedDocs(10);
            writer.SetMergeFactor(5);

            // merge factor is changed, so check invariants after all adds
            for (int i = 0; i < 10; i++)
            {
                AddDoc(writer);
            }
            CheckInvariants(writer);
            Assert.AreEqual(10, writer.DocCount());

            writer.Close();
        }
Example #6
0
        public virtual void TestExactFileNames()
        {
            System.String outputDir = "lucene.backwardscompat0.index";
            RmDir(outputDir);

            try
            {
                Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(FullDir(outputDir)));

                IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true,
                                                     IndexWriter.MaxFieldLength.UNLIMITED);
                writer.SetRAMBufferSizeMB(16.0);
                for (int i = 0; i < 35; i++)
                {
                    AddDoc(writer, i);
                }
                Assert.AreEqual(35, writer.MaxDoc(), "wrong doc count");
                writer.Close();

                // Delete one doc so we get a .del file:
                IndexReader reader     = IndexReader.Open(dir, false);
                Term        searchTerm = new Term("id", "7");
                int         delCount   = reader.DeleteDocuments(searchTerm);
                Assert.AreEqual(1, delCount, "didn't delete the right number of documents");

                // Set one norm so we get a .s0 file:
                reader.SetNorm(21, "content", (float)1.5);
                reader.Close();

                // The numbering of fields can vary depending on which
                // JRE is in use.  On some JREs we see content bound to
                // field 0; on others, field 1.  So, here we have to
                // figure out which field number corresponds to
                // "content", and then set our expected file names below
                // accordingly:
                CompoundFileReader cfsReader  = new CompoundFileReader(dir, "_0.cfs");
                FieldInfos         fieldInfos = new FieldInfos(cfsReader, "_0.fnm");
                int contentFieldIndex         = -1;
                for (int i = 0; i < fieldInfos.Size(); i++)
                {
                    FieldInfo fi = fieldInfos.FieldInfo(i);
                    if (fi.name_ForNUnit.Equals("content"))
                    {
                        contentFieldIndex = i;
                        break;
                    }
                }
                cfsReader.Close();
                Assert.IsTrue(contentFieldIndex != -1,
                              "could not locate the 'content' field number in the _2.cfs segment");

                // Now verify file names:
                System.String[] expected;
                expected = new System.String[]
                { "_0.cfs", "_0_1.del", "_0_1.s" + contentFieldIndex, "segments_3", "segments.gen" };

                System.String[] actual = dir.ListAll();
                System.Array.Sort(expected);
                System.Array.Sort(actual);
                if (!CollectionsHelper.Equals(expected, actual))
                {
                    Assert.Fail("incorrect filenames in index: expected:\n    " + AsString(expected) +
                                "\n  actual:\n    " + AsString(actual));
                }
                dir.Close();
            }
            finally
            {
                RmDir(outputDir);
            }
        }
Example #7
0
        /* Open pre-lockless index, add docs, do a delete &
         * setNorm, and search */
        public virtual void  ChangeIndexWithAdds(System.String dirName)
        {
            System.String origDirName = dirName;
            dirName = FullDir(dirName);

            Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(dirName));

            // open writer
            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED);

            // add 10 docs
            for (int i = 0; i < 10; i++)
            {
                AddDoc(writer, 35 + i);
            }

            // make sure writer sees right total -- writer seems not to know about deletes in .del?
            int expected;

            if (Compare(origDirName, "24") < 0)
            {
                expected = 45;
            }
            else
            {
                expected = 46;
            }
            Assert.AreEqual(expected, writer.MaxDoc(), "wrong doc count");
            writer.Close();

            // make sure searching sees right # hits
            IndexSearcher searcher = new IndexSearcher(dir, true);

            ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
            Document   d    = searcher.Doc(hits[0].Doc);

            Assert.AreEqual("21", d.Get("id"), "wrong first document");
            TestHits(hits, 44, searcher.IndexReader);
            searcher.Close();

            // make sure we can do delete & setNorm against this
            // pre-lockless segment:
            IndexReader reader     = IndexReader.Open(dir, false);
            Term        searchTerm = new Term("id", "6");
            int         delCount   = reader.DeleteDocuments(searchTerm);

            Assert.AreEqual(1, delCount, "wrong delete count");
            reader.SetNorm(22, "content", (float)2.0);
            reader.Close();

            // make sure they "took":
            searcher = new IndexSearcher(dir, true);
            hits     = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
            Assert.AreEqual(43, hits.Length, "wrong number of hits");
            d = searcher.Doc(hits[0].Doc);
            Assert.AreEqual("22", d.Get("id"), "wrong first document");
            TestHits(hits, 43, searcher.IndexReader);
            searcher.Close();

            // optimize
            writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED);
            writer.Optimize();
            writer.Close();

            searcher = new IndexSearcher(dir, true);
            hits     = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
            Assert.AreEqual(43, hits.Length, "wrong number of hits");
            d = searcher.Doc(hits[0].Doc);
            TestHits(hits, 43, searcher.IndexReader);
            Assert.AreEqual("22", d.Get("id"), "wrong first document");
            searcher.Close();

            dir.Close();
        }
Example #8
0
        public virtual void  TestDeleteLeftoverFiles()
        {
            Directory dir = new RAMDirectory();

            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            writer.SetMaxBufferedDocs(10);
            int i;

            for (i = 0; i < 35; i++)
            {
                AddDoc(writer, i);
            }
            writer.SetUseCompoundFile(false);
            for (; i < 45; i++)
            {
                AddDoc(writer, i);
            }
            writer.Close();

            // Delete one doc so we get a .del file:
            IndexReader reader     = IndexReader.Open(dir);
            Term        searchTerm = new Term("id", "7");
            int         delCount   = reader.DeleteDocuments(searchTerm);

            Assert.AreEqual(1, delCount, "didn't delete the right number of documents");

            // Set one norm so we get a .s0 file:
            reader.SetNorm(21, "content", (float)1.5);
            reader.Close();

            // Now, artificially create an extra .del file & extra
            // .s0 file:
            System.String[] files = dir.ListAll();

            /*
             * for(int j=0;j<files.length;j++) {
             * System.out.println(j + ": " + files[j]);
             * }
             */

            // The numbering of fields can vary depending on which
            // JRE is in use.  On some JREs we see content bound to
            // field 0; on others, field 1.  So, here we have to
            // figure out which field number corresponds to
            // "content", and then set our expected file names below
            // accordingly:
            CompoundFileReader cfsReader  = new CompoundFileReader(dir, "_2.cfs");
            FieldInfos         fieldInfos = new FieldInfos(cfsReader, "_2.fnm");
            int contentFieldIndex         = -1;

            for (i = 0; i < fieldInfos.Size(); i++)
            {
                FieldInfo fi = fieldInfos.FieldInfo(i);
                if (fi.name_ForNUnit.Equals("content"))
                {
                    contentFieldIndex = i;
                    break;
                }
            }
            cfsReader.Close();
            Assert.IsTrue(contentFieldIndex != -1, "could not locate the 'content' field number in the _2.cfs segment");

            System.String normSuffix = "s" + contentFieldIndex;

            // Create a bogus separate norms file for a
            // segment/field that actually has a separate norms file
            // already:
            CopyFile(dir, "_2_1." + normSuffix, "_2_2." + normSuffix);

            // Create a bogus separate norms file for a
            // segment/field that actually has a separate norms file
            // already, using the "not compound file" extension:
            CopyFile(dir, "_2_1." + normSuffix, "_2_2.f" + contentFieldIndex);

            // Create a bogus separate norms file for a
            // segment/field that does not have a separate norms
            // file already:
            CopyFile(dir, "_2_1." + normSuffix, "_1_1." + normSuffix);

            // Create a bogus separate norms file for a
            // segment/field that does not have a separate norms
            // file already using the "not compound file" extension:
            CopyFile(dir, "_2_1." + normSuffix, "_1_1.f" + contentFieldIndex);

            // Create a bogus separate del file for a
            // segment that already has a separate del file:
            CopyFile(dir, "_0_1.del", "_0_2.del");

            // Create a bogus separate del file for a
            // segment that does not yet have a separate del file:
            CopyFile(dir, "_0_1.del", "_1_1.del");

            // Create a bogus separate del file for a
            // non-existent segment:
            CopyFile(dir, "_0_1.del", "_188_1.del");

            // Create a bogus segment file:
            CopyFile(dir, "_0.cfs", "_188.cfs");

            // Create a bogus fnm file when the CFS already exists:
            CopyFile(dir, "_0.cfs", "_0.fnm");

            // Create a deletable file:
            CopyFile(dir, "_0.cfs", "deletable");

            // Create some old segments file:
            CopyFile(dir, "segments_3", "segments");
            CopyFile(dir, "segments_3", "segments_2");

            // Create a bogus cfs file shadowing a non-cfs segment:
            CopyFile(dir, "_2.cfs", "_3.cfs");

            System.String[] filesPre = dir.ListAll();

            // Open & close a writer: it should delete the above 4
            // files and nothing more:
            writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED);
            writer.Close();

            System.String[] files2 = dir.ListAll();
            dir.Close();

            System.Array.Sort(files);
            System.Array.Sort(files2);

            System.Collections.Hashtable dif = DifFiles(files, files2);

            if (!SupportClass.CollectionsHelper.Equals(files, files2))
            {
                Assert.Fail("IndexFileDeleter failed to delete unreferenced extra files: should have deleted " + (filesPre.Length - files.Length) + " files but only deleted " + (filesPre.Length - files2.Length) + "; expected files:\n    " + AsString(files) + "\n  actual files:\n    " + AsString(files2) + "\ndif: " + SupportClass.CollectionsHelper.CollectionToString(dif));
            }
        }
Example #9
0
		// Apply buffered delete terms to this reader.
		private void  ApplyDeletes(System.Collections.Hashtable deleteTerms, IndexReader reader)
		{
			System.Collections.IEnumerator iter = new System.Collections.Hashtable(deleteTerms).GetEnumerator();
			while (iter.MoveNext())
			{
				System.Collections.DictionaryEntry entry = (System.Collections.DictionaryEntry) iter.Current;
				reader.DeleteDocuments((Term) entry.Key);
			}
		}
Example #10
0
        public virtual void  TestEmptyIndex()
        {
            // creating two directories for indices
            Directory indexStoreA = new MockRAMDirectory();
            Directory indexStoreB = new MockRAMDirectory();

            // creating a document to store
            Document lDoc = new Document();

            lDoc.Add(new Field("fulltext", "Once upon a time.....", Field.Store.YES, Field.Index.ANALYZED));
            lDoc.Add(new Field("id", "doc1", Field.Store.YES, Field.Index.NOT_ANALYZED));
            lDoc.Add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));

            // creating a document to store
            Document lDoc2 = new Document();

            lDoc2.Add(new Field("fulltext", "in a galaxy far far away.....", Field.Store.YES, Field.Index.ANALYZED));
            lDoc2.Add(new Field("id", "doc2", Field.Store.YES, Field.Index.NOT_ANALYZED));
            lDoc2.Add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));

            // creating a document to store
            Document lDoc3 = new Document();

            lDoc3.Add(new Field("fulltext", "a bizarre bug manifested itself....", Field.Store.YES, Field.Index.ANALYZED));
            lDoc3.Add(new Field("id", "doc3", Field.Store.YES, Field.Index.NOT_ANALYZED));
            lDoc3.Add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));

            // creating an index writer for the first index
            IndexWriter writerA = new IndexWriter(indexStoreA, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED, null);
            // creating an index writer for the second index, but writing nothing
            IndexWriter writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED, null);

            //--------------------------------------------------------------------
            // scenario 1
            //--------------------------------------------------------------------

            // writing the documents to the first index
            writerA.AddDocument(lDoc, null);
            writerA.AddDocument(lDoc2, null);
            writerA.AddDocument(lDoc3, null);
            writerA.Optimize(null);
            writerA.Close();

            // closing the second index
            writerB.Close();

            // creating the query
            QueryParser parser = new QueryParser(Util.Version.LUCENE_CURRENT, "fulltext", new StandardAnalyzer(Util.Version.LUCENE_CURRENT));
            Query       query  = parser.Parse("handle:1");

            // building the searchables
            Searcher[] searchers = new Searcher[2];
            // VITAL STEP:adding the searcher for the empty index first, before the searcher for the populated index
            searchers[0] = new IndexSearcher(indexStoreB, true, null);
            searchers[1] = new IndexSearcher(indexStoreA, true, null);
            // creating the multiSearcher
            Searcher mSearcher = GetMultiSearcherInstance(searchers);

            // performing the search
            ScoreDoc[] hits = mSearcher.Search(query, null, 1000, null).ScoreDocs;

            Assert.AreEqual(3, hits.Length);

            // iterating over the hit documents
            for (int i = 0; i < hits.Length; i++)
            {
                mSearcher.Doc(hits[i].Doc, null);
            }
            mSearcher.Close();


            //--------------------------------------------------------------------
            // scenario 2
            //--------------------------------------------------------------------

            // adding one document to the empty index
            writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), false, IndexWriter.MaxFieldLength.LIMITED, null);
            writerB.AddDocument(lDoc, null);
            writerB.Optimize(null);
            writerB.Close();

            // building the searchables
            Searcher[] searchers2 = new Searcher[2];
            // VITAL STEP:adding the searcher for the empty index first, before the searcher for the populated index
            searchers2[0] = new IndexSearcher(indexStoreB, true, null);
            searchers2[1] = new IndexSearcher(indexStoreA, true, null);
            // creating the mulitSearcher
            MultiSearcher mSearcher2 = GetMultiSearcherInstance(searchers2);

            // performing the same search
            ScoreDoc[] hits2 = mSearcher2.Search(query, null, 1000, null).ScoreDocs;

            Assert.AreEqual(4, hits2.Length);

            // iterating over the hit documents
            for (int i = 0; i < hits2.Length; i++)
            {
                // no exception should happen at this point
                mSearcher2.Doc(hits2[i].Doc, null);
            }

            // test the subSearcher() method:
            Query subSearcherQuery = parser.Parse("id:doc1");

            hits2 = mSearcher2.Search(subSearcherQuery, null, 1000, null).ScoreDocs;
            Assert.AreEqual(2, hits2.Length);
            Assert.AreEqual(0, mSearcher2.SubSearcher(hits2[0].Doc));             // hit from searchers2[0]
            Assert.AreEqual(1, mSearcher2.SubSearcher(hits2[1].Doc));             // hit from searchers2[1]
            subSearcherQuery = parser.Parse("id:doc2");
            hits2            = mSearcher2.Search(subSearcherQuery, null, 1000, null).ScoreDocs;
            Assert.AreEqual(1, hits2.Length);
            Assert.AreEqual(1, mSearcher2.SubSearcher(hits2[0].Doc));             // hit from searchers2[1]
            mSearcher2.Close();

            //--------------------------------------------------------------------
            // scenario 3
            //--------------------------------------------------------------------

            // deleting the document just added, this will cause a different exception to take place
            Term        term    = new Term("id", "doc1");
            IndexReader readerB = IndexReader.Open(indexStoreB, false, null);

            readerB.DeleteDocuments(term, null);
            readerB.Close();

            // optimizing the index with the writer
            writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), false, IndexWriter.MaxFieldLength.LIMITED, null);
            writerB.Optimize(null);
            writerB.Close();

            // building the searchables
            Searcher[] searchers3 = new Searcher[2];

            searchers3[0] = new IndexSearcher(indexStoreB, true, null);
            searchers3[1] = new IndexSearcher(indexStoreA, true, null);
            // creating the mulitSearcher
            Searcher mSearcher3 = GetMultiSearcherInstance(searchers3);

            // performing the same search
            ScoreDoc[] hits3 = mSearcher3.Search(query, null, 1000, null).ScoreDocs;

            Assert.AreEqual(3, hits3.Length);

            // iterating over the hit documents
            for (int i = 0; i < hits3.Length; i++)
            {
                mSearcher3.Doc(hits3[i].Doc, null);
            }
            mSearcher3.Close();
            indexStoreA.Close();
            indexStoreB.Close();
        }