Flush() Public Method

Flushes all in-memory buffered updates (adds and deletes) to the Directory.
public Flush ( bool triggerMerge, bool flushDocStores, bool flushDeletes ) : void
triggerMerge bool if true, we may merge segments (if deletes or docs were flushed) if necessary
flushDocStores bool if false we are allowed to keep doc stores open to share with the next segment
flushDeletes bool whether pending deletes should also be flushed
Returns: void
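
Before the collected examples below, here is a minimal usage sketch (not taken from any of them). It assumes the Lucene.Net 3.0-era API that most of the examples use; the directory, analyzer, field names, and values are illustrative only:

    using Lucene.Net.Analysis.Standard;
    using Lucene.Net.Documents;
    using Lucene.Net.Index;
    using Lucene.Net.Store;

    static class FlushSketch
    {
        // Buffer one add and one delete in memory, then push both to the Directory.
        public static void IndexAndFlush(Directory directory)
        {
            var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
            using (var writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                var doc = new Document();
                doc.Add(new Field("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
                writer.AddDocument(doc);                      // buffered add
                writer.DeleteDocuments(new Term("id", "0"));  // buffered delete (id "0" is hypothetical)

                // triggerMerge   = true: segments may be merged after the flush if necessary
                // flushDocStores = true: flush and close the shared doc stores instead of keeping them open
                // flushDeletes   = true: the buffered delete above is applied as well
                writer.Flush(true, true, true);
            }
        }
    }

Several of the examples below pass false for triggerMerge, which flushes documents and deletes without also kicking off a merge.
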
Example #1
        /// <summary> Tests creating a segment, then checks to ensure the segment can be seen via
        /// IW.getReader
        /// </summary>
        public virtual void  DoTestIndexWriterReopenSegment(bool optimize)
        {
            Directory   dir1   = new MockRAMDirectory();
            IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);

            writer.SetInfoStream(infoStream);
            IndexReader r1 = writer.GetReader();

            Assert.AreEqual(0, r1.MaxDoc());
            CreateIndexNoClose(false, "index1", writer);
            writer.Flush(!optimize, true, true);

            IndexReader iwr1 = writer.GetReader();

            Assert.AreEqual(100, iwr1.MaxDoc());

            IndexReader r2 = writer.GetReader();

            Assert.AreEqual(r2.MaxDoc(), 100);
            // add 100 documents
            for (int x = 10000; x < 10000 + 100; x++)
            {
                Document d = CreateDocument(x, "index1", 5);
                writer.AddDocument(d);
            }
            writer.Flush(false, true, true);
            // verify the reader was reopened internally
            IndexReader iwr2 = writer.GetReader();

            Assert.IsTrue(iwr2 != r1);
            Assert.AreEqual(200, iwr2.MaxDoc());
            // should have flushed out a segment
            IndexReader r3 = writer.GetReader();

            Assert.IsTrue(r2 != r3);
            Assert.AreEqual(200, r3.MaxDoc());

            // dec ref the readers rather than close them because
            // closing flushes changes to the writer
            r1.Close();
            iwr1.Close();
            r2.Close();
            r3.Close();
            iwr2.Close();
            writer.Close();

            // test whether the changes made it to the directory
            writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
            IndexReader w2r1 = writer.GetReader();

            // ensure the deletes were actually flushed to the directory
            Assert.AreEqual(200, w2r1.MaxDoc());
            w2r1.Close();
            writer.Close();

            dir1.Close();
        }
Example #2
        public virtual void  TestTokenReuse()
        {
            Analyzer analyzer = new AnonymousClassAnalyzer1(this);

            IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

            Document doc = new Document();

            doc.Add(new Field("f1", "a 5 a a", Field.Store.YES, Field.Index.ANALYZED));

            writer.AddDocument(doc);
            writer.Flush();
            SegmentInfo info = writer.NewestSegment();

            writer.Close();
            SegmentReader reader = SegmentReader.Get(info);

            TermPositions termPositions = reader.TermPositions(new Term("f1", "a"));

            Assert.IsTrue(termPositions.Next());
            int freq = termPositions.Freq();

            Assert.AreEqual(3, freq);
            Assert.AreEqual(0, termPositions.NextPosition());
            Assert.AreEqual(true, termPositions.IsPayloadAvailable());
            Assert.AreEqual(6, termPositions.NextPosition());
            Assert.AreEqual(false, termPositions.IsPayloadAvailable());
            Assert.AreEqual(7, termPositions.NextPosition());
            Assert.AreEqual(false, termPositions.IsPayloadAvailable());
        }
Example #3
        public void AddOrUpdateDocuments(params CmsDocument[] documents)
        {
            DeleteDocuments(documents);
            using (var writer = new IndexWriter(_Directory, _Analyzer, false, new IndexWriter.MaxFieldLength(1024 * 1024 * 4)))
            {
                foreach (var document in documents)
                {
                    if (document.Id == Guid.Empty)
                        throw new ArgumentOutOfRangeException("Attempt to index transient document: " + document.Title);

                    var doc = new Document();
                    doc.Add(new Field(CmsDocumentField.Id.ToString(), document.Id.ToString("b"), Field.Store.YES, Field.Index.NOT_ANALYZED));
                    if (!String.IsNullOrEmpty(document.Title))
                        doc.Add(new Field(CmsDocumentField.Title.ToString(), document.Title, Field.Store.YES, Field.Index.ANALYZED));
                    foreach (var tag in document.Tags)
                    {
                        doc.Add(new Field(CmsDocumentField.Tag.ToString(), tag, Field.Store.YES, Field.Index.ANALYZED));
                    }
                    foreach (var partValue in document.Parts.Select(p => p.Value))
                    {
                        if(!String.IsNullOrEmpty(partValue))
                            doc.Add(new Field(CmsDocumentField.Value.ToString(), partValue, Field.Store.NO, Field.Index.ANALYZED));
                    }
                    writer.AddDocument(doc);
                }
                writer.Flush(true, true, true);
            }
        }
Example #4
		public virtual void  TestSimpleSkip()
		{
			RAMDirectory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new PayloadAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			Term term = new Term("test", "a");
			for (int i = 0; i < 5000; i++)
			{
				Document d1 = new Document();
				d1.Add(new Field(term.Field(), term.Text(), Field.Store.NO, Field.Index.ANALYZED));
				writer.AddDocument(d1);
			}
			writer.Flush();
			writer.Optimize();
			writer.Close();
			
			IndexReader reader = SegmentReader.GetOnlySegmentReader(dir);
			SegmentTermPositions tp = (SegmentTermPositions) reader.TermPositions();
			tp.freqStream_ForNUnit = new CountingStream(this, tp.freqStream_ForNUnit);
			
			for (int i = 0; i < 2; i++)
			{
				counter = 0;
				tp.Seek(term);
				
				CheckSkipTo(tp, 14, 185); // no skips
				CheckSkipTo(tp, 17, 190); // one skip on level 0
				CheckSkipTo(tp, 287, 200); // one skip on level 1, two on level 0
				
				// this test would fail if we had only one skip level,
				// because then more bytes would be read from the freqStream
				CheckSkipTo(tp, 4800, 250); // one skip on level 2
			}
		}
Example #5
        public virtual void  TestPreAnalyzedField()
        {
            IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
            Document    doc    = new Document();

            doc.Add(new Field("preanalyzed", new AnonymousClassTokenStream(this), TermVector.NO));

            writer.AddDocument(doc);
            writer.Flush();
            SegmentInfo info = writer.NewestSegment();

            writer.Close();
            SegmentReader reader = SegmentReader.Get(info);

            TermPositions termPositions = reader.TermPositions(new Term("preanalyzed", "term1"));

            Assert.IsTrue(termPositions.Next());
            Assert.AreEqual(1, termPositions.Freq());
            Assert.AreEqual(0, termPositions.NextPosition());

            termPositions.Seek(new Term("preanalyzed", "term2"));
            Assert.IsTrue(termPositions.Next());
            Assert.AreEqual(2, termPositions.Freq());
            Assert.AreEqual(1, termPositions.NextPosition());
            Assert.AreEqual(3, termPositions.NextPosition());

            termPositions.Seek(new Term("preanalyzed", "term3"));
            Assert.IsTrue(termPositions.Next());
            Assert.AreEqual(1, termPositions.Freq());
            Assert.AreEqual(2, termPositions.NextPosition());
        }
Example #6
        public virtual void  TestSimpleSkip()
        {
            RAMDirectory dir    = new RAMDirectory();
            IndexWriter  writer = new IndexWriter(dir, new PayloadAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
            Term         term   = new Term("test", "a");

            for (int i = 0; i < 5000; i++)
            {
                Document d1 = new Document();
                d1.Add(new Field(term.Field(), term.Text(), Field.Store.NO, Field.Index.ANALYZED));
                writer.AddDocument(d1);
            }
            writer.Flush();
            writer.Optimize();
            writer.Close();

            IndexReader          reader = SegmentReader.GetOnlySegmentReader(dir);
            SegmentTermPositions tp     = (SegmentTermPositions)reader.TermPositions();

            tp.freqStream_ForNUnit = new CountingStream(this, tp.freqStream_ForNUnit);

            for (int i = 0; i < 2; i++)
            {
                counter = 0;
                tp.Seek(term);

                CheckSkipTo(tp, 14, 185);                 // no skips
                CheckSkipTo(tp, 17, 190);                 // one skip on level 0
                CheckSkipTo(tp, 287, 200);                // one skip on level 1, two on level 0

                // this test would fail if we had only one skip level,
                // because then more bytes would be read from the freqStream
                CheckSkipTo(tp, 4800, 250);                 // one skip on level 2
            }
        }
Example #7
        public virtual void  TestPositionIncrementGap()
        {
            Analyzer analyzer = new AnonymousClassAnalyzer(this);

            IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

            Document doc = new Document();

            doc.Add(new Field("repeated", "repeated one", Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("repeated", "repeated two", Field.Store.YES, Field.Index.ANALYZED));

            writer.AddDocument(doc);
            writer.Flush();
            SegmentInfo info = writer.NewestSegment();

            writer.Close();
            SegmentReader reader = SegmentReader.Get(info);

            TermPositions termPositions = reader.TermPositions(new Term("repeated", "repeated"));

            Assert.IsTrue(termPositions.Next());
            int freq = termPositions.Freq();

            Assert.AreEqual(2, freq);
            Assert.AreEqual(0, termPositions.NextPosition());
            Assert.AreEqual(502, termPositions.NextPosition());
        }
Example #8
        public virtual void  TestDeleteFromIndexWriter()
        {
            bool optimize = true;

            Directory   dir1   = new MockRAMDirectory();
            IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED, null);

            writer.ReaderTermsIndexDivisor = 2;
            writer.SetInfoStream(infoStream, null);
            // create the index
            CreateIndexNoClose(!optimize, "index1", writer);
            writer.Flush(false, true, true, null);
            // get a reader
            IndexReader r1 = writer.GetReader(null);

            System.String id10 = r1.Document(10, null).GetField("id").StringValue(null);

            // deleted IW docs should not show up in the next getReader
            writer.DeleteDocuments(null, new Term("id", id10));
            IndexReader r2 = writer.GetReader(null);

            Assert.AreEqual(1, Count(new Term("id", id10), r1));
            Assert.AreEqual(0, Count(new Term("id", id10), r2));

            System.String id50 = r1.Document(50, null).GetField("id").StringValue(null);
            Assert.AreEqual(1, Count(new Term("id", id50), r1));

            writer.DeleteDocuments(null, new Term("id", id50));

            IndexReader r3 = writer.GetReader(null);

            Assert.AreEqual(0, Count(new Term("id", id10), r3));
            Assert.AreEqual(0, Count(new Term("id", id50), r3));

            System.String id75 = r1.Document(75, null).GetField("id").StringValue(null);
            writer.DeleteDocuments(null, new TermQuery(new Term("id", id75)));
            IndexReader r4 = writer.GetReader(null);

            Assert.AreEqual(1, Count(new Term("id", id75), r3));
            Assert.AreEqual(0, Count(new Term("id", id75), r4));

            r1.Close();
            r2.Close();
            r3.Close();
            r4.Close();
            writer.Close();

            // reopen the writer to verify the delete made it to the directory
            writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED, null);
            writer.SetInfoStream(infoStream, null);
            IndexReader w2r1 = writer.GetReader(null);

            Assert.AreEqual(0, Count(new Term("id", id10), w2r1));
            w2r1.Close();
            writer.Close();
            dir1.Close();
        }
Example #9
        public virtual void TestFlushExceptions()
        {
            MockRAMDirectory directory = new MockRAMDirectory();
            FailOnlyOnFlush  failure   = new FailOnlyOnFlush();

            directory.FailOn(failure);

            IndexWriter writer           = new IndexWriter(directory, ANALYZER, true, IndexWriter.MaxFieldLength.UNLIMITED);
            ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler();

            writer.SetMergeScheduler(cms);
            writer.SetMaxBufferedDocs(2);
            Document doc     = new Document();
            Field    idField = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);

            doc.Add(idField);
            int extraCount = 0;

            for (int i = 0; i < 10; i++)
            {
                for (int j = 0; j < 20; j++)
                {
                    idField.SetValue(System.Convert.ToString(i * 20 + j));
                    writer.AddDocument(doc);
                }

                while (true)
                {
                    // must cycle here because sometimes the merge flushes
                    // the doc we just added and so there's nothing to
                    // flush, and we don't hit the exception
                    writer.AddDocument(doc);
                    failure.SetDoFail();
                    try
                    {
                        writer.Flush(true, false, true);
                        if (failure.hitExc)
                        {
                            Assert.Fail("failed to hit IOException");
                        }
                        extraCount++;
                    }
                    catch (System.IO.IOException ioe)
                    {
                        failure.ClearDoFail();
                        break;
                    }
                }
            }

            writer.Close();
            IndexReader reader = IndexReader.Open(directory, true);

            Assert.AreEqual(200 + extraCount, reader.NumDocs());
            reader.Close();
            directory.Close();
        }
Example #10
        private SegmentInfo IndexDoc(IndexWriter writer, System.String fileName)
        {
            System.IO.FileInfo file = new System.IO.FileInfo(System.IO.Path.Combine(workDir.FullName, fileName));
            Document           doc  = FileDocument.Document(file);

            writer.AddDocument(doc);
            writer.Flush();
            return(writer.NewestSegment());
        }
Example #11
        private void btnFolder_Click(object sender, EventArgs e)
        {
            FolderBrowserDialog dia = new FolderBrowserDialog();
            DialogResult res = dia.ShowDialog();
            if (res != System.Windows.Forms.DialogResult.OK)
            {
                return;
            }

            FSDirectory dir = FSDirectory.GetDirectory(Environment.CurrentDirectory + "\\LuceneIndex");
            //Lucene.Net.Store.RAMDirectory dir = new RAMDirectory();
            Lucene.Net.Analysis.Standard.StandardAnalyzer an = new Lucene.Net.Analysis.Standard.StandardAnalyzer();
            IndexWriter wr = new IndexWriter(dir, an,true);
            IStemmer stemmer = new EnglishStemmer();
            DirectoryInfo diMain = new DirectoryInfo(dia.SelectedPath);
            foreach(FileInfo fi in diMain.GetFiles()){
                Document doc = new Document();
                doc.Add(new Field("title", fi.Name,Field.Store.YES, Field.Index.NO));
                //doc.Add(new Field("text", File.ReadAllText(fi.FullName),Field.Store.YES, Field.Index.TOKENIZED,Field.TermVector.YES));
                doc.Add(new Field("text", PerformStemming(stemmer,NLPToolkit.Tokenizer.TokenizeNow(File.ReadAllText(fi.FullName)).ToArray()), Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));
                wr.AddDocument(doc);
            }
            wr.Optimize();
            wr.Flush();
            wr.Close();

            IndexReader reader = IndexReader.Open(dir);
            for (int i = 0; i < reader.MaxDoc(); i++)
            {
                if (reader.IsDeleted(i))
                    continue;

                Document doc = reader.Document(i);
                String docId = doc.Get("docId");
                foreach (TermFreqVector vector in reader.GetTermFreqVectors(i))
                {
                    foreach(string term in vector.GetTerms()){
                        Console.WriteLine(term);
                    }
                }
                // do something with docId here...
            }
            reader.Close();
            dir.Close();
            //IndexSearcher search = new IndexSearcher(wr.GetReader());

            //MoreLikeThis mlt = new MoreLikeThis(wr.GetReader());
            //FileInfo fitarget = new FileInfo(@"C:\Users\peacemaker\Desktop\TestNoBitcoin\test.txt");
            //Query query = mlt.Like(fitarget);

            //var hits = search.Search(query, int.MaxValue);
            //foreach (ScoreDoc doc in hits.ScoreDocs)
            //{
            //    textBox1.Text += doc.Score + Environment.NewLine;
            //}
        }
Example #12
        public virtual void  TestAddDocument()
        {
            Document testDoc = new Document();

            DocHelper.SetupDoc(testDoc);
            Analyzer    analyzer = new WhitespaceAnalyzer();
            IndexWriter writer   = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

            writer.AddDocument(testDoc);
            writer.Flush();
            SegmentInfo info = writer.NewestSegment();

            writer.Close();
            //After adding the document, we should be able to read it back in
            SegmentReader reader = SegmentReader.Get(info);

            Assert.IsTrue(reader != null);
            Document doc = reader.Document(0);

            Assert.IsTrue(doc != null);

            //System.out.println("Document: " + doc);
            Fieldable[] fields = doc.GetFields("textField2");
            Assert.IsTrue(fields != null && fields.Length == 1);
            Assert.IsTrue(fields[0].StringValue().Equals(DocHelper.FIELD_2_TEXT));
            Assert.IsTrue(fields[0].IsTermVectorStored());

            fields = doc.GetFields("textField1");
            Assert.IsTrue(fields != null && fields.Length == 1);
            Assert.IsTrue(fields[0].StringValue().Equals(DocHelper.FIELD_1_TEXT));
            Assert.IsFalse(fields[0].IsTermVectorStored());

            fields = doc.GetFields("keyField");
            Assert.IsTrue(fields != null && fields.Length == 1);
            Assert.IsTrue(fields[0].StringValue().Equals(DocHelper.KEYWORD_TEXT));

            fields = doc.GetFields(DocHelper.NO_NORMS_KEY);
            Assert.IsTrue(fields != null && fields.Length == 1);
            Assert.IsTrue(fields[0].StringValue().Equals(DocHelper.NO_NORMS_TEXT));

            fields = doc.GetFields(DocHelper.TEXT_FIELD_3_KEY);
            Assert.IsTrue(fields != null && fields.Length == 1);
            Assert.IsTrue(fields[0].StringValue().Equals(DocHelper.FIELD_3_TEXT));

            // test that the norms are not present in the segment if
            // omitNorms is true
            for (int i = 0; i < reader.core_ForNUnit.fieldInfos_ForNUnit.Size(); i++)
            {
                FieldInfo fi = reader.core_ForNUnit.fieldInfos_ForNUnit.FieldInfo(i);
                if (fi.isIndexed_ForNUnit)
                {
                    Assert.IsTrue(fi.omitNorms_ForNUnit == !reader.HasNorms(fi.name_ForNUnit));
                }
            }
        }
Example #13
        public virtual void  TestNoWaitClose()
        {
            RAMDirectory directory = new MockRAMDirectory();

            Document doc     = new Document();
            Field    idField = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);

            doc.Add(idField);

            for (int pass = 0; pass < 2; pass++)
            {
                bool        autoCommit = pass == 0;
                IndexWriter writer     = new IndexWriter(directory, autoCommit, ANALYZER, true);

                for (int iter = 0; iter < 10; iter++)
                {
                    ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler();
                    writer.SetMergeScheduler(cms);
                    writer.SetMaxBufferedDocs(2);
                    writer.SetMergeFactor(100);

                    for (int j = 0; j < 201; j++)
                    {
                        idField.SetValue(System.Convert.ToString(iter * 201 + j));
                        writer.AddDocument(doc);
                    }

                    int delID = iter * 201;
                    for (int j = 0; j < 20; j++)
                    {
                        writer.DeleteDocuments(new Term("id", System.Convert.ToString(delID)));
                        delID += 5;
                    }

                    // Force a bunch of merge threads to kick off so we
                    // stress out aborting them on close:
                    writer.SetMergeFactor(3);
                    writer.AddDocument(doc);
                    writer.Flush();

                    writer.Close(false);

                    IndexReader reader = IndexReader.Open(directory);
                    Assert.AreEqual((1 + iter) * 182, reader.NumDocs());
                    reader.Close();

                    // Reopen
                    writer = new IndexWriter(directory, autoCommit, ANALYZER, false);
                }
                writer.Close();
            }

            directory.Close();
        }
Example #14
        public virtual void  TestRAMDeletes()
        {
            for (int pass = 0; pass < 2; pass++)
            {
                for (int t = 0; t < 2; t++)
                {
                    bool        autoCommit = (0 == pass);
                    Directory   dir        = new MockRAMDirectory();
                    IndexWriter modifier   = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true);
                    modifier.SetMaxBufferedDocs(4);
                    modifier.SetMaxBufferedDeleteTerms(4);

                    int id            = 0;
                    int value_Renamed = 100;

                    AddDoc(modifier, ++id, value_Renamed);
                    if (0 == t)
                    {
                        modifier.DeleteDocuments(new Term("value", System.Convert.ToString(value_Renamed)));
                    }
                    else
                    {
                        modifier.DeleteDocuments(new TermQuery(new Term("value", System.Convert.ToString(value_Renamed))));
                    }
                    AddDoc(modifier, ++id, value_Renamed);
                    if (0 == t)
                    {
                        modifier.DeleteDocuments(new Term("value", System.Convert.ToString(value_Renamed)));
                        Assert.AreEqual(2, modifier.GetNumBufferedDeleteTerms());
                        Assert.AreEqual(1, modifier.GetBufferedDeleteTermsSize());
                    }
                    else
                    {
                        modifier.DeleteDocuments(new TermQuery(new Term("value", System.Convert.ToString(value_Renamed))));
                    }

                    AddDoc(modifier, ++id, value_Renamed);
                    Assert.AreEqual(0, modifier.GetSegmentCount());
                    modifier.Flush();

                    modifier.Commit();

                    IndexReader reader = IndexReader.Open(dir);
                    Assert.AreEqual(1, reader.NumDocs());

                    int hitCount = GetHitCount(dir, new Term("id", System.Convert.ToString(id)));
                    Assert.AreEqual(1, hitCount);
                    reader.Close();
                    modifier.Close();
                    dir.Close();
                }
            }
        }
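Example #15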
 private static void doWithWriter(string indexRoot, Action<IndexWriter> actionWithWriter, Analyzer analyzer, bool recreateIndex = false)
 {
     var indexDirectory = FSDirectory.Open(indexRoot);
     if(analyzer == null)
         analyzer = new StandardAnalyzer(Version.LUCENE_30);
     var writer = new IndexWriter(indexDirectory, analyzer, recreateIndex, IndexWriter.MaxFieldLength.UNLIMITED);
     actionWithWriter(writer);
     //writer.Commit();
     writer.Optimize();
     writer.Flush(true, true, true);
     writer.Dispose();
 }
Example #16
        /// <summary> Writes the document to the directory using the analyzer
        /// and the similarity score; returns the SegmentInfo
        /// describing the new segment
        /// </summary>
        /// <param name="dir">
        /// </param>
        /// <param name="analyzer">
        /// </param>
        /// <param name="similarity">
        /// </param>
        /// <param name="doc">
        /// </param>
        /// <throws>  IOException </throws>
        public static SegmentInfo WriteDoc(Directory dir, Analyzer analyzer, Similarity similarity, Document doc)
        {
            IndexWriter writer = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.LIMITED);

            writer.SetSimilarity(similarity);
            //writer.setUseCompoundFile(false);
            writer.AddDocument(doc);
            writer.Flush();
            SegmentInfo info = writer.NewestSegment();

            writer.Close();
            return(info);
        }
Example #17
        public virtual void  TestAddIndexes()
        {
            bool optimize = false;

            Directory   dir1   = new MockRAMDirectory();
            IndexWriter writer = new IndexWriter(dir1, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);

            writer.SetInfoStream(infoStream);
            // create the index
            CreateIndexNoClose(!optimize, "index1", writer);
            writer.Flush(false, true, true);

            // create a 2nd index
            Directory   dir2    = new MockRAMDirectory();
            IndexWriter writer2 = new IndexWriter(dir2, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);

            writer2.SetInfoStream(infoStream);
            CreateIndexNoClose(!optimize, "index2", writer2);
            writer2.Close();

            IndexReader r0 = writer.GetReader();

            Assert.IsTrue(r0.IsCurrent());
            writer.AddIndexesNoOptimize(new Directory[] { dir2 });
            Assert.IsFalse(r0.IsCurrent());
            r0.Close();

            IndexReader r1 = writer.GetReader();

            Assert.IsTrue(r1.IsCurrent());

            writer.Commit();
            Assert.IsTrue(r1.IsCurrent());

            Assert.AreEqual(200, r1.MaxDoc());

            int index2df = r1.DocFreq(new Term("indexname", "index2"));

            Assert.AreEqual(100, index2df);

            // verify the docs are from different indexes
            Document doc5 = r1.Document(5);

            Assert.AreEqual("index1", doc5.Get("indexname"));
            Document doc150 = r1.Document(150);

            Assert.AreEqual("index2", doc150.Get("indexname"));
            r1.Close();
            writer.Close();
            dir1.Close();
        }
Example #18
        private void Initialize()
        {
            _directory = new RAMDirectory();
            _analyzer = new StandardAnalyzer(Version.LUCENE_30);

            using (var writer = new IndexWriter(_directory, _analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                StoreDocument("The lazy fox jumps over the quick brown dog", writer);
                StoreDocument("The quick brown fox jumps over the lazy dog", writer);

                writer.Optimize();
                writer.Flush(true, true, true);
            }
        }
Example #19
        static void Main(string[] args)
        {
            //Setup indexer

            Directory directory = FSDirectory.GetDirectory("LuceneIndex");
            Analyzer analyzer = new StandardAnalyzer();
            IndexWriter writer = new IndexWriter(directory, analyzer);

            IndexReader red = IndexReader.Open(directory);
            int totDocs = red.MaxDoc();
            red.Close();

            //Add documents to the index
            string text = String.Empty;
            Console.WriteLine("Enter the text you want to add to the index:");
            Console.Write(">");
            int txts = totDocs;
            int j = 0;
            while ((text = Console.ReadLine()) != String.Empty)
            {
                AddTextToIndex(txts++, text, writer);
                j++;
                Console.Write(">");
            }

            writer.Optimize();
            //Close the writer
            writer.Flush();
            writer.Close();

            Console.WriteLine(j + " lines added, "+txts+" documents total");

            //Setup searcher
            IndexSearcher searcher = new IndexSearcher(directory);
            QueryParser parser = new QueryParser("postBody", analyzer);

            Console.WriteLine("Enter the search string:");
            Console.Write(">");

            while ((text = Console.ReadLine()) != String.Empty)
            {
                Search(text, searcher, parser);
                Console.Write(">");
            }

            //Clean up everything
            searcher.Close();
            directory.Close();
        }
Example #20
        public static void CreateIndex(MongoCollection<TweetItem> collection)
        {
            DateTime dtmFirst = new DateTime(2014, 05, 17, 0, 0, 0);
            DateTime dtmLast = new DateTime(2014, 05, 17, 23, 59, 59);
            FSDirectory dir = FSDirectory.GetDirectory(Environment.CurrentDirectory + "\\LuceneIndex");
            //Lucene.Net.Store.RAMDirectory dir = new RAMDirectory();
            Lucene.Net.Analysis.StopAnalyzer an = new Lucene.Net.Analysis.StopAnalyzer();
            IndexWriter wr = new IndexWriter(dir, an, true);
            IStemmer stemmer = new EnglishStemmer();
            while (dtmFirst.Date <= DateTime.Now.Date)
            {
                var query = Query<TweetItem>.Where(t => t.CreationDate >= dtmFirst && t.CreationDate <= dtmLast);
                List<TweetItem> value = collection.Find(query).ToList();
                //DirectoryInfo diMain = new DirectoryInfo(dia.SelectedPath);               
                using (var client = new HttpClient())
                {
                    client.BaseAddress = new Uri("http://www.datasciencetoolkit.org/text2sentiment");
                    client.DefaultRequestHeaders.Accept.Clear();
                    client.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));

                    foreach (TweetItem tweet in value)
                    {
                        Document doc = new Document();
                        
                        //SentimentResult res = await GetSentiment(tweet.Text, client);                        
                        string stemmedtext = PerformStemming(stemmer, NLPToolkit.Tokenizer.TokenizeNow(tweet.Text).ToArray());
                        var scores = classifier.Classify(stemmedtext,DragonHelper.DragonHelper.ExcludeList);
                        string positiveSentiment = string.Empty;
                        string negativeSentiment = string.Empty;                        
                        positiveSentiment = scores["Positive"].ToString();
                        negativeSentiment = scores["Negative"].ToString();
                        doc.Add(new Field("id", tweet._id.ToString(), Field.Store.YES, Field.Index.NO));
                        doc.Add(new Field("created", tweet.CreationDate.ToString(), Field.Store.YES, Field.Index.NO));
                        doc.Add(new Field("user", tweet.User, Field.Store.YES, Field.Index.NO));                        
                        doc.Add(new Field("text", stemmedtext, Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));                        
                        doc.Add(new Field("possentiment", positiveSentiment , Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));
                        doc.Add(new Field("negsentiment", negativeSentiment, Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));

                        wr.AddDocument(doc);
                    }
                }
                dtmFirst = dtmFirst.AddDays(1);
                dtmLast = dtmLast.AddDays(1);
            }
            wr.Optimize();
            wr.Flush();
            wr.Close();
            dir.Close();
        }
Example #21
        /// <summary>
        /// static boolean hasPendingDeletes(SegmentInfos infos) {
        ///  for (SegmentInfo info : infos) {
        ///    if (info.deletes.Any()) {
        ///      return true;
        ///    }
        ///  }
        ///  return false;
        /// }
        ///
        /// </summary>
        internal virtual void Part2(IndexWriter writer, RangeMergePolicy fsmp)
        {
            for (int x = 20; x < 25; x++)
            {
                writer.AddDocument(DocHelper.CreateDocument(x, "5", 2));
                //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
            }
            writer.Flush(false, false);
            for (int x = 25; x < 30; x++)
            {
                writer.AddDocument(DocHelper.CreateDocument(x, "5", 2));
                //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
            }
            writer.Flush(false, false);

            //System.out.println("infos3:"+writer.SegmentInfos);

            Term delterm = new Term("id", "8");

            writer.DeleteDocuments(delterm);
            //System.out.println("segdels3:" + writer.docWriter.deletesToString());

            fsmp.doMerge = true;
            fsmp.start   = 1;
            fsmp.length  = 2;
            writer.MaybeMerge();

            // deletes for info1, the newly created segment from the
            // merge should have no deletes because they were applied in
            // the merge
            //SegmentInfo info1 = writer.SegmentInfos[1];
            //Assert.IsFalse(exists(info1, writer.docWriter.segmentDeletes));

            //System.out.println("infos4:"+writer.SegmentInfos);
            //System.out.println("segdels4:" + writer.docWriter.deletesToString());
        }
Example #22
        public virtual void  TestDeleteMerging()
        {
            RAMDirectory directory = new MockRAMDirectory();

            IndexWriter writer           = new IndexWriter(directory, true, ANALYZER, true);
            ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler();

            writer.SetMergeScheduler(cms);

            LogDocMergePolicy mp = new LogDocMergePolicy(writer);

            writer.SetMergePolicy(mp);

            // Force degenerate merging so we can get a mix of
            // merging of segments with and without deletes at the
            // start:
            mp.SetMinMergeDocs(1000);

            Document doc     = new Document();
            Field    idField = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);

            doc.Add(idField);
            for (int i = 0; i < 10; i++)
            {
                for (int j = 0; j < 100; j++)
                {
                    idField.SetValue(System.Convert.ToString(i * 100 + j));
                    writer.AddDocument(doc);
                }

                int delID = i;
                while (delID < 100 * (1 + i))
                {
                    writer.DeleteDocuments(new Term("id", "" + delID));
                    delID += 10;
                }

                writer.Flush();
            }

            writer.Close();
            IndexReader reader = IndexReader.Open(directory);

            // Verify that we did not lose any deletes...
            Assert.AreEqual(450, reader.NumDocs());
            reader.Close();
            directory.Close();
        }
Example #23
        public virtual void TestPartialMerge()
        {
            int num = AtLeast(10);

            for (int iter = 0; iter < num; iter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: iter=" + iter);
                }
                Directory         dir  = NewDirectory();
                IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
                conf.SetMergeScheduler(new SerialMergeScheduler());
                TieredMergePolicy tmp = NewTieredMergePolicy();
                conf.SetMergePolicy(tmp);
                conf.SetMaxBufferedDocs(2);
                tmp.MaxMergeAtOnce  = 3;
                tmp.SegmentsPerTier = 6;

                IndexWriter w        = new IndexWriter(dir, conf);
                int         maxCount = 0;
                int         numDocs  = TestUtil.NextInt32(Random, 20, 100);
                for (int i = 0; i < numDocs; i++)
                {
                    Document doc = new Document();
                    doc.Add(NewTextField("content", "aaa " + (i % 4), Field.Store.NO));
                    w.AddDocument(doc);
                    int count = w.SegmentCount;
                    maxCount = Math.Max(count, maxCount);
                    Assert.IsTrue(count >= maxCount - 3, "count=" + count + " maxCount=" + maxCount);
                }

                w.Flush(true, true);

                int segmentCount = w.SegmentCount;
                int targetCount  = TestUtil.NextInt32(Random, 1, segmentCount);
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: merge to " + targetCount + " segs (current count=" + segmentCount + ")");
                }
                w.ForceMerge(targetCount);
                Assert.AreEqual(targetCount, w.SegmentCount);

                w.Dispose();
                dir.Dispose();
            }
        }
Example #24
 protected static IndexReader SetupIndex()
 {
     var directory = new RAMDirectory();
     var writer = new IndexWriter(directory, new StandardAnalyzer(Version.LUCENE_30), true,
         IndexWriter.MaxFieldLength.LIMITED);
     for (var i = 0; i < 50000; i++)
         writer.AddDocument(new Document()
             .AddField("title", Guid.NewGuid().ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED)
             .AddField("color", GenerateColor(), Field.Store.YES, Field.Index.NOT_ANALYZED)
             .AddField("type", GenerateFood(), Field.Store.YES, Field.Index.NOT_ANALYZED)
             .AddField("type", GenerateFruit(), Field.Store.YES, Field.Index.NOT_ANALYZED)
             .AddField("price", "10", Field.Store.YES, Field.Index.NOT_ANALYZED));
     writer.Flush(true, true, true);
     writer.Optimize();
     writer.Commit();
     return IndexReader.Open(directory, true);
 }
Example #25
        public virtual void  TestFlushExceptions()
        {
            MockRAMDirectory directory = new MockRAMDirectory();
            FailOnlyOnFlush  failure   = new FailOnlyOnFlush();

            directory.FailOn(failure);

            IndexWriter writer           = new IndexWriter(directory, true, ANALYZER, true);
            ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler();

            writer.SetMergeScheduler(cms);
            writer.SetMaxBufferedDocs(2);
            Document doc     = new Document();
            Field    idField = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);

            doc.Add(idField);
            for (int i = 0; i < 10; i++)
            {
                for (int j = 0; j < 20; j++)
                {
                    idField.SetValue(System.Convert.ToString(i * 20 + j));
                    writer.AddDocument(doc);
                }

                writer.AddDocument(doc);

                failure.SetDoFail();
                try
                {
                    writer.Flush();
                    Assert.Fail("failed to hit IOException");
                }
                catch (System.IO.IOException ioe)
                {
                    failure.ClearDoFail();
                }
            }

            writer.Close();
            IndexReader reader = IndexReader.Open(directory);

            Assert.AreEqual(200, reader.NumDocs());
            reader.Close();
            directory.Close();
        }
Example #26
 /// <summary>
 /// Releases the instance.
 /// </summary>
 public void Dispose()
 {
     _writer.Flush(true, true, true);
     _writer.Commit();
     _writer.ExpungeDeletes(true);
     try
     {
         if (Disposing != null)
         {
             Disposing(this, EventArgs.Empty);
         }
     }
     finally
     {
         _writer.Close();
         _resetEvent.Set();
     }
 }
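Example #27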
        public void Initialize()
        {
            _analyzer = new StandardAnalyzer(Version.LUCENE_30);
            _searchIndex = new RAMDirectory();

            var db = new DbAccess("RecipeBrowser");
            var recipes = db.Query<dynamic>("SELECT rcp.RecipeId as RecipeId,rcp.Name as RecipeName, " +
                "rcp.Description as RecipeDescription, rcp.CookingInstructions as CookingInstructions, " +
                "cat.Name as CategoryName FROM Recipe rcp " +
                "JOIN RecipeCategory rcpcat ON rcpcat.RecipeId = rcp.RecipeId " +
                "JOIN Category cat ON cat.CategoryId = rcpcat.CategoryId").ToList();

            using (var writer = new IndexWriter(_searchIndex, _analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                foreach (dynamic record in recipes)
                {
                    Document document = new Document();

                    // Store the basic data for the recipe in the search index.
                    document.Add(new Field("id", record.RecipeId.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                    document.Add(new Field("name", record.RecipeName.ToString(), Field.Store.YES, Field.Index.ANALYZED));
                    document.Add(new Field("description", record.RecipeDescription.ToString(), Field.Store.YES, Field.Index.ANALYZED));
                    document.Add(new Field("instructions", record.CookingInstructions.ToString(), Field.Store.NO, Field.Index.ANALYZED));
                    document.Add(new Field("category", record.CategoryName.ToString(), Field.Store.NO, Field.Index.ANALYZED));

                    dynamic ingredientRecords =
                        db.Query<dynamic>(
                            "SELECT IngredientId, Name FROM Ingredient WHERE RecipeId = @RecipeId",
                            new { RecipeId = record.RecipeId.ToString() });

                    // Store multiple values for the ingredients in the same document.
                    // All the values get analyzed separately so that you can search for them.
                    // They do not get stored however, so you won't be able to retrieve them.
                    foreach (dynamic ingredient in ingredientRecords)
                    {
                        document.Add(new Field("ingredient", ingredient.Name.ToString(), Field.Store.NO, Field.Index.ANALYZED));
                    }
                }

                // Store everything in the directory and merge!
                writer.Optimize(true);
                writer.Flush(true, true, true);
            }
        }
Example #28
 public LuceneCmsSearchService(Directory directory)
 {
     _Directory = directory;
     _Analyzer = new StandardAnalyzer(Version.LUCENE_30);
     try
     {
         //Try to open directory.
         using (var rd = IndexReader.Open(directory, true))
         {
         }
     }
     catch (Exception)
     {
         //If open fails, create it
         using (var writer = new IndexWriter(directory, _Analyzer, true, new IndexWriter.MaxFieldLength(1024 * 1024 * 4)))
         {
             writer.Flush(true, true, true);
         }
     }
 }
Example #29
    /// <summary>
    /// This method indexes the content that is sent across to it. Each piece of content (or "document")
    /// that is indexed has to have a unique identifier (so that the caller can take action based on the
    /// document id). Therefore, this method accepts key-value pairs in the form of a dictionary. The key
    /// is a ulong which uniquely identifies the string to be indexed. The string itself is the value
    /// within the dictionary for that key. Be aware that stop words (like the, this, at, etc.) are _not_
    /// indexed.
    /// </summary>
    /// <param name="txtIdPairToBeIndexed">A dictionary of key-value pairs that are sent by the caller
    /// to uniquely identify each string that is to be indexed.</param>
    /// <returns>The number of documents indexed.</returns>
    public int Index (Dictionary<long, string> txtIdPairToBeIndexed) {

		using (Directory directory = FSDirectory.Open(_indexDir))
		using (Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30))
		using (IndexWriter writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
		using (IndexReader reader = writer.GetReader())
		{

			//writer.DeleteAll();


			Dictionary<long, string>.KeyCollection keys = txtIdPairToBeIndexed.Keys;

			foreach (long id in keys)
			{
				char[] delimiter = { ';' };
				string[] text = txtIdPairToBeIndexed[id].Split(delimiter);
				Document document = new Document();

				Field title = new	Field("title", text[0], Field.Store.YES, Field.Index.NO);
				Field type = new Field("type", text[1], Field.Store.YES, Field.Index.NO);
				Field idField = new Field("date", (id).ToString(), Field.Store.YES, Field.Index.ANALYZED);

				document.Add(title);
				document.Add(type);
				document.Add(idField);

				writer.AddDocument(document);
			}

			int numIndexed = writer.GetDocCount(0);//TODO check number
			writer.Optimize();
			writer.Flush(true,true,true);

			return numIndexed;
		}

		}
Example #30
        public LuceneEngine(List<Line> linesDone, bool modified)
        {
            analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
            string path = linesDone.First().path;
            string filename = System.IO.Path.GetFileNameWithoutExtension(path);
            fileDirectoryPath = System.IO.Path.GetDirectoryName(path);
            indexPath = fileDirectoryPath + "\\" + filename;
            if (modified)
            {
                if (System.IO.Directory.Exists(indexPath))
                {
                    System.IO.Directory.Delete(indexPath, true);

                }
                luceneIndexDirectory = FSDirectory.Open(indexPath);
                w = new IndexWriter(luceneIndexDirectory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
                foreach (var line in linesDone)
                {
                    addDoc(w, line);
                }
                w.Optimize();
                w.Flush(true, true, true);
                w.Dispose();
            }
            else
            {
                luceneIndexDirectory = FSDirectory.Open(indexPath);
            }
            //if (modified)
            //{
            //    foreach (var line in linesDone)
            //    {
            //        addDoc(w, line);
            //    }
            //}
        }
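Example #31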
        public virtual void TestFlushExceptions()
        {
            MockDirectoryWrapper directory = NewMockDirectory();
            FailOnlyOnFlush failure = new FailOnlyOnFlush(this);
            directory.FailOn(failure);

            IndexWriter writer = new IndexWriter(directory, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2));
            Document doc = new Document();
            Field idField = NewStringField("id", "", Field.Store.YES);
            doc.Add(idField);
            int extraCount = 0;

            for (int i = 0; i < 10; i++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: iter=" + i);
                }

                for (int j = 0; j < 20; j++)
                {
                    idField.StringValue = Convert.ToString(i * 20 + j);
                    writer.AddDocument(doc);
                }

                // must cycle here because sometimes the merge flushes
                // the doc we just added and so there's nothing to
                // flush, and we don't hit the exception
                while (true)
                {
                    writer.AddDocument(doc);
                    failure.SetDoFail();
                    try
                    {
                        writer.Flush(true, true);
                        if (failure.HitExc)
                        {
                            Assert.Fail("failed to hit IOException");
                        }
                        extraCount++;
                    }
                    catch (IOException ioe)
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine(ioe.StackTrace);
                        }
                        failure.ClearDoFail();
                        break;
                    }
                }
                Assert.AreEqual(20 * (i + 1) + extraCount, writer.NumDocs());
            }

            writer.Dispose();
            IndexReader reader = DirectoryReader.Open(directory);
            Assert.AreEqual(200 + extraCount, reader.NumDocs);
            reader.Dispose();
            directory.Dispose();
        }
Example #32
        internal void TryResettingIndex()
        {
            try
            {
                IOExtensions.DeleteDirectory(indexDirectory);
                using ( LuceneDirectory luceneDirectory = FSDirectory.Open(new DirectoryInfo(indexDirectory)) )
                {
                    WriteIndexVersion(luceneDirectory);

                    using (var indexWriter = new IndexWriter(luceneDirectory, analyzer, snapshotter, IndexWriter.MaxFieldLength.UNLIMITED))
                    {
                        indexWriter.SetMergeScheduler(new ErrorLoggingConcurrentMergeScheduler());

                        filesystem.Storage.Batch(accessor =>
                        {
                            foreach (var file in accessor.GetFilesAfter(Etag.Empty, int.MaxValue))
                            {
                                Index(indexWriter, FileHeader.Canonize(file.FullPath), file.Metadata, file.Etag, recreateSearcher: false);
                            }
                        });

                        indexWriter.Flush(true, true, true);
                    }
                }
            }
            catch (Exception exception)
            {
                throw new InvalidOperationException("Could not reset index for file system: " + name, exception);
            }
        }
Example #33
		public virtual void  TestOptimizeMaxNumSegments2()
		{
			MockRAMDirectory dir = new MockRAMDirectory();
			
			Document doc = new Document();
			doc.Add(new Field("content", "aaa", Field.Store.YES, Field.Index.TOKENIZED));
			
			IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
			LogDocMergePolicy ldmp = new LogDocMergePolicy();
			ldmp.SetMinMergeDocs(1);
			writer.SetMergePolicy(ldmp);
			writer.SetMergeFactor(4);
			writer.SetMaxBufferedDocs(2);
			
			for (int iter = 0; iter < 10; iter++)
			{
				
				for (int i = 0; i < 19; i++)
					writer.AddDocument(doc);
				
				writer.Flush();
				
				SegmentInfos sis = new SegmentInfos();
				((ConcurrentMergeScheduler) writer.GetMergeScheduler()).Sync();
				sis.Read(dir);
				
				int segCount = sis.Count;
				
				writer.Optimize(7);
				
				sis = new SegmentInfos();
				((ConcurrentMergeScheduler) writer.GetMergeScheduler()).Sync();
				sis.Read(dir);
				int optSegCount = sis.Count;
				
				if (segCount < 7)
					Assert.AreEqual(segCount, optSegCount);
				else
					Assert.AreEqual(7, optSegCount);
			}
		}
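Example #34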
		public virtual void  TestFlushExceptions()
		{
			
			MockRAMDirectory directory = new MockRAMDirectory();
			FailOnlyOnFlush failure = new FailOnlyOnFlush();
			directory.FailOn(failure);
			
			IndexWriter writer = new IndexWriter(directory, true, ANALYZER, true);
			ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler();
			writer.SetMergeScheduler(cms);
			writer.SetMaxBufferedDocs(2);
			Document doc = new Document();
			Field idField = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);
			doc.Add(idField);
			for (int i = 0; i < 10; i++)
			{
				for (int j = 0; j < 20; j++)
				{
					idField.SetValue(System.Convert.ToString(i * 20 + j));
					writer.AddDocument(doc);
				}
				
				writer.AddDocument(doc);
				
				failure.SetDoFail();
				try
				{
					writer.Flush();
					Assert.Fail("failed to hit IOException");
				}
				catch (System.IO.IOException ioe)
				{
					failure.ClearDoFail();
				}
			}
			
			writer.Close();
			IndexReader reader = IndexReader.Open(directory);
			Assert.AreEqual(200, reader.NumDocs());
			reader.Close();
			directory.Close();
		}
Example #35
        public virtual void TestFlushExceptions()
        {
            MockDirectoryWrapper directory = NewMockDirectory();
            FailOnlyOnFlush      failure   = new FailOnlyOnFlush(this);

            directory.FailOn(failure);

            IndexWriter writer  = new IndexWriter(directory, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2));
            Document    doc     = new Document();
            Field       idField = NewStringField("id", "", Field.Store.YES);

            doc.Add(idField);
            int extraCount = 0;

            for (int i = 0; i < 10; i++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: iter=" + i);
                }

                for (int j = 0; j < 20; j++)
                {
                    idField.StringValue = Convert.ToString(i * 20 + j);
                    writer.AddDocument(doc);
                }

                // must cycle here because sometimes the merge flushes
                // the doc we just added and so there's nothing to
                // flush, and we don't hit the exception
                while (true)
                {
                    writer.AddDocument(doc);
                    failure.SetDoFail();
                    try
                    {
                        writer.Flush(true, true);
                        if (failure.HitExc)
                        {
                            Assert.Fail("failed to hit IOException");
                        }
                        extraCount++;
                    }
                    catch (IOException ioe)
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine(ioe.StackTrace);
                        }
                        failure.ClearDoFail();
                        break;
                    }
                }
                Assert.AreEqual(20 * (i + 1) + extraCount, writer.NumDocs());
            }

            writer.Dispose();
            IndexReader reader = DirectoryReader.Open(directory);

            Assert.AreEqual(200 + extraCount, reader.NumDocs);
            reader.Dispose();
            directory.Dispose();
        }
Example #36
		public virtual void  TestTokenReuse()
		{
			Analyzer analyzer = new AnonymousClassAnalyzer1(this);
			
			IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
			
			Document doc = new Document();
			doc.Add(new Field("f1", "a 5 a a", Field.Store.YES, Field.Index.ANALYZED));
			
			writer.AddDocument(doc);
			writer.Flush();
			SegmentInfo info = writer.NewestSegment();
			writer.Close();
			SegmentReader reader = SegmentReader.Get(info);
			
			TermPositions termPositions = reader.TermPositions(new Term("f1", "a"));
			Assert.IsTrue(termPositions.Next());
			int freq = termPositions.Freq();
			Assert.AreEqual(3, freq);
			Assert.AreEqual(0, termPositions.NextPosition());
			Assert.AreEqual(true, termPositions.IsPayloadAvailable());
			Assert.AreEqual(6, termPositions.NextPosition());
			Assert.AreEqual(false, termPositions.IsPayloadAvailable());
			Assert.AreEqual(7, termPositions.NextPosition());
			Assert.AreEqual(false, termPositions.IsPayloadAvailable());
		}
예제 #37
0
		public virtual void  TestAddDocument()
		{
			Document testDoc = new Document();
			DocHelper.SetupDoc(testDoc);
			Analyzer analyzer = new WhitespaceAnalyzer();
			IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
			writer.AddDocument(testDoc);
			writer.Flush();
			SegmentInfo info = writer.NewestSegment();
			writer.Close();
			//After adding the document, we should be able to read it back in
			SegmentReader reader = SegmentReader.Get(info);
			Assert.IsTrue(reader != null);
			Document doc = reader.Document(0);
			Assert.IsTrue(doc != null);
			
			//System.out.println("Document: " + doc);
			Fieldable[] fields = doc.GetFields("textField2");
			Assert.IsTrue(fields != null && fields.Length == 1);
			Assert.IsTrue(fields[0].StringValue().Equals(DocHelper.FIELD_2_TEXT));
			Assert.IsTrue(fields[0].IsTermVectorStored());
			
			fields = doc.GetFields("textField1");
			Assert.IsTrue(fields != null && fields.Length == 1);
			Assert.IsTrue(fields[0].StringValue().Equals(DocHelper.FIELD_1_TEXT));
			Assert.IsFalse(fields[0].IsTermVectorStored());
			
			fields = doc.GetFields("keyField");
			Assert.IsTrue(fields != null && fields.Length == 1);
			Assert.IsTrue(fields[0].StringValue().Equals(DocHelper.KEYWORD_TEXT));
			
			fields = doc.GetFields(DocHelper.NO_NORMS_KEY);
			Assert.IsTrue(fields != null && fields.Length == 1);
			Assert.IsTrue(fields[0].StringValue().Equals(DocHelper.NO_NORMS_TEXT));
			
			fields = doc.GetFields(DocHelper.TEXT_FIELD_3_KEY);
			Assert.IsTrue(fields != null && fields.Length == 1);
			Assert.IsTrue(fields[0].StringValue().Equals(DocHelper.FIELD_3_TEXT));
			
			// test that the norms are not present in the segment if
			// omitNorms is true
			for (int i = 0; i < reader.core_ForNUnit.fieldInfos_ForNUnit.Size(); i++)
			{
				FieldInfo fi = reader.core_ForNUnit.fieldInfos_ForNUnit.FieldInfo(i);
				if (fi.isIndexed_ForNUnit)
				{
					Assert.IsTrue(fi.omitNorms_ForNUnit == !reader.HasNorms(fi.name_ForNUnit));
				}
			}
		}
예제 #38
0
		public virtual void  TestNoTermVectorAfterTermVectorMerge()
		{
			MockRAMDirectory dir = new MockRAMDirectory();
			IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(), true);
			Document document = new Document();
			document.Add(new Field("tvtest", "a b c", Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.YES));
			iw.AddDocument(document);
			iw.Flush();
			
			document = new Document();
			document.Add(new Field("tvtest", "x y z", Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO));
			iw.AddDocument(document);
			// Make first segment
			iw.Flush();
			
			iw.Optimize();
			
			document.Add(new Field("tvtest", "a b c", Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.YES));
			iw.AddDocument(document);
			// Make 2nd segment
			iw.Flush();
			iw.Optimize();
			
			iw.Close();
			dir.Close();
		}
예제 #39
0
		public override void  SetUp()
		{
			base.SetUp();
			/*
			for (int i = 0; i < testFields.length; i++) {
			fieldInfos.add(testFields[i], true, true, testFieldsStorePos[i], testFieldsStoreOff[i]);
			}
			*/
			
			System.Array.Sort(testTerms);
			int tokenUpto = 0;
			for (int i = 0; i < testTerms.Length; i++)
			{
				positions[i] = new int[TERM_FREQ];
				offsets[i] = new TermVectorOffsetInfo[TERM_FREQ];
				// first position must be 0
				for (int j = 0; j < TERM_FREQ; j++)
				{
					// positions are always sorted in increasing order
					positions[i][j] = (int) (j * 10 + (new System.Random().NextDouble()) * 10);
					// offsets are always sorted in increasing order
					offsets[i][j] = new TermVectorOffsetInfo(j * 10, j * 10 + testTerms[i].Length);
					TestToken token = tokens[tokenUpto++] = new TestToken(this);
					token.text = testTerms[i];
					token.pos = positions[i][j];
					token.startOffset = offsets[i][j].GetStartOffset();
					token.endOffset = offsets[i][j].GetEndOffset();
				}
			}
			System.Array.Sort(tokens);
			
			IndexWriter writer = new IndexWriter(dir, new MyAnalyzer(this), true, IndexWriter.MaxFieldLength.LIMITED);
			writer.SetUseCompoundFile(false);
			Document doc = new Document();
			for (int i = 0; i < testFields.Length; i++)
			{
				Field.TermVector tv;
				if (testFieldsStorePos[i] && testFieldsStoreOff[i])
					tv = Field.TermVector.WITH_POSITIONS_OFFSETS;
				else if (testFieldsStorePos[i] && !testFieldsStoreOff[i])
					tv = Field.TermVector.WITH_POSITIONS;
				else if (!testFieldsStorePos[i] && testFieldsStoreOff[i])
					tv = Field.TermVector.WITH_OFFSETS;
				else
					tv = Field.TermVector.YES;
				doc.Add(new Field(testFields[i], "", Field.Store.NO, Field.Index.ANALYZED, tv));
			}
			
			//Create 5 documents for testing, they all have the same
			//terms
			for (int j = 0; j < 5; j++)
				writer.AddDocument(doc);
			writer.Flush();
			seg = writer.NewestSegment().name;
			writer.Close();
			
			fieldInfos = new FieldInfos(dir, seg + "." + IndexFileNames.FIELD_INFOS_EXTENSION);
		}
예제 #40
0
		public virtual void  TestDeleteMerging()
		{
			
			RAMDirectory directory = new MockRAMDirectory();
			
			IndexWriter writer = new IndexWriter(directory, true, ANALYZER, true);
			ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler();
			writer.SetMergeScheduler(cms);
			
			LogDocMergePolicy mp = new LogDocMergePolicy(writer);
			writer.SetMergePolicy(mp);
			
			// Force degenerate merging so we can get a mix of
			// merging of segments with and without deletes at the
			// start:
			mp.SetMinMergeDocs(1000);
			
			Document doc = new Document();
			Field idField = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);
			doc.Add(idField);
			for (int i = 0; i < 10; i++)
			{
				for (int j = 0; j < 100; j++)
				{
					idField.SetValue(System.Convert.ToString(i * 100 + j));
					writer.AddDocument(doc);
				}
				
				int delID = i;
				while (delID < 100 * (1 + i))
				{
					writer.DeleteDocuments(new Term("id", "" + delID));
					delID += 10;
				}
				
				writer.Flush();
			}
			
			writer.Close();
			IndexReader reader = IndexReader.Open(directory);
			// Verify that we did not lose any deletes...
			Assert.AreEqual(450, reader.NumDocs());
			reader.Close();
			directory.Close();
		}
예제 #41
0
		public virtual void  TestFlushWithNoMerging()
		{
			Directory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
			writer.SetMaxBufferedDocs(2);
			Document doc = new Document();
			doc.Add(new Field("field", "aaa", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
			for (int i = 0; i < 19; i++)
				writer.AddDocument(doc);
			writer.Flush(false, true);
			writer.Close();
			SegmentInfos sis = new SegmentInfos();
			sis.Read(dir);
			// Since we flushed w/o allowing merging we should now
			// have 10 segments
			Assert.AreEqual(10, sis.Count);
		}
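A minimal sketch distilled from the example above (a hypothetical helper, not taken from any of the listed projects; it assumes the same legacy Lucene.NET API these snippets use). Flushing with triggerMerge set to false leaves each flushed buffer as its own segment, while passing true lets the merge policy coalesce them if necessary, so the returned segment count may differ.

        // Hypothetical sketch: count the segments left behind by a flush,
        // with or without allowing merges. Namespaces assumed:
        // Lucene.Net.Analysis, Lucene.Net.Documents, Lucene.Net.Index, Lucene.Net.Store.
        private static int CountSegmentsAfterFlush(Directory dir, bool triggerMerge)
        {
            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
            writer.SetMaxBufferedDocs(2);   // flush a tiny segment every two buffered documents
            Document doc = new Document();
            doc.Add(new Field("field", "aaa", Field.Store.YES, Field.Index.ANALYZED));
            for (int i = 0; i < 19; i++)
                writer.AddDocument(doc);
            // triggerMerge == false: every flushed buffer stays a separate segment.
            // triggerMerge == true: the merge policy may merge the flushed segments.
            writer.Flush(triggerMerge, true, true);
            writer.Close();

            SegmentInfos sis = new SegmentInfos();
            sis.Read(dir);
            return sis.Count;
        }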
예제 #42
0
		// builds an index with payloads in the given Directory and performs
		// different tests to verify the payload encoding
		private void  PerformTest(Directory dir)
		{
			PayloadAnalyzer analyzer = new PayloadAnalyzer();
			IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
			
			// should be in sync with value in TermInfosWriter
			int skipInterval = 16;
			
			int numTerms = 5;
			System.String fieldName = "f1";
			
			int numDocs = skipInterval + 1;
			// create content for the test documents with just a few terms
			Term[] terms = GenerateTerms(fieldName, numTerms);
			System.Text.StringBuilder sb = new System.Text.StringBuilder();
			for (int i = 0; i < terms.Length; i++)
			{
				sb.Append(terms[i].text_ForNUnit);
				sb.Append(" ");
			}
			System.String content = sb.ToString();
			
			
			int payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2;
			byte[] payloadData = GenerateRandomData(payloadDataLength);
			
			Document d = new Document();
			d.Add(new Field(fieldName, content, Field.Store.NO, Field.Index.ANALYZED));
			// add the same document multiple times to have the same payload lengths for all
			// occurrences within two consecutive skip intervals
			int offset = 0;
			for (int i = 0; i < 2 * numDocs; i++)
			{
				analyzer.SetPayloadData(fieldName, payloadData, offset, 1);
				offset += numTerms;
				writer.AddDocument(d);
			}
			
			// make sure we create more than one segment to test merging
			writer.Flush();
			
			// now we make sure to have different payload lengths at the next skip point
			for (int i = 0; i < numDocs; i++)
			{
				analyzer.SetPayloadData(fieldName, payloadData, offset, i);
				offset += i * numTerms;
				writer.AddDocument(d);
			}
			
			writer.Optimize();
			// flush
			writer.Close();
			
			
			/*
			* Verify the index
			* first we test if all payloads are stored correctly
			*/
			IndexReader reader = IndexReader.Open(dir);
			
			byte[] verifyPayloadData = new byte[payloadDataLength];
			offset = 0;
			TermPositions[] tps = new TermPositions[numTerms];
			for (int i = 0; i < numTerms; i++)
			{
				tps[i] = reader.TermPositions(terms[i]);
			}
			
			while (tps[0].Next())
			{
				for (int i = 1; i < numTerms; i++)
				{
					tps[i].Next();
				}
				int freq = tps[0].Freq();
				
				for (int i = 0; i < freq; i++)
				{
					for (int j = 0; j < numTerms; j++)
					{
						tps[j].NextPosition();
						tps[j].GetPayload(verifyPayloadData, offset);
						offset += tps[j].GetPayloadLength();
					}
				}
			}
			
			for (int i = 0; i < numTerms; i++)
			{
				tps[i].Close();
			}
			
			AssertByteArrayEquals(payloadData, verifyPayloadData);
			
			/*
			*  test lazy skipping
			*/
			TermPositions tp = reader.TermPositions(terms[0]);
			tp.Next();
			tp.NextPosition();
			// now we don't read this payload
			tp.NextPosition();
			Assert.AreEqual(1, tp.GetPayloadLength(), "Wrong payload length.");
			byte[] payload = tp.GetPayload(null, 0);
			Assert.AreEqual(payload[0], payloadData[numTerms]);
			tp.NextPosition();
			
			// we don't read this payload and skip to a different document
			tp.SkipTo(5);
			tp.NextPosition();
			Assert.AreEqual(1, tp.GetPayloadLength(), "Wrong payload length.");
			payload = tp.GetPayload(null, 0);
			Assert.AreEqual(payload[0], payloadData[5 * numTerms]);
			
			
			/*
			* Test different lengths at skip points
			*/
			tp.Seek(terms[1]);
			tp.Next();
			tp.NextPosition();
			Assert.AreEqual(1, tp.GetPayloadLength(), "Wrong payload length.");
			tp.SkipTo(skipInterval - 1);
			tp.NextPosition();
			Assert.AreEqual(1, tp.GetPayloadLength(), "Wrong payload length.");
			tp.SkipTo(2 * skipInterval - 1);
			tp.NextPosition();
			Assert.AreEqual(1, tp.GetPayloadLength(), "Wrong payload length.");
			tp.SkipTo(3 * skipInterval - 1);
			tp.NextPosition();
			Assert.AreEqual(3 * skipInterval - 2 * numDocs - 1, tp.GetPayloadLength(), "Wrong payload length.");
			
			/*
			* Test multiple call of getPayload()
			*/
			tp.GetPayload(null, 0);
			try
			{
				// it is forbidden to call getPayload() more than once
				// without calling nextPosition()
				tp.GetPayload(null, 0);
				Assert.Fail("Expected exception not thrown");
			}
			catch (System.Exception expected)
			{
				// expected exception
			}
			
			reader.Close();
			
			// test long payload
			analyzer = new PayloadAnalyzer();
			writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
			System.String singleTerm = "lucene";
			
			d = new Document();
			d.Add(new Field(fieldName, singleTerm, Field.Store.NO, Field.Index.ANALYZED));
			// add a payload whose length is greater than the buffer size of BufferedIndexOutput
			payloadData = GenerateRandomData(2000);
			analyzer.SetPayloadData(fieldName, payloadData, 100, 1500);
			writer.AddDocument(d);
			
			
			writer.Optimize();
			// flush
			writer.Close();
			
			reader = IndexReader.Open(dir);
			tp = reader.TermPositions(new Term(fieldName, singleTerm));
			tp.Next();
			tp.NextPosition();
			
			verifyPayloadData = new byte[tp.GetPayloadLength()];
			tp.GetPayload(verifyPayloadData, 0);
			byte[] portion = new byte[1500];
			Array.Copy(payloadData, 100, portion, 0, 1500);
			
			AssertByteArrayEquals(portion, verifyPayloadData);
			reader.Close();
		}
예제 #43
0
		public virtual void  TestEmptyDocAfterFlushingRealDoc()
		{
			Directory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
			Document doc = new Document();
			doc.Add(new Field("field", "aaa", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
			writer.AddDocument(doc);
			writer.Flush();
			writer.AddDocument(new Document());
			writer.Close();
			IndexReader reader = IndexReader.Open(dir);
			Assert.AreEqual(2, reader.NumDocs());
		}
예제 #44
0
		public virtual void  TestRAMDeletes()
		{
			for (int pass = 0; pass < 2; pass++)
			{
				for (int t = 0; t < 2; t++)
				{
					bool autoCommit = (0 == pass);
					Directory dir = new MockRAMDirectory();
					IndexWriter modifier = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true);
					modifier.SetMaxBufferedDocs(4);
					modifier.SetMaxBufferedDeleteTerms(4);
					
					int id = 0;
					int value_Renamed = 100;
					
					AddDoc(modifier, ++id, value_Renamed);
					if (0 == t)
						modifier.DeleteDocuments(new Term("value", System.Convert.ToString(value_Renamed)));
					else
						modifier.DeleteDocuments(new TermQuery(new Term("value", System.Convert.ToString(value_Renamed))));
					AddDoc(modifier, ++id, value_Renamed);
					if (0 == t)
					{
						modifier.DeleteDocuments(new Term("value", System.Convert.ToString(value_Renamed)));
						Assert.AreEqual(2, modifier.GetNumBufferedDeleteTerms());
						Assert.AreEqual(1, modifier.GetBufferedDeleteTermsSize());
					}
					else
						modifier.DeleteDocuments(new TermQuery(new Term("value", System.Convert.ToString(value_Renamed))));
					
					AddDoc(modifier, ++id, value_Renamed);
					Assert.AreEqual(0, modifier.GetSegmentCount());
					modifier.Flush();
					
					modifier.Commit();
					
					IndexReader reader = IndexReader.Open(dir);
					Assert.AreEqual(1, reader.NumDocs());
					
					int hitCount = GetHitCount(dir, new Term("id", System.Convert.ToString(id)));
					Assert.AreEqual(1, hitCount);
					reader.Close();
					modifier.Close();
					dir.Close();
				}
			}
		}
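A stripped-down sketch of the flow exercised above (hypothetical code written in the style of these examples, not taken from the original project): Flush() moves the buffered add and the pending delete out of RAM, and Commit() then makes the changes visible to a newly opened reader.

        // Hypothetical sketch: a buffered delete applied to a just-added document.
        private static void FlushAndCommitSketch()
        {
            Directory dir = new MockRAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            Document doc = new Document();
            doc.Add(new Field("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.AddDocument(doc);
            writer.DeleteDocuments(new Term("id", "1"));   // buffered delete of the document just added

            writer.Flush();     // flush the buffered add and the pending delete to the directory
            writer.Commit();    // make the flushed changes visible to readers

            IndexReader reader = IndexReader.Open(dir);
            // the only document was deleted again, so nothing should remain
            // Assert.AreEqual(0, reader.NumDocs());
            reader.Close();
            writer.Close();
            dir.Close();
        }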
예제 #45
0
        public override void  SetUp()
        {
            base.SetUp();

            /*
             * for (int i = 0; i < testFields.length; i++) {
             * fieldInfos.add(testFields[i], true, true, testFieldsStorePos[i], testFieldsStoreOff[i]);
             * }
             */

            System.Array.Sort(testTerms);
            int tokenUpto = 0;

            for (int i = 0; i < testTerms.Length; i++)
            {
                positions[i] = new int[TERM_FREQ];
                offsets[i]   = new TermVectorOffsetInfo[TERM_FREQ];
                // first position must be 0
                for (int j = 0; j < TERM_FREQ; j++)
                {
                    // positions are always sorted in increasing order
                    positions[i][j] = (int)(j * 10 + (new System.Random().NextDouble()) * 10);
                    // offsets are always sorted in increasing order
                    offsets[i][j] = new TermVectorOffsetInfo(j * 10, j * 10 + testTerms[i].Length);
                    TestToken token = tokens[tokenUpto++] = new TestToken(this);
                    token.text        = testTerms[i];
                    token.pos         = positions[i][j];
                    token.startOffset = offsets[i][j].GetStartOffset();
                    token.endOffset   = offsets[i][j].GetEndOffset();
                }
            }
            System.Array.Sort(tokens);

            IndexWriter writer = new IndexWriter(dir, new MyAnalyzer(this), true, IndexWriter.MaxFieldLength.LIMITED);

            writer.SetUseCompoundFile(false);
            Document doc = new Document();

            for (int i = 0; i < testFields.Length; i++)
            {
                Field.TermVector tv;
                if (testFieldsStorePos[i] && testFieldsStoreOff[i])
                {
                    tv = Field.TermVector.WITH_POSITIONS_OFFSETS;
                }
                else if (testFieldsStorePos[i] && !testFieldsStoreOff[i])
                {
                    tv = Field.TermVector.WITH_POSITIONS;
                }
                else if (!testFieldsStorePos[i] && testFieldsStoreOff[i])
                {
                    tv = Field.TermVector.WITH_OFFSETS;
                }
                else
                {
                    tv = Field.TermVector.YES;
                }
                doc.Add(new Field(testFields[i], "", Field.Store.NO, Field.Index.ANALYZED, tv));
            }

            //Create 5 documents for testing, they all have the same
            //terms
            for (int j = 0; j < 5; j++)
            {
                writer.AddDocument(doc);
            }
            writer.Flush();
            seg = writer.NewestSegment().name;
            writer.Close();

            fieldInfos = new FieldInfos(dir, seg + "." + IndexFileNames.FIELD_INFOS_EXTENSION);
        }
예제 #46
0
        public virtual void TestDeletes1()
        {
            //IndexWriter.debug2 = System.out;
            Directory         dir = new MockDirectoryWrapper(new J2N.Randomizer(Random.NextInt64()), new RAMDirectory());
            IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            iwc.SetMergeScheduler(new SerialMergeScheduler());
            iwc.SetMaxBufferedDocs(5000);
            iwc.SetRAMBufferSizeMB(100);
            RangeMergePolicy fsmp = new RangeMergePolicy(this, false);

            iwc.SetMergePolicy(fsmp);
            IndexWriter writer = new IndexWriter(dir, iwc);

            for (int x = 0; x < 5; x++)
            {
                writer.AddDocument(DocHelper.CreateDocument(x, "1", 2));
                //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
            }
            //System.out.println("commit1");
            writer.Commit();
            Assert.AreEqual(1, writer.SegmentCount);
            for (int x = 5; x < 10; x++)
            {
                writer.AddDocument(DocHelper.CreateDocument(x, "2", 2));
                //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
            }
            //System.out.println("commit2");
            writer.Commit();
            Assert.AreEqual(2, writer.SegmentCount);

            for (int x = 10; x < 15; x++)
            {
                writer.AddDocument(DocHelper.CreateDocument(x, "3", 2));
                //System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
            }

            writer.DeleteDocuments(new Term("id", "1"));

            writer.DeleteDocuments(new Term("id", "11"));

            // flushing without applying deletes means
            // there will still be deletes in the segment infos
            writer.Flush(false, false);
            Assert.IsTrue(writer.bufferedUpdatesStream.Any());

            // get reader flushes pending deletes
            // so there should not be anymore
            IndexReader r1 = writer.GetReader();

            Assert.IsFalse(writer.bufferedUpdatesStream.Any());
            r1.Dispose();

            // delete id:2 from the first segment
            // merge segments 0 and 1
            // which should apply the delete id:2
            writer.DeleteDocuments(new Term("id", "2"));
            writer.Flush(false, false);
            fsmp         = (RangeMergePolicy)writer.Config.MergePolicy;
            fsmp.doMerge = true;
            fsmp.start   = 0;
            fsmp.length  = 2;
            writer.MaybeMerge();

            Assert.AreEqual(2, writer.SegmentCount);

            // id:2 shouldn't exist anymore because
            // it's been applied in the merge and now it's gone
            IndexReader r2 = writer.GetReader();

            int[] id2docs = ToDocsArray(new Term("id", "2"), null, r2);
            Assert.IsTrue(id2docs is null);
            r2.Dispose();

            /*
             * /// // added docs are in the ram buffer
             * /// for (int x = 15; x < 20; x++) {
             * ///  writer.AddDocument(TestIndexWriterReader.CreateDocument(x, "4", 2));
             * ///  System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
             * /// }
             * /// Assert.IsTrue(writer.numRamDocs() > 0);
             * /// // delete from the ram buffer
             * /// writer.DeleteDocuments(new Term("id", Integer.toString(13)));
             * ///
             * /// Term id3 = new Term("id", Integer.toString(3));
             * ///
             * /// // delete from the 1st segment
             * /// writer.DeleteDocuments(id3);
             * ///
             * /// Assert.IsTrue(writer.numRamDocs() > 0);
             * ///
             * /// //System.out
             * /// //    .println("segdels1:" + writer.docWriter.deletesToString());
             * ///
             * /// //Assert.IsTrue(writer.docWriter.segmentDeletes.Size() > 0);
             * ///
             * /// // we cause a merge to happen
             * /// fsmp.doMerge = true;
             * /// fsmp.start = 0;
             * /// fsmp.length = 2;
             * /// System.out.println("maybeMerge "+writer.SegmentInfos);
             * ///
             * /// SegmentInfo info0 = writer.SegmentInfos[0];
             * /// SegmentInfo info1 = writer.SegmentInfos[1];
             * ///
             * /// writer.MaybeMerge();
             * /// System.out.println("maybeMerge after "+writer.SegmentInfos);
             * /// // there should be docs in RAM
             * /// Assert.IsTrue(writer.numRamDocs() > 0);
             * ///
             * /// // assert we've merged the 1 and 2 segments
             * /// // and still have a segment leftover == 2
             * /// Assert.AreEqual(2, writer.SegmentInfos.Size());
             * /// Assert.IsFalse(segThere(info0, writer.SegmentInfos));
             * /// Assert.IsFalse(segThere(info1, writer.SegmentInfos));
             * ///
             * /// //System.out.println("segdels2:" + writer.docWriter.deletesToString());
             * ///
             * /// //Assert.IsTrue(writer.docWriter.segmentDeletes.Size() > 0);
             * ///
             * /// IndexReader r = writer.GetReader();
             * /// IndexReader r1 = r.getSequentialSubReaders()[0];
             * /// printDelDocs(r1.GetLiveDocs());
             * /// int[] docs = toDocsArray(id3, null, r);
             * /// System.out.println("id3 docs:"+Arrays.toString(docs));
             * /// // there shouldn't be any docs for id:3
             * /// Assert.IsTrue(docs is null);
             * /// r.Dispose();
             * ///
             * /// part2(writer, fsmp);
             * ///
             */
            // System.out.println("segdels2:"+writer.docWriter.segmentDeletes.toString());
            //System.out.println("close");
            writer.Dispose();
            dir.Dispose();
        }
예제 #47
0
 public void CleanUp() // Cleans up indexer
 {
     writer.Optimize();
     writer.Flush(true, true, true);
     writer.Dispose();
 }
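The cleanup helpers in these examples run Optimize(), the three-argument Flush(), and Dispose() back to back. A hypothetical variant (not from the original project, and assuming the same writer field) wraps the teardown in try/finally so the writer is released even if the flush throws:

 // Hypothetical variant of the CleanUp() helper above.
 public void CleanUpSafely()
 {
     try
     {
         writer.Optimize();                // merge segments down before closing
         writer.Flush(true, true, true);   // allow merges, flush doc stores and pending deletes
     }
     finally
     {
         writer.Dispose();                 // always release the writer, even if Flush throws
     }
 }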
예제 #48
0
		public virtual void  TestPositionIncrementGap()
		{
			Analyzer analyzer = new AnonymousClassAnalyzer(this);
			
			IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
			
			Document doc = new Document();
			doc.Add(new Field("repeated", "repeated one", Field.Store.YES, Field.Index.ANALYZED));
			doc.Add(new Field("repeated", "repeated two", Field.Store.YES, Field.Index.ANALYZED));
			
			writer.AddDocument(doc);
			writer.Flush();
			SegmentInfo info = writer.NewestSegment();
			writer.Close();
			SegmentReader reader = SegmentReader.Get(info);
			
			TermPositions termPositions = reader.TermPositions(new Term("repeated", "repeated"));
			Assert.IsTrue(termPositions.Next());
			int freq = termPositions.Freq();
			Assert.AreEqual(2, freq);
			Assert.AreEqual(0, termPositions.NextPosition());
			Assert.AreEqual(502, termPositions.NextPosition());
		}
예제 #49
0
		private SegmentInfo IndexDoc(IndexWriter writer, System.String fileName)
		{
			System.IO.FileInfo file = new System.IO.FileInfo(System.IO.Path.Combine(workDir.FullName, fileName));
			Document doc = FileDocument.Document(file);
			writer.AddDocument(doc);
			writer.Flush();
			return writer.NewestSegment();
		}
예제 #50
0
		public virtual void  TestPreAnalyzedField()
		{
			IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			Document doc = new Document();
			
			doc.Add(new Field("preanalyzed", new AnonymousClassTokenStream(this), TermVector.NO));
			
			writer.AddDocument(doc);
			writer.Flush();
			SegmentInfo info = writer.NewestSegment();
			writer.Close();
			SegmentReader reader = SegmentReader.Get(info);
			
			TermPositions termPositions = reader.TermPositions(new Term("preanalyzed", "term1"));
			Assert.IsTrue(termPositions.Next());
			Assert.AreEqual(1, termPositions.Freq());
			Assert.AreEqual(0, termPositions.NextPosition());
			
			termPositions.Seek(new Term("preanalyzed", "term2"));
			Assert.IsTrue(termPositions.Next());
			Assert.AreEqual(2, termPositions.Freq());
			Assert.AreEqual(1, termPositions.NextPosition());
			Assert.AreEqual(3, termPositions.NextPosition());
			
			termPositions.Seek(new Term("preanalyzed", "term3"));
			Assert.IsTrue(termPositions.Next());
			Assert.AreEqual(1, termPositions.Freq());
			Assert.AreEqual(2, termPositions.NextPosition());
		}
예제 #51
0
 /// helper function for CreateIndex()
 private void CleanUpIndex()
 {
     writer.Optimize();
     writer.Flush(true, true, true);
     writer.Dispose();
 }
예제 #52
0
 /// <summary>
 /// Flushes the buffer and closes the index
 /// </summary>
 public void CleanUpIndexer()
 {
     writer.Optimize();
     writer.Flush(true, true, true);
     writer.Dispose();
 }
예제 #53
0
		public virtual void  TestNoWaitClose()
		{
			RAMDirectory directory = new MockRAMDirectory();
			
			Document doc = new Document();
			Field idField = new Field("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);
			doc.Add(idField);
			
			for (int pass = 0; pass < 2; pass++)
			{
				bool autoCommit = pass == 0;
				IndexWriter writer = new IndexWriter(directory, autoCommit, ANALYZER, true);
				
				for (int iter = 0; iter < 10; iter++)
				{
					ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler();
					writer.SetMergeScheduler(cms);
					writer.SetMaxBufferedDocs(2);
					writer.SetMergeFactor(100);
					
					for (int j = 0; j < 201; j++)
					{
						idField.SetValue(System.Convert.ToString(iter * 201 + j));
						writer.AddDocument(doc);
					}
					
					int delID = iter * 201;
					for (int j = 0; j < 20; j++)
					{
						writer.DeleteDocuments(new Term("id", System.Convert.ToString(delID)));
						delID += 5;
					}
					
					// Force a bunch of merge threads to kick off so we
					// stress out aborting them on close:
					writer.SetMergeFactor(3);
					writer.AddDocument(doc);
					writer.Flush();
					
					writer.Close(false);
					
					IndexReader reader = IndexReader.Open(directory);
					Assert.AreEqual((1 + iter) * 182, reader.NumDocs());
					reader.Close();
					
					// Reopen
					writer = new IndexWriter(directory, autoCommit, ANALYZER, false);
				}
				writer.Close();
			}
			
			directory.Close();
		}
예제 #54
0
        public virtual void TestPartialMerge()
        {
            int num = AtLeast(10);
            for (int iter = 0; iter < num; iter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: iter=" + iter);
                }
                Directory dir = NewDirectory();
                IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
                conf.SetMergeScheduler(new SerialMergeScheduler());
                TieredMergePolicy tmp = NewTieredMergePolicy();
                conf.SetMergePolicy(tmp);
                conf.SetMaxBufferedDocs(2);
                tmp.MaxMergeAtOnce = 3;
                tmp.SegmentsPerTier = 6;

                IndexWriter w = new IndexWriter(dir, conf);
                int maxCount = 0;
                int numDocs = TestUtil.NextInt(Random(), 20, 100);
                for (int i = 0; i < numDocs; i++)
                {
                    Document doc = new Document();
                    doc.Add(NewTextField("content", "aaa " + (i % 4), Field.Store.NO));
                    w.AddDocument(doc);
                    int count = w.SegmentCount;
                    maxCount = Math.Max(count, maxCount);
                    Assert.IsTrue(count >= maxCount - 3, "count=" + count + " maxCount=" + maxCount);
                }

                w.Flush(true, true);

                int segmentCount = w.SegmentCount;
                int targetCount = TestUtil.NextInt(Random(), 1, segmentCount);
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: merge to " + targetCount + " segs (current count=" + segmentCount + ")");
                }
                w.ForceMerge(targetCount);
                Assert.AreEqual(targetCount, w.SegmentCount);

                w.Dispose();
                dir.Dispose();
            }
        }
예제 #55
0
        // builds an index with payloads in the given Directory and performs
        // different tests to verify the payload encoding
        private void  PerformTest(Directory dir)
        {
            PayloadAnalyzer analyzer = new PayloadAnalyzer();
            IndexWriter     writer   = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

            // should be in sync with value in TermInfosWriter
            int skipInterval = 16;

            int numTerms = 5;

            System.String fieldName = "f1";

            int numDocs = skipInterval + 1;

            // create content for the test documents with just a few terms
            Term[] terms = GenerateTerms(fieldName, numTerms);
            System.Text.StringBuilder sb = new System.Text.StringBuilder();
            for (int i = 0; i < terms.Length; i++)
            {
                sb.Append(terms[i].text_ForNUnit);
                sb.Append(" ");
            }
            System.String content = sb.ToString();


            int payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2;

            byte[] payloadData = GenerateRandomData(payloadDataLength);

            Document d = new Document();

            d.Add(new Field(fieldName, content, Field.Store.NO, Field.Index.ANALYZED));
            // add the same document multiple times to have the same payload lengths for all
            // occurrences within two consecutive skip intervals
            int offset = 0;

            for (int i = 0; i < 2 * numDocs; i++)
            {
                analyzer.SetPayloadData(fieldName, payloadData, offset, 1);
                offset += numTerms;
                writer.AddDocument(d);
            }

            // make sure we create more than one segment to test merging
            writer.Flush();

            // now we make sure to have different payload lengths at the next skip point
            for (int i = 0; i < numDocs; i++)
            {
                analyzer.SetPayloadData(fieldName, payloadData, offset, i);
                offset += i * numTerms;
                writer.AddDocument(d);
            }

            writer.Optimize();
            // flush
            writer.Close();


            /*
             * Verify the index
             * first we test if all payloads are stored correctly
             */
            IndexReader reader = IndexReader.Open(dir);

            byte[] verifyPayloadData = new byte[payloadDataLength];
            offset = 0;
            TermPositions[] tps = new TermPositions[numTerms];
            for (int i = 0; i < numTerms; i++)
            {
                tps[i] = reader.TermPositions(terms[i]);
            }

            while (tps[0].Next())
            {
                for (int i = 1; i < numTerms; i++)
                {
                    tps[i].Next();
                }
                int freq = tps[0].Freq();

                for (int i = 0; i < freq; i++)
                {
                    for (int j = 0; j < numTerms; j++)
                    {
                        tps[j].NextPosition();
                        tps[j].GetPayload(verifyPayloadData, offset);
                        offset += tps[j].GetPayloadLength();
                    }
                }
            }

            for (int i = 0; i < numTerms; i++)
            {
                tps[i].Close();
            }

            AssertByteArrayEquals(payloadData, verifyPayloadData);

            /*
             *  test lazy skipping
             */
            TermPositions tp = reader.TermPositions(terms[0]);

            tp.Next();
            tp.NextPosition();
            // now we don't read this payload
            tp.NextPosition();
            Assert.AreEqual(1, tp.GetPayloadLength(), "Wrong payload length.");
            byte[] payload = tp.GetPayload(null, 0);
            Assert.AreEqual(payload[0], payloadData[numTerms]);
            tp.NextPosition();

            // we don't read this payload and skip to a different document
            tp.SkipTo(5);
            tp.NextPosition();
            Assert.AreEqual(1, tp.GetPayloadLength(), "Wrong payload length.");
            payload = tp.GetPayload(null, 0);
            Assert.AreEqual(payload[0], payloadData[5 * numTerms]);


            /*
             * Test different lengths at skip points
             */
            tp.Seek(terms[1]);
            tp.Next();
            tp.NextPosition();
            Assert.AreEqual(1, tp.GetPayloadLength(), "Wrong payload length.");
            tp.SkipTo(skipInterval - 1);
            tp.NextPosition();
            Assert.AreEqual(1, tp.GetPayloadLength(), "Wrong payload length.");
            tp.SkipTo(2 * skipInterval - 1);
            tp.NextPosition();
            Assert.AreEqual(1, tp.GetPayloadLength(), "Wrong payload length.");
            tp.SkipTo(3 * skipInterval - 1);
            tp.NextPosition();
            Assert.AreEqual(3 * skipInterval - 2 * numDocs - 1, tp.GetPayloadLength(), "Wrong payload length.");

            /*
             * Test multiple call of getPayload()
             */
            tp.GetPayload(null, 0);
            try
            {
                // it is forbidden to call getPayload() more than once
                // without calling nextPosition()
                tp.GetPayload(null, 0);
                Assert.Fail("Expected exception not thrown");
            }
            catch (System.Exception expected)
            {
                // expected exception
            }

            reader.Close();

            // test long payload
            analyzer = new PayloadAnalyzer();
            writer   = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            System.String singleTerm = "lucene";

            d = new Document();
            d.Add(new Field(fieldName, singleTerm, Field.Store.NO, Field.Index.ANALYZED));
            // add a payload whose length is greater than the buffer size of BufferedIndexOutput
            payloadData = GenerateRandomData(2000);
            analyzer.SetPayloadData(fieldName, payloadData, 100, 1500);
            writer.AddDocument(d);


            writer.Optimize();
            // flush
            writer.Close();

            reader = IndexReader.Open(dir);
            tp     = reader.TermPositions(new Term(fieldName, singleTerm));
            tp.Next();
            tp.NextPosition();

            verifyPayloadData = new byte[tp.GetPayloadLength()];
            tp.GetPayload(verifyPayloadData, 0);
            byte[] portion = new byte[1500];
            Array.Copy(payloadData, 100, portion, 0, 1500);

            AssertByteArrayEquals(portion, verifyPayloadData);
            reader.Close();
        }