private void  SetUpDirs(Directory dir, Directory aux)
        {
            IndexWriter writer = null;

            writer = NewWriter(dir, true);
            writer.SetMaxBufferedDocs(1000);
            // add 1000 documents in 1 segment
            AddDocs(writer, 1000);
            Assert.AreEqual(1000, writer.DocCount());
            Assert.AreEqual(1, writer.GetSegmentCount());
            writer.Close();

            writer = NewWriter(aux, true);
            writer.SetUseCompoundFile(false);             // use one without a compound file
            writer.SetMaxBufferedDocs(100);
            writer.SetMergeFactor(10);
            // add 30 documents in 3 segments
            for (int i = 0; i < 3; i++)
            {
                AddDocs(writer, 10);
                writer.Close();
                writer = NewWriter(aux, false);
                writer.SetUseCompoundFile(false);                 // use one without a compound file
                writer.SetMaxBufferedDocs(100);
                writer.SetMergeFactor(10);
            }
            Assert.AreEqual(30, writer.DocCount());
            Assert.AreEqual(3, writer.GetSegmentCount());
            writer.Close();
        }
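
For orientation, a minimal sketch of how a test could consume the two directories prepared above (assuming the same fixture helpers NewWriter/AddDocs and the NUnit assertions; 1030 is simply the 1000 + 30 documents noted in the comments):

            // Hypothetical caller of SetUpDirs, reusing the fixture helpers:
            Directory dir = new RAMDirectory();
            Directory aux = new RAMDirectory();
            SetUpDirs(dir, aux);

            IndexWriter writer = NewWriter(dir, false);   // open the existing main index
            writer.AddIndexesNoOptimize(new Directory[] { aux });
            Assert.AreEqual(1030, writer.DocCount());     // 1000 original docs + 30 merged in
            writer.Close();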
Example #2
        public void IndexFile(string filePath)
        {
            PropertyDescriptors descriptors = new PropertyDescriptors();
            descriptors.LoadData(System.Windows.Forms.Application.StartupPath + "\\PropertyDescriptors.xml");
            Analyzer a = new Lucene.Net.Analysis.Standard.StandardAnalyzer();
            bool create = !(System.IO.Directory.Exists(_idxDir) && IndexReader.IndexExists(_idxDir));
            IndexWriter iw = new IndexWriter(_idxDir, a, create);
            iw.SetUseCompoundFile(true);

            AdDataStream adStream = new AdDataStream(filePath);
            adStream.LoadData();
            foreach (Advert ad in adStream.FetchAd())
            {
                Document doc = new Document();
                foreach (string s in ad.GetDictionary().Keys)
                {
                    string temp = descriptors.GetIndexableFormat(descriptors[s], ad[s]);
                    doc.Add(Field.Text(s, temp));

                }
                iw.AddDocument(doc);
                if (_updateCallback != null)
                {
                    _updateCallback("Added Document: " + ad["Title"]);

                }
            }
            iw.Optimize();
            iw.Close();
        }
Example #3
        private Directory MakeIndex()
        {
            Directory dir = new RAMDirectory();

            try
            {
                System.Random r        = NewRandom();
                Analyzer      analyzer = new SimpleAnalyzer();
                IndexWriter   writer   = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

                writer.SetUseCompoundFile(false);

                for (int d = 1; d <= NUM_DOCS; d++)
                {
                    Document doc = new Document();
                    for (int f = 1; f <= NUM_FIELDS; f++)
                    {
                        doc.Add(new Field("f" + f, data[f % data.Length] + '#' + data[r.Next(data.Length)], Field.Store.YES, Field.Index.ANALYZED));
                    }
                    writer.AddDocument(doc);
                }
                writer.Close();
            }
            catch (System.Exception e)
            {
                throw new System.SystemException("Failed to build the test index.", e);
            }
            return(dir);
        }
Example #4
		private Directory MakeIndex()
		{
			Directory dir = new RAMDirectory();
			try
			{
				System.Random r = NewRandom();
				Analyzer analyzer = new SimpleAnalyzer();
				IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
				
				writer.SetUseCompoundFile(false);
				
				for (int d = 1; d <= NUM_DOCS; d++)
				{
					Document doc = new Document();
					for (int f = 1; f <= NUM_FIELDS; f++)
					{
						doc.Add(new Field("f" + f, data[f % data.Length] + '#' + data[r.Next(data.Length)], Field.Store.YES, Field.Index.ANALYZED));
					}
					writer.AddDocument(doc);
				}
				writer.Close();
			}
			catch (System.Exception e)
			{
				throw new System.SystemException("Failed to build the test index.", e);
			}
			return dir;
		}
Example #5
        public virtual void  TestNoPrxFile()
        {
            Directory   ram      = new MockRAMDirectory();
            Analyzer    analyzer = new StandardAnalyzer();
            IndexWriter writer   = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

            writer.SetMaxBufferedDocs(3);
            writer.SetMergeFactor(2);
            writer.SetUseCompoundFile(false);
            Document d = new Document();

            Field f1 = new Field("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED);

            f1.SetOmitTermFreqAndPositions(true);
            d.Add(f1);

            for (int i = 0; i < 30; i++)
            {
                writer.AddDocument(d);
            }

            writer.Commit();

            AssertNoPrx(ram);

            // force merge
            writer.Optimize();
            // flush
            writer.Close();

            AssertNoPrx(ram);
            _TestUtil.CheckIndex(ram);
            ram.Close();
        }
		private static Directory MakeIndex()
		{
			Directory dir = new RAMDirectory();
			try
			{
				System.Random r = new System.Random((System.Int32) (BASE_SEED + 42));
				Analyzer analyzer = new SimpleAnalyzer();
				IndexWriter writer = new IndexWriter(dir, analyzer, true);
				
				writer.SetUseCompoundFile(false);
				
				for (int d = 1; d <= NUM_DOCS; d++)
				{
					Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
					for (int f = 1; f <= NUM_FIELDS; f++)
					{
						doc.Add(new Field("f" + f, data[f % data.Length] + '#' + data[r.Next(data.Length)], Field.Store.YES, Field.Index.TOKENIZED));
					}
					writer.AddDocument(doc);
				}
				writer.Close();
			}
			catch (System.Exception e)
			{
				throw new System.SystemException("Failed to build the test index.", e);
			}
			return dir;
		}
        public virtual void  TestTargetCFS()
        {
            Directory   dir    = new RAMDirectory();
            IndexWriter writer = NewWriter(dir, true);

            writer.SetUseCompoundFile(false);
            AddDocs(writer, 1);
            writer.Close();

            Directory other = new RAMDirectory();

            writer = NewWriter(other, true);
            writer.SetUseCompoundFile(true);
            writer.AddIndexesNoOptimize(new Directory[] { dir });
            Assert.IsTrue(writer.NewestSegment().GetUseCompoundFile());
            writer.Close();
        }
        public virtual void  TestAddSelf()
        {
            // main directory
            Directory dir = new RAMDirectory();
            // auxiliary directory
            Directory aux = new RAMDirectory();

            IndexWriter writer = null;

            writer = NewWriter(dir, true);
            // add 100 documents
            AddDocs(writer, 100);
            Assert.AreEqual(100, writer.DocCount());
            writer.Close();

            writer = NewWriter(aux, true);
            writer.SetUseCompoundFile(false);             // use one without a compound file
            writer.SetMaxBufferedDocs(1000);
            // add 140 documents in separate files
            AddDocs(writer, 40);
            writer.Close();
            writer = NewWriter(aux, true);
            writer.SetUseCompoundFile(false);             // use one without a compound file
            writer.SetMaxBufferedDocs(1000);
            AddDocs(writer, 100);
            writer.Close();

            writer = NewWriter(dir, false);
            try
            {
                // cannot add self
                writer.AddIndexesNoOptimize(new Directory[] { aux, dir });
                Assert.Fail("adding an index to itself should have thrown an ArgumentException");
            }
            catch (System.ArgumentException)
            {
                // expected: the self-add is rejected and the index is left untouched
                Assert.AreEqual(100, writer.DocCount());
            }
            writer.Close();

            // make sure the index is correct
            VerifyNumDocs(dir, 100);
        }
        public void CreateIndex(string databaseFileName)
        {
            IndexWriter writer = new IndexWriter(indexFolderName, new StandardAnalyzer(), true);
            writer.SetUseCompoundFile(false);

            IndexDatabase(writer, Container.GetContainer(databaseFileName));

            writer.Optimize();
            writer.Close();
        }
        private void  CreateIndex(Directory dir)
        {
            IndexWriter iw = new IndexWriter(dir, anlzr, true, IndexWriter.MaxFieldLength.LIMITED);

            iw.SetMaxBufferedDocs(5);
            iw.SetMergeFactor(3);
            iw.SetSimilarity(similarityOne);
            iw.SetUseCompoundFile(true);
            iw.Close();
        }
Example #11
        public static IndexWriter GetAzureIndexWriter(this LuceneIndexer indexer)
        {
            indexer.EnsureIndex(false);
            var writer = new IndexWriter(indexer.GetLuceneDirectory(), indexer.IndexingAnalyzer, false, IndexWriter.MaxFieldLength.UNLIMITED);

            writer.SetRAMBufferSizeMB(10.0);
            writer.SetUseCompoundFile(false);
            writer.SetMaxMergeDocs(10000);
            writer.SetMergeFactor(100);
            return writer;
        }
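
A hedged usage sketch for the extension method above; the indexer instance and the document fields are hypothetical, only GetAzureIndexWriter itself comes from the example:

            // Hypothetical call site (indexer is some LuceneIndexer instance):
            var writer = indexer.GetAzureIndexWriter();
            try
            {
                var doc = new Document();
                doc.Add(new Field("id", "42", Field.Store.YES, Field.Index.NOT_ANALYZED));
                writer.AddDocument(doc);
                writer.Commit();
            }
            finally
            {
                writer.Close();   // also releases the write lock on the underlying directory
            }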
		public override void SetUp()
		{
			base.SetUp();
			fieldInfos = new FieldInfos();
			DocHelper.SetupDoc(testDoc);
			fieldInfos.Add(testDoc);
			IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
			writer.SetUseCompoundFile(false);
			writer.AddDocument(testDoc);
			// capture the segment name before the writer is closed
			segmentName = writer.NewestSegment().name;
			writer.Close();
		}
Example #13
 /// <summary> Setting to turn on usage of a compound file. When on, multiple files
 /// for each segment are merged into a single file once the segment creation
 /// is finished. This is done regardless of what directory is in use.
 /// </summary>
 /// <seealso cref="IndexWriter.SetUseCompoundFile(bool)"/>
 /// <throws>  IllegalStateException if the index is closed </throws>
 public virtual void  SetUseCompoundFile(bool useCompoundFile)
 {
     lock (directory)
     {
         AssureOpen();
         if (indexWriter != null)
         {
             indexWriter.SetUseCompoundFile(useCompoundFile);
         }
         this.useCompoundFile = useCompoundFile;
     }
 }
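
A short sketch of the intended call pattern; the modifier instance is a hypothetical wrapper of the kind this setter belongs to. Note the toggle applies to the live indexWriter immediately and is remembered for writers created later:

     // Hypothetical usage of the setter above:
     modifier.SetUseCompoundFile(false);  // segments flushed from now on stay as loose per-segment files
     // ... add or delete documents through the wrapper ...
     modifier.SetUseCompoundFile(true);   // segments created from here on are packed into a single .cfs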
        public override void  SetUp()
        {
            base.SetUp();
            fieldInfos = new FieldInfos();
            DocHelper.SetupDoc(testDoc);
            fieldInfos.Add(testDoc);
            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            writer.SetUseCompoundFile(false);
            writer.AddDocument(testDoc);
            writer.Close();
        }
        private void  AddDocs(Directory dir, int ndocs, bool compound)
        {
            IndexWriter iw = new IndexWriter(dir, anlzr, false, IndexWriter.MaxFieldLength.LIMITED);

            iw.SetMaxBufferedDocs(5);
            iw.SetMergeFactor(3);
            iw.SetSimilarity(similarityOne);
            iw.SetUseCompoundFile(compound);
            for (int i = 0; i < ndocs; i++)
            {
                iw.AddDocument(NewDoc());
            }
            iw.Close();
        }
		private void  DoTest(System.IO.StringWriter out_Renamed, bool useCompoundFiles)
		{
			Directory directory = new RAMDirectory();
			Analyzer analyzer = new SimpleAnalyzer();
			IndexWriter writer = new IndexWriter(directory, analyzer, true);
			
			writer.SetUseCompoundFile(useCompoundFiles);
			
			int MAX_DOCS = 225;
			
			for (int j = 0; j < MAX_DOCS; j++)
			{
				Document d = new Document();
				d.Add(Field.Text(PRIORITY_FIELD, HIGH_PRIORITY));
				d.Add(Field.Text(ID_FIELD, System.Convert.ToString(j)));
				writer.AddDocument(d);
			}
			writer.Close();
			
			// try a search without OR
			Searcher searcher = new IndexSearcher(directory);
			Hits hits = null;
			
			QueryParsers.QueryParser parser = new QueryParsers.QueryParser(PRIORITY_FIELD, analyzer);
			
			Query query = parser.Parse(HIGH_PRIORITY);
			out_Renamed.WriteLine("Query: " + query.ToString(PRIORITY_FIELD));
			
			hits = searcher.Search(query);
			PrintHits(out_Renamed, hits);
			CheckHits(hits, MAX_DOCS);
			
			searcher.Close();
			
			// try a new search with OR
			searcher = new IndexSearcher(directory);
			hits = null;
			
			parser = new QueryParsers.QueryParser(PRIORITY_FIELD, analyzer);
			
			query = parser.Parse(HIGH_PRIORITY + " OR " + MED_PRIORITY);
			out_Renamed.WriteLine("Query: " + query.ToString(PRIORITY_FIELD));
			
			hits = searcher.Search(query);
			PrintHits(out_Renamed, hits);
			CheckHits(hits, MAX_DOCS);
			
			searcher.Close();
		}
Example #17
        static void Main()
        {
            _indexWriter = new IndexWriter(FSDirectory.Open(new DirectoryInfo(_indexPath)), new StandardAnalyzer(), true,
                                           IndexWriter.MaxFieldLength.UNLIMITED);
            _indexWriter.SetUseCompoundFile(false);

            // TODO: we can do this in batches, and trigger another thread to do that
            var sw = ParseDisks(disk => disks.Add(disk));

            Console.WriteLine("Elapsed: " + sw.Elapsed);

            sw.Restart();

            foreach (var disk in disks)
            {
                AddDocShortVersion(disk);
            }

            _indexWriter.SetUseCompoundFile(true);
            _indexWriter.Optimize();
            _indexWriter.Close(true);

            Console.WriteLine("Elapsed: " + sw.Elapsed);
        }
Example #18
        private void button1_Click(object sender, EventArgs e)
        {
            //FilterData.PrepareCharMap();

            int total = this.databaseDataSet.trans.Count;
            int counter = 1;
            string fsPath = indexpath;

            if (!System.IO.Directory.Exists(fsPath)) System.IO.Directory.CreateDirectory(fsPath);
            if (IndexReader.IndexExists(fsPath)) return;
            RAMDirectory dir = new RAMDirectory();
            IndexWriter ramWriter = new IndexWriter(dir, new DiacriticAnalyzer(FilterData.stopWords), true);
            IndexWriter fsWriter = new IndexWriter(fsPath, new DiacriticAnalyzer(FilterData.stopWords), !IndexReader.IndexExists(fsPath));
            ramWriter.SetUseCompoundFile(false);
            fsWriter.SetUseCompoundFile(false);
            foreach (DataRow row in this.databaseDataSet.trans.Rows)
            {

                Document doc = new Document();
                string pid = row[this.databaseDataSet.trans.pidColumn].ToString();
                string sid = row[this.databaseDataSet.trans.sidColumn].ToString();
                string ayatno = row[this.databaseDataSet.trans.ayatnoColumn].ToString();

                string arabic = row[this.databaseDataSet.trans.ayat_arabicColumn].ToString();
                string urdu = row[this.databaseDataSet.trans.ayat_urduColumn].ToString();
                string english = row[this.databaseDataSet.trans.ayat_descColumn].ToString();

                doc.Add(Field.Keyword("pid", pid));
                doc.Add(Field.Keyword("sid", sid));
                doc.Add(Field.Keyword("ayatno", ayatno));
                doc.Add(Field.Text("ayat_desc", english));
                doc.Add(Field.Text("ayat_arabic", arabic));
                doc.Add(Field.Text("ayat_urdu", urdu));
                doc.Add(Field.Text("contents", arabic + Environment.NewLine + urdu + Environment.NewLine + english));
                ramWriter.AddDocument(doc);
                int percent = counter * 100 / total;
                this.progressBar1.Value = percent;
                label1.Text = percent.ToString() + "%";
                counter++;
                Application.DoEvents();

            }
            ramWriter.Optimize();
            fsWriter.AddIndexes(new Lucene.Net.Store.Directory[] { dir });
            ramWriter.Close();
            fsWriter.Close();
            MessageBox.Show("Done Indexing!");
        }
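
The handler above follows a common pattern: build the index in a RAMDirectory first, then merge the finished RAM index into the on-disk writer with AddIndexes, keeping per-document disk writes out of the hot loop. A stripped-down sketch of just that pattern, with illustrative names and path:

            // RAM-then-disk indexing sketch (names and path are illustrative):
            RAMDirectory ram = new RAMDirectory();
            IndexWriter ramWriter = new IndexWriter(ram, new StandardAnalyzer(), true);
            ramWriter.SetUseCompoundFile(false);
            // ... ramWriter.AddDocument(...) for each record ...
            ramWriter.Optimize();
            ramWriter.Close();

            IndexWriter fsWriter = new IndexWriter("C:\\index", new StandardAnalyzer(), true);
            fsWriter.AddIndexes(new Lucene.Net.Store.Directory[] { ram });
            fsWriter.Close();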
        /// <summary>
        /// Constructor
        /// </summary>
        /// <param name="indexPath">Path where you wish to create the index</param>
        /// <param name="mode">One of Create, Append or Search</param>
        public Indexer(string indexPath, IndexMode mode)
        {
            m_indexMode = mode;
            m_bSucess = false;
            m_analyzer = new StandardAnalyzer();

            if (mode == IndexMode.CREATE)
            {
                try
                {
                    m_indexWriter = new IndexWriter(indexPath, m_analyzer, true);
                    m_indexWriter.SetUseCompoundFile(true);
                    m_bSucess = true;
                }
                catch (Exception e)
                {
                    Logger.Instance.LogException(e);
                    m_bSucess = false;
                }
            }
            else if (mode == IndexMode.APPEND)
            {
                try
                {
                    m_indexWriter = new IndexWriter(indexPath, m_analyzer, false);
                    m_indexWriter.SetUseCompoundFile(true);
                    m_bSucess = true;
                }
                catch (Exception e)
                {
                    Logger.Instance.LogException(e);
                    m_bSucess = false;
                }
            }
            else if (mode == IndexMode.SEARCH)
            {
                try
                {
                    m_indexSearcher = new IndexSearcher(indexPath);
                    m_bSucess = true;
                }
                catch (Exception e)
                {
                    Logger.Instance.LogException(e);
                    m_bSucess = false;
                }
            }
        }
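
A hedged example of constructing this class in each mode (the path is illustrative):

            // Hypothetical call sites for the constructor above:
            Indexer creator  = new Indexer(@"C:\indexes\ads", IndexMode.CREATE);   // wipe and rebuild
            Indexer appender = new Indexer(@"C:\indexes\ads", IndexMode.APPEND);   // add to an existing index
            Indexer searcher = new Indexer(@"C:\indexes\ads", IndexMode.SEARCH);   // open for queries only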
Example #20
 /// <summary> Close the IndexReader and open an IndexWriter.</summary>
 /// <throws>  IOException </throws>
 protected internal virtual void  CreateIndexWriter()
 {
     if (indexWriter == null)
     {
         if (indexReader != null)
         {
             indexReader.Close();
             indexReader = null;
         }
         indexWriter = new IndexWriter(directory, analyzer, false);
         indexWriter.SetInfoStream(infoStream);
         indexWriter.SetUseCompoundFile(useCompoundFile);
         indexWriter.SetMaxBufferedDocs(maxBufferedDocs);
         indexWriter.SetMaxFieldLength(maxFieldLength);
         indexWriter.SetMergeFactor(mergeFactor);
     }
 }
        public virtual void  TestHangOnClose()
        {
            Directory   dir    = new MockRAMDirectory();
            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            writer.SetMergePolicy(new LogByteSizeMergePolicy(writer));
            writer.SetMaxBufferedDocs(5);
            writer.SetUseCompoundFile(false);
            writer.SetMergeFactor(100);

            Document doc = new Document();

            doc.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
            for (int i = 0; i < 60; i++)
            {
                writer.AddDocument(doc);
            }
            writer.SetMaxBufferedDocs(200);
            Document doc2 = new Document();

            doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
            doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
            doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
            doc2.Add(new Field("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, Field.Index.NO));
            for (int i = 0; i < 10; i++)
            {
                writer.AddDocument(doc2);
            }
            writer.Close();

            Directory dir2 = new MockRAMDirectory();

            writer = new IndexWriter(dir2, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
            LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy(writer);

            lmp.SetMinMergeMB(0.0001);
            writer.SetMergePolicy(lmp);
            writer.SetMergeFactor(4);
            writer.SetUseCompoundFile(false);
            writer.SetMergeScheduler(new SerialMergeScheduler());
            writer.AddIndexesNoOptimize(new Directory[] { dir });
            writer.Close();
            dir.Close();
            dir2.Close();
        }
Example #22
		private void  DoTestSearch(System.IO.StreamWriter out_Renamed, bool useCompoundFile)
		{
			Directory directory = new RAMDirectory();
			Analyzer analyzer = new SimpleAnalyzer();
			IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
			
			writer.SetUseCompoundFile(useCompoundFile);
			
			System.String[] docs = new System.String[]{"a b c d e", "a b c d e a b c d e", "a b c d e f g h i j", "a c e", "e c a", "a c e a c e", "a c e a b c"};
			for (int j = 0; j < docs.Length; j++)
			{
				Document d = new Document();
				d.Add(new Field("contents", docs[j], Field.Store.YES, Field.Index.ANALYZED));
				writer.AddDocument(d);
			}
			writer.Close();
			
			Searcher searcher = new IndexSearcher(directory);
			
			System.String[] queries = new System.String[]{"a b", "\"a b\"", "\"a b c\"", "a c", "\"a c\"", "\"a c e\""};
			ScoreDoc[] hits = null;
			
			QueryParser parser = new QueryParser("contents", analyzer);
			parser.SetPhraseSlop(4);
			for (int j = 0; j < queries.Length; j++)
			{
				Query query = parser.Parse(queries[j]);
				out_Renamed.WriteLine("Query: " + query.ToString("contents"));
				
				//DateFilter filter =
				//  new DateFilter("modified", Time(1997,0,1), Time(1998,0,1));
				//DateFilter filter = DateFilter.Before("modified", Time(1997,00,01));
				//System.out.println(filter);
				
				hits = searcher.Search(query, null, 1000).scoreDocs;
				
				out_Renamed.WriteLine(hits.Length + " total results");
				for (int i = 0; i < hits.Length && i < 10; i++)
				{
					Document d = searcher.Doc(hits[i].doc);
					out_Renamed.WriteLine(i + " " + hits[i].score + " " + d.Get("contents"));
				}
			}
			searcher.Close();
		}
        public virtual void  TestCloseStoredFields()
        {
            Directory   dir = new MockRAMDirectory();
            IndexWriter w   = new IndexWriter(dir, new SimpleAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);

            w.SetUseCompoundFile(false);
            Document doc = new Document();

            doc.Add(new Field("field", "yes it's stored", Field.Store.YES, Field.Index.ANALYZED));
            w.AddDocument(doc);
            w.Close();
            IndexReader r1 = IndexReader.Open(dir);
            IndexReader r2 = r1.Clone(false);

            r1.Close();
            r2.Close();
            dir.Close();
        }
        public virtual void  TestIndexing()
        {
            Directory   mainDir = new MockRAMDirectory();
            IndexWriter writer  = new IndexWriter(mainDir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);

            writer.SetUseCompoundFile(false);
            IndexReader reader = writer.GetReader();             // start pooling readers

            reader.Close();
            writer.SetMergeFactor(2);
            writer.SetMaxBufferedDocs(10);
            RunThread[] indexThreads = new RunThread[4];
            for (int x = 0; x < indexThreads.Length; x++)
            {
                indexThreads[x]      = new RunThread(this, x % 2, writer);
                indexThreads[x].Name = "Thread " + x;
                indexThreads[x].Start();
            }
            long startTime = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
            long duration  = 5 * 1000;

            while (((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) - startTime) < duration)
            {
                System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 100));
            }
            int delCount = 0;
            int addCount = 0;

            for (int x = 0; x < indexThreads.Length; x++)
            {
                indexThreads[x].run_Renamed_Field = false;
                Assert.IsTrue(indexThreads[x].ex == null);
                addCount += indexThreads[x].addCount;
                delCount += indexThreads[x].delCount;
            }
            for (int x = 0; x < indexThreads.Length; x++)
            {
                indexThreads[x].Join();
            }
            //System.out.println("addCount:"+addCount);
            //System.out.println("delCount:"+delCount);
            writer.Close();
            mainDir.Close();
        }
Example #25
        public virtual void  TestKeepNoneOnInitDeletionPolicy()
        {
            for (int pass = 0; pass < 4; pass++)
            {
                bool autoCommit      = pass < 2;
                bool useCompoundFile = (pass % 2) > 0;

                KeepNoneOnInitDeletionPolicy policy = new KeepNoneOnInitDeletionPolicy(this);

                Directory dir = new RAMDirectory();

                IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
                writer.SetMaxBufferedDocs(10);
                writer.SetUseCompoundFile(useCompoundFile);
                for (int i = 0; i < 107; i++)
                {
                    AddDoc(writer);
                }
                writer.Close();

                writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
                writer.SetUseCompoundFile(useCompoundFile);
                writer.Optimize();
                writer.Close();

                Assert.AreEqual(2, policy.numOnInit);
                if (!autoCommit)
                {
                    // If we are not auto committing then there should
                    // be exactly 2 commits (one per close above):
                    Assert.AreEqual(2, policy.numOnCommit);
                }

                // Simplistic check: just verify the index is in fact
                // readable:
                IndexReader reader = IndexReader.Open(dir);
                reader.Close();

                dir.Close();
            }
        }
        private void  CreateIndex(int numHits)
        {
            int numDocs = 500;

            Directory   directory = new SeekCountingDirectory(this);
            IndexWriter writer    = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            writer.SetUseCompoundFile(false);
            writer.SetMaxBufferedDocs(10);
            for (int i = 0; i < numDocs; i++)
            {
                Document      doc = new Document();
                System.String content;
                if (i % (numDocs / numHits) == 0)
                {
                    // add a document that matches the query "term1 term2"
                    content = this.term1 + " " + this.term2;
                }
                else if (i % 15 == 0)
                {
                    // add a document that only contains term1
                    content = this.term1 + " " + this.term1;
                }
                else
                {
                    // add a document that contains term2 but not term 1
                    content = this.term3 + " " + this.term2;
                }

                doc.Add(new Field(this.field, content, Field.Store.YES, Field.Index.ANALYZED));
                writer.AddDocument(doc);
            }

            // make sure the index has only a single segment
            writer.Optimize();
            writer.Close();

            SegmentReader reader = SegmentReader.GetOnlySegmentReader(directory);

            this.searcher = new IndexSearcher(reader);
        }
		public virtual void  TestIndexing()
		{
			Directory mainDir = new MockRAMDirectory();
			IndexWriter writer = new IndexWriter(mainDir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
			writer.SetUseCompoundFile(false);
			IndexReader reader = writer.GetReader(); // start pooling readers
			reader.Close();
			writer.SetMergeFactor(2);
			writer.SetMaxBufferedDocs(10);
			RunThread[] indexThreads = new RunThread[4];
			for (int x = 0; x < indexThreads.Length; x++)
			{
				indexThreads[x] = new RunThread(this, x % 2, writer);
				indexThreads[x].Name = "Thread " + x;
				indexThreads[x].Start();
			}
			long startTime = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
			long duration = 5 * 1000;
			while (((DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond) - startTime) < duration)
			{
				System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * 100));
			}
			int delCount = 0;
			int addCount = 0;
			for (int x = 0; x < indexThreads.Length; x++)
			{
				indexThreads[x].run_Renamed_Field = false;
				Assert.IsTrue(indexThreads[x].ex == null);
				addCount += indexThreads[x].addCount;
				delCount += indexThreads[x].delCount;
			}
			for (int x = 0; x < indexThreads.Length; x++)
			{
				indexThreads[x].Join();
			}
			//System.out.println("addCount:"+addCount);
			//System.out.println("delCount:"+delCount);
			writer.Close();
			mainDir.Close();
		}
        public virtual void  TestSimpleCase()
        {
            System.String[] keywords  = new System.String[] { "1", "2" };
            System.String[] unindexed = new System.String[] { "Netherlands", "Italy" };
            System.String[] unstored  = new System.String[] { "Amsterdam has lots of bridges", "Venice has lots of canals" };
            System.String[] text      = new System.String[] { "Amsterdam", "Venice" };

            for (int pass = 0; pass < 2; pass++)
            {
                bool autoCommit = (0 == pass);

                Directory   dir      = new MockRAMDirectory();
                IndexWriter modifier = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true);
                modifier.SetUseCompoundFile(true);
                modifier.SetMaxBufferedDeleteTerms(1);

                for (int i = 0; i < keywords.Length; i++)
                {
                    Document doc = new Document();
                    doc.Add(new Field("id", keywords[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
                    doc.Add(new Field("country", unindexed[i], Field.Store.YES, Field.Index.NO));
                    doc.Add(new Field("contents", unstored[i], Field.Store.NO, Field.Index.ANALYZED));
                    doc.Add(new Field("city", text[i], Field.Store.YES, Field.Index.ANALYZED));
                    modifier.AddDocument(doc);
                }
                modifier.Optimize();
                modifier.Commit();

                Term term     = new Term("city", "Amsterdam");
                int  hitCount = GetHitCount(dir, term);
                Assert.AreEqual(1, hitCount);
                modifier.DeleteDocuments(term);
                modifier.Commit();
                hitCount = GetHitCount(dir, term);
                Assert.AreEqual(0, hitCount);

                modifier.Close();
                dir.Close();
            }
        }
Example #29
 /// <summary> Close the IndexReader and open an IndexWriter.</summary>
 /// <throws>  CorruptIndexException if the index is corrupt </throws>
 /// <throws>  LockObtainFailedException if another writer has this index
 /// open (<code>write.lock</code> could not be obtained)
 /// </throws>
 /// <throws>  IOException if there is a low-level IO error </throws>
 protected internal virtual void  CreateIndexWriter()
 {
     if (indexWriter == null)
     {
         if (indexReader != null)
         {
             indexReader.Close();
             indexReader = null;
         }
         indexWriter = new IndexWriter(directory, analyzer, false, new IndexWriter.MaxFieldLength(maxFieldLength));
         // IndexModifier cannot use ConcurrentMergeScheduler
         // because it synchronizes on the directory which can
         // cause deadlock
         indexWriter.SetMergeScheduler(new SerialMergeScheduler());
         indexWriter.SetInfoStream(infoStream);
         indexWriter.SetUseCompoundFile(useCompoundFile);
         if (maxBufferedDocs != IndexWriter.DISABLE_AUTO_FLUSH)
         {
             indexWriter.SetMaxBufferedDocs(maxBufferedDocs);
         }
         indexWriter.SetMergeFactor(mergeFactor);
     }
 }
        public virtual void  CreateIndex(System.String dirName, bool doCFS)
        {
            RmDir(dirName);

            dirName = FullDir(dirName);

            Directory   dir    = FSDirectory.Open(new System.IO.FileInfo(dirName));
            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            writer.SetUseCompoundFile(doCFS);
            writer.SetMaxBufferedDocs(10);

            for (int i = 0; i < 35; i++)
            {
                AddDoc(writer, i);
            }
            Assert.AreEqual(35, writer.DocCount(), "wrong doc count");
            writer.Close();

            // open fresh writer so we get no prx file in the added segment
            writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
            writer.SetUseCompoundFile(doCFS);
            writer.SetMaxBufferedDocs(10);
            AddNoProxDoc(writer);
            writer.Close();

            // Delete one doc so we get a .del file:
            IndexReader reader     = IndexReader.Open(dir);
            Term        searchTerm = new Term("id", "7");
            int         delCount   = reader.DeleteDocuments(searchTerm);

            Assert.AreEqual(1, delCount, "didn't delete the right number of documents");

            // Set one norm so we get a .s0 file:
            reader.SetNorm(21, "content", (float)1.5);
            reader.Close();
        }
		/// <summary>
		/// IndexWriter that can be used to apply updates to an index
		/// </summary>
		/// <param name="indexPath">File system path to the target index</param>
		/// <param name="oAnalyzer">Lucene Analyzer to be used by the underlying IndexWriter</param>
		/// <param name="bCompoundFile">Setting to dictate if the index should use compound format</param>
		/// <returns>An IndexWriter for the index at <paramref name="indexPath"/>; a new index is created if none exists there</returns>
		private IndexWriter GetIndexWriter(string indexPath, Analyzer oAnalyzer, bool bCompoundFile)
		{
			bool bExists = System.IO.Directory.Exists(indexPath);
			if (!bExists)
				System.IO.Directory.CreateDirectory(indexPath);
			bExists = IndexReader.IndexExists(FSDirectory.GetDirectory(indexPath, false));
			IndexWriter idxWriter = new IndexWriter(indexPath, oAnalyzer, !bExists);
			idxWriter.SetUseCompoundFile(bCompoundFile);
			return idxWriter;
		}
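
A minimal sketch of calling this helper from within the same class (the path and field are assumptions; Field.Text matches the era of the API used above):

		// Hypothetical usage of GetIndexWriter above:
		IndexWriter writer = GetIndexWriter(@"C:\indexes\catalog", new StandardAnalyzer(), true);
		try
		{
			Document doc = new Document();
			doc.Add(Field.Text("title", "example"));
			writer.AddDocument(doc);
		}
		finally
		{
			writer.Close();
		}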
		public virtual void  TestVariableSchema()
		{
			MockRAMDirectory dir = new MockRAMDirectory();
			int delID = 0;
			for (int i = 0; i < 20; i++)
			{
				IndexWriter writer = new IndexWriter(dir, false, new WhitespaceAnalyzer());
				writer.SetMaxBufferedDocs(2);
				writer.SetMergeFactor(2);
				writer.SetUseCompoundFile(false);
				Document doc = new Document();
				System.String contents = "aa bb cc dd ee ff gg hh ii jj kk";
				
				if (i == 7)
				{
					// Add empty docs here
					doc.Add(new Field("content3", "", Field.Store.NO, Field.Index.TOKENIZED));
				}
				else
				{
					Field.Store storeVal;
					if (i % 2 == 0)
					{
						doc.Add(new Field("content4", contents, Field.Store.YES, Field.Index.TOKENIZED));
						storeVal = Field.Store.YES;
					}
					else
						storeVal = Field.Store.NO;
					doc.Add(new Field("content1", contents, storeVal, Field.Index.TOKENIZED));
					doc.Add(new Field("content3", "", Field.Store.YES, Field.Index.TOKENIZED));
					doc.Add(new Field("content5", "", storeVal, Field.Index.TOKENIZED));
				}
				
				for (int j = 0; j < 4; j++)
					writer.AddDocument(doc);
				
				writer.Close();
				IndexReader reader = IndexReader.Open(dir);
				reader.DeleteDocument(delID++);
				reader.Close();
				
				if (0 == i % 4)
				{
					writer = new IndexWriter(dir, false, new WhitespaceAnalyzer());
					writer.SetUseCompoundFile(false);
					writer.Optimize();
					writer.Close();
				}
			}
		}
		public override void  SetUp()
		{
			base.SetUp();
			/*
			for (int i = 0; i < testFields.length; i++) {
			fieldInfos.add(testFields[i], true, true, testFieldsStorePos[i], testFieldsStoreOff[i]);
			}
			*/
			
			System.Array.Sort(testTerms);
			int tokenUpto = 0;
			for (int i = 0; i < testTerms.Length; i++)
			{
				positions[i] = new int[TERM_FREQ];
				offsets[i] = new TermVectorOffsetInfo[TERM_FREQ];
				// first position must be 0
				for (int j = 0; j < TERM_FREQ; j++)
				{
					// positions are always sorted in increasing order
					positions[i][j] = (int) (j * 10 + (new System.Random().NextDouble()) * 10);
					// offsets are always sorted in increasing order
					offsets[i][j] = new TermVectorOffsetInfo(j * 10, j * 10 + testTerms[i].Length);
					TestToken token = tokens[tokenUpto++] = new TestToken(this);
					token.text = testTerms[i];
					token.pos = positions[i][j];
					token.startOffset = offsets[i][j].GetStartOffset();
					token.endOffset = offsets[i][j].GetEndOffset();
				}
			}
			System.Array.Sort(tokens);
			
			IndexWriter writer = new IndexWriter(dir, new MyAnalyzer(this), true, IndexWriter.MaxFieldLength.LIMITED);
			writer.SetUseCompoundFile(false);
			Document doc = new Document();
			for (int i = 0; i < testFields.Length; i++)
			{
				Field.TermVector tv;
				if (testFieldsStorePos[i] && testFieldsStoreOff[i])
					tv = Field.TermVector.WITH_POSITIONS_OFFSETS;
				else if (testFieldsStorePos[i] && !testFieldsStoreOff[i])
					tv = Field.TermVector.WITH_POSITIONS;
				else if (!testFieldsStorePos[i] && testFieldsStoreOff[i])
					tv = Field.TermVector.WITH_OFFSETS;
				else
					tv = Field.TermVector.YES;
				doc.Add(new Field(testFields[i], "", Field.Store.NO, Field.Index.ANALYZED, tv));
			}
			
			//Create 5 documents for testing, they all have the same
			//terms
			for (int j = 0; j < 5; j++)
				writer.AddDocument(doc);
			writer.Flush();
			seg = writer.NewestSegment().name;
			writer.Close();
			
			fieldInfos = new FieldInfos(dir, seg + "." + IndexFileNames.FIELD_INFOS_EXTENSION);
		}
        public override void  SetUp()
        {
            base.SetUp();

            /*
             * for (int i = 0; i < testFields.length; i++) {
             * fieldInfos.add(testFields[i], true, true, testFieldsStorePos[i], testFieldsStoreOff[i]);
             * }
             */

            System.Array.Sort(testTerms);
            int tokenUpto = 0;

            for (int i = 0; i < testTerms.Length; i++)
            {
                positions[i] = new int[TERM_FREQ];
                offsets[i]   = new TermVectorOffsetInfo[TERM_FREQ];
                // first position must be 0
                for (int j = 0; j < TERM_FREQ; j++)
                {
                    // positions are always sorted in increasing order
                    positions[i][j] = (int)(j * 10 + (new System.Random().NextDouble()) * 10);
                    // offsets are always sorted in increasing order
                    offsets[i][j] = new TermVectorOffsetInfo(j * 10, j * 10 + testTerms[i].Length);
                    TestToken token = tokens[tokenUpto++] = new TestToken(this);
                    token.text        = testTerms[i];
                    token.pos         = positions[i][j];
                    token.startOffset = offsets[i][j].GetStartOffset();
                    token.endOffset   = offsets[i][j].GetEndOffset();
                }
            }
            System.Array.Sort(tokens);

            IndexWriter writer = new IndexWriter(dir, new MyAnalyzer(this), true, IndexWriter.MaxFieldLength.LIMITED);

            writer.SetUseCompoundFile(false);
            Document doc = new Document();

            for (int i = 0; i < testFields.Length; i++)
            {
                Field.TermVector tv;
                if (testFieldsStorePos[i] && testFieldsStoreOff[i])
                {
                    tv = Field.TermVector.WITH_POSITIONS_OFFSETS;
                }
                else if (testFieldsStorePos[i] && !testFieldsStoreOff[i])
                {
                    tv = Field.TermVector.WITH_POSITIONS;
                }
                else if (!testFieldsStorePos[i] && testFieldsStoreOff[i])
                {
                    tv = Field.TermVector.WITH_OFFSETS;
                }
                else
                {
                    tv = Field.TermVector.YES;
                }
                doc.Add(new Field(testFields[i], "", Field.Store.NO, Field.Index.ANALYZED, tv));
            }

            //Create 5 documents for testing, they all have the same
            //terms
            for (int j = 0; j < 5; j++)
            {
                writer.AddDocument(doc);
            }
            writer.Flush();
            seg = writer.NewestSegment().name;
            writer.Close();

            fieldInfos = new FieldInfos(dir, seg + "." + IndexFileNames.FIELD_INFOS_EXTENSION);
        }
Example #35
		private void  AddDocs(Directory dir, int ndocs, bool compound)
		{
			IndexWriter iw = new IndexWriter(dir, anlzr, false, IndexWriter.MaxFieldLength.LIMITED);
			iw.SetMaxBufferedDocs(5);
			iw.SetMergeFactor(3);
			iw.SetSimilarity(similarityOne);
			iw.SetUseCompoundFile(compound);
			for (int i = 0; i < ndocs; i++)
			{
				iw.AddDocument(NewDoc());
			}
			iw.Close();
		}
 /// <summary>
 /// Builds an in-memory index.
 /// </summary>
 /// <param name="ramdir">the in-memory index directory</param>
 public IntranetIndexer(Lucene.Net.Store.Directory ramdir)
 {
     writer = new IndexWriter(ramdir, new StandardAnalyzer(), true);
     writer.SetUseCompoundFile(true);
 }
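
A hedged construction sketch for the in-memory indexer above (the class's document-adding members are not shown in this excerpt):

     // Hypothetical construction of the indexer above:
     RAMDirectory ramdir = new RAMDirectory();
     IntranetIndexer indexer = new IntranetIndexer(ramdir);
     // ... index documents via the class's other members, then flush/close its writer ...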
		public virtual void  TestKeepAllDeletionPolicy()
		{
			
			for (int pass = 0; pass < 4; pass++)
			{
				
				bool autoCommit = pass < 2;
				bool useCompoundFile = (pass % 2) > 0;
				
				KeepAllDeletionPolicy policy = new KeepAllDeletionPolicy(this);
				
				Directory dir = new RAMDirectory();
				
				IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
				writer.SetMaxBufferedDocs(10);
				writer.SetUseCompoundFile(useCompoundFile);
				for (int i = 0; i < 107; i++)
				{
					AddDoc(writer);
				}
				writer.Close();
				
				writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
				writer.SetUseCompoundFile(useCompoundFile);
				writer.Optimize();
				writer.Close();
				
				Assert.AreEqual(2, policy.numOnInit);
				if (autoCommit)
				{
					Assert.IsTrue(policy.numOnCommit > 2);
				}
				else
				{
					// If we are not auto committing then there should
					// be exactly 2 commits (one per close above):
					Assert.AreEqual(2, policy.numOnCommit);
				}
				
				// Simplistic check: just verify all segments_N's still
				// exist, and, I can open a reader on each:
				dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
				long gen = SegmentInfos.GetCurrentSegmentGeneration(dir);
				while (gen > 0)
				{
					IndexReader reader = IndexReader.Open(dir);
					reader.Close();
					dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
					gen--;
					
					if (gen > 0)
					{
						// Now that we've removed a commit point, which
						// should have orphan'd at least one index file.
						// Open & close a writer and assert that it
						// actually removed something:
						int preCount = dir.List().Length;
						writer = new IndexWriter(dir, false, new WhitespaceAnalyzer(), false, policy);
						writer.Close();
						int postCount = dir.List().Length;
						Assert.IsTrue(postCount < preCount);
					}
				}
				
				dir.Close();
			}
		}
        public virtual void  TestErrorAfterApplyDeletes()
        {
            MockRAMDirectory.Failure failure = new AnonymousClassFailure(this);

            // create a couple of files

            System.String[] keywords  = new System.String[] { "1", "2" };
            System.String[] unindexed = new System.String[] { "Netherlands", "Italy" };
            System.String[] unstored  = new System.String[] { "Amsterdam has lots of bridges", "Venice has lots of canals" };
            System.String[] text      = new System.String[] { "Amsterdam", "Venice" };

            for (int pass = 0; pass < 2; pass++)
            {
                bool             autoCommit = (0 == pass);
                MockRAMDirectory dir        = new MockRAMDirectory();
                IndexWriter      modifier   = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true);
                modifier.SetUseCompoundFile(true);
                modifier.SetMaxBufferedDeleteTerms(2);

                dir.FailOn(failure.Reset());

                for (int i = 0; i < keywords.Length; i++)
                {
                    Document doc = new Document();
                    doc.Add(new Field("id", keywords[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
                    doc.Add(new Field("country", unindexed[i], Field.Store.YES, Field.Index.NO));
                    doc.Add(new Field("contents", unstored[i], Field.Store.NO, Field.Index.ANALYZED));
                    doc.Add(new Field("city", text[i], Field.Store.YES, Field.Index.ANALYZED));
                    modifier.AddDocument(doc);
                }
                // flush (and commit if ac)

                modifier.Optimize();
                modifier.Commit();

                // one of the two files hits

                Term term     = new Term("city", "Amsterdam");
                int  hitCount = GetHitCount(dir, term);
                Assert.AreEqual(1, hitCount);

                // open the writer again (closed above)

                // delete the doc
                // max buf del terms is two, so this is buffered

                modifier.DeleteDocuments(term);

                // add a doc (needed for the !ac case; see below)
                // doc remains buffered

                Document doc2 = new Document();
                modifier.AddDocument(doc2);

                // commit the changes, the buffered deletes, and the new doc

                // The failure object will fail on the first write after the del
                // file gets created when processing the buffered delete

                // in the ac case, this will be when writing the new segments
                // files so we really don't need the new doc, but it's harmless

                // in the !ac case, a new segments file won't be created but in
                // this case, creation of the cfs file happens next so we need
                // the doc (to test that it's okay that we don't lose deletes if
                // failing while creating the cfs file)

                bool failed = false;
                try
                {
                    modifier.Commit();
                }
                catch (System.IO.IOException)
                {
                    failed = true;
                }

                Assert.IsTrue(failed);

                // The commit above failed, so we need to retry it (which will
                // succeed, because the failure is a one-shot)

                modifier.Commit();

                hitCount = GetHitCount(dir, term);

                // Make sure the delete was successfully flushed:
                Assert.AreEqual(0, hitCount);

                modifier.Close();
                dir.Close();
            }
        }
Example #39
        private void menuItemOptimize_Click(object sender, System.EventArgs e)
        {
            if (indexReader == null)
            {
                ShowStatus(resources.GetString("NoIndex"));
                return;
            }
            if (_readOnly)
            {
                ShowStatus(resources.GetString("Readonly"));
                return;
            }

            try
            {
                indexReader.Close();
                IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
                writer.SetUseCompoundFile(useCompound);
                long startSize = FilesTabPage.CalcTotalFileSize(dir);
                DateTime startTime = DateTime.Now;
                writer.Optimize();
                DateTime endTime = DateTime.Now;
                long endSize = FilesTabPage.CalcTotalFileSize(dir);
                long deltaSize = startSize - endSize;
                String sign = deltaSize < 0 ? " Increased " : " Reduced ";
                String sizeMsg = sign + FilesTabPage.NormalizeSize(Math.Abs(deltaSize)) + FilesTabPage.NormalizeUnit(Math.Abs(deltaSize));
                String timeMsg = ((TimeSpan)(endTime - startTime)).TotalMilliseconds + " ms";
                ShowStatus(sizeMsg + " in " + timeMsg);
                tabFiles.ShowFiles(dir);
                writer.Close();
                indexReader = IndexReader.Open(dir, true);

                InitOverview();
            }
            catch (Exception exc)
            {
                ShowStatus(exc.Message);
            }
        }
		public virtual void  TestExpirationTimeDeletionPolicy()
		{
			
			double SECONDS = 2.0;
			
			bool autoCommit = false;
			bool useCompoundFile = true;
			
			Directory dir = new RAMDirectory();
			ExpirationTimeDeletionPolicy policy = new ExpirationTimeDeletionPolicy(this, dir, SECONDS);
			IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
			writer.SetUseCompoundFile(useCompoundFile);
			writer.Close();
			
			long lastDeleteTime = 0;
			for (int i = 0; i < 7; i++)
			{
				// Record last time when writer performed deletes of
				// past commits
				lastDeleteTime = (System.DateTime.Now.Ticks - 621355968000000000) / 10000;
				writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
				writer.SetUseCompoundFile(useCompoundFile);
				for (int j = 0; j < 17; j++)
				{
					AddDoc(writer);
				}
				writer.Close();
				
				// Make sure to sleep long enough so that some commit
				// points will be deleted:
				System.Threading.Thread.Sleep(new System.TimeSpan((System.Int64) 10000 * (int) (1000.0 * (SECONDS / 5.0))));
			}
			
			// First, make sure the policy in fact deleted something:
			Assert.IsTrue(policy.numDelete > 0, "no commits were deleted");
			
			// Then simplistic check: just verify that the
			// segments_N's that still exist are in fact within SECONDS
			// seconds of the last one's mod time, and, that I can
			// open a reader on each:
			long gen = SegmentInfos.GetCurrentSegmentGeneration(dir);
			
			System.String fileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen);
			dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
			while (gen > 0)
			{
				try
				{
					IndexReader reader = IndexReader.Open(dir);
					reader.Close();
					fileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen);
					long modTime = dir.FileModified(fileName);
					Assert.IsTrue(lastDeleteTime - modTime <= (SECONDS * 1000), "commit point was older than " + SECONDS + " seconds (" + (lastDeleteTime - modTime) + " msec) but did not get deleted");
				}
				catch (System.IO.IOException)
				{
					// OK
					break;
				}
				
				dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
				gen--;
			}
			
			dir.Close();
		}
        public virtual void  TestLazyPerformance()
        {
            System.String      tmpIODir = SupportClass.AppSettings.Get("tempDir", "");
            System.String      userName = System.Environment.UserName;
            System.String      path     = tmpIODir + System.IO.Path.DirectorySeparatorChar.ToString() + "lazyDir" + userName;
            System.IO.FileInfo file     = new System.IO.FileInfo(path);
            _TestUtil.RmDir(file);
            FSDirectory tmpDir = FSDirectory.Open(file);

            Assert.IsTrue(tmpDir != null);

            IndexWriter writer = new IndexWriter(tmpDir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            writer.SetUseCompoundFile(false);
            writer.AddDocument(testDoc);
            writer.Close();

            Assert.IsTrue(fieldInfos != null);
            FieldsReader reader;
            long         lazyTime    = 0;
            long         regularTime = 0;
            int          length      = 50;

            System.Collections.Hashtable lazyFieldNames = new System.Collections.Hashtable();
            SupportClass.CollectionsHelper.AddIfNotContains(lazyFieldNames, DocHelper.LARGE_LAZY_FIELD_KEY);
            SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(new System.Collections.Hashtable(), lazyFieldNames);

            for (int i = 0; i < length; i++)
            {
                reader = new FieldsReader(tmpDir, TEST_SEGMENT_NAME, fieldInfos);
                Assert.IsTrue(reader != null);
                Assert.IsTrue(reader.Size() == 1);

                Document doc;
                doc = reader.Doc(0, null);                 //Load all of them
                Assert.IsTrue(doc != null, "doc is null and it shouldn't be");
                Fieldable field = doc.GetFieldable(DocHelper.LARGE_LAZY_FIELD_KEY);
                Assert.IsTrue(field.IsLazy() == false, "field is lazy");
                System.String value_Renamed;
                long          start;
                long          finish;
                start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
                //On my machine this was always 0ms.
                value_Renamed = field.StringValue();
                finish        = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
                Assert.IsTrue(value_Renamed != null, "value is null and it shouldn't be");
                Assert.IsTrue(field != null, "field is null and it shouldn't be");
                regularTime += (finish - start);
                reader.Close();
                reader = null;
                doc    = null;
                //Hmmm, are we still in cache???
                System.GC.Collect();
                reader = new FieldsReader(tmpDir, TEST_SEGMENT_NAME, fieldInfos);
                doc    = reader.Doc(0, fieldSelector);
                field  = doc.GetFieldable(DocHelper.LARGE_LAZY_FIELD_KEY);
                Assert.IsTrue(field.IsLazy() == true, "field is not lazy");
                start = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
                //On my machine this took around 50 - 70ms
                value_Renamed = field.StringValue();
                finish        = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
                Assert.IsTrue(value_Renamed != null, "value is null and it shouldn't be");
                lazyTime += (finish - start);
                reader.Close();
            }
            System.Console.Out.WriteLine("Average Non-lazy time (should be very close to zero): " + regularTime / length + " ms for " + length + " reads");
            System.Console.Out.WriteLine("Average Lazy Time (should be greater than zero): " + lazyTime / length + " ms for " + length + " reads");
        }
		public virtual void  TestErrorAfterApplyDeletes()
		{
			
			MockRAMDirectory.Failure failure = new AnonymousClassFailure(this);
			
			// create a couple of files
			
			System.String[] keywords = new System.String[]{"1", "2"};
			System.String[] unindexed = new System.String[]{"Netherlands", "Italy"};
			System.String[] unstored = new System.String[]{"Amsterdam has lots of bridges", "Venice has lots of canals"};
			System.String[] text = new System.String[]{"Amsterdam", "Venice"};
			
			for (int pass = 0; pass < 2; pass++)
			{
				bool autoCommit = (0 == pass);
				MockRAMDirectory dir = new MockRAMDirectory();
				IndexWriter modifier = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true);
				modifier.SetUseCompoundFile(true);
				modifier.SetMaxBufferedDeleteTerms(2);
				
				dir.FailOn(failure.Reset());
				
				for (int i = 0; i < keywords.Length; i++)
				{
					Document doc = new Document();
					doc.Add(new Field("id", keywords[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
					doc.Add(new Field("country", unindexed[i], Field.Store.YES, Field.Index.NO));
					doc.Add(new Field("contents", unstored[i], Field.Store.NO, Field.Index.ANALYZED));
					doc.Add(new Field("city", text[i], Field.Store.YES, Field.Index.ANALYZED));
					modifier.AddDocument(doc);
				}
				// flush (and commit if ac)
				
				modifier.Optimize();
				modifier.Commit();
				
				// one of the two files hits
				
				Term term = new Term("city", "Amsterdam");
				int hitCount = GetHitCount(dir, term);
				Assert.AreEqual(1, hitCount);
				
				// the writer is still open; delete the doc
				// max buffered delete terms is two, so this delete stays buffered
				
				modifier.DeleteDocuments(term);
				
				// add a doc (needed for the !ac case; see below)
				// doc remains buffered
				
				Document doc2 = new Document();
				modifier.AddDocument(doc2);
				
				// commit the changes, the buffered deletes, and the new doc
				
				// The failure object will fail on the first write after the del
				// file gets created when processing the buffered delete
				
				// in the ac case, this will be when writing the new segments
				// files so we really don't need the new doc, but it's harmless
				
				// in the !ac case, a new segments file won't be created but in
				// this case, creation of the cfs file happens next so we need
				// the doc (to test that it's okay that we don't lose deletes if
				// failing while creating the cfs file)
				
				bool failed = false;
				try
				{
					modifier.Commit();
				}
				catch (System.IO.IOException ioe)
				{
					failed = true;
				}
				
				Assert.IsTrue(failed);
				
				// The commit above failed, so we need to retry it (which will
				// succeed, because the failure is a one-shot)
				
				modifier.Commit();
				
				hitCount = GetHitCount(dir, term);
				
				// Make sure the delete was successfully flushed:
				Assert.AreEqual(0, hitCount);
				
				modifier.Close();
				dir.Close();
			}
		}
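		// AnonymousClassFailure is defined elsewhere in the test.  A rough
		// one-shot sketch, assuming MockRAMDirectory.Failure exposes virtual
		// Eval/Reset members (the real class also inspects the stack so it
		// only fires while the buffered deletes are being applied):
		private class OneShotFailure : MockRAMDirectory.Failure
		{
			private bool failed;
			
			public override MockRAMDirectory.Failure Reset()
			{
				failed = false;
				return this;
			}
			
			public override void Eval(MockRAMDirectory dir)
			{
				if (!failed)
				{
					failed = true; // one-shot: the retried Commit() succeeds
					throw new System.IO.IOException("fail after applyDeletes");
				}
			}
		}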
		public virtual void  TestSimpleCase()
		{
			System.String[] keywords = new System.String[]{"1", "2"};
			System.String[] unindexed = new System.String[]{"Netherlands", "Italy"};
			System.String[] unstored = new System.String[]{"Amsterdam has lots of bridges", "Venice has lots of canals"};
			System.String[] text = new System.String[]{"Amsterdam", "Venice"};
			
			for (int pass = 0; pass < 2; pass++)
			{
				bool autoCommit = (0 == pass);
				
				Directory dir = new MockRAMDirectory();
				IndexWriter modifier = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true);
				modifier.SetUseCompoundFile(true);
				modifier.SetMaxBufferedDeleteTerms(1);
				
				for (int i = 0; i < keywords.Length; i++)
				{
					Document doc = new Document();
					doc.Add(new Field("id", keywords[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
					doc.Add(new Field("country", unindexed[i], Field.Store.YES, Field.Index.NO));
					doc.Add(new Field("contents", unstored[i], Field.Store.NO, Field.Index.ANALYZED));
					doc.Add(new Field("city", text[i], Field.Store.YES, Field.Index.ANALYZED));
					modifier.AddDocument(doc);
				}
				modifier.Optimize();
				modifier.Commit();
				
				Term term = new Term("city", "Amsterdam");
				int hitCount = GetHitCount(dir, term);
				Assert.AreEqual(1, hitCount);
				modifier.DeleteDocuments(term);
				modifier.Commit();
				hitCount = GetHitCount(dir, term);
				Assert.AreEqual(0, hitCount);
				
				modifier.Close();
				dir.Close();
			}
		}
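		// GetHitCount is omitted from this listing; a plausible shape, assuming
		// it simply counts TermQuery matches and closes the searcher:
		private int GetHitCount(Directory dir, Term term)
		{
			IndexSearcher searcher = new IndexSearcher(dir);
			int hitCount = searcher.Search(new TermQuery(term), null, 1000).scoreDocs.Length;
			searcher.Close();
			return hitCount;
		}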
Example #44
        public virtual void  TestKeepLastNDeletionPolicy()
        {
            int N = 5;

            for (int pass = 0; pass < 4; pass++)
            {
                bool autoCommit      = pass < 2;
                bool useCompoundFile = (pass % 2) > 0;

                Directory dir = new RAMDirectory();

                KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N);

                for (int j = 0; j < N + 1; j++)
                {
                    IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
                    writer.SetMaxBufferedDocs(10);
                    writer.SetUseCompoundFile(useCompoundFile);
                    for (int i = 0; i < 17; i++)
                    {
                        AddDoc(writer);
                    }
                    writer.Optimize();
                    writer.Close();
                }

                Assert.IsTrue(policy.numDelete > 0);
                Assert.AreEqual(N + 1, policy.numOnInit);
                if (autoCommit)
                {
                    Assert.IsTrue(policy.numOnCommit > 1);
                }
                else
                {
                    Assert.AreEqual(N + 1, policy.numOnCommit);
                }

                // Simplistic check: just verify that only the past N segments_N's
                // still exist, and that I can open a reader on each:
                dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
                long gen = SegmentInfos.GetCurrentSegmentGeneration(dir);
                for (int i = 0; i < N + 1; i++)
                {
                    try
                    {
                        IndexReader reader = IndexReader.Open(dir);
                        reader.Close();
                        if (i == N)
                        {
                            Assert.Fail("should have failed on commits prior to last " + N);
                        }
                    }
                    catch (System.IO.IOException e)
                    {
                        if (i != N)
                        {
                            throw e;
                        }
                    }
                    if (i < N)
                    {
                        dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
                    }
                    gen--;
                }

                dir.Close();
            }
        }
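        // AddDoc is not part of this listing.  The "content"/"aaa" term checked
        // elsewhere implies the shape sketched below; the AddDoc(writer, i)
        // overload used further down presumably also stores an "id" field
        // (compare the Term("id", "7") lookup there):
        private void AddDoc(IndexWriter writer)
        {
            Document doc = new Document();
            doc.Add(new Field("content", "aaa", Field.Store.NO, Field.Index.ANALYZED));
            writer.AddDocument(doc);
        }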
		public virtual void  TestDeleteLeftoverFiles()
		{
			
			Directory dir = new RAMDirectory();
			
			IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
			writer.SetMaxBufferedDocs(10);
			int i;
			for (i = 0; i < 35; i++)
			{
				AddDoc(writer, i);
			}
			writer.SetUseCompoundFile(false);
			for (; i < 45; i++)
			{
				AddDoc(writer, i);
			}
			writer.Close();
			
			// Delete one doc so we get a .del file:
			IndexReader reader = IndexReader.Open(dir);
			Term searchTerm = new Term("id", "7");
			int delCount = reader.DeleteDocuments(searchTerm);
			Assert.AreEqual(1, delCount, "didn't delete the right number of documents");
			
			// Set one norm so we get a .s0 file:
			reader.SetNorm(21, "content", (float) 1.5);
			reader.Close();
			
			// Now, artificially create an extra .del file & extra
			// .s0 file:
			System.String[] files = dir.List();
			
			/*
			for(int i=0;i<files.length;i++) {
			System.out.println(i + ": " + files[i]);
			}
			*/
			
			// The numbering of fields can vary depending on which
			// JRE is in use.  On some JREs we see content bound to
			// field 0; on others, field 1.  So, here we have to
			// figure out which field number corresponds to
			// "content", and then set our expected file names below
			// accordingly:
			CompoundFileReader cfsReader = new CompoundFileReader(dir, "_2.cfs");
			FieldInfos fieldInfos = new FieldInfos(cfsReader, "_2.fnm");
			int contentFieldIndex = - 1;
			for (i = 0; i < fieldInfos.Size(); i++)
			{
				FieldInfo fi = fieldInfos.FieldInfo(i);
				if (fi.Name_ForNUnitTest.Equals("content"))
				{
					contentFieldIndex = i;
					break;
				}
			}
			cfsReader.Close();
			Assert.IsTrue(contentFieldIndex != - 1, "could not locate the 'content' field number in the _2.cfs segment");
			
			System.String normSuffix = "s" + contentFieldIndex;
			
			// Create a bogus separate norms file for a
			// segment/field that actually has a separate norms file
			// already:
			CopyFile(dir, "_2_1." + normSuffix, "_2_2." + normSuffix);
			
			// Create a bogus separate norms file for a
			// segment/field that actually has a separate norms file
			// already, using the "not compound file" extension:
			CopyFile(dir, "_2_1." + normSuffix, "_2_2.f" + contentFieldIndex);
			
			// Create a bogus separate norms file for a
			// segment/field that does not have a separate norms
			// file already:
			CopyFile(dir, "_2_1." + normSuffix, "_1_1." + normSuffix);
			
			// Create a bogus separate norms file for a
			// segment/field that does not have a separate norms
			// file already using the "not compound file" extension:
			CopyFile(dir, "_2_1." + normSuffix, "_1_1.f" + contentFieldIndex);
			
			// Create a bogus separate del file for a
			// segment that already has a separate del file: 
			CopyFile(dir, "_0_1.del", "_0_2.del");
			
			// Create a bogus separate del file for a
			// segment that does not yet have a separate del file:
			CopyFile(dir, "_0_1.del", "_1_1.del");
			
			// Create a bogus separate del file for a
			// non-existent segment:
			CopyFile(dir, "_0_1.del", "_188_1.del");
			
			// Create a bogus segment file:
			CopyFile(dir, "_0.cfs", "_188.cfs");
			
			// Create a bogus fnm file when the CFS already exists:
			CopyFile(dir, "_0.cfs", "_0.fnm");
			
			// Create a deletable file:
			CopyFile(dir, "_0.cfs", "deletable");
			
			// Create some old segments file:
			CopyFile(dir, "segments_a", "segments");
			CopyFile(dir, "segments_a", "segments_2");
			
			// Create a bogus cfs file shadowing a non-cfs segment:
			CopyFile(dir, "_2.cfs", "_3.cfs");
			
			System.String[] filesPre = dir.List();
			
			// Open & close a writer: it should delete the bogus
			// files created above and nothing more:
			writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
			writer.Close();
			
			System.String[] files2 = dir.List();
			dir.Close();
			
			System.Array.Sort(files);
			System.Array.Sort(files2);
			
			if (!ArrayEquals(files, files2))
			{
				Assert.Fail("IndexFileDeleter failed to delete unreferenced extra files: should have deleted " + (filesPre.Length - files.Length) + " files but only deleted " + (filesPre.Length - files2.Length) + "; expected files:\n    " + AsString(files) + "\n  actual files:\n    " + AsString(files2));
			}
		}
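		// CopyFile is also omitted here.  A straightforward version over the
		// Directory abstraction (the 1 KB buffer size is an arbitrary choice):
		private void CopyFile(Directory dir, System.String src, System.String dest)
		{
			IndexInput input = dir.OpenInput(src);
			IndexOutput output = dir.CreateOutput(dest);
			byte[] buffer = new byte[1024];
			long remaining = input.Length();
			while (remaining > 0)
			{
				int chunk = (int) System.Math.Min(remaining, buffer.Length);
				input.ReadBytes(buffer, 0, chunk);
				output.WriteBytes(buffer, chunk);
				remaining -= chunk;
			}
			input.Close();
			output.Close();
		}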
Example #46
        public virtual void  TestKeepLastNDeletionPolicyWithCreates()
        {
            int N = 10;

            for (int pass = 0; pass < 4; pass++)
            {
                bool autoCommit      = pass < 2;
                bool useCompoundFile = (pass % 2) > 0;

                KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N);

                Directory   dir    = new RAMDirectory();
                IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
                writer.SetMaxBufferedDocs(10);
                writer.SetUseCompoundFile(useCompoundFile);
                writer.Close();
                Term  searchTerm = new Term("content", "aaa");
                Query query      = new TermQuery(searchTerm);

                for (int i = 0; i < N + 1; i++)
                {
                    writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
                    writer.SetMaxBufferedDocs(10);
                    writer.SetUseCompoundFile(useCompoundFile);
                    for (int j = 0; j < 17; j++)
                    {
                        AddDoc(writer);
                    }
                    // this is a commit when autoCommit=false:
                    writer.Close();
                    IndexReader reader = IndexReader.Open(dir, policy);
                    reader.DeleteDocument(3);
                    reader.SetNorm(5, "content", 2.0F);
                    IndexSearcher searcher = new IndexSearcher(reader);
                    ScoreDoc[]    hits     = searcher.Search(query, null, 1000).scoreDocs;
                    Assert.AreEqual(16, hits.Length);
                    // this is a commit when autoCommit=false:
                    reader.Close();
                    searcher.Close();

                    writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
                    // This will not commit: there are no changes
                    // pending because we opened for "create":
                    writer.Close();
                }

                Assert.AreEqual(1 + 3 * (N + 1), policy.numOnInit);
                if (!autoCommit)
                {
                    Assert.AreEqual(3 * (N + 1), policy.numOnCommit);
                }

                IndexSearcher searcher2 = new IndexSearcher(dir);
                ScoreDoc[]    hits2     = searcher2.Search(query, null, 1000).scoreDocs;
                Assert.AreEqual(0, hits2.Length);

                // Simplistic check: just verify that only the past N segments_N's
                // still exist, and that I can open a reader on each:
                long gen = SegmentInfos.GetCurrentSegmentGeneration(dir);

                dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
                int expectedCount = 0;

                for (int i = 0; i < N + 1; i++)
                {
                    try
                    {
                        IndexReader reader = IndexReader.Open(dir);

                        // Work backwards in commits on what the expected
                        // count should be.  Only check this in the
                        // autoCommit false case:
                        if (!autoCommit)
                        {
                            searcher2 = new IndexSearcher(reader);
                            hits2     = searcher2.Search(query, null, 1000).scoreDocs;
                            Assert.AreEqual(expectedCount, hits2.Length);
                            searcher2.Close();
                            if (expectedCount == 0)
                            {
                                expectedCount = 16;
                            }
                            else if (expectedCount == 16)
                            {
                                expectedCount = 17;
                            }
                            else if (expectedCount == 17)
                            {
                                expectedCount = 0;
                            }
                        }
                        reader.Close();
                        if (i == N)
                        {
                            Assert.Fail("should have failed on commits before last " + N);
                        }
                    }
                    catch (System.IO.IOException e)
                    {
                        if (i != N)
                        {
                            throw e;
                        }
                    }
                    if (i < N)
                    {
                        dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
                    }
                    gen--;
                }

                dir.Close();
            }
        }
		public virtual void  TestKeepLastNDeletionPolicy()
		{
			
			int N = 5;
			
			for (int pass = 0; pass < 4; pass++)
			{
				
				bool autoCommit = pass < 2;
				bool useCompoundFile = (pass % 2) > 0;
				
				Directory dir = new RAMDirectory();
				
				KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N);
				
				for (int j = 0; j < N + 1; j++)
				{
					IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
					writer.SetMaxBufferedDocs(10);
					writer.SetUseCompoundFile(useCompoundFile);
					for (int i = 0; i < 17; i++)
					{
						AddDoc(writer);
					}
					writer.Optimize();
					writer.Close();
				}
				
				Assert.IsTrue(policy.numDelete > 0);
				Assert.AreEqual(N + 1, policy.numOnInit);
				if (autoCommit)
				{
					Assert.IsTrue(policy.numOnCommit > 1);
				}
				else
				{
					Assert.AreEqual(N + 1, policy.numOnCommit);
				}
				
				// Simplistic check: just verify that only the past N segments_N's
				// still exist, and that I can open a reader on each:
				dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
				long gen = SegmentInfos.GetCurrentSegmentGeneration(dir);
				for (int i = 0; i < N + 1; i++)
				{
					try
					{
						IndexReader reader = IndexReader.Open(dir);
						reader.Close();
						if (i == N)
						{
							Assert.Fail("should have failed on commits prior to last " + N);
						}
					}
					catch (System.IO.IOException e)
					{
						if (i != N)
						{
							throw e;
						}
					}
					if (i < N)
					{
						dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
					}
					gen--;
				}
				
				dir.Close();
			}
		}
Example #48
		private void  CreateIndex(Directory dir)
		{
			IndexWriter iw = new IndexWriter(dir, anlzr, true, IndexWriter.MaxFieldLength.LIMITED);
			iw.SetMaxBufferedDocs(5);
			iw.SetMergeFactor(3);
			iw.SetSimilarity(similarityOne);
			iw.SetUseCompoundFile(true);
			iw.Close();
		}
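		// CreateIndex only writes an empty, settings-bearing commit.  A usage
		// sketch (assumed): documents are added by reopening the same directory
		// with create=false, so the initial index is appended to rather than
		// overwritten:
		private void CreateIndexUsageSketch()
		{
			Directory dir = new RAMDirectory();
			CreateIndex(dir);
			IndexWriter iw = new IndexWriter(dir, anlzr, false, IndexWriter.MaxFieldLength.LIMITED);
			iw.SetSimilarity(similarityOne);
			// ... add documents here ...
			iw.Close();
		}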
		public virtual void  TestKeepLastNDeletionPolicyWithCreates()
		{
			
			int N = 10;
			
			for (int pass = 0; pass < 4; pass++)
			{
				
				bool autoCommit = pass < 2;
				bool useCompoundFile = (pass % 2) > 0;
				
				KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N);
				
				Directory dir = new RAMDirectory();
				IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
				writer.SetMaxBufferedDocs(10);
				writer.SetUseCompoundFile(useCompoundFile);
				writer.Close();
				Term searchTerm = new Term("content", "aaa");
				Query query = new TermQuery(searchTerm);
				
				for (int i = 0; i < N + 1; i++)
				{
					
					writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
					writer.SetMaxBufferedDocs(10);
					writer.SetUseCompoundFile(useCompoundFile);
					for (int j = 0; j < 17; j++)
					{
						AddDoc(writer);
					}
					// this is a commit when autoCommit=false:
					writer.Close();
					IndexReader reader = IndexReader.Open(dir, policy);
					reader.DeleteDocument(3);
					reader.SetNorm(5, "content", 2.0F);
					IndexSearcher searcher = new IndexSearcher(reader);
					Hits hits = searcher.Search(query);
					Assert.AreEqual(16, hits.Length());
					// this is a commit when autoCommit=false:
					reader.Close();
					searcher.Close();
					
					writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
					// This will not commit: there are no changes
					// pending because we opened for "create":
					writer.Close();
				}
				
				Assert.AreEqual(1 + 3 * (N + 1), policy.numOnInit);
				if (autoCommit)
				{
					Assert.IsTrue(policy.numOnCommit > 3 * (N + 1) - 1);
				}
				else
				{
					Assert.AreEqual(2 * (N + 1), policy.numOnCommit);
				}
				
				IndexSearcher searcher2 = new IndexSearcher(dir);
				Hits hits2 = searcher2.Search(query);
				Assert.AreEqual(0, hits2.Length());
				
				// Simplistic check: just verify that only the past N segments_N's
				// still exist, and that I can open a reader on each:
				long gen = SegmentInfos.GetCurrentSegmentGeneration(dir);
				
				dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
				int expectedCount = 0;
				
				for (int i = 0; i < N + 1; i++)
				{
					try
					{
						IndexReader reader = IndexReader.Open(dir);
						
						// Work backwards in commits on what the expected
						// count should be.  Only check this in the
						// autoCommit false case:
						if (!autoCommit)
						{
							searcher2 = new IndexSearcher(reader);
							hits2 = searcher2.Search(query);
							Assert.AreEqual(expectedCount, hits2.Length());
							searcher2.Close();
							if (expectedCount == 0)
							{
								expectedCount = 16;
							}
							else if (expectedCount == 16)
							{
								expectedCount = 17;
							}
							else if (expectedCount == 17)
							{
								expectedCount = 0;
							}
						}
						reader.Close();
						if (i == N)
						{
							Assert.Fail("should have failed on commits before last " + N);
						}
					}
					catch (System.IO.IOException e)
					{
						if (i != N)
						{
							throw e;
						}
					}
					if (i < N)
					{
						dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
					}
					gen--;
				}
				
				dir.Close();
			}
		}
 /// <summary>
 /// Creates a new index in <c>directory</c>. Overwrites the existing index in that directory.
 /// </summary>
 /// <param name="directory">Path to index (will be created if not existing).</param>
 public IntranetIndexer(string directory)
 {
     writer = new IndexWriter(directory, new StandardAnalyzer(), true);
     writer.SetUseCompoundFile(true);
 }
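 /// <summary>
 /// Hypothetical companion method (AddTextFile is not part of the original
 /// class), showing how such an indexer typically feeds its writer.  The
 /// Field.Keyword/Field.Text factories match the pre-2.1 API used above.
 /// </summary>
 public void AddTextFile(string path)
 {
     Document doc = new Document();
     doc.Add(Field.Keyword("path", path));                          // stored, not analyzed
     doc.Add(Field.Text("body", System.IO.File.ReadAllText(path))); // stored and analyzed
     writer.AddDocument(doc);
 }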
Example #51
 /// <summary> Close the IndexReader and open an IndexWriter.</summary>
 /// <throws>  CorruptIndexException if the index is corrupt </throws>
 /// <throws>  LockObtainFailedException if another writer has this index
 /// open (<c>write.lock</c> could not be obtained) </throws>
 /// <throws>  IOException if there is a low-level IO error </throws>
 protected internal virtual void CreateIndexWriter()
 {
     if (indexWriter == null)
     {
         if (indexReader != null)
         {
             indexReader.Close();
             indexReader = null;
         }
         indexWriter = new IndexWriter(directory, analyzer, false, new IndexWriter.MaxFieldLength(maxFieldLength));
         // IndexModifier cannot use ConcurrentMergeScheduler
         // because it synchronizes on the directory which can
         // cause deadlock
         indexWriter.SetMergeScheduler(new SerialMergeScheduler());
         indexWriter.SetInfoStream(infoStream);
         indexWriter.SetUseCompoundFile(useCompoundFile);
         if (maxBufferedDocs != IndexWriter.DISABLE_AUTO_FLUSH)
             indexWriter.SetMaxBufferedDocs(maxBufferedDocs);
         indexWriter.SetMergeFactor(mergeFactor);
     }
 }
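 /// <summary> Sketch of the symmetric helper, assumed from the IndexModifier
 /// pattern this code mirrors: close the IndexWriter, then open an
 /// IndexReader on the same directory.</summary>
 protected internal virtual void CreateIndexReader()
 {
     if (indexReader == null)
     {
         if (indexWriter != null)
         {
             indexWriter.Close();
             indexWriter = null;
         }
         indexReader = IndexReader.Open(directory);
     }
 }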
Example #52
        /// <summary>
        /// Creates the index in the specified path, using the corpusReader object
        /// as the documents feed
        /// </summary>
        /// <param name="corpusReader"></param>
        /// <param name="indexPath"></param>
        public void CreateIndex(WikiDumpReader corpusReader, string indexPath)
        {
            cr = corpusReader;

            var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29);

            writer = new IndexWriter(FSDirectory.Open(new DirectoryInfo(indexPath)), analyzer, true,
                                         IndexWriter.MaxFieldLength.UNLIMITED);
            writer.SetUseCompoundFile(false);
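            // Compound files are disabled during the bulk load for indexing
            // speed; the writer switches back to the compound format before
            // the final Optimize/Close at the end of this method.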

            // This will be called whenever a document is read by the provided ICorpusReader
            corpusReader.OnDocument += corpusDoc =>
            {
                if (corpusReader.AbortReading)
                    return;

                // Guard against AsHtml() hanging the indexer: give parsing two
                // minutes, then fall back to empty content
                var t = Task.Factory.StartNew(() => corpusDoc.AsHtml());
                var completedInTime = t.Wait(TimeSpan.FromMinutes(2));
                var content = completedInTime ? t.Result : string.Empty;

                // skip blank documents, they are worthless to us (even though they have a title we could index)
                if (string.IsNullOrEmpty(content))
                    return;

                // Create a new index document
                var doc = new Document();
                doc.Add(new Field("Id", corpusDoc.Id, Field.Store.YES,
                    Field.Index.NOT_ANALYZED_NO_NORMS));

                // Add title field
                var titleField = new Field("Title", corpusDoc.Title, Field.Store.YES,
                    Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
                titleField.SetBoost(3.0f);
                doc.Add(titleField);

                doc.Add(new Field("Content", content, Field.Store.COMPRESS,
                    Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));

                writer.AddDocument(doc);
            };

            // Progress reporting
            corpusReader.OnProgress += (percentage, status, isRunning) =>
            {
                var pi = new ProgressInfo { IsStillRunning = true, Status = string.Format("{0} ({1}%)", status, percentage) };
                Invoke(new ProgressChangedDelegate(UpdateProgress), null, new ProgressChangedEventArgs(percentage, pi));
            };

            // Execute corpus reading, which will trigger indexing for each document found
            corpusReader.Read();
            cr = null;

            // Clean up and close
            writer.SetUseCompoundFile(true);
            writer.Optimize();
            writer.Close();
            writer = null;

            var pi1 = new ProgressInfo { IsStillRunning = false, Status = "Ready" };
            Invoke(new ProgressChangedDelegate(UpdateProgress), null, new ProgressChangedEventArgs(100, pi1));
        }
Example #53
        public virtual void  TestExpirationTimeDeletionPolicy()
        {
            double SECONDS = 2.0;

            bool autoCommit      = false;
            bool useCompoundFile = true;

            Directory dir = new RAMDirectory();
            ExpirationTimeDeletionPolicy policy = new ExpirationTimeDeletionPolicy(this, dir, SECONDS);
            IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);

            writer.SetUseCompoundFile(useCompoundFile);
            writer.Close();

            long lastDeleteTime = 0;

            for (int i = 0; i < 7; i++)
            {
                // Record last time when writer performed deletes of
                // past commits
                lastDeleteTime = (DateTime.Now.Ticks / TimeSpan.TicksPerMillisecond);
                writer         = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
                writer.SetUseCompoundFile(useCompoundFile);
                for (int j = 0; j < 17; j++)
                {
                    AddDoc(writer);
                }
                writer.Close();

                // Make sure to sleep long enough so that some commit
                // points will be deleted:
                System.Threading.Thread.Sleep(TimeSpan.FromSeconds(SECONDS / 5.0));
            }

            // First, make sure the policy in fact deleted something:
            Assert.IsTrue(policy.numDelete > 0, "no commits were deleted");

            // Then simplistic check: just verify that the
            // segments_N's that still exist are in fact within SECONDS
            // seconds of the last one's mod time, and, that I can
            // open a reader on each:
            long gen = SegmentInfos.GetCurrentSegmentGeneration(dir);

            System.String fileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen);
            dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
            while (gen > 0)
            {
                try
                {
                    IndexReader reader = IndexReader.Open(dir);
                    reader.Close();
                    fileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen);
                    long modTime = dir.FileModified(fileName);
                    Assert.IsTrue(lastDeleteTime - modTime <= (SECONDS * 1000), "commit point was older than " + SECONDS + " seconds (" + (lastDeleteTime - modTime) + " msec) but did not get deleted");
                }
                catch (System.IO.IOException e)
                {
                    // OK
                    break;
                }

                dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
                gen--;
            }

            dir.Close();
        }
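        // ExpirationTimeDeletionPolicy is defined elsewhere in the test.  Its
        // core decision, sketched below (the method shape, collection type and
        // field names are assumptions; the real class also implements OnInit
        // and maintains the numDelete counter asserted above):
        public virtual void OnCommit(System.Collections.IList commits)
        {
            IndexCommit last = (IndexCommit) commits[commits.Count - 1];
            double expireTime = dir.FileModified(last.GetSegmentsFileName()) / 1000.0 - expirationTimeSeconds;
            for (int i = 0; i < commits.Count - 1; i++)
            {
                IndexCommit commit = (IndexCommit) commits[i];
                if (dir.FileModified(commit.GetSegmentsFileName()) / 1000.0 <= expireTime)
                {
                    commit.Delete(); // mark this commit point for removal
                    numDelete++;
                }
            }
        }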
        public virtual void  TestSimpleCase()
        {
            // main directory
            Directory dir = new RAMDirectory();
            // two auxiliary directories
            Directory aux  = new RAMDirectory();
            Directory aux2 = new RAMDirectory();

            IndexWriter writer = null;

            writer = NewWriter(dir, true);
            // add 100 documents
            AddDocs(writer, 100);
            Assert.AreEqual(100, writer.DocCount());
            writer.Close();

            writer = NewWriter(aux, true);
            writer.SetUseCompoundFile(false);             // use one without a compound file
            // add 40 documents in separate files
            AddDocs(writer, 40);
            Assert.AreEqual(40, writer.DocCount());
            writer.Close();

            writer = NewWriter(aux2, true);
            // add 50 documents in compound files
            AddDocs2(writer, 50);
            Assert.AreEqual(50, writer.DocCount());
            writer.Close();

            // test doc count before segments are merged
            writer = NewWriter(dir, false);
            Assert.AreEqual(100, writer.DocCount());
            writer.AddIndexesNoOptimize(new Directory[] { aux, aux2 });
            Assert.AreEqual(190, writer.DocCount());
            writer.Close();

            // make sure the old index is correct
            VerifyNumDocs(aux, 40);

            // make sure the new index is correct
            VerifyNumDocs(dir, 190);

            // now add another set in.
            Directory aux3 = new RAMDirectory();

            writer = NewWriter(aux3, true);
            // add 40 documents
            AddDocs(writer, 40);
            Assert.AreEqual(40, writer.DocCount());
            writer.Close();

            // test doc count before segments are merged/index is optimized
            writer = NewWriter(dir, false);
            Assert.AreEqual(190, writer.DocCount());
            writer.AddIndexesNoOptimize(new Directory[] { aux3 });
            Assert.AreEqual(230, writer.DocCount());
            writer.Close();

            // make sure the new index is correct
            VerifyNumDocs(dir, 230);

            VerifyTermDocs(dir, new Term("content", "aaa"), 180);

            VerifyTermDocs(dir, new Term("content", "bbb"), 50);

            // now optimize it.
            writer = NewWriter(dir, false);
            writer.Optimize();
            writer.Close();

            // make sure the new index is correct
            VerifyNumDocs(dir, 230);

            VerifyTermDocs(dir, new Term("content", "aaa"), 180);

            VerifyTermDocs(dir, new Term("content", "bbb"), 50);

            // now add a single document
            Directory aux4 = new RAMDirectory();

            writer = NewWriter(aux4, true);
            AddDocs2(writer, 1);
            writer.Close();

            writer = NewWriter(dir, false);
            Assert.AreEqual(230, writer.DocCount());
            writer.AddIndexesNoOptimize(new Directory[] { aux4 });
            Assert.AreEqual(231, writer.DocCount());
            writer.Close();

            VerifyNumDocs(dir, 231);

            VerifyTermDocs(dir, new Term("content", "bbb"), 51);
        }
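        // NewWriter, AddDocs and AddDocs2 are not shown in this listing.  The
        // field names and values follow from the "content"/"aaa"/"bbb" term
        // checks above; the writer configuration is an assumption:
        private IndexWriter NewWriter(Directory dir, bool create)
        {
            return new IndexWriter(dir, new WhitespaceAnalyzer(), create);
        }

        private void AddDocs(IndexWriter writer, int numDocs)
        {
            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(new Field("content", "aaa", Field.Store.NO, Field.Index.ANALYZED));
                writer.AddDocument(doc);
            }
        }

        private void AddDocs2(IndexWriter writer, int numDocs)
        {
            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(new Field("content", "bbb", Field.Store.NO, Field.Index.ANALYZED));
                writer.AddDocument(doc);
            }
        }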
Example #55
        public virtual void  TestKeepAllDeletionPolicy()
        {
            for (int pass = 0; pass < 4; pass++)
            {
                bool autoCommit      = pass < 2;
                bool useCompoundFile = (pass % 2) > 0;

                // Never deletes a commit
                KeepAllDeletionPolicy policy = new KeepAllDeletionPolicy(this);

                Directory dir = new RAMDirectory();
                policy.dir = dir;

                IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
                writer.SetMaxBufferedDocs(10);
                writer.SetUseCompoundFile(useCompoundFile);
                writer.SetMergeScheduler(new SerialMergeScheduler());
                for (int i = 0; i < 107; i++)
                {
                    AddDoc(writer);
                    if (autoCommit && i % 10 == 0)
                    {
                        writer.Commit();
                    }
                }
                writer.Close();

                writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
                writer.SetUseCompoundFile(useCompoundFile);
                writer.Optimize();
                writer.Close();

                Assert.AreEqual(2, policy.numOnInit);
                if (!autoCommit)
                {
                    // If we are not auto committing then there should
                    // be exactly 2 commits (one per close above):
                    Assert.AreEqual(2, policy.numOnCommit);
                }

                // Test ListCommits
                System.Collections.ICollection commits = IndexReader.ListCommits(dir);
                if (!autoCommit)
                {
                    // 1 from opening writer + 2 from closing writer
                    Assert.AreEqual(3, commits.Count);
                }
                else
                {
                    // 1 from opening writer + 2 from closing writer +
                    // 11 from calling writer.Commit() explicitly above
                    Assert.AreEqual(14, commits.Count);
                }

                System.Collections.IEnumerator it = commits.GetEnumerator();
                // Make sure we can open a reader on each commit:
                while (it.MoveNext())
                {
                    IndexCommit commit = (IndexCommit)it.Current;
                    IndexReader r      = IndexReader.Open(commit, null);
                    r.Close();
                }

                // Simplistic check: just verify that all segments_N's still
                // exist, and that I can open a reader on each:
                dir.DeleteFile(IndexFileNames.SEGMENTS_GEN);
                long gen = SegmentInfos.GetCurrentSegmentGeneration(dir);
                while (gen > 0)
                {
                    IndexReader reader = IndexReader.Open(dir);
                    reader.Close();
                    dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
                    gen--;

                    if (gen > 0)
                    {
                        // Removing that commit point should have orphaned at
                        // least one index file.  Open & close a writer and
                        // assert that it actually removed something:
                        int preCount = dir.ListAll().Length;
                        writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, policy, IndexWriter.MaxFieldLength.LIMITED);
                        writer.Close();
                        int postCount = dir.ListAll().Length;
                        Assert.IsTrue(postCount < preCount);
                    }
                }

                dir.Close();
            }
        }
		public virtual void  TestKeepNoneOnInitDeletionPolicy()
		{
			
			for (int pass = 0; pass < 4; pass++)
			{
				
				bool autoCommit = pass < 2;
				bool useCompoundFile = (pass % 2) > 0;
				
				KeepNoneOnInitDeletionPolicy policy = new KeepNoneOnInitDeletionPolicy(this);
				
				Directory dir = new RAMDirectory();
				
				IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
				writer.SetMaxBufferedDocs(10);
				writer.SetUseCompoundFile(useCompoundFile);
				for (int i = 0; i < 107; i++)
				{
					AddDoc(writer);
				}
				writer.Close();
				
				writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
				writer.SetUseCompoundFile(useCompoundFile);
				writer.Optimize();
				writer.Close();
				
				Assert.AreEqual(2, policy.numOnInit);
				if (autoCommit)
				{
					Assert.IsTrue(policy.numOnCommit > 2);
				}
				else
				{
					// If we are not auto committing then there should
					// be exactly 2 commits (one per close above):
					Assert.AreEqual(2, policy.numOnCommit);
				}
				
				// Simplistic check: just verify the index is in fact
				// readable:
				IndexReader reader = IndexReader.Open(dir);
				reader.Close();
				
				dir.Close();
			}
		}
Example #57
        public virtual void  TestDeleteLeftoverFiles()
        {
            Directory dir = new RAMDirectory();

            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            writer.SetMaxBufferedDocs(10);
            int i;

            for (i = 0; i < 35; i++)
            {
                AddDoc(writer, i);
            }
            writer.SetUseCompoundFile(false);
            for (; i < 45; i++)
            {
                AddDoc(writer, i);
            }
            writer.Close();

            // Delete one doc so we get a .del file:
            IndexReader reader     = IndexReader.Open(dir);
            Term        searchTerm = new Term("id", "7");
            int         delCount   = reader.DeleteDocuments(searchTerm);

            Assert.AreEqual(1, delCount, "didn't delete the right number of documents");

            // Set one norm so we get a .s0 file:
            reader.SetNorm(21, "content", (float)1.5);
            reader.Close();

            // Now, artificially create an extra .del file & extra
            // .s0 file:
            System.String[] files = dir.ListAll();

            /*
             * for(int j=0;j<files.length;j++) {
             * System.out.println(j + ": " + files[j]);
             * }
             */

            // The numbering of fields can vary depending on which
            // JRE is in use.  On some JREs we see content bound to
            // field 0; on others, field 1.  So, here we have to
            // figure out which field number corresponds to
            // "content", and then set our expected file names below
            // accordingly:
            CompoundFileReader cfsReader  = new CompoundFileReader(dir, "_2.cfs");
            FieldInfos         fieldInfos = new FieldInfos(cfsReader, "_2.fnm");
            int contentFieldIndex         = -1;

            for (i = 0; i < fieldInfos.Size(); i++)
            {
                FieldInfo fi = fieldInfos.FieldInfo(i);
                if (fi.name_ForNUnit.Equals("content"))
                {
                    contentFieldIndex = i;
                    break;
                }
            }
            cfsReader.Close();
            Assert.IsTrue(contentFieldIndex != -1, "could not locate the 'content' field number in the _2.cfs segment");

            System.String normSuffix = "s" + contentFieldIndex;

            // Create a bogus separate norms file for a
            // segment/field that actually has a separate norms file
            // already:
            CopyFile(dir, "_2_1." + normSuffix, "_2_2." + normSuffix);

            // Create a bogus separate norms file for a
            // segment/field that actually has a separate norms file
            // already, using the "not compound file" extension:
            CopyFile(dir, "_2_1." + normSuffix, "_2_2.f" + contentFieldIndex);

            // Create a bogus separate norms file for a
            // segment/field that does not have a separate norms
            // file already:
            CopyFile(dir, "_2_1." + normSuffix, "_1_1." + normSuffix);

            // Create a bogus separate norms file for a
            // segment/field that does not have a separate norms
            // file already using the "not compound file" extension:
            CopyFile(dir, "_2_1." + normSuffix, "_1_1.f" + contentFieldIndex);

            // Create a bogus separate del file for a
            // segment that already has a separate del file:
            CopyFile(dir, "_0_1.del", "_0_2.del");

            // Create a bogus separate del file for a
            // segment that does not yet have a separate del file:
            CopyFile(dir, "_0_1.del", "_1_1.del");

            // Create a bogus separate del file for a
            // non-existent segment:
            CopyFile(dir, "_0_1.del", "_188_1.del");

            // Create a bogus segment file:
            CopyFile(dir, "_0.cfs", "_188.cfs");

            // Create a bogus fnm file when the CFS already exists:
            CopyFile(dir, "_0.cfs", "_0.fnm");

            // Create a deletable file:
            CopyFile(dir, "_0.cfs", "deletable");

            // Create some old segments file:
            CopyFile(dir, "segments_3", "segments");
            CopyFile(dir, "segments_3", "segments_2");

            // Create a bogus cfs file shadowing a non-cfs segment:
            CopyFile(dir, "_2.cfs", "_3.cfs");

            System.String[] filesPre = dir.ListAll();

            // Open & close a writer: it should delete the bogus
            // files created above and nothing more:
            writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED);
            writer.Close();

            System.String[] files2 = dir.ListAll();
            dir.Close();

            System.Array.Sort(files);
            System.Array.Sort(files2);

            System.Collections.Hashtable dif = DifFiles(files, files2);

            if (!SupportClass.CollectionsHelper.Equals(files, files2))
            {
                Assert.Fail("IndexFileDeleter failed to delete unreferenced extra files: should have deleted " + (filesPre.Length - files.Length) + " files but only deleted " + (filesPre.Length - files2.Length) + "; expected files:\n    " + AsString(files) + "\n  actual files:\n    " + AsString(files2) + "\ndif: " + SupportClass.CollectionsHelper.CollectionToString(dif));
            }
        }
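        // DifFiles presumably returns the symmetric difference of the two file
        // lists (the container type is assumed from its use above):
        private System.Collections.Hashtable DifFiles(System.String[] files1, System.String[] files2)
        {
            System.Collections.Hashtable dif = new System.Collections.Hashtable();
            System.Collections.Hashtable set1 = new System.Collections.Hashtable();
            System.Collections.Hashtable set2 = new System.Collections.Hashtable();
            foreach (System.String f in files1)
                set1[f] = f;
            foreach (System.String f in files2)
                set2[f] = f;
            foreach (System.String f in files1)
                if (!set2.ContainsKey(f))
                    dif[f] = f; // present only in the first list
            foreach (System.String f in files2)
                if (!set1.ContainsKey(f))
                    dif[f] = f; // present only in the second list
            return dif;
        }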