Optimize() public method

Requests an "optimize" operation on an index, priming the index for the fastest available search. Traditionally this has meant merging all segments into a single segment as is done in the default merge policy, but individaul merge policies may implement optimize in different ways.

It is recommended that this method be called upon completion of indexing. In environments with frequent updates, optimize is best done during low volume times, if at all.

See http://www.gossamer-threads.com/lists/lucene/java-dev/47895 for more discussion.

Note that optimize requires 2X the index size in free space in your Directory (3X if you're using compound file format). For example, if your index size is 10 MB, then you need 20 MB free for optimize to complete (30 MB if you're using compound file format).

If some but not all readers re-open while an optimize is underway, this will cause > 2X temporary space to be consumed as those new readers will then hold open the partially optimized segments at that time. It is best not to re-open readers while optimize is running.
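
For example, a minimal sketch of the safe ordering (the "reader" variable and its lifecycle are assumptions here; Reopen is the 2.9/3.0-era Lucene.Net API):

    writer.Optimize();                      // let the merge finish first
    IndexReader reopened = reader.Reopen(); // only now pick up the optimized segments
    if (reopened != reader)
    {
        reader.Close();                     // release the old, pre-optimize segments
        reader = reopened;
    }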

The actual temporary usage could be much less than these figures (it depends on many factors).

In general, once the optimize completes, the total size of the index will be less than the size of the starting index. It could be quite a bit smaller (if there were many pending deletes) or just slightly smaller.

If an Exception is hit during optimize(), for example due to disk full, the index will not be corrupt and no documents will have been lost. However, it may have been partially optimized (some segments were merged but not all), and it's possible that one of the segments in the index will be in non-compound format even when using compound file format. This will occur when the Exception is hit during conversion of the segment into compound format.

This call will optimize those segments present in the index when the call started. If other threads are still adding documents and flushing segments, those newly created segments will not be optimized unless you call optimize again.

NOTE: if this method hits an OutOfMemoryError you should immediately close the writer. See above for details.
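
A minimal sketch of that guidance (the "writer" variable is an assumption; in Lucene.Net the Java OutOfMemoryError surfaces as System.OutOfMemoryException):

    try
    {
        writer.Optimize();
    }
    catch (System.OutOfMemoryException)
    {
        // Per the note above: close the writer immediately and do not reuse it.
        writer.Close();
        throw;
    }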

public void Optimize()
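
Typical usage, per the recommendation above, is to optimize once at the end of indexing and then close the writer. A minimal sketch, assuming a Lucene.Net 3.0-era API and an existing Directory named "dir":

    using (var writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_30),
                                        IndexWriter.MaxFieldLength.UNLIMITED))
    {
        // ... AddDocument calls ...
        writer.Optimize(); // merge down to a single segment for fastest search
    } // disposing the writer commits and closes the index
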
Example #1
        public void SetUp()
        {

            var writer = new IndexWriter(store, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);

            var doc = new Document();
            doc.Add(new Field("aaa", "foo", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new Field("aaa", "foo", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new Field("contents", "Tom", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new Field("contents", "Jerry", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new Field("zzz", "bar", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            writer.Optimize();
            writer.Close();
        }
Example #2
        public virtual void  TestRAMDirectorySize()
        {
            Directory        dir    = FSDirectory.Open(indexDir);
            MockRAMDirectory ramDir = new MockRAMDirectory(dir);

            dir.Close();
            IndexWriter writer = new IndexWriter(ramDir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.LIMITED, null);

            writer.Optimize(null);

            Assert.AreEqual(ramDir.SizeInBytes(), ramDir.GetRecomputedSizeInBytes());

            ThreadClass[] threads = new ThreadClass[numThreads];
            for (int i = 0; i < numThreads; i++)
            {
                int num = i;
                threads[i] = new AnonymousClassThread(num, writer, ramDir, this);
            }
            for (int i = 0; i < numThreads; i++)
            {
                threads[i].Start();
            }
            for (int i = 0; i < numThreads; i++)
            {
                threads[i].Join();
            }

            writer.Optimize(null);
            Assert.AreEqual(ramDir.SizeInBytes(), ramDir.GetRecomputedSizeInBytes());

            writer.Close();
        }
Example #3
        public ActionResult Process()
        {
            GetCoursesMessage message = new GetCoursesMessage();
            MvcContrib.Bus.Send(message);

            List<Course> courses = (message.Result.Data as List<Course>);

            if (courses == null)
                return RedirectToAction("Index");

            Directory directory = FSDirectory.Open(new System.IO.DirectoryInfo(Server.MapPath("~/Data/Index")));
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_29);
            IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
            Document document;

            try
            {

                foreach (Course course in courses)
                {
                    document = new Document();

                    document.Add(new Field("Type", "Course", Field.Store.YES, Field.Index.NO));
                    document.Add(new Field("ID", course.Id.ToString(), Field.Store.YES, Field.Index.NO));
                    document.Add(new Field("Name", course.Name, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
                    document.Add(new Field("Owner", course.Owner, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));

                    writer.AddDocument(document);

                    GetNodesMessage courseMessage = new GetNodesMessage { Input = new GetNodesInput { CourseId = course.Id } };
                    MvcContrib.Bus.Send(courseMessage);

                    List<Node> nodes = (courseMessage.Result.Data as List<Node>);

                    foreach (Node node in nodes)
                    {
                        ProcessNode(writer, node);
                    }
                }
            }
            catch (Exception)
            {
                // Leave the index in a usable state: optimize and close before rethrowing.
                writer.Optimize();
                writer.Close();

                // "throw;" preserves the original stack trace ("throw e;" would reset it).
                throw;
            }

            writer.Optimize();
            writer.Close();

            return RedirectToAction("Index");
        }
Example #4
        private void CreateIndex()
        {
            string sresult = "";
            try
            {
                // Read the data from the database
                SqlDataReader myred = ExecuteQuery();

                // Build the index fields
                //Lucene.Net.Analysis.Analyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_CURRENT);
                StockFooAnalyzer analyzer = new StockFooAnalyzer(System.Configuration.ConfigurationManager.AppSettings["AnalyzerPath"].ToString());

                FSDirectory dy = FSDirectory.Open(new DirectoryInfo(Server.MapPath("IndexDirectory")));
                IndexWriter writer = new IndexWriter(dy, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
                while (myred.Read())
                {
                    AddDocument(writer, myred["title"].ToString(), myred["url"].ToString(), myred["site"].ToString(), myred["body"].ToString(), myred["publish_time"].ToString());
                }
                myred.Close();
                myred.Dispose();

                writer.Optimize();
                writer.Close();

                sresult = "ok";
            }
            catch(Exception ex)
            {
                sresult = ex.Message;
            }
            Response.Write(sresult);
            Response.Flush();
            Response.End();
        }
Example #5
        public void TestSpanRegex()
        {
            RAMDirectory directory = new RAMDirectory();
            IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
            Document doc = new Document();
            // doc.Add(new Field("field", "the quick brown fox jumps over the lazy dog",
            // Field.Store.NO, Field.Index.ANALYZED));
            // writer.AddDocument(doc);
            // doc = new Document();
            doc.Add(new Field("field", "auto update", Field.Store.NO,
                Field.Index.ANALYZED));
            writer.AddDocument(doc);
            doc = new Document();
            doc.Add(new Field("field", "first auto update", Field.Store.NO,
                Field.Index.ANALYZED));
            writer.AddDocument(doc);
            writer.Optimize();
            writer.Close();

            IndexSearcher searcher = new IndexSearcher(directory, true);
            SpanRegexQuery srq = new SpanRegexQuery(new Term("field", "aut.*"));
            SpanFirstQuery sfq = new SpanFirstQuery(srq, 1);
            // SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {srq, stq}, 6,
            // true);
            int numHits = searcher.Search(sfq, null, 1000).TotalHits;
            Assert.AreEqual(1, numHits);
        }
Example #6
        public override void WriteEndVersion(Process process, AbstractConnection input, Entity entity, bool force = false) {
            if (entity.Updates + entity.Inserts <= 0 && !force)
                return;

            var versionType = entity.Version == null ? "string" : entity.Version.SimpleType;
            var end = entity.End ?? new DefaultFactory(Logger).Convert(entity.End, versionType);

            using (var dir = LuceneDirectoryFactory.Create(this, TflBatchEntity(entity.ProcessName))) {
                using (var writer = new IndexWriter(dir, new KeywordAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED)) {
                    var doc = new Document();
                    doc.Add(new NumericField("id", global::Lucene.Net.Documents.Field.Store.YES, true).SetIntValue(entity.TflBatchId));
                    doc.Add(new global::Lucene.Net.Documents.Field("process", entity.ProcessName, global::Lucene.Net.Documents.Field.Store.YES, global::Lucene.Net.Documents.Field.Index.NOT_ANALYZED_NO_NORMS));
                    doc.Add(new global::Lucene.Net.Documents.Field("connection", input.Name, global::Lucene.Net.Documents.Field.Store.YES, global::Lucene.Net.Documents.Field.Index.NOT_ANALYZED_NO_NORMS));
                    doc.Add(new global::Lucene.Net.Documents.Field("entity", entity.Alias, global::Lucene.Net.Documents.Field.Store.YES, global::Lucene.Net.Documents.Field.Index.NOT_ANALYZED_NO_NORMS));
                    doc.Add(new NumericField("updates", global::Lucene.Net.Documents.Field.Store.YES, true).SetLongValue(entity.Updates));
                    doc.Add(new NumericField("inserts", global::Lucene.Net.Documents.Field.Store.YES, true).SetLongValue(entity.Inserts));
                    doc.Add(new NumericField("deletes", global::Lucene.Net.Documents.Field.Store.YES, true).SetLongValue(entity.Deletes));
                    doc.Add(LuceneWriter.CreateField("version", versionType, new SearchType { Analyzer = "keyword" }, end));
                    doc.Add(new global::Lucene.Net.Documents.Field("version_type", versionType, global::Lucene.Net.Documents.Field.Store.YES, global::Lucene.Net.Documents.Field.Index.NOT_ANALYZED_NO_NORMS));
                    doc.Add(new NumericField("tflupdate", global::Lucene.Net.Documents.Field.Store.YES, true).SetLongValue(DateTime.UtcNow.Ticks));
                    writer.AddDocument(doc);
                    writer.Commit();
                    writer.Optimize();
                }
            }
        }
Example #7
        //END
        //this method creates document from an ObjectToIndex
        public void BuildIndex(FileToIndex file)
        {
            using (var analyzer = new Lucene.Net.Analysis.Ru.RussianAnalyzer(Version.LUCENE_30))
            {
                using (IndexWriter idxw = new IndexWriter(_directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
                {
                    // check whether the document already exists; if so, delete the existing copy

                    var searchQuery = new TermQuery(new Term("Id", file.Id.ToString()));
                    idxw.DeleteDocuments(searchQuery);
                    // create the new document
                    Document doc = new Document();
                    doc.Add(new Field("Id", file.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));//аналайзер разбивает строки на слова
                    doc.Add(new Field("Title", file.Title, Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("Description", file.Description, Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("Authors", file.Authors, Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("Text", file.Text, Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("Hashtags", file.Hashtags, Field.Store.YES, Field.Index.ANALYZED));
                    doc.Add(new Field("Discipline", file.Discipline, Field.Store.YES, Field.Index.ANALYZED));
                    //write the document to the index
                    idxw.AddDocument(doc);
                    // commit and optimize; the using block disposes the writer
                    idxw.Commit();

                    idxw.Optimize();

                }
            }
        }
Example #8
        // Delete the entire index
        public void delAllIndex()
        {
            if (System.IO.Directory.Exists(indexPath) == false)
            {
                System.IO.Directory.CreateDirectory(indexPath);
            }
            FSDirectory fsDirectory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());

            if (!IndexReader.IndexExists(fsDirectory))
            {
                return;
            }
            else
            {
                if (IndexReader.IsLocked(fsDirectory))
                {
                    IndexReader.Unlock(fsDirectory);
                }
            }
            Lucene.Net.Index.IndexWriter iw = new Lucene.Net.Index.IndexWriter(indexPath, new PanGuAnalyzer(), false);
            //  iw.DeleteDocuments(new Lucene.Net.Index.Term("Key", key));
            iw.DeleteAll();
            iw.Optimize(); // Deleted documents are not removed from disk right away; they are recorded in a .del file, and Optimize must be called to purge them. Before purging, UndeleteAll can restore the deleted documents.
            iw.Close();
        }
Example #9
        public void SetUp()
        {

            IndexWriter writer = new IndexWriter(store, new WhitespaceAnalyzer(), true);

            Document doc;

            doc = new Document();
            doc.Add(new Field("aaa", "foo", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new Field("aaa", "foo", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new Field("contents", "Tom", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new Field("contents", "Jerry", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new Field("zzz", "bar", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            writer.Optimize();
            writer.Close();
        }
Example #10
        public void HelloWorldTest()
        {
            Directory directory = new RAMDirectory();
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_29);
            IndexWriter writer = new IndexWriter(directory,
                analyzer,
                IndexWriter.MaxFieldLength.UNLIMITED);

            Document doc = new Document();
            doc.Add(new Field("id", "1", Field.Store.YES, Field.Index.NO));
            doc.Add(new Field("postBody", "sample test", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);
            writer.Optimize();
            writer.Commit();
            writer.Close();

            QueryParser parser = new QueryParser(Version.LUCENE_29, "postBody", analyzer);
            Query query = parser.Parse("sample test");

            //Setup searcher
            IndexSearcher searcher = new IndexSearcher(directory, true);
            //Do the search
            var hits = searcher.Search(query, null, 10);

            for (int i = 0; i < hits.TotalHits; i++)
            {
                var doc1 = hits.ScoreDocs[i];
            }

            searcher.Close();
            directory.Close();
        }
Example #11
        public static IndexWriter CreateIndex(Content[] contents)
        {
            var v = Lucene.Net.Util.Version.LUCENE_30;
            var l = Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED;
            var d = FSDirectory.Open(new DirectoryInfo(IndexPath));

            IndexWriter writer = new IndexWriter(d, new StandardAnalyzer(v), l);

            try
            {
                foreach (var item in contents)
                {
                    Document doc = new Document();

                    Field id = new Field("id", item.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED);
                    Field title = new Field("title", item.Title, Field.Store.YES, Field.Index.ANALYZED);
                    Field username = new Field("username", item.User.UserName, Field.Store.YES, Field.Index.ANALYZED);
                    doc.Add(id);
                    doc.Add(title);
                    doc.Add(username);
                    writer.AddDocument(doc);
                }
                writer.Optimize();
            }
            finally
            {
                // Release the writer even if indexing fails; exceptions propagate to the caller.
                writer.Dispose();
            }

            return writer;
        }
Example #12
		public virtual void  TestTermEnum()
		{
			IndexWriter writer = null;
			
			writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
			
			// add 100 documents with term : aaa
			// add 100 documents with terms: aaa bbb
			// Therefore, term 'aaa' has document frequency of 200 and term 'bbb' 100
			for (int i = 0; i < 100; i++)
			{
				AddDoc(writer, "aaa");
				AddDoc(writer, "aaa bbb");
			}
			
			writer.Close();
			
			// verify document frequency of terms in an unoptimized index
			VerifyDocFreq();
			
			// merge segments by optimizing the index
			writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
			writer.Optimize();
			writer.Close();
			
			// verify document frequency of terms in an optimized index
			VerifyDocFreq();
		}
Example #13
        public static void CreateIndexCreative(Creative entity, string IndexPath)
        {
            var document = new Document();
            document.Add(new Field("CreativeId", entity.Creativeid.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            document.Add(new Field("Title", entity.Title, Field.Store.YES, Field.Index.ANALYZED));
            if (!string.IsNullOrEmpty(entity.About))
            {
                document.Add(new Field("About", entity.About, Field.Store.YES, Field.Index.ANALYZED));
            }

            Directory directory = FSDirectory.Open(new DirectoryInfo(IndexPath));
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);

            var writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

            try
            {
                writer.AddDocument(document);
                writer.Optimize();
            }
            finally
            {
                // Dispose exactly once, in finally, so the writer is released even when
                // AddDocument or Optimize throws.
                writer.Dispose();
            }
        }
Example #14
 protected void Optimize()
 {
     using (var writer = new IndexWriter(LuceneDirectory, new StandardAnalyzer(Version.LUCENE_30), IndexWriter.MaxFieldLength.UNLIMITED))
     {
         writer.Optimize();
     }
 }
Example #15
		public virtual void  TestSimpleSkip()
		{
			RAMDirectory dir = new RAMDirectory();
			IndexWriter writer = new IndexWriter(dir, new PayloadAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			Term term = new Term("test", "a");
			for (int i = 0; i < 5000; i++)
			{
				Document d1 = new Document();
				d1.Add(new Field(term.Field(), term.Text(), Field.Store.NO, Field.Index.ANALYZED));
				writer.AddDocument(d1);
			}
			writer.Flush();
			writer.Optimize();
			writer.Close();
			
			IndexReader reader = SegmentReader.GetOnlySegmentReader(dir);
			SegmentTermPositions tp = (SegmentTermPositions) reader.TermPositions();
			tp.freqStream_ForNUnit = new CountingStream(this, tp.freqStream_ForNUnit);
			
			for (int i = 0; i < 2; i++)
			{
				counter = 0;
				tp.Seek(term);
				
				CheckSkipTo(tp, 14, 185); // no skips
				CheckSkipTo(tp, 17, 190); // one skip on level 0
				CheckSkipTo(tp, 287, 200); // one skip on level 1, two on level 0
				
				// this test would fail if we had only one skip level,
				// because then more bytes would be read from the freqStream
				CheckSkipTo(tp, 4800, 250); // one skip on level 2
			}
		}
Example #16
 public virtual void  TestLucene()
 {
     
     int num = 100;
     
     Directory indexA = new MockRAMDirectory();
     Directory indexB = new MockRAMDirectory();
     
     FillIndex(indexA, 0, num);
     Assert.IsFalse(VerifyIndex(indexA, 0), "Index a is invalid");
     
     FillIndex(indexB, num, num);
     Assert.IsFalse(VerifyIndex(indexB, num), "Index b is invalid");
     
     Directory merged = new MockRAMDirectory();
     
     IndexWriter writer = new IndexWriter(merged, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);
     writer.MergeFactor = 2;
     
     writer.AddIndexesNoOptimize(new []{indexA, indexB});
     writer.Optimize();
     writer.Close();
     
     var fail = VerifyIndex(merged, 0);
     merged.Close();
     
     Assert.IsFalse(fail, "The merged index is invalid");
 }
Example #17
        public virtual void  TestBooleanQueryContainingSingleTermPrefixQuery()
        {
            // this tests against bug 33161 (now fixed)
            // In order to cause the bug, the outer query must have more than one term
            // and all terms required.
            // The contained MultiPhraseQuery must contain exactly one term array.

            RAMDirectory indexStore = new RAMDirectory();
            IndexWriter  writer     = new IndexWriter(indexStore, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);

            Add("blueberry pie", writer);
            Add("blueberry chewing gum", writer);
            Add("blue raspberry pie", writer);
            writer.Optimize(null);
            writer.Close();

            IndexSearcher searcher = new IndexSearcher(indexStore, true, null);
            // This query will be equivalent to +body:pie +body:"blue*"
            BooleanQuery q = new BooleanQuery();

            q.Add(new TermQuery(new Term("body", "pie")), Occur.MUST);

            MultiPhraseQuery trouble = new MultiPhraseQuery();

            trouble.Add(new Term[] { new Term("body", "blueberry"), new Term("body", "blue") });
            q.Add(trouble, Occur.MUST);

            // exception will be thrown here without fix
            ScoreDoc[] hits = searcher.Search(q, null, 1000, null).ScoreDocs;

            Assert.AreEqual(2, hits.Length, "Wrong number of hits");
            searcher.Close();
        }
Example #18
        public override void  SetUp()
        {
            base.SetUp();


            System.String[] data = new System.String[] { "A 1 2 3 4 5 6", "Z       4 5 6", null, "B   2   4 5 6", "Y     3   5 6", null, "C     3     6", "X       4 5 6" };

            index = new RAMDirectory();
            IndexWriter writer = new IndexWriter(index, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);

            for (int i = 0; i < data.Length; i++)
            {
                Document doc = new Document();
                doc.Add(new Field("id", System.Convert.ToString(i), Field.Store.YES, Field.Index.NOT_ANALYZED)); //Field.Keyword("id",String.valueOf(i)));
                doc.Add(new Field("all", "all", Field.Store.YES, Field.Index.NOT_ANALYZED));                     //Field.Keyword("all","all"));
                if (null != data[i])
                {
                    doc.Add(new Field("data", data[i], Field.Store.YES, Field.Index.ANALYZED));                     //Field.Text("data",data[i]));
                }
                writer.AddDocument(doc, null);
            }

            writer.Optimize(null);
            writer.Close();

            r = IndexReader.Open(index, true, null);
            s = new IndexSearcher(r);

            //System.out.println("Set up " + getName());
        }
Example #19
        public void CreateIndex(List<ISearchEntity> CreateEntities)
        {
            Analyzer analyzer = new StandardAnalyzer();
            IndexWriter writer = new IndexWriter(ConfigElement.IndexDirectory, analyzer, true);
            // Third parameter: whether to recreate the index. True always clears and rebuilds it; False appends to the existing index incrementally.

            foreach (ISearchEntity IndexEntity in CreateEntities)
            {
                ProductModel product = (ProductModel)IndexEntity;

                Document doc = new Document();

                doc.Add(new Field("productid", Convert.ToString(product.EntityIdentity), Field.Store.YES, Field.Index.UN_TOKENIZED));
                doc.Add(new Field("productname", Convert.ToString(product.ProductName), Field.Store.YES, Field.Index.TOKENIZED));
                doc.Add(new Field("cateid", Convert.ToString(product.CategoryID), Field.Store.YES, Field.Index.UN_TOKENIZED));
                doc.Add(new Field("catepath", Convert.ToString(product.CategoryPath), Field.Store.YES, Field.Index.UN_TOKENIZED));
                doc.Add(new Field("keywords", Convert.ToString(product.Keywords), Field.Store.YES, Field.Index.TOKENIZED));
                doc.Add(new Field("description", Convert.ToString(product.Description), Field.Store.YES, Field.Index.TOKENIZED));
                doc.Add(new Field("price", Convert.ToString(product.Price), Field.Store.YES, Field.Index.UN_TOKENIZED));
                doc.Add(new Field("createtime", Convert.ToString(product.CreateTime), Field.Store.YES, Field.Index.UN_TOKENIZED));
                doc.Add(new Field("updatetime", Convert.ToString(product.UpdateTime), Field.Store.YES, Field.Index.UN_TOKENIZED));
                doc.Add(new Field("mainimage", Convert.ToString(product.ProductImage), Field.Store.YES, Field.Index.UN_TOKENIZED));

                writer.AddDocument(doc);
                Console.WriteLine("created index for {0}:{1}", product.EntityIdentity, product.ProductName);
            }

            writer.Optimize();
            writer.Close();
        }
Example #20
        public void CreateIndex(List<ISearchEntity> CreateEntities)
        {
            Analyzer analyzer = new StandardAnalyzer();
            IndexWriter writer = new IndexWriter(ConfigElement.IndexDirectory, analyzer, true);
            // Third parameter: whether to recreate the index. True always clears and rebuilds it; False appends to the existing index incrementally.

            foreach (ISearchEntity IndexEntity in CreateEntities)
            {
                NewsModel news = (NewsModel)IndexEntity;

                Document doc = new Document();

                doc.Add(new Field("newsid", Convert.ToString(news.EntityIdentity), Field.Store.YES, Field.Index.UN_TOKENIZED));
                doc.Add(new Field("title", Convert.ToString(news.Title), Field.Store.YES, Field.Index.UN_TOKENIZED));
                doc.Add(new Field("content", Convert.ToString(news.Content), Field.Store.YES, Field.Index.TOKENIZED));
                doc.Add(new Field("keywords", Convert.ToString(news.Keywords), Field.Store.YES, Field.Index.UN_TOKENIZED));
                doc.Add(new Field("catepath", Convert.ToString(news.CategoryPath), Field.Store.YES, Field.Index.UN_TOKENIZED));
                doc.Add(new Field("createtime", Convert.ToString(news.CreateTime), Field.Store.YES, Field.Index.UN_TOKENIZED));

                writer.AddDocument(doc);
                Console.WriteLine("created index for {0}:{1}", news.EntityIdentity, news.Title);
            }

            writer.Optimize();
            writer.Close();
        }
Example #21
        public void IndexFile(string filePath)
        {
            PropertyDescriptors descriptors = new PropertyDescriptors();
            descriptors.LoadData(System.Windows.Forms.Application.StartupPath + "\\PropertyDescriptors.xml");
            Analyzer a = new Lucene.Net.Analysis.Standard.StandardAnalyzer();
            bool create = !(System.IO.Directory.Exists(_idxDir) && IndexReader.IndexExists(_idxDir));
            IndexWriter iw = new IndexWriter(_idxDir, a, create);
            iw.SetUseCompoundFile(true);

            AdDataStream adStream = new AdDataStream(filePath);
            adStream.LoadData();
            foreach (Advert ad in adStream.FetchAd())
            {
                Document doc = new Document();
                foreach (string s in ad.GetDictionary().Keys)
                {
                    string temp = descriptors.GetIndexableFormat(descriptors[s], ad[s]);
                    doc.Add(Field.Text(s, temp));

                }
                iw.AddDocument(doc);
                if (_updateCallback != null)
                {
                    _updateCallback("Added Document: " + ad["Title"]);

                }
            }
            iw.Optimize();
            iw.Close();
        }
Example #22
        public virtual void  TestFarsi()
        {
            /* build an index */
            RAMDirectory farsiIndex = new RAMDirectory();
            IndexWriter  writer     = new IndexWriter(farsiIndex, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED, null);
            Document     doc        = new Document();

            doc.Add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("body", "body", Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.AddDocument(doc, null);

            writer.Optimize(null);
            writer.Close();

            IndexReader   reader = IndexReader.Open((Directory)farsiIndex, true, null);
            IndexSearcher search = new IndexSearcher(reader);
            Query         q      = new TermQuery(new Term("body", "body"));

            // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
            // RuleBasedCollator.  However, the Arabic Locale seems to order the Farsi
            // characters properly.
            System.Globalization.CompareInfo collator = new System.Globalization.CultureInfo("ar").CompareInfo;

            // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
            // orders the U+0698 character before the U+0633 character, so the single
            // index Term below should NOT be returned by a TermRangeFilter with a Farsi
            // Collator (or an Arabic one for the case when Farsi is not supported).
            int numHits = search.Search(q, new TermRangeFilter("content", "\u062F", "\u0698", T, T, collator), 1000, null).TotalHits;

            Assert.AreEqual(0, numHits, "The index Term should not be included.");

            numHits = search.Search(q, new TermRangeFilter("content", "\u0633", "\u0638", T, T, collator), 1000, null).TotalHits;
            Assert.AreEqual(1, numHits, "The index Term should be included.");
            search.Close();
        }
Example #23
        public virtual void  TestDanish()
        {
            /* build an index */
            RAMDirectory danishIndex = new RAMDirectory();
            IndexWriter  writer      = new IndexWriter(danishIndex, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED, null);

            // Danish collation orders the words below in the given order
            // (example taken from TestSort.testInternationalSort() ).
            System.String[] words = new System.String[] { "H\u00D8T", "H\u00C5T", "MAND" };
            for (int docnum = 0; docnum < words.Length; ++docnum)
            {
                Document doc = new Document();
                doc.Add(new Field("content", words[docnum], Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("body", "body", Field.Store.YES, Field.Index.NOT_ANALYZED));
                writer.AddDocument(doc, null);
            }
            writer.Optimize(null);
            writer.Close();

            IndexReader   reader = IndexReader.Open((Directory)danishIndex, true, null);
            IndexSearcher search = new IndexSearcher(reader);

            System.Globalization.CompareInfo c = new System.Globalization.CultureInfo("da" + "-" + "dk").CompareInfo;

            // Unicode order would not include "H\u00C5T" in [ "H\u00D8T", "MAND" ],
            // but Danish collation does.
            ScoreDoc[] result = search.Search(Csrq("content", "H\u00D8T", "MAND", F, F, c), null, 1000, null).ScoreDocs;
            AssertEquals("The index Term should be included.", 1, result.Length);

            result = search.Search(Csrq("content", "H\u00C5T", "MAND", F, F, c), null, 1000, null).ScoreDocs;
            AssertEquals("The index Term should not be included.", 0, result.Length);
            search.Close();
        }
Example #24
        private void CreateIndex2()
        {
            IndexWriter iw = null;
            iw = new IndexWriter("D:\\lucene", anay, true);

            DataTable dt = SqlHelper2.QueryTable("select  a_id, b_name,u_nickname,a_title,a_content,b_id from v_article");

            foreach (DataRow dr in dt.Rows)
            {
                Document doc = new Document();
                string title = dr["a_title"].ToString();
                string content = dr["a_content"].ToString();
                string nickname = dr["u_nickname"].ToString();
                string bname = dr["b_name"].ToString();
                string bid = dr["b_id"].ToString();
                string aid = dr["a_id"].ToString();
                if (aid == "5938")
                {
                    doc.SetBoost(100);
                }
                doc.Add(Field.Keyword("title", title));
                doc.Add(Field.Keyword("content", content));
                doc.Add(Field.Keyword("nick", nickname));
                doc.Add(Field.Text("bname", bname));
                doc.Add(Field.Keyword("bid", bid));
                doc.Add(Field.Keyword("aid", aid));

                iw.AddDocument(doc);
            }
            iw.Optimize();
            iw.Close();
            Response.Write("<script>alert('建立索引完成!');</script>");
        }
Example #25
        public void CreateIndex(Analyzer analyzer)
        {
            FSDirectory fsDir = new SimpleFSDirectory(new DirectoryInfo(_indexerFolder));
            IndexWriter indexWriter = new IndexWriter(fsDir, analyzer, true, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);

            string[] files = System.IO.Directory.GetFiles(_textFilesFolder, Config.FileSearchPattern, SearchOption.AllDirectories);
            foreach (string file in files)
            {
                string name = new FileInfo(file).Name;
                string content = File.ReadAllText(file);

                Document doc = new Document();
                doc.Add(new Field(Config.Field_Path, file, Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.Add(new Field(Config.Field_Name, name, Field.Store.YES, Field.Index.ANALYZED));
                doc.Add(new Field(Config.Field_Content, content, Field.Store.NO, Field.Index.ANALYZED));

                indexWriter.AddDocument(doc);

                Console.WriteLine("{0} - {1}", file, name);
            }

            indexWriter.Optimize();
            indexWriter.Dispose();

            Console.WriteLine("File count: {0}", files.Length);
        }
Example #26
 /// <summary>
 /// Create an index document
 /// </summary>
 /// <param name="dic"></param>
 public void AddLuceneIndex(Dictionary<string, string> dic) {
     //var analyzer = new StandardAnalyzer(Version.LUCENE_30);
     var analyzer = GetAnalyzer();
     using (var directory = GetLuceneDirectory())
     using (var writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED)) {
         var doc = new Document();
         foreach (KeyValuePair<string, string> pair in dic) {
             // add new index entry
             // Field.Store.YES: store the original value in the index;
             // only values stored with Field.Store.YES can later be retrieved with doc.Get("number").
             // Field.Index.NOT_ANALYZED: index the value without tokenizing it.
             //todo:boost
             if (NotAnalyzeFields.Exists(one => one == pair.Key)) {
                 doc.Add(new Field(pair.Key, pair.Value, Field.Store.YES, Field.Index.NOT_ANALYZED));
             }
             else {
                 doc.Add(new Field(pair.Key, pair.Value, Field.Store.YES, Field.Index.ANALYZED));
             }
         }
         //doc.Boost
         writer.AddDocument(doc);
         writer.Commit();
         writer.Optimize();
         analyzer.Close();
     }
 }
Example #27
        static Analyzer analyzer = new MMSegAnalyzer(); //MMSegAnalyzer //StandardAnalyzer
        static void Main(string[] args)
        {

            string[] texts = new string[] { 
                "京华时报1月23日报道 昨天,受一股来自中西伯利亚的强冷空气影响,本市出现大风降温天气,白天最高气温只有零下7摄氏度,同时伴有6到7级的偏北风。",
                "【AppsFlyer:社交平台口碑营销效果最佳http://t.cn/zTHEQRM】社交网络分享应用的方式,在新应用获取用户非常有效率。搜索方式可为移动应用带来最高玩家质量,但玩家量和转化率较低。广告可带来最大用户量,但用户质量却相对不高,转化率也不够高。刺激性流量的转化率最高,但是平均玩家质量是最低",
                "Server Core省去了Windows Server的图形界面,改为命令行的方式来管理服务器。它不仅拥有更精简的体积与更优化的性能,还可缩短50%-60%的系统更新时间。现在,SQL Server已经支持Windows Server Core,计划内停机时间的大幅缩减让企业关键数据库应用获得更高的可用性。",
                "【各移动游戏分发渠道的优势和分成比例】联通沃商店:线下资源和话费支付能力(分成比例3:7),触控:技术和运营能力(分成比例5:5),91无线:评测、运营、数据等服务能力(分成比例4:6),UC:用户接入点、WebApp的支持(分成比例5:5),斯凯网络:硬件厂商资源(分成比例3:7)http://t.cn/zTHnwJk",
                "iPod之父创办的Nest收购家居能源数据监测服务MyEnergy,将从小小恒温器进展为家居节能整套方案 |Nest公司要做的并不只是一个小小温控器,而是提供智能家居节能整套方案。而MyEnergy积累的数据能对Nest起到很大帮助,并且也为Nest带来更多能源服务商方面的联系: http://t.cn/zTHs8qQ",
                "今日,58同城将正式与支付宝达成战略合作。这既是支付宝首次为阿里系外的企业提供担保支付服务,也是58同城推动消费者保障服务在支付和结算方面迈出的重要一步,此番合作将对整个行业产生颠覆性的影响。58要做的就是不断的了解用户痛点,不断的与虚假信息斗争,建立一个人人信赖的生活服务平台。",
                "【iPhone如何征服日本】虽然日本身为现代移动技术的摇篮,智能手机和触屏设备的普及也领先于其他地区,但iPhone仍然顺利地征服这个岛国,成为该国最畅销的手机。一方面得益于女性用户的追捧,但更多地,还是源自日本移动行业的内在问题。http://t.cn/zTHENrI",
                "【东方体育中心游泳馆今起重新开放,成人票20元/场】#爱体育#“立夏”过了,夏天近了,喜欢游泳的筒子心痒难耐了吧!@965365上海体育发布 说,经过一个多月的内装修,东方体育中心室内游泳馆今天起重新对外开放,开放时间为13:00-21:00,票价详情点大图了解~今夏挥洒汗水,“玉兰桥”走起!",
                "【深圳地铁免费伞 一年借出2000把归还70把】深圳地铁站摆放了“红雨伞”,下雨时可免费借给乘客使用。但一年来,地铁借给市民2000多把雨伞,只还回来70把,有的站甚至已经没有雨伞了。工作人员尝试联系部分借伞人,发现登记电话号码常常显示是空号……地铁站的红雨伞,你借了会还吗?(南方都市报)",
                "【银行的速度,移动互联网的速度】招商银行信用卡副总经理彭千在GMIC上分享招商银行移动互联网尝试案例:先后和开心和人人推出联名信用卡,但银行动作太慢了,推出是开心网已经不开心了,人人网已经没有人了!",
                "【黑石超级公关】4月21日的新闻联播上,黑石集团主席施瓦茨曼向清华大学捐赠1亿美元,并牵头筹集2亿美元,投资3亿美元与清华大学合作筹建“苏世民书院”的新闻被列为头条。很明显“未来中国不再是选修课,而是必修课。”1亿美元投资清华,背后是扭转坑中投形象的战略公关…",
                "【传谷歌将效仿苹果开设谷歌眼镜零售店】科技博客Business Insider今天援引消息人士说法称,谷歌正计划开设零售店,销售谷歌眼镜。谷歌门店或将专为眼镜产品服务,即只展示各类品牌、型号的“谷歌眼镜”产品。早前的消息指出,谷歌拟效仿苹果和微软,计划推出自主品牌的零售门店,以展示旗下各类产品。",
                "【武汉一高中禁止学生校内用手机 现场砸毁】近期在武昌东亭二路,一所学校收缴并砸毁学生手机24部,其中包括iPhone5等较昂贵的智能手机,也有价值数百元的普通手机,并设“手机尸体展示台”展出近期砸毁的部分手机残骸,均已经无法使用。",
                "【小偷慌不择路当街撒钱 警民携手完璧归赵】日前,一男子来到青浦一小作坊佯装购买商品,后借机溜进卧室行窃。老板娘在周围群众的帮助下将男子扭获,男子见势不妙,掏出一沓钞票当街抛撒。民警到达现场后,将男子抛撒的钱一一清点,共计6600元。警察蜀黍真心想为当天帮忙捡钱的群众竖起大拇指!",
                "#瓜大活动预告#【风起云涌南山下,群雄逐鹿辩工大】经过层层奋战,软件与微电子学院和理学院最终杀入了决赛,巅峰对决,即将展开。智慧的火花,头脑的竞技,唇舌的交战,精彩,一触即发。5月13日,周一晚七点,翱翔学生中心,我们与你不见不散!via人人网@西北工业大学_学生会",
                "#GMIC#尚伦律师事务所合伙人张明若律师:在中国,发生了很多起创业者因为法律意识淡薄,在天使投融资期间甚至没有签订法律文件的创业悲剧。这份文件的目的是帮助暂时还请不起律师的创业者。这份法律文件模板简单、对买卖双方公平、且免费!",
                "【金蝶随手记创始人谷风:先自我否定,再创新!】当创业者们把目光聚焦在娱乐、社交、手游、电商这些热门品类时,相信没有多少人会料到记账这一细分领域里也有产品能做到6000万级别的用户规模,堪称“屌丝逆袭”。http://t.cn/zTQvB16",
                "【陕西回应省纪委人员开车打架致死案:车辆是私家车 车主是纪委临时工】乾县青仁村发生斗殴,一死两伤,嫌犯开的显示单位为陕西省纪委的轿车引起质疑。陕西公安厅称,陕VHA088克莱斯勒轿车系嫌犯付某借用朋友的私家车。乾县公安局此前通报,陕VHA088车主是陕西省纪委临时工范小勇http://t.cn/zTQP5kC",
                "【经典干货!25个强大的PS炫光特效教程】这些经典的特效教程是很多教PS老师们的课堂案例,更被很多出版物摘录其中。那么今天毫无保留的全盘托出,同学们一定要好好练习。完成的同学也可以到优设群交作业哟,给大家分享你的设计过程和经验心得:)@尼拉贡戈小麦穗 →http://t.cn/zTHdOCK",
                "【树莓派的三个另类“武装”玩法】树莓派(Raspberry Pi)一直以来以极低的价格和“信用卡大小”的尺寸为人追捧。对于爱折腾的发烧友来说,永远可以在常人意想不到的地方发挥出自己折腾的功力。当一般人仅仅研究其编程玩法时,另一帮人已经琢磨着要把树莓派“武装”成另一个样子。http://t.cn/zTHFxIS",
                "【媒体札记:白宫信访办】@徐达内:19年前铊中毒的清华女生朱令的同情者,找到了“白宫请愿”这个易于操作又声势浩大的方法,要求美国将朱当年的室友孙维驱逐出境。随着意见领袖和各大媒体的加入,这一“跨国抗议” 的景观搅动了对官方公信力,冤假错案判断标准的全民讨论。http://t.cn/zTHsLIC",
                "【传第七大道页游海外月流水近1亿元http://t.cn/zTQPnnv】根据消息人士的透露,第七大道目前旗下网页游戏海外月流水收入已达近1亿元人民币,实质已是国内游戏公司海外收入第一,已超过大家所熟知的端游上市公司。孟治昀如是表示:“谁能告诉我,中国网游企业出口收入哪家公司高于第七大道?”",
                "【简介:他废掉了一切不服者】弗格森执教曼联26年,夺得13个英超冠军,4个联赛杯冠军,5个足总杯冠军,2个欧冠冠军,1个世俱杯冠军,1个优胜者杯冠军,1个欧洲超级杯。如果非要用一句话来总结他的伟大,小编个人的总结是:他废掉了一切敢于“不服者”,包括小贝同学",
                "这个世界干啥最赚钱?历史证明,持续保持对一个国家进行专制统治,通过无节制的赋税和滥发货币来掠夺全体国民的私人财富是唯一的标准答案。历史在进步,这种商业模式也在改头换面,于是,党专制替代家族专制,集体世袭权利代替个体世袭权力。既然改头换面,理论体系也得改变,这个理论体系就是特色论。",
                "【拥有“全球最美海滩”的塞舌尔将对中国游客免签!】#便民提示#准备出国白相的筒子冒个泡吧~你们有福啦。拥有“全球最美丽的海滩”和“最洁净的海水”美誉的塞舌尔,将可凭我国有效护照免签入境,最多停留30天这里还是英国威廉王子和王妃的蜜月地~~所以,别再只盯着马尔代夫一处啦",
                "【用数据告诉你手游有多热】今天,作为本届GMIC 的一部分,GGS全球移动游戏峰会召开。嘉宾和游戏开发者们探讨了移动游戏的现状与发展趋势。手游则是最为重要的一大关键词。盛大游戏总裁钱东海分享了日本最大手游公司CEO预测的数据:2015年全球游戏产业的格局中80%都是手机游戏。http://t.cn/zTHdkFY"
            };

            IndexWriter iw = new IndexWriter(FSDirectory.Open(INDEX_DIR), analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            foreach (string text in texts)
            {
                Document doc = new Document();
                doc.Add(new Field("body", text, Field.Store.YES, Field.Index.ANALYZED));
                iw.AddDocument(doc);
                Console.WriteLine("Indexed doc: {0}", text);
            }
            iw.Commit();
            iw.Optimize();
            iw.Dispose();

            Console.WriteLine();

            Console.WriteLine("Building index done!\r\n\r\n");

            while (true)
            {
                Console.Write("Enter the keyword: ");
                string keyword = Console.ReadLine();
                Search(keyword);
                Console.WriteLine();
            }

            //Console.ReadLine();
        }
Example #28
 static void Main(string[] args)
 {
     IndexWriter writer = new IndexWriter("d:/index", new StandardAnalyzer(), true);
     AddDocument(1, "我是谁", writer);
     AddDocument(2, "来自哪里", writer);
     writer.Optimize();
     writer.Close();
 }
Example #29
        public void StartEmailIndexing()
        {
            if (!Directory.Exists(GlobalData.EmailIndexPath))
                Directory.CreateDirectory(GlobalData.EmailIndexPath);

            IndexWriter index;

            PerFieldAnalyzerWrapper pfaw = new PerFieldAnalyzerWrapper(new KeywordAnalyzer());
            pfaw.AddAnalyzer("body", new StopAnalyzer());
            try
            {
                index = new IndexWriter(GlobalData.EmailIndexPath, pfaw, false);
            }
            catch
            {
                index = new IndexWriter(GlobalData.EmailIndexPath, pfaw, true);
            }

            const string PopServer = "pop.google.in";
            const int PopPort = 995;
            const string User = "******";
            const string Pass = "******";
            using (Pop3Client client = new Pop3Client(PopServer, PopPort, true, User, Pass))
            {
                client.Trace += new Action<string>(Console.WriteLine);
                //connects to Pop3 Server, Executes POP3 USER and PASS
                client.Authenticate();
                client.Stat();
                foreach (Pop3ListItem item in client.List())
                {
                    Document doc = new Document();
                    MailMessageEx message = client.RetrMailMessageEx(item);

                    doc.Add(new Field("subject", message.Subject.ToLower(), Field.Store.YES, Field.Index.NO_NORMS));
                    doc.Add(new Field("from", message.From.ToString().ToLower(), Field.Store.YES, Field.Index.NO_NORMS));
                    doc.Add(new Field("to", message.To.ToString().ToLower(), Field.Store.YES, Field.Index.NO_NORMS));
                    //doc.Add(new Field("date", message.DeliveryDate.ToLower(), Field.Store.YES, Field.Index.NO_NORMS));

                    string code = message.Body;
                    code = Regex.Replace(code, @"<\s*head\s*>(.|\n|\r)*?<\s*/\s*head\s*>", " ", RegexOptions.Compiled); //repalce <head> section with single whitespace
                    code = Regex.Replace(code, @"<\s*script (.|\n|\r)*?<\s*/\s*script\s*>", " ", RegexOptions.Compiled);//repalce remaining <script> tags from body with single whitespace
                    code = Regex.Replace(code, @"<!--(.|\n|\r)*?-->", " ", RegexOptions.Compiled);                      //repalce comments
                    code = Regex.Replace(code, @"<(.|\n|\r)*?>", " ", RegexOptions.Compiled);                           //repalce all tags with single whitespace
                    code = Regex.Replace(code, @"&.*?;", " ", RegexOptions.Compiled);                                   //replace &gt; e.t.c
                    code = Regex.Replace(code, @"\s+", " ", RegexOptions.Compiled);                                     //replace multiple whitespaces characters by single whitespace
                    code = Regex.Replace(code, @"\ufffd", " ", RegexOptions.Compiled);

                    doc.Add(new Field("body", code.ToLower(), Field.Store.YES, Field.Index.NO_NORMS));

                    index.AddDocument(doc);
                }
                client.Noop();
                client.Rset();
                client.Quit();
                index.Optimize();
                index.Close();
            }
        }
Example #30
        public virtual void  TestPhrasePrefix()
        {
            RAMDirectory indexStore = new RAMDirectory();
            IndexWriter  writer     = new IndexWriter(indexStore, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
            Document     doc1       = new Document();
            Document     doc2       = new Document();
            Document     doc3       = new Document();
            Document     doc4       = new Document();
            Document     doc5       = new Document();

            doc1.Add(new Field("body", "blueberry pie", Field.Store.YES, Field.Index.ANALYZED));
            doc2.Add(new Field("body", "blueberry strudel", Field.Store.YES, Field.Index.ANALYZED));
            doc3.Add(new Field("body", "blueberry pizza", Field.Store.YES, Field.Index.ANALYZED));
            doc4.Add(new Field("body", "blueberry chewing gum", Field.Store.YES, Field.Index.ANALYZED));
            doc5.Add(new Field("body", "piccadilly circus", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc1, null);
            writer.AddDocument(doc2, null);
            writer.AddDocument(doc3, null);
            writer.AddDocument(doc4, null);
            writer.AddDocument(doc5, null);
            writer.Optimize(null);
            writer.Close();

            IndexSearcher searcher = new IndexSearcher(indexStore, true, null);

            //PhrasePrefixQuery query1 = new PhrasePrefixQuery();
            MultiPhraseQuery query1 = new MultiPhraseQuery();
            //PhrasePrefixQuery query2 = new PhrasePrefixQuery();
            MultiPhraseQuery query2 = new MultiPhraseQuery();

            query1.Add(new Term("body", "blueberry"));
            query2.Add(new Term("body", "strawberry"));

            System.Collections.ArrayList termsWithPrefix = new System.Collections.ArrayList();
            IndexReader ir = IndexReader.Open((Directory)indexStore, true, null);

            // this TermEnum gives "piccadilly", "pie" and "pizza".
            System.String prefix = "pi";
            TermEnum      te     = ir.Terms(new Term("body", prefix + "*"), null);

            do
            {
                if (te.Term.Text.StartsWith(prefix))
                {
                    termsWithPrefix.Add(te.Term);
                }
            }while (te.Next(null));

            query1.Add((Term[])termsWithPrefix.ToArray(typeof(Term)));
            query2.Add((Term[])termsWithPrefix.ToArray(typeof(Term)));

            ScoreDoc[] result;
            result = searcher.Search(query1, null, 1000, null).ScoreDocs;
            Assert.AreEqual(2, result.Length);

            result = searcher.Search(query2, null, 1000, null).ScoreDocs;
            Assert.AreEqual(0, result.Length);
        }
Example #31
	    public virtual void Index() 
        {
            using (_writer = new IndexWriter(_index, this.Analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
            {
                MyDataHandler handler = new MyDataHandler(_writer);
                _digester.Digest(handler);
                _writer.Optimize();
            }
	    }	
Example #32
 public void Optimize()
 {
     var analyzer = new StandardAnalyzer(Version.LUCENE_30);
     using (var writer = new IndexWriter(Directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED))
     {
         writer.Optimize();
     }
     // Close the analyzer only after the writer that uses it has been disposed.
     analyzer.Close();
 }
Example #33
        public void OptimizeIndex()
        {
            var directory = FSDirectory.Open(_path);

            using (var write = new IndexWriter(directory, _analyzer, !IndexReader.IndexExists(directory), IndexWriter.MaxFieldLength.UNLIMITED))
            {
                write.Optimize();
            }
        }
Example #34
 public override void CreateIndex()
 {
     using (IndexWriter indexWriter = new IndexWriter(this.index, new UnidadeFisicaSearcher.InstancePerFieldAnalyzerWrapper().instancePerFieldAnalyzerWrapper, IndexWriter.MaxFieldLength.UNLIMITED))
     {
         GISAUtils.getAllUnidadesFisicasIds().ToList().ForEach(id => indexWriter.AddDocument(this.GetDocument(id)));
         indexWriter.Optimize();
         indexWriter.Dispose();
     }
 }
Example #35
 public override void CreateIndex()
 {
     using(IndexWriter indexWriter = new IndexWriter(this.index, new SynonymAnalyzer(new XmlSynonymEngine()), IndexWriter.MaxFieldLength.UNLIMITED))
     {
         GISAUtils.getAllTipologiasIds().ToList().ForEach(id => indexWriter.AddDocument(this.GetDocument(id)));
         indexWriter.Optimize();
         indexWriter.Dispose();
     }
 }
Example #36
 protected void btnaddindex_Click(object sender, EventArgs e)
 {
     try
     {
         if (string.IsNullOrEmpty(txttitle.Text))
         {
             txttitle.Focus();
             return;
         }
         if (string.IsNullOrEmpty(txturl.Text))
         {
             txturl.Focus();
             return;
         }
         if (string.IsNullOrEmpty(txtsite.Text))
         {
             txtsite.Focus();
             return;
         }
         if (string.IsNullOrEmpty(txtbody.Text))
         {
             txtbody.Focus();
             return;
         }
         if (string.IsNullOrEmpty(txtpublishtime.Text))
         {
             txtpublishtime.Focus();
             return;
         }
         if (string.IsNullOrEmpty(txtboost.Text))
         {
             txtboost.Focus();
             return;
         }
         int temp;
         if (!int.TryParse(txtboost.Text, out temp))
         {
             txtboost.Focus();
             return;
         }
         StockFooAnalyzer analyzer = new StockFooAnalyzer(System.Configuration.ConfigurationManager.AppSettings["AnalyzerPath"].ToString());
         FSDirectory dy = FSDirectory.Open(new DirectoryInfo(System.Configuration.ConfigurationManager.AppSettings["IndexDirectory"].ToString()));
         IndexWriter writer = new IndexWriter(dy, analyzer, false, IndexWriter.MaxFieldLength.LIMITED);
         AddDocument(writer, txttitle.Text, txturl.Text, txtsite.Text, txtbody.Text.Length > 200 ? txtbody.Text.Substring(0, 200) : txtbody.Text, txtpublishtime.Text, Convert.ToInt32(txtboost.Text));
         writer.Optimize();
         writer.Close();
         Page.RegisterStartupScript("ok", "<script>alert('新建成功!');</script>");
     }
     catch (Exception ex)
     {
         Page.RegisterStartupScript("error","<script>alert('新建失败!"+ex.Message+"');</script>");
     }
 }
Example #37
 public void Index(List<Field> fields)
 {
     var analyzer = new AccentedAnalyzer();
     var directory = FSDirectory.Open(System.IO.Directory.GetParent(IndexPath));
     var writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.LIMITED);
     AddDocument(writer, fields);
     writer.Optimize();
     writer.Commit();
     writer.Close();
 }
Example #38
        public override void  SetUp()
        {
            base.SetUp();
            RAMDirectory directory = new RAMDirectory();
            IndexWriter  writer    = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);

            for (int i = 0; i < docFields.Length; i++)
            {
                Document document = new Document();
                document.Add(new Field(field, docFields[i], Field.Store.NO, Field.Index.ANALYZED));
                writer.AddDocument(document, null);
            }
            writer.Close();
            searcher = new IndexSearcher(directory, true, null);

            // Make big index
            dir2 = new MockRAMDirectory(directory);

            // First multiply small test index:
            mulFactor = 1;
            int docCount = 0;

            do
            {
                Directory   copy        = new RAMDirectory(dir2, null);
                IndexWriter indexWriter = new IndexWriter(dir2, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED, null);
                indexWriter.AddIndexesNoOptimize(null, new[] { copy });
                docCount = indexWriter.MaxDoc();
                indexWriter.Close();
                mulFactor *= 2;
            } while (docCount < 3000);

            IndexWriter w   = new IndexWriter(dir2, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED, null);
            Document    doc = new Document();

            doc.Add(new Field("field2", "xxx", Field.Store.NO, Field.Index.ANALYZED));
            for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++)
            {
                w.AddDocument(doc, null);
            }
            doc = new Document();
            doc.Add(new Field("field2", "big bad bug", Field.Store.NO, Field.Index.ANALYZED));
            for (int i = 0; i < NUM_EXTRA_DOCS / 2; i++)
            {
                w.AddDocument(doc, null);
            }
            // optimize to 1 segment
            w.Optimize(null);
            reader = w.GetReader(null);
            w.Close();
            bigSearcher = new IndexSearcher(reader);
        }
Example #39
        // test using a sparse index (with deleted docs). The DocIdSet should be not cacheable, as it uses TermDocs if the range contains 0
        public void TestSparseIndex()
        {
            RAMDirectory dir    = new RAMDirectory();
            IndexWriter  writer = new IndexWriter(dir, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED, null);

            for (int d = -20; d <= 20; d++)
            {
                Document doc = new Document();
                doc.Add(new Field("id", d.ToString(), Field.Store.NO, Field.Index.NOT_ANALYZED));
                doc.Add(new Field("body", "body", Field.Store.NO, Field.Index.NOT_ANALYZED));
                writer.AddDocument(doc, null);
            }

            writer.Optimize(null);
            writer.DeleteDocuments(null, new Term("id", "0"));
            writer.Close();

            IndexReader   reader = IndexReader.Open((Directory)dir, true, null);
            IndexSearcher Search = new IndexSearcher(reader);

            Assert.True(reader.HasDeletions);

            ScoreDoc[] result;
            Query      q = new TermQuery(new Term("body", "body"));

            FieldCacheRangeFilter<sbyte?> fcrf;

            result = search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", -20, 20, T, T), 100, null).ScoreDocs;
            Assert.False(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0], null).IsCacheable, "DocIdSet must not be cacheable");
            Assert.AreEqual(40, result.Length, "find all");

            result = search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", 0, 20, T, T), 100, null).ScoreDocs;
            Assert.False(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0], null).IsCacheable, "DocIdSet must not be cacheable");
            Assert.AreEqual(20, result.Length, "find docs >= 0");

            result = search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", -20, 0, T, T), 100, null).ScoreDocs;
            Assert.False(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0], null).IsCacheable, "DocIdSet must not be cacheable");
            Assert.AreEqual(20, result.Length, "find docs <= 0");

            result = search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", 10, 20, T, T), 100, null).ScoreDocs;
            Assert.True(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0], null).IsCacheable, "DocIdSet must be cacheable");
            Assert.AreEqual(11, result.Length, "find docs in [10, 20]");

            result = search.Search(q, fcrf = FieldCacheRangeFilter.NewByteRange("id", -20, -10, T, T), 100, null).ScoreDocs;
            Assert.True(fcrf.GetDocIdSet(reader.GetSequentialSubReaders()[0], null).IsCacheable, "DocIdSet must be cacheable");
            Assert.AreEqual(11, result.Length, "find docs in [-20, -10]");
        }
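
The rule the assertions above exercise can be stated on its own: a FieldCacheRangeFilter DocIdSet is only safe to cache when the range cannot contain the value 0, because a range containing 0 has to fall back to TermDocs to exclude deleted documents. A minimal sketch, not part of the original test, reusing only the calls shown above:

    // Hedged sketch against the sparse index built above ("reader" has deletions).
    var zeroRange = FieldCacheRangeFilter.NewByteRange("id", -20, 20, true, true);
    var safeRange = FieldCacheRangeFilter.NewByteRange("id", 10, 20, true, true);

    // range contains 0 -> TermDocs fallback -> not cacheable
    bool c1 = zeroRange.GetDocIdSet(reader.GetSequentialSubReaders()[0], null).IsCacheable;  // false
    // range excludes 0 -> pure FieldCache lookup -> cacheable
    bool c2 = safeRange.GetDocIdSet(reader.GetSequentialSubReaders()[0], null).IsCacheable;  // true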
Example No. 40
        static TestNumericRangeQuery32()
        {
            {
                try
                {
                    // set the theoretical maximum term count for 8bit (see docs for the number)
                    BooleanQuery.MaxClauseCount = 3 * 255 * 2 + 255;

                    directory = new RAMDirectory();
                    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, MaxFieldLength.UNLIMITED, null);

                    NumericField field8      = new NumericField("field8", 8, Field.Store.YES, true),
                                 field4      = new NumericField("field4", 4, Field.Store.YES, true),
                                 field2      = new NumericField("field2", 2, Field.Store.YES, true),
                                 fieldNoTrie = new NumericField("field" + System.Int32.MaxValue, System.Int32.MaxValue, Field.Store.YES, true),
                                 ascfield8   = new NumericField("ascfield8", 8, Field.Store.NO, true),
                                 ascfield4   = new NumericField("ascfield4", 4, Field.Store.NO, true),
                                 ascfield2   = new NumericField("ascfield2", 2, Field.Store.NO, true);

                    Document doc = new Document();
                    // add fields, that have a distance to test general functionality
                    doc.Add(field8); doc.Add(field4); doc.Add(field2); doc.Add(fieldNoTrie);
                    // add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct splitting of range and inclusive/exclusive
                    doc.Add(ascfield8); doc.Add(ascfield4); doc.Add(ascfield2);

                    // Add a series of noDocs docs with increasing int values
                    for (int l = 0; l < noDocs; l++)
                    {
                        int val = distance * l + startOffset;
                        field8.SetIntValue(val);
                        field4.SetIntValue(val);
                        field2.SetIntValue(val);
                        fieldNoTrie.SetIntValue(val);

                        val = l - (noDocs / 2);
                        ascfield8.SetIntValue(val);
                        ascfield4.SetIntValue(val);
                        ascfield2.SetIntValue(val);
                        writer.AddDocument(doc, null);
                    }

                    writer.Optimize(null);
                    writer.Close();
                    searcher = new IndexSearcher(directory, true, null);
                }
                catch (System.Exception e)
                {
                    throw new System.SystemException("", e);
                }
            }
        }
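
The static constructor above only builds the index; querying one of the trie-encoded fields would normally go through NumericRangeQuery. A hedged sketch (NewIntRange is the standard factory method for this Lucene.Net line, though the exact overload is an assumption here), using the searcher initialized above:

    // Hypothetical follow-up query: the precisionStep should match the field (8 for "field8").
    Query q = NumericRangeQuery.NewIntRange("field8", 8, 100, 200, true, true);
    ScoreDoc[] hits = searcher.Search(q, null, 10, null).ScoreDocs;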
Example No. 41
        public void Indexer_AfterReIndex(object sender, Businesslogic.ReIndexEventArgs e)
        {
            Businesslogic.Indexer        i  = (Businesslogic.Indexer)sender;
            Lucene.Net.Index.IndexWriter iw = i.ContentIndex(false);

            try
            {
                umbraco.BusinessLogic.Log.Add(umbraco.BusinessLogic.LogTypes.Debug, -1, "Re-indexing Video");

                string            url = "http://umbraco.org/FullVideoXmlFeed.aspx";
                XPathNodeIterator xni = umbraco.library.GetXmlDocumentByUrl(url, 3600).Current.Select("//item");

                while (xni.MoveNext())
                {
                    string content = umbraco.library.StripHtml(xni.Current.SelectSingleNode("./content").Value);
                    string name    = xni.Current.SelectSingleNode("./title").Value;
                    string image   = xni.Current.SelectSingleNode("./image").Value;
                    string id      = xni.Current.SelectSingleNode("./id").Value;
                    string link    = xni.Current.SelectSingleNode("./link").Value;

                    Hashtable fields = new Hashtable();

                    fields.Add("name", name);
                    fields.Add("content", content);
                    fields.Add("image", image);
                    fields.Add("id", id);
                    fields.Add("url", link);

                    i.AddToIndex("video_" + id.ToString(), "videos", fields, iw);

                    umbraco.BusinessLogic.Log.Add(umbraco.BusinessLogic.LogTypes.Debug, -1, "adding " + name + " video");
                }
            }
            catch (Exception ex)
            {
                umbraco.BusinessLogic.Log.Add(umbraco.BusinessLogic.LogTypes.Debug, -1, ex.ToString());
            }

            umbraco.BusinessLogic.Log.Add(umbraco.BusinessLogic.LogTypes.Debug, -1, "Re-indexing videos - DONE");


            iw.Optimize();
            iw.Close();
        }
Example No. 42
        public virtual void  TestKeepLastNDeletionPolicyWithReader()
        {
            int N = 10;

            for (int pass = 0; pass < 2; pass++)
            {
                bool useCompoundFile = (pass % 2) != 0;

                KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N);

                Directory   dir    = new RAMDirectory();
                IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, policy, IndexWriter.MaxFieldLength.UNLIMITED, null);
                writer.UseCompoundFile = useCompoundFile;
                writer.Close();
                Term  searchTerm = new Term("content", "aaa");
                Query query      = new TermQuery(searchTerm);

                for (int i = 0; i < N + 1; i++)
                {
                    writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, policy, IndexWriter.MaxFieldLength.UNLIMITED, null);
                    writer.UseCompoundFile = useCompoundFile;
                    for (int j = 0; j < 17; j++)
                    {
                        AddDoc(writer);
                    }
                    // this is a commit
                    writer.Close();
                    IndexReader reader = IndexReader.Open(dir, policy, false, null);
                    reader.DeleteDocument(3 * i + 1, null);
                    reader.SetNorm(4 * i + 1, "content", 2.0F, null);
                    IndexSearcher searcher = new IndexSearcher(reader);
                    ScoreDoc[]    hits     = searcher.Search(query, null, 1000, null).ScoreDocs;
                    Assert.AreEqual(16 * (1 + i), hits.Length);
                    // this is a commit
                    reader.Close();
                    searcher.Close();
                }
                writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, policy, IndexWriter.MaxFieldLength.UNLIMITED, null);
                writer.UseCompoundFile = useCompoundFile;
                writer.Optimize(null);
                // this is a commit
                writer.Close();

                Assert.AreEqual(2 * (N + 2), policy.numOnInit);
                Assert.AreEqual(2 * (N + 2) - 1, policy.numOnCommit);

                IndexSearcher searcher2 = new IndexSearcher(dir, false, null);
                ScoreDoc[]    hits2     = searcher2.Search(query, null, 1000, null).ScoreDocs;
                Assert.AreEqual(176, hits2.Length);

                // Simplistic check: just verify only the past N segments_N's still
                // exist, and that I can open a reader on each:
                long gen = SegmentInfos.GetCurrentSegmentGeneration(dir, null);

                dir.DeleteFile(IndexFileNames.SEGMENTS_GEN, null);
                int expectedCount = 176;

                for (int i = 0; i < N + 1; i++)
                {
                    try
                    {
                        IndexReader reader = IndexReader.Open(dir, true, null);

                        // Work backwards in commits on what the expected
                        // count should be.
                        searcher2 = new IndexSearcher(reader);
                        hits2     = searcher2.Search(query, null, 1000, null).ScoreDocs;
                        if (i > 1)
                        {
                            if (i % 2 == 0)
                            {
                                expectedCount += 1;
                            }
                            else
                            {
                                expectedCount -= 17;
                            }
                        }
                        Assert.AreEqual(expectedCount, hits2.Length);
                        searcher2.Close();
                        reader.Close();
                        if (i == N)
                        {
                            Assert.Fail("should have failed on commits before last 5");
                        }
                    }
                    catch (System.IO.IOException)
                    {
                        if (i != N)
                        {
                            throw;
                        }
                    }
                    if (i < N)
                    {
                        dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen), null);
                    }
                    gen--;
                }

                dir.Close();
            }
        }
Example No. 43
        // builds an index with payloads in the given Directory and performs
        // different tests to verify the payload encoding
        private void  PerformTest(Directory dir)
        {
            PayloadAnalyzer analyzer = new PayloadAnalyzer();
            IndexWriter     writer   = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

            // should be in sync with value in TermInfosWriter
            int skipInterval = 16;

            int numTerms = 5;

            System.String fieldName = "f1";

            int numDocs = skipInterval + 1;

            // create content for the test documents with just a few terms
            Term[] terms = GenerateTerms(fieldName, numTerms);
            System.Text.StringBuilder sb = new System.Text.StringBuilder();
            for (int i = 0; i < terms.Length; i++)
            {
                sb.Append(terms[i].Text);
                sb.Append(" ");
            }
            System.String content = sb.ToString();


            int payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2;

            byte[] payloadData = GenerateRandomData(payloadDataLength);

            Document d = new Document();

            d.Add(new Field(fieldName, content, Field.Store.NO, Field.Index.ANALYZED));
            // add the same document multiple times to have the same payload lengths for all
            // occurrences within two consecutive skip intervals
            int offset = 0;

            for (int i = 0; i < 2 * numDocs; i++)
            {
                analyzer.SetPayloadData(fieldName, payloadData, offset, 1);
                offset += numTerms;
                writer.AddDocument(d);
            }

            // make sure we create more than one segment to test merging
            writer.Commit();

            // now make sure we have different payload lengths at the next skip point
            for (int i = 0; i < numDocs; i++)
            {
                analyzer.SetPayloadData(fieldName, payloadData, offset, i);
                offset += i * numTerms;
                writer.AddDocument(d);
            }

            writer.Optimize();
            // flush
            writer.Close();


            /*
             * Verify the index
             * first we test if all payloads are stored correctly
             */
            IndexReader reader = IndexReader.Open(dir, true);

            byte[] verifyPayloadData = new byte[payloadDataLength];
            offset = 0;
            TermPositions[] tps = new TermPositions[numTerms];
            for (int i = 0; i < numTerms; i++)
            {
                tps[i] = reader.TermPositions(terms[i]);
            }

            while (tps[0].Next())
            {
                for (int i = 1; i < numTerms; i++)
                {
                    tps[i].Next();
                }
                int freq = tps[0].Freq;

                for (int i = 0; i < freq; i++)
                {
                    for (int j = 0; j < numTerms; j++)
                    {
                        tps[j].NextPosition();
                        tps[j].GetPayload(verifyPayloadData, offset);
                        offset += tps[j].PayloadLength;
                    }
                }
            }

            for (int i = 0; i < numTerms; i++)
            {
                tps[i].Close();
            }

            AssertByteArrayEquals(payloadData, verifyPayloadData);

            /*
             *  test lazy skipping
             */
            TermPositions tp = reader.TermPositions(terms[0]);

            tp.Next();
            tp.NextPosition();
            // now we don't read this payload
            tp.NextPosition();
            Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
            byte[] payload = tp.GetPayload(null, 0);
            Assert.AreEqual(payload[0], payloadData[numTerms]);
            tp.NextPosition();

            // we don't read this payload and skip to a different document
            tp.SkipTo(5);
            tp.NextPosition();
            Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
            payload = tp.GetPayload(null, 0);
            Assert.AreEqual(payload[0], payloadData[5 * numTerms]);


            /*
             * Test different lengths at skip points
             */
            tp.Seek(terms[1]);
            tp.Next();
            tp.NextPosition();
            Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
            tp.SkipTo(skipInterval - 1);
            tp.NextPosition();
            Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
            tp.SkipTo(2 * skipInterval - 1);
            tp.NextPosition();
            Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
            tp.SkipTo(3 * skipInterval - 1);
            tp.NextPosition();
            Assert.AreEqual(3 * skipInterval - 2 * numDocs - 1, tp.PayloadLength, "Wrong payload length.");

            /*
             * Test multiple call of getPayload()
             */
            tp.GetPayload(null, 0);

            // it is forbidden to call getPayload() more than once
            // without calling nextPosition()
            Assert.Throws<IOException>(() => tp.GetPayload(null, 0), "Expected exception not thrown");

            reader.Close();

            // test long payload
            analyzer = new PayloadAnalyzer();
            writer   = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            System.String singleTerm = "lucene";

            d = new Document();
            d.Add(new Field(fieldName, singleTerm, Field.Store.NO, Field.Index.ANALYZED));
            // add a payload whose length is greater than the buffer size of BufferedIndexOutput
            payloadData = GenerateRandomData(2000);
            analyzer.SetPayloadData(fieldName, payloadData, 100, 1500);
            writer.AddDocument(d);


            writer.Optimize();
            // flush
            writer.Close();

            reader = IndexReader.Open(dir, true);
            tp     = reader.TermPositions(new Term(fieldName, singleTerm));
            tp.Next();
            tp.NextPosition();

            verifyPayloadData = new byte[tp.PayloadLength];
            tp.GetPayload(verifyPayloadData, 0);
            byte[] portion = new byte[1500];
            Array.Copy(payloadData, 100, portion, 0, 1500);

            AssertByteArrayEquals(portion, verifyPayloadData);
            reader.Close();
        }
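
The verification loop above reduces to a small payload-reading protocol, and it encodes the rule the test asserts near the end: GetPayload() may be called at most once per NextPosition(). A minimal sketch, using only the calls exercised by the test:

    // Hedged sketch: read every payload of one term ("f1"/"term", as in the test).
    TermPositions tp = reader.TermPositions(new Term("f1", "term"));
    while (tp.Next())
    {
        for (int pos = 0; pos < tp.Freq; pos++)
        {
            tp.NextPosition();
            byte[] data = tp.GetPayload(new byte[tp.PayloadLength], 0);  // once per position
        }
    }
    tp.Close();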
Example No. 44
 public void Save()
 {
     writer.Optimize();
     writer.Commit();
 }
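
Optimizing before committing, as this Save() helper does, means the commit publishes the fully merged index in one step. For context, a minimal end-to-end sketch assuming the RAMDirectory/WhitespaceAnalyzer setup used throughout these examples (on the instrumented builds shown elsewhere on this page, the same calls take a trailing state argument, e.g. Optimize(null)):

    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);

    Document doc = new Document();
    doc.Add(new Field("content", "aaa", Field.Store.NO, Field.Index.ANALYZED));
    writer.AddDocument(doc);

    writer.Optimize();  // merge down to a single segment
    writer.Close();     // Close() commits and releases the write lock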
Example No. 45
        public virtual void  TestBasic()
        {
            Directory   dir      = new MockRAMDirectory();
            Analyzer    analyzer = new StandardAnalyzer(Util.Version.LUCENE_CURRENT);
            IndexWriter writer   = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

            writer.MergeFactor = 2;
            writer.SetMaxBufferedDocs(2);
            writer.SetSimilarity(new SimpleSimilarity());


            System.Text.StringBuilder sb   = new System.Text.StringBuilder(265);
            System.String             term = "term";
            for (int i = 0; i < 30; i++)
            {
                Document d = new Document();
                sb.Append(term).Append(" ");
                System.String content = sb.ToString();
                Field         noTf    = new Field("noTf", content + (i % 2 == 0 ? "" : " notf"), Field.Store.NO, Field.Index.ANALYZED);
                noTf.OmitTermFreqAndPositions = true;
                d.Add(noTf);

                Field tf = new Field("tf", content + (i % 2 == 0 ? " tf" : ""), Field.Store.NO, Field.Index.ANALYZED);
                d.Add(tf);

                writer.AddDocument(d);
                //System.out.println(d);
            }

            writer.Optimize();
            // flush
            writer.Close();
            _TestUtil.CheckIndex(dir);

            /*
             * Verify the index
             */
            Searcher searcher = new IndexSearcher(dir, true);

            searcher.Similarity = new SimpleSimilarity();

            Term      a  = new Term("noTf", term);
            Term      b  = new Term("tf", term);
            Term      c  = new Term("noTf", "notf");
            Term      d2 = new Term("tf", "tf");
            TermQuery q1 = new TermQuery(a);
            TermQuery q2 = new TermQuery(b);
            TermQuery q3 = new TermQuery(c);
            TermQuery q4 = new TermQuery(d2);


            searcher.Search(q1, new AnonymousClassCountingHitCollector(this));
            //System.out.println(CountingHitCollector.getCount());


            searcher.Search(q2, new AnonymousClassCountingHitCollector1(this));
            //System.out.println(CountingHitCollector.getCount());



            searcher.Search(q3, new AnonymousClassCountingHitCollector2(this));
            //System.out.println(CountingHitCollector.getCount());


            searcher.Search(q4, new AnonymousClassCountingHitCollector3(this));
            //System.out.println(CountingHitCollector.getCount());



            BooleanQuery bq = new BooleanQuery();

            bq.Add(q1, Occur.MUST);
            bq.Add(q4, Occur.MUST);

            searcher.Search(bq, new AnonymousClassCountingHitCollector4(this));
            Assert.AreEqual(15, CountingHitCollector.GetCount());

            searcher.Close();
            dir.Close();
        }
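
The key switch in this test is the per-field OmitTermFreqAndPositions flag; everything else is ordinary indexing. Distilled, with the same Field API as above:

    Field noTf = new Field("noTf", "some text", Field.Store.NO, Field.Index.ANALYZED);
    noTf.OmitTermFreqAndPositions = true;  // index doc IDs only: no term frequencies or
                                           // positions, so no phrase queries on this field
    doc.Add(noTf);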
Example No. 46
        public virtual void  TestKeepAllDeletionPolicy()
        {
            for (int pass = 0; pass < 2; pass++)
            {
                bool useCompoundFile = (pass % 2) != 0;

                // Never deletes a commit
                KeepAllDeletionPolicy policy = new KeepAllDeletionPolicy(this);

                Directory dir = new RAMDirectory();
                policy.dir = dir;

                IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, policy, IndexWriter.MaxFieldLength.UNLIMITED, null);
                writer.SetMaxBufferedDocs(10);
                writer.UseCompoundFile = useCompoundFile;
                writer.SetMergeScheduler(new SerialMergeScheduler(), null);
                for (int i = 0; i < 107; i++)
                {
                    AddDoc(writer);
                }
                writer.Close();

                writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, policy, IndexWriter.MaxFieldLength.UNLIMITED, null);
                writer.UseCompoundFile = useCompoundFile;
                writer.Optimize(null);
                writer.Close();

                Assert.AreEqual(2, policy.numOnInit);

                // If we are not auto committing then there should
                // be exactly 2 commits (one per close above):
                Assert.AreEqual(2, policy.numOnCommit);

                // Test listCommits
                ICollection<IndexCommit> commits = IndexReader.ListCommits(dir, null);
                // 1 from opening writer + 2 from closing writer
                Assert.AreEqual(3, commits.Count);

                System.Collections.IEnumerator it = commits.GetEnumerator();
                // Make sure we can open a reader on each commit:
                while (it.MoveNext())
                {
                    IndexCommit commit = (IndexCommit)it.Current;
                    IndexReader r      = IndexReader.Open(commit, null, false, null);
                    r.Close();
                }

                // Simplistic check: just verify all segments_N's still
                // exist, and that I can open a reader on each:
                dir.DeleteFile(IndexFileNames.SEGMENTS_GEN, null);
                long gen = SegmentInfos.GetCurrentSegmentGeneration(dir, null);
                while (gen > 0)
                {
                    IndexReader reader = IndexReader.Open(dir, true, null);
                    reader.Close();
                    dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen), null);
                    gen--;

                    if (gen > 0)
                    {
                        // Now that we've removed a commit point, at least one
                        // index file should have been orphaned. Open & close a
                        // writer and assert that it actually removed something:
                        int preCount = dir.ListAll(null).Length;
                        writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, policy, IndexWriter.MaxFieldLength.LIMITED, null);
                        writer.Close();
                        int postCount = dir.ListAll(null).Length;
                        Assert.IsTrue(postCount < preCount);
                    }
                }

                dir.Close();
            }
        }
Example No. 47
        public virtual void  TestSimpleCase()
        {
            // main directory
            Directory dir = new RAMDirectory();
            // two auxiliary directories
            Directory aux  = new RAMDirectory();
            Directory aux2 = new RAMDirectory();

            IndexWriter writer = null;

            writer = NewWriter(dir, true);
            // add 100 documents
            AddDocs(writer, 100);
            Assert.AreEqual(100, writer.MaxDoc());
            writer.Close();

            writer = NewWriter(aux, true);
            writer.UseCompoundFile = false;             // use one without a compound file
            // add 40 documents in separate files
            AddDocs(writer, 40);
            Assert.AreEqual(40, writer.MaxDoc());
            writer.Close();

            writer = NewWriter(aux2, true);
            // add 50 documents in compound files
            AddDocs2(writer, 50);
            Assert.AreEqual(50, writer.MaxDoc());
            writer.Close();

            // test doc count before segments are merged
            writer = NewWriter(dir, false);
            Assert.AreEqual(100, writer.MaxDoc());
            writer.AddIndexesNoOptimize(new Directory[] { aux, aux2 });
            Assert.AreEqual(190, writer.MaxDoc());
            writer.Close();

            // make sure the old index is correct
            VerifyNumDocs(aux, 40);

            // make sure the new index is correct
            VerifyNumDocs(dir, 190);

            // now add another set in.
            Directory aux3 = new RAMDirectory();

            writer = NewWriter(aux3, true);
            // add 40 documents
            AddDocs(writer, 40);
            Assert.AreEqual(40, writer.MaxDoc());
            writer.Close();

            // test doc count before segments are merged/index is optimized
            writer = NewWriter(dir, false);
            Assert.AreEqual(190, writer.MaxDoc());
            writer.AddIndexesNoOptimize(new Directory[] { aux3 });
            Assert.AreEqual(230, writer.MaxDoc());
            writer.Close();

            // make sure the new index is correct
            VerifyNumDocs(dir, 230);

            VerifyTermDocs(dir, new Term("content", "aaa"), 180);

            VerifyTermDocs(dir, new Term("content", "bbb"), 50);

            // now optimize it.
            writer = NewWriter(dir, false);
            writer.Optimize();
            writer.Close();

            // make sure the new index is correct
            VerifyNumDocs(dir, 230);

            VerifyTermDocs(dir, new Term("content", "aaa"), 180);

            VerifyTermDocs(dir, new Term("content", "bbb"), 50);

            // now add a single document
            Directory aux4 = new RAMDirectory();

            writer = NewWriter(aux4, true);
            AddDocs2(writer, 1);
            writer.Close();

            writer = NewWriter(dir, false);
            Assert.AreEqual(230, writer.MaxDoc());
            writer.AddIndexesNoOptimize(new Directory[] { aux4 });
            Assert.AreEqual(231, writer.MaxDoc());
            writer.Close();

            VerifyNumDocs(dir, 231);

            VerifyTermDocs(dir, new Term("content", "bbb"), 51);
        }
Example No. 48
        public virtual void  TestNorms()
        {
            // tmp dir
            System.String tempDir = System.IO.Path.GetTempPath();
            if (tempDir == null)
            {
                throw new System.IO.IOException("java.io.tmpdir undefined, cannot run test");
            }

            // test with a single index: index1
            System.IO.FileInfo indexDir1 = new System.IO.FileInfo(System.IO.Path.Combine(tempDir, "lucenetestindex1"));
            Directory          dir1      = FSDirectory.Open(indexDir1);

            IndexWriter.Unlock(dir1);

            norms         = new System.Collections.ArrayList();
            modifiedNorms = new System.Collections.ArrayList();

            CreateIndex(dir1);
            DoTestNorms(dir1);

            // test with a single index: index2
            System.Collections.ArrayList norms1         = norms;
            System.Collections.ArrayList modifiedNorms1 = modifiedNorms;
            int numDocNorms1 = numDocNorms;

            norms         = new System.Collections.ArrayList();
            modifiedNorms = new System.Collections.ArrayList();
            numDocNorms   = 0;

            System.IO.FileInfo indexDir2 = new System.IO.FileInfo(System.IO.Path.Combine(tempDir, "lucenetestindex2"));
            Directory          dir2      = FSDirectory.Open(indexDir2);

            CreateIndex(dir2);
            DoTestNorms(dir2);

            // add index1 and index2 to a third index: index3
            System.IO.FileInfo indexDir3 = new System.IO.FileInfo(System.IO.Path.Combine(tempDir, "lucenetestindex3"));
            Directory          dir3      = FSDirectory.Open(indexDir3);

            CreateIndex(dir3);
            IndexWriter iw = new IndexWriter(dir3, anlzr, false, IndexWriter.MaxFieldLength.LIMITED);

            iw.SetMaxBufferedDocs(5);
            iw.SetMergeFactor(3);
            iw.AddIndexes(new Directory[] { dir1, dir2 });
            iw.Close();

            norms1.AddRange(norms);
            norms = norms1;
            modifiedNorms1.AddRange(modifiedNorms);
            modifiedNorms = modifiedNorms1;
            numDocNorms  += numDocNorms1;

            // test with index3
            VerifyIndex(dir3);
            DoTestNorms(dir3);

            // now with optimize
            iw = new IndexWriter(dir3, anlzr, false, IndexWriter.MaxFieldLength.LIMITED);
            iw.SetMaxBufferedDocs(5);
            iw.SetMergeFactor(3);
            iw.Optimize();
            iw.Close();
            VerifyIndex(dir3);

            dir1.Close();
            dir2.Close();
            dir3.Close();
        }
Example No. 49
        public virtual void  TestPhrasePrefix()
        {
            RAMDirectory indexStore = new RAMDirectory();
            IndexWriter  writer     = new IndexWriter(indexStore, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);

            Add("blueberry pie", writer);
            Add("blueberry strudel", writer);
            Add("blueberry pizza", writer);
            Add("blueberry chewing gum", writer);
            Add("bluebird pizza", writer);
            Add("bluebird foobar pizza", writer);
            Add("piccadilly circus", writer);
            writer.Optimize(null);
            writer.Close();

            IndexSearcher searcher = new IndexSearcher(indexStore, true, null);

            // search for "blueberry pi*":
            MultiPhraseQuery query1 = new MultiPhraseQuery();
            // search for "strawberry pi*":
            MultiPhraseQuery query2 = new MultiPhraseQuery();

            query1.Add(new Term("body", "blueberry"));
            query2.Add(new Term("body", "strawberry"));

            System.Collections.ArrayList termsWithPrefix = new System.Collections.ArrayList();
            IndexReader ir = IndexReader.Open((Directory)indexStore, true, null);

            // this TermEnum gives "piccadilly", "pie" and "pizza".
            System.String prefix = "pi";
            TermEnum      te     = ir.Terms(new Term("body", prefix), null);

            do
            {
                if (te.Term.Text.StartsWith(prefix))
                {
                    termsWithPrefix.Add(te.Term);
                }
            } while (te.Next(null));

            query1.Add((Term[])termsWithPrefix.ToArray(typeof(Term)));
            Assert.AreEqual("body:\"blueberry (piccadilly pie pizza)\"", query1.ToString());
            query2.Add((Term[])termsWithPrefix.ToArray(typeof(Term)));
            Assert.AreEqual("body:\"strawberry (piccadilly pie pizza)\"", query2.ToString());

            ScoreDoc[] result;
            result = searcher.Search(query1, null, 1000, null).ScoreDocs;
            Assert.AreEqual(2, result.Length);
            result = searcher.Search(query2, null, 1000, null).ScoreDocs;
            Assert.AreEqual(0, result.Length);

            // search for "blue* pizza":
            MultiPhraseQuery query3 = new MultiPhraseQuery();

            termsWithPrefix.Clear();
            prefix = "blue";
            te     = ir.Terms(new Term("body", prefix), null);
            do
            {
                if (te.Term.Text.StartsWith(prefix))
                {
                    termsWithPrefix.Add(te.Term);
                }
            } while (te.Next(null));
            query3.Add((Term[])termsWithPrefix.ToArray(typeof(Term)));
            query3.Add(new Term("body", "pizza"));

            result = searcher.Search(query3, null, 1000, null).ScoreDocs;
            Assert.AreEqual(2, result.Length);             // blueberry pizza, bluebird pizza
            Assert.AreEqual("body:\"(blueberry bluebird) pizza\"", query3.ToString());

            // test slop:
            query3.Slop = 1;
            result      = searcher.Search(query3, null, 1000, null).ScoreDocs;
            Assert.AreEqual(3, result.Length);             // blueberry pizza, bluebird pizza, bluebird foobar pizza

            MultiPhraseQuery query4 = new MultiPhraseQuery();

            // all terms in a MultiPhraseQuery must belong to the same field, so this should throw
            Assert.Throws <ArgumentException>(() =>
            {
                query4.Add(new Term("field1", "foo"));
                query4.Add(new Term("field2", "foobar"));
            });

            searcher.Close();
            indexStore.Close();
        }
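
Stripped of the prefix-enumeration plumbing, the MultiPhraseQuery pattern the test relies on is: Add(Term) fixes one position, Add(Term[]) lets a position match any of several terms. A minimal sketch against the same "body" field:

    MultiPhraseQuery q = new MultiPhraseQuery();
    q.Add(new Term("body", "blueberry"));   // position 1: exactly "blueberry"
    q.Add(new Term[]                        // position 2: "pie" OR "pizza"
    {
        new Term("body", "pie"),
        new Term("body", "pizza")
    });
    // matches "blueberry pie" and "blueberry pizza" in the index built above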
Example No. 50
        private void  DeleteReaderWriterConflict(bool optimize)
        {
            //Directory dir = new RAMDirectory();
            Directory dir = GetDirectory(true);

            Term searchTerm  = new Term("content", "aaa");
            Term searchTerm2 = new Term("content", "bbb");

            //  add 100 documents with term : aaa
            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);

            for (int i = 0; i < 100; i++)
            {
                AddDoc(writer, searchTerm.Text());
            }
            writer.Close();

            // OPEN READER AT THIS POINT - this should freeze the view of the
            // index at the point of having 100 "aaa" documents and 0 "bbb"
            IndexReader reader = IndexReader.Open(dir);

            Assert.AreEqual(100, reader.DocFreq(searchTerm), "first docFreq");
            Assert.AreEqual(0, reader.DocFreq(searchTerm2), "first docFreq");
            AssertTermDocsCount("first reader", reader, searchTerm, 100);
            AssertTermDocsCount("first reader", reader, searchTerm2, 0);

            // add 100 documents with term : bbb
            writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
            for (int i = 0; i < 100; i++)
            {
                AddDoc(writer, searchTerm2.Text());
            }

            // REQUEST OPTIMIZATION
            // This causes a new segment to become current for all subsequent
            // searchers. Because of this, deletions made via a previously open
            // reader, which would be applied to that reader's segment, are lost
            // for subsequent searchers/readers
            if (optimize)
            {
                writer.Optimize();
            }
            writer.Close();

            // The reader should not see the new data
            Assert.AreEqual(100, reader.DocFreq(searchTerm), "first docFreq");
            Assert.AreEqual(0, reader.DocFreq(searchTerm2), "first docFreq");
            AssertTermDocsCount("first reader", reader, searchTerm, 100);
            AssertTermDocsCount("first reader", reader, searchTerm2, 0);


            // DELETE DOCUMENTS CONTAINING TERM: aaa
            // NOTE: the reader was created when only "aaa" documents were in
            int deleted = 0;

            try
            {
                deleted = reader.Delete(searchTerm);
                Assert.Fail("Delete allowed on an index reader with stale segment information");
            }
            catch (System.IO.IOException)
            {
                /* success */
            }

            // Re-open index reader and try again. This time it should see
            // the new data.
            reader.Close();
            reader = IndexReader.Open(dir);
            Assert.AreEqual(100, reader.DocFreq(searchTerm), "first docFreq");
            Assert.AreEqual(100, reader.DocFreq(searchTerm2), "first docFreq");
            AssertTermDocsCount("first reader", reader, searchTerm, 100);
            AssertTermDocsCount("first reader", reader, searchTerm2, 100);

            deleted = reader.Delete(searchTerm);
            Assert.AreEqual(100, deleted, "deleted count");
            Assert.AreEqual(100, reader.DocFreq(searchTerm), "deleted docFreq");
            Assert.AreEqual(100, reader.DocFreq(searchTerm2), "deleted docFreq");
            AssertTermDocsCount("deleted termDocs", reader, searchTerm, 0);
            AssertTermDocsCount("deleted termDocs", reader, searchTerm2, 100);
            reader.Close();

            // CREATE A NEW READER and re-test
            reader = IndexReader.Open(dir);
            Assert.AreEqual(100, reader.DocFreq(searchTerm), "deleted docFreq");
            Assert.AreEqual(100, reader.DocFreq(searchTerm2), "deleted docFreq");
            AssertTermDocsCount("deleted termDocs", reader, searchTerm, 0);
            AssertTermDocsCount("deleted termDocs", reader, searchTerm2, 100);
            reader.Close();
        }
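
The failure mode demonstrated above suggests the safe ordering when optimizing: finish the optimize, close the writer, then re-open any readers so they see the merged segment rather than holding a stale view. A hedged sketch of that ordering, reusing the calls from the test:

    writer.Optimize();
    writer.Close();

    reader.Close();                  // discard the stale view...
    reader = IndexReader.Open(dir);  // ...and re-open against the optimized index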
Example No. 51
        /* Open pre-lockless index, add docs, do a delete &
         * setNorm, and search */
        public virtual void  ChangeIndexWithAdds(System.String dirName)
        {
            System.String origDirName = dirName;
            dirName = FullDir(dirName);

            Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(dirName));

            // open writer
            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED);

            // add 10 docs
            for (int i = 0; i < 10; i++)
            {
                AddDoc(writer, 35 + i);
            }

            // make sure writer sees right total -- writer seems not to know about deletes in .del?
            int expected;

            if (Compare(origDirName, "24") < 0)
            {
                expected = 45;
            }
            else
            {
                expected = 46;
            }
            Assert.AreEqual(expected, writer.MaxDoc(), "wrong doc count");
            writer.Close();

            // make sure searching sees right # hits
            IndexSearcher searcher = new IndexSearcher(dir, true);

            ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
            Document   d    = searcher.Doc(hits[0].Doc);

            Assert.AreEqual("21", d.Get("id"), "wrong first document");
            TestHits(hits, 44, searcher.IndexReader);
            searcher.Close();

            // make sure we can do delete & setNorm against this
            // pre-lockless segment:
            IndexReader reader     = IndexReader.Open(dir, false);
            Term        searchTerm = new Term("id", "6");
            int         delCount   = reader.DeleteDocuments(searchTerm);

            Assert.AreEqual(1, delCount, "wrong delete count");
            reader.SetNorm(22, "content", (float)2.0);
            reader.Close();

            // make sure they "took":
            searcher = new IndexSearcher(dir, true);
            hits     = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
            Assert.AreEqual(43, hits.Length, "wrong number of hits");
            d = searcher.Doc(hits[0].Doc);
            Assert.AreEqual("22", d.Get("id"), "wrong first document");
            TestHits(hits, 43, searcher.IndexReader);
            searcher.Close();

            // optimize
            writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED);
            writer.Optimize();
            writer.Close();

            searcher = new IndexSearcher(dir, true);
            hits     = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;
            Assert.AreEqual(43, hits.Length, "wrong number of hits");
            d = searcher.Doc(hits[0].Doc);
            TestHits(hits, 43, searcher.IndexReader);
            Assert.AreEqual("22", d.Get("id"), "wrong first document");
            searcher.Close();

            dir.Close();
        }
Example No. 52
        public virtual void  TestKeepLastNDeletionPolicy()
        {
            int N = 5;

            for (int pass = 0; pass < 2; pass++)
            {
                bool useCompoundFile = (pass % 2) != 0;

                Directory dir = new RAMDirectory();

                KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(this, N);

                for (int j = 0; j < N + 1; j++)
                {
                    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, policy, IndexWriter.MaxFieldLength.UNLIMITED, null);
                    writer.SetMaxBufferedDocs(10);
                    writer.UseCompoundFile = useCompoundFile;
                    for (int i = 0; i < 17; i++)
                    {
                        AddDoc(writer);
                    }
                    writer.Optimize(null);
                    writer.Close();
                }

                Assert.IsTrue(policy.numDelete > 0);
                Assert.AreEqual(N + 1, policy.numOnInit);
                Assert.AreEqual(N + 1, policy.numOnCommit);

                // Simplistic check: just verify only the past N segments_N's still
                // exist, and that I can open a reader on each:
                dir.DeleteFile(IndexFileNames.SEGMENTS_GEN, null);
                long gen = SegmentInfos.GetCurrentSegmentGeneration(dir, null);
                for (int i = 0; i < N + 1; i++)
                {
                    try
                    {
                        IndexReader reader = IndexReader.Open(dir, true, null);
                        reader.Close();
                        if (i == N)
                        {
                            Assert.Fail("should have failed on commits prior to last " + N);
                        }
                    }
                    catch (System.IO.IOException)
                    {
                        if (i != N)
                        {
                            throw;
                        }
                    }
                    if (i < N)
                    {
                        dir.DeleteFile(IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen), null);
                    }
                    gen--;
                }

                dir.Close();
            }
        }
Example No. 53
        public virtual void  TestEmptyIndex()
        {
            // creating two directories for indices
            Directory indexStoreA = new MockRAMDirectory();
            Directory indexStoreB = new MockRAMDirectory();

            // creating a document to store
            Document lDoc = new Document();

            lDoc.Add(new Field("fulltext", "Once upon a time.....", Field.Store.YES, Field.Index.ANALYZED));
            lDoc.Add(new Field("id", "doc1", Field.Store.YES, Field.Index.NOT_ANALYZED));
            lDoc.Add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));

            // creating a document to store
            Document lDoc2 = new Document();

            lDoc2.Add(new Field("fulltext", "in a galaxy far far away.....", Field.Store.YES, Field.Index.ANALYZED));
            lDoc2.Add(new Field("id", "doc2", Field.Store.YES, Field.Index.NOT_ANALYZED));
            lDoc2.Add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));

            // creating a document to store
            Document lDoc3 = new Document();

            lDoc3.Add(new Field("fulltext", "a bizarre bug manifested itself....", Field.Store.YES, Field.Index.ANALYZED));
            lDoc3.Add(new Field("id", "doc3", Field.Store.YES, Field.Index.NOT_ANALYZED));
            lDoc3.Add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));

            // creating an index writer for the first index
            IndexWriter writerA = new IndexWriter(indexStoreA, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED, null);
            // creating an index writer for the second index, but writing nothing
            IndexWriter writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED, null);

            //--------------------------------------------------------------------
            // scenario 1
            //--------------------------------------------------------------------

            // writing the documents to the first index
            writerA.AddDocument(lDoc, null);
            writerA.AddDocument(lDoc2, null);
            writerA.AddDocument(lDoc3, null);
            writerA.Optimize(null);
            writerA.Close();

            // closing the second index
            writerB.Close();

            // creating the query
            QueryParser parser = new QueryParser(Util.Version.LUCENE_CURRENT, "fulltext", new StandardAnalyzer(Util.Version.LUCENE_CURRENT));
            Query       query  = parser.Parse("handle:1");

            // building the searchables
            Searcher[] searchers = new Searcher[2];
            // VITAL STEP: adding the searcher for the empty index first, before the searcher for the populated index
            searchers[0] = new IndexSearcher(indexStoreB, true, null);
            searchers[1] = new IndexSearcher(indexStoreA, true, null);
            // creating the multiSearcher
            Searcher mSearcher = GetMultiSearcherInstance(searchers);

            // performing the search
            ScoreDoc[] hits = mSearcher.Search(query, null, 1000, null).ScoreDocs;

            Assert.AreEqual(3, hits.Length);

            // iterating over the hit documents
            for (int i = 0; i < hits.Length; i++)
            {
                mSearcher.Doc(hits[i].Doc, null);
            }
            mSearcher.Close();


            //--------------------------------------------------------------------
            // scenario 2
            //--------------------------------------------------------------------

            // adding one document to the empty index
            writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), false, IndexWriter.MaxFieldLength.LIMITED, null);
            writerB.AddDocument(lDoc, null);
            writerB.Optimize(null);
            writerB.Close();

            // building the searchables
            Searcher[] searchers2 = new Searcher[2];
            // VITAL STEP: adding the searcher for the empty index first, before the searcher for the populated index
            searchers2[0] = new IndexSearcher(indexStoreB, true, null);
            searchers2[1] = new IndexSearcher(indexStoreA, true, null);
            // creating the multiSearcher
            MultiSearcher mSearcher2 = GetMultiSearcherInstance(searchers2);

            // performing the same search
            ScoreDoc[] hits2 = mSearcher2.Search(query, null, 1000, null).ScoreDocs;

            Assert.AreEqual(4, hits2.Length);

            // iterating over the hit documents
            for (int i = 0; i < hits2.Length; i++)
            {
                // no exception should happen at this point
                mSearcher2.Doc(hits2[i].Doc, null);
            }

            // test the subSearcher() method:
            Query subSearcherQuery = parser.Parse("id:doc1");

            hits2 = mSearcher2.Search(subSearcherQuery, null, 1000, null).ScoreDocs;
            Assert.AreEqual(2, hits2.Length);
            Assert.AreEqual(0, mSearcher2.SubSearcher(hits2[0].Doc));             // hit from searchers2[0]
            Assert.AreEqual(1, mSearcher2.SubSearcher(hits2[1].Doc));             // hit from searchers2[1]
            subSearcherQuery = parser.Parse("id:doc2");
            hits2            = mSearcher2.Search(subSearcherQuery, null, 1000, null).ScoreDocs;
            Assert.AreEqual(1, hits2.Length);
            Assert.AreEqual(1, mSearcher2.SubSearcher(hits2[0].Doc));             // hit from searchers2[1]
            mSearcher2.Close();

            //--------------------------------------------------------------------
            // scenario 3
            //--------------------------------------------------------------------

            // deleting the document just added; this will cause a different exception to take place
            Term        term    = new Term("id", "doc1");
            IndexReader readerB = IndexReader.Open(indexStoreB, false, null);

            readerB.DeleteDocuments(term, null);
            readerB.Close();

            // optimizing the index with the writer
            writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(Util.Version.LUCENE_CURRENT), false, IndexWriter.MaxFieldLength.LIMITED, null);
            writerB.Optimize(null);
            writerB.Close();

            // building the searchables
            Searcher[] searchers3 = new Searcher[2];

            searchers3[0] = new IndexSearcher(indexStoreB, true, null);
            searchers3[1] = new IndexSearcher(indexStoreA, true, null);
            // creating the multiSearcher
            Searcher mSearcher3 = GetMultiSearcherInstance(searchers3);

            // performing the same search
            ScoreDoc[] hits3 = mSearcher3.Search(query, null, 1000, null).ScoreDocs;

            Assert.AreEqual(3, hits3.Length);

            // iterating over the hit documents
            for (int i = 0; i < hits3.Length; i++)
            {
                mSearcher3.Doc(hits3[i].Doc, null);
            }
            mSearcher3.Close();
            indexStoreA.Close();
            indexStoreB.Close();
        }
Example No. 54
 public void DisposeWriter()
 {
     Writer.Optimize();
     //Writer.Commit();
     Writer.Dispose();
 }
Example No. 55
        public virtual void  TestSkipTo()
        {
            try
            {
                Directory   dir    = new RAMDirectory();
                IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);

                Term ta = new Term("content", "aaa");
                for (int i = 0; i < 10; i++)
                {
                    AddDoc(writer, "aaa aaa aaa aaa");
                }

                Term tb = new Term("content", "bbb");
                for (int i = 0; i < 16; i++)
                {
                    AddDoc(writer, "bbb bbb bbb bbb");
                }

                Term tc = new Term("content", "ccc");
                for (int i = 0; i < 50; i++)
                {
                    AddDoc(writer, "ccc ccc ccc ccc");
                }

                // ensure that we deal with a single segment
                writer.Optimize();
                writer.Close();

                IndexReader reader = IndexReader.Open(dir);
                TermDocs    tdocs  = reader.TermDocs();

                // without optimization (assumption skipInterval == 16)

                // with next
                tdocs.Seek(ta);
                Assert.IsTrue(tdocs.Next());
                Assert.AreEqual(0, tdocs.Doc());
                Assert.AreEqual(4, tdocs.Freq());
                Assert.IsTrue(tdocs.Next());
                Assert.AreEqual(1, tdocs.Doc());
                Assert.AreEqual(4, tdocs.Freq());
                Assert.IsTrue(tdocs.SkipTo(0));
                Assert.AreEqual(2, tdocs.Doc());
                Assert.IsTrue(tdocs.SkipTo(4));
                Assert.AreEqual(4, tdocs.Doc());
                Assert.IsTrue(tdocs.SkipTo(9));
                Assert.AreEqual(9, tdocs.Doc());
                Assert.IsFalse(tdocs.SkipTo(10));

                // without next
                tdocs.Seek(ta);
                Assert.IsTrue(tdocs.SkipTo(0));
                Assert.AreEqual(0, tdocs.Doc());
                Assert.IsTrue(tdocs.SkipTo(4));
                Assert.AreEqual(4, tdocs.Doc());
                Assert.IsTrue(tdocs.SkipTo(9));
                Assert.AreEqual(9, tdocs.Doc());
                Assert.IsFalse(tdocs.SkipTo(10));

                // exactly skipInterval documents and therefore with optimization

                // with next
                tdocs.Seek(tb);
                Assert.IsTrue(tdocs.Next());
                Assert.AreEqual(10, tdocs.Doc());
                Assert.AreEqual(4, tdocs.Freq());
                Assert.IsTrue(tdocs.Next());
                Assert.AreEqual(11, tdocs.Doc());
                Assert.AreEqual(4, tdocs.Freq());
                Assert.IsTrue(tdocs.SkipTo(5));
                Assert.AreEqual(12, tdocs.Doc());
                Assert.IsTrue(tdocs.SkipTo(15));
                Assert.AreEqual(15, tdocs.Doc());
                Assert.IsTrue(tdocs.SkipTo(24));
                Assert.AreEqual(24, tdocs.Doc());
                Assert.IsTrue(tdocs.SkipTo(25));
                Assert.AreEqual(25, tdocs.Doc());
                Assert.IsFalse(tdocs.SkipTo(26));

                // without next
                tdocs.Seek(tb);
                Assert.IsTrue(tdocs.SkipTo(5));
                Assert.AreEqual(10, tdocs.Doc());
                Assert.IsTrue(tdocs.SkipTo(15));
                Assert.AreEqual(15, tdocs.Doc());
                Assert.IsTrue(tdocs.SkipTo(24));
                Assert.AreEqual(24, tdocs.Doc());
                Assert.IsTrue(tdocs.SkipTo(25));
                Assert.AreEqual(25, tdocs.Doc());
                Assert.IsFalse(tdocs.SkipTo(26));

                // much more than skipInterval documents and therefore with optimization

                // with next
                tdocs.Seek(tc);
                Assert.IsTrue(tdocs.Next());
                Assert.AreEqual(26, tdocs.Doc());
                Assert.AreEqual(4, tdocs.Freq());
                Assert.IsTrue(tdocs.Next());
                Assert.AreEqual(27, tdocs.Doc());
                Assert.AreEqual(4, tdocs.Freq());
                Assert.IsTrue(tdocs.SkipTo(5));
                Assert.AreEqual(28, tdocs.Doc());
                Assert.IsTrue(tdocs.SkipTo(40));
                Assert.AreEqual(40, tdocs.Doc());
                Assert.IsTrue(tdocs.SkipTo(57));
                Assert.AreEqual(57, tdocs.Doc());
                Assert.IsTrue(tdocs.SkipTo(74));
                Assert.AreEqual(74, tdocs.Doc());
                Assert.IsTrue(tdocs.SkipTo(75));
                Assert.AreEqual(75, tdocs.Doc());
                Assert.IsFalse(tdocs.SkipTo(76));

                // without next
                tdocs.Seek(tc);
                Assert.IsTrue(tdocs.SkipTo(5));
                Assert.AreEqual(26, tdocs.Doc());
                Assert.IsTrue(tdocs.SkipTo(40));
                Assert.AreEqual(40, tdocs.Doc());
                Assert.IsTrue(tdocs.SkipTo(57));
                Assert.AreEqual(57, tdocs.Doc());
                Assert.IsTrue(tdocs.SkipTo(74));
                Assert.AreEqual(74, tdocs.Doc());
                Assert.IsTrue(tdocs.SkipTo(75));
                Assert.AreEqual(75, tdocs.Doc());
                Assert.IsFalse(tdocs.SkipTo(76));

                tdocs.Close();
                reader.Close();
                dir.Close();
            }
            catch (System.IO.IOException e)
            {
                Assert.Fail(e.ToString());
            }
        }
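
For reference, the SkipTo contract that the assertions above pin down: SkipTo(target) advances to the first document numbered >= target (it never moves backwards) and returns false once the enumeration is exhausted. A minimal sketch against the index built above:

    TermDocs tdocs = reader.TermDocs();
    tdocs.Seek(new Term("content", "aaa"));
    if (tdocs.SkipTo(4))           // first doc >= 4 containing "aaa"
    {
        int doc  = tdocs.Doc();    // 4
        int freq = tdocs.Freq();   // 4 ("aaa aaa aaa aaa")
    }
    tdocs.Close();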
Example No. 56
 public void CleanUp() // Cleans up indexer
 {
     writer.Optimize();
     writer.Flush(true, true, true);
     writer.Dispose();
 }
Example No. 57
        private void  DeleteReaderReaderConflict(bool optimize)
        {
            Directory dir = GetDirectory(true);

            Term searchTerm1 = new Term("content", "aaa");
            Term searchTerm2 = new Term("content", "bbb");
            Term searchTerm3 = new Term("content", "ccc");

            //  add 100 documents with term : aaa
            //  add 100 documents with term : bbb
            //  add 100 documents with term : ccc
            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);

            for (int i = 0; i < 100; i++)
            {
                AddDoc(writer, searchTerm1.Text());
                AddDoc(writer, searchTerm2.Text());
                AddDoc(writer, searchTerm3.Text());
            }
            if (optimize)
            {
                writer.Optimize();
            }
            writer.Close();

            // OPEN TWO READERS
            // Both readers get the segment info as it exists at this time
            IndexReader reader1 = IndexReader.Open(dir);

            Assert.AreEqual(100, reader1.DocFreq(searchTerm1), "first opened");
            Assert.AreEqual(100, reader1.DocFreq(searchTerm2), "first opened");
            Assert.AreEqual(100, reader1.DocFreq(searchTerm3), "first opened");
            AssertTermDocsCount("first opened", reader1, searchTerm1, 100);
            AssertTermDocsCount("first opened", reader1, searchTerm2, 100);
            AssertTermDocsCount("first opened", reader1, searchTerm3, 100);

            IndexReader reader2 = IndexReader.Open(dir);

            Assert.AreEqual(100, reader2.DocFreq(searchTerm1), "first opened");
            Assert.AreEqual(100, reader2.DocFreq(searchTerm2), "first opened");
            Assert.AreEqual(100, reader2.DocFreq(searchTerm3), "first opened");
            AssertTermDocsCount("first opened", reader2, searchTerm1, 100);
            AssertTermDocsCount("first opened", reader2, searchTerm2, 100);
            AssertTermDocsCount("first opened", reader2, searchTerm3, 100);

            // DELETE DOCS FROM READER 2 and CLOSE IT
            // delete documents containing term: aaa
            // when the reader is closed, the segment info is updated and
            // the first reader is now stale
            reader2.Delete(searchTerm1);
            Assert.AreEqual(100, reader2.DocFreq(searchTerm1), "after delete 1");
            Assert.AreEqual(100, reader2.DocFreq(searchTerm2), "after delete 1");
            Assert.AreEqual(100, reader2.DocFreq(searchTerm3), "after delete 1");
            AssertTermDocsCount("after delete 1", reader2, searchTerm1, 0);
            AssertTermDocsCount("after delete 1", reader2, searchTerm2, 100);
            AssertTermDocsCount("after delete 1", reader2, searchTerm3, 100);
            reader2.Close();

            // Make sure reader 1 is unchanged since it was open earlier
            Assert.AreEqual(100, reader1.DocFreq(searchTerm1), "after delete 1");
            Assert.AreEqual(100, reader1.DocFreq(searchTerm2), "after delete 1");
            Assert.AreEqual(100, reader1.DocFreq(searchTerm3), "after delete 1");
            AssertTermDocsCount("after delete 1", reader1, searchTerm1, 100);
            AssertTermDocsCount("after delete 1", reader1, searchTerm2, 100);
            AssertTermDocsCount("after delete 1", reader1, searchTerm3, 100);


            // ATTEMPT TO DELETE FROM STALE READER
            // delete documents containing term: bbb
            try
            {
                reader1.Delete(searchTerm2);
                Assert.Fail("Delete allowed from a stale index reader");
            }
            catch (System.IO.IOException)
            {
                // expected: deletes through a stale reader must fail
            }

            // RECREATE READER AND TRY AGAIN
            reader1.Close();
            reader1 = IndexReader.Open(dir);
            Assert.AreEqual(100, reader1.DocFreq(searchTerm1), "reopened");
            Assert.AreEqual(100, reader1.DocFreq(searchTerm2), "reopened");
            Assert.AreEqual(100, reader1.DocFreq(searchTerm3), "reopened");
            AssertTermDocsCount("reopened", reader1, searchTerm1, 0);
            AssertTermDocsCount("reopened", reader1, searchTerm2, 100);
            AssertTermDocsCount("reopened", reader1, searchTerm3, 100);

            reader1.Delete(searchTerm2);
            Assert.AreEqual(100, reader1.DocFreq(searchTerm1), "deleted 2");
            Assert.AreEqual(100, reader1.DocFreq(searchTerm2), "deleted 2");
            Assert.AreEqual(100, reader1.DocFreq(searchTerm3), "deleted 2");
            AssertTermDocsCount("deleted 2", reader1, searchTerm1, 0);
            AssertTermDocsCount("deleted 2", reader1, searchTerm2, 0);
            AssertTermDocsCount("deleted 2", reader1, searchTerm3, 100);
            reader1.Close();

            // Open another reader to confirm that everything is deleted
            reader2 = IndexReader.Open(dir);
            Assert.AreEqual(100, reader2.DocFreq(searchTerm1), "reopened 2");
            Assert.AreEqual(100, reader2.DocFreq(searchTerm2), "reopened 2");
            Assert.AreEqual(100, reader2.DocFreq(searchTerm3), "reopened 2");
            AssertTermDocsCount("reopened 2", reader2, searchTerm1, 0);
            AssertTermDocsCount("reopened 2", reader2, searchTerm2, 0);
            AssertTermDocsCount("reopened 2", reader2, searchTerm3, 100);
            reader2.Close();

            dir.Close();
        }
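The stale-reader failure exercised above is the reason deletions are normally funneled through a single IndexWriter rather than through competing IndexReaders. A minimal sketch (assuming the same dir; exact constructor signatures vary across Lucene.Net versions):

            // Sketch: one writer serializes all deletes, so no reader can be
            // invalidated mid-operation by another reader's changes.
            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
            writer.DeleteDocuments(new Term("content", "bbb")); // buffered delete
            writer.Close();                                     // commits the delete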
Example #58
        public virtual void  TestOpenPriorSnapshot()
        {
            // Never deletes a commit
            KeepAllDeletionPolicy policy = new KeepAllDeletionPolicy(this);

            Directory dir = new MockRAMDirectory();

            policy.dir = dir;

            IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED, null);

            writer.SetMaxBufferedDocs(2);
            for (int i = 0; i < 10; i++)
            {
                AddDoc(writer);
                if ((1 + i) % 2 == 0)
                {
                    writer.Commit(null);
                }
            }
            writer.Close();

            ICollection<IndexCommit> commits = IndexReader.ListCommits(dir, null);

            Assert.AreEqual(6, commits.Count);

            // Find the most recent commit (highest generation)
            IndexCommit lastCommit = null;
            foreach (IndexCommit commit in commits)
            {
                if (lastCommit == null || commit.Generation > lastCommit.Generation)
                {
                    lastCommit = commit;
                }
            }
            Assert.IsNotNull(lastCommit);

            // Now add 1 doc and optimize
            writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED, null);
            AddDoc(writer);
            Assert.AreEqual(11, writer.NumDocs(null));
            writer.Optimize(null);
            writer.Close();

            Assert.AreEqual(7, IndexReader.ListCommits(dir, null).Count);

            // Now open writer on the commit just before optimize:
            writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED, lastCommit, null);
            Assert.AreEqual(10, writer.NumDocs(null));

            // Rollback discards the uncommitted switch to the prior commit:
            writer.Rollback(null);

            IndexReader r = IndexReader.Open(dir, true, null);

            // Still optimized, still 11 docs
            Assert.IsTrue(r.IsOptimized());
            Assert.AreEqual(11, r.NumDocs());
            r.Close();

            writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED, lastCommit, null);
            Assert.AreEqual(10, writer.NumDocs(null));
            // Commits the rollback:
            writer.Close();

            // Now 8 because we made another commit
            Assert.AreEqual(8, IndexReader.ListCommits(dir, null).Count);

            r = IndexReader.Open(dir, true, null);
            // Not optimized because we rolled it back, and now only
            // 10 docs
            Assert.IsTrue(!r.IsOptimized());
            Assert.AreEqual(10, r.NumDocs());
            r.Close();

            // Reoptimize
            writer = new IndexWriter(dir, new WhitespaceAnalyzer(), policy, IndexWriter.MaxFieldLength.LIMITED, null);
            writer.Optimize(null);
            writer.Close();

            r = IndexReader.Open(dir, true, null);
            Assert.IsTrue(r.IsOptimized());
            Assert.AreEqual(10, r.NumDocs());
            r.Close();

            // Now open writer on the commit just before optimize,
            // but this time keeping only the last commit:
            writer = new IndexWriter(dir, new WhitespaceAnalyzer(), new KeepOnlyLastCommitDeletionPolicy(), IndexWriter.MaxFieldLength.LIMITED, lastCommit, null);
            Assert.AreEqual(10, writer.NumDocs(null));

            // Reader still sees optimized index, because writer
            // opened on the prior commit has not yet committed:
            r = IndexReader.Open(dir, true, null);
            Assert.IsTrue(r.IsOptimized());
            Assert.AreEqual(10, r.NumDocs());
            r.Close();

            writer.Close();

            // Now reader sees unoptimized index:
            r = IndexReader.Open(dir, true, null);
            Assert.IsTrue(!r.IsOptimized());
            Assert.AreEqual(10, r.NumDocs());
            r.Close();

            dir.Close();
        }
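KeepAllDeletionPolicy itself is not reproduced on this page. A minimal keep-everything policy is sketched below against the stock Lucene.Net 3.0.3 IndexDeletionPolicy interface; note that the build used above appears to thread an extra state argument through its calls, so signatures may differ:

            using System.Collections.Generic;
            using Lucene.Net.Index;

            // Sketch: a policy that never deletes a commit point, so every commit
            // stays visible to IndexReader.ListCommits and usable for rollback.
            public class KeepEverythingDeletionPolicy : IndexDeletionPolicy
            {
                public void OnInit<T>(IList<T> commits) where T : IndexCommit
                {
                    // keep every commit found at writer startup
                }

                public void OnCommit<T>(IList<T> commits) where T : IndexCommit
                {
                    // never call commits[i].Delete(), so nothing is pruned
                }
            }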
Example #59
 /// <summary>
 /// Flushes the buffer and closes the index
 /// </summary>
 public void CleanUpIndexer()
 {
     writer.Optimize();
     writer.Flush(true, true, true);
     writer.Dispose();
 }
Example #60
 /// <summary>
 /// Helper function for CreateIndex(): optimizes, flushes, and closes the index
 /// </summary>
 private void CleanUpIndex()
 {
     writer.Optimize();
     writer.Flush(true, true, true);
     writer.Dispose();
 }