Пример #1
0
        /// <summary>
        /// Exercises slop handling for a three-term phrase on "field": the terms
        /// "one", "three", "five" are asserted to match one document at slop 2, while
        /// the reversed term order is asserted to miss at slop 5 and match at slop 6.
        /// </summary>
        public virtual void  TestMulipleTerms()
        {
            // Terms in the listed order.
            query.SetSlop(2);
            foreach (string text in new string[] { "one", "three", "five" })
            {
                query.Add(new Term("field", text));
            }
            ScoreDoc[] found = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(1, found.Length, "two total moves");
            QueryUtils.Check(query, searcher);

            // Reversed term order; the assertions below expect six moves to be required.
            query = new PhraseQuery();
            query.SetSlop(5);
            foreach (string text in new string[] { "five", "three", "one" })
            {
                query.Add(new Term("field", text));
            }
            found = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(0, found.Length, "slop of 5 not close enough");
            QueryUtils.Check(query, searcher);

            // Same query object, slop widened by one.
            query.SetSlop(6);
            found = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(1, found.Length, "slop of 6 just right");
            QueryUtils.Check(query, searcher);
        }
Пример #2
0
        /// <summary>
        /// Asserts that the two-term phrase "one", "five" on "Field" with a slop of 2
        /// produces no hits.
        /// </summary>
        public virtual void  TestNotCloseEnough()
        {
            Term first = new Term("Field", "one");
            Term second = new Term("Field", "five");

            query.SetSlop(2);
            query.Add(first);
            query.Add(second);

            Hits results = searcher.Search(query);
            int hitCount = results.Length();
            Assert.AreEqual(0, hitCount);
        }
        /// <summary>
        /// Handles a tag-suggestion request: reads the "wd" request value, runs a sloppy
        /// phrase query over the "tag" field of the index under "../IndexData", and writes
        /// the highlighted tag names of up to the first 10 hits to the response as a
        /// serialized <c>SearchPreviewResult</c>.
        /// </summary>
        /// <param name="context">HTTP context that supplies the request value and receives the output.</param>
        public void ProcessRequest(HttpContext context)
        {
            context.Response.ContentType = "text/plain";
            string searchKey = context.Request["wd"];
            string indexPath = context.Server.MapPath("../IndexData");

            // Build the query before touching the index so a failure while segmenting
            // the keyword cannot leave index resources open.
            PhraseQuery query = new PhraseQuery();
            foreach (string word in Picture.Utility.SplitContent.SplitWords(searchKey))
            {
                query.Add(new Term("tag", word));
            }
            query.SetSlop(100); // maximum distance allowed between the keywords

            // Entities collected from the matching documents.
            var tagModels = new List<Picture.Model.TagModel>();

            FSDirectory directory = null;
            IndexReader reader = null;
            IndexSearcher searcher = null;
            try
            {
                directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
                reader = IndexReader.Open(directory, true);
                searcher = new IndexSearcher(reader);

                // The collector holds at most the 1000 best hits of the search.
                TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
                searcher.Search(query, null, collector);

                // TopDocs(start, count): only the first 10 hits are used for the preview.
                ScoreDoc[] docs = collector.TopDocs(0, 10).scoreDocs;

                for (int i = 0; i < docs.Length; i++)
                {
                    int docId = docs[i].doc; // Lucene-internal document id
                    Document doc = searcher.Doc(docId);

                    Picture.Model.TagModel tag = new Picture.Model.TagModel();
                    tag.TagName = Picture.Utility.SplitContent.HightLight(searchKey, doc.Get("tag"));
                    tag.TId = Convert.ToInt32(doc.Get("id"));
                    tagModels.Add(tag);
                }
            }
            finally
            {
                // Release the index resources on every path; previously they were never closed.
                if (searcher != null)
                {
                    searcher.Close();
                }
                if (reader != null)
                {
                    reader.Close();
                }
                if (directory != null)
                {
                    directory.Close();
                }
            }

            SearchPreviewResult result = new SearchPreviewResult()
            {
                q = searchKey,
                p = false
            };

            foreach (var item in tagModels)
            {
                result.s.Add(item.TagName);
            }

            System.Web.Script.Serialization.JavaScriptSerializer jss = new System.Web.Script.Serialization.JavaScriptSerializer();

            context.Response.Write(jss.Serialize(result));
        }
Пример #4
0
        /// <summary>
        /// Compares the score of a three-term phrase on "field" at slop 0 and at slop 4,
        /// then runs the ordered and the reversed three-term phrase against the
        /// "palindrome" field with slop 4, expecting exactly one hit each time.
        /// </summary>
        public virtual void  TestPalyndrome3()
        {
            string[] ascending = new string[] { "one", "two", "three" };
            string[] descending = new string[] { "three", "two", "one" };

            // Non-palindrome field, slop 0: intended to go through the exact phrase scorer.
            query.SetSlop(0);
            foreach (string text in ascending)
            {
                query.Add(new Term("field", text));
            }
            ScoreDoc[] found = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(1, found.Length, "phrase found with exact phrase scorer");
            float exactScore = found[0].score;
            QueryUtils.Check(query, searcher);

            // Same query at slop 4: intended to go through the sloppy scorer even though
            // no slop is needed for this phrase.
            query.SetSlop(4);
            found = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(1, found.Length, "just sloppy enough");
            float sloppyScore = found[0].score;
            Assert.AreEqual(exactScore, sloppyScore, SCORE_COMP_THRESH, "exact scorer and sloppy scorer score the same when slop does not matter");
            QueryUtils.Check(query, searcher);

            // Ordered terms against the palindrome field.
            // Slop must be at least four for both ordered and reversed to match.
            query = new PhraseQuery();
            query.SetSlop(4);
            foreach (string text in ascending)
            {
                query.Add(new Term("palindrome", text));
            }
            found = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(1, found.Length, "just sloppy enough");
            float orderedScore = found[0].score;
            QueryUtils.Check(query, searcher);

            // Disabled for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
            //Assert.IsTrue(sloppyScore+SCORE_COMP_THRESH<orderedScore,"ordered scores higher in palindrome");

            // Reversed terms against the palindrome field, same slop.
            query = new PhraseQuery();
            query.SetSlop(4);
            foreach (string text in descending)
            {
                query.Add(new Term("palindrome", text));
            }
            found = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(1, found.Length, "just sloppy enough");
            float reversedScore = found[0].score;
            QueryUtils.Check(query, searcher);

            // Disabled for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
            //Assert.IsTrue(sloppyScore+SCORE_COMP_THRESH<reversedScore,"reversed scores higher in palindrome");
            //Assert.AreEqual(orderedScore, reversedScore, SCORE_COMP_THRESH, "ordered or reversed does not matter");
        }
Пример #5
0
        /// <summary>
        /// Searches the index at D:\lucenedir for <paramref name="kw"/> in the "msg" and
        /// "title" fields, binds the matches to <c>BookListRepeater</c>, and then passes the
        /// keyword to <c>AddKeyWord</c>.
        /// </summary>
        /// <param name="kw">Keyword text; it is lower-cased before being segmented and searched.</param>
        protected void SearchContent(string kw)
        {
            string indexPath = @"D:\lucenedir";
            // Original author's note: PanGu segmentation is case sensitive by default,
            // so the keyword is lower-cased before searching.
            kw = kw.ToLower();

            // Segment the keyword once and feed both phrase queries from the same pass
            // (the keyword used to be segmented twice).
            PhraseQuery queryMsg = new PhraseQuery();
            PhraseQuery queryTitle = new PhraseQuery();
            foreach (string word in Common.WebCommon.PanGuSplit(kw))
            {
                queryMsg.Add(new Term("msg", word));     // match on the article body
                queryTitle.Add(new Term("title", word)); // match on the title
            }
            // Maximum distance allowed between the query words inside a document.
            queryMsg.SetSlop(100);
            queryTitle.SetSlop(100);

            // A document qualifies when either the body or the title phrase matches.
            BooleanQuery query = new BooleanQuery();
            query.Add(queryMsg, BooleanClause.Occur.SHOULD);
            query.Add(queryTitle, BooleanClause.Occur.SHOULD);

            List<SearchResult> list = new List<SearchResult>();

            FSDirectory directory = null;
            IndexReader reader = null;
            IndexSearcher searcher = null;
            try
            {
                directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
                reader = IndexReader.Open(directory, true);
                searcher = new IndexSearcher(reader);

                // The collector holds at most the 1000 best hits of the search.
                TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
                searcher.Search(query, null, collector);

                // TopDocs(start, count) can be used for paging; here every collected hit is requested.
                ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;
                for (int i = 0; i < docs.Length; i++)
                {
                    // ScoreDoc only carries the Lucene-internal id; the stored document
                    // is loaded on demand through the searcher.
                    int docId = docs[i].doc;
                    Document doc = searcher.Doc(docId);

                    SearchResult result = new SearchResult();
                    result.ContentDescription = WebCommon.Highlight(kw, WebCommon.CutString(doc.Get("msg"), 150));
                    result.Title = doc.Get("title");
                    result.Id = Convert.ToInt32(doc.Get("id"));
                    result.PublishDate = Convert.ToDateTime(doc.Get("PublishDate"));
                    result.ISBN = doc.Get("ISBN");
                    result.Author = doc.Get("Author");
                    result.UnitPrice = Convert.ToDecimal(doc.Get("UnitPrice"));

                    list.Add(result);
                }
            }
            finally
            {
                // Release the index resources on every path; previously they were never closed.
                if (searcher != null)
                {
                    searcher.Close();
                }
                if (reader != null)
                {
                    reader.Close();
                }
                if (directory != null)
                {
                    directory.Close();
                }
            }

            this.BookListRepeater.DataSource = list;
            this.BookListRepeater.DataBind();

            AddKeyWord(kw);
        }
Пример #6
0
        /// <summary>
        /// Indexes two documents ("a c" and "a b c") with <c>SimpleSimilarity</c> and runs
        /// a term query, a boolean query and a phrase query (default slop, then slop 2)
        /// against them. Each search hands its hits to an <c>AnonymousClassHitCollector*</c>
        /// instance; those collectors are defined outside this block.
        /// </summary>
        public virtual void  TestSimilarity_()
        {
            RAMDirectory store  = new RAMDirectory();
            IndexWriter  writer = new IndexWriter(store, new SimpleAnalyzer(), true);

            writer.SetSimilarity(new SimpleSimilarity());

            Document d1 = new Document();
            d1.Add(Field.Text("Field", "a c"));

            Document d2 = new Document();
            d2.Add(Field.Text("Field", "a b c"));

            writer.AddDocument(d1);
            writer.AddDocument(d2);
            writer.Optimize();
            writer.Close();

            Searcher searcher = new IndexSearcher(store);
            try
            {
                searcher.SetSimilarity(new SimpleSimilarity());

                Term a = new Term("Field", "a");
                Term b = new Term("Field", "b");
                Term c = new Term("Field", "c");

                // Single term.
                searcher.Search(new TermQuery(b), new AnonymousClassHitCollector(this));

                // Two optional term clauses.
                BooleanQuery bq = new BooleanQuery();
                bq.Add(new TermQuery(a), false, false);
                bq.Add(new TermQuery(b), false, false);
                searcher.Search(bq, new AnonymousClassHitCollector1(this));

                // Phrase "a c" with the default slop.
                PhraseQuery pq = new PhraseQuery();
                pq.Add(a);
                pq.Add(c);
                searcher.Search(pq, new AnonymousClassHitCollector2(this));

                // Same phrase with a slop of 2.
                pq.SetSlop(2);
                searcher.Search(pq, new AnonymousClassHitCollector3(this));
            }
            finally
            {
                // Close the searcher even when a collector throws; it was never closed before.
                searcher.Close();
            }
        }
Пример #7
0
        /// <summary>
        /// Exercises slop handling for a three-term phrase on "Field" after calling
        /// <c>SetUp()</c>: "one", "three", "five" is asserted to match one document at
        /// slop 2; the reversed order is asserted to miss at slop 5 and match at slop 6.
        /// </summary>
        public virtual void  TestMulipleTerms()
        {
            SetUp();

            // Terms in the listed order.
            query.SetSlop(2);
            foreach (string text in new string[] { "one", "three", "five" })
            {
                query.Add(new Term("Field", text));
            }
            Hits results = searcher.Search(query);
            Assert.AreEqual(1, results.Length(), "two total moves");

            // Reversed term order; the assertions below expect six moves to be required.
            query = new PhraseQuery();
            query.SetSlop(5);
            foreach (string text in new string[] { "five", "three", "one" })
            {
                query.Add(new Term("Field", text));
            }
            results = searcher.Search(query);
            Assert.AreEqual(0, results.Length(), "slop of 5 not close enough");

            // Same query object, slop widened by one.
            query.SetSlop(6);
            results = searcher.Search(query);
            Assert.AreEqual(1, results.Length(), "slop of 6 just right");
        }
Пример #8
0
        /// <summary>
        /// Indexes two documents ("a c" and "a b c") with <c>SimpleSimilarity</c> and runs
        /// a term query, a boolean query and a phrase query (default slop, then slop 2)
        /// against them. Each search hands its hits to an <c>AnonymousClassCollector*</c>
        /// instance; those collectors are defined outside this block.
        /// </summary>
        public virtual void  TestSimilarity_Renamed()
        {
            RAMDirectory store  = new RAMDirectory();
            IndexWriter  writer = new IndexWriter(store, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            writer.SetSimilarity(new SimpleSimilarity());

            Document d1 = new Document();
            d1.Add(new Field("field", "a c", Field.Store.YES, Field.Index.ANALYZED));

            Document d2 = new Document();
            d2.Add(new Field("field", "a b c", Field.Store.YES, Field.Index.ANALYZED));

            writer.AddDocument(d1);
            writer.AddDocument(d2);
            writer.Optimize();
            writer.Close();

            Searcher searcher = new IndexSearcher(store);
            try
            {
                searcher.SetSimilarity(new SimpleSimilarity());

                Term a = new Term("field", "a");
                Term b = new Term("field", "b");
                Term c = new Term("field", "c");

                // Single term.
                searcher.Search(new TermQuery(b), new AnonymousClassCollector(this));

                // Two optional term clauses.
                BooleanQuery bq = new BooleanQuery();
                bq.Add(new TermQuery(a), BooleanClause.Occur.SHOULD);
                bq.Add(new TermQuery(b), BooleanClause.Occur.SHOULD);
                searcher.Search(bq, new AnonymousClassCollector1(this));

                // Phrase "a c" with the default slop.
                PhraseQuery pq = new PhraseQuery();
                pq.Add(a);
                pq.Add(c);
                searcher.Search(pq, new AnonymousClassCollector2(this));

                // Same phrase with a slop of 2.
                pq.SetSlop(2);
                searcher.Search(pq, new AnonymousClassCollector3(this));
            }
            finally
            {
                // Close the searcher even when a collector throws; it was never closed before.
                searcher.Close();
            }
        }
Пример #9
0
        /// <summary>
        /// Runs four phrase queries against the "nonexist" field: two that are asserted
        /// to match two documents, and two phrases with repeated terms that are asserted
        /// to match nothing even at a slop of 1000.
        /// </summary>
        public virtual void  TestNonExistingPhrase()
        {
            // Phrase without repetitions; expected in 2 docs with slop 2.
            foreach (string text in new string[] { "phrase", "notexist", "found" })
            {
                query.Add(new Term("nonexist", text));
            }
            query.SetSlop(2);

            ScoreDoc[] found = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(2, found.Length, "phrase without repetitions exists in 2 docs");
            QueryUtils.Check(query, searcher);

            // Phrase with a repeated term; expected in 2 docs with slop 1.
            query = new PhraseQuery();
            foreach (string text in new string[] { "phrase", "exist", "exist" })
            {
                query.Add(new Term("nonexist", text));
            }
            query.SetSlop(1);

            found = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(2, found.Length, "phrase with repetitions exists in two docs");
            QueryUtils.Check(query, searcher);

            // Phrase I with repetitions; expected in no doc regardless of slop.
            query = new PhraseQuery();
            foreach (string text in new string[] { "phrase", "notexist", "phrase" })
            {
                query.Add(new Term("nonexist", text));
            }
            query.SetSlop(1000);

            found = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(0, found.Length, "nonexisting phrase with repetitions does not exist in any doc");
            QueryUtils.Check(query, searcher);

            // Phrase II with repetitions; expected in no doc regardless of slop.
            query = new PhraseQuery();
            foreach (string text in new string[] { "phrase", "exist", "exist", "exist" })
            {
                query.Add(new Term("nonexist", text));
            }
            query.SetSlop(1000);

            found = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(0, found.Length, "nonexisting phrase with repetitions does not exist in any doc");
            QueryUtils.Check(query, searcher);
        }
Пример #10
0
        /// <summary>
        /// After calling <c>SetUp()</c>, asserts that the reversed pair "two", "one" on
        /// "Field" matches one document at slop 2, while "three", "one" at the same slop
        /// matches none.
        /// </summary>
        public virtual void  TestOrderDoesntMatter()
        {
            SetUp();

            Term one = new Term("Field", "one");

            // Slop must be at least two for a reverse-order match.
            query.SetSlop(2);
            query.Add(new Term("Field", "two"));
            query.Add(one);
            Hits results = searcher.Search(query);
            Assert.AreEqual(1, results.Length(), "just sloppy enough");

            // Terms further apart: the same slop is asserted to be insufficient.
            query = new PhraseQuery();
            query.SetSlop(2);
            query.Add(new Term("Field", "three"));
            query.Add(new Term("Field", "one"));
            results = searcher.Search(query);
            Assert.AreEqual(0, results.Length(), "not sloppy enough");
        }
Пример #11
0
        /// <summary>
        /// Asserts that the reversed pair "two", "one" on "field" matches one document at
        /// slop 2, while "three", "one" at the same slop matches none.
        /// </summary>
        public virtual void  TestOrderDoesntMatter()
        {
            // Slop must be at least two for a reverse-order match.
            query.SetSlop(2);
            foreach (string text in new string[] { "two", "one" })
            {
                query.Add(new Term("field", text));
            }
            ScoreDoc[] found = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(1, found.Length, "just sloppy enough");
            QueryUtils.Check(query, searcher);

            // Terms further apart: the same slop is asserted to be insufficient.
            query = new PhraseQuery();
            query.SetSlop(2);
            foreach (string text in new string[] { "three", "one" })
            {
                query.Add(new Term("field", text));
            }
            found = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(0, found.Length, "not sloppy enough");
            QueryUtils.Check(query, searcher);
        }
Пример #12
0
        /// <summary>
        /// Indexes three documents in which "firstname" and "lastname" are separated by
        /// zero, one and two other words, searches for the phrase with the maximum slop,
        /// and asserts that all three match with scores that decrease as the gap grows.
        /// </summary>
        public virtual void  TestSlopScoring()
        {
            Directory   directory = new RAMDirectory();
            IndexWriter writer    = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            Document doc = new Document();
            doc.Add(new Field("field", "foo firstname lastname foo", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            Document doc2 = new Document();
            doc2.Add(new Field("field", "foo firstname xxx lastname foo", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc2);

            Document doc3 = new Document();
            doc3.Add(new Field("field", "foo firstname xxx yyy lastname foo", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc3);

            writer.Optimize();
            writer.Close();

            Searcher searcher = new IndexSearcher(directory);
            try
            {
                PhraseQuery query = new PhraseQuery();
                query.Add(new Term("field", "firstname"));
                query.Add(new Term("field", "lastname"));
                query.SetSlop(System.Int32.MaxValue);

                ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;
                Assert.AreEqual(3, hits.Length);

                // Make sure that those matches where the terms appear closer to
                // each other get a higher score:
                Assert.AreEqual(0.71, hits[0].score, 0.01);
                Assert.AreEqual(0, hits[0].doc);
                Assert.AreEqual(0.44, hits[1].score, 0.01);
                Assert.AreEqual(1, hits[1].doc);
                Assert.AreEqual(0.31, hits[2].score, 0.01);
                Assert.AreEqual(2, hits[2].doc);
                QueryUtils.Check(query, searcher);
            }
            finally
            {
                // Close the searcher even when an assertion fails; it was never closed before.
                searcher.Close();
            }
        }
Пример #13
0
        /// <summary>
        /// After calling <c>SetUp()</c>, asserts that a slop of 1 matches "one", "two" in
        /// order on "Field" but does not match the reversed pair.
        /// </summary>
        public virtual void  TestSlop1()
        {
            SetUp();

            // In order: a slop of 1 is asserted to be enough.
            query.SetSlop(1);
            foreach (string text in new string[] { "one", "two" })
            {
                query.Add(new Term("Field", text));
            }
            Hits results = searcher.Search(query);
            Assert.AreEqual(1, results.Length(), "in order");

            // Out of order: a slop of 1 is asserted to be too small; it must be at least 2.
            query = new PhraseQuery();
            query.SetSlop(1);
            foreach (string text in new string[] { "two", "one" })
            {
                query.Add(new Term("Field", text));
            }
            results = searcher.Search(query);
            Assert.AreEqual(0, results.Length(), "reversed, slop not 2 or more");
        }
Пример #14
0
        /// <summary>
        /// Asserts that a slop of 1 matches "one", "two" in order on "field" but does not
        /// match the reversed pair.
        /// </summary>
        public virtual void  TestSlop1()
        {
            Term one = new Term("field", "one");
            Term two = new Term("field", "two");

            // In order: a slop of 1 is asserted to be enough.
            query.SetSlop(1);
            query.Add(one);
            query.Add(two);
            ScoreDoc[] found = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(1, found.Length, "in order");
            QueryUtils.Check(query, searcher);

            // Out of order: a slop of 1 is asserted to be too small; it must be at least 2.
            query = new PhraseQuery();
            query.SetSlop(1);
            query.Add(new Term("field", "two"));
            query.Add(new Term("field", "one"));
            found = searcher.Search(query, null, 1000).scoreDocs;
            Assert.AreEqual(0, found.Length, "reversed, slop not 2 or more");
            QueryUtils.Check(query, searcher);
        }
Пример #15
0
        /// <summary>
        /// Runs <paramref name="iter"/> random sloppy phrase queries against
        /// <paramref name="s"/> and returns the sum of <c>GetSum()</c> over all of them.
        /// </summary>
        /// <param name="s">Searcher the queries are executed on.</param>
        /// <param name="termsInIndex">Upper bound (exclusive) for the random term number; also used as the slop.</param>
        /// <param name="maxClauses">Bound for the random clause count; every query gets at least two clauses.</param>
        /// <param name="iter">Number of queries to run.</param>
        /// <returns>The accumulated collector sums.</returns>
        public virtual int DoSloppyPhrase(IndexSearcher s, int termsInIndex, int maxClauses, int iter)
        {
            int total = 0;

            for (int round = 0; round < iter; round++)
            {
                // At least 2 clauses per query.
                int clauseCount = 2 + r.Next(maxClauses - 1);
                PhraseQuery phrase = new PhraseQuery();

                int position = 0;
                while (position < clauseCount)
                {
                    // Term text is a single character offset from 'A' by the random term number.
                    char letter = (char)(r.Next(termsInIndex) + 'A');
                    phrase.Add(new Term("f", letter.ToString()), position);
                    position++;
                }
                phrase.SetSlop(termsInIndex); // this could be random too

                CountingHitCollector counter = new CountingHitCollector();
                s.Search(phrase, counter);
                total += counter.GetSum();
            }

            return total;
        }
Пример #16
0
        /// <summary>
        /// Searches the index under "~/IndexData" for the "SearchKey" query-string value in
        /// the "content" field and binds the matching books to <c>Repeater1</c>.
        /// </summary>
        private void SearchFromIndexData() {
            string indexPath = Context.Server.MapPath("~/IndexData");
            // Read the keyword once; it is needed for both the query and the highlighting.
            string searchKey = Request.QueryString["SearchKey"];

            // Build the query before touching the index so a failure while segmenting
            // the keyword cannot leave index resources open.
            PhraseQuery query = new PhraseQuery();
            foreach(string word in Common.SplitContent.SplitWords(searchKey)) {
                query.Add(new Term("content", word));
            }
            query.SetSlop(100); // maximum distance allowed between the keywords

            // Entities shown by the repeater.
            List<PZYM.Shop.Model.Books> bookResult = new List<PZYM.Shop.Model.Books>();

            FSDirectory directory = null;
            IndexReader reader = null;
            IndexSearcher searcher = null;
            try {
                directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
                reader = IndexReader.Open(directory, true);
                searcher = new IndexSearcher(reader);

                // The collector holds at most the 1000 best hits of the search.
                TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
                searcher.Search(query, null, collector);

                // TopDocs(start, count) can be used for paging; here every collected hit is requested.
                ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;

                for(int i = 0; i < docs.Length; i++) {
                    int docId = docs[i].doc; // Lucene-internal document id
                    Document doc = searcher.Doc(docId);

                    PZYM.Shop.Model.Books book = new PZYM.Shop.Model.Books();
                    book.Title = doc.Get("title");
                    // Highlight the search keyword inside the stored content.
                    book.ContentDescription = Common.SplitContent.HightLight(searchKey, doc.Get("content"));
                    book.Id = Convert.ToInt32(doc.Get("id"));
                    bookResult.Add(book);
                }
            }
            finally {
                // Release the index resources on every path; previously they were never closed.
                if(searcher != null) {
                    searcher.Close();
                }
                if(reader != null) {
                    reader.Close();
                }
                if(directory != null) {
                    directory.Close();
                }
            }

            Repeater1.DataSource = bookResult;
            Repeater1.DataBind();
        }
Пример #17
0
        /// <summary>
        /// Indexes <paramref name="doc"/> into a fresh in-memory directory, runs
        /// <paramref name="query"/> with the given slop, and asserts the total hit count.
        /// </summary>
        /// <param name="doc">The single document to index.</param>
        /// <param name="query">Phrase query to run; its slop is overwritten with <paramref name="slop"/>.</param>
        /// <param name="slop">Slop to set on the query before searching.</param>
        /// <param name="expectedNumResults">Expected value of <c>TotalHits</c>.</param>
        /// <returns>The maximum score reported by the search.</returns>
        private float CheckPhraseQuery(Document doc, PhraseQuery query, int slop, int expectedNumResults)
        {
            query.SetSlop(slop);

            RAMDirectory ramDir = new RAMDirectory();
            try
            {
                WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
                IndexWriter        writer   = new IndexWriter(ramDir, analyzer, MaxFieldLength.UNLIMITED);

                writer.AddDocument(doc);
                writer.Close();

                IndexSearcher searcher = new IndexSearcher(ramDir);
                try
                {
                    TopDocs td = searcher.Search(query, null, 10);

                    Assert.AreEqual(expectedNumResults, td.TotalHits, "slop: " + slop + "  query: " + query + "  doc: " + doc + "  Wrong number of hits");

                    return td.GetMaxScore();
                }
                finally
                {
                    // Runs on assertion failure too, so the searcher no longer leaks.
                    searcher.Close();
                }
            }
            finally
            {
                ramDir.Close();
            }
        }
Пример #18
0
        /// <summary>
        /// Searches the index stored at <paramref name="IndexSavePath"/> for
        /// <paramref name="searchKey"/> in the "content" field.
        /// </summary>
        /// <param name="IndexSavePath">Directory that holds the index.</param>
        /// <param name="searchKey">User keyword; it is segmented into words before searching.</param>
        /// <returns>One <c>CommentSet</c> (content and id) per collected hit.</returns>
        private static List<CommentSet> SearchFromIndexData(string IndexSavePath, string searchKey)
        {
            // Build the query before touching the index so a failure while segmenting
            // the keyword cannot leave index resources open.
            PhraseQuery query = new PhraseQuery();
            foreach (string word in Picture.Utility.SplitContent.SplitWords(searchKey))
            {
                query.Add(new Term("content", word));
            }
            query.SetSlop(100); // maximum distance allowed between the keywords

            var commontModels = new List<CommentSet>();

            FSDirectory directory = null;
            IndexReader reader = null;
            IndexSearcher searcher = null;
            try
            {
                directory = FSDirectory.Open(new DirectoryInfo(IndexSavePath), new NoLockFactory());
                reader = IndexReader.Open(directory, true);
                searcher = new IndexSearcher(reader);

                // The collector holds at most the 1000 best hits of the search.
                TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
                searcher.Search(query, null, collector);

                // TopDocs(start, count) can be used for paging; here every collected hit is requested.
                ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;

                for (int i = 0; i < docs.Length; i++)
                {
                    int docId = docs[i].doc; // Lucene-internal document id
                    Document doc = searcher.Doc(docId);

                    CommentSet commont = new CommentSet();
                    commont.Content = doc.Get("content");
                    commont.Id = Convert.ToInt32(doc.Get("id"));
                    commontModels.Add(commont);
                }
            }
            finally
            {
                // Release the index resources on every path; previously they were never closed.
                if (searcher != null)
                {
                    searcher.Close();
                }
                if (reader != null)
                {
                    reader.Close();
                }
                if (directory != null)
                {
                    directory.Close();
                }
            }

            return commontModels;
        }
Пример #19
0
 /// <summary>
 /// Builds a phrase query on <paramref name="field"/> from the given texts, in order,
 /// with the given boost and slop.
 /// </summary>
 /// <param name="boost">Boost to set on the query.</param>
 /// <param name="slop">Slop to set on the query.</param>
 /// <param name="field">Field name used for every term.</param>
 /// <param name="texts">Term texts, added in the order given.</param>
 /// <returns>The configured phrase query.</returns>
 protected Query Pq(float boost, int slop, String field, params String[] texts)
 {
     PhraseQuery phrase = new PhraseQuery();
     phrase.SetSlop(slop);
     phrase.SetBoost(boost);
     for (int i = 0; i < texts.Length; i++)
     {
         phrase.Add(new Term(field, texts[i]));
     }
     return phrase;
 }
Пример #20
0
        /// <summary>
        /// Builds a boolean query by hand (MUST term, MUST phrase with slop 2, SHOULD
        /// wildcard) and asserts that <c>QueryBuilder</c> produces a query with the same
        /// string representation.
        /// </summary>
        public void CompositeTermPhraseWildcardTests()
        {
            // Hand-built sub-queries.
            TermQuery termClause = new TermQuery(new Term("_name", "value"));

            PhraseQuery phraseClause = new PhraseQuery();
            phraseClause.Add(new Term("_name", "phrase"));
            phraseClause.SetSlop(2);

            WildcardQuery wildcardClause = new WildcardQuery(new Term("_name", "*wildcard*"));

            // Clause order matches the builder calls below.
            BooleanQuery expected = new BooleanQuery();
            expected.Add(termClause, BooleanClause.Occur.MUST);
            expected.Add(phraseClause, BooleanClause.Occur.MUST);
            expected.Add(wildcardClause, BooleanClause.Occur.SHOULD);
            string expectedText = expected.ToString();

            // The same query expressed through the builder.
            QueryBuilder builder = new QueryBuilder();
            builder.Setup(
                clause => clause.Term("_name", "value"),
                clause => clause.Phrase(2).AddTerm("_name", "phrase"),
                clause => clause.WildCard("_name", "*wildcard*", BooleanClause.Occur.SHOULD));
            Query built = builder.Build();
            string actualText = built.ToString();

            Assert.AreEqual(expectedText, actualText);
            Console.Write(expectedText);
        }
Пример #21
0
        /// <summary>
        /// Builds a MUST phrase clause (slop 2, boost 10) on the lower-case term "value" by
        /// hand and asserts that <c>QueryBuilder</c>, given the mixed-case text "Value",
        /// produces a query with the same string representation.
        /// </summary>
        public void BoostedCaseInsensitivePhrase()
        {
            // Hand-built expected query.
            PhraseQuery phraseClause = new PhraseQuery();
            phraseClause.Add(new Term("_name", "value"));
            phraseClause.SetSlop(2);
            phraseClause.SetBoost(10);

            BooleanQuery expected = new BooleanQuery();
            expected.Add(phraseClause, BooleanClause.Occur.MUST);
            string expectedText = expected.ToString();

            // The same query expressed through the builder, with different casing.
            QueryBuilder builder = new QueryBuilder();
            builder.Setup(clause => clause.Phrase(2, 10).AddTerm("_name", "Value"));
            Query built = builder.Build();
            string actualText = built.ToString();

            Assert.AreEqual(expectedText, actualText);
            Console.Write(expectedText);
        }
Пример #22
0
 /*
  * Looks for a suffix of src that is also a prefix of dest. For every such suffix that
  * is shorter than dest, a PhraseQuery made of all src terms followed by the remaining
  * dest terms is added to expandQueries (unless an equal key is already present).
  *
  * ex1) src="a b", dest="c d"       => no overlap
  * ex2) src="a b", dest="a b c"     => no overlap
  * ex3) src="a b", dest="b c"       => overlap; expandQueries={"a b c"}
  * ex4) src="a b c", dest="b c d"   => overlap; expandQueries={"a b c d"}
  * ex5) src="a b c", dest="b c"     => no overlap
  * ex6) src="a b c", dest="b"       => no overlap
  * ex7) src="a a a a", dest="a a a" => overlap;
  *                                     expandQueries={"a a a a a","a a a a a a"}
  * ex8) src="a b c d", dest="b c"   => no overlap
  */
 private void CheckOverlap(Dictionary<Query,Query> expandQueries, Term[] src, Term[] dest, int slop, float boost)
 {
     // Per the original author's note, starting at 1 (not 0) is safe because flatten()
     // turns single-term PhraseQueries into TermQueries, so src has multiple terms.
     for (int start = 1; start < src.Length; start++)
     {
         int suffixLength = src.Length - start;

         // Compare the src suffix with the beginning of dest; positions beyond the
         // end of dest are not compared.
         bool matches = true;
         for (int offset = 0; matches && offset < suffixLength; offset++)
         {
             if (offset < dest.Length && !src[start + offset].Text().Equals(dest[offset].Text()))
             {
                 matches = false;
             }
         }

         // Only a suffix strictly shorter than dest leaves dest terms to append.
         if (!matches || suffixLength >= dest.Length)
         {
             continue;
         }

         PhraseQuery expanded = new PhraseQuery();
         for (int s = 0; s < src.Length; s++)
         {
             expanded.Add(src[s]);
         }
         // Appended terms take the field of the first src term and the text of dest.
         for (int d = suffixLength; d < dest.Length; d++)
         {
             expanded.Add(new Term(src[0].Field(), dest[d].Text()));
         }
         expanded.SetSlop(slop);
         expanded.SetBoost(boost);

         if (!expandQueries.ContainsKey(expanded))
         {
             expandQueries.Add(expanded, expanded);
         }
     }
 }
Пример #23
0
 // Expects zero hits when "one" and "five" are searched as a phrase with a slop of 2.
 public virtual void  TestNotCloseEnough()
 {
     Term first = new Term("field", "one");
     Term second = new Term("field", "five");

     query.SetSlop(2);
     query.Add(first);
     query.Add(second);

     ScoreDoc[] found = searcher.Search(query, null, 1000).scoreDocs;
     int hitCount = found.Length;
     Assert.AreEqual(0, hitCount);
     QueryUtils.Check(query, searcher);
 }
Пример #24
0
        /// <summary>
        /// Builds the Lucene query for a string-like field value; the query type is chosen from the value's lexer token.
        /// </summary>
        /// <param name="value">Parsed field value (token kind, text and optional fuzzy value).</param>
        /// <param name="currentField">Field whose name the resulting terms are created on.</param>
        /// <returns>A term, fuzzy, phrase, wildcard or prefix query.</returns>
        /// <exception cref="NotImplementedException">The token kind is not handled here.</exception>
        private Query CreateStringValueQuery(QueryFieldValue value, FieldInfo currentField)
        {
            switch (value.Token)
            {
                case SnLucLexer.Token.Number:
                case SnLucLexer.Token.String:
                {
                    var text = value.StringValue;

                    // The two sentinel texts skip analysis and map to fixed term queries.
                    if (text == ContentQuery.EmptyText)
                        return new TermQuery(new Term(currentField.Name, text));
                    if (text == ContentQuery.EmptyInnerQueryText)
                        return new TermQuery(new Term("Id", NumericUtils.IntToPrefixCoded(0)));

                    var analyzed = GetAnalyzedText(currentField.Name, text);
                    if (analyzed.Length == 0)
                        analyzed = new String[] { String.Empty }; // fall back to a single empty term

                    var hasFuzzy = value.FuzzyValue != null;

                    // One word: plain term query, or fuzzy query when a fuzzy value was given.
                    if (analyzed.Length == 1)
                    {
                        var single = new Term(currentField.Name, analyzed[0]);
                        if (hasFuzzy)
                            return new FuzzyQuery(single, Convert.ToSingle(value.FuzzyValue));
                        return new TermQuery(single);
                    }

                    // Several words: phrase query; the fuzzy value, if any, becomes the slop.
                    var phrase = new PhraseQuery();
                    for (var i = 0; i < analyzed.Length; i++)
                        phrase.Add(new Term(currentField.Name, analyzed[i]));
                    if (hasFuzzy)
                        phrase.SetSlop(Convert.ToInt32(value.FuzzyValue.Value));
                    return phrase;
                }
                case SnLucLexer.Token.WildcardString:
                {
                    var pattern = value.StringValue;

                    // Only a pattern whose sole wildcards are trailing '*' can become a prefix query.
                    if (pattern.EndsWith("*"))
                    {
                        var prefix = pattern.TrimEnd('*');
                        if (!prefix.Contains('?') && !prefix.Contains('*'))
                            return new PrefixQuery(new Term(currentField.Name, prefix));
                    }
                    return new WildcardQuery(new Term(currentField.Name, pattern));
                }
                default:
                    throw new NotImplementedException("CreateValueQuery with Token: " + value.Token);
            }
        }
Пример #25
0
		// Runs iter randomly generated sloppy phrase queries against s and returns the
		// accumulated GetSum() of the per-query counting collectors.
		public virtual int DoSloppyPhrase(IndexSearcher s, int termsInIndex, int maxClauses, int iter)
		{
			int total = 0;
			
			for (int round = 0; round < iter; round++)
			{
				// At least two clauses per phrase.
				int clauseCount = 2 + r.Next(maxClauses - 1);
				PhraseQuery phrase = new PhraseQuery();
				for (int pos = 0; pos < clauseCount; pos++)
				{
					// Single-character term text, offset from 'A' by a random term number.
					char letter = (char)(r.Next(termsInIndex) + 'A');
					phrase.Add(new Term("f", System.Convert.ToString(letter)), pos);
				}
				phrase.SetSlop(termsInIndex); // the slop could be randomized as well
				
				CountingHitCollector counter = new CountingHitCollector();
				s.Search(phrase, counter);
				total += counter.GetSum();
			}
			
			return total;
		}
Пример #26
0
        // Indexes textoParaProcurar as one document in an in-memory directory and asserts that
        // the phrase built from frase, with slop distanciaEntrePalavras, finds it.
        public void PhraseQueryText(string[] frase, string textoParaProcurar, int distanciaEntrePalavras)
        {
            const string fieldName = "texto";
            using (var directory = new RAMDirectory())
            {
                // The writer is disposed before the searcher is opened on the same directory.
                using (var writer = new IndexWriter(directory, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED))
                {
                    var document = new Document();
                    document.Add(new Field(fieldName, textoParaProcurar, Field.Store.YES, Field.Index.ANALYZED));
                    writer.AddDocument(document);
                }

                using (var searcher = new IndexSearcher(directory, true))
                {
                    var phrase = new PhraseQuery();
                    phrase.SetSlop(distanciaEntrePalavras);
                    for (var i = 0; i < frase.Length; i++)
                        phrase.Add(new Term(fieldName, frase[i]));

                    var topDocs = searcher.Search(phrase, 10);
                    var anyHit = topDocs.TotalHits > 0;
                    var failureMessage = NaoEncontrou(textoParaProcurar, distanciaEntrePalavras, frase);
                    Assert.IsTrue(anyHit, failureMessage);
                }
            }
        }
Пример #27
0
		// Three-term phrase checks: exact vs. sloppy scoring on "field", then ordered and
		// reversed three-term phrases on the "palindrome" field with slop 4.
		public virtual void  TestPalyndrome3()
		{
			string[] forward = new string[] { "one", "two", "three" };
			string[] backward = new string[] { "three", "two", "one" };
			
			// Non-palindrome field, slop 0 (to use the exact phrase scorer): expects one hit.
			query.SetSlop(0);
			foreach (string word in forward)
				query.Add(new Term("field", word));
			Hits found = searcher.Search(query);
			Assert.AreEqual(1, found.Length(), "phrase found with exact phrase scorer");
			float exactScore = found.Score(0);
			QueryUtils.Check(query, searcher);
			
			// Same query with slop 4 (to use the sloppy scorer), although no slop is needed here:
			// still one hit, and the score must equal the exact score within SCORE_COMP_THRESH.
			query.SetSlop(4);
			found = searcher.Search(query);
			Assert.AreEqual(1, found.Length(), "just sloppy enough");
			float sloppyScore = found.Score(0);
			Assert.AreEqual(exactScore, sloppyScore, SCORE_COMP_THRESH, "exact scorer and sloppy scorer score the same when slop does not matter");
			QueryUtils.Check(query, searcher);
			
			// Palindrome field, terms in forward order; slop must be at least four for both
			// the ordered and the reversed occurrence to match.
			query = new PhraseQuery();
			query.SetSlop(4);
			foreach (string word in forward)
				query.Add(new Term("palindrome", word));
			found = searcher.Search(query);
			Assert.AreEqual(1, found.Length(), "just sloppy enough");
			float orderedScore = found.Score(0);
			QueryUtils.Check(query, searcher);
			
			// Disabled for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq():
			//assertTrue("ordered scores higher in palindrome",sloppyScore+SCORE_COMP_THRESH<orderedScore);
			
			// Palindrome field, terms in reversed order, same slop requirement.
			query = new PhraseQuery();
			query.SetSlop(4);
			foreach (string word in backward)
				query.Add(new Term("palindrome", word));
			found = searcher.Search(query);
			Assert.AreEqual(1, found.Length(), "just sloppy enough");
			float reversedScore = found.Score(0);
			QueryUtils.Check(query, searcher);
			
			// Disabled for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq():
			//assertTrue("reversed scores higher in palindrome",sloppyScore+SCORE_COMP_THRESH<reversedScore);
			//assertEquals("ordered or reversed does not matter",orderedScore, reversedScore, SCORE_COMP_THRESH);
		}
Пример #28
0
		// Indexes three documents with a growing gap between "firstname" and "lastname" and
		// checks that a maximally sloppy phrase query ranks the tighter matches higher.
		public virtual void  TestSlopScoring()
		{
			Directory directory = new RAMDirectory();
			IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
			
			// One document per text, added in this order.
			string[] texts = new string[]
			{
				"foo firstname lastname foo",
				"foo firstname xxx lastname foo",
				"foo firstname xxx yyy lastname foo"
			};
			foreach (string text in texts)
			{
				Lucene.Net.Documents.Document document = new Lucene.Net.Documents.Document();
				document.Add(new Field("field", text, Field.Store.YES, Field.Index.TOKENIZED));
				writer.AddDocument(document);
			}
			
			writer.Optimize();
			writer.Close();
			
			Searcher searcher = new IndexSearcher(directory);
			PhraseQuery query = new PhraseQuery();
			query.Add(new Term("field", "firstname"));
			query.Add(new Term("field", "lastname"));
			query.SetSlop(System.Int32.MaxValue);
			Hits found = searcher.Search(query);
			Assert.AreEqual(3, found.Length());
			
			// Matches whose terms are closer together must score higher: the hit at each
			// rank is expected to be the document with the same id, with the listed score.
			double[] expectedScores = new double[] { 0.71, 0.44, 0.31 };
			for (int rank = 0; rank < expectedScores.Length; rank++)
			{
				Assert.AreEqual(expectedScores[rank], found.Score(rank), 0.01);
				Assert.AreEqual(rank, found.Id(rank));
			}
			QueryUtils.Check(query, searcher);
		}
Пример #29
0
		// Reversed two-term phrases with slop 2: "two one" is expected to match once,
		// "three one" is expected not to match.
		public virtual void  TestOrderDoesntMatter()
		{
			// Slop must be at least two for a reverse-order match.
			query.SetSlop(2);
			foreach (string word in new string[] { "two", "one" })
				query.Add(new Term("field", word));
			Hits found = searcher.Search(query);
			Assert.AreEqual(1, found.Length(), "just sloppy enough");
			QueryUtils.Check(query, searcher);
			
			// Fresh query, same slop, different reversed pair.
			query = new PhraseQuery();
			query.SetSlop(2);
			foreach (string word in new string[] { "three", "one" })
				query.Add(new Term("field", word));
			found = searcher.Search(query);
			Assert.AreEqual(0, found.Length(), "not sloppy enough");
			QueryUtils.Check(query, searcher);
		}
Пример #30
0
		/// <summary>
		/// Indexes <paramref name="doc"/> into a fresh in-memory index, runs <paramref name="query"/>
		/// with the given slop and asserts the number of hits.
		/// </summary>
		/// <param name="doc">The single document to index.</param>
		/// <param name="query">Phrase query to run; its slop is overwritten with <paramref name="slop"/>.</param>
		/// <param name="slop">Slop to apply to the query.</param>
		/// <param name="expectedNumResults">Expected total hit count.</param>
		/// <returns>The maximum score reported by the search.</returns>
		private float CheckPhraseQuery(Document doc, PhraseQuery query, int slop, int expectedNumResults)
		{
			query.SetSlop(slop);
			
			RAMDirectory ramDir = new RAMDirectory();
			try
			{
				WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
				IndexWriter writer = new IndexWriter(ramDir, analyzer, MaxFieldLength.UNLIMITED);
				try
				{
					writer.AddDocument(doc);
				}
				finally
				{
					// Close the writer even when indexing fails.
					writer.Close();
				}
				
				IndexSearcher searcher = new IndexSearcher(ramDir);
				try
				{
					TopDocs td = searcher.Search(query, null, 10);
					//System.out.println("slop: "+slop+"  query: "+query+"  doc: "+doc+"  Expecting number of hits: "+expectedNumResults+" maxScore="+td.getMaxScore());
					Assert.AreEqual(expectedNumResults, td.totalHits, "slop: " + slop + "  query: " + query + "  doc: " + doc + "  Wrong number of hits");
					
					//QueryUtils.check(query,searcher);
					
					return td.GetMaxScore();
				}
				finally
				{
					// A failed assertion throws; the searcher must still be closed.
					searcher.Close();
				}
			}
			finally
			{
				ramDir.Close();
			}
		}
Пример #31
0
        /// <summary>
        /// Adds a PhraseQuery clause for a single field.
        /// </summary>
        /// <param name="fieldName">Name of the field to search</param>
        /// <param name="phrase">Phrase to search for</param>
        /// <param name="boostLevel">Boost (weight) level</param>
        /// <param name="asFilter">Whether to add the clause as a filter condition</param>
        /// <returns>LuceneSearchBuilder</returns>
        public LuceneSearchBuilder WithPhrase(string fieldName, string phrase, BoostLevel? boostLevel = null, bool asFilter = false)
        {
            string scrubbed = ClauseScrubber.LuceneKeywordsScrub(phrase);

            // Nothing left after scrubbing: the builder is returned untouched.
            if (string.IsNullOrEmpty(scrubbed))
                return this;

            // A single character is delegated to the plain field clause instead of a phrase.
            if (scrubbed.Length == 1)
                return WithField(fieldName, scrubbed, false, boostLevel, asFilter);

            string[] segments = ClauseScrubber.SegmentForPhraseQuery(scrubbed);

            PhraseQuery phraseQuery = new PhraseQuery();
            for (int i = 0; i < segments.Length; i++)
                phraseQuery.Add(new Term(fieldName, segments[i]));
            phraseQuery.SetSlop(PhraseQuerySlop);

            if (boostLevel.HasValue)
                SetBoost(phraseQuery, boostLevel.Value);

            // The phrase is always a MUST clause; asFilter only selects the target list.
            BooleanClause clause = new BooleanClause(phraseQuery, BooleanClause.Occur.MUST);
            if (asFilter)
                filters.Add(clause);
            else
                clauses.Add(clause);

            return this;
        }
Пример #32
0
        /// <summary>
        /// Creates a phrase query with the given slop and optional boost, registers it through Add() and returns it.
        /// To add terms, use the AddTerm() query extension on the returned query.
        /// </summary>
        /// <param name="slop">The allowed distance between the terms</param>
        /// <param name="boost">A boost multiplier (1 is default / normal).</param>
        /// <param name="occur">Whether it must, must not or should occur in the field</param>
        /// <param name="key">The dictionary key to allow reference beyond the initial scope</param>
        /// <returns>The generated phrase query object</returns>
        public virtual PhraseQuery Phrase(int slop, float? boost = null, BooleanClause.Occur occur = null, string key = null)
        {
            var phraseQuery = new PhraseQuery();

            SetBoostValue(phraseQuery, boost);
            phraseQuery.SetSlop(slop);
            Add(phraseQuery, occur, key);

            return phraseQuery;
        }
Пример #33
0
        /// <summary>
        /// Adds one PhraseQuery per field for the same phrase, grouped into a single clause.
        /// </summary>
        /// <param name="fieldNameAndBoosts">Field names and their boost levels</param>
        /// <param name="phrase">Phrase to search for</param>
        /// <param name="occur">Relation between the per-field conditions</param>
        /// <param name="asFilter">Whether to add the clause as a filter condition</param>
        /// <returns>LuceneSearchBuilder</returns>
        public LuceneSearchBuilder WithPhrases(Dictionary<string, BoostLevel> fieldNameAndBoosts, string phrase, BooleanClause.Occur occur, bool asFilter = false)
        {
            string scrubbed = ClauseScrubber.LuceneKeywordsScrub(phrase);
            if (string.IsNullOrEmpty(scrubbed))
                return this;

            string[] segments = ClauseScrubber.SegmentForPhraseQuery(scrubbed);

            // A single segment needs no phrase matching; delegate to the multi-field clause.
            if (segments.Length == 1)
                return WithFields(fieldNameAndBoosts, segments[0], false, occur, asFilter);

            BooleanQuery combined = new BooleanQuery();
            foreach (var entry in fieldNameAndBoosts)
            {
                PhraseQuery perField = new PhraseQuery();
                for (int i = 0; i < segments.Length; i++)
                    perField.Add(new Term(entry.Key, segments[i]));

                perField.SetSlop(PhraseQuerySlop);
                SetBoost(perField, entry.Value);
                combined.Add(perField, occur);
            }

            // The group as a whole is a MUST clause; occur applies between the fields inside it.
            BooleanClause wrapped = new BooleanClause(combined, BooleanClause.Occur.MUST);
            if (asFilter)
                filters.Add(wrapped);
            else
                clauses.Add(wrapped);

            return this;
        }
Пример #34
0
        // Searches the "msg" field for the keywords posted in the "txtContent" form field and
        // binds the resulting list to SearchRepeater. A missing or empty keyword binds an empty list.
        protected void SearchContent()
        {
            List<SearchResult> list = new List<SearchResult>();
            string kw = Request.Form["txtContent"];

            // Request.Form yields null for an absent field; calling ToLower() on it would throw.
            if (!string.IsNullOrEmpty(kw))
            {
                string indexPath = @"D:\lucenedir";
                kw = kw.ToLower(); // PanGu segmentation is case-sensitive by default, so search in lower case

                FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
                IndexReader reader = null;
                IndexSearcher searcher = null;
                try
                {
                    reader = IndexReader.Open(directory, true);
                    searcher = new IndexSearcher(reader);

                    // Search condition: every segmented word becomes a term on the article content field.
                    PhraseQuery query = new PhraseQuery();
                    foreach (string word in Common.WebCommon.PanGuSplit(kw))
                    {
                        query.Add(new Term("msg", word));
                    }

                    // Maximum distance allowed between the query words; words that are too
                    // far apart in an article are not a meaningful match.
                    query.SetSlop(100);

                    // TopScoreDocCollector is the container that receives the search results.
                    TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
                    searcher.Search(query, null, collector);

                    // Take every collected hit; TopDocs(start, count) could be used for paging instead.
                    ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;
                    for (int i = 0; i < docs.Length; i++)
                    {
                        // ScoreDoc only carries the Lucene-internal document id, so documents are
                        // loaded one by one through searcher.Doc instead of all at once.
                        int docId = docs[i].doc;
                        Document doc = searcher.Doc(docId);
                        SearchResult result = new SearchResult();
                        // NOTE(review): the field assignments below are disabled, so every result is bound empty.
                        //result.Msg = Common.WebCommon.Highlight(kw,doc.Get("msg")); // highlighted segments
                        //result.Title = doc.Get("title");
                        //result.Url = "/BookDeatail.apsx?id=" + doc.Get("id");
                        list.Add(result);
                    }
                }
                finally
                {
                    // Release the index handles even when the search throws.
                    if (searcher != null)
                        searcher.Close();
                    if (reader != null)
                        reader.Close();
                    directory.Close();
                }
            }

            this.SearchRepeater.DataSource = list;
            this.SearchRepeater.DataBind();
        }
Пример #35
0
        /// <summary>
        /// Adds, for every field, one clause that ORs together the queries built from each phrase.
        /// </summary>
        /// <param name="fieldNameAndBoosts">Field names and their boost levels</param>
        /// <param name="phrases">Phrases to search for</param>
        /// <param name="occur">Relation between the per-field clauses</param>
        /// <param name="asFilter">Whether to add the clauses as filter conditions</param>
        /// <returns>LuceneSearchBuilder</returns>
        public LuceneSearchBuilder WithPhrases(Dictionary<string, BoostLevel> fieldNameAndBoosts, IEnumerable<string> phrases, BooleanClause.Occur occur, bool asFilter = false)
        {
            foreach (var entry in fieldNameAndBoosts)
            {
                BooleanQuery perField = new BooleanQuery();

                foreach (string candidate in phrases)
                {
                    string scrubbed = ClauseScrubber.LuceneKeywordsScrub(candidate);
                    if (string.IsNullOrEmpty(scrubbed))
                        continue;

                    Query part;
                    if (scrubbed.Length == 1)
                    {
                        // A single character is matched as a prefix instead of a phrase.
                        Query prefixQuery = new PrefixQuery(new Term(entry.Key, scrubbed));
                        SetBoost(prefixQuery, entry.Value);
                        part = prefixQuery;
                    }
                    else
                    {
                        string[] segments = ClauseScrubber.SegmentForPhraseQuery(scrubbed);

                        PhraseQuery phraseQuery = new PhraseQuery();
                        for (int i = 0; i < segments.Length; i++)
                            phraseQuery.Add(new Term(entry.Key, segments[i]));

                        phraseQuery.SetSlop(PhraseQuerySlop);
                        SetBoost(phraseQuery, entry.Value);
                        part = phraseQuery;
                    }

                    perField.Add(part, BooleanClause.Occur.SHOULD);
                }

                // One clause per field, added even when no phrase survived scrubbing.
                BooleanClause wrapped = new BooleanClause(perField, occur);
                if (asFilter)
                    filters.Add(wrapped);
                else
                    clauses.Add(wrapped);
            }

            return this;
        }
Пример #36
0
		// Slop of 1: the in-order pair is expected to match, the reversed pair is not.
		public virtual void  TestSlop1()
		{
			// Terms in order with a slop of 1.
			query.SetSlop(1);
			foreach (string word in new string[] { "one", "two" })
				query.Add(new Term("field", word));
			Hits found = searcher.Search(query);
			Assert.AreEqual(1, found.Length(), "in order");
			QueryUtils.Check(query, searcher);
			
			// Terms out of order: a slop of 1 is not enough, it must be at least 2.
			query = new PhraseQuery();
			query.SetSlop(1);
			foreach (string word in new string[] { "two", "one" })
				query.Add(new Term("field", word));
			found = searcher.Search(query);
			Assert.AreEqual(0, found.Length(), "reversed, slop not 2 or more");
			QueryUtils.Check(query, searcher);
		}
Пример #37
0
		// Indexes "a c" and "a b c" with SimpleSimilarity and runs a term, boolean, exact-phrase
		// and sloppy-phrase search, each with its own collector (which receives this test instance).
		public virtual void  TestSimilarity_Renamed()
		{
			RAMDirectory store = new RAMDirectory();
			IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			writer.SetSimilarity(new SimpleSimilarity());
			
			// Two documents, added in this order.
			foreach (string text in new string[] { "a c", "a b c" })
			{
				Document document = new Document();
				document.Add(new Field("field", text, Field.Store.YES, Field.Index.ANALYZED));
				writer.AddDocument(document);
			}
			writer.Optimize();
			writer.Close();
			
			Searcher searcher = new IndexSearcher(store);
			searcher.SetSimilarity(new SimpleSimilarity());
			
			Term termA = new Term("field", "a");
			Term termB = new Term("field", "b");
			Term termC = new Term("field", "c");
			
			// 1) single term "b"
			searcher.Search(new TermQuery(termB), new AnonymousClassCollector(this));
			
			// 2) "a" OR "b"
			BooleanQuery either = new BooleanQuery();
			either.Add(new TermQuery(termA), BooleanClause.Occur.SHOULD);
			either.Add(new TermQuery(termB), BooleanClause.Occur.SHOULD);
			searcher.Search(either, new AnonymousClassCollector1(this));
			
			// 3) phrase "a c" with the default slop
			PhraseQuery phrase = new PhraseQuery();
			phrase.Add(termA);
			phrase.Add(termC);
			searcher.Search(phrase, new AnonymousClassCollector2(this));
			
			// 4) the same phrase with slop 2
			phrase.SetSlop(2);
			searcher.Search(phrase, new AnonymousClassCollector3(this));
		}
Пример #38
0
		// Three-term sloppy phrases: in-order terms at slop 2, then reversed terms at slop 5 and 6.
		public virtual void  TestMulipleTerms()
		{
			// In order: expects one hit with slop 2.
			query.SetSlop(2);
			foreach (string word in new string[] { "one", "three", "five" })
				query.Add(new Term("field", word));
			Hits found = searcher.Search(query);
			Assert.AreEqual(1, found.Length(), "two total moves");
			QueryUtils.Check(query, searcher);
			
			// Reversed: it takes six moves to match this phrase, so slop 5 expects no hit.
			query = new PhraseQuery();
			query.SetSlop(5);
			foreach (string word in new string[] { "five", "three", "one" })
				query.Add(new Term("field", word));
			found = searcher.Search(query);
			Assert.AreEqual(0, found.Length(), "slop of 5 not close enough");
			QueryUtils.Check(query, searcher);
			
			// Same reversed query with the slop raised to 6: expects one hit.
			query.SetSlop(6);
			found = searcher.Search(query);
			Assert.AreEqual(1, found.Length(), "slop of 6 just right");
			QueryUtils.Check(query, searcher);
		}
Пример #39
0
		// Indexes three documents with a growing gap between "firstname" and "lastname" and
		// checks that a maximally sloppy phrase query ranks the tighter matches higher.
		public virtual void  TestSlopScoring()
		{
			Directory directory = new RAMDirectory();
			IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			
			// One document per text, added in this order.
			string[] texts = new string[]
			{
				"foo firstname lastname foo",
				"foo firstname xxx lastname foo",
				"foo firstname xxx yyy lastname foo"
			};
			foreach (string text in texts)
			{
				Document document = new Document();
				document.Add(new Field("field", text, Field.Store.YES, Field.Index.ANALYZED));
				writer.AddDocument(document);
			}
			
			writer.Optimize();
			writer.Close();
			
			Searcher searcher = new IndexSearcher(directory);
			PhraseQuery query = new PhraseQuery();
			query.Add(new Term("field", "firstname"));
			query.Add(new Term("field", "lastname"));
			query.SetSlop(System.Int32.MaxValue);
			ScoreDoc[] found = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(3, found.Length);
			
			// Matches whose terms are closer together must score higher: the hit at each
			// rank is expected to be the document with the same number, with the listed score.
			double[] expectedScores = new double[] { 0.71, 0.44, 0.31 };
			for (int rank = 0; rank < expectedScores.Length; rank++)
			{
				Assert.AreEqual(expectedScores[rank], found[rank].Score, 0.01);
				Assert.AreEqual(rank, found[rank].Doc);
			}
			QueryUtils.Check(query, searcher);
		}
Пример #40
0
		// Four sloppy phrase queries on the "nonexist" field: two expected to match two
		// documents each, two expected to match nothing regardless of slop.
		public virtual void  TestNonExistingPhrase()
		{
			// Phrase without repeated terms; expected in 2 docs with slop 2.
			foreach (string word in new string[] { "phrase", "notexist", "found" })
				query.Add(new Term("nonexist", word));
			query.SetSlop(2);
			
			Hits found = searcher.Search(query);
			Assert.AreEqual(2, found.Length(), "phrase without repetitions exists in 2 docs");
			QueryUtils.Check(query, searcher);
			
			// Phrase with a repeated term; expected in 2 docs with slop 1.
			query = new PhraseQuery();
			foreach (string word in new string[] { "phrase", "exist", "exist" })
				query.Add(new Term("nonexist", word));
			query.SetSlop(1);
			
			found = searcher.Search(query);
			Assert.AreEqual(2, found.Length(), "phrase with repetitions exists in two docs");
			QueryUtils.Check(query, searcher);
			
			// Repetition variant I: expected in no doc, no matter how high the slop is.
			query = new PhraseQuery();
			foreach (string word in new string[] { "phrase", "notexist", "phrase" })
				query.Add(new Term("nonexist", word));
			query.SetSlop(1000);
			
			found = searcher.Search(query);
			Assert.AreEqual(0, found.Length(), "nonexisting phrase with repetitions does not exist in any doc");
			QueryUtils.Check(query, searcher);
			
			// Repetition variant II: expected in no doc, no matter how high the slop is.
			query = new PhraseQuery();
			foreach (string word in new string[] { "phrase", "exist", "exist", "exist" })
				query.Add(new Term("nonexist", word));
			query.SetSlop(1000);
			
			found = searcher.Search(query);
			Assert.AreEqual(0, found.Length(), "nonexisting phrase with repetitions does not exist in any doc");
			QueryUtils.Check(query, searcher);
		}
Пример #41
0
		// Indexes "a c" and "a b c" with SimpleSimilarity and runs a term, boolean, exact-phrase
		// and sloppy-phrase search, each with its own hit collector (which receives this test instance).
		public virtual void  TestSimilarity_()
		{
			RAMDirectory store = new RAMDirectory();
			IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true);
			writer.SetSimilarity(new SimpleSimilarity());
			
			Document d1 = new Document();
			d1.Add(Field.Text("Field", "a c"));
			
			Document d2 = new Document();
			d2.Add(Field.Text("Field", "a b c"));
			
			writer.AddDocument(d1);
			writer.AddDocument(d2);
			writer.Optimize();
			writer.Close();
			
			Searcher searcher = new IndexSearcher(store);
			searcher.SetSimilarity(new SimpleSimilarity());
			
			Term a = new Term("Field", "a");
			Term b = new Term("Field", "b");
			Term c = new Term("Field", "c");
			
			// 1) single term "b"
			searcher.Search(new TermQuery(b), new AnonymousClassHitCollector(this));
			
			// 2) "a" and "b", both clauses neither required nor prohibited
			BooleanQuery bq = new BooleanQuery();
			bq.Add(new TermQuery(a), false, false);
			bq.Add(new TermQuery(b), false, false);
			//System.out.println(bq.toString("Field"));
			searcher.Search(bq, new AnonymousClassHitCollector1(this));
			
			// 3) phrase "a c" with the default slop
			PhraseQuery pq = new PhraseQuery();
			pq.Add(a);
			pq.Add(c);
			//System.out.println(pq.toString("Field"));
			searcher.Search(pq, new AnonymousClassHitCollector2(this));
			
			// 4) the same phrase with slop 2
			pq.SetSlop(2);
			//System.out.println(pq.toString("Field"));
			searcher.Search(pq, new AnonymousClassHitCollector3(this));
		}
Пример #42
0
		/// <summary>
		/// Analyzes <paramref name="queryText"/> with the parser's analyzer and builds a query on
		/// <paramref name="field"/> from the resulting tokens: null for no tokens, a TermQuery for one
		/// token, a BooleanQuery of SHOULD clauses when all tokens share a single position, a
		/// MultiPhraseQuery when some tokens share a position, and otherwise a PhraseQuery using phraseSlop.
		/// </summary>
		/// <exception cref="ParseException">throw in overridden method to disallow
		/// </exception>
		protected internal virtual Query GetFieldQuery(System.String field, System.String queryText)
		{
			// Use the analyzer to get all the tokens, and then build a TermQuery,
			// PhraseQuery, or nothing based on the term count
			
			TokenStream source = analyzer.TokenStream(field, new System.IO.StringReader(queryText));
			System.Collections.ArrayList v = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
			Lucene.Net.Analysis.Token t;
			int positionCount = 0;
			bool severalTokensAtSamePosition = false;
			
			while (true)
			{
				try
				{
					t = source.Next();
				}
				catch (System.IO.IOException)
				{
					// A read failure is treated as the end of the token stream.
					t = null;
				}
				if (t == null)
					break;
				v.Add(t);
				// An increment of 0 means the token sits at the same position as the previous one.
				if (t.GetPositionIncrement() != 0)
					positionCount += t.GetPositionIncrement();
				else
					severalTokensAtSamePosition = true;
			}
			try
			{
				source.Close();
			}
			catch (System.IO.IOException)
			{
				// ignore
			}
			
			if (v.Count == 0)
				return null;
			else if (v.Count == 1)
			{
				t = (Lucene.Net.Analysis.Token) v[0];
				return new TermQuery(new Term(field, t.TermText()));
			}
			else
			{
				if (severalTokensAtSamePosition)
				{
					if (positionCount == 1)
					{
						// no phrase query: every token is an alternative at the one position
						BooleanQuery q = new BooleanQuery(true);
						for (int i = 0; i < v.Count; i++)
						{
							t = (Lucene.Net.Analysis.Token) v[i];
							TermQuery currentQuery = new TermQuery(new Term(field, t.TermText()));
							q.Add(currentQuery, BooleanClause.Occur.SHOULD);
						}
						return q;
					}
					else
					{
						// phrase query: group the tokens of each position into one Term[] entry
						MultiPhraseQuery mpq = new MultiPhraseQuery();
						System.Collections.ArrayList multiTerms = new System.Collections.ArrayList();
						for (int i = 0; i < v.Count; i++)
						{
							t = (Lucene.Net.Analysis.Token) v[i];
							// Any positive increment starts a new position. Testing for exactly 1
							// would fold a token that follows a position gap (increment > 1) into
							// the previous position's alternatives.
							if (t.GetPositionIncrement() > 0 && multiTerms.Count > 0)
							{
								mpq.Add((Term[]) multiTerms.ToArray(typeof(Term)));
								multiTerms.Clear();
							}
							multiTerms.Add(new Term(field, t.TermText()));
						}
						mpq.Add((Term[]) multiTerms.ToArray(typeof(Term)));
						return mpq;
					}
				}
				else
				{
					// Every token has its own position: plain phrase query with the configured slop.
					PhraseQuery q = new PhraseQuery();
					q.SetSlop(phraseSlop);
					for (int i = 0; i < v.Count; i++)
					{
						q.Add(new Term(field, ((Lucene.Net.Analysis.Token) v[i]).TermText()));
					}
					return q;
				}
			}
		}
Пример #43
0
        /// <summary>
        /// Searches the "content" field of the Lucene index for the submitted text and renders the
        /// "Index" view with the highlighted results.
        /// </summary>
        /// <returns>A redirect when "btnCreate" was posted; otherwise the "Index" view.</returns>
        public ActionResult SearchContent()
        {
            if (Request["btnCreate"] != null)
            {
                return Redirect("/Search/GenrateSearchLibrary");
            }

            string indexPath = ConfigurationManager.AppSettings["LuceneNetPath"];
            string searchStr = Request["txtSearchContent"] ?? "";

            if (searchStr == "")
            {
                return View("Index");
            }
            // Record the searched text in the hot-keyword store.
            SearchDetailsBll.AddEntity(new SearchDetails()
            {
                Id = Guid.NewGuid(),
                KeyWords = searchStr,
                SearchDateTime = DateTime.Now
            });

            List<string> stringList = Common.SearchHelper.ChangeStringToSegment(searchStr);
            List<SearchResultViewModel> list = new List<SearchResultViewModel>();

            FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader reader = null;
            IndexSearcher searcher = null;
            try
            {
                reader = IndexReader.Open(directory, true);
                searcher = new IndexSearcher(reader);

                // Search condition: every segmented word becomes a term on the "content" field.
                PhraseQuery query = new PhraseQuery();
                foreach (string word in stringList)
                {
                    query.Add(new Term("content", word));
                }

                // Maximum distance allowed between the query words; words that are too far
                // apart in an article are not a meaningful match.
                query.SetSlop(100);

                // TopScoreDocCollector is the container that receives the search results.
                TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
                searcher.Search(query, null, collector);

                // Take every collected hit; TopDocs(start, count) could be used for paging instead.
                ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;
                for (int i = 0; i < docs.Length; i++)
                {
                    // ScoreDoc only carries the Lucene-internal document id, so documents are
                    // loaded one by one through searcher.Doc instead of all at once.
                    int docId = docs[i].doc;
                    Document doc = searcher.Doc(docId);
                    list.Add(new SearchResultViewModel()
                    {
                        Id = doc.Get("id"),
                        Title = doc.Get("title"),
                        Content = Common.SearchHelper.ChangeStringToHighLight(searchStr, doc.Get("content"))
                    });
                }
            }
            finally
            {
                // Release the index handles even when the search throws.
                if (searcher != null)
                    searcher.Close();
                if (reader != null)
                    reader.Close();
                directory.Close();
            }

            ViewData["list"] = list;
            ViewData["searchContent"] = searchStr;
            return View("Index");
        }