Add() public method

Adds a term to the end of the query phrase. The relative position of the term is the one immediately after the last term added.
public Add ( Lucene.Net.Index.Term term ) : void
term Lucene.Net.Index.Term
return void
Example #1
0
        public override Query VisitPhraseQuery(PhraseQuery phraseq)
        {
            _dump.Append("PhraseQ(");

            var terms = phraseq.GetTerms();
            PhraseQuery newQuery = null;

            int index = 0;
            int count = terms.Length;
            while (index < count)
            {
                var visitedTerm = VisitTerm(terms[index]);
                if (newQuery != null)
                {
                    newQuery.Add(visitedTerm);
                }
                else if (visitedTerm != terms[index])
                {
                    newQuery = new PhraseQuery();
                    for (int i = 0; i < index; i++)
                        newQuery.Add(terms[i]);
                    newQuery.Add(visitedTerm);
                }
                index++;
                if (index < count)
                    _dump.Append(", ");
            }
            _dump.Append(", Slop:").Append(phraseq.GetSlop()).Append(BoostToString(phraseq)).Append(")");
            if (newQuery != null)
                return newQuery;
            return phraseq;
        }
Example #2
0
		public virtual void  TestPhrase()
		{
			PhraseQuery query = new PhraseQuery();
			query.Add(new Term("Field", "seventy"));
			query.Add(new Term("Field", "seven"));
			CheckHits(query, new int[]{77, 177, 277, 377, 477, 577, 677, 777, 877, 977});
		}
        public virtual void Test1()
        {
            BooleanQuery q = new BooleanQuery();

            PhraseQuery phraseQuery = new PhraseQuery();
            phraseQuery.Slop = 1;
            phraseQuery.Add(new Term(FIELD, "w1"));
            phraseQuery.Add(new Term(FIELD, "w2"));
            q.Add(phraseQuery, Occur.MUST);
            q.Add(Snear(St("w2"), Sor("w5", "zz"), 4, true), Occur.SHOULD);
            q.Add(Snear(Sf("w3", 2), St("w2"), St("w3"), 5, true), Occur.SHOULD);

            Query t = new FilteredQuery(new TermQuery(new Term(FIELD, "xx")), new ItemizedFilter(new int[] { 1, 3 }));
            t.Boost = 1000;
            q.Add(t, Occur.SHOULD);

            t = new ConstantScoreQuery(new ItemizedFilter(new int[] { 0, 2 }));
            t.Boost = 30;
            q.Add(t, Occur.SHOULD);

            DisjunctionMaxQuery dm = new DisjunctionMaxQuery(0.2f);
            dm.Add(Snear(St("w2"), Sor("w5", "zz"), 4, true));
            dm.Add(new TermQuery(new Term(FIELD, "QQ")));

            BooleanQuery xxYYZZ = new BooleanQuery();
            xxYYZZ.Add(new TermQuery(new Term(FIELD, "xx")), Occur.SHOULD);
            xxYYZZ.Add(new TermQuery(new Term(FIELD, "yy")), Occur.SHOULD);
            xxYYZZ.Add(new TermQuery(new Term(FIELD, "zz")), Occur.MUST_NOT);

            dm.Add(xxYYZZ);

            BooleanQuery xxW1 = new BooleanQuery();
            xxW1.Add(new TermQuery(new Term(FIELD, "xx")), Occur.MUST_NOT);
            xxW1.Add(new TermQuery(new Term(FIELD, "w1")), Occur.MUST_NOT);

            dm.Add(xxW1);

            DisjunctionMaxQuery dm2 = new DisjunctionMaxQuery(0.5f);
            dm2.Add(new TermQuery(new Term(FIELD, "w1")));
            dm2.Add(new TermQuery(new Term(FIELD, "w2")));
            dm2.Add(new TermQuery(new Term(FIELD, "w3")));
            dm.Add(dm2);

            q.Add(dm, Occur.SHOULD);

            BooleanQuery b = new BooleanQuery();
            b.MinimumNumberShouldMatch = 2;
            b.Add(Snear("w1", "w2", 1, true), Occur.SHOULD);
            b.Add(Snear("w2", "w3", 1, true), Occur.SHOULD);
            b.Add(Snear("w1", "w3", 3, true), Occur.SHOULD);

            q.Add(b, Occur.SHOULD);

            Qtest(q, new int[] { 0, 1, 2 });
        }
        public virtual void TestANDPhrase()
        {
            PhraseQuery phrase1 = new PhraseQuery();
            phrase1.Add(new Term("field", "foo"));
            phrase1.Add(new Term("field", "bar"));
            PhraseQuery phrase2 = new PhraseQuery();
            phrase2.Add(new Term("field", "star"));
            phrase2.Add(new Term("field", "wars"));
            BooleanQuery expected = new BooleanQuery();
            expected.Add(phrase1, BooleanClause.Occur.MUST);
            expected.Add(phrase2, BooleanClause.Occur.MUST);

            assertEquals(expected, Parse("\"foo bar\"+\"star wars\""));
        }
Example #5
0
        // This is a simplified query builder which works for single Terms and single Phrases
        // Returns null, TermQuery, or PhraseQuery
        public static Lucene.Net.Search.Query GetFieldQuery(Analyzer analyzer, string field, string queryText)
        {
            TokenStream stream = analyzer.TokenStream(field, new StringReader(queryText));
            TokenFilter filter = new CachingTokenFilter(stream);
            filter.Reset();

            // This attribute way of getting token properties isn't very good, but it's the non-obsolete one.
            var attr1 = filter.GetAttribute<ITermAttribute>();
            Func<string> getText = () => attr1 != null ? attr1.Term : null;

            Func<int> getPositionIncrement;
            if (filter.HasAttribute<IPositionIncrementAttribute>())
            {
                var attr = filter.GetAttribute<IPositionIncrementAttribute>();
                getPositionIncrement = () => attr.PositionIncrement;
            }
            else
            {
                getPositionIncrement = () => 1;
            }

            // 0 tokens
            if (!filter.IncrementToken())
            {
                return new BooleanQuery();
            }

            // 1 token?
            string token1 = getText();
            int position = 0;
            if (!filter.IncrementToken())
            {
                return new TermQuery(new Term(field, token1));
            }

            // many tokens - handle first token
            PhraseQuery ret = new PhraseQuery();
            ret.Add(new Term(field, token1));

            do
            {
                // handle rest of tokens
                string tokenNext = getText();
                position += getPositionIncrement();
                ret.Add(new Term(field, tokenNext), position);
            }
            while (filter.IncrementToken());

            return ret;
        }
 public virtual void TestIncreasingSloppiness()
 {
     Term t1 = RandomTerm();
     Term t2 = RandomTerm();
     PhraseQuery q1 = new PhraseQuery();
     q1.Add(t1);
     q1.Add(t2);
     PhraseQuery q2 = new PhraseQuery();
     q2.Add(t1);
     q2.Add(t2);
     for (int i = 0; i < 10; i++)
     {
         q1.Slop = i;
         q2.Slop = i + 1;
         AssertSubsetOf(q1, q2);
     }
 }
        public void ProcessRequest(HttpContext context)
        {
            context.Response.ContentType = "text/plain";
            string searchKey = context.Request["wd"];

            string indexPath = context.Server.MapPath("../IndexData");
            FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader reader = IndexReader.Open(directory, true);
            IndexSearcher searcher = new IndexSearcher(reader);
            //搜索条件
            PhraseQuery query = new PhraseQuery();
            //把用户输入的关键字进行分词
            foreach (string word in Picture.Utility.SplitContent.SplitWords(searchKey))
            {
                query.Add(new Term("tag", word));
            }
            //query.Add(new Term("content", "C#"));//多个查询条件时 为且的关系
            query.SetSlop(100); //指定关键词相隔最大距离

            //TopScoreDocCollector盛放查询结果的容器
            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
            searcher.Search(query, null, collector);//根据query查询条件进行查询,查询结果放入collector容器
            //TopDocs 指定0到GetTotalHits() 即所有查询结果中的文档 如果TopDocs(20,10)则意味着获取第20-30之间文档内容 达到分页的效果
            ScoreDoc[] docs = collector.TopDocs(0,10).scoreDocs;

            //展示数据实体对象集合
            var tagModels = new List<Picture.Model.TagModel>();
            for (int i = 0; i < docs.Length; i++)
            {
                int docId = docs[i].doc;//得到查询结果文档的id(Lucene内部分配的id)
                Document doc = searcher.Doc(docId);//根据文档id来获得文档对象Document

                Picture.Model.TagModel tag = new Picture.Model.TagModel();
                //picture.ImgSummary = doc.Get("summary");
                tag.TagName= Picture.Utility.SplitContent.HightLight(searchKey, doc.Get("tag"));
                //book.ContentDescription = doc.Get("content");//未使用高亮
                //搜索关键字高亮显示 使用盘古提供高亮插件
                //book.ContentDescription = Picture.Utility.SplitContent.HightLight(Request.QueryString["SearchKey"], doc.Get("content"));
                tag.TId = Convert.ToInt32(doc.Get("id"));
                tagModels.Add(tag);
            }

            SearchPreviewResult result = new SearchPreviewResult()
            {
                q=searchKey,
                p=false
            };

            foreach (var item in tagModels)
            {
                result.s.Add(item.TagName);
            }

            System.Web.Script.Serialization.JavaScriptSerializer jss = new System.Web.Script.Serialization.JavaScriptSerializer();

            context.Response.Write(jss.Serialize(result));
        }
        protected void AddExactFieldValueClause(Index index, BooleanQuery query, string fieldName, string fieldValue)
        {
            //if (String.IsNullOrEmpty(fieldValue)) return;

            fieldValue = IdHelper.ProcessGUIDs(fieldValue);

            var phraseQuery = new PhraseQuery();
            phraseQuery.Add(new Term(fieldName.ToLowerInvariant(), fieldValue));

            query.Add(phraseQuery, BooleanClause.Occur.MUST);
        }
        /// <summary>
        /// 搜索
        /// </summary>
        protected void SearchContent(string kw)
        {
            string indexPath = @"D:\lucenedir";
            kw = kw.ToLower();//默认情况下盘古分词区分大小写,需转换成小写进行搜索
            FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader reader = IndexReader.Open(directory, true);
            IndexSearcher searcher = new IndexSearcher(reader);
            //搜索条件
            PhraseQuery queryMsg = new PhraseQuery();
            foreach (string word in Common.WebCommon.PanGuSplit(kw))//先用空格,让用户去分词,空格分隔的就是词“计算机   专业”
            {
                queryMsg.Add(new Term("msg", word));//根据文章内容进行搜索
            }
            //query.Add(new Term("body","语言"));--可以添加查询条件,两者是add关系.顺序没有关系.
            //query.Add(new Term("body", "大学生"));
            queryMsg.SetSlop(100);//多个查询条件的词之间的最大距离.在文章中相隔太远 也就无意义.(例如 “大学生”这个查询条件和"简历"这个查询条件之间如果间隔的词太多也就没有意义了。)

            PhraseQuery queryTitle = new PhraseQuery();
            foreach (string word in Common.WebCommon.PanGuSplit(kw))
            {
                queryTitle.Add(new Term("title", word));
            }
            queryTitle.SetSlop(100);
            BooleanQuery query = new BooleanQuery();
            query.Add(queryMsg, BooleanClause.Occur.SHOULD);
            query.Add(queryTitle, BooleanClause.Occur.SHOULD);

            //TopScoreDocCollector是盛放查询结果的容器
            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
            searcher.Search(query, null, collector);//根据query查询条件进行查询,查询结果放入collector容器
            ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;//得到所有查询结果中的文档,GetTotalHits():表示总条数   TopDocs(300, 20);//表示得到 300(从300开始)到320(结束)的文档内容.可以用来实现分页功能
            List<SearchResult> list = new List<SearchResult>();
            for (int i = 0; i < docs.Length; i++)
            {
                //搜索ScoreDoc[]只能获得文档的id,这样不会把查询结果的Document一次性加载到内存中。降低了内存压力,需要获得文档的详细内容的时候通过searcher.Doc来根据文档id来获得文档的详细内容对象Document.
                int docId = docs[i].doc;//得到查询结果文档的id(Lucene内部分配的id)
                Document doc = searcher.Doc(docId);//找到文档id对应的文档详细信息
                SearchResult result = new SearchResult();
                result.ContentDescription = WebCommon.Highlight(kw,WebCommon.CutString(doc.Get("msg"),150));//分词高亮显示
                result.Title = doc.Get("title");
                result.Id = Convert.ToInt32(doc.Get("id"));
                result.PublishDate = Convert.ToDateTime(doc.Get("PublishDate"));
                result.ISBN = doc.Get("ISBN");
                result.Author = doc.Get("Author");
                result.UnitPrice = Convert.ToDecimal(doc.Get("UnitPrice"));

                list.Add(result);
            }
            this.BookListRepeater.DataSource = list;
            this.BookListRepeater.DataBind();

            AddKeyWord(kw);
        }
        public string GetSearchQuery(params ISearchableMetadataValue[] searchableMetadataValues)
        {
            var query = new PhraseQuery();

            foreach (var searchableMetadataValue in searchableMetadataValues)
            {
                query.Add(new Term(searchableMetadataValue.Metadata.SearchName, searchableMetadataValue.Value));
            }

            var filter = query.ToString();

            return GetSearchQuery(filter, new SearchableMetadata[] { });
        }
        public static Query BuildExactFieldValueClause(Index index, string fieldName, string fieldValue)
        {
            Assert.ArgumentNotNull(index, "Index");

            if (string.IsNullOrEmpty(fieldName) || string.IsNullOrEmpty(fieldValue))
            {
                return null;
            }

            fieldValue = IdHelper.ProcessGUIDs(fieldValue);

            var phraseQuery = new PhraseQuery();
            phraseQuery.Add(new Term(fieldName.ToLowerInvariant(), fieldValue.ToLowerInvariant()));

            return phraseQuery;
        }
 protected bool PhaseQueryHasHits(string[] phrases, int i)
 {
     using (var dir = FSDirectory.Open(TestEnvironment.TestIndexDirectory))
     using (var indexSearcher = new IndexSearcher(dir))
     {
         var phraseQuery = new PhraseQuery
         {
             Slop = 0
         };
         foreach (var phrase in phrases)
         {
             phraseQuery.Add(new Term("field", phrase));
         }
         // Search, without subcategories
         var topDocs = indexSearcher.Search(phraseQuery, 10);
         return topDocs.TotalHits > 0;
     }
 }
Example #13
0
        /// <summary>
        /// 从索引库中检索关键字
        /// </summary>
        private void SearchFromIndexData() {
            string indexPath = Context.Server.MapPath("~/IndexData");
            FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NoLockFactory());
            IndexReader reader = IndexReader.Open(directory, true);
            IndexSearcher searcher = new IndexSearcher(reader);
            //搜索条件
            PhraseQuery query = new PhraseQuery();
            //把用户输入的关键字进行分词
            foreach(string word in Common.SplitContent.SplitWords(Request.QueryString["SearchKey"])) {
                query.Add(new Term("content", word));
            }
            //query.Add(new Term("content", "C#"));//多个查询条件时 为且的关系
            query.SetSlop(100); //指定关键词相隔最大距离

            //TopScoreDocCollector盛放查询结果的容器
            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
            searcher.Search(query, null, collector);//根据query查询条件进行查询,查询结果放入collector容器
            //TopDocs 指定0到GetTotalHits() 即所有查询结果中的文档 如果TopDocs(20,10)则意味着获取第20-30之间文档内容 达到分页的效果
            ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;

            //展示数据实体对象集合
            List<PZYM.Shop.Model.Books> bookResult = new List<PZYM.Shop.Model.Books>();
            for(int i = 0; i < docs.Length; i++) {
                int docId = docs[i].doc;//得到查询结果文档的id(Lucene内部分配的id)
                Document doc = searcher.Doc(docId);//根据文档id来获得文档对象Document


                PZYM.Shop.Model.Books book = new PZYM.Shop.Model.Books();
                book.Title = doc.Get("title");
                //book.ContentDescription = doc.Get("content");//未使用高亮
                //搜索关键字高亮显示 使用盘古提供高亮插件
                book.ContentDescription = Common.SplitContent.HightLight(Request.QueryString["SearchKey"], doc.Get("content"));
                book.Id = Convert.ToInt32(doc.Get("id"));
                bookResult.Add(book);
            }
            Repeater1.DataSource = bookResult;
            Repeater1.DataBind();
        }
Example #14
0
        /// <summary>
        /// Adds the content sub query.
        /// </summary>
        /// <param name="query">The boolean query.</param>
        /// <param name="key">The field key.</param>
        /// <param name="value">The field value.</param>
        /// <param name="matchVariant">The match variant.</param>
        /// <param name="condition">The condition.</param>
        /// <param name="isFirst">if set to <c>true</c> [is first].</param>
        private void AddContentSubQuery(LuceneSearch.BooleanQuery query, string key, string value, MatchVariant matchVariant, QueryCondition condition)
        {
            if (matchVariant == MatchVariant.NotEquals)
            {
                query.Add(new LuceneSearch.TermQuery(new Term(key, value)), LuceneSearch.Occur.MUST_NOT);
                return;
            }

            LuceneSearch.Occur occurrence = this.GetOccur(condition);

            LuceneSearch.TermRangeQuery rangeQuery = this.GetRangeQuery(key, value, matchVariant);
            if (rangeQuery != null)
            {
                query.Add(rangeQuery, occurrence);
                return;
            }

            string[] keywords = value.Split(' ');
            if (keywords.Length > 1)
            {
                LuceneSearch.PhraseQuery phraseQuery = new Lucene.Net.Search.PhraseQuery();

                foreach (string keyword in keywords)
                {
                    phraseQuery.Add(new Term(key, keyword));
                }

                query.Add(phraseQuery, occurrence);
            }
            else if (matchVariant == MatchVariant.Like)
            {
                query.Add(new LuceneSearch.WildcardQuery(new Term(key, value + "*")), occurrence);
            }
            else
            {
                query.Add(new LuceneSearch.TermQuery(new Term(key, value)), occurrence);
            }
        }
        public void AutomaticallyClosesDanglingQuotes()
        {
            // arrange
            var queryText = "title:\"dot NET version:1.2.3";
            var phraseQuery = new PhraseQuery();
            phraseQuery.Add(new Term("Title", "dot"));
            phraseQuery.Add(new Term("Title", "net"));
            phraseQuery.Add(new Term("Title", "version"));
            phraseQuery.Add(new Term("Title", "1"));
            phraseQuery.Add(new Term("Title", "2"));
            phraseQuery.Add(new Term("Title", "3"));

            var expected = new BooleanQuery
            {
                new BooleanClause(new BooleanQuery { new BooleanClause(new BooleanQuery { new BooleanClause(phraseQuery, Occur.SHOULD) }, Occur.SHOULD) }, Occur.MUST)
            };

            // act
            var actual = NuGetQuery.MakeQuery(queryText);

            // assert
            Assert.Equal(expected, actual);
        }
        public virtual void TestPhraseQuery()
        {
            PhraseQuery q = new PhraseQuery();
            q.Add(new Term("f", "b"));
            q.Add(new Term("f", "c"));
            CountingCollector c = new CountingCollector(TopScoreDocCollector.Create(10, true));
            s.Search(q, null, c);
            int maxDocs = s.IndexReader.MaxDoc;
            Assert.AreEqual(maxDocs, c.DocCounts.Count);
            for (int i = 0; i < maxDocs; i++)
            {
                IDictionary<Query, float?> doc0 = c.DocCounts[i];
                Assert.AreEqual(1, doc0.Count);
                Assert.AreEqual(2.0F, doc0[q], FLOAT_TOLERANCE);

                IDictionary<Query, float?> doc1 = c.DocCounts[++i];
                Assert.AreEqual(1, doc1.Count);
                Assert.AreEqual(1.0F, doc1[q], FLOAT_TOLERANCE);
            }
        }
Example #17
0
		public virtual void  TestPalyndrome3()
		{
			
			// search on non palyndrome, find phrase with no slop, using exact phrase scorer
			query.Slop = 0; // to use exact phrase scorer
			query.Add(new Term("field", "one"));
			query.Add(new Term("field", "two"));
			query.Add(new Term("field", "three"));
			ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(1, hits.Length, "phrase found with exact phrase scorer");
			float score0 = hits[0].Score;
			//System.out.println("(exact) field: one two three: "+score0);
			QueryUtils.Check(query, searcher);
			
			// search on non palyndrome, find phrase with slop 3, though no slop required here.
			query.Slop = 4; // to use sloppy scorer 
			hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(1, hits.Length, "just sloppy enough");
			float score1 = hits[0].Score;
			//System.out.println("(sloppy) field: one two three: "+score1);
			Assert.AreEqual(score0, score1, SCORE_COMP_THRESH, "exact scorer and sloppy scorer score the same when slop does not matter");
			QueryUtils.Check(query, searcher);
			
			// search ordered in palyndrome, find it twice
			query = new PhraseQuery();
			query.Slop = 4; // must be at least four for both ordered and reversed to match
			query.Add(new Term("palindrome", "one"));
			query.Add(new Term("palindrome", "two"));
			query.Add(new Term("palindrome", "three"));
			hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(1, hits.Length, "just sloppy enough");
			float score2 = hits[0].Score;
			//System.out.println("palindrome: one two three: "+score2);
			QueryUtils.Check(query, searcher);
			
			//commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq(). 
            //Assert.IsTrue(score1+SCORE_COMP_THRESH<score2,"ordered scores higher in palindrome");
			
			// search reveresed in palyndrome, find it twice
			query = new PhraseQuery();
			query.Slop = 4; // must be at least four for both ordered and reversed to match
			query.Add(new Term("palindrome", "three"));
			query.Add(new Term("palindrome", "two"));
			query.Add(new Term("palindrome", "one"));
			hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(1, hits.Length, "just sloppy enough");
			float score3 = hits[0].Score;
			//System.out.println("palindrome: three two one: "+score3);
			QueryUtils.Check(query, searcher);
			
			//commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq(). 
            //Assert.IsTrue(score1+SCORE_COMP_THRESH<score3,"reversed scores higher in palindrome");
            //Assert.AreEqual(score2, score3, SCORE_COMP_THRESH, "ordered or reversed does not matter");
		}
Example #18
0
		public virtual void  TestSlopScoring()
		{
			Directory directory = new RAMDirectory();
			IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			
			Document doc = new Document();
			doc.Add(new Field("field", "foo firstname lastname foo", Field.Store.YES, Field.Index.ANALYZED));
			writer.AddDocument(doc);
			
			Document doc2 = new Document();
			doc2.Add(new Field("field", "foo firstname xxx lastname foo", Field.Store.YES, Field.Index.ANALYZED));
			writer.AddDocument(doc2);
			
			Document doc3 = new Document();
			doc3.Add(new Field("field", "foo firstname xxx yyy lastname foo", Field.Store.YES, Field.Index.ANALYZED));
			writer.AddDocument(doc3);
			
			writer.Optimize();
			writer.Close();

		    Searcher searcher = new IndexSearcher(directory, true);
			PhraseQuery query = new PhraseQuery();
			query.Add(new Term("field", "firstname"));
			query.Add(new Term("field", "lastname"));
			query.Slop = System.Int32.MaxValue;
			ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(3, hits.Length);
			// Make sure that those matches where the terms appear closer to
			// each other get a higher score:
			Assert.AreEqual(0.71, hits[0].Score, 0.01);
			Assert.AreEqual(0, hits[0].Doc);
			Assert.AreEqual(0.44, hits[1].Score, 0.01);
			Assert.AreEqual(1, hits[1].Doc);
			Assert.AreEqual(0.31, hits[2].Score, 0.01);
			Assert.AreEqual(2, hits[2].Doc);
			QueryUtils.Check(query, searcher);
		}
Example #19
0
		public virtual void  TestPhraseQueryWithStopAnalyzer()
		{
			RAMDirectory directory = new RAMDirectory();
			StopAnalyzer stopAnalyzer = new StopAnalyzer(Util.Version.LUCENE_24);
			IndexWriter writer = new IndexWriter(directory, stopAnalyzer, true, IndexWriter.MaxFieldLength.LIMITED);
			Document doc = new Document();
			doc.Add(new Field("field", "the stop words are here", Field.Store.YES, Field.Index.ANALYZED));
			writer.AddDocument(doc);
			writer.Close();

		    IndexSearcher searcher = new IndexSearcher(directory, true);
			
			// valid exact phrase query
			PhraseQuery query = new PhraseQuery();
			query.Add(new Term("field", "stop"));
			query.Add(new Term("field", "words"));
			ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(1, hits.Length);
			QueryUtils.Check(query, searcher);
			
			
			// StopAnalyzer as of 2.4 does not leave "holes", so this matches.
			query = new PhraseQuery();
			query.Add(new Term("field", "words"));
			query.Add(new Term("field", "here"));
			hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(1, hits.Length);
			QueryUtils.Check(query, searcher);
			
			
			searcher.Close();
		}
Example #20
0
        public virtual void TestBasic()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            Document doc = new Document();

            doc.Add(NewStringField("id", "0", Field.Store.YES));
            doc.Add(NewTextField("field", "wizard the the the the the oz", Field.Store.NO));
            w.AddDocument(doc);
            doc = new Document();
            doc.Add(NewStringField("id", "1", Field.Store.YES));
            // 1 extra token, but wizard and oz are close;
            doc.Add(NewTextField("field", "wizard oz the the the the the the", Field.Store.NO));
            w.AddDocument(doc);
            IndexReader r = w.Reader;

            w.Dispose();

            // Do ordinary BooleanQuery:
            BooleanQuery bq = new BooleanQuery();

            bq.Add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD);
            bq.Add(new TermQuery(new Term("field", "oz")), Occur.SHOULD);
            IndexSearcher searcher = GetSearcher(r);

            searcher.Similarity = new DefaultSimilarity();

            TopDocs hits = searcher.Search(bq, 10);

            Assert.AreEqual(2, hits.TotalHits);
            Assert.AreEqual("0", searcher.Doc(hits.ScoreDocs[0].Doc).Get("id"));
            Assert.AreEqual("1", searcher.Doc(hits.ScoreDocs[1].Doc).Get("id"));

            // Now, resort using PhraseQuery:
            PhraseQuery pq = new PhraseQuery();

            pq.Slop = 5;
            pq.Add(new Term("field", "wizard"));
            pq.Add(new Term("field", "oz"));

            TopDocs hits2 = QueryRescorer.Rescore(searcher, hits, pq, 2.0, 10);

            // Resorting changed the order:
            Assert.AreEqual(2, hits2.TotalHits);
            Assert.AreEqual("1", searcher.Doc(hits2.ScoreDocs[0].Doc).Get("id"));
            Assert.AreEqual("0", searcher.Doc(hits2.ScoreDocs[1].Doc).Get("id"));

            // Resort using SpanNearQuery:
            SpanTermQuery t1  = new SpanTermQuery(new Term("field", "wizard"));
            SpanTermQuery t2  = new SpanTermQuery(new Term("field", "oz"));
            SpanNearQuery snq = new SpanNearQuery(new SpanQuery[] { t1, t2 }, 0, true);

            TopDocs hits3 = QueryRescorer.Rescore(searcher, hits, snq, 2.0, 10);

            // Resorting changed the order:
            Assert.AreEqual(2, hits3.TotalHits);
            Assert.AreEqual("1", searcher.Doc(hits3.ScoreDocs[0].Doc).Get("id"));
            Assert.AreEqual("0", searcher.Doc(hits3.ScoreDocs[1].Doc).Get("id"));

            r.Dispose();
            dir.Dispose();
        }
		public virtual void  TestSlopScoring()
		{
			Directory directory = new RAMDirectory();
			IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
			
			Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
			doc.Add(new Field("field", "foo firstname lastname foo", Field.Store.YES, Field.Index.TOKENIZED));
			writer.AddDocument(doc);
			
			Lucene.Net.Documents.Document doc2 = new Lucene.Net.Documents.Document();
			doc2.Add(new Field("field", "foo firstname xxx lastname foo", Field.Store.YES, Field.Index.TOKENIZED));
			writer.AddDocument(doc2);
			
			Lucene.Net.Documents.Document doc3 = new Lucene.Net.Documents.Document();
			doc3.Add(new Field("field", "foo firstname xxx yyy lastname foo", Field.Store.YES, Field.Index.TOKENIZED));
			writer.AddDocument(doc3);
			
			writer.Optimize();
			writer.Close();
			
			Searcher searcher = new IndexSearcher(directory);
			PhraseQuery query = new PhraseQuery();
			query.Add(new Term("field", "firstname"));
			query.Add(new Term("field", "lastname"));
			query.SetSlop(System.Int32.MaxValue);
			Hits hits = searcher.Search(query);
			Assert.AreEqual(3, hits.Length());
			// Make sure that those matches where the terms appear closer to
			// each other get a higher score:
			Assert.AreEqual(0.71, hits.Score(0), 0.01);
			Assert.AreEqual(0, hits.Id(0));
			Assert.AreEqual(0.44, hits.Score(1), 0.01);
			Assert.AreEqual(1, hits.Id(1));
			Assert.AreEqual(0.31, hits.Score(2), 0.01);
			Assert.AreEqual(2, hits.Id(2));
			QueryUtils.Check(query, searcher);
		}
Example #22
0
        public virtual void TestNonExistingPhrase()
        {
            // phrase without repetitions that exists in 2 docs
            query.Add(new Term("nonexist", "phrase"));
            query.Add(new Term("nonexist", "notexist"));
            query.Add(new Term("nonexist", "found"));
            query.Slop = 2; // would be found this way

            ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(2, hits.Length, "phrase without repetitions exists in 2 docs");
            QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, query, searcher);

            // phrase with repetitions that exists in 2 docs
            query = new PhraseQuery();
            query.Add(new Term("nonexist", "phrase"));
            query.Add(new Term("nonexist", "exist"));
            query.Add(new Term("nonexist", "exist"));
            query.Slop = 1; // would be found

            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(2, hits.Length, "phrase with repetitions exists in two docs");
            QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, query, searcher);

            // phrase I with repetitions that does not exist in any doc
            query = new PhraseQuery();
            query.Add(new Term("nonexist", "phrase"));
            query.Add(new Term("nonexist", "notexist"));
            query.Add(new Term("nonexist", "phrase"));
            query.Slop = 1000; // would not be found no matter how high the slop is

            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length, "nonexisting phrase with repetitions does not exist in any doc");
            QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, query, searcher);

            // phrase II with repetitions that does not exist in any doc
            query = new PhraseQuery();
            query.Add(new Term("nonexist", "phrase"));
            query.Add(new Term("nonexist", "exist"));
            query.Add(new Term("nonexist", "exist"));
            query.Add(new Term("nonexist", "exist"));
            query.Slop = 1000; // would not be found no matter how high the slop is

            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length, "nonexisting phrase with repetitions does not exist in any doc");
            QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, query, searcher);
        }
Example #23
0
        public virtual void TestNullOrSubScorer()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);
            Document doc = new Document();

            doc.Add(NewTextField("field", "a b c d", Field.Store.NO));
            w.AddDocument(doc);

            IndexReader   r = w.GetReader();
            IndexSearcher s = NewSearcher(r);

            // this test relies upon coord being the default implementation,
            // otherwise scores are different!
            s.Similarity = new DefaultSimilarity();

            BooleanQuery q = new BooleanQuery();

            q.Add(new TermQuery(new Term("field", "a")), Occur.SHOULD);

            // LUCENE-2617: make sure that a term not in the index still contributes to the score via coord factor
            float score    = s.Search(q, 10).MaxScore;
            Query subQuery = new TermQuery(new Term("field", "not_in_index"));

            subQuery.Boost = 0;
            q.Add(subQuery, Occur.SHOULD);
            float score2 = s.Search(q, 10).MaxScore;

            Assert.AreEqual(score * .5F, score2, 1e-6);

            // LUCENE-2617: make sure that a clause not in the index still contributes to the score via coord factor
            BooleanQuery qq     = (BooleanQuery)q.Clone();
            PhraseQuery  phrase = new PhraseQuery();

            phrase.Add(new Term("field", "not_in_index"));
            phrase.Add(new Term("field", "another_not_in_index"));
            phrase.Boost = 0;
            qq.Add(phrase, Occur.SHOULD);
            score2 = s.Search(qq, 10).MaxScore;
            Assert.AreEqual(score * (1 / 3F), score2, 1e-6);

            // now test BooleanScorer2
            subQuery       = new TermQuery(new Term("field", "b"));
            subQuery.Boost = 0;
            q.Add(subQuery, Occur.MUST);
            score2 = s.Search(q, 10).MaxScore;
            Assert.AreEqual(score * (2 / 3F), score2, 1e-6);

            // PhraseQuery w/ no terms added returns a null scorer
            PhraseQuery pq = new PhraseQuery();

            q.Add(pq, Occur.SHOULD);
            Assert.AreEqual(1, s.Search(q, 10).TotalHits);

            // A required clause which returns null scorer should return null scorer to
            // IndexSearcher.
            q  = new BooleanQuery();
            pq = new PhraseQuery();
            q.Add(new TermQuery(new Term("field", "a")), Occur.SHOULD);
            q.Add(pq, Occur.MUST);
            Assert.AreEqual(0, s.Search(q, 10).TotalHits);

            DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(1.0f);

            dmq.Add(new TermQuery(new Term("field", "a")));
            dmq.Add(pq);
            Assert.AreEqual(1, s.Search(dmq, 10).TotalHits);

            r.Dispose();
            w.Dispose();
            dir.Dispose();
        }
Example #24
0
        public virtual void TestRandomPhrases()
        {
            Directory dir      = NewDirectory();
            Analyzer  analyzer = new MockAnalyzer(Random());

            RandomIndexWriter       w    = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMergePolicy(NewLogMergePolicy()));
            IList <IList <string> > docs = new List <IList <string> >();

            Documents.Document d = new Documents.Document();
            Field f = NewTextField("f", "", Field.Store.NO);

            d.Add(f);

            Random r = Random();

            int NUM_DOCS = AtLeast(10);

            for (int i = 0; i < NUM_DOCS; i++)
            {
                // must be > 4096 so it spans multiple chunks
                int termCount = TestUtil.NextInt(Random(), 4097, 8200);

                IList <string> doc = new List <string>();

                StringBuilder sb = new StringBuilder();
                while (doc.Count < termCount)
                {
                    if (r.Next(5) == 1 || docs.Count == 0)
                    {
                        // make new non-empty-string term
                        string term;
                        while (true)
                        {
                            term = TestUtil.RandomUnicodeString(r);
                            if (term.Length > 0)
                            {
                                break;
                            }
                        }
                        IOException priorException = null;
                        TokenStream ts             = analyzer.TokenStream("ignore", new StringReader(term));
                        try
                        {
                            ICharTermAttribute termAttr = ts.AddAttribute <ICharTermAttribute>();
                            ts.Reset();
                            while (ts.IncrementToken())
                            {
                                string text = termAttr.ToString();
                                doc.Add(text);
                                sb.Append(text).Append(' ');
                            }
                            ts.End();
                        }
                        catch (IOException e)
                        {
                            priorException = e;
                        }
                        finally
                        {
                            IOUtils.CloseWhileHandlingException(priorException, ts);
                        }
                    }
                    else
                    {
                        // pick existing sub-phrase
                        IList <string> lastDoc = docs[r.Next(docs.Count)];
                        int            len     = TestUtil.NextInt(r, 1, 10);
                        int            start   = r.Next(lastDoc.Count - len);
                        for (int k = start; k < start + len; k++)
                        {
                            string t = lastDoc[k];
                            doc.Add(t);
                            sb.Append(t).Append(' ');
                        }
                    }
                }
                docs.Add(doc);
                f.StringValue = sb.ToString();
                w.AddDocument(d);
            }

            IndexReader   reader = w.Reader;
            IndexSearcher s      = NewSearcher(reader);

            w.Dispose();

            // now search
            int num = AtLeast(10);

            for (int i = 0; i < num; i++)
            {
                int            docID = r.Next(docs.Count);
                IList <string> doc   = docs[docID];

                int           numTerm = TestUtil.NextInt(r, 2, 20);
                int           start   = r.Next(doc.Count - numTerm);
                PhraseQuery   pq      = new PhraseQuery();
                StringBuilder sb      = new StringBuilder();
                for (int t = start; t < start + numTerm; t++)
                {
                    pq.Add(new Term("f", doc[t]));
                    sb.Append(doc[t]).Append(' ');
                }

                TopDocs hits  = s.Search(pq, NUM_DOCS);
                bool    found = false;
                for (int j = 0; j < hits.ScoreDocs.Length; j++)
                {
                    if (hits.ScoreDocs[j].Doc == docID)
                    {
                        found = true;
                        break;
                    }
                }

                Assert.IsTrue(found, "phrase '" + sb + "' not found; start=" + start);
            }

            reader.Dispose();
            dir.Dispose();
        }
Example #25
0
        public virtual void TestPalyndrome3()
        {
            // search on non palyndrome, find phrase with no slop, using exact phrase scorer
            Query.Slop = 0; // to use exact phrase scorer
            Query.Add(new Term("field", "one"));
            Query.Add(new Term("field", "two"));
            Query.Add(new Term("field", "three"));
            ScoreDoc[] hits = Searcher.Search(Query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length, "phrase found with exact phrase scorer");
            float score0 = hits[0].Score;

            //System.out.println("(exact) field: one two three: "+score0);
            QueryUtils.Check(Random(), Query, Searcher, Similarity);

            // just make sure no exc:
            Searcher.Explain(Query, 0);

            // search on non palyndrome, find phrase with slop 3, though no slop required here.
            Query.Slop = 4; // to use sloppy scorer
            hits       = Searcher.Search(Query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length, "just sloppy enough");
            float score1 = hits[0].Score;

            //System.out.println("(sloppy) field: one two three: "+score1);
            Assert.AreEqual(score0, score1, SCORE_COMP_THRESH, "exact scorer and sloppy scorer score the same when slop does not matter");
            QueryUtils.Check(Random(), Query, Searcher, Similarity);

            // search ordered in palyndrome, find it twice
            Query      = new PhraseQuery();
            Query.Slop = 4; // must be at least four for both ordered and reversed to match
            Query.Add(new Term("palindrome", "one"));
            Query.Add(new Term("palindrome", "two"));
            Query.Add(new Term("palindrome", "three"));
            hits = Searcher.Search(Query, null, 1000).ScoreDocs;

            // just make sure no exc:
            Searcher.Explain(Query, 0);

            Assert.AreEqual(1, hits.Length, "just sloppy enough");
            //float score2 = hits[0].Score;
            //System.out.println("palindrome: one two three: "+score2);
            QueryUtils.Check(Random(), Query, Searcher, Similarity);

            //commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
            //Assert.IsTrue("ordered scores higher in palindrome",score1+SCORE_COMP_THRESH<score2);

            // search reveresed in palyndrome, find it twice
            Query      = new PhraseQuery();
            Query.Slop = 4; // must be at least four for both ordered and reversed to match
            Query.Add(new Term("palindrome", "three"));
            Query.Add(new Term("palindrome", "two"));
            Query.Add(new Term("palindrome", "one"));
            hits = Searcher.Search(Query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length, "just sloppy enough");
            //float score3 = hits[0].Score;
            //System.out.println("palindrome: three two one: "+score3);
            QueryUtils.Check(Random(), Query, Searcher, Similarity);

            //commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
            //Assert.IsTrue("reversed scores higher in palindrome",score1+SCORE_COMP_THRESH<score3);
            //Assert.AreEqual("ordered or reversed does not matter",score2, score3, SCORE_COMP_THRESH);
        }
Example #26
0
        public virtual void TestPhraseQueryInConjunctionScorer()
        {
            Directory         directory = NewDirectory();
            RandomIndexWriter writer    = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);

            Documents.Document doc = new Documents.Document();
            doc.Add(NewTextField("source", "marketing info", Field.Store.YES));
            writer.AddDocument(doc);

            doc = new Documents.Document();
            doc.Add(NewTextField("contents", "foobar", Field.Store.YES));
            doc.Add(NewTextField("source", "marketing info", Field.Store.YES));
            writer.AddDocument(doc);

            IndexReader reader = writer.Reader;

            writer.Dispose();

            IndexSearcher searcher = NewSearcher(reader);

            PhraseQuery phraseQuery = new PhraseQuery();

            phraseQuery.Add(new Term("source", "marketing"));
            phraseQuery.Add(new Term("source", "info"));
            ScoreDoc[] hits = searcher.Search(phraseQuery, null, 1000).ScoreDocs;
            Assert.AreEqual(2, hits.Length);
            QueryUtils.Check(Random(), phraseQuery, searcher, Similarity);

            TermQuery    termQuery    = new TermQuery(new Term("contents", "foobar"));
            BooleanQuery booleanQuery = new BooleanQuery();

            booleanQuery.Add(termQuery, BooleanClause.Occur.MUST);
            booleanQuery.Add(phraseQuery, BooleanClause.Occur.MUST);
            hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);
            QueryUtils.Check(Random(), termQuery, searcher, Similarity);

            reader.Dispose();

            writer = new RandomIndexWriter(Random(), directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.CREATE));
            doc    = new Documents.Document();
            doc.Add(NewTextField("contents", "map entry woo", Field.Store.YES));
            writer.AddDocument(doc);

            doc = new Documents.Document();
            doc.Add(NewTextField("contents", "woo map entry", Field.Store.YES));
            writer.AddDocument(doc);

            doc = new Documents.Document();
            doc.Add(NewTextField("contents", "map foobarword entry woo", Field.Store.YES));
            writer.AddDocument(doc);

            reader = writer.Reader;
            writer.Dispose();

            searcher = NewSearcher(reader);

            termQuery   = new TermQuery(new Term("contents", "woo"));
            phraseQuery = new PhraseQuery();
            phraseQuery.Add(new Term("contents", "map"));
            phraseQuery.Add(new Term("contents", "entry"));

            hits = searcher.Search(termQuery, null, 1000).ScoreDocs;
            Assert.AreEqual(3, hits.Length);
            hits = searcher.Search(phraseQuery, null, 1000).ScoreDocs;
            Assert.AreEqual(2, hits.Length);

            booleanQuery = new BooleanQuery();
            booleanQuery.Add(termQuery, BooleanClause.Occur.MUST);
            booleanQuery.Add(phraseQuery, BooleanClause.Occur.MUST);
            hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs;
            Assert.AreEqual(2, hits.Length);

            booleanQuery = new BooleanQuery();
            booleanQuery.Add(phraseQuery, BooleanClause.Occur.MUST);
            booleanQuery.Add(termQuery, BooleanClause.Occur.MUST);
            hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs;
            Assert.AreEqual(2, hits.Length);
            QueryUtils.Check(Random(), booleanQuery, searcher, Similarity);

            reader.Dispose();
            directory.Dispose();
        }
Example #27
0
 public virtual void TestNotCloseEnough()
 {
     Query.Slop = 2;
     Query.Add(new Term("field", "one"));
     Query.Add(new Term("field", "five"));
     ScoreDoc[] hits = Searcher.Search(Query, null, 1000).ScoreDocs;
     Assert.AreEqual(0, hits.Length);
     QueryUtils.Check(Random(), Query, Searcher, Similarity);
 }
Example #28
0
        // Random rnd is passed in so that the exact same random query may be created
        // more than once.
        public static BooleanQuery RandBoolQuery(Random rnd, bool allowMust, int level, string field, string[] vals, Callback cb)
        {
            BooleanQuery current = new BooleanQuery(rnd.Next() < 0);

            for (int i = 0; i < rnd.Next(vals.Length) + 1; i++)
            {
                int qType = 0; // term query
                if (level > 0)
                {
                    qType = rnd.Next(10);
                }
                Query q;
                if (qType < 3)
                {
                    q = new TermQuery(new Term(field, vals[rnd.Next(vals.Length)]));
                }
                else if (qType < 4)
                {
                    Term        t1 = new Term(field, vals[rnd.Next(vals.Length)]);
                    Term        t2 = new Term(field, vals[rnd.Next(vals.Length)]);
                    PhraseQuery pq = new PhraseQuery();
                    pq.Add(t1);
                    pq.Add(t2);
                    pq.Slop = 10; // increase possibility of matching
                    q       = pq;
                }
                else if (qType < 7)
                {
                    q = new WildcardQuery(new Term(field, "w*"));
                }
                else
                {
                    q = RandBoolQuery(rnd, allowMust, level - 1, field, vals, cb);
                }

                int r = rnd.Next(10);
                BooleanClause.Occur occur;
                if (r < 2)
                {
                    occur = BooleanClause.Occur.MUST_NOT;
                }
                else if (r < 5)
                {
                    if (allowMust)
                    {
                        occur = BooleanClause.Occur.MUST;
                    }
                    else
                    {
                        occur = BooleanClause.Occur.SHOULD;
                    }
                }
                else
                {
                    occur = BooleanClause.Occur.SHOULD;
                }

                current.Add(q, occur);
            }
            if (cb != null)
            {
                cb.PostCreate(current);
            }
            return(current);
        }
Example #29
0
        public virtual void  TestPhraseQueryInConjunctionScorer()
        {
            RAMDirectory directory = new RAMDirectory();
            IndexWriter  writer    = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);

            Document doc = new Document();

            doc.Add(new Field("source", "marketing info", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new Field("contents", "foobar", Field.Store.YES, Field.Index.ANALYZED));
            doc.Add(new Field("source", "marketing info", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            writer.Optimize();
            writer.Close();

            IndexSearcher searcher = new IndexSearcher(directory);

            PhraseQuery phraseQuery = new PhraseQuery();

            phraseQuery.Add(new Term("source", "marketing"));
            phraseQuery.Add(new Term("source", "info"));
            ScoreDoc[] hits = searcher.Search(phraseQuery, null, 1000).scoreDocs;
            Assert.AreEqual(2, hits.Length);
            QueryUtils.Check(phraseQuery, searcher);


            TermQuery    termQuery    = new TermQuery(new Term("contents", "foobar"));
            BooleanQuery booleanQuery = new BooleanQuery();

            booleanQuery.Add(termQuery, BooleanClause.Occur.MUST);
            booleanQuery.Add(phraseQuery, BooleanClause.Occur.MUST);
            hits = searcher.Search(booleanQuery, null, 1000).scoreDocs;
            Assert.AreEqual(1, hits.Length);
            QueryUtils.Check(termQuery, searcher);


            searcher.Close();

            writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
            doc    = new Document();
            doc.Add(new Field("contents", "map entry woo", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new Field("contents", "woo map entry", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new Field("contents", "map foobarword entry woo", Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            writer.Optimize();
            writer.Close();

            searcher = new IndexSearcher(directory);

            termQuery   = new TermQuery(new Term("contents", "woo"));
            phraseQuery = new PhraseQuery();
            phraseQuery.Add(new Term("contents", "map"));
            phraseQuery.Add(new Term("contents", "entry"));

            hits = searcher.Search(termQuery, null, 1000).scoreDocs;
            Assert.AreEqual(3, hits.Length);
            hits = searcher.Search(phraseQuery, null, 1000).scoreDocs;
            Assert.AreEqual(2, hits.Length);


            booleanQuery = new BooleanQuery();
            booleanQuery.Add(termQuery, BooleanClause.Occur.MUST);
            booleanQuery.Add(phraseQuery, BooleanClause.Occur.MUST);
            hits = searcher.Search(booleanQuery, null, 1000).scoreDocs;
            Assert.AreEqual(2, hits.Length);

            booleanQuery = new BooleanQuery();
            booleanQuery.Add(phraseQuery, BooleanClause.Occur.MUST);
            booleanQuery.Add(termQuery, BooleanClause.Occur.MUST);
            hits = searcher.Search(booleanQuery, null, 1000).scoreDocs;
            Assert.AreEqual(2, hits.Length);
            QueryUtils.Check(booleanQuery, searcher);


            searcher.Close();
            directory.Close();
        }
Example #30
0
		public virtual void  TestExact()
		{
			// slop is zero by default
			query.Add(new Term("field", "four"));
			query.Add(new Term("field", "five"));
			ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(1, hits.Length, "exact match");
			QueryUtils.Check(query, searcher);
			
			
			query = new PhraseQuery();
			query.Add(new Term("field", "two"));
			query.Add(new Term("field", "one"));
			hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(0, hits.Length, "reverse not exact");
			QueryUtils.Check(query, searcher);
		}
		public virtual void  TestPhraseQueryInConjunctionScorer()
		{
			RAMDirectory directory = new RAMDirectory();
			IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
			
			Lucene.Net.Documents.Document doc = new Lucene.Net.Documents.Document();
			doc.Add(new Field("source", "marketing info", Field.Store.YES, Field.Index.TOKENIZED));
			writer.AddDocument(doc);
			
			doc = new Lucene.Net.Documents.Document();
			doc.Add(new Field("contents", "foobar", Field.Store.YES, Field.Index.TOKENIZED));
			doc.Add(new Field("source", "marketing info", Field.Store.YES, Field.Index.TOKENIZED));
			writer.AddDocument(doc);
			
			writer.Optimize();
			writer.Close();
			
			IndexSearcher searcher = new IndexSearcher(directory);
			
			PhraseQuery phraseQuery = new PhraseQuery();
			phraseQuery.Add(new Term("source", "marketing"));
			phraseQuery.Add(new Term("source", "info"));
			Hits hits = searcher.Search(phraseQuery);
			Assert.AreEqual(2, hits.Length());
			QueryUtils.Check(phraseQuery, searcher);

			
			TermQuery termQuery = new TermQuery(new Term("contents", "foobar"));
			BooleanQuery booleanQuery = new BooleanQuery();
			booleanQuery.Add(termQuery, BooleanClause.Occur.MUST);
			booleanQuery.Add(phraseQuery, BooleanClause.Occur.MUST);
			hits = searcher.Search(booleanQuery);
			Assert.AreEqual(1, hits.Length());
			QueryUtils.Check(termQuery, searcher);

			
			searcher.Close();
			
			writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
			doc = new Lucene.Net.Documents.Document();
			doc.Add(new Field("contents", "map entry woo", Field.Store.YES, Field.Index.TOKENIZED));
			writer.AddDocument(doc);
			
			doc = new Lucene.Net.Documents.Document();
			doc.Add(new Field("contents", "woo map entry", Field.Store.YES, Field.Index.TOKENIZED));
			writer.AddDocument(doc);
			
			doc = new Lucene.Net.Documents.Document();
			doc.Add(new Field("contents", "map foobarword entry woo", Field.Store.YES, Field.Index.TOKENIZED));
			writer.AddDocument(doc);
			
			writer.Optimize();
			writer.Close();
			
			searcher = new IndexSearcher(directory);
			
			termQuery = new TermQuery(new Term("contents", "woo"));
			phraseQuery = new PhraseQuery();
			phraseQuery.Add(new Term("contents", "map"));
			phraseQuery.Add(new Term("contents", "entry"));
			
			hits = searcher.Search(termQuery);
			Assert.AreEqual(3, hits.Length());
			hits = searcher.Search(phraseQuery);
			Assert.AreEqual(2, hits.Length());

			
			booleanQuery = new BooleanQuery();
			booleanQuery.Add(termQuery, BooleanClause.Occur.MUST);
			booleanQuery.Add(phraseQuery, BooleanClause.Occur.MUST);
			hits = searcher.Search(booleanQuery);
			Assert.AreEqual(2, hits.Length());
			
			booleanQuery = new BooleanQuery();
			booleanQuery.Add(phraseQuery, BooleanClause.Occur.MUST);
			booleanQuery.Add(termQuery, BooleanClause.Occur.MUST);
			hits = searcher.Search(booleanQuery);
			Assert.AreEqual(2, hits.Length());
			QueryUtils.Check(booleanQuery, searcher);

			
			searcher.Close();
			directory.Close();
		}
Example #32
0
		public virtual void  TestOrderDoesntMatter()
		{
			query.Slop = 2; // must be at least two for reverse order match
			query.Add(new Term("field", "two"));
			query.Add(new Term("field", "one"));
			ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(1, hits.Length, "just sloppy enough");
			QueryUtils.Check(query, searcher);
			
			
			query = new PhraseQuery();
			query.Slop = 2;
			query.Add(new Term("field", "three"));
			query.Add(new Term("field", "one"));
			hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(0, hits.Length, "not sloppy enough");
			QueryUtils.Check(query, searcher);
		}
Example #33
0
        protected void ApplyLanguageClause(BooleanQuery query, string language, BooleanClause.Occur occurance)
        {
            if (String.IsNullOrEmpty(language)) return;

            var phraseQuery = new PhraseQuery();
            phraseQuery.Add(new Term(BuiltinFields.Language, language.ToLowerInvariant()));

            query.Add(phraseQuery, occurance);
        }
Example #34
0
        public virtual void TestExplain()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            Document doc = new Document();

            doc.Add(NewStringField("id", "0", Field.Store.YES));
            doc.Add(NewTextField("field", "wizard the the the the the oz", Field.Store.NO));
            w.AddDocument(doc);
            doc = new Document();
            doc.Add(NewStringField("id", "1", Field.Store.YES));
            // 1 extra token, but wizard and oz are close;
            doc.Add(NewTextField("field", "wizard oz the the the the the the", Field.Store.NO));
            w.AddDocument(doc);
            IndexReader r = w.Reader;

            w.Dispose();

            // Do ordinary BooleanQuery:
            BooleanQuery bq = new BooleanQuery();

            bq.Add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD);
            bq.Add(new TermQuery(new Term("field", "oz")), Occur.SHOULD);
            IndexSearcher searcher = GetSearcher(r);

            TopDocs hits = searcher.Search(bq, 10);

            Assert.AreEqual(2, hits.TotalHits);
            Assert.AreEqual("0", searcher.Doc(hits.ScoreDocs[0].Doc).Get("id"));
            Assert.AreEqual("1", searcher.Doc(hits.ScoreDocs[1].Doc).Get("id"));

            // Now, resort using PhraseQuery:
            PhraseQuery pq = new PhraseQuery();

            pq.Add(new Term("field", "wizard"));
            pq.Add(new Term("field", "oz"));

            Rescorer rescorer = new QueryRescorerAnonymousInnerClassHelper2(this, pq);

            TopDocs hits2 = rescorer.Rescore(searcher, hits, 10);

            // Resorting changed the order:
            Assert.AreEqual(2, hits2.TotalHits);
            Assert.AreEqual("1", searcher.Doc(hits2.ScoreDocs[0].Doc).Get("id"));
            Assert.AreEqual("0", searcher.Doc(hits2.ScoreDocs[1].Doc).Get("id"));

            int         docID   = hits2.ScoreDocs[0].Doc;
            Explanation explain = rescorer.Explain(searcher, searcher.Explain(bq, docID), docID);
            string      s       = explain.ToString();

            Assert.IsTrue(s.Contains("TestQueryRescorer+"));
            Assert.IsTrue(s.Contains("combined first and second pass score"));
            Assert.IsTrue(s.Contains("first pass score"));
            Assert.IsTrue(s.Contains("= second pass score"));
            Assert.AreEqual(hits2.ScoreDocs[0].Score, explain.Value, 0.0f);

            docID   = hits2.ScoreDocs[1].Doc;
            explain = rescorer.Explain(searcher, searcher.Explain(bq, docID), docID);
            s       = explain.ToString();
            Assert.IsTrue(s.Contains("TestQueryRescorer+"));
            Assert.IsTrue(s.Contains("combined first and second pass score"));
            Assert.IsTrue(s.Contains("first pass score"));
            Assert.IsTrue(s.Contains("no second pass score"));
            Assert.IsFalse(s.Contains("= second pass score"));
            Assert.IsTrue(s.Contains("NON-MATCH"));
            Assert.IsTrue(Math.Abs(hits2.ScoreDocs[1].Score - explain.Value) < 0.0000001f);

            r.Dispose();
            dir.Dispose();
        }
Example #35
0
		public virtual void  TestSlop1()
		{
			// Ensures slop of 1 works with terms in order.
			query.Slop = 1;
			query.Add(new Term("field", "one"));
			query.Add(new Term("field", "two"));
			ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(1, hits.Length, "in order");
			QueryUtils.Check(query, searcher);
			
			
			// Ensures slop of 1 does not work for phrases out of order;
			// must be at least 2.
			query = new PhraseQuery();
			query.Slop = 1;
			query.Add(new Term("field", "two"));
			query.Add(new Term("field", "one"));
			hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(0, hits.Length, "reversed, slop not 2 or more");
			QueryUtils.Check(query, searcher);
		}
Example #36
0
 public virtual void  TestNotCloseEnough()
 {
     query.SetSlop(2);
     query.Add(new Term("field", "one"));
     query.Add(new Term("field", "five"));
     ScoreDoc[] hits = searcher.Search(query, null, 1000).scoreDocs;
     Assert.AreEqual(0, hits.Length);
     QueryUtils.Check(query, searcher);
 }
Example #37
0
		public virtual void  TestMulipleTerms()
		{
			query.Slop = 2;
			query.Add(new Term("field", "one"));
			query.Add(new Term("field", "three"));
			query.Add(new Term("field", "five"));
			ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(1, hits.Length, "two total moves");
			QueryUtils.Check(query, searcher);
			
			
			query = new PhraseQuery();
			query.Slop = 5; // it takes six moves to match this phrase
			query.Add(new Term("field", "five"));
			query.Add(new Term("field", "three"));
			query.Add(new Term("field", "one"));
			hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(0, hits.Length, "slop of 5 not close enough");
			QueryUtils.Check(query, searcher);
			
			
			query.Slop = 6;
			hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(1, hits.Length, "slop of 6 just right");
			QueryUtils.Check(query, searcher);
		}
Example #38
0
        public virtual void TestNotCloseEnough()
        {
            query.Slop = 2;
            query.Add(new Term("field", "one"));
            query.Add(new Term("field", "five"));
            ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);
            QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, query, searcher);
        }
Example #39
0
		public virtual void  TestPhraseQueryInConjunctionScorer()
		{
			RAMDirectory directory = new RAMDirectory();
			IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			
			Document doc = new Document();
			doc.Add(new Field("source", "marketing info", Field.Store.YES, Field.Index.ANALYZED));
			writer.AddDocument(doc);
			
			doc = new Document();
			doc.Add(new Field("contents", "foobar", Field.Store.YES, Field.Index.ANALYZED));
			doc.Add(new Field("source", "marketing info", Field.Store.YES, Field.Index.ANALYZED));
			writer.AddDocument(doc);
			
			writer.Optimize();
			writer.Close();

		    IndexSearcher searcher = new IndexSearcher(directory, true);
			
			PhraseQuery phraseQuery = new PhraseQuery();
			phraseQuery.Add(new Term("source", "marketing"));
			phraseQuery.Add(new Term("source", "info"));
			ScoreDoc[] hits = searcher.Search(phraseQuery, null, 1000).ScoreDocs;
			Assert.AreEqual(2, hits.Length);
			QueryUtils.Check(phraseQuery, searcher);
			
			
			TermQuery termQuery = new TermQuery(new Term("contents", "foobar"));
			BooleanQuery booleanQuery = new BooleanQuery();
			booleanQuery.Add(termQuery, Occur.MUST);
			booleanQuery.Add(phraseQuery, Occur.MUST);
			hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs;
			Assert.AreEqual(1, hits.Length);
			QueryUtils.Check(termQuery, searcher);
			
			
			searcher.Close();
			
			writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
			doc = new Document();
			doc.Add(new Field("contents", "map entry woo", Field.Store.YES, Field.Index.ANALYZED));
			writer.AddDocument(doc);
			
			doc = new Document();
			doc.Add(new Field("contents", "woo map entry", Field.Store.YES, Field.Index.ANALYZED));
			writer.AddDocument(doc);
			
			doc = new Document();
			doc.Add(new Field("contents", "map foobarword entry woo", Field.Store.YES, Field.Index.ANALYZED));
			writer.AddDocument(doc);
			
			writer.Optimize();
			writer.Close();

		    searcher = new IndexSearcher(directory, true);
			
			termQuery = new TermQuery(new Term("contents", "woo"));
			phraseQuery = new PhraseQuery();
			phraseQuery.Add(new Term("contents", "map"));
			phraseQuery.Add(new Term("contents", "entry"));
			
			hits = searcher.Search(termQuery, null, 1000).ScoreDocs;
			Assert.AreEqual(3, hits.Length);
			hits = searcher.Search(phraseQuery, null, 1000).ScoreDocs;
			Assert.AreEqual(2, hits.Length);
			
			
			booleanQuery = new BooleanQuery();
			booleanQuery.Add(termQuery, Occur.MUST);
			booleanQuery.Add(phraseQuery, Occur.MUST);
			hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs;
			Assert.AreEqual(2, hits.Length);
			
			booleanQuery = new BooleanQuery();
			booleanQuery.Add(phraseQuery, Occur.MUST);
			booleanQuery.Add(termQuery, Occur.MUST);
			hits = searcher.Search(booleanQuery, null, 1000).ScoreDocs;
			Assert.AreEqual(2, hits.Length);
			QueryUtils.Check(booleanQuery, searcher);
			
			
			searcher.Close();
			directory.Close();
		}
        public virtual void TestSetPosition()
        {
            Analyzer          analyzer = new AnalyzerAnonymousInnerClassHelper(this);
            Directory         store    = NewDirectory();
            RandomIndexWriter writer   = new RandomIndexWriter(Random(), store, analyzer, Similarity, TimeZone);
            Document          d        = new Document();

            d.Add(NewTextField("field", "bogus", Field.Store.YES));
            writer.AddDocument(d);
            IndexReader reader = writer.Reader;

            writer.Dispose();

            IndexSearcher searcher = NewSearcher(reader);

            DocsAndPositionsEnum pos = MultiFields.GetTermPositionsEnum(searcher.IndexReader, MultiFields.GetLiveDocs(searcher.IndexReader), "field", new BytesRef("1"));

            pos.NextDoc();
            // first token should be at position 0
            Assert.AreEqual(0, pos.NextPosition());

            pos = MultiFields.GetTermPositionsEnum(searcher.IndexReader, MultiFields.GetLiveDocs(searcher.IndexReader), "field", new BytesRef("2"));
            pos.NextDoc();
            // second token should be at position 2
            Assert.AreEqual(2, pos.NextPosition());

            PhraseQuery q;

            ScoreDoc[] hits;

            q = new PhraseQuery();
            q.Add(new Term("field", "1"));
            q.Add(new Term("field", "2"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // same as previous, just specify positions explicitely.
            q = new PhraseQuery();
            q.Add(new Term("field", "1"), 0);
            q.Add(new Term("field", "2"), 1);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // specifying correct positions should find the phrase.
            q = new PhraseQuery();
            q.Add(new Term("field", "1"), 0);
            q.Add(new Term("field", "2"), 2);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "2"));
            q.Add(new Term("field", "3"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "3"));
            q.Add(new Term("field", "4"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // phrase query would find it when correct positions are specified.
            q = new PhraseQuery();
            q.Add(new Term("field", "3"), 0);
            q.Add(new Term("field", "4"), 0);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            // phrase query should fail for non existing searched term
            // even if there exist another searched terms in the same searched position.
            q = new PhraseQuery();
            q.Add(new Term("field", "3"), 0);
            q.Add(new Term("field", "9"), 0);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // multi-phrase query should succed for non existing searched term
            // because there exist another searched terms in the same searched position.
            MultiPhraseQuery mq = new MultiPhraseQuery();

            mq.Add(new Term[] { new Term("field", "3"), new Term("field", "9") }, 0);
            hits = searcher.Search(mq, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "2"));
            q.Add(new Term("field", "4"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "3"));
            q.Add(new Term("field", "5"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "4"));
            q.Add(new Term("field", "5"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "2"));
            q.Add(new Term("field", "5"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            reader.Dispose();
            store.Dispose();
        }
Example #41
0
		public virtual void  TestNonExistingPhrase()
		{
			// phrase without repetitions that exists in 2 docs
			query.Add(new Term("nonexist", "phrase"));
			query.Add(new Term("nonexist", "notexist"));
			query.Add(new Term("nonexist", "found"));
			query.Slop = 2; // would be found this way
			
			ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(2, hits.Length, "phrase without repetitions exists in 2 docs");
			QueryUtils.Check(query, searcher);
			
			// phrase with repetitions that exists in 2 docs
			query = new PhraseQuery();
			query.Add(new Term("nonexist", "phrase"));
			query.Add(new Term("nonexist", "exist"));
			query.Add(new Term("nonexist", "exist"));
			query.Slop = 1; // would be found 
			
			hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(2, hits.Length, "phrase with repetitions exists in two docs");
			QueryUtils.Check(query, searcher);
			
			// phrase I with repetitions that does not exist in any doc
			query = new PhraseQuery();
			query.Add(new Term("nonexist", "phrase"));
			query.Add(new Term("nonexist", "notexist"));
			query.Add(new Term("nonexist", "phrase"));
			query.Slop = 1000; // would not be found no matter how high the slop is
			
			hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(0, hits.Length, "nonexisting phrase with repetitions does not exist in any doc");
			QueryUtils.Check(query, searcher);
			
			// phrase II with repetitions that does not exist in any doc
			query = new PhraseQuery();
			query.Add(new Term("nonexist", "phrase"));
			query.Add(new Term("nonexist", "exist"));
			query.Add(new Term("nonexist", "exist"));
			query.Add(new Term("nonexist", "exist"));
			query.Slop = 1000; // would not be found no matter how high the slop is
			
			hits = searcher.Search(query, null, 1000).ScoreDocs;
			Assert.AreEqual(0, hits.Length, "nonexisting phrase with repetitions does not exist in any doc");
			QueryUtils.Check(query, searcher);
		}
Example #42
0
        public virtual void  TestNullOrSubScorer()
        {
            Directory   dir = new MockRAMDirectory();
            IndexWriter w   = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED, null);
            Document    doc = new Document();

            doc.Add(new Field("field", "a b c d", Field.Store.NO, Field.Index.ANALYZED));
            w.AddDocument(doc, null);

            IndexReader   r = w.GetReader(null);
            IndexSearcher s = new IndexSearcher(r);
            BooleanQuery  q = new BooleanQuery();

            q.Add(new TermQuery(new Term("field", "a")), Occur.SHOULD);

            // LUCENE-2617: make sure that a term not in the index still contributes to the score via coord factor
            float score    = s.Search(q, 10, null).MaxScore;
            Query subQuery = new TermQuery(new Term("field", "not_in_index"));

            subQuery.Boost = 0;
            q.Add(subQuery, Occur.SHOULD);
            float score2 = s.Search(q, 10, null).MaxScore;

            Assert.AreEqual(score * .5, score2, 1e-6);

            // LUCENE-2617: make sure that a clause not in the index still contributes to the score via coord factor
            BooleanQuery qq     = (BooleanQuery)q.Clone();
            PhraseQuery  phrase = new PhraseQuery();

            phrase.Add(new Term("field", "not_in_index"));
            phrase.Add(new Term("field", "another_not_in_index"));
            phrase.Boost = 0;
            qq.Add(phrase, Occur.SHOULD);
            score2 = s.Search(qq, 10, null).MaxScore;
            Assert.AreEqual(score * (1.0 / 3), score2, 1e-6);

            // now test BooleanScorer2
            subQuery       = new TermQuery(new Term("field", "b"));
            subQuery.Boost = 0;
            q.Add(subQuery, Occur.MUST);
            score2 = s.Search(q, 10, null).MaxScore;
            Assert.AreEqual(score * (2.0 / 3), score2, 1e-6);

            // PhraseQuery w/ no terms added returns a null scorer
            PhraseQuery pq = new PhraseQuery();

            q.Add(pq, Occur.SHOULD);
            Assert.AreEqual(1, s.Search(q, 10, null).TotalHits);

            // A required clause which returns null scorer should return null scorer to
            // IndexSearcher.
            q  = new BooleanQuery();
            pq = new PhraseQuery();
            q.Add(new TermQuery(new Term("field", "a")), Occur.SHOULD);
            q.Add(pq, Occur.MUST);
            Assert.AreEqual(0, s.Search(q, 10, null).TotalHits);

            DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(1.0f);

            dmq.Add(new TermQuery(new Term("field", "a")));
            dmq.Add(pq);
            Assert.AreEqual(1, s.Search(dmq, 10, null).TotalHits);

            r.Close();
            w.Close();
            dir.Close();
        }
        public void TestShingleAnalyzerWrapperPhraseQuery()
        {
            Analyzer analyzer = new ShingleAnalyzerWrapper(new WhitespaceAnalyzer(), 2);
            Searcher = SetUpSearcher(analyzer);

            var q = new PhraseQuery();

            var ts = analyzer.TokenStream("content", new StringReader("this sentence"));
            var j = -1;

            var posIncrAtt = ts.AddAttribute<IPositionIncrementAttribute>();
            var termAtt = ts.AddAttribute<ITermAttribute>();

            while (ts.IncrementToken())
            {
                j += posIncrAtt.PositionIncrement;
                var termText = termAtt.Term;
                q.Add(new Term("content", termText), j);
            }

            var hits = Searcher.Search(q, null, 1000).ScoreDocs;
            var ranks = new[] {0};
            CompareRanks(hits, ranks);
        }
Example #44
0
        public virtual void TestMissingSecondPassScore()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);

            Document doc = new Document();

            doc.Add(NewStringField("id", "0", Field.Store.YES));
            doc.Add(NewTextField("field", "wizard the the the the the oz", Field.Store.NO));
            w.AddDocument(doc);
            doc = new Document();
            doc.Add(NewStringField("id", "1", Field.Store.YES));
            // 1 extra token, but wizard and oz are close;
            doc.Add(NewTextField("field", "wizard oz the the the the the the", Field.Store.NO));
            w.AddDocument(doc);
            IndexReader r = w.GetReader();

            w.Dispose();

            // Do ordinary BooleanQuery:
            BooleanQuery bq = new BooleanQuery();

            bq.Add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD);
            bq.Add(new TermQuery(new Term("field", "oz")), Occur.SHOULD);
            IndexSearcher searcher = GetSearcher(r);

            TopDocs hits = searcher.Search(bq, 10);

            Assert.AreEqual(2, hits.TotalHits);
            Assert.AreEqual("0", searcher.Doc(hits.ScoreDocs[0].Doc).Get("id"));
            Assert.AreEqual("1", searcher.Doc(hits.ScoreDocs[1].Doc).Get("id"));

            // Now, resort using PhraseQuery, no slop:
            PhraseQuery pq = new PhraseQuery();

            pq.Add(new Term("field", "wizard"));
            pq.Add(new Term("field", "oz"));

            TopDocs hits2 = QueryRescorer.Rescore(searcher, hits, pq, 2.0, 10);

            // Resorting changed the order:
            Assert.AreEqual(2, hits2.TotalHits);
            Assert.AreEqual("1", searcher.Doc(hits2.ScoreDocs[0].Doc).Get("id"));
            Assert.AreEqual("0", searcher.Doc(hits2.ScoreDocs[1].Doc).Get("id"));

            // Resort using SpanNearQuery:
            SpanTermQuery t1  = new SpanTermQuery(new Term("field", "wizard"));
            SpanTermQuery t2  = new SpanTermQuery(new Term("field", "oz"));
            SpanNearQuery snq = new SpanNearQuery(new SpanQuery[] { t1, t2 }, 0, true);

            TopDocs hits3 = QueryRescorer.Rescore(searcher, hits, snq, 2.0, 10);

            // Resorting changed the order:
            Assert.AreEqual(2, hits3.TotalHits);
            Assert.AreEqual("1", searcher.Doc(hits3.ScoreDocs[0].Doc).Get("id"));
            Assert.AreEqual("0", searcher.Doc(hits3.ScoreDocs[1].Doc).Get("id"));

            r.Dispose();
            dir.Dispose();
        }
        public virtual void TestShingleAnalyzerWrapperPhraseQuery()
        {
            PhraseQuery q = new PhraseQuery();

            TokenStream ts = analyzer.TokenStream("content", "this sentence");
            try
            {
                int j = -1;

                IPositionIncrementAttribute posIncrAtt = ts.AddAttribute<IPositionIncrementAttribute>();
                ICharTermAttribute termAtt = ts.AddAttribute<ICharTermAttribute>();

                ts.Reset();
                while (ts.IncrementToken())
                {
                    j += posIncrAtt.PositionIncrement;
                    string termText = termAtt.ToString();
                    q.Add(new Term("content", termText), j);
                }
                ts.End();
            }
            finally
            {
                IOUtils.CloseWhileHandlingException(ts);
            }

            ScoreDoc[] hits = searcher.Search(q, null, 1000).ScoreDocs;
            int[] ranks = new int[] { 0 };
            CompareRanks(hits, ranks);
        }
Example #46
0
        public virtual void TestPalyndrome2()
        {
            // search on non palyndrome, find phrase with no slop, using exact phrase scorer
            query.Slop = 0; // to use exact phrase scorer
            query.Add(new Term("field", "two"));
            query.Add(new Term("field", "three"));
            ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length, "phrase found with exact phrase scorer");
            float score0 = hits[0].Score;

            //System.out.println("(exact) field: two three: "+score0);
            QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, query, searcher);

            // search on non palyndrome, find phrase with slop 2, though no slop required here.
            query.Slop = 2; // to use sloppy scorer
            hits       = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length, "just sloppy enough");
            float score1 = hits[0].Score;
            //System.out.println("(sloppy) field: two three: "+score1);
            Assert.AreEqual(score0, score1, SCORE_COMP_THRESH, "exact scorer and sloppy scorer score the same when slop does not matter");
            QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, query, searcher);

            // search ordered in palyndrome, find it twice
            query      = new PhraseQuery();
            query.Slop = 2; // must be at least two for both ordered and reversed to match
            query.Add(new Term("palindrome", "two"));
            query.Add(new Term("palindrome", "three"));
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length, "just sloppy enough");
            //float score2 = hits[0].Score;
            //System.out.println("palindrome: two three: "+score2);
            QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, query, searcher);

            //commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
            //Assert.IsTrue("ordered scores higher in palindrome",score1+SCORE_COMP_THRESH<score2);

            // search reveresed in palyndrome, find it twice
            query      = new PhraseQuery();
            query.Slop = 2; // must be at least two for both ordered and reversed to match
            query.Add(new Term("palindrome", "three"));
            query.Add(new Term("palindrome", "two"));
            hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length, "just sloppy enough");
            //float score3 = hits[0].Score;
            //System.out.println("palindrome: three two: "+score3);
            QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, query, searcher);

            //commented out for sloppy-phrase efficiency (issue 736) - see SloppyPhraseScorer.phraseFreq().
            //Assert.IsTrue("reversed scores higher in palindrome",score1+SCORE_COMP_THRESH<score3);
            //Assert.AreEqual("ordered or reversed does not matter",score2, score3, SCORE_COMP_THRESH);
        }
Example #47
0
		/// <exception cref="ParseException">throw in overridden method to disallow
		/// </exception>
		protected internal virtual Query GetFieldQuery(System.String field, System.String queryText)
		{
			// Use the analyzer to get all the tokens, and then build a TermQuery,
			// PhraseQuery, or nothing based on the term count
			
			TokenStream source = analyzer.TokenStream(field, new System.IO.StringReader(queryText));
			System.Collections.ArrayList v = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(10));
			Lucene.Net.Analysis.Token t;
			int positionCount = 0;
			bool severalTokensAtSamePosition = false;
			
			while (true)
			{
				try
				{
					t = source.Next();
				}
				catch (System.IO.IOException e)
				{
					t = null;
				}
				if (t == null)
					break;
				v.Add(t);
				if (t.GetPositionIncrement() != 0)
					positionCount += t.GetPositionIncrement();
				else
					severalTokensAtSamePosition = true;
			}
			try
			{
				source.Close();
			}
			catch (System.IO.IOException e)
			{
				// ignore
			}
			
			if (v.Count == 0)
				return null;
			else if (v.Count == 1)
			{
				t = (Lucene.Net.Analysis.Token) v[0];
				return new TermQuery(new Term(field, t.TermText()));
			}
			else
			{
				if (severalTokensAtSamePosition)
				{
					if (positionCount == 1)
					{
						// no phrase query:
						BooleanQuery q = new BooleanQuery(true);
						for (int i = 0; i < v.Count; i++)
						{
							t = (Lucene.Net.Analysis.Token) v[i];
							TermQuery currentQuery = new TermQuery(new Term(field, t.TermText()));
							q.Add(currentQuery, BooleanClause.Occur.SHOULD);
						}
						return q;
					}
					else
					{
						// phrase query:
						MultiPhraseQuery mpq = new MultiPhraseQuery();
						System.Collections.ArrayList multiTerms = new System.Collections.ArrayList();
						for (int i = 0; i < v.Count; i++)
						{
							t = (Lucene.Net.Analysis.Token) v[i];
							if (t.GetPositionIncrement() == 1 && multiTerms.Count > 0)
							{
								mpq.Add((Term[]) multiTerms.ToArray(typeof(Term)));
								multiTerms.Clear();
							}
							multiTerms.Add(new Term(field, t.TermText()));
						}
						mpq.Add((Term[]) multiTerms.ToArray(typeof(Term)));
						return mpq;
					}
				}
				else
				{
					PhraseQuery q = new PhraseQuery();
					q.SetSlop(phraseSlop);
					for (int i = 0; i < v.Count; i++)
					{
						q.Add(new Term(field, ((Lucene.Net.Analysis.Token) v[i]).TermText()));
					}
					return q;
				}
			}
		}
        public virtual void Test1()
        {
            BooleanQuery q = new BooleanQuery();

            PhraseQuery phraseQuery = new PhraseQuery();

            phraseQuery.Slop = 1;
            phraseQuery.Add(new Term(FIELD, "w1"));
            phraseQuery.Add(new Term(FIELD, "w2"));
            q.Add(phraseQuery, Occur.MUST);
            q.Add(Snear(St("w2"), Sor("w5", "zz"), 4, true), Occur.SHOULD);
            q.Add(Snear(Sf("w3", 2), St("w2"), St("w3"), 5, true), Occur.SHOULD);

            Query t = new FilteredQuery(new TermQuery(new Term(FIELD, "xx")), new ItemizedFilter(new int[] { 1, 3 }));

            t.Boost = 1000;
            q.Add(t, Occur.SHOULD);

            t       = new ConstantScoreQuery(new ItemizedFilter(new int[] { 0, 2 }));
            t.Boost = 30;
            q.Add(t, Occur.SHOULD);

            DisjunctionMaxQuery dm = new DisjunctionMaxQuery(0.2f);

            dm.Add(Snear(St("w2"), Sor("w5", "zz"), 4, true));
            dm.Add(new TermQuery(new Term(FIELD, "QQ")));

            BooleanQuery xxYYZZ = new BooleanQuery();

            xxYYZZ.Add(new TermQuery(new Term(FIELD, "xx")), Occur.SHOULD);
            xxYYZZ.Add(new TermQuery(new Term(FIELD, "yy")), Occur.SHOULD);
            xxYYZZ.Add(new TermQuery(new Term(FIELD, "zz")), Occur.MUST_NOT);

            dm.Add(xxYYZZ);

            BooleanQuery xxW1 = new BooleanQuery();

            xxW1.Add(new TermQuery(new Term(FIELD, "xx")), Occur.MUST_NOT);
            xxW1.Add(new TermQuery(new Term(FIELD, "w1")), Occur.MUST_NOT);

            dm.Add(xxW1);

            DisjunctionMaxQuery dm2 = new DisjunctionMaxQuery(0.5f);

            dm2.Add(new TermQuery(new Term(FIELD, "w1")));
            dm2.Add(new TermQuery(new Term(FIELD, "w2")));
            dm2.Add(new TermQuery(new Term(FIELD, "w3")));
            dm.Add(dm2);

            q.Add(dm, Occur.SHOULD);

            BooleanQuery b = new BooleanQuery();

            b.MinimumNumberShouldMatch = 2;
            b.Add(Snear("w1", "w2", 1, true), Occur.SHOULD);
            b.Add(Snear("w2", "w3", 1, true), Occur.SHOULD);
            b.Add(Snear("w1", "w3", 3, true), Occur.SHOULD);

            q.Add(b, Occur.SHOULD);

            Qtest(q, new int[] { 0, 1, 2 });
        }