/// <summary>
/// Checks the string form of a parsed <c>PhraseQuery</c>: positions whose words are
/// missing from the parsed query (presumably removed as stop words by
/// <c>StopAnalyzer</c>) are expected to be rendered as <c>?</c>, and two terms added
/// at the same position are expected to be rendered joined with <c>|</c>.
/// </summary>
public virtual void TestToString()
{
    StopAnalyzer analyzer = new StopAnalyzer();

    // The setting below is a static default on StopFilter. Remember the previous
    // value and restore it afterwards so this test does not affect other tests.
    bool dflt = StopFilter.GetEnablePositionIncrementsDefault();
    StopFilter.SetEnablePositionIncrementsDefault(true);
    try
    {
        QueryParser qp = new QueryParser("field", analyzer);
        qp.SetEnablePositionIncrements(true);

        PhraseQuery q = (PhraseQuery) qp.Parse("\"this hi this is a test is\"");
        Assert.AreEqual("field:\"? hi ? ? ? test\"", q.ToString());

        // Add a second term at position 1, the position already holding "hi".
        q.Add(new Term("field", "hello"), 1);
        Assert.AreEqual("field:\"? hi|hello ? ? ? test\"", q.ToString());
    }
    finally
    {
        StopFilter.SetEnablePositionIncrementsDefault(dflt);
    }
}
/// <summary>
/// Checks the string form of a parsed <c>PhraseQuery</c>: positions whose words are
/// missing from the parsed query (presumably removed as stop words by
/// <c>StopAnalyzer</c>) are expected to be rendered as <c>?</c>, and two terms added
/// at the same position are expected to be rendered joined with <c>|</c>.
/// </summary>
public virtual void TestToString()
{
    StopAnalyzer analyzer = new StopAnalyzer();

    // The setting below is a static default on StopFilter. Remember the previous
    // value and restore it afterwards so this test does not affect other tests.
    bool dflt = StopFilter.GetEnablePositionIncrementsDefault();
    StopFilter.SetEnablePositionIncrementsDefault(true);
    try
    {
        QueryParser qp = new QueryParser("field", analyzer);
        qp.SetEnablePositionIncrements(true);

        PhraseQuery q = (PhraseQuery) qp.Parse("\"this hi this is a test is\"");
        Assert.AreEqual("field:\"? hi ? ? ? test\"", q.ToString());

        // Add a second term at position 1, the position already holding "hi".
        q.Add(new Term("field", "hello"), 1);
        Assert.AreEqual("field:\"? hi|hello ? ? ? test\"", q.ToString());
    }
    finally
    {
        StopFilter.SetEnablePositionIncrementsDefault(dflt);
    }
}
/// <summary>
/// Indexes a single document through <c>AnonymousClassAnalyzer</c> and checks how
/// <c>PhraseQuery</c>, <c>MultiPhraseQuery</c> and <c>QueryParser</c> behave with respect
/// to the term positions stored in the index.
/// </summary>
/// <remarks>
/// NOTE(review): the token stream produced by <c>AnonymousClassAnalyzer</c> is defined
/// outside this method. The assertions below imply tokens "1".."5" where "1" is at
/// position 0, "2" at position 2, and "3"/"4" share a position. Confirm against the
/// analyzer.
/// </remarks>
public virtual void TestSetPosition()
{
    Analyzer analyzer = new AnonymousClassAnalyzer(this);
    Directory store = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(store, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    Document d = new Document();
    d.Add(new Field("field", "bogus", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(d);
    writer.Optimize();
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(store);
    try
    {
        TermPositions pos = searcher.GetIndexReader().TermPositions(new Term("field", "1"));
        pos.Next();
        // first token should be at position 0
        Assert.AreEqual(0, pos.NextPosition());

        pos = searcher.GetIndexReader().TermPositions(new Term("field", "2"));
        pos.Next();
        // second token should be at position 2
        Assert.AreEqual(2, pos.NextPosition());

        PhraseQuery q;
        ScoreDoc[] hits;

        q = new PhraseQuery();
        q.Add(new Term("field", "1"));
        q.Add(new Term("field", "2"));
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // same as previous, just specify positions explicitly.
        q = new PhraseQuery();
        q.Add(new Term("field", "1"), 0);
        q.Add(new Term("field", "2"), 1);
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // specifying correct positions should find the phrase.
        q = new PhraseQuery();
        q.Add(new Term("field", "1"), 0);
        q.Add(new Term("field", "2"), 2);
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length);

        q = new PhraseQuery();
        q.Add(new Term("field", "2"));
        q.Add(new Term("field", "3"));
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length);

        q = new PhraseQuery();
        q.Add(new Term("field", "3"));
        q.Add(new Term("field", "4"));
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // phrase query would find it when correct positions are specified.
        q = new PhraseQuery();
        q.Add(new Term("field", "3"), 0);
        q.Add(new Term("field", "4"), 0);
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length);

        // phrase query should fail for a non-existing searched term
        // even if another searched term exists at the same searched position.
        q = new PhraseQuery();
        q.Add(new Term("field", "3"), 0);
        q.Add(new Term("field", "9"), 0);
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // multi-phrase query should succeed for a non-existing searched term
        // because another searched term exists at the same searched position.
        MultiPhraseQuery mq = new MultiPhraseQuery();
        mq.Add(new Term[] { new Term("field", "3"), new Term("field", "9") }, 0);
        hits = searcher.Search(mq, null, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length);

        q = new PhraseQuery();
        q.Add(new Term("field", "2"));
        q.Add(new Term("field", "4"));
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length);

        q = new PhraseQuery();
        q.Add(new Term("field", "3"));
        q.Add(new Term("field", "5"));
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length);

        q = new PhraseQuery();
        q.Add(new Term("field", "4"));
        q.Add(new Term("field", "5"));
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length);

        q = new PhraseQuery();
        q.Add(new Term("field", "2"));
        q.Add(new Term("field", "5"));
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // should not find "1 2" because there is a gap of 1 in the index
        QueryParser qp = new QueryParser("field", new StopWhitespaceAnalyzer(false));
        q = (PhraseQuery) qp.Parse("\"1 2\"");
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // omitted stop word cannot help because stop filter swallows the increments.
        q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // query parser alone won't help, because stop filter swallows the increments.
        qp.SetEnablePositionIncrements(true);
        q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // stop filter alone won't help, because query parser swallows the increments.
        qp.SetEnablePositionIncrements(false);
        q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // when both qp and stopFilter propagate increments, we should find the doc.
        qp = new QueryParser("field", new StopWhitespaceAnalyzer(true));
        qp.SetEnablePositionIncrements(true);
        q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length);
    }
    finally
    {
        // Release the searcher even when an assertion above fails.
        searcher.Close();
    }
}
/// <summary>
/// Indexes a single document through <c>AnonymousClassAnalyzer</c> and checks how
/// <c>PhraseQuery</c>, <c>MultiPhraseQuery</c> and <c>QueryParser</c> behave with respect
/// to the term positions stored in the index.
/// </summary>
/// <remarks>
/// NOTE(review): the token stream produced by <c>AnonymousClassAnalyzer</c> is defined
/// outside this method. The assertions below imply tokens "1".."5" where "1" is at
/// position 0, "2" at position 2, and "3"/"4" share a position. Confirm against the
/// analyzer.
/// </remarks>
public virtual void TestSetPosition()
{
    Analyzer analyzer = new AnonymousClassAnalyzer(this);
    Directory store = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(store, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    Document d = new Document();
    d.Add(new Field("field", "bogus", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(d);
    writer.Optimize();
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(store);
    try
    {
        TermPositions pos = searcher.GetIndexReader().TermPositions(new Term("field", "1"));
        pos.Next();
        // first token should be at position 0
        Assert.AreEqual(0, pos.NextPosition());

        pos = searcher.GetIndexReader().TermPositions(new Term("field", "2"));
        pos.Next();
        // second token should be at position 2
        Assert.AreEqual(2, pos.NextPosition());

        PhraseQuery q;
        ScoreDoc[] hits;

        q = new PhraseQuery();
        q.Add(new Term("field", "1"));
        q.Add(new Term("field", "2"));
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // same as previous, just specify positions explicitly.
        q = new PhraseQuery();
        q.Add(new Term("field", "1"), 0);
        q.Add(new Term("field", "2"), 1);
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // specifying correct positions should find the phrase.
        q = new PhraseQuery();
        q.Add(new Term("field", "1"), 0);
        q.Add(new Term("field", "2"), 2);
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length);

        q = new PhraseQuery();
        q.Add(new Term("field", "2"));
        q.Add(new Term("field", "3"));
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length);

        q = new PhraseQuery();
        q.Add(new Term("field", "3"));
        q.Add(new Term("field", "4"));
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // phrase query would find it when correct positions are specified.
        q = new PhraseQuery();
        q.Add(new Term("field", "3"), 0);
        q.Add(new Term("field", "4"), 0);
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length);

        // phrase query should fail for a non-existing searched term
        // even if another searched term exists at the same searched position.
        q = new PhraseQuery();
        q.Add(new Term("field", "3"), 0);
        q.Add(new Term("field", "9"), 0);
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // multi-phrase query should succeed for a non-existing searched term
        // because another searched term exists at the same searched position.
        MultiPhraseQuery mq = new MultiPhraseQuery();
        mq.Add(new Term[] { new Term("field", "3"), new Term("field", "9") }, 0);
        hits = searcher.Search(mq, null, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length);

        q = new PhraseQuery();
        q.Add(new Term("field", "2"));
        q.Add(new Term("field", "4"));
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length);

        q = new PhraseQuery();
        q.Add(new Term("field", "3"));
        q.Add(new Term("field", "5"));
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length);

        q = new PhraseQuery();
        q.Add(new Term("field", "4"));
        q.Add(new Term("field", "5"));
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length);

        q = new PhraseQuery();
        q.Add(new Term("field", "2"));
        q.Add(new Term("field", "5"));
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // should not find "1 2" because there is a gap of 1 in the index
        QueryParser qp = new QueryParser("field", new StopWhitespaceAnalyzer(false));
        q = (PhraseQuery) qp.Parse("\"1 2\"");
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // omitted stop word cannot help because stop filter swallows the increments.
        q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // query parser alone won't help, because stop filter swallows the increments.
        qp.SetEnablePositionIncrements(true);
        q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // stop filter alone won't help, because query parser swallows the increments.
        qp.SetEnablePositionIncrements(false);
        q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(0, hits.Length);

        // when both qp and stopFilter propagate increments, we should find the doc.
        qp = new QueryParser("field", new StopWhitespaceAnalyzer(true));
        qp.SetEnablePositionIncrements(true);
        q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
        hits = searcher.Search(q, null, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length);
    }
    finally
    {
        // Release the searcher even when an assertion above fails.
        searcher.Close();
    }
}
/// <summary>
/// Parses a quoted phrase with a <c>StopAnalyzer</c> configured for the stop words
/// "the", "in", "are" and "this", with position increments enabled on both the
/// <c>StopFilter</c> default and the query parser, and checks the positions reported
/// by the resulting <c>PhraseQuery</c>.
/// </summary>
public virtual void TestPositionIncrement()
{
    // Save the static StopFilter default so it can be restored in the finally block.
    bool dflt = StopFilter.GetEnablePositionIncrementsDefault();
    StopFilter.SetEnablePositionIncrementsDefault(true);
    try
    {
        Lucene.Net.QueryParsers.QueryParser qp = new Lucene.Net.QueryParsers.QueryParser("a", new StopAnalyzer(new System.String[] { "the", "in", "are", "this" }));
        qp.SetEnablePositionIncrements(true);

        // The configured stop words occupy word positions 0, 2, 5, 7 and 8 of the phrase,
        // leaving the remaining words at positions 1, 3, 4, 6 and 9.
        System.String qtxt = "\"the words in poisitions pos02578 are stopped in this phrasequery\"";
        int[] expectedPositions = new int[] { 1, 3, 4, 6, 9 };

        PhraseQuery pq = (PhraseQuery) qp.Parse(qtxt);
        Term[] t = pq.GetTerms();
        int[] pos = pq.GetPositions();

        // Fail with a clear message on a count mismatch, rather than passing on a
        // truncated result or indexing past the end of expectedPositions.
        Assert.AreEqual(expectedPositions.Length, t.Length, "unexpected number of terms in the parsed phrase query");
        Assert.AreEqual(t.Length, pos.Length, "number of positions differs from number of terms");

        for (int i = 0; i < t.Length; i++)
        {
            Assert.AreEqual(expectedPositions[i], pos[i], "term " + i + " = " + t[i] + " has wrong term-position!");
        }
    }
    finally
    {
        StopFilter.SetEnablePositionIncrementsDefault(dflt);
    }
}