/// <summary>
/// Indexes one document that carries two instances of the "repeated" field and
/// checks the positions stored for the term "repeated": it must occur twice,
/// first at position 0 and then at position 502.
/// </summary>
/// <remarks>
/// NOTE(review): the jump to 502 presumably comes from the position increment gap
/// returned by AnonymousClassAnalyzer, which is defined outside this block - confirm there.
/// </remarks>
public virtual void TestPositionIncrementGap()
{
    Analyzer analyzer = new AnonymousClassAnalyzer(this);
    IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

    SegmentInfo info;
    try
    {
        Document doc = new Document();
        doc.Add(new Field("repeated", "repeated one", Field.Store.YES, Field.Index.ANALYZED));
        doc.Add(new Field("repeated", "repeated two", Field.Store.YES, Field.Index.ANALYZED));

        writer.AddDocument(doc);
        writer.Flush();

        // The segment must be captured while the writer is still open.
        info = writer.NewestSegment();
    }
    finally
    {
        // Close the writer even when indexing fails so it does not stay open on dir.
        writer.Close();
    }

    SegmentReader reader = SegmentReader.Get(info);
    try
    {
        TermPositions termPositions = reader.TermPositions(new Term("repeated", "repeated"));
        try
        {
            Assert.IsTrue(termPositions.Next());

            int freq = termPositions.Freq();
            Assert.AreEqual(2, freq);

            Assert.AreEqual(0, termPositions.NextPosition());
            Assert.AreEqual(502, termPositions.NextPosition());
        }
        finally
        {
            termPositions.Close();
        }
    }
    finally
    {
        // The original never closed the reader; close it on success and on assertion failure alike.
        reader.Close();
    }
}
/// <summary>
/// Stores the enclosing instance and calls AddAttribute for the position-increment,
/// term and offset attribute types, keeping each returned attribute in its field.
/// </summary>
/// <param name="enclosingInstance">Value stored in the enclosingInstance field.</param>
private void InitBlock(AnonymousClassAnalyzer enclosingInstance)
{
    this.enclosingInstance = enclosingInstance;

    // The attributes are added in the same order as before: position increment, term, offset.
    System.Type positionIncrementType = typeof(PositionIncrementAttribute);
    posIncrAtt = (PositionIncrementAttribute) AddAttribute(positionIncrementType);

    System.Type termType = typeof(TermAttribute);
    termAtt = (TermAttribute) AddAttribute(termType);

    System.Type offsetType = typeof(OffsetAttribute);
    offsetAtt = (OffsetAttribute) AddAttribute(offsetType);
}
/// <summary>
/// Prepares the fixture: after the base set-up, fills a new RAMDirectory with three
/// documents (one with "field", two "repeated" instances and "palindrome"; two
/// identical ones with "nonexist"), optimizes and closes the writer, then opens an
/// IndexSearcher on the directory and creates an empty PhraseQuery.
/// </summary>
public override void SetUp()
{
    base.SetUp();

    directory = new RAMDirectory();
    Analyzer testAnalyzer = new AnonymousClassAnalyzer(this);
    IndexWriter indexWriter = new IndexWriter(directory, testAnalyzer, true, IndexWriter.MaxFieldLength.LIMITED);

    // First document: a plain field, a field added twice, and a palindrome field.
    Document mainDocument = new Document();
    mainDocument.Add(new Field("field", "one two three four five", Field.Store.YES, Field.Index.ANALYZED));
    mainDocument.Add(new Field("repeated", "this is a repeated field - first part", Field.Store.YES, Field.Index.ANALYZED));
    IFieldable secondRepeatedPart = new Field("repeated", "second part of a repeated field", Field.Store.YES, Field.Index.ANALYZED);
    mainDocument.Add(secondRepeatedPart);
    mainDocument.Add(new Field("palindrome", "one two three two one", Field.Store.YES, Field.Index.ANALYZED));
    indexWriter.AddDocument(mainDocument);

    // Second and third documents carry the same "nonexist" content.
    Document firstNonexistDocument = new Document();
    firstNonexistDocument.Add(new Field("nonexist", "phrase exist notexist exist found", Field.Store.YES, Field.Index.ANALYZED));
    indexWriter.AddDocument(firstNonexistDocument);

    Document secondNonexistDocument = new Document();
    secondNonexistDocument.Add(new Field("nonexist", "phrase exist notexist exist found", Field.Store.YES, Field.Index.ANALYZED));
    indexWriter.AddDocument(secondNonexistDocument);

    indexWriter.Optimize();
    indexWriter.Close();

    searcher = new IndexSearcher(directory, true);
    query = new PhraseQuery();
}
/// <summary>
/// Stores the enclosing instance, then obtains the position-increment, term and
/// offset attributes through AddAttribute and assigns them to their fields.
/// </summary>
/// <param name="enclosingInstance">Value stored in the enclosingInstance field.</param>
private void InitBlock(AnonymousClassAnalyzer enclosingInstance)
{
    this.enclosingInstance = enclosingInstance;

    // Resolve the attribute types first; the AddAttribute calls below keep their original order.
    System.Type positionIncrementType = typeof(PositionIncrementAttribute);
    System.Type termType = typeof(TermAttribute);
    System.Type offsetType = typeof(OffsetAttribute);

    posIncrAtt = (PositionIncrementAttribute)AddAttribute(positionIncrementType);
    termAtt = (TermAttribute)AddAttribute(termType);
    offsetAtt = (OffsetAttribute)AddAttribute(offsetType);
}
/// <summary>
/// Stores the enclosing instance and adds the position-increment, term and offset
/// attributes through the generic AddAttribute, keeping each result in its field.
/// </summary>
/// <param name="enclosingInstance">Value stored in the enclosingInstance field.</param>
private void InitBlock(AnonymousClassAnalyzer enclosingInstance)
{
    this.enclosingInstance = enclosingInstance;

    IPositionIncrementAttribute positionIncrement = AddAttribute<IPositionIncrementAttribute>();
    posIncrAtt = positionIncrement;

    ITermAttribute term = AddAttribute<ITermAttribute>();
    termAtt = term;

    IOffsetAttribute offset = AddAttribute<IOffsetAttribute>();
    offsetAtt = offset;
}
/// <summary>
/// Initializes this instance: remembers the enclosing instance and fills the
/// posIncrAtt, termAtt and offsetAtt fields from the generic AddAttribute calls.
/// </summary>
/// <param name="enclosingInstance">Value stored in the enclosingInstance field.</param>
private void InitBlock(AnonymousClassAnalyzer enclosingInstance)
{
    this.enclosingInstance = enclosingInstance;

    // Order of the AddAttribute calls is kept: position increment, term, offset.
    this.posIncrAtt = this.AddAttribute<IPositionIncrementAttribute>();
    this.termAtt = this.AddAttribute<ITermAttribute>();
    this.offsetAtt = this.AddAttribute<IOffsetAttribute>();
}
/// <summary>
/// Indexes a single document through AnonymousClassAnalyzer and asserts the hit
/// count of several two-term phrase queries on "Field" for the terms "1" to "5".
/// </summary>
/// <remarks>
/// NOTE(review): the indexed tokens and their positions come from the analyzer,
/// which is defined outside this block; the field text "bogus" is presumably ignored.
/// </remarks>
public virtual void TestSetPosition()
{
    Analyzer positionAnalyzer = new AnonymousClassAnalyzer(this);
    RAMDirectory ramStore = new RAMDirectory();

    IndexWriter indexWriter = new IndexWriter(ramStore, positionAnalyzer, true);
    Document document = new Document();
    document.Add(Field.Text("Field", "bogus"));
    indexWriter.AddDocument(document);
    indexWriter.Optimize();
    indexWriter.Close();

    IndexSearcher indexSearcher = new IndexSearcher(ramStore);

    // "1 2": no match expected.
    PhraseQuery oneTwo = new PhraseQuery();
    oneTwo.Add(new Term("Field", "1"));
    oneTwo.Add(new Term("Field", "2"));
    Assert.AreEqual(0, indexSearcher.Search(oneTwo).Length());

    // "2 3": one match expected.
    PhraseQuery twoThree = new PhraseQuery();
    twoThree.Add(new Term("Field", "2"));
    twoThree.Add(new Term("Field", "3"));
    Assert.AreEqual(1, indexSearcher.Search(twoThree).Length());

    // "3 4": no match expected.
    PhraseQuery threeFour = new PhraseQuery();
    threeFour.Add(new Term("Field", "3"));
    threeFour.Add(new Term("Field", "4"));
    Assert.AreEqual(0, indexSearcher.Search(threeFour).Length());

    // "2 4": one match expected.
    PhraseQuery twoFour = new PhraseQuery();
    twoFour.Add(new Term("Field", "2"));
    twoFour.Add(new Term("Field", "4"));
    Assert.AreEqual(1, indexSearcher.Search(twoFour).Length());

    // "3 5": one match expected.
    PhraseQuery threeFive = new PhraseQuery();
    threeFive.Add(new Term("Field", "3"));
    threeFive.Add(new Term("Field", "5"));
    Assert.AreEqual(1, indexSearcher.Search(threeFive).Length());

    // "4 5": one match expected.
    PhraseQuery fourFive = new PhraseQuery();
    fourFive.Add(new Term("Field", "4"));
    fourFive.Add(new Term("Field", "5"));
    Assert.AreEqual(1, indexSearcher.Search(fourFive).Length());

    // "2 5": no match expected.
    PhraseQuery twoFive = new PhraseQuery();
    twoFive.Add(new Term("Field", "2"));
    twoFive.Add(new Term("Field", "5"));
    Assert.AreEqual(0, indexSearcher.Search(twoFive).Length());
}
/// <summary>
/// Creates a new index in <paramref name="dir"/> and adds the same single-field
/// document to it <paramref name="ndocs"/> times, then optimizes and closes the writer.
/// </summary>
/// <param name="dir">Directory the index is written to (the writer is opened with create = true).</param>
/// <param name="ndocs">Number of times the document is added; zero or less adds nothing.</param>
/// <param name="field">Name of the field placed in the document.</param>
/// <param name="val">Field value; also passed to the RepeatingTokenStream constructor.</param>
/// <param name="maxTF">Passed through to AnonymousClassAnalyzer.</param>
/// <param name="percentDocs">Passed through to AnonymousClassAnalyzer.</param>
/// <remarks>
/// NOTE(review): how the analyzer uses random, percentDocs, ts and maxTF is defined
/// outside this block - confirm there.
/// </remarks>
internal virtual void AddDocs(Directory dir, int ndocs, System.String field, System.String val, int maxTF, float percentDocs)
{
    System.Random random = NewRandom();
    RepeatingTokenStream ts = new RepeatingTokenStream(val);
    Analyzer analyzer = new AnonymousClassAnalyzer(random, percentDocs, ts, maxTF, this);

    Document doc = new Document();
    doc.Add(new Field(field, val, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));

    IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    try
    {
        writer.SetMaxBufferedDocs(100);
        writer.SetMergeFactor(100);

        for (int i = 0; i < ndocs; i++)
        {
            writer.AddDocument(doc);
        }

        writer.Optimize();
    }
    finally
    {
        // Close the writer on failure too, so a failed run does not leave it open on dir.
        writer.Close();
    }
}
/// <summary>
/// Creates the filter over <paramref name="ts"/> and hands the enclosing instance to InitBlock.
/// </summary>
/// <param name="enclosingInstance">Instance forwarded to InitBlock.</param>
/// <param name="ts">Token stream passed to the base constructor.</param>
public AnonymousClassTokenFilter(AnonymousClassAnalyzer enclosingInstance, TokenStream ts)
    : base(ts)
{
    InitBlock(enclosingInstance);
}
/// <summary>
/// Stores the supplied enclosing instance in the enclosingInstance field.
/// </summary>
/// <param name="enclosingInstance">Value to store.</param>
private void InitBlock(AnonymousClassAnalyzer enclosingInstance)
{
    this.enclosingInstance = enclosingInstance;
}
/// <summary>
/// Creates the token stream and hands the enclosing instance to InitBlock.
/// </summary>
/// <param name="enclosingInstance">Instance forwarded to InitBlock.</param>
public AnonymousClassTokenStream(AnonymousClassAnalyzer enclosingInstance)
{
    InitBlock(enclosingInstance);
}
/// <summary>
/// Indexes a single document through AnonymousClassAnalyzer and checks how phrase
/// and multi-phrase queries on "field" behave, both with implicit and explicit
/// term positions and when the queries are built by a QueryParser whose analyzer
/// removes stop words.
/// </summary>
/// <remarks>
/// The global StopFilter position-increment default is changed during the test
/// and restored in a finally block.
/// NOTE(review): the indexed tokens and their positions come from the analyzers
/// defined outside this block - confirm there.
/// </remarks>
public virtual void TestSetPosition()
{
    Analyzer analyzer = new AnonymousClassAnalyzer(this);
    RAMDirectory store = new RAMDirectory();
    IndexWriter writer = new IndexWriter(store, analyzer, true);
    Document d = new Document();
    d.Add(new Field("field", "bogus", Field.Store.YES, Field.Index.TOKENIZED));
    writer.AddDocument(d);
    writer.Optimize();
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(store);
    PhraseQuery q;
    Hits hits;

    q = new PhraseQuery();
    q.Add(new Term("field", "1"));
    q.Add(new Term("field", "2"));
    hits = searcher.Search(q);
    Assert.AreEqual(0, hits.Length());

    // Same as previous, just specify positions explicitly.
    q = new PhraseQuery();
    q.Add(new Term("field", "1"), 0);
    q.Add(new Term("field", "2"), 1);
    hits = searcher.Search(q);
    Assert.AreEqual(0, hits.Length());

    // Specifying correct positions should find the phrase.
    q = new PhraseQuery();
    q.Add(new Term("field", "1"), 0);
    q.Add(new Term("field", "2"), 2);
    hits = searcher.Search(q);
    Assert.AreEqual(1, hits.Length());

    q = new PhraseQuery();
    q.Add(new Term("field", "2"));
    q.Add(new Term("field", "3"));
    hits = searcher.Search(q);
    Assert.AreEqual(1, hits.Length());

    q = new PhraseQuery();
    q.Add(new Term("field", "3"));
    q.Add(new Term("field", "4"));
    hits = searcher.Search(q);
    Assert.AreEqual(0, hits.Length());

    // Phrase query would find it when correct positions are specified.
    q = new PhraseQuery();
    q.Add(new Term("field", "3"), 0);
    q.Add(new Term("field", "4"), 0);
    hits = searcher.Search(q);
    Assert.AreEqual(1, hits.Length());

    // Phrase query should fail for a non-existing searched term
    // even if another searched term exists at the same searched position.
    q = new PhraseQuery();
    q.Add(new Term("field", "3"), 0);
    q.Add(new Term("field", "9"), 0);
    hits = searcher.Search(q);
    Assert.AreEqual(0, hits.Length());

    // Multi-phrase query should succeed for a non-existing searched term
    // because another searched term exists at the same searched position.
    MultiPhraseQuery mq = new MultiPhraseQuery();
    mq.Add(new Term[]{new Term("field", "3"), new Term("field", "9")}, 0);
    hits = searcher.Search(mq);
    Assert.AreEqual(1, hits.Length());

    q = new PhraseQuery();
    q.Add(new Term("field", "2"));
    q.Add(new Term("field", "4"));
    hits = searcher.Search(q);
    Assert.AreEqual(1, hits.Length());

    q = new PhraseQuery();
    q.Add(new Term("field", "3"));
    q.Add(new Term("field", "5"));
    hits = searcher.Search(q);
    Assert.AreEqual(1, hits.Length());

    q = new PhraseQuery();
    q.Add(new Term("field", "4"));
    q.Add(new Term("field", "5"));
    hits = searcher.Search(q);
    Assert.AreEqual(1, hits.Length());

    q = new PhraseQuery();
    q.Add(new Term("field", "2"));
    q.Add(new Term("field", "5"));
    hits = searcher.Search(q);
    Assert.AreEqual(0, hits.Length());

    // Analyzer to introduce stop words and increment gaps.
    Analyzer stpa = new AnonymousClassAnalyzer1(this);

    // Should not find "1 2" because there is a gap of 1 in the index.
    Lucene.Net.QueryParsers.QueryParser qp = new Lucene.Net.QueryParsers.QueryParser("field", stpa);
    q = (PhraseQuery) qp.Parse("\"1 2\"");
    hits = searcher.Search(q);
    Assert.AreEqual(0, hits.Length());

    // Omitted stop word cannot help because stop filter swallows the increments.
    q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
    hits = searcher.Search(q);
    Assert.AreEqual(0, hits.Length());

    // Query parser alone won't help, because stop filter swallows the increments.
    qp.SetEnablePositionIncrements(true);
    q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
    hits = searcher.Search(q);
    Assert.AreEqual(0, hits.Length());

    // Remember the global default so it can be restored whatever happens below.
    bool dflt = StopFilter.GetEnablePositionIncrementsDefault();
    try
    {
        // Stop filter alone won't help, because query parser swallows the increments.
        qp.SetEnablePositionIncrements(false);
        StopFilter.SetEnablePositionIncrementsDefault(true);
        q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
        hits = searcher.Search(q);
        Assert.AreEqual(0, hits.Length());

        // When both query parser and stop filter propagate increments, we should find the doc.
        qp.SetEnablePositionIncrements(true);
        q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
        hits = searcher.Search(q);
        Assert.AreEqual(1, hits.Length());
    }
    finally
    {
        StopFilter.SetEnablePositionIncrementsDefault(dflt);
    }
}
/// <summary>
/// Indexes a single document through AnonymousClassAnalyzer, verifies the stored
/// positions of the terms "1" and "2", and then checks how phrase and multi-phrase
/// queries on "field" behave with implicit positions, explicit positions, and
/// queries produced by a QueryParser using StopWhitespaceAnalyzer.
/// </summary>
/// <remarks>
/// NOTE(review): the indexed tokens and their positions come from the analyzers
/// defined outside this block - confirm there.
/// </remarks>
public virtual void TestSetPosition()
{
    Analyzer analyzer = new AnonymousClassAnalyzer(this);
    Directory store = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(store, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    Document d = new Document();
    d.Add(new Field("field", "bogus", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(d);
    writer.Optimize();
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(store, true);

    TermPositions pos = searcher.IndexReader.TermPositions(new Term("field", "1"));
    pos.Next();
    // First token should be at position 0.
    Assert.AreEqual(0, pos.NextPosition());

    pos = searcher.IndexReader.TermPositions(new Term("field", "2"));
    pos.Next();
    // Second token should be at position 2.
    Assert.AreEqual(2, pos.NextPosition());

    PhraseQuery q;
    ScoreDoc[] hits;

    q = new PhraseQuery();
    q.Add(new Term("field", "1"));
    q.Add(new Term("field", "2"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // Same as previous, just specify positions explicitly.
    q = new PhraseQuery();
    q.Add(new Term("field", "1"), 0);
    q.Add(new Term("field", "2"), 1);
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // Specifying correct positions should find the phrase.
    q = new PhraseQuery();
    q.Add(new Term("field", "1"), 0);
    q.Add(new Term("field", "2"), 2);
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    q = new PhraseQuery();
    q.Add(new Term("field", "2"));
    q.Add(new Term("field", "3"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    q = new PhraseQuery();
    q.Add(new Term("field", "3"));
    q.Add(new Term("field", "4"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // Phrase query would find it when correct positions are specified.
    q = new PhraseQuery();
    q.Add(new Term("field", "3"), 0);
    q.Add(new Term("field", "4"), 0);
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    // Phrase query should fail for a non-existing searched term
    // even if another searched term exists at the same searched position.
    q = new PhraseQuery();
    q.Add(new Term("field", "3"), 0);
    q.Add(new Term("field", "9"), 0);
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // Multi-phrase query should succeed for a non-existing searched term
    // because another searched term exists at the same searched position.
    MultiPhraseQuery mq = new MultiPhraseQuery();
    mq.Add(new Term[] { new Term("field", "3"), new Term("field", "9") }, 0);
    hits = searcher.Search(mq, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    q = new PhraseQuery();
    q.Add(new Term("field", "2"));
    q.Add(new Term("field", "4"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    q = new PhraseQuery();
    q.Add(new Term("field", "3"));
    q.Add(new Term("field", "5"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    q = new PhraseQuery();
    q.Add(new Term("field", "4"));
    q.Add(new Term("field", "5"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    q = new PhraseQuery();
    q.Add(new Term("field", "2"));
    q.Add(new Term("field", "5"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // Should not find "1 2" because there is a gap of 1 in the index.
    QueryParser qp = new QueryParser(Util.Version.LUCENE_CURRENT, "field", new StopWhitespaceAnalyzer(false));
    q = (PhraseQuery)qp.Parse("\"1 2\"");
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // Omitted stop word cannot help because stop filter swallows the increments.
    q = (PhraseQuery)qp.Parse("\"1 stop 2\"");
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // Query parser alone won't help, because stop filter swallows the increments.
    qp.EnablePositionIncrements = true;
    q = (PhraseQuery)qp.Parse("\"1 stop 2\"");
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // Stop filter alone won't help, because query parser swallows the increments.
    qp.EnablePositionIncrements = false;
    q = (PhraseQuery)qp.Parse("\"1 stop 2\"");
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // When both query parser and stop filter propagate increments, we should find the doc.
    qp = new QueryParser(Util.Version.LUCENE_CURRENT, "field", new StopWhitespaceAnalyzer(true));
    qp.EnablePositionIncrements = true;
    q = (PhraseQuery)qp.Parse("\"1 stop 2\"");
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
}
/// <summary>
/// Indexes a single document through AnonymousClassAnalyzer, verifies the stored
/// positions of the terms "1" and "2", and then checks how phrase and multi-phrase
/// queries on "field" behave with implicit positions, explicit positions, and
/// queries produced by a QueryParser using StopWhitespaceAnalyzer.
/// </summary>
/// <remarks>
/// NOTE(review): the indexed tokens and their positions come from the analyzers
/// defined outside this block - confirm there.
/// </remarks>
public virtual void TestSetPosition()
{
    Analyzer analyzer = new AnonymousClassAnalyzer(this);
    Directory store = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(store, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    Document d = new Document();
    d.Add(new Field("field", "bogus", Field.Store.YES, Field.Index.ANALYZED));
    writer.AddDocument(d);
    writer.Optimize();
    writer.Close();

    IndexSearcher searcher = new IndexSearcher(store);

    TermPositions pos = searcher.GetIndexReader().TermPositions(new Term("field", "1"));
    pos.Next();
    // First token should be at position 0.
    Assert.AreEqual(0, pos.NextPosition());

    pos = searcher.GetIndexReader().TermPositions(new Term("field", "2"));
    pos.Next();
    // Second token should be at position 2.
    Assert.AreEqual(2, pos.NextPosition());

    PhraseQuery q;
    ScoreDoc[] hits;

    q = new PhraseQuery();
    q.Add(new Term("field", "1"));
    q.Add(new Term("field", "2"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // Same as previous, just specify positions explicitly.
    q = new PhraseQuery();
    q.Add(new Term("field", "1"), 0);
    q.Add(new Term("field", "2"), 1);
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // Specifying correct positions should find the phrase.
    q = new PhraseQuery();
    q.Add(new Term("field", "1"), 0);
    q.Add(new Term("field", "2"), 2);
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    q = new PhraseQuery();
    q.Add(new Term("field", "2"));
    q.Add(new Term("field", "3"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    q = new PhraseQuery();
    q.Add(new Term("field", "3"));
    q.Add(new Term("field", "4"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // Phrase query would find it when correct positions are specified.
    q = new PhraseQuery();
    q.Add(new Term("field", "3"), 0);
    q.Add(new Term("field", "4"), 0);
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    // Phrase query should fail for a non-existing searched term
    // even if another searched term exists at the same searched position.
    q = new PhraseQuery();
    q.Add(new Term("field", "3"), 0);
    q.Add(new Term("field", "9"), 0);
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // Multi-phrase query should succeed for a non-existing searched term
    // because another searched term exists at the same searched position.
    MultiPhraseQuery mq = new MultiPhraseQuery();
    mq.Add(new Term[]{new Term("field", "3"), new Term("field", "9")}, 0);
    hits = searcher.Search(mq, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    q = new PhraseQuery();
    q.Add(new Term("field", "2"));
    q.Add(new Term("field", "4"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    q = new PhraseQuery();
    q.Add(new Term("field", "3"));
    q.Add(new Term("field", "5"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    q = new PhraseQuery();
    q.Add(new Term("field", "4"));
    q.Add(new Term("field", "5"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);

    q = new PhraseQuery();
    q.Add(new Term("field", "2"));
    q.Add(new Term("field", "5"));
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // Should not find "1 2" because there is a gap of 1 in the index.
    QueryParser qp = new QueryParser("field", new StopWhitespaceAnalyzer(false));
    q = (PhraseQuery) qp.Parse("\"1 2\"");
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // Omitted stop word cannot help because stop filter swallows the increments.
    q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // Query parser alone won't help, because stop filter swallows the increments.
    qp.SetEnablePositionIncrements(true);
    q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // Stop filter alone won't help, because query parser swallows the increments.
    qp.SetEnablePositionIncrements(false);
    q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(0, hits.Length);

    // When both query parser and stop filter propagate increments, we should find the doc.
    qp = new QueryParser("field", new StopWhitespaceAnalyzer(true));
    qp.SetEnablePositionIncrements(true);
    q = (PhraseQuery) qp.Parse("\"1 stop 2\"");
    hits = searcher.Search(q, null, 1000).ScoreDocs;
    Assert.AreEqual(1, hits.Length);
}
/// <summary>
/// Creates the filter over <paramref name="standardTokenizer"/> and hands the
/// enclosing instance to InitBlock.
/// </summary>
/// <param name="enclosingInstance">Instance forwarded to InitBlock.</param>
/// <param name="standardTokenizer">Tokenizer passed to the base constructor.</param>
public AnonymousClassTokenFilter(AnonymousClassAnalyzer enclosingInstance, StandardTokenizer standardTokenizer)
    : base(standardTokenizer)
{
    InitBlock(enclosingInstance);
}