Exemplo n.º 1
0
        public override void BeforeClass()
        {
            base.BeforeClass();

            directory = NewDirectory();
            Analyzer          analyzer = new AnalyzerAnonymousClass();
            RandomIndexWriter writer   = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, directory, analyzer);

            Documents.Document doc = new Documents.Document();
            doc.Add(NewTextField("field", "one two three four five", Field.Store.YES));
            doc.Add(NewTextField("repeated", "this is a repeated field - first part", Field.Store.YES));
            IIndexableField repeatedField = NewTextField("repeated", "second part of a repeated field", Field.Store.YES);

            doc.Add(repeatedField);
            doc.Add(NewTextField("palindrome", "one two three two one", Field.Store.YES));
            writer.AddDocument(doc);

            doc = new Documents.Document();
            doc.Add(NewTextField("nonexist", "phrase exist notexist exist found", Field.Store.YES));
            writer.AddDocument(doc);

            doc = new Documents.Document();
            doc.Add(NewTextField("nonexist", "phrase exist notexist exist found", Field.Store.YES));
            writer.AddDocument(doc);

            reader = writer.GetReader();
            writer.Dispose();

            searcher = NewSearcher(reader);
        }
Exemplo n.º 2
0
        public virtual void TestCrossPlaneNormalization()
        {
            var analyzer = new AnalyzerAnonymousClass();
            var num      = 1000 * RandomMultiplier;

            for (var i = 0; i < num; i++)
            {
                var s  = TestUtil.RandomUnicodeString(Random);
                var ts = analyzer.GetTokenStream("foo", s);
                try
                {
                    ts.Reset();
                    var offsetAtt = ts.AddAttribute <IOffsetAttribute>();
                    while (ts.IncrementToken())
                    {
                        var highlightedText = s.Substring(offsetAtt.StartOffset, offsetAtt.EndOffset - offsetAtt.StartOffset);
                        for (int j = 0, cp = 0; j < highlightedText.Length; j += Character.CharCount(cp))
                        {
                            cp = highlightedText.CodePointAt(j);
                            assertTrue("non-letter:" + cp.ToString("x"), Character.IsLetter(cp));
                        }
                    }
                    ts.End();
                }
                finally
                {
                    IOUtils.DisposeWhileHandlingException(ts);
                }
            }
            // just for fun
            CheckRandomData(Random, analyzer, num);
        }
Exemplo n.º 3
0
        public virtual void TestPositionIncrementGap()
        {
            Analyzer analyzer = new AnalyzerAnonymousClass();

            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));

            Document doc = new Document();

            doc.Add(NewTextField("repeated", "repeated one", Field.Store.YES));
            doc.Add(NewTextField("repeated", "repeated two", Field.Store.YES));

            writer.AddDocument(doc);
            writer.Commit();
            SegmentCommitInfo info = writer.NewestSegment();

            writer.Dispose();
            SegmentReader reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random));

            DocsAndPositionsEnum termPositions = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), "repeated", new BytesRef("repeated"));

            Assert.IsTrue(termPositions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            int freq = termPositions.Freq;

            Assert.AreEqual(2, freq);
            Assert.AreEqual(0, termPositions.NextPosition());
            Assert.AreEqual(502, termPositions.NextPosition());
            reader.Dispose();
        }
Exemplo n.º 4
0
        internal virtual void AddDocs(Random random, Directory dir, int ndocs, string field, string val, int maxTF, float percentDocs)
        {
            Analyzer analyzer = new AnalyzerAnonymousClass(random, val, maxTF, percentDocs);

            Document doc = new Document();

            doc.Add(NewStringField(field, val, Field.Store.NO));
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode.CREATE).SetMaxBufferedDocs(100).SetMergePolicy(NewLogMergePolicy(100)));

            for (int i = 0; i < ndocs; i++)
            {
                writer.AddDocument(doc);
            }

            writer.ForceMerge(1);
            writer.Dispose();
        }
Exemplo n.º 5
0
        public virtual void TestSetPosition()
        {
            Analyzer          analyzer = new AnalyzerAnonymousClass(this);
            Directory         store    = NewDirectory();
            RandomIndexWriter writer   = new RandomIndexWriter(Random, store, analyzer);
            Document          d        = new Document();

            d.Add(NewTextField("field", "bogus", Field.Store.YES));
            writer.AddDocument(d);
            IndexReader reader = writer.GetReader();

            writer.Dispose();

            IndexSearcher searcher = NewSearcher(reader);

            DocsAndPositionsEnum pos = MultiFields.GetTermPositionsEnum(searcher.IndexReader, MultiFields.GetLiveDocs(searcher.IndexReader), "field", new BytesRef("1"));

            pos.NextDoc();
            // first token should be at position 0
            Assert.AreEqual(0, pos.NextPosition());

            pos = MultiFields.GetTermPositionsEnum(searcher.IndexReader, MultiFields.GetLiveDocs(searcher.IndexReader), "field", new BytesRef("2"));
            pos.NextDoc();
            // second token should be at position 2
            Assert.AreEqual(2, pos.NextPosition());

            PhraseQuery q;

            ScoreDoc[] hits;

            q = new PhraseQuery();
            q.Add(new Term("field", "1"));
            q.Add(new Term("field", "2"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // same as previous, just specify positions explicitely.
            q = new PhraseQuery();
            q.Add(new Term("field", "1"), 0);
            q.Add(new Term("field", "2"), 1);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // specifying correct positions should find the phrase.
            q = new PhraseQuery();
            q.Add(new Term("field", "1"), 0);
            q.Add(new Term("field", "2"), 2);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "2"));
            q.Add(new Term("field", "3"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "3"));
            q.Add(new Term("field", "4"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // phrase query would find it when correct positions are specified.
            q = new PhraseQuery();
            q.Add(new Term("field", "3"), 0);
            q.Add(new Term("field", "4"), 0);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            // phrase query should fail for non existing searched term
            // even if there exist another searched terms in the same searched position.
            q = new PhraseQuery();
            q.Add(new Term("field", "3"), 0);
            q.Add(new Term("field", "9"), 0);
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            // multi-phrase query should succed for non existing searched term
            // because there exist another searched terms in the same searched position.
            MultiPhraseQuery mq = new MultiPhraseQuery();

            mq.Add(new Term[] { new Term("field", "3"), new Term("field", "9") }, 0);
            hits = searcher.Search(mq, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "2"));
            q.Add(new Term("field", "4"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "3"));
            q.Add(new Term("field", "5"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "4"));
            q.Add(new Term("field", "5"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            q = new PhraseQuery();
            q.Add(new Term("field", "2"));
            q.Add(new Term("field", "5"));
            hits = searcher.Search(q, null, 1000).ScoreDocs;
            Assert.AreEqual(0, hits.Length);

            reader.Dispose();
            store.Dispose();
        }