コード例 #1
0
        public virtual void TestDups(string expected, params Token[] tokens)
        {
            // Feed the canned tokens through the RemoveDuplicates filter and verify
            // the surviving tokens match the whitespace-separated expectation.
            TokenStream filtered = TokenFilterFactory("RemoveDuplicates").Create(new CannedTokenStream(tokens));
            AssertTokenStreamContents(filtered, Regex.Split(expected, "\\s").TrimEnd());
        }
コード例 #2
0
        public virtual void TestLegalbutVeryLargeOffsets()
        {
            // Token offsets just below int.MaxValue are legal: indexing must succeed,
            // and disposing the writer must pass the CheckIndex cross-check enabled
            // by storing term vectors.
            Directory directory = NewDirectory();
            IndexWriter writer = new IndexWriter(directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, null));

            Token first = new Token("foo", 0, int.MaxValue - 500);
            if (Random.NextBoolean())
            {
                first.Payload = new BytesRef("test"); // randomly exercise the payload path too
            }
            Token second = new Token("foo", int.MaxValue - 500, int.MaxValue);

            FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
            fieldType.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            // store some term vectors for the checkindex cross-check
            fieldType.StoreTermVectors = true;
            fieldType.StoreTermVectorPositions = true;
            fieldType.StoreTermVectorOffsets = true;

            Document document = new Document();
            document.Add(new Field("foo", new CannedTokenStream(new Token[] { first, second }), fieldType));
            writer.AddDocument(document);
            writer.Dispose();
            directory.Dispose();
        }
コード例 #3
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public void testDups(final String expected, final org.apache.lucene.analysis.Token... tokens) throws Exception
//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
        public virtual void testDups(string expected, params Token[] tokens)
        {
            // Run the canned tokens through the RemoveDuplicates filter and compare
            // against the whitespace-separated expected terms.
            TokenStream filtered = tokenFilterFactory("RemoveDuplicates").create(new CannedTokenStream(tokens));
            assertTokenStreamContents(filtered, expected.Split("\\s", true));
        }
コード例 #4
0
        public virtual void TestMixupMultiValued()
        {
            // Multi-valued field where only the middle value's token carries a
            // payload: the term vector for "withPayload" must report that payload at
            // the expected position without leaking into the payload-free siblings.
            Directory         dir    = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);
            Document  doc        = new Document();
            FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);

            // Store term vectors with positions and payloads; offsets are toggled
            // randomly to exercise both storage codepaths.
            customType.StoreTermVectors         = true;
            customType.StoreTermVectorPositions = true;
            customType.StoreTermVectorPayloads  = true;
            customType.StoreTermVectorOffsets   = Random.NextBoolean();
            Field       field = new Field("field", "", customType);
            TokenStream ts    = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);

            // First value: plain tokenizer output, no payload attribute on the stream.
            Assert.IsFalse(ts.HasAttribute <IPayloadAttribute>());
            field.SetTokenStream(ts);
            doc.Add(field);
            Field field2      = new Field("field", "", customType);
            Token withPayload = new Token("withPayload", 0, 11);

            // Second value: a single canned token that carries payload "test".
            withPayload.Payload = new BytesRef("test");
            ts = new CannedTokenStream(withPayload);
            Assert.IsTrue(ts.HasAttribute <IPayloadAttribute>());
            field2.SetTokenStream(ts);
            doc.Add(field2);
            Field field3 = new Field("field", "", customType);

            // Third value: payload-free again.
            ts = new MockTokenizer(new StringReader("nopayload"), MockTokenizer.WHITESPACE, true);
            Assert.IsFalse(ts.HasAttribute <IPayloadAttribute>());
            field3.SetTokenStream(ts);
            doc.Add(field3);
            writer.AddDocument(doc);
            DirectoryReader reader = writer.GetReader();
            Terms           terms  = reader.GetTermVector(0, "field");

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(terms != null);
            }
            TermsEnum termsEnum = terms.GetIterator(null);

            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("withPayload")));
            DocsAndPositionsEnum de = termsEnum.DocsAndPositions(null, null);

            // Position 3: after the three tokens ("here we go") of the first value.
            Assert.AreEqual(0, de.NextDoc());
            Assert.AreEqual(3, de.NextPosition());
            Assert.AreEqual(new BytesRef("test"), de.GetPayload());
            writer.Dispose();
            reader.Dispose();
            dir.Dispose();
        }
コード例 #5
0
        public virtual void TestMixupDocs()
        {
            // Indexes three documents where only the middle one's single token
            // carries a payload; the term vector of doc 1 must expose that payload
            // while the payload-free neighbours remain unaffected.
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer     = new RandomIndexWriter(Random, dir, iwc);
            Document          doc        = new Document();
            FieldType         customType = new FieldType(TextField.TYPE_NOT_STORED);

            // Store term vectors with positions and payloads; offsets are toggled
            // randomly to exercise both storage codepaths.
            customType.StoreTermVectors         = true;
            customType.StoreTermVectorPositions = true;
            customType.StoreTermVectorPayloads  = true;
            customType.StoreTermVectorOffsets   = Random.NextBoolean();
            Field       field = new Field("field", "", customType);
            TokenStream ts    = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);

            // Doc 0: tokenizer output with no payload attribute.
            Assert.IsFalse(ts.HasAttribute <IPayloadAttribute>());
            field.SetTokenStream(ts);
            doc.Add(field);
            writer.AddDocument(doc);

            // Doc 1: a single canned token carrying payload "test".
            Token withPayload = new Token("withPayload", 0, 11);

            withPayload.Payload = new BytesRef("test");
            ts = new CannedTokenStream(withPayload);
            Assert.IsTrue(ts.HasAttribute <IPayloadAttribute>());
            field.SetTokenStream(ts);
            writer.AddDocument(doc);

            // Doc 2: payload-free again.
            ts = new MockTokenizer(new StringReader("another"), MockTokenizer.WHITESPACE, true);
            Assert.IsFalse(ts.HasAttribute <IPayloadAttribute>());
            field.SetTokenStream(ts);
            writer.AddDocument(doc);

            DirectoryReader reader = writer.GetReader();
            Terms           terms  = reader.GetTermVector(1, "field");

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(terms != null);
            }
            TermsEnum termsEnum = terms.GetIterator(null);

            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("withPayload")));
            DocsAndPositionsEnum de = termsEnum.DocsAndPositions(null, null);

            Assert.AreEqual(0, de.NextDoc());
            Assert.AreEqual(0, de.NextPosition());
            Assert.AreEqual(new BytesRef("test"), de.GetPayload());
            writer.Dispose();
            reader.Dispose();
            dir.Dispose();
        }
コード例 #6
0
        public virtual void TestMixupMultiValued()
        {
            // Older-API variant: multi-valued field where only the middle value's
            // token carries a payload; the term vector for "withPayload" must report
            // it at the position after the first value's three tokens.
            Directory         dir        = NewDirectory();
            RandomIndexWriter writer     = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            Document          doc        = new Document();
            FieldType         customType = new FieldType(TextField.TYPE_NOT_STORED);

            // Store term vectors with positions and payloads; offsets are randomized.
            customType.StoreTermVectors         = true;
            customType.StoreTermVectorPositions = true;
            customType.StoreTermVectorPayloads  = true;
            customType.StoreTermVectorOffsets   = Random().NextBoolean();
            Field       field = new Field("field", "", customType);
            TokenStream ts    = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);

            // First value: plain tokenizer stream without a payload attribute.
            Assert.IsFalse(ts.HasAttribute <IPayloadAttribute>());
            field.TokenStream = ts;
            doc.Add(field);
            Field field2      = new Field("field", "", customType);
            Token withPayload = new Token("withPayload", 0, 11);

            // Second value: one canned token carrying payload "test".
            withPayload.Payload = new BytesRef("test");
            ts = new CannedTokenStream(withPayload);
            Assert.IsTrue(ts.HasAttribute <IPayloadAttribute>());
            field2.TokenStream = ts;
            doc.Add(field2);
            Field field3 = new Field("field", "", customType);

            // Third value: payload-free again.
            ts = new MockTokenizer(new StringReader("nopayload"), MockTokenizer.WHITESPACE, true);
            Assert.IsFalse(ts.HasAttribute <IPayloadAttribute>());
            field3.TokenStream = ts;
            doc.Add(field3);
            writer.AddDocument(doc);
            DirectoryReader reader = writer.Reader;
            Terms           terms  = reader.GetTermVector(0, "field");

            Debug.Assert(terms != null);
            TermsEnum termsEnum = terms.Iterator(null);

            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("withPayload")));
            DocsAndPositionsEnum de = termsEnum.DocsAndPositions(null, null);

            // Position 3: after "here", "we", "go" from the first value.
            Assert.AreEqual(0, de.NextDoc());
            Assert.AreEqual(3, de.NextPosition());
            Assert.AreEqual(new BytesRef("test"), de.Payload);
            writer.Dispose();
            reader.Dispose();
            dir.Dispose();
        }
コード例 #7
0
        public virtual void TestMixupDocs()
        {
            // Ensure the payload written for one document does not bleed into the
            // term vectors of the neighbouring, payload-free documents.
            Directory directory = NewDirectory();
            IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            config.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter indexWriter = new RandomIndexWriter(Random(), directory, config);
            Document document = new Document();

            // Term vectors with positions and payloads; offsets are randomized.
            FieldType vectorType = new FieldType(TextField.TYPE_NOT_STORED);
            vectorType.StoreTermVectors = true;
            vectorType.StoreTermVectorPositions = true;
            vectorType.StoreTermVectorPayloads = true;
            vectorType.StoreTermVectorOffsets = Random().NextBoolean();

            // Doc 0: tokenizer output with no payload attribute.
            Field contentField = new Field("field", "", vectorType);
            TokenStream stream = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);
            Assert.IsFalse(stream.HasAttribute<IPayloadAttribute>());
            contentField.TokenStream = stream;
            document.Add(contentField);
            indexWriter.AddDocument(document);

            // Doc 1: a single canned token that carries payload "test".
            Token payloadToken = new Token("withPayload", 0, 11);
            payloadToken.Payload = new BytesRef("test");
            stream = new CannedTokenStream(payloadToken);
            Assert.IsTrue(stream.HasAttribute<IPayloadAttribute>());
            contentField.TokenStream = stream;
            indexWriter.AddDocument(document);

            // Doc 2: payload-free again.
            stream = new MockTokenizer(new StringReader("another"), MockTokenizer.WHITESPACE, true);
            Assert.IsFalse(stream.HasAttribute<IPayloadAttribute>());
            contentField.TokenStream = stream;
            indexWriter.AddDocument(document);

            // The payload must be visible on doc 1's term vector.
            DirectoryReader reader = indexWriter.Reader;
            Terms vector = reader.GetTermVector(1, "field");
            Debug.Assert(vector != null);
            TermsEnum termsEnum = vector.Iterator(null);
            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("withPayload")));
            DocsAndPositionsEnum positions = termsEnum.DocsAndPositions(null, null);
            Assert.AreEqual(0, positions.NextDoc());
            Assert.AreEqual(0, positions.NextPosition());
            Assert.AreEqual(new BytesRef("test"), positions.Payload);
            indexWriter.Dispose();
            reader.Dispose();
            directory.Dispose();
        }
コード例 #8
0
        public virtual void TestMixupMultiValued()
        {
            // TextField variant: only the middle value's token has a payload;
            // reading positions for "withPayload" from the single segment must
            // surface that payload.
            Directory         dir    = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);
            Document    doc   = new Document();
            Field       field = new TextField("field", "", Field.Store.NO);
            TokenStream ts    = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);

            // NOTE(review): this checks the concrete PayloadAttribute type while the
            // IsTrue check below uses the IPayloadAttribute interface — confirm the
            // asymmetry is intentional.
            Assert.IsFalse(ts.HasAttribute <PayloadAttribute>());
            field.SetTokenStream(ts);
            doc.Add(field);
            Field field2      = new TextField("field", "", Field.Store.NO);
            Token withPayload = new Token("withPayload", 0, 11);

            // Second value: one canned token carrying payload "test".
            withPayload.Payload = new BytesRef("test");
            ts = new CannedTokenStream(withPayload);
            Assert.IsTrue(ts.HasAttribute <IPayloadAttribute>());
            field2.SetTokenStream(ts);
            doc.Add(field2);
            Field field3 = new TextField("field", "", Field.Store.NO);

            // Third value: payload-free again.
            ts = new MockTokenizer(new StringReader("nopayload"), MockTokenizer.WHITESPACE, true);
            Assert.IsFalse(ts.HasAttribute <PayloadAttribute>());
            field3.SetTokenStream(ts);
            doc.Add(field3);
            writer.AddDocument(doc);
            DirectoryReader      reader = writer.GetReader();
            SegmentReader        sr     = GetOnlySegmentReader(reader);
            DocsAndPositionsEnum de     = sr.GetTermPositionsEnum(new Term("field", "withPayload"));

            de.NextDoc();
            de.NextPosition();
            Assert.AreEqual(new BytesRef("test"), de.GetPayload());
            writer.Dispose();
            reader.Dispose();
            dir.Dispose();
        }
コード例 #9
0
        public virtual void TestMixupDocs()
        {
            // Three documents in one segment (log merge policy); only doc 1's token
            // carries a payload. Reading positions through a slow composite wrapper
            // must surface that payload.
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, null);

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer = new RandomIndexWriter(Random, dir, iwc);
            Document          doc    = new Document();
            Field             field  = new TextField("field", "", Field.Store.NO);
            TokenStream       ts     = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);

            // NOTE(review): this checks the concrete PayloadAttribute type while the
            // IsTrue check below uses the IPayloadAttribute interface — confirm the
            // asymmetry is intentional.
            Assert.IsFalse(ts.HasAttribute <PayloadAttribute>());
            field.SetTokenStream(ts);
            doc.Add(field);
            writer.AddDocument(doc);
            Token withPayload = new Token("withPayload", 0, 11);

            // Doc 1: a single canned token carrying payload "test".
            withPayload.Payload = new BytesRef("test");
            ts = new CannedTokenStream(withPayload);
            Assert.IsTrue(ts.HasAttribute <IPayloadAttribute>());
            field.SetTokenStream(ts);
            writer.AddDocument(doc);
            // Doc 2: payload-free again.
            ts = new MockTokenizer(new StringReader("another"), MockTokenizer.WHITESPACE, true);
            Assert.IsFalse(ts.HasAttribute <PayloadAttribute>());
            field.SetTokenStream(ts);
            writer.AddDocument(doc);
            DirectoryReader      reader = writer.GetReader();
            AtomicReader         sr     = SlowCompositeReaderWrapper.Wrap(reader);
            DocsAndPositionsEnum de     = sr.GetTermPositionsEnum(new Term("field", "withPayload"));

            de.NextDoc();
            de.NextPosition();
            Assert.AreEqual(new BytesRef("test"), de.GetPayload());
            writer.Dispose();
            reader.Dispose();
            dir.Dispose();
        }
コード例 #10
0
ファイル: TestPayloads.cs プロジェクト: wwb/lucenenet
        public virtual void TestMixupMultiValued()
        {
            // Older-API TextField variant: only the middle value's token has a
            // payload; reading term positions from the single segment must surface
            // that payload.
            Directory         dir    = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            Document          doc    = new Document();
            Field             field  = new TextField("field", "", Field.Store.NO);
            TokenStream       ts     = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);

            // NOTE(review): this checks the concrete PayloadAttribute type while the
            // IsTrue check below uses the IPayloadAttribute interface — confirm the
            // asymmetry is intentional.
            Assert.IsFalse(ts.HasAttribute <PayloadAttribute>());
            field.TokenStream = ts;
            doc.Add(field);
            Field field2      = new TextField("field", "", Field.Store.NO);
            Token withPayload = new Token("withPayload", 0, 11);

            // Second value: one canned token carrying payload "test".
            withPayload.Payload = new BytesRef("test");
            ts = new CannedTokenStream(withPayload);
            Assert.IsTrue(ts.HasAttribute <IPayloadAttribute>());
            field2.TokenStream = ts;
            doc.Add(field2);
            Field field3 = new TextField("field", "", Field.Store.NO);

            // Third value: payload-free again.
            ts = new MockTokenizer(new StringReader("nopayload"), MockTokenizer.WHITESPACE, true);
            Assert.IsFalse(ts.HasAttribute <PayloadAttribute>());
            field3.TokenStream = ts;
            doc.Add(field3);
            writer.AddDocument(doc);
            DirectoryReader      reader = writer.Reader;
            SegmentReader        sr     = GetOnlySegmentReader(reader);
            DocsAndPositionsEnum de     = sr.TermPositionsEnum(new Term("field", "withPayload"));

            de.NextDoc();
            de.NextPosition();
            Assert.AreEqual(new BytesRef("test"), de.Payload);
            writer.Dispose();
            reader.Dispose();
            dir.Dispose();
        }
 public virtual void TestDups(string expected, params Token[] tokens)
 {
     // Run the canned tokens through the RemoveDuplicates filter and verify the
     // surviving terms equal the whitespace-separated expectation.
     TokenStream filtered = TokenFilterFactory("RemoveDuplicates").Create(new CannedTokenStream(tokens));
     AssertTokenStreamContents(filtered, Regex.Split(expected, "\\s"));
 }
コード例 #12
0
        public void TestBooleanPhraseWithSynonym()
        {
            // Highlighting a URL tokenized both as individual parts and as a
            // position-increment-0 synonym ("httpwwwfacebookcom"): phrase queries
            // over either form, and their boolean OR, must all highlight the same
            // full span.
            Directory   dir    = NewDirectory();
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
            Document    doc    = new Document();
            FieldType   type   = new FieldType(TextField.TYPE_NOT_STORED);

            // Term vectors with positions and offsets are required by the
            // FastVectorHighlighter.
            type.StoreTermVectorOffsets   = (true);
            type.StoreTermVectorPositions = (true);
            type.StoreTermVectors         = (true);
            type.Freeze();
            // Synonym token spanning offsets 6-29 with position increment 0, i.e.
            // stacked on the "http" token.
            Token syn = new Token("httpwwwfacebookcom", 6, 29);

            syn.PositionIncrement = (0);
            CannedTokenStream ts = new CannedTokenStream(
                new Token("test", 0, 4),
                new Token("http", 6, 10),
                syn,
                new Token("www", 13, 16),
                new Token("facebook", 17, 25),
                new Token("com", 26, 29)
                );
            Field field = new Field("field", ts, type);

            doc.Add(field);
            // Stored copy supplies the raw text the highlighter fragments.
            doc.Add(new StoredField("field", "Test: http://www.facebook.com"));
            writer.AddDocument(doc);
            FastVectorHighlighter highlighter = new FastVectorHighlighter();

            IndexReader reader = DirectoryReader.Open(writer, true);
            int         docId  = 0;

            // query1: match
            PhraseQuery pq = new PhraseQuery();

            pq.Add(new Term("field", "test"));
            pq.Add(new Term("field", "http"));
            pq.Add(new Term("field", "www"));
            pq.Add(new Term("field", "facebook"));
            pq.Add(new Term("field", "com"));
            FieldQuery fieldQuery = highlighter.GetFieldQuery(pq, reader);

            String[] bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, "field", 54, 1);
            assertEquals("<b>Test: http://www.facebook.com</b>", bestFragments[0]);

            // query2: match
            PhraseQuery pq2 = new PhraseQuery();

            pq2.Add(new Term("field", "test"));
            pq2.Add(new Term("field", "httpwwwfacebookcom"));
            pq2.Add(new Term("field", "www"));
            pq2.Add(new Term("field", "facebook"));
            pq2.Add(new Term("field", "com"));
            fieldQuery    = highlighter.GetFieldQuery(pq2, reader);
            bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, "field", 54, 1);
            assertEquals("<b>Test: http://www.facebook.com</b>", bestFragments[0]);

            // query3: OR query1 and query2 together
            BooleanQuery bq = new BooleanQuery();

            bq.Add(pq, Occur.SHOULD);
            bq.Add(pq2, Occur.SHOULD);
            fieldQuery    = highlighter.GetFieldQuery(bq, reader);
            bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, "field", 54, 1);
            assertEquals("<b>Test: http://www.facebook.com</b>", bestFragments[0]);

            reader.Dispose();
            writer.Dispose();
            dir.Dispose();
        }
コード例 #13
0
 public virtual void TestLegalbutVeryLargeOffsets()
 {
     // Offsets just below int.MaxValue are legal: indexing must succeed and the
     // CheckIndex cross-check (via stored term vectors) must pass on dispose.
     Directory directory = NewDirectory();
     IndexWriter writer = new IndexWriter(directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, null));
     Document document = new Document();

     Token first = new Token("foo", 0, int.MaxValue - 500);
     if (Random().NextBoolean())
     {
         first.Payload = new BytesRef("test"); // randomly exercise the payload path
     }
     Token second = new Token("foo", int.MaxValue - 500, int.MaxValue);

     FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
     fieldType.IndexOptionsValue = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
     // store some term vectors for the checkindex cross-check
     fieldType.StoreTermVectors = true;
     fieldType.StoreTermVectorPositions = true;
     fieldType.StoreTermVectorOffsets = true;

     document.Add(new Field("foo", new CannedTokenStream(new Token[] { first, second }), fieldType));
     writer.AddDocument(document);
     writer.Dispose();
     directory.Dispose();
 }
コード例 #14
0
 //JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
 //ORIGINAL LINE: public void testDups(final String expected, final org.apache.lucene.analysis.Token... tokens) throws Exception
 //JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
 public virtual void testDups(string expected, params Token[] tokens)
 {
     // Filter the canned tokens through RemoveDuplicates and compare against
     // the whitespace-separated expected terms.
     TokenStream filtered = tokenFilterFactory("RemoveDuplicates").create(new CannedTokenStream(tokens));
     assertTokenStreamContents(filtered, expected.Split("\\s", true));
 }
コード例 #15
0
 public virtual void TestMixupMultiValued()
 {
     // Multi-valued field: only the middle value's token carries a payload; the
     // term vector for "withPayload" must report it at position 3 (after the
     // first value's three tokens).
     Directory dir = NewDirectory();
     RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
     Document doc = new Document();
     FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
     // Store term vectors with positions and payloads; offsets are randomized.
     customType.StoreTermVectors = true;
     customType.StoreTermVectorPositions = true;
     customType.StoreTermVectorPayloads = true;
     customType.StoreTermVectorOffsets = Random().NextBoolean();
     Field field = new Field("field", "", customType);
     TokenStream ts = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);
     // First value: no payload attribute on the tokenizer stream.
     Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
     field.TokenStream = ts;
     doc.Add(field);
     Field field2 = new Field("field", "", customType);
     Token withPayload = new Token("withPayload", 0, 11);
     withPayload.Payload = new BytesRef("test");
     // Second value: a single canned token carrying payload "test".
     ts = new CannedTokenStream(withPayload);
     Assert.IsTrue(ts.HasAttribute<IPayloadAttribute>());
     field2.TokenStream = ts;
     doc.Add(field2);
     Field field3 = new Field("field", "", customType);
     // Third value: payload-free again.
     ts = new MockTokenizer(new StringReader("nopayload"), MockTokenizer.WHITESPACE, true);
     Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
     field3.TokenStream = ts;
     doc.Add(field3);
     writer.AddDocument(doc);
     DirectoryReader reader = writer.Reader;
     Terms terms = reader.GetTermVector(0, "field");
     Debug.Assert(terms != null);
     TermsEnum termsEnum = terms.Iterator(null);
     Assert.IsTrue(termsEnum.SeekExact(new BytesRef("withPayload")));
     DocsAndPositionsEnum de = termsEnum.DocsAndPositions(null, null);
     Assert.AreEqual(0, de.NextDoc());
     // Position 3: after "here", "we", "go" from the first value.
     Assert.AreEqual(3, de.NextPosition());
     Assert.AreEqual(new BytesRef("test"), de.Payload);
     writer.Dispose();
     reader.Dispose();
     dir.Dispose();
 }