// Feeds the given canned tokens through the "RemoveDuplicates" filter factory
// and verifies the resulting stream contains exactly the whitespace-separated
// terms in 'expected' (TrimEnd() drops trailing empty entries from the split).
public virtual void TestDups(string expected, params Token[] tokens)
{
    TokenStream input = new CannedTokenStream(tokens);
    TokenStream filtered = TokenFilterFactory("RemoveDuplicates").Create(input);
    string[] expectedTerms = Regex.Split(expected, "\\s").TrimEnd();
    AssertTokenStreamContents(filtered, expectedTerms);
}
// Indexes two tokens whose offsets approach int.MaxValue to verify the
// indexing chain accepts legal-but-huge offsets without overflowing.
public virtual void TestLegalbutVeryLargeOffsets()
{
    Directory dir = NewDirectory();
    IndexWriter iw = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, null));

    Token first = new Token("foo", 0, int.MaxValue - 500);
    if (Random.NextBoolean())
    {
        first.Payload = new BytesRef("test"); // randomly exercise the payload path as well
    }
    Token second = new Token("foo", int.MaxValue - 500, int.MaxValue);
    TokenStream tokenStream = new CannedTokenStream(new Token[] { first, second });

    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
    // store some term vectors for the checkindex cross-check
    ft.StoreTermVectors = true;
    ft.StoreTermVectorPositions = true;
    ft.StoreTermVectorOffsets = true;

    Document doc = new Document();
    doc.Add(new Field("foo", tokenStream, ft));
    iw.AddDocument(doc);
    iw.Dispose();
    dir.Dispose();
}
// Feeds the given canned tokens through the "RemoveDuplicates" filter factory and
// verifies the resulting stream matches the whitespace-separated terms in 'expected'.
// BUGFIX: the machine-converted expected.Split("\\s", true) treated the regex
// pattern as a literal separator string (string.Split has no regex overload in
// .NET); Regex.Split is required to mirror Java's expected.split("\\s").
public virtual void testDups(string expected, params Token[] tokens)
{
    TokenStream stream = new CannedTokenStream(tokens);
    stream = tokenFilterFactory("RemoveDuplicates").create(stream);
    assertTokenStreamContents(stream, Regex.Split(expected, "\\s"));
}
// Verifies that a term-vector payload survives when a multi-valued field mixes
// token streams with and without a payload attribute: the payload on the middle
// value ("withPayload") must be readable back from the stored term vector.
public virtual void TestMixupMultiValued()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);
    Document doc = new Document();
    // Term vectors with positions + payloads; offsets are toggled randomly to
    // exercise both storage layouts.
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.StoreTermVectors = true;
    customType.StoreTermVectorPositions = true;
    customType.StoreTermVectorPayloads = true;
    customType.StoreTermVectorOffsets = Random.NextBoolean();
    // Value 1: tokenizer without a payload attribute.
    Field field = new Field("field", "", customType);
    TokenStream ts = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);
    Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
    field.SetTokenStream(ts);
    doc.Add(field);
    // Value 2: canned token carrying the payload under test.
    Field field2 = new Field("field", "", customType);
    Token withPayload = new Token("withPayload", 0, 11);
    withPayload.Payload = new BytesRef("test");
    ts = new CannedTokenStream(withPayload);
    Assert.IsTrue(ts.HasAttribute<IPayloadAttribute>());
    field2.SetTokenStream(ts);
    doc.Add(field2);
    // Value 3: another payload-free tokenizer after the payload-bearing one.
    Field field3 = new Field("field", "", customType);
    ts = new MockTokenizer(new StringReader("nopayload"), MockTokenizer.WHITESPACE, true);
    Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
    field3.SetTokenStream(ts);
    doc.Add(field3);
    writer.AddDocument(doc);
    // Read the term vector back and check the payload at position 3
    // (positions 0-2 belong to "here we go").
    DirectoryReader reader = writer.GetReader();
    Terms terms = reader.GetTermVector(0, "field");
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(terms != null);
    }
    TermsEnum termsEnum = terms.GetIterator(null);
    Assert.IsTrue(termsEnum.SeekExact(new BytesRef("withPayload")));
    DocsAndPositionsEnum de = termsEnum.DocsAndPositions(null, null);
    Assert.AreEqual(0, de.NextDoc());
    Assert.AreEqual(3, de.NextPosition());
    Assert.AreEqual(new BytesRef("test"), de.GetPayload());
    writer.Dispose();
    reader.Dispose();
    dir.Dispose();
}
// Verifies term-vector payloads survive across documents when the same Field
// instance is reused with different token streams (with/without payloads);
// only the second document (id 1) is indexed with a payload-bearing stream.
public virtual void TestMixupDocs()
{
    Directory dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    // Log merge policy keeps segments/doc ids in insertion order so doc 1 is predictable.
    iwc.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(Random, dir, iwc);
    Document doc = new Document();
    // Term vectors with positions + payloads; offsets toggled randomly.
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.StoreTermVectors = true;
    customType.StoreTermVectorPositions = true;
    customType.StoreTermVectorPayloads = true;
    customType.StoreTermVectorOffsets = Random.NextBoolean();
    Field field = new Field("field", "", customType);
    // Doc 0: tokenizer without a payload attribute.
    TokenStream ts = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);
    Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
    field.SetTokenStream(ts);
    doc.Add(field);
    writer.AddDocument(doc);
    // Doc 1: canned token carrying the payload under test (same Field reused).
    Token withPayload = new Token("withPayload", 0, 11);
    withPayload.Payload = new BytesRef("test");
    ts = new CannedTokenStream(withPayload);
    Assert.IsTrue(ts.HasAttribute<IPayloadAttribute>());
    field.SetTokenStream(ts);
    writer.AddDocument(doc);
    // Doc 2: payload-free tokenizer again.
    ts = new MockTokenizer(new StringReader("another"), MockTokenizer.WHITESPACE, true);
    Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
    field.SetTokenStream(ts);
    writer.AddDocument(doc);
    // Read back doc 1's term vector and check the payload at position 0.
    DirectoryReader reader = writer.GetReader();
    Terms terms = reader.GetTermVector(1, "field");
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(terms != null);
    }
    TermsEnum termsEnum = terms.GetIterator(null);
    Assert.IsTrue(termsEnum.SeekExact(new BytesRef("withPayload")));
    DocsAndPositionsEnum de = termsEnum.DocsAndPositions(null, null);
    Assert.AreEqual(0, de.NextDoc());
    Assert.AreEqual(0, de.NextPosition());
    Assert.AreEqual(new BytesRef("test"), de.GetPayload());
    writer.Dispose();
    reader.Dispose();
    dir.Dispose();
}
// Older-API variant: verifies a term-vector payload survives when a multi-valued
// field mixes token streams with and without a payload attribute; the payload on
// the middle value ("withPayload") must be readable from the stored term vector.
public virtual void TestMixupMultiValued()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    Document doc = new Document();
    // Term vectors with positions + payloads; offsets toggled randomly.
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.StoreTermVectors = true;
    customType.StoreTermVectorPositions = true;
    customType.StoreTermVectorPayloads = true;
    customType.StoreTermVectorOffsets = Random().NextBoolean();
    // Value 1: tokenizer without a payload attribute.
    Field field = new Field("field", "", customType);
    TokenStream ts = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);
    Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
    field.TokenStream = ts;
    doc.Add(field);
    // Value 2: canned token carrying the payload under test.
    Field field2 = new Field("field", "", customType);
    Token withPayload = new Token("withPayload", 0, 11);
    withPayload.Payload = new BytesRef("test");
    ts = new CannedTokenStream(withPayload);
    Assert.IsTrue(ts.HasAttribute<IPayloadAttribute>());
    field2.TokenStream = ts;
    doc.Add(field2);
    // Value 3: another payload-free tokenizer.
    Field field3 = new Field("field", "", customType);
    ts = new MockTokenizer(new StringReader("nopayload"), MockTokenizer.WHITESPACE, true);
    Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
    field3.TokenStream = ts;
    doc.Add(field3);
    writer.AddDocument(doc);
    // Read the term vector back; payload expected at position 3
    // (positions 0-2 belong to "here we go").
    DirectoryReader reader = writer.Reader;
    Terms terms = reader.GetTermVector(0, "field");
    Debug.Assert(terms != null);
    TermsEnum termsEnum = terms.Iterator(null);
    Assert.IsTrue(termsEnum.SeekExact(new BytesRef("withPayload")));
    DocsAndPositionsEnum de = termsEnum.DocsAndPositions(null, null);
    Assert.AreEqual(0, de.NextDoc());
    Assert.AreEqual(3, de.NextPosition());
    Assert.AreEqual(new BytesRef("test"), de.Payload);
    writer.Dispose();
    reader.Dispose();
    dir.Dispose();
}
// Older-API variant: verifies term-vector payloads survive across documents when
// the same Field instance is reused with different token streams; only the
// second document (id 1) is indexed with a payload-bearing stream.
public virtual void TestMixupDocs()
{
    Directory dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    // Log merge policy keeps doc ids in insertion order so doc 1 is predictable.
    iwc.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);
    Document doc = new Document();
    // Term vectors with positions + payloads; offsets toggled randomly.
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.StoreTermVectors = true;
    customType.StoreTermVectorPositions = true;
    customType.StoreTermVectorPayloads = true;
    customType.StoreTermVectorOffsets = Random().NextBoolean();
    Field field = new Field("field", "", customType);
    // Doc 0: tokenizer without a payload attribute.
    TokenStream ts = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);
    Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
    field.TokenStream = ts;
    doc.Add(field);
    writer.AddDocument(doc);
    // Doc 1: canned token carrying the payload under test (same Field reused).
    Token withPayload = new Token("withPayload", 0, 11);
    withPayload.Payload = new BytesRef("test");
    ts = new CannedTokenStream(withPayload);
    Assert.IsTrue(ts.HasAttribute<IPayloadAttribute>());
    field.TokenStream = ts;
    writer.AddDocument(doc);
    // Doc 2: payload-free tokenizer again.
    ts = new MockTokenizer(new StringReader("another"), MockTokenizer.WHITESPACE, true);
    Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
    field.TokenStream = ts;
    writer.AddDocument(doc);
    // Read back doc 1's term vector and check the payload at position 0.
    DirectoryReader reader = writer.Reader;
    Terms terms = reader.GetTermVector(1, "field");
    Debug.Assert(terms != null);
    TermsEnum termsEnum = terms.Iterator(null);
    Assert.IsTrue(termsEnum.SeekExact(new BytesRef("withPayload")));
    DocsAndPositionsEnum de = termsEnum.DocsAndPositions(null, null);
    Assert.AreEqual(0, de.NextDoc());
    Assert.AreEqual(0, de.NextPosition());
    Assert.AreEqual(new BytesRef("test"), de.Payload);
    writer.Dispose();
    reader.Dispose();
    dir.Dispose();
}
// Verifies the payload on the middle value of a multi-valued field is preserved
// in the postings (checked via term positions rather than term vectors).
public virtual void TestMixupMultiValued()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);
    Document doc = new Document();
    // Value 1: tokenizer without a payload attribute.
    Field field = new TextField("field", "", Field.Store.NO);
    TokenStream ts = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);
    // BUGFIX: query the attribute interface (IPayloadAttribute), matching the
    // IsTrue check below and the sibling tests; HasAttribute<PayloadAttribute>()
    // tested the concrete class and was trivially false, making the assertion vacuous.
    Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
    field.SetTokenStream(ts);
    doc.Add(field);
    // Value 2: canned token carrying the payload under test.
    Field field2 = new TextField("field", "", Field.Store.NO);
    Token withPayload = new Token("withPayload", 0, 11);
    withPayload.Payload = new BytesRef("test");
    ts = new CannedTokenStream(withPayload);
    Assert.IsTrue(ts.HasAttribute<IPayloadAttribute>());
    field2.SetTokenStream(ts);
    doc.Add(field2);
    // Value 3: another payload-free tokenizer.
    Field field3 = new TextField("field", "", Field.Store.NO);
    ts = new MockTokenizer(new StringReader("nopayload"), MockTokenizer.WHITESPACE, true);
    Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
    field3.SetTokenStream(ts);
    doc.Add(field3);
    writer.AddDocument(doc);
    // Read back via term positions and verify the payload round-tripped.
    DirectoryReader reader = writer.GetReader();
    SegmentReader sr = GetOnlySegmentReader(reader);
    DocsAndPositionsEnum de = sr.GetTermPositionsEnum(new Term("field", "withPayload"));
    de.NextDoc();
    de.NextPosition();
    Assert.AreEqual(new BytesRef("test"), de.GetPayload());
    writer.Dispose();
    reader.Dispose();
    dir.Dispose();
}
// Verifies the payload indexed for the second document round-trips through the
// postings when the same Field instance is reused with different token streams.
public virtual void TestMixupDocs()
{
    Directory dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, null);
    // Log merge policy keeps doc ids in insertion order.
    iwc.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(Random, dir, iwc);
    Document doc = new Document();
    Field field = new TextField("field", "", Field.Store.NO);
    // Doc 0: tokenizer without a payload attribute.
    TokenStream ts = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);
    // BUGFIX: query the attribute interface (IPayloadAttribute), matching the
    // IsTrue check below and the sibling tests; HasAttribute<PayloadAttribute>()
    // tested the concrete class and was trivially false, making the assertion vacuous.
    Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
    field.SetTokenStream(ts);
    doc.Add(field);
    writer.AddDocument(doc);
    // Doc 1: canned token carrying the payload under test.
    Token withPayload = new Token("withPayload", 0, 11);
    withPayload.Payload = new BytesRef("test");
    ts = new CannedTokenStream(withPayload);
    Assert.IsTrue(ts.HasAttribute<IPayloadAttribute>());
    field.SetTokenStream(ts);
    writer.AddDocument(doc);
    // Doc 2: payload-free tokenizer again.
    ts = new MockTokenizer(new StringReader("another"), MockTokenizer.WHITESPACE, true);
    Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
    field.SetTokenStream(ts);
    writer.AddDocument(doc);
    // Read back via term positions on a composite view and verify the payload.
    DirectoryReader reader = writer.GetReader();
    AtomicReader sr = SlowCompositeReaderWrapper.Wrap(reader);
    DocsAndPositionsEnum de = sr.GetTermPositionsEnum(new Term("field", "withPayload"));
    de.NextDoc();
    de.NextPosition();
    Assert.AreEqual(new BytesRef("test"), de.GetPayload());
    writer.Dispose();
    reader.Dispose();
    dir.Dispose();
}
// Older-API variant: verifies the payload on the middle value of a multi-valued
// field is preserved in the postings (checked via term positions).
public virtual void TestMixupMultiValued()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    Document doc = new Document();
    // Value 1: tokenizer without a payload attribute.
    Field field = new TextField("field", "", Field.Store.NO);
    TokenStream ts = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);
    // BUGFIX: query the attribute interface (IPayloadAttribute), matching the
    // IsTrue check below and the sibling tests; HasAttribute<PayloadAttribute>()
    // tested the concrete class and was trivially false, making the assertion vacuous.
    Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
    field.TokenStream = ts;
    doc.Add(field);
    // Value 2: canned token carrying the payload under test.
    Field field2 = new TextField("field", "", Field.Store.NO);
    Token withPayload = new Token("withPayload", 0, 11);
    withPayload.Payload = new BytesRef("test");
    ts = new CannedTokenStream(withPayload);
    Assert.IsTrue(ts.HasAttribute<IPayloadAttribute>());
    field2.TokenStream = ts;
    doc.Add(field2);
    // Value 3: another payload-free tokenizer.
    Field field3 = new TextField("field", "", Field.Store.NO);
    ts = new MockTokenizer(new StringReader("nopayload"), MockTokenizer.WHITESPACE, true);
    Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
    field3.TokenStream = ts;
    doc.Add(field3);
    writer.AddDocument(doc);
    // Read back via term positions and verify the payload round-tripped.
    DirectoryReader reader = writer.Reader;
    SegmentReader sr = GetOnlySegmentReader(reader);
    DocsAndPositionsEnum de = sr.TermPositionsEnum(new Term("field", "withPayload"));
    de.NextDoc();
    de.NextPosition();
    Assert.AreEqual(new BytesRef("test"), de.Payload);
    writer.Dispose();
    reader.Dispose();
    dir.Dispose();
}
// Runs the given canned tokens through the "RemoveDuplicates" filter factory and
// asserts the output stream equals the whitespace-separated terms in 'expected'.
public virtual void TestDups(string expected, params Token[] tokens)
{
    TokenStream input = new CannedTokenStream(tokens);
    TokenStream filtered = TokenFilterFactory("RemoveDuplicates").Create(input);
    AssertTokenStreamContents(filtered, Regex.Split(expected, "\\s"));
}
// Highlights a token stream containing a synonym token ("httpwwwfacebookcom"
// stacked over the "http ... com" run with position increment 0) and checks the
// same best fragment is produced for: the literal phrase, the synonym phrase,
// and the boolean OR of both.
public void TestBooleanPhraseWithSynonym()
{
    Directory dir = NewDirectory();
    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
    Document doc = new Document();
    // Term vectors with positions + offsets are required by FastVectorHighlighter.
    FieldType type = new FieldType(TextField.TYPE_NOT_STORED);
    type.StoreTermVectorOffsets = (true);
    type.StoreTermVectorPositions = (true);
    type.StoreTermVectors = (true);
    type.Freeze();
    // Synonym token covering offsets 6-29, stacked at the same position as "http".
    Token syn = new Token("httpwwwfacebookcom", 6, 29);
    syn.PositionIncrement = (0);
    CannedTokenStream ts = new CannedTokenStream(
        new Token("test", 0, 4),
        new Token("http", 6, 10),
        syn,
        new Token("www", 13, 16),
        new Token("facebook", 17, 25),
        new Token("com", 26, 29)
    );
    Field field = new Field("field", ts, type);
    doc.Add(field);
    // Stored copy supplies the raw text the highlighter fragments.
    doc.Add(new StoredField("field", "Test: http://www.facebook.com"));
    writer.AddDocument(doc);
    FastVectorHighlighter highlighter = new FastVectorHighlighter();
    IndexReader reader = DirectoryReader.Open(writer, true);
    int docId = 0;
    // query1: match the literal token sequence.
    PhraseQuery pq = new PhraseQuery();
    pq.Add(new Term("field", "test"));
    pq.Add(new Term("field", "http"));
    pq.Add(new Term("field", "www"));
    pq.Add(new Term("field", "facebook"));
    pq.Add(new Term("field", "com"));
    FieldQuery fieldQuery = highlighter.GetFieldQuery(pq, reader);
    String[] bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, "field", 54, 1);
    assertEquals("<b>Test: http://www.facebook.com</b>", bestFragments[0]);
    // query2: match via the stacked synonym term instead of "http".
    PhraseQuery pq2 = new PhraseQuery();
    pq2.Add(new Term("field", "test"));
    pq2.Add(new Term("field", "httpwwwfacebookcom"));
    pq2.Add(new Term("field", "www"));
    pq2.Add(new Term("field", "facebook"));
    pq2.Add(new Term("field", "com"));
    fieldQuery = highlighter.GetFieldQuery(pq2, reader);
    bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, "field", 54, 1);
    assertEquals("<b>Test: http://www.facebook.com</b>", bestFragments[0]);
    // query3: OR query1 and query2 together
    BooleanQuery bq = new BooleanQuery();
    bq.Add(pq, Occur.SHOULD);
    bq.Add(pq2, Occur.SHOULD);
    fieldQuery = highlighter.GetFieldQuery(bq, reader);
    bestFragments = highlighter.GetBestFragments(fieldQuery, reader, docId, "field", 54, 1);
    assertEquals("<b>Test: http://www.facebook.com</b>", bestFragments[0]);
    reader.Dispose();
    writer.Dispose();
    dir.Dispose();
}
// Older-API variant: indexes two tokens whose offsets approach int.MaxValue to
// verify the indexing chain accepts legal-but-huge offsets without overflowing.
public virtual void TestLegalbutVeryLargeOffsets()
{
    Directory dir = NewDirectory();
    IndexWriter iw = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, null));

    Token first = new Token("foo", 0, int.MaxValue - 500);
    if (Random().NextBoolean())
    {
        first.Payload = new BytesRef("test"); // randomly exercise the payload path as well
    }
    Token second = new Token("foo", int.MaxValue - 500, int.MaxValue);
    TokenStream tokenStream = new CannedTokenStream(new Token[] { first, second });

    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.IndexOptionsValue = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
    // store some term vectors for the checkindex cross-check
    ft.StoreTermVectors = true;
    ft.StoreTermVectorPositions = true;
    ft.StoreTermVectorOffsets = true;

    Document doc = new Document();
    doc.Add(new Field("foo", tokenStream, ft));
    iw.AddDocument(doc);
    iw.Dispose();
    dir.Dispose();
}
// Older-API variant: verifies a term-vector payload survives when a multi-valued
// field mixes token streams with and without a payload attribute; the payload on
// the middle value ("withPayload") must be readable from the stored term vector.
public virtual void TestMixupMultiValued()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    Document doc = new Document();
    // Term vectors with positions + payloads; offsets toggled randomly to
    // exercise both storage layouts.
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.StoreTermVectors = true;
    customType.StoreTermVectorPositions = true;
    customType.StoreTermVectorPayloads = true;
    customType.StoreTermVectorOffsets = Random().NextBoolean();
    // Value 1: tokenizer without a payload attribute.
    Field field = new Field("field", "", customType);
    TokenStream ts = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);
    Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
    field.TokenStream = ts;
    doc.Add(field);
    // Value 2: canned token carrying the payload under test.
    Field field2 = new Field("field", "", customType);
    Token withPayload = new Token("withPayload", 0, 11);
    withPayload.Payload = new BytesRef("test");
    ts = new CannedTokenStream(withPayload);
    Assert.IsTrue(ts.HasAttribute<IPayloadAttribute>());
    field2.TokenStream = ts;
    doc.Add(field2);
    // Value 3: another payload-free tokenizer.
    Field field3 = new Field("field", "", customType);
    ts = new MockTokenizer(new StringReader("nopayload"), MockTokenizer.WHITESPACE, true);
    Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
    field3.TokenStream = ts;
    doc.Add(field3);
    writer.AddDocument(doc);
    // Read the term vector back; payload expected at position 3
    // (positions 0-2 belong to "here we go").
    DirectoryReader reader = writer.Reader;
    Terms terms = reader.GetTermVector(0, "field");
    Debug.Assert(terms != null);
    TermsEnum termsEnum = terms.Iterator(null);
    Assert.IsTrue(termsEnum.SeekExact(new BytesRef("withPayload")));
    DocsAndPositionsEnum de = termsEnum.DocsAndPositions(null, null);
    Assert.AreEqual(0, de.NextDoc());
    Assert.AreEqual(3, de.NextPosition());
    Assert.AreEqual(new BytesRef("test"), de.Payload);
    writer.Dispose();
    reader.Dispose();
    dir.Dispose();
}