public virtual void TestLUCENE_3042()
{
    // Regression test for LUCENE-3042: fully consuming a token stream and then
    // closing it (even after an exception) must not itself throw.
    string input = "t";
    Analyzer analyzer = new MockAnalyzer(Random());

    Exception caught = null;
    TokenStream stream = analyzer.TokenStream("dummy", new StringReader(input));
    try
    {
        stream.Reset();
        while (stream.IncrementToken())
        {
            // drain every token; we only care that consumption completes
        }
        stream.End();
    }
    catch (Exception e)
    {
        caught = e;
    }
    finally
    {
        // close the stream, suppressing secondary failures if we already caught one
        IOUtils.CloseWhileHandlingException(caught, stream);
    }

    // the analyzer must still be usable after the stream above was closed
    AssertAnalyzesTo(analyzer, input, new string[] { "t" });
}
public virtual void TestForwardOffsets()
{
    // Analyzing text through a MockCharFilter (which remaps character offsets)
    // must never produce offsets that move backwards; MockAnalyzer's internal
    // checks fail the test if they do.
    int num = AtLeast(10000);
    for (int i = 0; i < num; i++)
    {
        string s = TestUtil.RandomHtmlishString(Random(), 20);
        StringReader reader = new StringReader(s);
        MockCharFilter charfilter = new MockCharFilter(reader, 2);
        MockAnalyzer analyzer = new MockAnalyzer(Random());
        Exception priorException = null;
        // BUG FIX: tokenize through the char filter itself, not its underlying
        // reader (charfilter.Input) — passing the inner reader bypassed the
        // filter, so offset correction was never exercised at all.
        TokenStream ts = analyzer.TokenStream("bogus", charfilter);
        try
        {
            ts.Reset();
            while (ts.IncrementToken())
            {
                // consume; offset consistency is verified inside the mock stream
            }
            ts.End();
        }
        catch (Exception e)
        {
            priorException = e;
        }
        finally
        {
            IOUtils.CloseWhileHandlingException(priorException, ts);
        }
    }
}
public virtual void TestThreeChars()
{
    // Build a tokenizer that emits runs of exactly three characters.
    CharacterRunAutomaton tripleChar = new CharacterRunAutomaton(new RegExp("...").ToAutomaton());
    Analyzer analyzer = new MockAnalyzer(Random(), tripleChar, false);

    AssertAnalyzesTo(
        analyzer,
        "foobar",
        new string[] { "foo", "bar" },
        new int[] { 0, 3 },
        new int[] { 3, 6 });

    // make sure when last term is a "partial" match that End() is correct
    AssertTokenStreamContents(
        analyzer.TokenStream("bogus", new StringReader("fooba")),
        new string[] { "foo" },
        new int[] { 0 },
        new int[] { 3 },
        new int[] { 1 },
        new int?(5));

    CheckRandomData(Random(), analyzer, 100);
}
public virtual void TestTwoChars()
{
    // Build a tokenizer that emits runs of exactly two characters.
    CharacterRunAutomaton pairChar = new CharacterRunAutomaton(new RegExp("..").ToAutomaton());
    Analyzer analyzer = new MockAnalyzer(Random(), pairChar, false);

    AssertAnalyzesTo(
        analyzer,
        "foobar",
        new string[] { "fo", "ob", "ar" },
        new int[] { 0, 2, 4 },
        new int[] { 2, 4, 6 });

    // make sure when last term is a "partial" match that End() is correct
    AssertTokenStreamContents(
        analyzer.TokenStream("bogus", new StringReader("fooba")),
        new string[] { "fo", "ob" },
        new int[] { 0, 2 },
        new int[] { 2, 4 },
        new int[] { 1, 1 },
        new int?(5));

    CheckRandomData(Random(), analyzer, 100);
}
public virtual void TestForwardOffsets()
{
    // Analyzing text through a MockCharFilter (which remaps character offsets)
    // must never produce offsets that move backwards; MockAnalyzer's internal
    // checks fail the test if they do.
    int num = AtLeast(10000);
    for (int i = 0; i < num; i++)
    {
        string s = TestUtil.RandomHtmlishString(Random(), 20);
        StringReader reader = new StringReader(s);
        MockCharFilter charfilter = new MockCharFilter(reader, 2);
        MockAnalyzer analyzer = new MockAnalyzer(Random());
        Exception priorException = null;
        // BUG FIX: tokenize through the char filter itself, not its underlying
        // reader (charfilter.input) — passing the inner reader bypassed the
        // filter, so offset correction was never exercised; this also removes
        // the non-PascalCase member access.
        TokenStream ts = analyzer.TokenStream("bogus", charfilter);
        try
        {
            ts.Reset();
            while (ts.IncrementToken())
            {
                // consume; offset consistency is verified inside the mock stream
            }
            ts.End();
        }
        catch (Exception e)
        {
            priorException = e;
        }
        finally
        {
            IOUtils.CloseWhileHandlingException(priorException, ts);
        }
    }
}
public virtual void TestRandomPhrases()
{
    // Indexes documents built from random terms (and random sub-phrases of
    // earlier documents), then verifies that a PhraseQuery over a contiguous
    // slice of a document's terms actually finds that document.
    Directory dir = NewDirectory();
    Analyzer analyzer = new MockAnalyzer(Random());
    RandomIndexWriter w = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMergePolicy(NewLogMergePolicy()));
    // docs[i] holds the exact token sequence written to field "f" of document i,
    // so phrase queries can be reconstructed from it later.
    IList<IList<string>> docs = new List<IList<string>>();
    Documents.Document d = new Documents.Document();
    Field f = NewTextField("f", "", Field.Store.NO);
    d.Add(f);
    Random r = Random();
    int NUM_DOCS = AtLeast(10);
    for (int i = 0; i < NUM_DOCS; i++)
    {
        // must be > 4096 so it spans multiple chunks
        int termCount = TestUtil.NextInt(Random(), 4097, 8200);

        IList<string> doc = new List<string>();

        StringBuilder sb = new StringBuilder();
        while (doc.Count < termCount)
        {
            if (r.Next(5) == 1 || docs.Count == 0)
            {
                // make new non-empty-string term
                string term;
                while (true)
                {
                    term = TestUtil.RandomUnicodeString(r);
                    if (term.Length > 0)
                    {
                        break;
                    }
                }
                IOException priorException = null;
                // Run the raw term through the analyzer so `doc` records the
                // tokens exactly as they will appear in the index.
                TokenStream ts = analyzer.TokenStream("ignore", new StringReader(term));
                try
                {
                    ICharTermAttribute termAttr = ts.AddAttribute<ICharTermAttribute>();
                    ts.Reset();
                    while (ts.IncrementToken())
                    {
                        string text = termAttr.ToString();
                        doc.Add(text);
                        sb.Append(text).Append(' ');
                    }
                    ts.End();
                }
                catch (IOException e)
                {
                    priorException = e;
                }
                finally
                {
                    IOUtils.CloseWhileHandlingException(priorException, ts);
                }
            }
            else
            {
                // pick existing sub-phrase
                IList<string> lastDoc = docs[r.Next(docs.Count)];
                int len = TestUtil.NextInt(r, 1, 10);
                int start = r.Next(lastDoc.Count - len);
                for (int k = start; k < start + len; k++)
                {
                    string t = lastDoc[k];
                    doc.Add(t);
                    sb.Append(t).Append(' ');
                }
            }
        }
        docs.Add(doc);
        // Reuse the same Document/Field instance for every add; only the value changes.
        f.StringValue = sb.ToString();
        w.AddDocument(d);
    }

    IndexReader reader = w.Reader;
    IndexSearcher s = NewSearcher(reader);
    w.Dispose();

    // now search
    int num = AtLeast(10);
    for (int i = 0; i < num; i++)
    {
        // Pick a random document and a random contiguous run of 2..20 of its terms.
        int docID = r.Next(docs.Count);
        IList<string> doc = docs[docID];

        int numTerm = TestUtil.NextInt(r, 2, 20);
        int start = r.Next(doc.Count - numTerm);
        PhraseQuery pq = new PhraseQuery();
        StringBuilder sb = new StringBuilder();
        for (int t = start; t < start + numTerm; t++)
        {
            pq.Add(new Term("f", doc[t]));
            sb.Append(doc[t]).Append(' ');
        }

        // The source document must appear somewhere in the phrase-query hits.
        TopDocs hits = s.Search(pq, NUM_DOCS);
        bool found = false;
        for (int j = 0; j < hits.ScoreDocs.Length; j++)
        {
            if (hits.ScoreDocs[j].Doc == docID)
            {
                found = true;
                break;
            }
        }

        Assert.IsTrue(found, "phrase '" + sb + "' not found; start=" + start);
    }

    reader.Dispose();
    dir.Dispose();
}