/// <summary>
/// Create a new MockTokenFilter.
/// </summary>
/// <param name="input"> TokenStream to filter </param>
/// <param name="filter"> DFA representing the terms that should be removed. </param>
public MockTokenFilter(TokenStream input, CharacterRunAutomaton filter)
    : base(input)
{
    Filter = filter;
    TermAtt = AddAttribute<ICharTermAttribute>();
    PosIncrAtt = AddAttribute<IPositionIncrementAttribute>();
}
public virtual void TestStartPositions()
{
    Directory directory = NewDirectory();

    // mimic StopAnalyzer
    var stopWords = new CharacterRunAutomaton(new RegExp("the|a|of").ToAutomaton());
    Analyzer analyzer = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, stopWords);

    var iw = new RandomIndexWriter(Random(), directory, analyzer);

    var firstDoc = new Document();
    firstDoc.Add(NewTextField("field", "the quick brown fox", Field.Store.NO));
    iw.AddDocument(firstDoc);

    var secondDoc = new Document();
    secondDoc.Add(NewTextField("field", "quick brown fox", Field.Store.NO));
    iw.AddDocument(secondDoc);

    IndexReader reader = iw.Reader;
    IndexSearcher searcher = NewSearcher(reader);

    // user queries on "starts-with quick"
    SpanQuery sfq = new SpanFirstQuery(new SpanTermQuery(new Term("field", "quick")), 1);
    Assert.AreEqual(1, searcher.Search(sfq, 10).TotalHits);

    // user queries on "starts-with the quick"
    SpanQuery include = new SpanFirstQuery(new SpanTermQuery(new Term("field", "quick")), 2);
    sfq = new SpanNotQuery(include, sfq);
    Assert.AreEqual(1, searcher.Search(sfq, 10).TotalHits);

    iw.Dispose();
    reader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Creates a new MockAnalyzer.
/// </summary>
/// <param name="random"> Random for payloads behavior </param>
/// <param name="runAutomaton"> DFA describing how tokenization should happen (e.g. [a-zA-Z]+) </param>
/// <param name="lowerCase"> true if the tokenizer should lowercase terms </param>
/// <param name="filter"> DFA describing how terms should be filtered (set of stopwords, etc) </param>
public MockAnalyzer(Random random, CharacterRunAutomaton runAutomaton, bool lowerCase, CharacterRunAutomaton filter)
    : base(PER_FIELD_REUSE_STRATEGY)
{
    // TODO: this should be solved in a different way; Random should not be shared (!).
    this.Random = new Random(random.Next());
    RunAutomaton = runAutomaton;
    LowerCase = lowerCase;
    Filter = filter;
}
// Per-instance RNG seeded from the shared test-framework Random.
// LUCENENET TODO: Random seed synchronization
private readonly Random random = new Random(LuceneTestCase.Random.Next() /*RandomizedContext.Current.Random.nextLong()*/);

public MockTokenizer(AttributeFactory factory, TextReader input, CharacterRunAutomaton runAutomaton, bool lowerCase, int maxTokenLength)
    : base(factory, input)
{
    this.runAutomaton = runAutomaton;
    this.lowerCase = lowerCase;
    this.maxTokenLength = maxTokenLength;
    state = runAutomaton.InitialState;
    streamState = State.SETREADER;
    termAtt = AddAttribute<ICharTermAttribute>();
    offsetAtt = AddAttribute<IOffsetAttribute>();
}
// Deferred field initialization (pattern used when porting Java anonymous
// inner classes, whose field initializers may reference the outer instance).
private void InitializeInstanceFields()
{
    RunAutomaton = new CharacterRunAutomaton(OuterInstance.Automaton);
}
/// <summary>
/// One-time setup: indexes a batch of random documents against a single
/// random stop word, applies some random deletes, then opens two searchers
/// over the resulting reader.
/// </summary>
public void BeforeClass()
{
    Random rnd = Random();

    Directory = NewDirectory();
    Stopword = "" + RandomChar();
    var stopSet = new CharacterRunAutomaton(BasicAutomata.MakeString(Stopword));
    Analyzer = new MockAnalyzer(rnd, MockTokenizer.WHITESPACE, false, stopSet);

    var iw = new RandomIndexWriter(rnd, Directory, Analyzer);

    // A single reusable document whose fields are overwritten per iteration.
    var doc = new Document();
    Field id = new StringField("id", "", Field.Store.NO);
    Field field = new TextField("field", "", Field.Store.NO);
    doc.Add(id);
    doc.Add(field);

    // index some docs
    int numDocs = AtLeast(1000);
    for (int i = 0; i < numDocs; i++)
    {
        id.StringValue = Convert.ToString(i);
        field.StringValue = RandomFieldContents();
        iw.AddDocument(doc);
    }

    // delete some docs
    int numDeletes = numDocs / 20;
    for (int i = 0; i < numDeletes; i++)
    {
        var toDelete = new Term("id", Convert.ToString(rnd.Next(numDocs)));
        if (rnd.NextBoolean())
        {
            iw.DeleteDocuments(toDelete);
        }
        else
        {
            iw.DeleteDocuments(new TermQuery(toDelete));
        }
    }

    Reader = iw.Reader;
    S1 = NewSearcher(Reader);
    S2 = NewSearcher(Reader);
    iw.Dispose();
}
// Captures the outer test instance plus the per-iteration tokenizer
// configuration (DFA, casing, token-length limit).
public AnalyzerAnonymousInnerClassHelper2(TestMockAnalyzer outerInstance, CharacterRunAutomaton dfa, bool lowercase, int limit)
{
    OuterInstance = outerInstance;
    Dfa = dfa;
    Lowercase = lowercase;
    Limit = limit;
}
// Tokenizer pattern: one uppercase letter followed by zero or more lowercase
// letters, so camel-case words split and bare capitals become single tokens.
public virtual void TestUppercase()
{
    var single = new CharacterRunAutomaton(new RegExp("[A-Z][a-z]*").ToAutomaton());
    Analyzer a = new MockAnalyzer(Random(), single, false);

    AssertAnalyzesTo(a, "FooBarBAZ",
        new[] { "Foo", "Bar", "B", "A", "Z" },
        new[] { 0, 3, 6, 7, 8 },
        new[] { 3, 6, 7, 8, 9 });
    AssertAnalyzesTo(a, "aFooBar",
        new[] { "Foo", "Bar" },
        new[] { 1, 4 },
        new[] { 4, 7 });
    CheckRandomData(Random(), a, 100);
}
// A stop word removed mid-phrase must leave a position gap: "1 stop 2"
// should build the same phrase query as "1" followed by "2" at +2.
public virtual void TestPhraseQueryPositionIncrements()
{
    var expected = new PhraseQuery();
    expected.Add(new Term("field", "1"));
    expected.Add(new Term("field", "2"), 2);

    var stopList = new CharacterRunAutomaton(new RegExp("[sS][tT][oO][pP]").ToAutomaton());
    Analyzer analyzer = new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false, stopList);

    var builder = new QueryBuilder(analyzer);
    Assert.AreEqual(expected, builder.CreatePhraseQuery("field", "1 stop 2"));
}
/// <summary>
/// Convenience constructor: delegates to the (input, runAutomaton, lowerCase,
/// maxTokenLength) overload using DEFAULT_MAX_TOKEN_LENGTH.
/// </summary>
public MockTokenizer(TextReader input, CharacterRunAutomaton runAutomaton, bool lowerCase)
    : this(input, runAutomaton, lowerCase, DEFAULT_MAX_TOKEN_LENGTH)
{
}
// Filter DFA matches any term of five or more characters, so "toolong" and
// "notfine" are dropped and "fine" carries a position increment of 2.
public virtual void TestLength()
{
    var length5 = new CharacterRunAutomaton(new RegExp(".{5,}").ToAutomaton());
    Analyzer a = new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, true, length5);
    AssertAnalyzesTo(a, "ok toolong fine notfine",
        new[] { "ok", "fine" },
        new[] { 1, 2 });
}
// Complement of {"foo","bar"} used as the stop set acts as a keep-word
// filter: everything EXCEPT foo/bar is removed.
public virtual void TestKeep()
{
    var keepWords = new CharacterRunAutomaton(
        BasicOperations.Complement(
            Automaton.Union(
                Arrays.AsList(BasicAutomata.MakeString("foo"), BasicAutomata.MakeString("bar")))));
    Analyzer a = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, keepWords);
    AssertAnalyzesTo(a, "quick foo brown bar bar fox foo",
        new[] { "foo", "bar", "bar", "foo" },
        new[] { 2, 2, 1, 2 });
}
public MockTokenizer(AttributeFactory factory, TextReader input, CharacterRunAutomaton runAutomaton, bool lowerCase, int maxTokenLength)
    : base(factory, input)
{
    RunAutomaton = runAutomaton;
    LowerCase = lowerCase;
    MaxTokenLength = maxTokenLength;
    state = runAutomaton.InitialState;
    StreamState = State.SETREADER;
    TermAtt = AddAttribute<ICharTermAttribute>();
    OffsetAtt = AddAttribute<IOffsetAttribute>();
}
/// <summary>
/// Convenience constructor: delegates to the five-argument overload using
/// DEFAULT_MAX_TOKEN_LENGTH.
/// </summary>
// NOTE(review): this overload takes StreamReader where the sibling overloads
// take TextReader; presumably it should be widened to TextReader for
// consistency — confirm against callers before changing the signature.
public MockTokenizer(AttributeFactory factory, StreamReader input, CharacterRunAutomaton runAutomaton, bool lowerCase)
    : this(factory, input, runAutomaton, lowerCase, DEFAULT_MAX_TOKEN_LENGTH)
{
}
/// <summary>
/// Convenience constructor: delegates to the five-argument overload using
/// the default attribute factory.
/// </summary>
public MockTokenizer(TextReader input, CharacterRunAutomaton runAutomaton, bool lowerCase, int maxTokenLength)
    : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, runAutomaton, lowerCase, maxTokenLength)
{
}
// Fuzz the analyzer with randomly generated automata, random casing, and a
// random token-length limit on each iteration.
public virtual void TestRandomRegexps()
{
    int iterations = AtLeast(30);
    for (int iter = 0; iter < iterations; iter++)
    {
        var dfa = new CharacterRunAutomaton(AutomatonTestUtil.RandomAutomaton(Random()));
        bool lowercase = Random().NextBoolean();
        int limit = TestUtil.NextInt(Random(), 0, 500);
        Analyzer a = new AnalyzerAnonymousInnerClassHelper2(this, dfa, lowercase, limit);
        CheckRandomData(Random(), a, 100);
        a.Dispose();
    }
}
/// <summary>
/// Convenience constructor: delegates to the five-argument overload using
/// DEFAULT_MAX_TOKEN_LENGTH.
/// </summary>
public MockTokenizer(AttributeFactory factory, TextReader input, CharacterRunAutomaton runAutomaton, bool lowerCase)
    : this(factory, input, runAutomaton, lowerCase, DEFAULT_MAX_TOKEN_LENGTH)
{
}
// "." tokenizer: every character becomes its own token with 1-char offsets.
public virtual void TestSingleChar()
{
    CharacterRunAutomaton single = new CharacterRunAutomaton(new RegExp(".").ToAutomaton());
    Analyzer a = new MockAnalyzer(Random(), single, false);
    AssertAnalyzesTo(a, "foobar",
        new string[] { "f", "o", "o", "b", "a", "r" },
        new int[] { 0, 1, 2, 3, 4, 5 },
        new int[] { 1, 2, 3, 4, 5, 6 });
    CheckRandomData(Random(), a, 100);
}
/// <summary>
/// Calls <c>MockAnalyzer(Random, CharacterRunAutomaton, bool, CharacterRunAutomaton)</c>
/// as MockAnalyzer(random, runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET),
/// i.e. with no stop words filtered.
/// </summary>
public MockAnalyzer(Random random, CharacterRunAutomaton runAutomaton, bool lowerCase)
    : this(random, runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET)
{
}
// ".." tokenizer: input is consumed two characters at a time.
public virtual void TestTwoChars()
{
    var pairTokens = new CharacterRunAutomaton(new RegExp("..").ToAutomaton());
    Analyzer a = new MockAnalyzer(Random(), pairTokens, false);
    AssertAnalyzesTo(a, "foobar",
        new[] { "fo", "ob", "ar" },
        new[] { 0, 2, 4 },
        new[] { 2, 4, 6 });
    // make sure when last term is a "partial" match that End() is correct
    AssertTokenStreamContents(a.TokenStream("bogus", new StringReader("fooba")),
        new[] { "fo", "ob" },
        new[] { 0, 2 },
        new[] { 2, 4 },
        new[] { 1, 1 },
        new int?(5));
    CheckRandomData(Random(), a, 100);
}