Example #1
 /// <summary>
 /// Create a new MockTokenFilter.
 /// </summary>
 /// <param name="input"> TokenStream to filter </param>
 /// <param name="filter"> DFA representing the terms that should be removed. </param>
 public MockTokenFilter(TokenStream input, CharacterRunAutomaton filter)
     : base(input)
 {
     this.Filter = filter;
     TermAtt = AddAttribute<ICharTermAttribute>();
     PosIncrAtt = AddAttribute<IPositionIncrementAttribute>();
 }
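The test below exercises this filter indirectly through MockAnalyzer. As a standalone illustration, a minimal sketch of wiring the filter directly (the StringReader input and WHITESPACE tokenizer are assumptions, not part of the example):

     // sketch: drop terms accepted by a stopword DFA
     CharacterRunAutomaton stops = new CharacterRunAutomaton((new RegExp("the|a|of")).ToAutomaton());
     TokenStream stream = new MockTokenFilter(
         new MockTokenizer(new StringReader("the quick fox"), MockTokenizer.WHITESPACE, true), stops);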
        public virtual void TestStartPositions()
        {
            Directory dir = NewDirectory();

            // mimic StopAnalyzer
            CharacterRunAutomaton stopSet = new CharacterRunAutomaton((new RegExp("the|a|of")).ToAutomaton());
            Analyzer analyzer = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, stopSet);

            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, analyzer);
            Document doc = new Document();
            doc.Add(NewTextField("field", "the quick brown fox", Field.Store.NO));
            writer.AddDocument(doc);
            Document doc2 = new Document();
            doc2.Add(NewTextField("field", "quick brown fox", Field.Store.NO));
            writer.AddDocument(doc2);

            IndexReader reader = writer.Reader;
            IndexSearcher searcher = NewSearcher(reader);

            // user queries on "starts-with quick": only doc2 matches, because
            // the removed stopword "the" leaves a position gap at 0 in doc1
            SpanQuery sfq = new SpanFirstQuery(new SpanTermQuery(new Term("field", "quick")), 1);
            Assert.AreEqual(1, searcher.Search(sfq, 10).TotalHits);

            // user queries on "starts-with the quick": "quick" within the first
            // two positions but not in the very first one, i.e. only doc1 matches
            SpanQuery include = new SpanFirstQuery(new SpanTermQuery(new Term("field", "quick")), 2);
            sfq = new SpanNotQuery(include, sfq);
            Assert.AreEqual(1, searcher.Search(sfq, 10).TotalHits);

            writer.Dispose();
            reader.Dispose();
            dir.Dispose();
        }
Example #3
 /// <summary>
 /// Creates a new MockAnalyzer.
 /// </summary>
 /// <param name="random"> Random for payloads behavior </param>
 /// <param name="runAutomaton"> DFA describing how tokenization should happen (e.g. [a-zA-Z]+) </param>
 /// <param name="lowerCase"> true if the tokenizer should lowercase terms </param>
 /// <param name="filter"> DFA describing how terms should be filtered (set of stopwords, etc) </param>
 public MockAnalyzer(Random random, CharacterRunAutomaton runAutomaton, bool lowerCase, CharacterRunAutomaton filter)
     : base(PER_FIELD_REUSE_STRATEGY)
 {
     // TODO: this should be solved in a different way; Random should not be shared (!).
     this.Random = new Random(random.Next());
     this.RunAutomaton = runAutomaton;
     this.LowerCase = lowerCase;
     this.Filter = filter;
 }
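A short usage sketch (not from the port): building the analyzer and walking its token stream by hand; the field name "body" is a placeholder.

     Analyzer a = new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.EMPTY_STOPSET);
     TokenStream ts = a.TokenStream("body", new StringReader("The quick fox"));
     ICharTermAttribute term = ts.AddAttribute<ICharTermAttribute>();
     ts.Reset();
     while (ts.IncrementToken())
     {
         Console.WriteLine(term.ToString()); // "the", "quick", "fox" (lowercased)
     }
     ts.End();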
Example #4
        private readonly Random random = new Random(LuceneTestCase.Random.Next() /*RandomizedContext.Current.Random.nextLong()*/); // LUCENENET TODO: Random seed synchronization

        public MockTokenizer(AttributeFactory factory, TextReader input, CharacterRunAutomaton runAutomaton, bool lowerCase, int maxTokenLength)
            : base(factory, input)
        {
            this.runAutomaton   = runAutomaton;
            this.lowerCase      = lowerCase;
            this.state          = runAutomaton.InitialState;
            this.streamState    = State.SETREADER;
            this.maxTokenLength = maxTokenLength;
            termAtt             = AddAttribute<ICharTermAttribute>();
            offsetAtt           = AddAttribute<IOffsetAttribute>();
        }
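The streamState assignment above is what lets MockTokenizer assert the TokenStream consumer contract (Reset, then IncrementToken until it returns false, then End). A sketch of the minimal legal call sequence, with hypothetical input:

        MockTokenizer t = new MockTokenizer(new StringReader("Foo Bar"), MockTokenizer.WHITESPACE, true);
        ICharTermAttribute term = t.GetAttribute<ICharTermAttribute>();
        t.Reset();                              // required before consuming
        while (t.IncrementToken())
        {
            Console.WriteLine(term.ToString()); // "foo", then "bar"
        }
        t.End();                                // required after the last token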
Example #5
 private void InitializeInstanceFields()
 {
     RunAutomaton = new CharacterRunAutomaton(OuterInstance.Automaton);
 }
        public void BeforeClass()
        {
            Random random = Random();
            Directory = NewDirectory();
            Stopword = "" + RandomChar();
            CharacterRunAutomaton stopset = new CharacterRunAutomaton(BasicAutomata.MakeString(Stopword));
            Analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false, stopset);
            RandomIndexWriter iw = new RandomIndexWriter(random, Directory, Analyzer);
            Document doc = new Document();
            Field id = new StringField("id", "", Field.Store.NO);
            Field field = new TextField("field", "", Field.Store.NO);
            doc.Add(id);
            doc.Add(field);

            // index some docs
            int numDocs = AtLeast(1000);
            for (int i = 0; i < numDocs; i++)
            {
                id.StringValue = Convert.ToString(i);
                field.StringValue = RandomFieldContents();
                iw.AddDocument(doc);
            }

            // delete some docs
            int numDeletes = numDocs / 20;
            for (int i = 0; i < numDeletes; i++)
            {
                Term toDelete = new Term("id", Convert.ToString(random.Next(numDocs)));
                if (random.NextBoolean())
                {
                    iw.DeleteDocuments(toDelete);
                }
                else
                {
                    iw.DeleteDocuments(new TermQuery(toDelete));
                }
            }

            Reader = iw.Reader;
            S1 = NewSearcher(Reader);
            S2 = NewSearcher(Reader);
            iw.Dispose();
        }
 public AnalyzerAnonymousInnerClassHelper2(TestMockAnalyzer outerInstance, CharacterRunAutomaton dfa, bool lowercase, int limit)
 {
     this.OuterInstance = outerInstance;
     this.Dfa = dfa;
     this.Lowercase = lowercase;
     this.Limit = limit;
 }
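This helper is the port's stand-in for a Java anonymous Analyzer subclass; the fields stored above would feed a CreateComponents override along these lines (assumed shape, not shown in this excerpt):

     protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
     {
         Tokenizer tokenizer = new MockTokenizer(reader, Dfa, Lowercase, Limit);
         return new TokenStreamComponents(tokenizer, tokenizer);
     }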
 public virtual void TestUppercase()
 {
     CharacterRunAutomaton single = new CharacterRunAutomaton((new RegExp("[A-Z][a-z]*")).ToAutomaton());
     Analyzer a = new MockAnalyzer(Random(), single, false);
     AssertAnalyzesTo(a, "FooBarBAZ", new string[] { "Foo", "Bar", "B", "A", "Z" }, new int[] { 0, 3, 6, 7, 8 }, new int[] { 3, 6, 7, 8, 9 });
     AssertAnalyzesTo(a, "aFooBar", new string[] { "Foo", "Bar" }, new int[] { 1, 4 }, new int[] { 4, 7 });
     CheckRandomData(Random(), a, 100);
 }
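The pattern generalizes: MockTokenizer emits maximal character runs accepted by the DFA and skips everything else, which is why "BAZ" above splits into three single-letter tokens. A hypothetical digits-only variant in the same style:

     CharacterRunAutomaton digits = new CharacterRunAutomaton((new RegExp("[0-9]+")).ToAutomaton());
     Analyzer a = new MockAnalyzer(Random(), digits, false);
     AssertAnalyzesTo(a, "ab12cd345", new string[] { "12", "345" }, new int[] { 2, 6 }, new int[] { 4, 9 });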
Example #9
        public virtual void TestPhraseQueryPositionIncrements()
        {
            PhraseQuery expected = new PhraseQuery();
            expected.Add(new Term("field", "1"));
            expected.Add(new Term("field", "2"), 2);

            CharacterRunAutomaton stopList = new CharacterRunAutomaton((new RegExp("[sS][tT][oO][pP]")).ToAutomaton());

            Analyzer analyzer = new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false, stopList);

            QueryBuilder builder = new QueryBuilder(analyzer);
            Assert.AreEqual(expected, builder.CreatePhraseQuery("field", "1 stop 2"));
        }
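MockTokenFilter preserves position increments, so stripping "stop" leaves a gap and QueryBuilder adds "2" at position 2 instead of 1 (the second argument to PhraseQuery.Add above). For contrast, a sketch of the no-gap case with a plain analyzer:

            PhraseQuery adjacent = new PhraseQuery();
            adjacent.Add(new Term("field", "1"));
            adjacent.Add(new Term("field", "2")); // consecutive positions
            QueryBuilder b = new QueryBuilder(new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false));
            Assert.AreEqual(adjacent, b.CreatePhraseQuery("field", "1 2"));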
Example #10
 public MockTokenizer(TextReader input, CharacterRunAutomaton runAutomaton, bool lowerCase)
     : this(input, runAutomaton, lowerCase, DEFAULT_MAX_TOKEN_LENGTH)
 {
 }
 public virtual void TestLength()
 {
     CharacterRunAutomaton length5 = new CharacterRunAutomaton((new RegExp(".{5,}")).ToAutomaton());
     Analyzer a = new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, true, length5);
     AssertAnalyzesTo(a, "ok toolong fine notfine", new string[] { "ok", "fine" }, new int[] { 1, 2 });
 }
 public virtual void TestKeep()
 {
     CharacterRunAutomaton keepWords = new CharacterRunAutomaton(
         BasicOperations.Complement(Automaton.Union(
             Arrays.AsList(BasicAutomata.MakeString("foo"), BasicAutomata.MakeString("bar")))));
     Analyzer a = new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true, keepWords);
     AssertAnalyzesTo(a, "quick foo brown bar bar fox foo", new string[] { "foo", "bar", "bar", "foo" }, new int[] { 2, 2, 1, 2 });
 }
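Because MockTokenFilter drops terms its DFA accepts, complementing a keep-set automaton turns removal into a keep-list: everything except "foo" and "bar" matches the complement and is filtered out. The same trick with a regexp-built keep set (a sketch; the single int[] is the expected position increments, as in TestLength):

     CharacterRunAutomaton keepDigits = new CharacterRunAutomaton(
         BasicOperations.Complement((new RegExp("[0-9]+")).ToAutomaton()));
     Analyzer a = new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, true, keepDigits);
     AssertAnalyzesTo(a, "a 1 b 22", new string[] { "1", "22" }, new int[] { 2, 2 });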
Example #13
 public MockTokenizer(AttributeFactory factory, TextReader input, CharacterRunAutomaton runAutomaton, bool lowerCase, int maxTokenLength)
     : base(factory, input)
 {
     this.RunAutomaton = runAutomaton;
     this.LowerCase = lowerCase;
     this.state = runAutomaton.InitialState;
     this.StreamState = State.SETREADER;
     this.MaxTokenLength = maxTokenLength;
     TermAtt = AddAttribute<ICharTermAttribute>();
     OffsetAtt = AddAttribute<IOffsetAttribute>();
 }
Example #14
 public MockTokenizer(AttributeFactory factory, StreamReader input, CharacterRunAutomaton runAutomaton, bool lowerCase)
     : this(factory, input, runAutomaton, lowerCase, DEFAULT_MAX_TOKEN_LENGTH)
 {
 }
Example #17
 public MockTokenizer(TextReader input, CharacterRunAutomaton runAutomaton, bool lowerCase, int maxTokenLength)
     : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, runAutomaton, lowerCase, maxTokenLength)
 {
 }
 public virtual void TestRandomRegexps()
 {
     int iters = AtLeast(30);
     for (int i = 0; i < iters; i++)
     {
         CharacterRunAutomaton dfa = new CharacterRunAutomaton(AutomatonTestUtil.RandomAutomaton(Random()));
         bool lowercase = Random().NextBoolean();
         int limit = TestUtil.NextInt(Random(), 0, 500);
         Analyzer a = new AnalyzerAnonymousInnerClassHelper2(this, dfa, lowercase, limit);
         CheckRandomData(Random(), a, 100);
         a.Dispose();
     }
 }
Example #19
 public MockTokenizer(AttributeFactory factory, TextReader input, CharacterRunAutomaton runAutomaton, bool lowerCase)
     : this(factory, input, runAutomaton, lowerCase, DEFAULT_MAX_TOKEN_LENGTH)
 {
 }
 public virtual void TestSingleChar()
 {
     var single = new CharacterRunAutomaton((new RegExp(".")).ToAutomaton());
     Analyzer a = new MockAnalyzer(Random(), single, false);
     AssertAnalyzesTo(a, "foobar", new[] { "f", "o", "o", "b", "a", "r" }, new[] { 0, 1, 2, 3, 4, 5 }, new[] { 1, 2, 3, 4, 5, 6 });
     CheckRandomData(Random(), a, 100);
 }
Example #21
 /// <summary>
 /// Calls <see cref="MockAnalyzer(Random, CharacterRunAutomaton, bool, CharacterRunAutomaton)"/>
 /// as <c>MockAnalyzer(random, runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET)</c>.
 /// </summary>
 public MockAnalyzer(Random random, CharacterRunAutomaton runAutomaton, bool lowerCase)
     : this(random, runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET)
 {
 }
 public virtual void TestTwoChars()
 {
     CharacterRunAutomaton single = new CharacterRunAutomaton((new RegExp("..")).ToAutomaton());
     Analyzer a = new MockAnalyzer(Random(), single, false);
     AssertAnalyzesTo(a, "foobar", new string[] { "fo", "ob", "ar" }, new int[] { 0, 2, 4 }, new int[] { 2, 4, 6 });
     // make sure when last term is a "partial" match that End() is correct
     AssertTokenStreamContents(a.TokenStream("bogus", new StringReader("fooba")),
         new string[] { "fo", "ob" }, new int[] { 0, 2 }, new int[] { 2, 4 },
         new int[] { 1, 1 }, new int?(5));
     CheckRandomData(Random(), a, 100);
 }