/// <summary>
/// Accepts the source when the text of its term attribute can be parsed by
/// <c>dateFormat</c>; rejects it when parsing throws a ParseException.
/// </summary>
/// <param name="source">Attribute source whose CharTermAttribute is inspected.</param>
/// <returns>true when the term text parsed as a date, false otherwise.</returns>
public override bool accept(AttributeSource source)
	  {
		// The term attribute is looked up on the first call only and then reused.
		if (termAtt == null)
		{
		  termAtt = source.addAttribute(typeof(CharTermAttribute));
		}

		bool isDate;
		try
		{
		  // Only parseability matters here; the parsed value itself is discarded.
		  DateTime parsed = dateFormat.parse(termAtt.ToString());
		  isDate = parsed != null;
		}
		catch (ParseException)
		{
		  // Unparseable text simply means "not a date".
		  isDate = false;
		}

		return isDate;
	  }
	  /// <summary>
	  /// Copies the text of the current token into the supplied CharTermAttribute.
	  /// </summary>
	  /// <param name="t">Attribute that receives the characters taken from zzBuffer.</param>
	  public void getText(CharTermAttribute t)
	  {
		// The token occupies zzBuffer from zzStartRead up to (not including) zzMarkedPos.
		int tokenLength = zzMarkedPos - zzStartRead;
		t.copyBuffer(zzBuffer, zzStartRead, tokenLength);
	  }
        /// <summary>
        /// Randomized test: generates a random document and a random set of synonym
        /// rules, registers the rules with the builder, computes the expected output
        /// with <c>slowSynMatcher</c> and checks it through <c>verify</c>.
        /// </summary>
        /// <remarks>
        /// Converted from Java (<c>public void testRandom() throws Exception</c>).
        /// </remarks>
        public virtual void testRandom()
        {
            int alphabetSize = TestUtil.Next(random(), 2, 7);

            int docLen = atLeast(3000);
            // int docLen = 50;   // alternative kept from the original for debugging

            string document = getRandomString('a', alphabetSize, docLen);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: doc=" + document);
            }

            int numSyn = atLeast(5);
            // int numSyn = 2;    // alternative kept from the original for debugging

            // synMap indexes the rules by input text; syns keeps them in creation order.
            IDictionary<string, OneSyn> synMap = new Dictionary<string, OneSyn>();
            IList<OneSyn> syns = new List<OneSyn>();
            bool dedup = random().nextBoolean();
            if (VERBOSE)
            {
                Console.WriteLine("  dedup=" + dedup);
            }
            b = new SynonymMap.Builder(dedup);
            for (int synIDX = 0; synIDX < numSyn; synIDX++)
            {
                string synIn = getRandomString('a', alphabetSize, TestUtil.Next(random(), 1, 5)).Trim();

                // The Dictionary indexer throws KeyNotFoundException for a missing key
                // (Java's Map.get returned null), so probe with TryGetValue instead.
                OneSyn s;
                if (!synMap.TryGetValue(synIn, out s))
                {
                    s = new OneSyn();
                    s.@in = synIn;
                    syns.Add(s);
                    s.@out = new List<string>();
                    synMap[synIn] = s;
                    s.keepOrig = random().nextBoolean();
                }

                string synOut = getRandomString('0', 10, TestUtil.Next(random(), 1, 5)).Trim();
                s.@out.Add(synOut);
                add(synIn, synOut, s.keepOrig);
                if (VERBOSE)
                {
                    Console.WriteLine("  syns[" + synIDX + "] = " + s.@in + " -> " + s.@out + " keepOrig=" + s.keepOrig);
                }
            }

            // Run a one-token MockTokenizer to completion before handing it to the filter.
            tokensIn = new MockTokenizer(new StringReader("a"), MockTokenizer.WHITESPACE, true);
            tokensIn.reset();
            assertTrue(tokensIn.incrementToken());
            assertFalse(tokensIn.incrementToken());
            tokensIn.end();
            tokensIn.close();

            tokensOut = new SynonymFilter(tokensIn, b.build(), true);
            termAtt = tokensOut.addAttribute(typeof(CharTermAttribute));
            posIncrAtt = tokensOut.addAttribute(typeof(PositionIncrementAttribute));
            posLenAtt = tokensOut.addAttribute(typeof(PositionLengthAttribute));
            offsetAtt = tokensOut.addAttribute(typeof(OffsetAttribute));

            if (dedup)
            {
                pruneDups(syns);
            }

            string expected = slowSynMatcher(document, syns, 5);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: expected=" + expected);
            }

            verify(document, expected);
        }
 /// <summary>
 /// Asserts that the term attribute holds exactly the expected text: first the
 /// lengths are compared, then each character of the term buffer.
 /// </summary>
 /// <param name="term">Term attribute under test.</param>
 /// <param name="expected">Text the term is expected to contain.</param>
 private void assertEquals(CharTermAttribute term, string expected)
 {
     int expectedLength = expected.Length;
     assertEquals(expectedLength, term.length());

     // Only the first expectedLength entries of the buffer are compared.
     char[] termChars = term.buffer();
     int pos = 0;
     while (pos < expectedLength)
     {
         assertEquals(expected[pos], termChars[pos]);
         pos++;
     }
 }
        /// <summary>
        /// Registers the rules "aaa" and "bbb" (each with a multi-word output) and
        /// verifies the filter output for two sample inputs. The expected strings
        /// depend on the compile-time constant <c>keepOrig</c>.
        /// </summary>
        /// <remarks>
        /// Converted from Java (<c>public void testBasic2() throws Exception</c>).
        /// </remarks>
        public virtual void testBasic2()
        {
            const bool keepOrig = false;

            b = new SynonymMap.Builder(true);
            add("aaa", "aaaa1 aaaa2 aaaa3", keepOrig);
            add("bbb", "bbbb1 bbbb2", keepOrig);

            // Run a one-token MockTokenizer to completion before handing it to the filter.
            tokensIn = new MockTokenizer(new StringReader("a"), MockTokenizer.WHITESPACE, true);
            tokensIn.reset();
            bool hadFirstToken = tokensIn.incrementToken();
            assertTrue(hadFirstToken);
            bool hadSecondToken = tokensIn.incrementToken();
            assertFalse(hadSecondToken);
            tokensIn.end();
            tokensIn.close();

            tokensOut = new SynonymFilter(tokensIn, b.build(), true);
            termAtt = tokensOut.addAttribute(typeof(CharTermAttribute));
            posIncrAtt = tokensOut.addAttribute(typeof(PositionIncrementAttribute));
            posLenAtt = tokensOut.addAttribute(typeof(PositionLengthAttribute));
            offsetAtt = tokensOut.addAttribute(typeof(OffsetAttribute));

            // With keepOrig the matched input token stays stacked with the first synonym.
            string expectedForBbb = keepOrig
                ? "xyzzy bbb/bbbb1 pot/bbbb2 of gold"
                : "xyzzy bbbb1 pot/bbbb2 of gold";
            string expectedForAaa = keepOrig
                ? "xyzzy aaa/aaaa1 pot/aaaa2 of/aaaa3 gold"
                : "xyzzy aaaa1 pot/aaaa2 of/aaaa3 gold";

            verify("xyzzy bbb pot of gold", expectedForBbb);
            verify("xyzzy aaa pot of gold", expectedForAaa);
        }
        /// <summary>
        /// Verifies the output when a synonym ("a" -> "a b") produces a token that
        /// hangs off the end of the input, i.e. has no input token under it.
        /// </summary>
        /// <remarks>
        /// Converted from Java (<c>public void testOutputHangsOffEnd() throws Exception</c>).
        /// </remarks>
        public virtual void testOutputHangsOffEnd()
        {
            const bool keepOrig = false;

            b = new SynonymMap.Builder(true);
            // The "b" of the output hangs off the end (no input token under it):
            add("a", "a b", keepOrig);

            // Run a one-token MockTokenizer to completion before handing it to the filter.
            tokensIn = new MockTokenizer(new StringReader("a"), MockTokenizer.WHITESPACE, true);
            tokensIn.reset();
            bool hadFirstToken = tokensIn.incrementToken();
            assertTrue(hadFirstToken);
            bool hadSecondToken = tokensIn.incrementToken();
            assertFalse(hadSecondToken);
            tokensIn.end();
            tokensIn.close();

            tokensOut = new SynonymFilter(tokensIn, b.build(), true);
            termAtt = tokensOut.addAttribute(typeof(CharTermAttribute));
            posIncrAtt = tokensOut.addAttribute(typeof(PositionIncrementAttribute));
            offsetAtt = tokensOut.addAttribute(typeof(OffsetAttribute));
            posLenAtt = tokensOut.addAttribute(typeof(PositionLengthAttribute));

            // Make sure endOffset inherits from previous input token:
            string input = "a";
            string expected = "a b:1";
            verify(input, expected);
        }
        /// <summary>
        /// Registers a mixed set of synonym rules (single- and multi-word inputs,
        /// keepOrig both true and false), verifies the filter output for a series
        /// of inputs, and checks <c>tokensOut.CaptureCount</c> afterwards.
        /// </summary>
        /// <remarks>
        /// Converted from Java (<c>public void testBasic() throws Exception</c>).
        /// The verify calls run in a fixed order because the CaptureCount
        /// assertions follow specific calls.
        /// </remarks>
        public virtual void testBasic()
        {
            b = new SynonymMap.Builder(true);
            add("a", "foo", true);
            add("a b", "bar fee", true);
            add("b c", "dog collar", true);
            add("c d", "dog harness holder extras", true);
            add("m c e", "dog barks loudly", false);
            add("i j k", "feep", true);

            // Same input registered twice with different outputs.
            add("e f", "foo bar", false);
            add("e f", "baz bee", false);

            add("z", "boo", false);
            add("y", "bee", true);

            // Run a one-token MockTokenizer to completion before handing it to the filter.
            tokensIn = new MockTokenizer(new StringReader("a"), MockTokenizer.WHITESPACE, true);
            tokensIn.reset();
            bool hadFirstToken = tokensIn.incrementToken();
            assertTrue(hadFirstToken);
            bool hadSecondToken = tokensIn.incrementToken();
            assertFalse(hadSecondToken);
            tokensIn.end();
            tokensIn.close();

            tokensOut = new SynonymFilter(tokensIn, b.build(), true);
            termAtt = tokensOut.addAttribute(typeof(CharTermAttribute));
            posIncrAtt = tokensOut.addAttribute(typeof(PositionIncrementAttribute));
            posLenAtt = tokensOut.addAttribute(typeof(PositionLengthAttribute));
            offsetAtt = tokensOut.addAttribute(typeof(OffsetAttribute));

            // Each row is { input, expected output }; rows are verified top to bottom.
            string[][] matchingCases =
            {
                new string[] { "a b c", "a/bar b/fee c" },
                // syn output extends beyond input tokens
                new string[] { "x a b c d", "x a/bar b/fee c/dog d/harness holder extras" },
                new string[] { "a b a", "a/bar b/fee a/foo" },
                // outputs that add to one another:
                new string[] { "c d c d", "c/dog d/harness c/holder/dog d/extras/harness holder extras" },
                // two outputs for same input
                new string[] { "e f", "foo/baz bar/bee" },
                // verify multi-word / single-output offsets:
                new string[] { "g i j k g", "g i/feep:7_3 j k g" },
                // mixed keepOrig true/false:
                new string[] { "a m c e x", "a/foo dog barks loudly x" },
                new string[] { "c d m c e x", "c/dog d/harness holder/dog extras/barks loudly x" }
            };
            for (int caseIdx = 0; caseIdx < matchingCases.Length; caseIdx++)
            {
                string[] testCase = matchingCases[caseIdx];
                verify(testCase[0], testCase[1]);
            }
            assertTrue(tokensOut.CaptureCount > 0);

            // no captureStates when no syns matched
            verify("p q r s t", "p q r s t");
            assertEquals(0, tokensOut.CaptureCount);

            // no captureStates when only single-input syns, w/ no
            // lookahead needed, matched
            verify("p q z y t", "p q boo y/bee t");
            assertEquals(0, tokensOut.CaptureCount);
        }
Esempio n. 8
0
		/// <summary>
		/// Creates a window token for the given attribute source and caches the
		/// source's term and offset attributes.
		/// </summary>
		/// <param name="outerInstance">The enclosing ShingleFilter instance.</param>
		/// <param name="attSource">Attribute source this token reads from.</param>
		public InputWindowToken(ShingleFilter outerInstance, AttributeSource attSource)
		{
			this.outerInstance = outerInstance;
			this.attSource = attSource;

			// Resolve both attributes from the source that was just stored.
			termAtt = attSource.getAttribute(typeof(CharTermAttribute));
			offsetAtt = attSource.getAttribute(typeof(OffsetAttribute));
		}
Esempio n. 9
0
        /*
         * Need to worry about multiple scenarios:
         *  - need to go for the longest match
         *    a b => foo      #shouldn't match if "a b" is followed by "c d"
         *    a b c d => bar
         *  - need to backtrack - retry matches for tokens already read
         *     a b c d => foo
         *       b c => bar
         *     If the input stream is "a b c x", one will consume "a b c d"
         *     trying to match the first rule... all but "a" should be
         *     pushed back so a match may be made on "b c".
         *  - don't try and match generated tokens (thus need separate queue)
         *    matching is not recursive.
         *  - handle optional generation of original tokens in all these cases,
         *    merging token streams to preserve token positions.
         *  - preserve original positionIncrement of first matched token
         */
        /// <summary>
        /// Advances to the next output token. Pending replacement tokens are
        /// returned first; otherwise the next input token is read and, if it
        /// starts a synonym match, a merged list of synonym and (optionally)
        /// original tokens is generated and served on the following loop pass.
        /// </summary>
        /// <returns>
        /// true when a token was copied into this stream; false when
        /// <c>nextTok()</c> returned null.
        /// </returns>
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
        public override bool incrementToken()
        {
            while (true)
            {
                // if there are any generated tokens, return them... don't try any
                // matches against them, as we specifically don't want recursion.
//JAVA TO C# CONVERTER TODO TASK: Java iterators are only converted within the context of 'while' and 'for' loops:
                // NOTE(review): hasNext()/next() are Java iterator calls left by the converter,
                // while 'replacement' is assigned from generated.GetEnumerator() at the bottom
                // of this loop - confirm the declared type of 'replacement' supports them.
                if (replacement != null && replacement.hasNext())
                {
//JAVA TO C# CONVERTER TODO TASK: Java iterators are only converted within the context of 'while' and 'for' loops:
                    copy(this, replacement.next());
                    return(true);
                }

                // common case fast-path of first token not matching anything
                AttributeSource firstTok = nextTok();
                if (firstTok == null)
                {
                    return(false);
                }
                CharTermAttribute termAtt = firstTok.addAttribute(typeof(CharTermAttribute));
                // Look the term up in the top-level submap; a null submap means no lookup is done.
                SlowSynonymMap    result  = map.submap != null?map.submap.get(termAtt.buffer(), 0, termAtt.length()) : null;

                if (result == null)
                {
                    copy(this, firstTok);
                    return(true);
                }

                // fast-path failed, clone ourselves if needed
                if (firstTok == this)
                {
                    firstTok = cloneAttributes();
                }
                // OK, we matched a token, so find the longest match.

                // NOTE(review): 'new LinkedList <>()' is Java diamond syntax; C# needs an explicit
                // type argument (presumably AttributeSource, given matched.Last.Value below) - confirm.
                matched = new LinkedList <>();

                result = match(result);

                if (result == null)
                {
                    // no match, simply return the first token read.
                    copy(this, firstTok);
                    return(true);
                }

                // reuse, or create new one each time?
                List <AttributeSource> generated = new List <AttributeSource>(result.synonyms.Length + matched.Count + 1);

                //
                // there was a match... let's generate the new tokens, merging
                // in the matched tokens (position increments need adjusting)
                //
                // lastTok supplies the end offset for every generated synonym token.
                AttributeSource lastTok     = matched.Count == 0 ? firstTok : matched.Last.Value;
                bool            includeOrig = result.includeOrig();

                // origTok stays null when originals are not included, which disables
                // both original-token loops below.
                AttributeSource            origTok        = includeOrig ? firstTok : null;
                PositionIncrementAttribute firstPosIncAtt = firstTok.addAttribute(typeof(PositionIncrementAttribute));
                int origPos = firstPosIncAtt.PositionIncrement; // position of origTok in the original stream
                int repPos  = 0;                                // curr position in replacement token stream
                int pos     = 0;                                // current position in merged token stream

                for (int i = 0; i < result.synonyms.Length; i++)
                {
                    Token                      repTok       = result.synonyms[i];
                    // Each synonym token starts as a clone of firstTok, then has its term,
                    // offsets and position increment overwritten.
                    AttributeSource            newTok       = firstTok.cloneAttributes();
                    CharTermAttribute          newTermAtt   = newTok.addAttribute(typeof(CharTermAttribute));
                    OffsetAttribute            newOffsetAtt = newTok.addAttribute(typeof(OffsetAttribute));
                    PositionIncrementAttribute newPosIncAtt = newTok.addAttribute(typeof(PositionIncrementAttribute));

                    OffsetAttribute lastOffsetAtt = lastTok.addAttribute(typeof(OffsetAttribute));

                    // Keep the cloned start offset, but extend the end offset to that of lastTok.
                    newOffsetAtt.setOffset(newOffsetAtt.startOffset(), lastOffsetAtt.endOffset());
                    newTermAtt.copyBuffer(repTok.buffer(), 0, repTok.length());
                    repPos += repTok.PositionIncrement;
                    if (i == 0)     // make position of first token equal to original
                    {
                        repPos = origPos;
                    }

                    // if necessary, insert original tokens and adjust position increment
                    while (origTok != null && origPos <= repPos)
                    {
                        PositionIncrementAttribute origPosInc = origTok.addAttribute(typeof(PositionIncrementAttribute));
                        origPosInc.PositionIncrement = origPos - pos;
                        generated.Add(origTok);
                        pos    += origPosInc.PositionIncrement;
                        // NOTE(review): the result of RemoveFirst() is used as a value here;
                        // System.Collections.Generic.LinkedList<T>.RemoveFirst() returns void -
                        // confirm 'matched' is a type whose RemoveFirst returns the removed element.
                        origTok = matched.Count == 0 ? null : matched.RemoveFirst();
                        if (origTok != null)
                        {
                            origPosInc = origTok.addAttribute(typeof(PositionIncrementAttribute));
                            origPos   += origPosInc.PositionIncrement;
                        }
                    }

                    // Increment is relative to the last token already placed in the merged stream.
                    newPosIncAtt.PositionIncrement = repPos - pos;
                    generated.Add(newTok);
                    pos += newPosIncAtt.PositionIncrement;
                }

                // finish up any leftover original tokens
                // (same steps as the inner loop above, without the origPos <= repPos bound)
                while (origTok != null)
                {
                    PositionIncrementAttribute origPosInc = origTok.addAttribute(typeof(PositionIncrementAttribute));
                    origPosInc.PositionIncrement = origPos - pos;
                    generated.Add(origTok);
                    pos    += origPosInc.PositionIncrement;
                    origTok = matched.Count == 0 ? null : matched.RemoveFirst();
                    if (origTok != null)
                    {
                        origPosInc = origTok.addAttribute(typeof(PositionIncrementAttribute));
                        origPos   += origPosInc.PositionIncrement;
                    }
                }

                // what if we replaced a longer sequence with a shorter one?
                // a/0 b/5 =>  foo/0
                // should I re-create the gap on the next buffered token?

                replacement = generated.GetEnumerator();
                // Now return to the top of the loop to read and return the first
                // generated token.. The reason this is done is that we may have generated
                // nothing at all, and may need to continue with more matching logic.
            }
        }
Esempio n. 10
0
            /// <summary>
            /// Accepts the source only when the text of its term attribute equals
            /// "Dogs", ignoring case.
            /// </summary>
            /// <param name="a">Attribute source whose CharTermAttribute is inspected.</param>
            /// <returns>true when the term text matches "Dogs" case-insensitively.</returns>
            public override bool accept(AttributeSource a)
            {
                CharTermAttribute termAtt = a.getAttribute(typeof(CharTermAttribute));

                // Matching a fixed keyword must not depend on the thread's current culture
                // (culture-sensitive comparison applies linguistic rules such as ignoring
                // zero-width characters), so compare ordinally.
                return(termAtt.ToString().Equals("Dogs", StringComparison.OrdinalIgnoreCase));
            }
Esempio n. 11
0
        /// <summary>
        /// Not an explicit test, just useful to print out some info on performance.
        /// For each token count it builds a text of upper-cased English number words,
        /// checks that a tee/sink pair yields the same tokens as a ModuloTokenFilter,
        /// then times "two separately analyzed fields" against "one field plus a sink"
        /// for every modulo count and prints the elapsed milliseconds.
        /// </summary>
        /// <remarks>
        /// Converted from Java (<c>public void performance() throws Exception</c>).
        /// </remarks>
        public virtual void performance()
        {
            int[] tokCount  = new int[] { 100, 500, 1000, 2000, 5000, 10000 };
            int[] modCounts = new int[] { 1, 2, 5, 10, 20, 50, 100, 200, 500 };
            for (int k = 0; k < tokCount.Length; k++)
            {
                StringBuilder buffer = new StringBuilder();
                Console.WriteLine("-----Tokens: " + tokCount[k] + "-----");
                for (int i = 0; i < tokCount[k]; i++)
                {
                    // Culture-invariant upper-casing; replaces the unconverted Java call
                    // toUpperCase(Locale.ROOT), which does not exist on System.String.
                    buffer.Append(English.intToEnglish(i).ToUpperInvariant()).Append(' ');
                }
                //make sure we produce the same tokens
                TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(new StandardFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.ToString()))));
                TokenStream        sink      = teeStream.newSinkTokenStream(new ModuloSinkFilter(this, 100));
                teeStream.consumeAllTokens();
                TokenStream       stream  = new ModuloTokenFilter(this, new StandardFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.ToString()))), 100);
                CharTermAttribute tfTok   = stream.addAttribute(typeof(CharTermAttribute));
                CharTermAttribute sinkTok = sink.addAttribute(typeof(CharTermAttribute));
                for (int i = 0; stream.incrementToken(); i++)
                {
                    assertTrue(sink.incrementToken());
                    assertTrue(tfTok + " is not equal to " + sinkTok + " at token: " + i, tfTok.Equals(sinkTok));
                }

                //simulate two fields, each being analyzed once, for 20 documents
                for (int j = 0; j < modCounts.Length; j++)
                {
                    // tfPos accumulates position increments over both streams of the
                    // two-field run; it is compared with sinkPos at the end.
                    int  tfPos = 0;
                    long start = DateTimeHelperClass.CurrentUnixTimeMillis();
                    for (int i = 0; i < 20; i++)
                    {
                        stream = new StandardFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.ToString())));
                        PositionIncrementAttribute posIncrAtt = stream.getAttribute(typeof(PositionIncrementAttribute));
                        while (stream.incrementToken())
                        {
                            tfPos += posIncrAtt.PositionIncrement;
                        }
                        stream     = new ModuloTokenFilter(this, new StandardFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.ToString()))), modCounts[j]);
                        posIncrAtt = stream.getAttribute(typeof(PositionIncrementAttribute));
                        while (stream.incrementToken())
                        {
                            tfPos += posIncrAtt.PositionIncrement;
                        }
                    }
                    long finish = DateTimeHelperClass.CurrentUnixTimeMillis();
                    Console.WriteLine("ModCount: " + modCounts[j] + " Two fields took " + (finish - start) + " ms");
                    int sinkPos = 0;
                    //simulate one field with one sink
                    start = DateTimeHelperClass.CurrentUnixTimeMillis();
                    for (int i = 0; i < 20; i++)
                    {
                        teeStream = new TeeSinkTokenFilter(new StandardFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.ToString()))));
                        sink      = teeStream.newSinkTokenStream(new ModuloSinkFilter(this, modCounts[j]));
                        PositionIncrementAttribute posIncrAtt = teeStream.getAttribute(typeof(PositionIncrementAttribute));
                        while (teeStream.incrementToken())
                        {
                            sinkPos += posIncrAtt.PositionIncrement;
                        }
                        // The sink is drained only after the tee stream has been fully consumed.
                        posIncrAtt = sink.getAttribute(typeof(PositionIncrementAttribute));
                        while (sink.incrementToken())
                        {
                            sinkPos += posIncrAtt.PositionIncrement;
                        }
                    }
                    finish = DateTimeHelperClass.CurrentUnixTimeMillis();
                    Console.WriteLine("ModCount: " + modCounts[j] + " Tee fields took " + (finish - start) + " ms");
                    // Both strategies must have seen the same total of position increments.
                    assertTrue(sinkPos + " does not equal: " + tfPos, sinkPos == tfPos);
                }
                Console.WriteLine("- End Tokens: " + tokCount[k] + "-----");
            }
        }