Ejemplo n.º 1
0
        /*
         * Demonstrates how to construct a phrase query containing shingles.
         *
         * The text "this sentence" is analyzed for the "content" field; every term the
         * stream emits is added to a PhraseQuery at its accumulated position. The query
         * is then run (up to 1000 hits) and the hits are handed to compareRanks together
         * with the expected ranks { 0 }.
         *
         * The Java original declared 'throws Exception'; .NET has no 'throws' clauses.
         */
        public virtual void testShingleAnalyzerWrapperPhraseQuery()
        {
            PhraseQuery phraseQuery = new PhraseQuery();
            TokenStream stream = analyzer.tokenStream("content", "this sentence");

            try
            {
                PositionIncrementAttribute posIncrAtt = stream.addAttribute(typeof(PositionIncrementAttribute));
                CharTermAttribute termAtt = stream.addAttribute(typeof(CharTermAttribute));

                // Starts at -1 so that a first position increment of 1 yields position 0.
                int position = -1;

                stream.reset();
                for (bool hasToken = stream.incrementToken(); hasToken; hasToken = stream.incrementToken())
                {
                    position += posIncrAtt.PositionIncrement;
                    phraseQuery.add(new Term("content", termAtt.ToString()), position);
                }
                stream.end();
            }
            finally
            {
                // Always release the stream, even when analysis fails part-way through.
                IOUtils.closeWhileHandlingException(stream);
            }

            compareRanks(searcher.search(phraseQuery, null, 1000).scoreDocs, new int[] { 0 });
        }
Ejemplo n.º 2
0
 /**
  * Creates a phonetic filter on top of the given token stream.
  * @param input the token stream passed to the base constructor
  * @param encoder stored in the encoder field
  * @param name stored in the name field
  * @param inject stored in the inject field
  */
 public PhoneticFilter(TokenStream input, Encoder encoder, string name, bool inject) : base(input)
 {
     // Keep the constructor arguments for later use.
     this.inject = inject;
     this.name = name;
     this.encoder = encoder;

     // Register the term and position-increment attributes (term first, as before).
     this.termAtt = (TermAttribute)addAttribute(typeof(TermAttribute));
     this.posAtt = (PositionIncrementAttribute)addAttribute(typeof(PositionIncrementAttribute));
 }
Ejemplo n.º 3
0
        /**
         * Converts the original query string to a collection of Lucene Tokens.
         *
         * Every match of QUERY_REGEX other than the literal words "AND" and "OR" is run
         * through the analyzer. Each token the analyzer emits is copied into a new Token
         * whose start and end offsets are those of the regex match in the original string.
         *
         * @param original the original query string; may be null
         * @return a Collection of Lucene Tokens (an empty list when original is null)
         */
        public override Collection /*<Token>*/ convert(string original)
        {
            // A null query can happen with q.alt = and no query.
            if (original == null)
            {
                return Collections.emptyList();
            }

            Collection /*<Token>*/ tokens = new ArrayList/*<Token>*/ ();
            //TODO: Extract the words using a simple regex, but not query stuff, and then analyze them to produce the token stream
            Matcher words = QUERY_REGEX.matcher(original);

            while (words.find())
            {
                string word = words.group(0);

                // The boolean operators are not analyzed.
                if (word.Equals("AND") || word.Equals("OR"))
                {
                    continue;
                }

                try
                {
                    TokenStream stream = analyzer.reusableTokenStream("", new StringReader(word));
                    // TODO: support custom attributes
                    TermAttribute termAtt = (TermAttribute)stream.addAttribute(typeof(TermAttribute));
                    FlagsAttribute flagsAtt = (FlagsAttribute)stream.addAttribute(typeof(FlagsAttribute));
                    TypeAttribute typeAtt = (TypeAttribute)stream.addAttribute(typeof(TypeAttribute));
                    PayloadAttribute payloadAtt = (PayloadAttribute)stream.addAttribute(typeof(PayloadAttribute));
                    PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute)stream.addAttribute(typeof(PositionIncrementAttribute));

                    stream.reset();
                    while (stream.incrementToken())
                    {
                        // Snapshot the current attribute values into a standalone Token.
                        Token copy = new Token();
                        copy.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength());
                        // Offsets refer to the whole matched word, not to the analyzed sub-token.
                        copy.setStartOffset(words.start());
                        copy.setEndOffset(words.end());
                        copy.setFlags(flagsAtt.getFlags());
                        copy.setType(typeAtt.type());
                        copy.setPayload(payloadAtt.getPayload());
                        copy.setPositionIncrement(posIncAtt.getPositionIncrement());
                        tokens.add(copy);
                    }
                }
                catch (IOException)
                {
                    // Intentionally ignored: an I/O failure abandons the current word only;
                    // tokens collected so far are kept and the loop moves on to the next match.
                }
            }

            return tokens;
        }
Ejemplo n.º 4
0
        /**
         * Creates a filter over two token streams.
         * @param prefix the prefix stream; its attributes are registered and kept in the p_* fields
         * @param suffix the suffix stream; also passed to the base constructor
         */
        public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) : base(suffix)
        {
            prefixExhausted = false;
            this.prefix = prefix;
            this.suffix = suffix;

            // Attributes registered on this filter, which was constructed over the suffix stream.
            termAtt = addAttribute(typeof(CharTermAttribute));
            posIncrAtt = addAttribute(typeof(PositionIncrementAttribute));
            payloadAtt = addAttribute(typeof(PayloadAttribute));
            offsetAtt = addAttribute(typeof(OffsetAttribute));
            typeAtt = addAttribute(typeof(TypeAttribute));
            flagsAtt = addAttribute(typeof(FlagsAttribute));

            // The same six attribute kinds, registered on the prefix stream.
            p_termAtt = prefix.addAttribute(typeof(CharTermAttribute));
            p_posIncrAtt = prefix.addAttribute(typeof(PositionIncrementAttribute));
            p_payloadAtt = prefix.addAttribute(typeof(PayloadAttribute));
            p_offsetAtt = prefix.addAttribute(typeof(OffsetAttribute));
            p_typeAtt = prefix.addAttribute(typeof(TypeAttribute));
            p_flagsAtt = prefix.addAttribute(typeof(FlagsAttribute));
        }
Ejemplo n.º 5
0
	  /**
	   * Creates a filter over two token streams.
	   * @param prefix the prefix stream; its attributes are registered and kept in the p_* fields
	   * @param suffix the suffix stream; also passed to the base constructor
	   */
	  public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) : base(suffix)
	  {
		prefixExhausted = false;
		this.prefix     = prefix;
		this.suffix     = suffix;

		// Attributes registered on this filter, which was constructed over the suffix stream.
		termAtt    = addAttribute(typeof(CharTermAttribute));
		posIncrAtt = addAttribute(typeof(PositionIncrementAttribute));
		payloadAtt = addAttribute(typeof(PayloadAttribute));
		offsetAtt  = addAttribute(typeof(OffsetAttribute));
		typeAtt    = addAttribute(typeof(TypeAttribute));
		flagsAtt   = addAttribute(typeof(FlagsAttribute));

		// The same six attribute kinds, registered on the prefix stream.
		p_termAtt    = prefix.addAttribute(typeof(CharTermAttribute));
		p_posIncrAtt = prefix.addAttribute(typeof(PositionIncrementAttribute));
		p_payloadAtt = prefix.addAttribute(typeof(PayloadAttribute));
		p_offsetAtt  = prefix.addAttribute(typeof(OffsetAttribute));
		p_typeAtt    = prefix.addAttribute(typeof(TypeAttribute));
		p_flagsAtt   = prefix.addAttribute(typeof(FlagsAttribute));
	  }
 /**
  * Creates the helper stream.
  * @param outerInstance the enclosing test instance, kept in the outerInstance field
  * @param toks the token enumerator, kept in the toks field
  */
 public TokenStreamAnonymousInnerClassHelper(TestRemoveDuplicatesTokenFilter outerInstance, IEnumerator<Token> toks)
 {
     // Captured state from the enclosing test.
     this.toks = toks;
     this.outerInstance = outerInstance;

     // Attributes registered on this stream: term, offset, position increment.
     termAtt = addAttribute(typeof(CharTermAttribute));
     offsetAtt = addAttribute(typeof(OffsetAttribute));
     posIncAtt = addAttribute(typeof(PositionIncrementAttribute));
 }
Ejemplo n.º 7
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: static void testNGrams(int minGram, int maxGram, String s, final String nonTokenChars, boolean edgesOnly) throws java.io.IOException
//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
        /**
         * Checks the grams produced by the n-gram tokenizer for the string s against
         * grams enumerated by brute force.
         *
         * For every start code point and every end in [start + minGram, start + maxGram]
         * (bounded by the number of code points) a gram is expected unless
         *   - edgesOnly is set and the code point before start is a token char, or
         *   - any code point in [start, end) is not a token char.
         * For each expected gram the next token must carry the same code points, a
         * position increment of 1, a position length of 1 and the matching offsets.
         * Afterwards the stream must be exhausted and, after end(), both offsets must
         * equal s.Length.
         *
         * @param minGram minimum gram size, in code points
         * @param maxGram maximum gram size, in code points
         * @param s the text to tokenize
         * @param nonTokenChars passed to isTokenChar and to the tokenizer helper
         * @param edgesOnly when true, grams whose preceding code point is a token char are skipped
         */
        internal static void testNGrams(int minGram, int maxGram, string s, string nonTokenChars, bool edgesOnly)
        {
            // Convert the string to code points.
            int[] codePoints = toCodePoints(s);

            // offsets[k] is the running sum of char.charCount over the first k code points;
            // it supplies the expected start/end offsets below.
            int[] offsets = new int[codePoints.Length + 1];
            for (int k = 1; k <= codePoints.Length; ++k)
            {
                offsets[k] = offsets[k - 1] + char.charCount(codePoints[k - 1]);
            }

            TokenStream grams = new NGramTokenizerAnonymousInnerClassHelper(TEST_VERSION_CURRENT, new StringReader(s), minGram, maxGram, edgesOnly, nonTokenChars);
            CharTermAttribute termAtt = grams.addAttribute(typeof(CharTermAttribute));
            PositionIncrementAttribute posIncAtt = grams.addAttribute(typeof(PositionIncrementAttribute));
            PositionLengthAttribute posLenAtt = grams.addAttribute(typeof(PositionLengthAttribute));
            OffsetAttribute offsetAtt = grams.addAttribute(typeof(OffsetAttribute));

            grams.reset();
            for (int gramStart = 0; gramStart < codePoints.Length; ++gramStart)
            {
                for (int gramEnd = gramStart + minGram; gramEnd <= gramStart + maxGram && gramEnd <= codePoints.Length; ++gramEnd)
                {
                    if (edgesOnly && gramStart > 0 && isTokenChar(nonTokenChars, codePoints[gramStart - 1]))
                    {
                        // Not on an edge: no gram is expected here.
                        continue;
                    }

                    // A gram is only expected when every code point in it is a token char.
                    bool onlyTokenChars = true;
                    for (int pos = gramStart; pos < gramEnd; ++pos)
                    {
                        if (!isTokenChar(nonTokenChars, codePoints[pos]))
                        {
                            onlyTokenChars = false;
                            break;
                        }
                    }
                    if (!onlyTokenChars)
                    {
                        continue;
                    }

                    assertTrue(grams.incrementToken());
                    assertArrayEquals(Arrays.copyOfRange(codePoints, gramStart, gramEnd), toCodePoints(termAtt));
                    assertEquals(1, posIncAtt.PositionIncrement);
                    assertEquals(1, posLenAtt.PositionLength);
                    assertEquals(offsets[gramStart], offsetAtt.startOffset());
                    assertEquals(offsets[gramEnd], offsetAtt.endOffset());
                }
            }

            // No further tokens are allowed, and end() must leave both offsets at s.Length.
            assertFalse(grams.incrementToken());
            grams.end();
            assertEquals(s.Length, offsetAtt.startOffset());
            assertEquals(s.Length, offsetAtt.endOffset());
        }