Beispiel #1
0
        /// <summary>
        /// Returns the next token.  When a previous token was split into parts
        /// (token_type == tokentype_email marks the e-mail case), the parts are
        /// emitted one per call with position increment 0 so they stack on the
        /// original token's position.  Otherwise tokens are pulled from
        /// token_stream until ProcessToken accepts one (ProcessToken presumably
        /// also populates 'parts' for splittable tokens — confirm in caller).
        /// </summary>
        public override Lucene.Net.Analysis.Token Next()
        {
            // Still emitting the sub-parts of a previously split token?
            if (parts != null)
            {
                if (++parts_index < parts.Length)
                {
                    string part = parts [parts_index];
                    Lucene.Net.Analysis.Token part_token;
                    // FIXME: Searching for google.com will not match www.google.com.
                    // If we decide to allow google-style "abcd.1234" which means
                    // "abcd 1234" as a consequtive phrase, then adjusting
                    // the startOffset and endOffset would enable matching
                    // google.com to www.google.com
                    // First part of an e-mail token starts at offset 0; every
                    // other part starts one past the previous part's end.
                    int start_offset = (parts_index == 0 && token_type == tokentype_email ?
                                        0 :
                                        last_end_offset + 1);         // assuming only one separator
                    int end_offset = start_offset + part.Length;
                    part_token = new Lucene.Net.Analysis.Token(part,
                                                               start_offset,
                                                               end_offset,
                                                               token_type);
                    // Increment 0: all parts occupy the same token position.
                    part_token.SetPositionIncrement(0);
                    last_end_offset = (parts_index == 0 && token_type == tokentype_email ?
                                       -1 :
                                       end_offset);          // assuming only one separator
                    return(part_token);
                }
                else
                {
                    // clear the array
                    parts           = null;
                    parts_index     = -1;
                    last_end_offset = -1;
                    token_type      = null;
                }
            }

            Token token;

            // No pending parts: pull from the underlying stream until a token
            // survives ProcessToken.
            while ((token = token_stream.Next()) != null)
            {
                //Console.WriteLine ("Found token: [{0}]", token.TermText ());
                if (ProcessToken(ref token))
                {
                    return(token);
                }
            }
            return(null);
        }
Beispiel #2
0
 /// <summary>
 /// Pulls the next token from the input stream and returns a copy whose
 /// term text has been run through the stemmer; offsets, type and position
 /// increment are carried over unchanged.  Returns null at end of stream.
 /// </summary>
 public override Token Next()
 {
     Token current = input.Next();
     if (current == null)
     {
         return null;
     }

     string stemmed;
     try
     {
         stemmed = stemmer.Stem(current.TermText());
     }
     catch (Exception e)
     {
         // Wrap checked stemmer failures, preserving the inner exception.
         throw new System.SystemException(e.Message, e);
     }

     Token stemmedToken = new Token(stemmed, current.StartOffset(), current.EndOffset(), current.Type());
     stemmedToken.SetPositionIncrement(current.GetPositionIncrement());
     return stemmedToken;
 }
Beispiel #3
0
        /// <summary> Returns the next input Token whose termText() is not a stop word.</summary>
        public override Token Next(Token result)
        {
            int skipped = 0;

            for (result = input.Next(result); result != null; result = input.Next(result))
            {
                bool isStopWord = stopWords.Contains(result.TermBuffer(), 0, result.termLength);
                if (isStopWord)
                {
                    // Remember how many positions the dropped stop words covered.
                    skipped += result.GetPositionIncrement();
                    continue;
                }
                if (enablePositionIncrements)
                {
                    result.SetPositionIncrement(result.GetPositionIncrement() + skipped);
                }
                return (result);
            }
            // end of stream, nothing but stop words left
            return (null);
        }
        /// <summary> Returns the next input Token whose term() is not a stop word.</summary>
        public override Token Next(/* in */ Token reusableToken)
        {
            System.Diagnostics.Debug.Assert(reusableToken != null);

            int positionsSkipped = 0;
            Token candidate;
            while ((candidate = input.Next(reusableToken)) != null)
            {
                if (stopWords.Contains(candidate.TermBuffer(), 0, candidate.TermLength()))
                {
                    // Stop word: accumulate the positions it occupied and move on.
                    positionsSkipped += candidate.GetPositionIncrement();
                    continue;
                }
                if (enablePositionIncrements)
                {
                    candidate.SetPositionIncrement(candidate.GetPositionIncrement() + positionsSkipped);
                }
                return (candidate);
            }
            // exhausted the input stream
            return (null);
        }
        /// <summary>
        /// Final touch of a shingle token before it is passed on to the consumer from method {@link #next(org.apache.lucene.analysis.Token)}.
        /// 
        /// Sets the shingle token's type, flags and position increment, stretches
        /// its offsets over the whole shingle, and stamps it with the weight
        /// computed by <c>CalculateShingleWeight</c>.
        /// </summary>
        /// <param name="token">Shingle Token</param>
        /// <param name="shingle">Tokens used to produce the shingle token.</param>
        /// <param name="currentPermutationStartOffset">Start offset in parameter currentPermutationTokens</param>
        /// <param name="currentPermutationRows">index to Matrix.Column.Row from the position of tokens in parameter currentPermutationTokens</param>
        /// <param name="currentPermuationTokens">tokens of the current permutation of rows in the matrix. </param>
        public void UpdateToken(Token token, List<Token> shingle, int currentPermutationStartOffset, List<Row> currentPermutationRows, List<Token> currentPermuationTokens)
        {
            token.SetType(typeof(ShingleMatrixFilter).Name);
            token.SetFlags(0);
            token.SetPositionIncrement(1);

            // The shingle spans from the first constituent token to the last.
            Token firstInShingle = shingle[0];
            Token lastInShingle = shingle[shingle.Count - 1];
            token.SetStartOffset(firstInShingle.StartOffset());
            token.SetEndOffset(lastInShingle.EndOffset());

            var weight = CalculateShingleWeight(token, shingle, currentPermutationStartOffset, currentPermutationRows, currentPermuationTokens);
            _settingsCodec.SetWeight(token, weight);
        }
        /// <summary>
        /// Advances the wrapped input stream and copies its attribute values
        /// (term buffer, offsets, position increment, flags, type, payload)
        /// into <paramref name="token"/>.
        /// </summary>
        /// <param name="token">Token instance to (re)populate.</param>
        /// <returns>The populated token, or null when the input is exhausted.</returns>
        private Token GetNextInputToken(Token token)
        {
            if (!_input.IncrementToken())
            {
                return null;
            }

            token.SetTermBuffer(_inTermAtt.TermBuffer(), 0, _inTermAtt.TermLength());
            token.SetOffset(_inOffsetAtt.StartOffset(), _inOffsetAtt.EndOffset());
            token.SetPositionIncrement(_inPosIncrAtt.GetPositionIncrement());
            token.SetFlags(_inFlagsAtt.GetFlags());
            token.SetType(_inTypeAtt.Type());
            token.SetPayload(_inPayloadAtt.GetPayload());

            return token;
        }
Beispiel #7
0
		/// <summary>
		/// Returns the next sub-part token while a split token is being
		/// emitted (each part at position increment 0), otherwise pulls
		/// tokens from the underlying stream until ProcessToken accepts one.
		/// </summary>
		public override Lucene.Net.Analysis.Token Next ()
		{
			// If the previous split is exhausted, reset the split state first.
			if (parts != null && ++parts_index >= parts.Length) {
				parts = null;
				parts_index = -1;
				last_end_offset = -1;
				token_type = null;
			}

			if (parts != null) {
				string part = parts [parts_index];
				bool first_email_part = (parts_index == 0 && token_type == tokentype_email);
				// FIXME: Searching for google.com will not match www.google.com.
				// If we decide to allow google-style "abcd.1234" which means
				// "abcd 1234" as a consequtive phrase, then adjusting
				// the startOffset and endOffset would enable matching
				// google.com to www.google.com
				int start_offset = first_email_part ? 0 : last_end_offset + 1; // assuming only one separator
				int end_offset = start_offset + part.Length;
				Lucene.Net.Analysis.Token part_token = new Lucene.Net.Analysis.Token (part,
										       start_offset,
										       end_offset,
										       token_type);
				// All parts stack on the position of the original token.
				part_token.SetPositionIncrement (0);
				last_end_offset = first_email_part ? -1 : end_offset; // assuming only one separator
				return part_token;
			}

			Token token;
			while ((token = token_stream.Next ()) != null) {
				if (ProcessToken (ref token))
					return token;
			}
			return null;
		}
		/// <summary>
		/// Verifies that SnowballFilter stems the term text ("accents" ->
		/// "accent") while preserving the token's offsets, type and position
		/// increment.
		/// </summary>
		public virtual void  TestFilterTokens()
		{
			Token tok = new Token("accents", 2, 7, "wrd");
			tok.SetPositionIncrement(3);
			
			SnowballFilter filter = new SnowballFilter(new AnonymousClassTokenStream(tok, this), "English");
			
			Token newtok = filter.Next();
			
			// Trace.Assert is compiled out unless TRACE is defined and does not
			// fail the test runner; use real NUnit assertions, matching the
			// original Java assertEquals calls this test was ported from.
			Assert.AreEqual("accent", newtok.TermText());
			Assert.AreEqual(2, newtok.StartOffset());
			Assert.AreEqual(7, newtok.EndOffset());
			Assert.AreEqual("wrd", newtok.Type());
			Assert.AreEqual(3, newtok.GetPositionIncrement());
		}
Beispiel #9
0
 /// <summary>
 /// Forwards the position increment to the wrapped (delegate) token.
 /// </summary>
 /// <param name="positionIncrement">Position increment to set on the delegate.</param>
 public void  SetPositionIncrement(int positionIncrement)
 {
     delegate_Renamed.SetPositionIncrement(positionIncrement);
 }
        /// <summary>
        /// Builds a token with the given term text, position increment and
        /// offsets, then stamps it with a weight and a token positioner via
        /// the default ShingleMatrixFilter settings codec.
        /// </summary>
        private static Token TokenFactory(String text, int posIncr, float weight, int startOffset, int endOffset,
                                          TokenPositioner positioner)
        {
            var token = new Token(startOffset, endOffset);
            token.SetTermBuffer(text);
            token.SetPositionIncrement(posIncr);

            var codec = ShingleMatrixFilter.DefaultSettingsCodec;
            codec.SetWeight(token, weight);
            codec.SetTokenPositioner(token, positioner);

            return token;
        }
        /// <summary>
        /// Builds a token carrying the given term text, position increment
        /// and start/end offsets.
        /// </summary>
        private static Token TokenFactory(String text, int posIncr, int startOffset, int endOffset)
        {
            var result = new Token(startOffset, endOffset);
            result.SetPositionIncrement(posIncr);
            result.SetTermBuffer(text);
            return result;
        }
		/// <summary>
		/// SnowballFilter must stem the term text but leave offsets, type and
		/// position increment of the token untouched.
		/// </summary>
		public virtual void  TestFilterTokens()
		{
			// Arrange: one buffered token with non-default attributes.
			Token source = new Token("accents", 2, 7, "wrd");
			source.SetPositionIncrement(3);
			SnowballFilter filter = new SnowballFilter(new AnonymousClassTokenStream(source, this), "English");

			// Act
			Token stemmed = filter.Next();

			// Assert: term stemmed, everything else preserved.
			Assert.AreEqual("accent", stemmed.TermText());
			Assert.AreEqual(2, stemmed.StartOffset());
			Assert.AreEqual(7, stemmed.EndOffset());
			Assert.AreEqual("wrd", stemmed.Type());
			Assert.AreEqual(3, stemmed.GetPositionIncrement());
		}
 /// <summary>
 /// Advances the Suffix stream and copies its attribute values (term
 /// buffer, offsets, position increment, flags, type, payload) into
 /// <paramref name="token"/>.
 /// </summary>
 /// <param name="token">Token instance to (re)populate.</param>
 /// <returns>The populated token, or null when the suffix stream is exhausted.</returns>
 private Token GetNextSuffixInputToken(Token token)
 {
     if (!Suffix.IncrementToken())
     {
         return null;
     }

     token.SetTermBuffer(_termAtt.TermBuffer(), 0, _termAtt.TermLength());
     token.SetOffset(_offsetAtt.StartOffset(), _offsetAtt.EndOffset());
     token.SetPositionIncrement(_posIncrAtt.GetPositionIncrement());
     token.SetFlags(_flagsAtt.GetFlags());
     token.SetType(_typeAtt.Type());
     token.SetPayload(_payloadAtt.GetPayload());

     return token;
 }
Beispiel #14
0
		/// <summary> Returns the next input Token whose termText() is not a stop word.</summary>
		public override Token Next(Token result)
		{
			int skipped = 0;
			for (result = input.Next(result); result != null; result = input.Next(result))
			{
				if (stopWords.Contains(result.TermBuffer(), 0, result.termLength))
				{
					// Stop word: count the positions it covered and keep going.
					skipped += result.GetPositionIncrement();
					continue;
				}
				if (enablePositionIncrements)
				{
					result.SetPositionIncrement(result.GetPositionIncrement() + skipped);
				}
				return result;
			}
			// nothing but stop words until end of stream
			return null;
		}
				/// <summary>
				/// Returns any synonym token buffered by the previous call;
				/// otherwise reads the next input token, sets its position
				/// increment from a leading digit character (if any), attaches
				/// a payload to the very first token, and buffers a
				/// zero-increment "b" synonym to be emitted on the next call.
				/// </summary>
				public override Token Next(Token result)
				{
					// Emit the synonym buffered on the previous call first.
					if (buffered != null)
					{
						Token t = buffered;
						buffered = null;
						return t;
					}
					Token t2 = input.Next(result);
					if (t2 == null)
						return null;
					// A leading digit encodes the token's position increment.
					if (System.Char.IsDigit(t2.TermBuffer()[0]))
					{
						t2.SetPositionIncrement(t2.TermBuffer()[0] - '0');
					}
					if (first)
					{
						// set payload on first position only
						t2.SetPayload(new Payload(new byte[]{100}));
						first = false;
					}
					
					// index a "synonym" for every token: clone it, strip the
					// payload, and place term "b" at the same position
					// (position increment 0).
					buffered = (Token) t2.Clone();
					buffered.SetPayload(null);
					buffered.SetPositionIncrement(0);
					buffered.SetTermBuffer(new char[]{'b'}, 0, 1);
					
					return t2;
				}
 /// <summary>
 /// Convenience factory: builds a Token with the given term text, offsets
 /// and position increment.
 /// </summary>
 public static Token t(String text, int startOffset, int endOffset, int positionIncrement)
 {
     Token result = new Token(text, startOffset, endOffset);
     result.SetPositionIncrement(positionIncrement);
     return result;
 }