Example #1
		public virtual void  TestCtor()
		{
			Token t = new Token();
			char[] content = "hello".ToCharArray();
			t.SetTermBuffer(content, 0, content.Length);
			char[] buf = t.TermBuffer();
			Assert.AreNotEqual(t.TermBuffer(), content);
			Assert.AreEqual("hello", t.Term());
			Assert.AreEqual("word", t.Type());
			Assert.AreEqual(0, t.GetFlags());
			
			t = new Token(6, 22);
			t.SetTermBuffer(content, 0, content.Length);
			Assert.AreEqual("hello", t.Term());
			Assert.AreEqual("(hello,6,22)", t.ToString());
			Assert.AreEqual("word", t.Type());
			Assert.AreEqual(0, t.GetFlags());
			
			t = new Token(6, 22, 7);
			t.SetTermBuffer(content, 0, content.Length);
			Assert.AreEqual("hello", t.Term());
			Assert.AreEqual("(hello,6,22)", t.ToString());
			Assert.AreEqual(7, t.GetFlags());
			
			t = new Token(6, 22, "junk");
			t.SetTermBuffer(content, 0, content.Length);
			Assert.AreEqual("hello", t.Term());
			Assert.AreEqual("(hello,6,22,type=junk)", t.ToString());
			Assert.AreEqual(0, t.GetFlags());
		}
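The AreNotEqual assertion above implies that SetTermBuffer copies the characters rather than keeping a reference to the caller's array. A minimal sketch of that behaviour, assuming only the Token members exercised by the test:

using Lucene.Net.Analysis;

class TokenCopyDemo
{
    static void Main()
    {
        char[] content = "hello".ToCharArray();
        Token t = new Token(0, content.Length);
        t.SetTermBuffer(content, 0, content.Length);

        content[0] = 'j';                       // mutate the caller's array afterwards
        System.Console.WriteLine(t.Term());     // still prints "hello": the buffer was copied
    }
}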
        /// <summary>Returns the next token in the stream, or null at EOS.
        /// <para>Removes <c>'s</c> from the end of words.</para>
        /// <para>Removes dots from acronyms.</para>
        /// </summary>
        public override Lucene.Net.Analysis.Token Next()
        {
            Lucene.Net.Analysis.Token t = input.Next();

            if (t == null)
            {
                return(null);
            }

            System.String text = t.TermText();
            System.String type = t.Type();

            if (type == APOSTROPHE_TYPE && (text.EndsWith("'s") || text.EndsWith("'S")))
            {
                return(new Lucene.Net.Analysis.Token(text.Substring(0, text.Length - 2), t.StartOffset(), t.EndOffset(), type));
            }
            else if (type == ACRONYM_TYPE)
            {
                // remove dots
                System.Text.StringBuilder trimmed = new System.Text.StringBuilder();
                for (int i = 0; i < text.Length; i++)
                {
                    char c = text[i];
                    if (c != '.')
                    {
                        trimmed.Append(c);
                    }
                }
                return(new Lucene.Net.Analysis.Token(trimmed.ToString(), t.StartOffset(), t.EndOffset(), type));
            }
            else
            {
                return(t);
            }
        }
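For context, a consumer of an old-style (pre-2.9) TokenStream such as the filter above simply calls Next() until it returns null. A minimal sketch, assuming only the members already used in these examples:

using Lucene.Net.Analysis;

static class StreamDrainer
{
    // Drain any old-style TokenStream (e.g. the filter above) and print each term and its type.
    public static void Drain(TokenStream stream)
    {
        Token tok;
        while ((tok = stream.Next()) != null)   // null signals end of stream
        {
            System.Console.WriteLine(tok.TermText() + " [" + tok.Type() + "]");
        }
    }
}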
Example #3
		/// <summary>Returns the next token in the stream, or null at EOS.</summary>
		/// <returns>The next token, or null at EOS.</returns>
		public override Token Next() 
		{
			if ((token = input.Next()) == null)
			{
				return null;
			}
			else
			{
				String s = stemmer.Stem(token.TermText());
				if (!s.Equals(token.TermText()))
				{
					return new Token(s, token.StartOffset(), token.EndOffset(),
						token.Type());
				}
				return token;
			}
		}
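One thing the branch above drops is the original token's position increment. If that matters downstream, a hedged variant of the else branch (a sketch only, using the same fields as above and only Token members shown elsewhere in these examples) would be:

				String s = stemmer.Stem(token.TermText());
				if (!s.Equals(token.TermText()))
				{
					// Preserve the position increment on the replacement token.
					Token stemmed = new Token(s, token.StartOffset(), token.EndOffset(), token.Type());
					stemmed.SetPositionIncrement(token.GetPositionIncrement());
					return stemmed;
				}
				return token;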
Example #5
        // TypeAttribute:

        public System.String Type()
        {
            return(delegate_Renamed.Type());
        }
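The getter above forwards TypeAttribute.Type() to a wrapped token. A matching setter would be the natural companion; a sketch, not part of the original source, assuming the wrapped delegate_Renamed also exposes SetType(string) as Token does in the last example below:

        // Hypothetical companion setter (assumes the wrapped object exposes SetType(string)):
        public void SetType(System.String type)
        {
            delegate_Renamed.SetType(type);
        }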
Example #6
        private bool ProcessToken(ref Lucene.Net.Analysis.Token token)
        {
            string type = token.Type();

            if (type == tokentype_number)
            {
                // nobody will remember more than 20 digits
                return(token.TermText().Length <= 20);
            }
            else if (type == tokentype_alphanum)
            {
                string text  = token.TermText();
                int    begin = 0;
                bool   found = false;
                // Check if number, in that case strip 0's from beginning
                foreach (char c in text)
                {
                    if (!Char.IsDigit(c))
                    {
                        begin = 0;
                        break;
                    }
                    else if (!found)
                    {
                        if (c == '0')
                        {
                            begin++;
                        }
                        else
                        {
                            found = true;
                        }
                    }
                }

                if (begin == 0)
                {
                    return(!IsNoise(text));
                }
                token = new Lucene.Net.Analysis.Token(
                    text.Remove(0, begin),
                    begin,
                    token.EndOffset(),
                    type);
                return(true);
            }
            else if (type == tokentype_email)
            {
                if (tokenize_email_hostname)
                {
                    ProcessEmailToken(token);
                }
                return(true);
            }
            else if (type == tokentype_host)
            {
                if (tokenize_email_hostname)
                {
                    ProcessURLToken(token);
                }
                return(true);
            }
            else
            {
                // FIXME: noise should only be tested on tokens of type alphanum
                return(!IsNoise(token.TermText()));
            }
        }
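The all-digits check above only strips leading zeros when the whole token is numeric. Pulled out on its own (plain C#, no Lucene types, illustrative names), the loop behaves like this:

static class LeadingZeros
{
    // Mirrors the loop in ProcessToken: returns the text unchanged unless it
    // consists solely of digits, in which case leading zeros are removed.
    public static string Strip(string text)
    {
        int begin = 0;
        bool found = false;
        foreach (char c in text)
        {
            if (!char.IsDigit(c)) { begin = 0; break; }   // not a pure number: leave as-is
            if (!found)
            {
                if (c == '0') begin++;
                else found = true;
            }
        }
        return begin == 0 ? text : text.Remove(0, begin);
    }
}
// e.g. LeadingZeros.Strip("00123") == "123", LeadingZeros.Strip("a0123") == "a0123"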
        public override Token Next()
        {
            Token t = input.Next();

            if (t == null)
            {
                return(null);
            }
            // Return a token with filtered characters.
            return(new Token(RemoveAccents(t.TermText()), t.StartOffset(), t.EndOffset(), t.Type()));
        }
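RemoveAccents itself is not shown in this snippet. One common way such a helper can be written in .NET (a sketch under that assumption, not the original implementation) is Unicode decomposition followed by dropping combining marks:

using System.Globalization;
using System.Text;

static class AccentHelper
{
    // Decompose to FormD, drop the combining marks, then recompose to FormC.
    public static string RemoveAccents(string text)
    {
        string decomposed = text.Normalize(NormalizationForm.FormD);
        StringBuilder sb = new StringBuilder(decomposed.Length);
        foreach (char c in decomposed)
        {
            if (CharUnicodeInfo.GetUnicodeCategory(c) != UnicodeCategory.NonSpacingMark)
                sb.Append(c);   // keep everything except combining accent marks
        }
        return sb.ToString().Normalize(NormalizationForm.FormC);
    }
}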
		/// <summary>Returns the next token in the stream, or null at EOS.</summary>
		/// <returns>The next token, or null at EOS.</returns>
		public override Token Next()
		{
			if ( ( token = input.Next() ) == null ) 
			{
				return null;
			}
			// Check the exclusion table
			else if ( exclusions != null && exclusions.Contains( token.TermText() ) ) 
			{
				return token;
			}
			else 
			{
				String s = stemmer.Stem( token.TermText() );
				// If not stemmed, don't waste time creating a new token
				if ( !s.Equals( token.TermText() ) ) 
				{
					return new Token( s, 0, s.Length, token.Type() );
				}
				return token;
			}
		}
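Unlike the earlier filters, the stemming branch here rebuilds the token with offsets 0 and s.Length instead of the original StartOffset()/EndOffset(). If offset-sensitive consumers such as highlighting sit downstream, an offset-preserving variant of that branch (an illustrative drop-in sketch using the same fields as above, not the original code) would be:

				String s = stemmer.Stem( token.TermText() );
				if ( !s.Equals( token.TermText() ) )
				{
					// Keep the original offsets instead of resetting them to 0..s.Length.
					return new Token( s, token.StartOffset(), token.EndOffset(), token.Type() );
				}
				return token;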
        private void SetCurrentToken(Token token)
        {
            if (token == null) return;
            ClearAttributes();
            _termAtt.SetTermBuffer(token.TermBuffer(), 0, token.TermLength());
            _posIncrAtt.SetPositionIncrement(token.GetPositionIncrement());
            _flagsAtt.SetFlags(token.GetFlags());
            _offsetAtt.SetOffset(token.StartOffset(), token.EndOffset());
            _typeAtt.SetType(token.Type());
            _payloadAtt.SetPayload(token.GetPayload());
        }
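A helper like SetCurrentToken is typically driven from the attribute-based IncrementToken() override in the same class. A hedged sketch of that wiring (GetNextLegacyToken is a hypothetical placeholder for whatever produces the old-style Token):

        public override bool IncrementToken()
        {
            Token next = GetNextLegacyToken();   // hypothetical legacy token source
            if (next == null)
            {
                return false;                    // end of stream
            }
            SetCurrentToken(next);               // copy term, offsets, type, flags, payload
            return true;
        }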