/// <summary>
/// Exercises every Token constructor: verifies that the term buffer is copied
/// (not aliased), that offsets/flags/type are initialized as expected, and
/// that ToString() renders the term, offsets and non-default type.
/// </summary>
public virtual void TestCtor()
{
    char[] content = "hello".ToCharArray();

    // Default constructor: type defaults to "word", flags to 0, and the
    // buffer returned by TermBuffer() is not the caller's array.
    Token t = new Token();
    t.SetTermBuffer(content, 0, content.Length);
    Assert.AreNotEqual(t.TermBuffer(), content);
    Assert.AreEqual("hello", t.Term());
    Assert.AreEqual("word", t.Type());
    Assert.AreEqual(0, t.GetFlags());

    // Offset constructor: offsets show up in ToString().
    t = new Token(6, 22);
    t.SetTermBuffer(content, 0, content.Length);
    Assert.AreEqual("hello", t.Term());
    Assert.AreEqual("(hello,6,22)", t.ToString());
    Assert.AreEqual("word", t.Type());
    Assert.AreEqual(0, t.GetFlags());

    // Offset + flags constructor: flags stored, not printed by ToString().
    t = new Token(6, 22, 7);
    t.SetTermBuffer(content, 0, content.Length);
    Assert.AreEqual("hello", t.Term());
    Assert.AreEqual("(hello,6,22)", t.ToString());
    Assert.AreEqual(7, t.GetFlags());

    // Offset + type constructor: non-default type is printed by ToString().
    t = new Token(6, 22, "junk");
    t.SetTermBuffer(content, 0, content.Length);
    Assert.AreEqual("hello", t.Term());
    Assert.AreEqual("(hello,6,22,type=junk)", t.ToString());
    Assert.AreEqual(0, t.GetFlags());
}
/// <summary>Returns the next token in the stream, or null at EOS.
/// <p>Removes <tt>'s</tt> from the end of words.
/// <p>Removes dots from acronyms.
/// </summary>
public override Lucene.Net.Analysis.Token Next()
{
    Lucene.Net.Analysis.Token t = input.Next();
    if (t == null)
    {
        return null;
    }

    System.String text = t.TermText();
    System.String type = t.Type();

    if (type == APOSTROPHE_TYPE && (text.EndsWith("'s") || text.EndsWith("'S")))
    {
        // Strip the trailing possessive; the original offsets are kept so the
        // token still maps onto its full span in the input.
        System.String stripped = text.Substring(0, text.Length - 2);
        return new Lucene.Net.Analysis.Token(stripped, t.StartOffset(), t.EndOffset(), type);
    }

    if (type == ACRONYM_TYPE)
    {
        // Drop every '.' from the acronym (e.g. "U.S.A." becomes "USA").
        System.Text.StringBuilder sb = new System.Text.StringBuilder(text.Length);
        foreach (char c in text)
        {
            if (c != '.')
            {
                sb.Append(c);
            }
        }
        return new Lucene.Net.Analysis.Token(sb.ToString(), t.StartOffset(), t.EndOffset(), type);
    }

    // Any other token type passes through untouched.
    return t;
}
/// <summary>
/// Returns the next token in the stream, or null at EOS, applying the stemmer
/// to the term text. The incoming token is reused when stemming leaves the
/// term unchanged; otherwise a new token with the original offsets and type
/// is returned.
/// </summary>
/// <returns>Returns the next token in the stream, or null at EOS</returns>
public override Token Next()
{
    token = input.Next();
    if (token == null)
    {
        return null;
    }

    String original = token.TermText();
    String stemmed = stemmer.Stem(original);

    // Only allocate a replacement token when the stemmer actually changed
    // the term; otherwise pass the existing token straight through.
    if (stemmed.Equals(original))
    {
        return token;
    }
    return new Token(stemmed, token.StartOffset(), token.EndOffset(), token.Type());
}
// TypeAttribute:
/// <summary>Returns the token's lexical type by delegating to the wrapped attribute.</summary>
public System.String Type()
{
    return delegate_Renamed.Type();
}
/// <summary>
/// Examines one token and decides whether to keep it, possibly replacing it
/// with a normalized copy:
/// numbers longer than 20 digits are dropped; all-digit alphanum tokens have
/// their leading zeros stripped; email/host tokens are optionally sub-tokenized;
/// everything else is kept unless it is noise.
/// </summary>
/// <param name="token">Token to examine; may be replaced by a normalized copy.</param>
/// <returns>true to keep the token, false to discard it.</returns>
private bool ProcessToken(ref Lucene.Net.Analysis.Token token)
{
    string type = token.Type();

    if (type == tokentype_number)
    {
        // nobody will remember more than 20 digits
        return(token.TermText().Length <= 20);
    }
    else if (type == tokentype_alphanum)
    {
        string text = token.TermText();
        int begin = 0;
        bool found = false;

        // Check if number, in that case strip 0's from beginning
        foreach (char c in text)
        {
            if (!Char.IsDigit(c))
            {
                begin = 0;
                break;
            }
            else if (!found)
            {
                if (c == '0')
                {
                    begin++;
                }
                else
                {
                    found = true;
                }
            }
        }
        if (begin == 0)
        {
            return(!IsNoise(text));
        }

        // BUGFIX: an all-zero term (e.g. "000") would otherwise be reduced to
        // an empty term; keep a single zero instead.
        if (begin == text.Length)
        {
            begin = text.Length - 1;
        }

        // BUGFIX: the new start offset must be relative to the token's original
        // start offset (StartOffset() + number of stripped zeros), not the bare
        // zero-count 'begin' — otherwise the token no longer points into the
        // input text.
        token = new Lucene.Net.Analysis.Token(
            text.Remove(0, begin),
            token.StartOffset() + begin,
            token.EndOffset(),
            type);
        return(true);
    }
    else if (type == tokentype_email)
    {
        if (tokenize_email_hostname)
        {
            ProcessEmailToken(token);
        }
        return(true);
    }
    else if (type == tokentype_host)
    {
        if (tokenize_email_hostname)
        {
            ProcessURLToken(token);
        }
        return(true);
    }
    else
    {
        // FIXME: Noise should be only tested on token type alphanum
        return(!IsNoise(token.TermText()));
    }
}
/// <summary>
/// Returns the next token in the stream with accented characters folded to
/// their unaccented equivalents, or null at EOS. Offsets and type are carried
/// over from the incoming token unchanged.
/// </summary>
public override Token Next()
{
    Token next = input.Next();
    if (next == null)
    {
        return null;
    }

    // Re-emit the token with filtered term text.
    string folded = RemoveAccents(next.TermText());
    return new Token(folded, next.StartOffset(), next.EndOffset(), next.Type());
}
/// <summary>
/// Returns the next token in the stream, or null at EOS.
/// Tokens listed in the exclusion table bypass the stemmer; all other tokens
/// are stemmed, and a new token is created only when stemming changed the term.
/// </summary>
/// <returns>
/// Returns the next token in the stream, or null at EOS
/// </returns>
public override Token Next()
{
    if ( ( token = input.Next() ) == null )
    {
        return null;
    }
    // Check the exclusiontable
    else if ( exclusions != null && exclusions.Contains( token.TermText() ) )
    {
        return token;
    }
    else
    {
        String s = stemmer.Stem( token.TermText() );
        // If not stemmed, dont waste the time creating a new token
        if ( !s.Equals( token.TermText() ) )
        {
            // BUGFIX: preserve the original token's offsets instead of
            // resetting them to (0, s.Length). Offsets must keep pointing at
            // the term's span in the source text (as the other filters in this
            // file do); otherwise offset consumers such as highlighters break.
            return new Token( s, token.StartOffset(), token.EndOffset(), token.Type() );
        }
        return token;
    }
}
/// <summary>
/// Mirrors every field of the given token (term buffer, offsets, position
/// increment, flags, type, payload) into this stream's attribute objects.
/// A null token is ignored.
/// </summary>
private void SetCurrentToken(Token token)
{
    if (token == null)
    {
        return;
    }

    // Start from a clean slate, then copy the token's state field by field.
    ClearAttributes();
    _termAtt.SetTermBuffer(token.TermBuffer(), 0, token.TermLength());
    _offsetAtt.SetOffset(token.StartOffset(), token.EndOffset());
    _posIncrAtt.SetPositionIncrement(token.GetPositionIncrement());
    _flagsAtt.SetFlags(token.GetFlags());
    _typeAtt.SetType(token.Type());
    _payloadAtt.SetPayload(token.GetPayload());
}