示例#1
0
		/// <summary>
		/// Inspects a single token and, depending on its type, rejects it,
		/// passes it through, or replaces it with a normalized token.
		/// </summary>
		/// <param name="token">
		/// The token to examine. For all-digit alphanum tokens with leading
		/// zeros it is replaced by a new token whose text has those zeros
		/// removed and whose start offset is advanced accordingly.
		/// </param>
		/// <returns>
		/// false when the token is a number longer than 20 characters or when
		/// IsNoise reports its text as noise; true otherwise.
		/// NOTE(review): presumably "true" means the caller keeps the token -
		/// confirm against the caller.
		/// </returns>
		private bool ProcessToken (ref Lucene.Net.Analysis.Token token)
		{
			string type = token.Type ();

			if (type == tokentype_number) {
				// nobody will remember more than 20 digits
				return (token.TermText ().Length <= 20);
			} else if (type == tokentype_alphanum) {
				string text = token.TermText ();
				int begin = 0;
				bool found = false;
				// Check if number, in that case strip 0's from beginning
				foreach (char c in text) {
					if (! Char.IsDigit (c)) {
						// Not purely numeric: leave the text untouched.
						begin = 0;
						break;
					} else if (! found) {
						if (c == '0')
							begin ++;
						else
							found = true;
					}
				}

				// The text consists of zeros only (e.g. "0" or "000"): keep the
				// last zero so that we never produce a token with an empty term.
				if (begin > 0 && ! found)
					begin --;

				if (begin == 0)
					return ! IsNoise (text);

				// Offsets are positions in the original text, so the new start
				// is the old start shifted by the number of stripped zeros.
				token = new Lucene.Net.Analysis.Token (
					text.Remove (0, begin),
					token.StartOffset () + begin,
					token.EndOffset (),
					type);
				return true;
			} else if (type == tokentype_email) {
				// NOTE(review): ProcessEmailToken is defined elsewhere; its
				// effect on the token stream is not visible from here.
				if (tokenize_email_hostname)
					ProcessEmailToken (token);
				return true;
			} else if (type == tokentype_host) {
				// NOTE(review): ProcessURLToken is defined elsewhere; its
				// effect on the token stream is not visible from here.
				if (tokenize_email_hostname)
					ProcessURLToken (token);
				return true;
			} else
				// FIXME: Noise should be only tested on token type alphanum
				return ! IsNoise (token.TermText ());
		}