Example #1
0
        // Tokenizes the fields of a document into Postings.
        // Un-tokenized fields contribute their whole string value as a single
        // posting; tokenized fields are run through the analyzer, honoring each
        // token's position increment, until maxFieldLength terms have been
        // indexed for that field.  Length/position/boost state is read from and
        // written back to per-field-number arrays, so repeated fields resume
        // from the previously saved values.
        private void  InvertDocument(Document doc)
        {
            foreach (Field field in doc.Fields())
            {
                System.String fieldName   = field.Name();
                int           fieldNumber = fieldInfos.FieldNumber(fieldName);

                int length   = fieldLengths[fieldNumber];   // length of Field
                int position = fieldPositions[fieldNumber]; // position in Field

                if (field.IsIndexed())
                {
                    if (!field.IsTokenized())
                    {
                        // un-tokenized Field: the entire string value is one term
                        AddPosition(fieldName, field.StringValue(), position++);
                        length++;
                    }
                    else
                    {
                        System.IO.TextReader reader; // find or make Reader
                        if (field.ReaderValue() != null)
                        {
                            reader = field.ReaderValue();
                        }
                        else if (field.StringValue() != null)
                        {
                            reader = new System.IO.StringReader(field.StringValue());
                        }
                        else
                        {
                            throw new System.ArgumentException("Field must have either String or Reader value");
                        }

                        // Tokenize Field and add to postingTable
                        TokenStream stream = analyzer.TokenStream(fieldName, reader);
                        try
                        {
                            for (Token t = stream.Next(); t != null; t = stream.Next())
                            {
                                // An increment > 1 leaves positional gaps (e.g. for
                                // removed stop words); -1 compensates for the
                                // position++ performed in AddPosition below.
                                position += (t.GetPositionIncrement() - 1);
                                AddPosition(fieldName, t.TermText(), position++);
                                if (++length > maxFieldLength)
                                {
                                    break; // field truncated at maxFieldLength terms
                                }
                            }
                        }
                        finally
                        {
                            stream.Close(); // always release the analyzer's stream
                        }
                    }

                    fieldLengths[fieldNumber]   = length;   // save Field length
                    fieldPositions[fieldNumber] = position; // save Field position
                    fieldBoosts[fieldNumber]   *= field.GetBoost();
                }
            }
        }
		/// <summary>Advances the wrapped stream, stemming each term.</summary>
		/// <returns>  Returns the next token in the stream, or null at EOS
		/// </returns>
		public override Token Next()
		{
			token = input.Next();
			if (token == null)
			{
				return null;
			}
			System.String original = token.TermText();
			System.String stemmed = stemmer.Stem(original);
			// Reuse the incoming token unless stemming actually changed the text.
			if (stemmed.Equals(original))
			{
				return token;
			}
			return new Token(stemmed, token.StartOffset(), token.EndOffset(), token.Type());
		}
Example #3
0
 /// <summary>Pulls the next token from the input and applies the stemmer.</summary>
 /// <returns>  Returns the next token in the stream, or null at EOS
 /// </returns>
 public override Token Next()
 {
     token = input.Next();
     if (token == null)
     {
         return(null);
     }
     System.String term    = token.TermText();
     System.String stemmed = stemmer.Stem(term);
     // Allocate a replacement token only when the stem differs from the term.
     if (stemmed.Equals(term))
     {
         return(token);
     }
     return(new Token(stemmed, token.StartOffset(), token.EndOffset(), token.Type()));
 }
Example #4
0
		/// <summary>Lower-cases the next token's text via the configured Russian charset.</summary>
		/// <returns> the lower-cased token, or null at end of stream </returns>
		public override Token Next()
		{
			Token current = input.Next();
			if (current == null)
				return null;
			
			// Lower-case every character of the term in place.
			char[] buffer = current.TermText().ToCharArray();
			for (int idx = 0; idx < buffer.Length; idx++)
			{
				buffer[idx] = RussianCharsets.ToLowerCase(buffer[idx], charset);
			}
			
			// Emit a fresh token carrying the lower-cased text and original offsets.
			return new Token(new System.String(buffer), current.StartOffset(), current.EndOffset());
		}
 /// <summary>Stems the next term unless it appears in the exclusion table.</summary>
 /// <returns>  Returns the next token in the stream, or null at EOS
 /// </returns>
 public override Token Next()
 {
     token = input.Next();
     if (token == null)
     {
         return(null);
     }
     System.String term = token.TermText();
     // Terms listed in the exclusion table bypass stemming entirely.
     if (exclusionSet != null && exclusionSet.Contains(term))
     {
         return(token);
     }
     System.String stemmed = stemmer.Stem(term);
     // If not stemmed, dont waste the time creating a new token
     if (stemmed.Equals(term))
     {
         return(token);
     }
     return(new Token(stemmed, token.StartOffset(), token.EndOffset(), token.Type()));
 }
Example #6
0
		/// <summary>Advances the stream, stemming terms not found in the exclusion table.</summary>
		/// <returns>  Returns the next token in the stream, or null at EOS
		/// </returns>
		public override Token Next()
		{
			token = input.Next();
			if (token == null)
			{
				return null;
			}
			System.String text = token.TermText();
			// Excluded terms pass through unchanged.
			if (exclusionSet != null && exclusionSet.Contains(text))
			{
				return token;
			}
			System.String stem = stemmer.Stem(text);
			// If not stemmed, dont waste the time creating a new token
			if (stem.Equals(text))
			{
				return token;
			}
			return new Token(stem, token.StartOffset(), token.EndOffset(), token.Type());
		}
Example #7
0
 /// <summary>
 /// Builds a term vector by tokenizing <paramref name="queryString"/> with the
 /// supplied analyzer.  Does nothing when <paramref name="analyzer"/> is null
 /// or the analyzer yields no stream.
 /// </summary>
 /// <param name="queryString">the query text to tokenize</param>
 /// <param name="analyzer">analyzer producing the token stream; may be null</param>
 public QueryTermVector(System.String queryString, Analyzer analyzer)
 {
     if (analyzer != null)
     {
         TokenStream stream = analyzer.TokenStream("", new System.IO.StringReader(queryString));
         if (stream != null)
         {
             Token next = null;
             System.Collections.ArrayList terms = new System.Collections.ArrayList();
             try
             {
                 while ((next = stream.Next()) != null)
                 {
                     terms.Add(next.TermText());
                 }
                 ProcessTerms((System.String[]) terms.ToArray(typeof(System.String)));
             }
             catch (System.IO.IOException)
             {
                 // Best-effort: an IO failure during tokenization leaves the vector empty.
             }
             finally
             {
                 // Always release the analyzer's stream (matches the try/finally
                 // close pattern used when inverting documents).
                 stream.Close();
             }
         }
     }
 }
Example #8
0
        /// <summary>Returns the next token with its text lower-cased for the active charset.</summary>
        /// <returns> the lower-cased token, or null at end of stream </returns>
        public override Token Next()
        {
            Token source = input.Next();
            if (source == null)
            {
                return(null);
            }

            // Lower-case each character of the term using the Russian charset table.
            char[] letters = source.TermText().ToCharArray();
            for (int pos = 0; pos < letters.Length; pos++)
            {
                letters[pos] = RussianCharsets.ToLowerCase(letters[pos], charset);
            }

            // Wrap the converted text in a new token, keeping the original offsets.
            System.String lowered = new System.String(letters);
            return(new Token(lowered, source.StartOffset(), source.EndOffset()));
        }