示例#1
0
        /// <summary>
        /// Advances the stream. Each upstream token is emitted in its stemmed form;
        /// when stemming actually changed the term, the unstemmed original is emitted
        /// on the following call.
        /// </summary>
        /// <returns>
        /// <c>true</c> if a term was placed in <c>termAtt</c>; <c>false</c> once the
        /// wrapped input has no more tokens.
        /// </returns>
        public override bool IncrementToken()
        {
            // A pending snapshot means the previous call emitted the stem:
            // replay the unstemmed original now and clear the pending state.
            if (orginal != null)
            {
                termAtt.SetTermBuffer(orginal, 0, orginal.Length);
                orginal = null;
                return true;
            }

            if (!input.IncrementToken())
            {
                return false;
            }

            // Snapshot exactly the valid characters of the term before stemming.
            int originalLength = termAtt.TermLength();
            orginal = new char[originalLength];
            Array.Copy(termAtt.TermBuffer(), 0, orginal, 0, originalLength);

            if (stemmer.Stem(termAtt.TermBuffer(), 0, originalLength))
            {
                termAtt.SetTermBuffer(stemmer.ResultBuffer, 0, stemmer.ResultLength);
            }

            // Decide whether the term now differs from the snapshot. This runs
            // regardless of what Stem returned: if it reported false (presumably
            // "no change" -- confirm against the stemmer implementation) the term is
            // identical and must not be emitted a second time.
            // Only the first TermLength() characters are compared; the backing
            // buffer may be longer than the term and hold stale characters.
            int currentLength = termAtt.TermLength();
            bool unchanged = currentLength == orginal.Length;
            if (unchanged)
            {
                char[] current = termAtt.TermBuffer();
                for (int i = 0; i < currentLength; i++)
                {
                    if (current[i] != orginal[i])
                    {
                        unchanged = false;
                        break;
                    }
                }
            }

            if (unchanged)
            {
                // Nothing to replay: stem and original are the same term.
                orginal = null;
            }

            return true;
        }
示例#2
0
        /// <summary>
        /// Buffers the whole upstream token stream on the first call, merging tokens
        /// that share both start offset and term text, then replays the buffered
        /// tokens one per call.
        /// </summary>
        /// <returns>
        /// <c>true</c> while a buffered token was copied into <c>termAtt</c> and
        /// <c>termOff</c>; <c>false</c> when the buffer is empty or fully replayed.
        /// </returns>
        public override bool IncrementToken()
        {
            // Replay phase: the buffer already exists, step to the next entry.
            if (tokenList != null)
            {
                index++;
                if (index >= tokenList.Count)
                {
                    // Fully replayed; dropping the buffer means a later call
                    // would start a new buffering pass over the input.
                    tokenList = null;
                    return false;
                }

                Token replayed = tokenList[index];
                termAtt.SetTermBuffer(replayed.TermBuffer(), 0, replayed.TermLength());
                termOff.SetOffset(replayed.StartOffset, replayed.EndOffset);
                return true;
            }

            // Buffering phase: drain the input completely.
            tokenList = new List<Token>();
            while (input.IncrementToken())
            {
                Token candidate = new Token(termAtt.Term, termOff.StartOffset, termOff.EndOffset);

                bool merged = false;
                for (int i = 0; i < tokenList.Count && !merged; i++)
                {
                    Token existing = tokenList[i];
                    if (existing.StartOffset == candidate.StartOffset && existing.Term == candidate.Term)
                    {
                        // Same term at the same start: keep one entry and let it
                        // take over the end offset of the newer token.
                        existing.SetOffset(candidate.StartOffset, candidate.EndOffset);
                        merged = true;
                    }
                }

                if (!merged)
                {
                    tokenList.Add(candidate);
                }
            }

            if (tokenList.Count == 0)
            {
                return false;
            }

            // Emit the first buffered token right away.
            index = 0;
            Token first = tokenList[index];
            termAtt.SetTermBuffer(first.TermBuffer(), 0, first.TermLength());
            termOff.SetOffset(first.StartOffset, first.EndOffset);
            return true;
        }
示例#3
0
        /// <summary>
        /// Reads the input one character at a time and emits the text accumulated so
        /// far as the term. Characters keep being consumed while the one just read is
        /// whitespace, so no emitted term ends in whitespace.
        /// </summary>
        /// <returns>
        /// <c>true</c> when a term ending in a non-whitespace character was set;
        /// <c>false</c> once the reader is exhausted (and on every call after that).
        /// </returns>
        public override bool IncrementToken()
        {
            if (done)
            {
                return false;
            }

            char[] single = new char[1];
            do
            {
                if (input.Read(single, 0, 1) == 0)
                {
                    // End of input: remember it so later calls return immediately.
                    done = true;
                    return false;
                }

                sb.Append(single[0]);

                // The term is always everything read so far, starting at offset 0.
                char[] accumulated = sb.ToString().ToCharArray();
                termAtt.SetTermBuffer(accumulated, 0, accumulated.Length);
                offsetAtt.SetOffset(0, accumulated.Length);

                // Skip keywords that end in whitespace: keep reading until the
                // last character consumed is not whitespace.
            }
            while (char.IsWhiteSpace(single[0]));

            return true;
        }
示例#4
0
        /// <summary>
        /// Advances to the next not-yet-emitted part of the upstream tokens.
        /// </summary>
        /// <remarks>
        /// Each upstream term is copied into <c>receivedText</c> and split by repeated
        /// calls to <c>GetNextPart()</c> (presumably consuming <c>receivedText</c> as it
        /// goes -- confirm against its implementation). Parts already present in
        /// <c>seen</c> are skipped; every emitted part is added to <c>seen</c>.
        /// The offset is set to <c>(0, part length)</c>, i.e. relative to the part
        /// itself, and position increments are not adjusted by this method.
        /// </remarks>
        /// <returns>
        /// <c>true</c> when a part was written to <c>termAtt</c>; <c>false</c> when the
        /// wrapped input has no more tokens.
        /// </returns>
        public override bool IncrementToken()
        {
            while (true)
            {
                if (receivedText.Length == 0)
                {
                    // Only a drained input ends the stream.
                    if (!input.IncrementToken())
                    {
                        return false;
                    }

                    // Copy just the valid characters of the term; the backing
                    // buffer may be longer than TermLength().
                    receivedText.Append(termAtt.TermBuffer(), 0, termAtt.TermLength());

                    if (receivedText.Length == 0)
                    {
                        // Zero-length upstream term: nothing to split. Fetch the
                        // next token instead of reporting end-of-stream.
                        continue;
                    }
                }

                // Pull parts until one is new, or until the current text yields
                // no more parts (signalled by an empty string).
                string emittedText;
                while ((emittedText = GetNextPart()).Length > 0)
                {
                    if (seen.Contains(emittedText))
                    {
                        continue;
                    }

                    termAtt.SetTermBuffer(emittedText.ToCharArray(), 0, emittedText.Length);
                    offsetAtt.SetOffset(0, emittedText.Length);
                    seen.Add(emittedText);
                    return true;
                }
            }
        }
示例#5
0
        /// <summary>
        /// Emits each accepted upstream token unchanged and, when the token contains
        /// separator characters (those for which <c>IsNonWhiteSpaceChar</c> returns
        /// true), queues the pieces between separators so that subsequent calls emit
        /// them at the same position (position increment 0) with the full token's offsets.
        /// </summary>
        /// <returns>
        /// <c>true</c> when a queued piece or a fresh upstream token was emitted;
        /// <c>false</c> when the wrapped input has no more tokens.
        /// </returns>
        public override bool IncrementToken()
        {
            // Queued pieces from the previous token take priority.
            if (savedTerms.Count > 0)
            {
                // Start from the captured state of the full token, then override
                // term, position increment and offsets for this piece.
                RestoreState(current);

                SavedTerm pending = savedTerms[0];
                savedTerms.RemoveAt(0);

                termAtt.SetTermBuffer(pending.Term);
                posIncrAtt.PositionIncrement = 0;
                termOff.SetOffset(pending.StartOffset, pending.EndOffset);
                return true;
            }

            // Fetch the next upstream token, skipping any whose whole term is
            // rejected by the string overload of IsNonWhiteSpaceChar.
            while (true)
            {
                if (!input.IncrementToken())
                {
                    return false;
                }

                if (!IsNonWhiteSpaceChar(termAtt.Term))
                {
                    break;
                }
            }

            // Example from the original author: "A-Ha".
            string term = termAtt.Term;
            int pieceStart = 0;

            for (int i = 0; i < term.Length; i++)
            {
                if (!IsNonWhiteSpaceChar(term[i]))
                {
                    continue;
                }

                // Separator hit: queue the run collected since the last separator,
                // if there is one.
                if (i > pieceStart)
                {
                    savedTerms.Add(new SavedTerm
                    {
                        Term = term.Substring(pieceStart, i - pieceStart),
                        StartOffset = termOff.StartOffset,
                        EndOffset = termOff.EndOffset
                    });
                }

                pieceStart = i + 1;
            }

            // Trailing run after the last separator. It equals the whole term when
            // there was no separator at all, in which case nothing is queued.
            string tail = term.Substring(pieceStart);
            if (tail.Length > 0 && tail != term)
            {
                savedTerms.Add(new SavedTerm
                {
                    Term = tail,
                    StartOffset = termOff.StartOffset,
                    EndOffset = termOff.EndOffset
                });
            }

            // Remember the full token's state so queued pieces can restore it.
            if (savedTerms.Count > 0)
            {
                current = CaptureState();
            }

            return true;
        }