/// <summary>
/// Advances the stream, emitting the stemmed form of each input term and,
/// on the following call, the unstemmed text of that same term.
/// </summary>
/// <remarks>
/// The unstemmed copy is kept in <c>orginal</c> between calls. It is dropped
/// (so no second token is emitted) when the stemmer reports no change or when
/// <c>CharArrayIsEqual</c> reports the stemmed buffer equal to the copy.
/// </remarks>
/// <returns><c>true</c> when a term was produced; <c>false</c> when the input is exhausted.</returns>
public override bool IncrementToken()
{
    // A pending copy means the previous call emitted the stemmed form;
    // hand out the unstemmed text now and clear the pending slot.
    char[] pending = orginal;
    if (pending != null)
    {
        termAtt.SetTermBuffer(pending, 0, pending.Length);
        orginal = null;
        return true;
    }

    if (!input.IncrementToken())
    {
        return false;
    }

    // Snapshot the term before the stemmer gets a chance to replace it.
    int length = termAtt.TermLength();
    orginal = new char[length];
    Array.Copy(termAtt.TermBuffer(), 0, orginal, 0, length);

    bool stemmed = stemmer.Stem(termAtt.TermBuffer(), 0, length);
    if (stemmed)
    {
        termAtt.SetTermBuffer(stemmer.ResultBuffer, 0, stemmer.ResultLength);

        // NOTE(review): TermBuffer() is passed without its TermLength(); whether
        // CharArrayIsEqual copes with a buffer longer than the term is not
        // visible here - confirm against the helper.
        if (CharArrayIsEqual(orginal, termAtt.TermBuffer()))
        {
            orginal = null;
        }
    }

    return true;
}
/// <summary>
/// Buffers the whole input stream on the first call, merging tokens that share
/// both start offset and term text, then replays the buffered tokens one per call.
/// </summary>
/// <remarks>
/// When a duplicate is found, the already-buffered token takes the duplicate's
/// offsets and the duplicate is discarded. Once the replay runs past the end of
/// the buffer, <c>tokenList</c> is reset to <c>null</c>, so a later call starts a
/// new buffering pass over the input.
/// </remarks>
/// <returns><c>true</c> when a token was emitted; otherwise <c>false</c>.</returns>
public override bool IncrementToken()
{
    if (tokenList != null)
    {
        // Replay phase: step to the next buffered token.
        index++;
        if (index >= tokenList.Count)
        {
            tokenList = null;
            return false;
        }

        Token next = tokenList[index];
        termAtt.SetTermBuffer(next.TermBuffer(), 0, next.TermLength());
        termOff.SetOffset(next.StartOffset, next.EndOffset);
        return true;
    }

    // Buffering phase: drain the input, keeping one token per (start offset, term).
    tokenList = new List<Token>();
    while (input.IncrementToken())
    {
        Token candidate = new Token(termAtt.Term, termOff.StartOffset, termOff.EndOffset);

        bool merged = false;
        for (int i = 0; i < tokenList.Count && !merged; i++)
        {
            Token cached = tokenList[i];
            if (cached.StartOffset == candidate.StartOffset && cached.Term == candidate.Term)
            {
                // Same term at the same start: keep the cached token, adopt the newer offsets.
                cached.SetOffset(candidate.StartOffset, candidate.EndOffset);
                merged = true;
            }
        }

        if (!merged)
        {
            tokenList.Add(candidate);
        }
    }

    if (tokenList.Count == 0)
    {
        return false;
    }

    // Emit the first buffered token right away.
    index = 0;
    Token first = tokenList[index];
    termAtt.SetTermBuffer(first.TermBuffer(), 0, first.TermLength());
    termOff.SetOffset(first.StartOffset, first.EndOffset);
    return true;
}
/// <summary>
/// Reads the input one character at a time and emits the text accumulated so
/// far as the term, with offsets <c>0</c> to the accumulated length.
/// </summary>
/// <remarks>
/// Reading continues while the character just read is whitespace, so an
/// emitted term never ends in whitespace. When the reader returns no more
/// characters the stream is marked done and every later call returns <c>false</c>.
/// </remarks>
/// <returns><c>true</c> when a term was emitted; <c>false</c> at end of input.</returns>
public override bool IncrementToken()
{
    if (done)
    {
        return false;
    }

    char[] buffer;
    do
    {
        buffer = new char[1];
        int length = input.Read(buffer, 0, 1);
        if (length == 0)
        {
            done = true;
            return false;
        }

        sb.Append(buffer);
        termAtt.SetTermBuffer(sb.ToString().ToCharArray(), 0, sb.Length);
        offsetAtt.SetOffset(0, sb.Length);

        // Skip keywords that end in whitespace: keep reading until the last
        // character appended is not whitespace.
    }
    while (char.IsWhiteSpace(buffer[0]));

    return true;
}
/// <summary>
/// Emits the next part of the current input term that has not been emitted before.
/// </summary>
/// <remarks>
/// When <c>receivedText</c> is empty it is refilled with the next input term.
/// Parts are then requested from <c>GetNextPart()</c> until one is non-empty and
/// not yet in <c>seen</c>; that part becomes the term, with offsets <c>0</c> to
/// its length, and is added to <c>seen</c>. An empty part sends control back to
/// the refill step.
/// NOTE(review): presumably <c>GetNextPart()</c> consumes <c>receivedText</c>;
/// otherwise the outer loop would not terminate - confirm against the helper.
/// </remarks>
/// <returns>
/// <c>true</c> when a part was emitted; <c>false</c> when <c>receivedText</c>
/// is still empty after attempting to refill it from the input.
/// </returns>
public override bool IncrementToken()
{
    for (;;)
    {
        // Refill the working text from the next input term when it is empty.
        if (receivedText.Length == 0 && input.IncrementToken())
        {
            // Append the whole backing buffer, then trim to the real term length.
            receivedText.Append(termAtt.TermBuffer());
            receivedText.Length = termAtt.TermLength();
        }

        if (receivedText.Length == 0)
        {
            return false;
        }

        // Walk the parts of the current text; an empty part ends this round.
        string part = GetNextPart();
        while (part.Length > 0)
        {
            if (!seen.Contains(part))
            {
                termAtt.SetTermBuffer(part.ToCharArray(), 0, part.Length);
                offsetAtt.SetOffset(0, part.Length);
                seen.Add(part);
                return true;
            }

            part = GetNextPart();
        }
    }
}
/// <summary>
/// Emits each input term unchanged and then, on following calls, the pieces of
/// that term that lie between characters matched by <c>IsNonWhiteSpaceChar</c>.
/// </summary>
/// <remarks>
/// Input terms for which <c>IsNonWhiteSpaceChar(string)</c> returns <c>true</c>
/// are skipped entirely. Pieces are queued in <c>savedTerms</c> with the offsets
/// of the whole term, and are emitted with a position increment of 0 after the
/// captured attribute state has been restored. A term with no matching
/// character queues nothing. The original author's example was "A-Ha".
/// </remarks>
/// <returns><c>true</c> when a term was emitted; <c>false</c> when the input is exhausted.</returns>
public override bool IncrementToken()
{
    // Drain queued pieces of the previous term before pulling new input.
    if (savedTerms.Count > 0)
    {
        RestoreState(current);
        SavedTerm queued = savedTerms[0];
        savedTerms.RemoveAt(0);
        termAtt.SetTermBuffer(queued.Term);
        posIncrAtt.PositionIncrement = 0;
        termOff.SetOffset(queued.StartOffset, queued.EndOffset);
        return true;
    }

    // Pull input until a term is found that the string-level check does not reject.
    while (true)
    {
        if (!input.IncrementToken())
        {
            return false;
        }

        if (!IsNonWhiteSpaceChar(termAtt.Term))
        {
            break;
        }
    }

    string term = termAtt.Term;
    int runStart = 0;

    // Visit every character plus one virtual boundary at the end of the term.
    for (int i = 0; i <= term.Length; i++)
    {
        bool atEnd = i == term.Length;
        if (!atEnd && !IsNonWhiteSpaceChar(term[i]))
        {
            continue;
        }

        // Queue the run that just ended, unless it is empty or spans the whole
        // term (a whole-term run means nothing was split off).
        int runLength = i - runStart;
        if (runLength > 0 && runLength < term.Length)
        {
            SavedTerm piece = new SavedTerm();
            piece.Term = term.Substring(runStart, runLength);
            piece.StartOffset = termOff.StartOffset;
            piece.EndOffset = termOff.EndOffset;
            savedTerms.Add(piece);
        }

        runStart = i + 1;
    }

    // Remember the attribute state so queued pieces can be emitted from it later.
    if (savedTerms.Count > 0)
    {
        current = CaptureState();
    }

    return true;
}