/// <summary>
/// Advances to the next output token.
/// Each input token's term is split with <c>TokenizingHelper.CamelCaseSplit</c>;
/// every sub-term is queued as its own token, and for every sub-term after the
/// first a shingle (previous sub-term + current sub-term) is queued just before
/// it with a position increment of 0.
/// </summary>
/// <returns>
/// <c>true</c> when a token was published through <c>SetAttributes</c>;
/// <c>false</c> once the queue is empty and the wrapped input is exhausted.
/// </returns>
public override bool IncrementToken()
{
    // Tokens buffered by an earlier call are emitted before reading more input.
    if (_queue.Count > 0)
    {
        SetAttributes(_queue.Dequeue());
        return true;
    }

    // Position increments of input tokens that produced no output; added to the
    // next emitted token so that dropping a token does not shift later positions.
    int skippedIncrement = 0;

    // Keep consuming input until a token yields at least one sub-term. Returning
    // false for a token that splits into nothing would wrongly signal
    // end-of-stream while the input still has tokens.
    while (input.IncrementToken())
    {
        string term = _termAttribute.Term;
        int start = _offsetAttribute.StartOffset;
        int prevStart = start;
        int positionIncrement = _positionIncrementAttribute.PositionIncrement + skippedIncrement;
        string prev = string.Empty;

        // NOTE(review): offsets are derived by accumulating sub-term lengths,
        // which assumes the split pieces are contiguous slices starting at the
        // beginning of the term - confirm against CamelCaseSplit.
        foreach (string subTerm in TokenizingHelper.CamelCaseSplit(term))
        {
            if (prev != string.Empty)
            {
                // Shingle of the previous and current sub-term; increment 0
                // stacks it on the previous sub-term's position.
                string shingle = prev + subTerm;
                _queue.Enqueue(new Tuple<string, int, int, int>(shingle, prevStart, prevStart + shingle.Length, 0));
            }

            _queue.Enqueue(new Tuple<string, int, int, int>(subTerm, start, start + subTerm.Length, positionIncrement));

            // Only the first sub-term inherits the input token's increment.
            positionIncrement = 1;
            prevStart = start;
            start += subTerm.Length;
            prev = subTerm;
        }

        if (_queue.Count > 0)
        {
            SetAttributes(_queue.Dequeue());
            return true;
        }

        // Nothing was queued, so positionIncrement still holds this token's
        // (accumulated) increment; carry it over to the next input token.
        skippedIncrement = positionIncrement;
    }

    return false;
}
/// <summary>
/// Builds the analysis chain for a reader: a <c>DotTokenizer</c> wrapped, in
/// order, by a <c>CamelCaseFilter</c>, a <c>LowerCaseFilter</c> and a
/// <c>StopFilter</c> using the stop words from
/// <c>TokenizingHelper.GetStopWords()</c>.
/// </summary>
/// <param name="fieldName">Name of the field being analysed; not used by this implementation.</param>
/// <param name="reader">Source text handed to the tokenizer.</param>
/// <returns>The outermost <c>StopFilter</c> of the chain.</returns>
public override TokenStream TokenStream(string fieldName, TextReader reader)
{
    // Stages are created innermost-first, matching the evaluation order of the
    // equivalent nested constructor expression.
    var tokenizer = new DotTokenizer(reader);
    var camelCaseSplit = new CamelCaseFilter(tokenizer);
    var lowerCased = new LowerCaseFilter(camelCaseSplit);
    var stopWords = TokenizingHelper.GetStopWords();

    // NOTE(review): the leading 'true' is presumably StopFilter's
    // enablePositionIncrements flag - confirm against the Lucene.Net version in use.
    return new StopFilter(true, lowerCased, stopWords);
}
/// <summary>
/// Advances to the next output token.
/// For each input token the original term is queued first, unchanged. Its
/// pieces from <c>TokenizingHelper.CamelCaseSplit</c> follow, together with
/// shingles joining each pair of adjacent pieces; any piece or shingle equal
/// to the original term is skipped so the term is not emitted twice.
/// </summary>
/// <returns>
/// <c>true</c> when a token was published through <c>SetAttributes</c>;
/// <c>false</c> when nothing is queued and the wrapped input has no more tokens.
/// </returns>
public override bool IncrementToken()
{
    // Refill only when nothing is pending from a previous input token.
    if (_queue.Count == 0)
    {
        if (!input.IncrementToken())
        {
            return false;
        }

        string fullTerm = _termAttribute.Term;
        int tokenStart = _offsetAttribute.StartOffset;

        // The untouched input token always goes out first.
        _queue.Enqueue(new TokenAttributes
        {
            TermBuffer = fullTerm,
            StartOffset = tokenStart,
            EndOffset = _offsetAttribute.EndOffset,
            PositionIncrement = _positionIncrementAttribute.PositionIncrement
        });

        string previousPart = string.Empty;
        int previousOffset = tokenStart;
        int partOffset = tokenStart;

        // 0 for the first piece, so it shares the position of the full term
        // queued above; 1 for every later piece.
        int partIncrement = 0;

        foreach (string part in TokenizingHelper.CamelCaseSplit(fullTerm))
        {
            if (previousPart != string.Empty)
            {
                // Shingle of the previous and current piece, stacked
                // (increment 0) on the preceding token's position.
                string pair = previousPart + part;
                if (pair != fullTerm)
                {
                    _queue.Enqueue(new TokenAttributes
                    {
                        TermBuffer = pair,
                        StartOffset = previousOffset,
                        EndOffset = previousOffset + pair.Length,
                        PositionIncrement = 0
                    });
                }
            }

            if (part != fullTerm)
            {
                _queue.Enqueue(new TokenAttributes
                {
                    TermBuffer = part,
                    StartOffset = partOffset,
                    EndOffset = partOffset + part.Length,
                    PositionIncrement = partIncrement
                });
            }

            // Offsets advance by piece length, whether or not the piece was queued.
            partIncrement = 1;
            previousOffset = partOffset;
            partOffset += part.Length;
            previousPart = part;
        }
    }

    if (_queue.Count == 0)
    {
        return false;
    }

    SetAttributes(_queue.Dequeue());
    return true;
}