/// <summary>
/// Reads the next token, emitting runs of CJK characters as overlapping
/// two-character tokens and delegating everything else to the base tokenizer.
/// </summary>
/// <remarks>
/// <para>
/// Letters and numerals are pushed back and handed to <c>base.Next()</c>.
/// A CJK character followed by another CJK character yields a two-character
/// <see cref="TokenType.CJK"/> token; when a third CJK character follows, the
/// reader is rewound by one so the second character also starts the next pair.
/// A CJK character followed by a non-CJK character is pushed back (together
/// with its follower) and tokenized by <c>base.Next()</c>.
/// Any other character is returned as a single <see cref="TokenType.PUNC"/> token.
/// </para>
/// <para>
/// <c>_beginState</c> is true when no CJK pair has been emitted since the last
/// non-CJK token. It decides whether a lone CJK character at end of input is
/// emitted on its own (true) or skipped (false).
/// </para>
/// </remarks>
/// <returns>
/// The next <see cref="Token"/>, or <c>null</c> when the reader returns a null
/// character, or when a lone trailing CJK character is skipped because
/// <c>_beginState</c> is false.
/// </returns>
public override Token Next()
{
    var code = _reader.Read();
    if (code.IsNull())
    {
        return null;
    }

    if (code.IsLetterCase() || code.IsNumeralCase())
    {
        // A letter/numeral token interrupts any CJK run, so the next CJK
        // character starts a fresh run. Without this reset, a single CJK
        // character at end of input that follows an alphanumeric token
        // (e.g. the last character of "中文a字") was silently dropped.
        _beginState = true;

        // Push the character back so the base tokenizer sees the whole word/number.
        _reader.Seek(_reader.Position - 1);
        return base.Next();
    }

    if (code.IsCjkCase())
    {
        var nextCode = _reader.Read();
        if (nextCode.IsNull())
        {
            // Lone CJK character at end of input: emit it only when it has not
            // been preceded by a CJK pair since the last non-CJK token.
            // NOTE(review): assumes Read() does not advance Position once it
            // returns a null character — confirm against the reader.
            if (_beginState)
            {
                return new Token(code.ToString(), TokenType.CJK);
            }

            return null;
        }

        if (nextCode.IsCjkCase())
        {
            _beginState = false;

            // When the run continues, rewind one character so the next call
            // pairs nextCode with its successor (overlapping pairs).
            if (_reader.Peek().IsCjkCase())
            {
                _reader.Seek(_reader.Position - 1);
            }

            return new Token(new string(new char[] { code, nextCode }), TokenType.CJK);
        }

        // nextCode may be a letter, numeral or punctuation: push both
        // characters back and let the base tokenizer handle them.
        _reader.Seek(_reader.Position - 2);
        return base.Next();
    }

    // Anything else is a single punctuation token and ends the current CJK run.
    _beginState = true;
    return new Token(code.ToString(), TokenType.PUNC);
}
/// <summary>
/// Reads the next token from the underlying reader.
/// </summary>
/// <remarks>
/// <list type="bullet">
/// <item><description>A CJK character is returned on its own as a <see cref="TokenType.CJK"/> token.</description></item>
/// <item><description>A letter starts an <see cref="TokenType.ALPHANUM"/> token that extends over following
/// letters and numerals, and over characters listed in <c>AlphaNumStops</c> as described in the body.</description></item>
/// <item><description>A numeral starts a <see cref="TokenType.NUM"/> token that extends over following numerals
/// and letters (a letter turns it into <see cref="TokenType.ALPHANUM"/>), and over a flagged
/// <c>NumStops</c> character when a numeral follows it.</description></item>
/// <item><description>Any other character is returned as a single <see cref="TokenType.PUNC"/> token.</description></item>
/// </list>
/// NOTE(review): the length computation assumes <c>_reader.Read()</c> does not advance
/// <c>Position</c> once it returns a null character — confirm against the reader.
/// </remarks>
/// <returns>The next <see cref="Token"/>, or <c>null</c> when the reader returns a null character.</returns>
public virtual Token Next()
{
    // Remember where the token starts so its text can be re-read in one go.
    var offset = _reader.Position;
    var code = _reader.Read();
    if (code.IsNull())
    {
        return null;
    }

    if (code.IsCjkCase())
    {
        return new Token(char.ToString(code), TokenType.CJK);
    }

    if (code.IsLetterCase())
    {
        while (!(code = _reader.Read()).IsNull())
        {
            if (code.IsLetterCase() || code.IsNumeralCase())
            {
                continue;
            }

            var period = false;
            if (AlphaNumStops.TryGetValue(code, out period))
            {
                if (period)
                {
                    // A flagged stop joins the token only when something
                    // joinable follows it. The original tested
                    // code.IsNumeralCase() here, which is always false for a
                    // stop character; the lookahead character is what matters.
                    var nextCode = _reader.Peek();
                    if (nextCode.IsLetterCase() || nextCode.IsNumeralCase() || AlphaNumStops.ContainsKey(nextCode))
                    {
                        continue;
                    }
                }

                // The stop character stays consumed, so it ends up as the
                // last character of the token.
                break;
            }

            // Not part of the token: push the character back.
            _reader.Seek(_reader.Position - 1);
            break;
        }

        var length = _reader.Position - offset;
        _reader.Seek(offset);
        return new Token(new string(_reader.Read(length)), TokenType.ALPHANUM);
    }

    if (code.IsNumeralCase())
    {
        // Becomes true as soon as a letter is seen inside the number.
        var mixed = false;
        while (!(code = _reader.Read()).IsNull())
        {
            if (code.IsNumeralCase())
            {
                continue;
            }

            if (code.IsLetterCase())
            {
                mixed = true;
                continue;
            }

            var period = false;
            if (NumStops.TryGetValue(code, out period) && period)
            {
                // A flagged stop joins the number only when a numeral follows it.
                var nextCode = _reader.Peek();
                if (nextCode.IsNumeralCase())
                {
                    continue;
                }
            }

            // Not part of the token: push the character back.
            _reader.Seek(_reader.Position - 1);
            break;
        }

        var length = _reader.Position - offset;
        _reader.Seek(offset);
        return new Token(new string(_reader.Read(length)), mixed ? TokenType.ALPHANUM : TokenType.NUM);
    }

    return new Token(char.ToString(code), TokenType.PUNC);
}