/// <summary>
/// Turns the characters accumulated in <paramref name="buffer"/> into one token,
/// empties the buffer and moves the position counters past the flushed lexeme.
/// </summary>
/// <param name="buffer">Accumulated lexeme characters; cleared when a token is produced.</param>
/// <param name="absolutePosition">
/// Position handed to the token; increased by the lexeme length afterwards.
/// </param>
/// <param name="lineNumber">Line number handed to the token (not modified here).</param>
/// <param name="lexemeStartPositionInLine">
/// Column handed to the token; increased by the lexeme length afterwards.
/// </param>
/// <returns>
/// <c>Option&lt;IToken&gt;.None</c> when the buffer is empty (nothing is modified in that case);
/// otherwise the token built from the buffered text.
/// </returns>
private Option<IToken> FlushBuffer(
    StringBuilder buffer,
    ref uint absolutePosition,
    uint lineNumber,
    ref uint lexemeStartPositionInLine
)
{
    // Guard: an empty buffer yields no token and leaves every counter untouched.
    if (buffer.Length == 0)
    {
        return Option<IToken>.None;
    }

    var text = buffer.ToString();

    // Classification order: keyword, identifier, integer literal, real literal.
    // The alternatives are chained with `||` (presumably the first one that produces
    // a value wins - confirm against the Option implementation); anything that matches
    // none of them is wrapped in an UnrecognizedToken.
    var token =
        KeywordToken.FromString(text, absolutePosition, lineNumber, lexemeStartPositionInLine)
        || IdentifierToken.FromString(text, absolutePosition, lineNumber, lexemeStartPositionInLine)
        || IntegerLiteralToken.FromString(text, absolutePosition, lineNumber, lexemeStartPositionInLine)
        || RealLiteralToken.FromString(text, absolutePosition, lineNumber, lexemeStartPositionInLine)
        || new UnrecognizedToken(text, absolutePosition, lineNumber, lexemeStartPositionInLine);

    buffer.Clear();

    // The token was stamped with the start position; now step past the lexeme.
    var consumed = (uint)text.Length;
    absolutePosition += consumed;
    lexemeStartPositionInLine += consumed;

    return token;
}
/// <summary>
/// Lazily splits the characters read from <paramref name="source"/> into tokens.
/// </summary>
/// <remarks>
/// Characters that are not whitespace and not one of the handled symbols are accumulated
/// in a buffer. The buffer is flushed through <c>FlushBuffer</c> whenever whitespace or a
/// symbol is met, and once more when the input is exhausted.
/// Line number, column and absolute position all start at 1.
/// Two-character operators recognised here: <c>..</c>, <c>//</c> (comment to end of line),
/// <c>/=</c>, <c>:=</c>, <c>&gt;=</c> and <c>&lt;=</c>.
/// </remarks>
/// <param name="source">Reader the characters are pulled from.</param>
/// <returns>The tokens in source order, produced on demand.</returns>
public IEnumerable<IToken> Tokenize(SafeStreamReader source)
{
    uint lineNumber = 1;
    uint lexemeStartPositionInLine = 1;
    uint absolutePosition = 1;
    var maybeCurrentChar = Option<int>.None;
    var currentLexemeBuffer = new StringBuilder();
    var maybeToken = Option<IToken>.None;

    while ((maybeCurrentChar = source.Read()).IsSome)
    {
        var currentChar = maybeCurrentChar.Value();
        maybeToken = Option<IToken>.None;

        switch (currentChar)
        {
            case var c when string.IsNullOrWhiteSpace(char.ConvertFromUtf32(c)):
                // Whitespace terminates the current lexeme: emit whatever is buffered,
                // then account for the whitespace character itself.
                maybeToken = FlushBuffer(
                    currentLexemeBuffer,
                    ref absolutePosition,
                    lineNumber,
                    ref lexemeStartPositionInLine
                );
                if (maybeToken.IsSome)
                {
                    yield return maybeToken.ValueUnsafe();
                }

                switch (c)
                {
                    case '\r':
                        // '\r' is only a valid line break when followed by '\n'. The follower
                        // is consumed either way; a different follower is reported together
                        // with the '\r' as one unrecognized token.
                        var maybeFollower = source.Read();
                        yield return maybeFollower
                            .Some<IToken>(cn => cn == '\n'
                                ? (IToken) new NewLineSymbolToken(
                                    absolutePosition,
                                    lineNumber,
                                    lexemeStartPositionInLine
                                )
                                : (IToken) new UnrecognizedToken(
                                    // Cast to char so the lexeme holds the character,
                                    // not its numeric code.
                                    $"\r{(char)cn}",
                                    absolutePosition,
                                    lineNumber,
                                    lexemeStartPositionInLine
                                )
                            )
                            .None(new UnrecognizedToken(
                                "\r",
                                absolutePosition,
                                lineNumber,
                                lexemeStartPositionInLine
                            ));
                        // A lone '\r' at end of input consumed one character, not two.
                        absolutePosition += maybeFollower.IsSome ? 2u : 1u;
                        lineNumber += 1;
                        lexemeStartPositionInLine = 1;
                        break;

                    case '\n':
                        yield return new NewLineSymbolToken(
                            absolutePosition,
                            lineNumber,
                            lexemeStartPositionInLine
                        );
                        absolutePosition += 1;
                        lineNumber += 1;
                        lexemeStartPositionInLine = 1;
                        break;

                    default:
                        // Any other whitespace is skipped without producing a token.
                        absolutePosition += 1;
                        lexemeStartPositionInLine += 1;
                        break;
                }
                break;

            case '.':
                // The buffer is NOT flushed up front: when a digit follows, the dot belongs
                // to the lexeme being built (e.g. a real literal) and is appended instead.
                var currentLexeme = currentLexemeBuffer.ToString();
                // NOTE(review): unlike FlushBuffer, keyword and real-literal classification
                // are not attempted here, so such a lexeme directly before '.' becomes an
                // identifier / unrecognized token - confirm this is intended.
                var maybeBeforeToken =
                    IntegerLiteralToken.FromString(
                        currentLexeme,
                        absolutePosition,
                        lineNumber,
                        lexemeStartPositionInLine
                    )
                    || IdentifierToken.FromString(
                        currentLexeme,
                        absolutePosition,
                        lineNumber,
                        lexemeStartPositionInLine
                    )
                    || UnrecognizedToken.FromString(
                        currentLexeme,
                        absolutePosition,
                        lineNumber,
                        lexemeStartPositionInLine
                    );

                // Emits the pending token (if any) followed by a range ("..") or dot (".")
                // symbol, clears the buffer and advances the counters past both lexemes.
                ImmutableList<IToken> EmitPendingThenSymbol(bool isRange)
                {
                    var pendingLength = maybeBeforeToken
                        .Map(t => (uint)t.Lexeme.Length)
                        .IfNone(0u);
                    absolutePosition += pendingLength;
                    lexemeStartPositionInLine += pendingLength;

                    IToken symbol = isRange
                        ? (IToken) new RangeSymbolToken(
                            absolutePosition,
                            lineNumber,
                            lexemeStartPositionInLine
                        )
                        : (IToken) new DotSymbolToken(
                            absolutePosition,
                            lineNumber,
                            lexemeStartPositionInLine
                        );
                    var emitted = maybeBeforeToken
                        .ToImmutableList()
                        .Add(symbol);
                    currentLexemeBuffer.Clear();

                    var symbolLength = (uint)symbol.Lexeme.Length;
                    absolutePosition += symbolLength;
                    lexemeStartPositionInLine += symbolLength;
                    return emitted;
                }

                var tokens = source.Peek()
                    .Some<ImmutableList<IToken>>(c =>
                    {
                        switch (c)
                        {
                            case var _ when IsDigit(char.ConvertFromUtf32(c)):
                                // Dot followed by a digit: keep accumulating the lexeme.
                                currentLexemeBuffer.Append('.');
                                return ImmutableList<IToken>.Empty;

                            case '.':
                                // Consume the second dot of "..".
                                source.Read();
                                return EmitPendingThenSymbol(isRange: true);

                            default:
                                return EmitPendingThenSymbol(isRange: false);
                        }
                    })
                    // Dot is the last character of the input.
                    .None(() => EmitPendingThenSymbol(isRange: false));

                foreach (var token in tokens)
                {
                    yield return token;
                }
                break;

            case '/':
                maybeToken = FlushBuffer(
                    currentLexemeBuffer,
                    ref absolutePosition,
                    lineNumber,
                    ref lexemeStartPositionInLine
                );
                if (maybeToken.IsSome)
                {
                    yield return maybeToken.ValueUnsafe();
                }

                yield return source.Peek()
                    .Some<IToken>(c =>
                    {
                        switch (c)
                        {
                            case '/':
                                // "//" comment: the lexeme is the slash already consumed plus
                                // whatever ReadLine returns (presumably the rest of the line,
                                // starting at the second slash).
                                var commentContent = source.ReadLine();
                                var commentToken = new CommentToken(
                                    $"/{commentContent}",
                                    absolutePosition,
                                    lineNumber,
                                    lexemeStartPositionInLine
                                );
                                // NOTE(review): the line terminator swallowed by ReadLine is
                                // not counted in absolutePosition - confirm whether it should be.
                                absolutePosition += (uint)commentContent.Length;
                                lineNumber += 1;
                                // Set to 0 because the shared "+= 1" after this expression
                                // brings the column to 1 for the next line.
                                lexemeStartPositionInLine = 0;
                                return commentToken;

                            case '=':
                                var notEqualsToken = new NotEqualsOperatorToken(
                                    absolutePosition,
                                    lineNumber,
                                    lexemeStartPositionInLine
                                );
                                source.Read();
                                absolutePosition += 1;
                                // Advance (not reset) the column for the '=' character,
                                // matching the ":=", ">=" and "<=" branches.
                                lexemeStartPositionInLine += 1;
                                return notEqualsToken;

                            default:
                                // Use the tracked character position like every other token,
                                // rather than the underlying stream's position.
                                return new DivideOperatorToken(
                                    absolutePosition,
                                    lineNumber,
                                    lexemeStartPositionInLine
                                );
                        }
                    })
                    .None(() => new DivideOperatorToken(
                        absolutePosition,
                        lineNumber,
                        lexemeStartPositionInLine
                    ));
                // Accounts for the first '/' in every branch above.
                absolutePosition += 1;
                lexemeStartPositionInLine += 1;
                break;

            case ':':
                maybeToken = FlushBuffer(
                    currentLexemeBuffer,
                    ref absolutePosition,
                    lineNumber,
                    ref lexemeStartPositionInLine
                );
                if (maybeToken.IsSome)
                {
                    yield return maybeToken.ValueUnsafe();
                }

                // ":=" is an assignment, a lone ':' is a colon symbol.
                yield return source.Peek()
                    .Filter(c => c == '=')
                    .Some<IToken>(_ =>
                    {
                        var result = new AssignmentOperatorToken(
                            absolutePosition,
                            lineNumber,
                            lexemeStartPositionInLine
                        );
                        source.Read();
                        absolutePosition += 1;
                        lexemeStartPositionInLine += 1;
                        return result;
                    })
                    .None(new ColonSymbolToken(
                        absolutePosition,
                        lineNumber,
                        lexemeStartPositionInLine
                    ));
                absolutePosition += 1;
                lexemeStartPositionInLine += 1;
                break;

            case '>':
                maybeToken = FlushBuffer(
                    currentLexemeBuffer,
                    ref absolutePosition,
                    lineNumber,
                    ref lexemeStartPositionInLine
                );
                if (maybeToken.IsSome)
                {
                    yield return maybeToken.ValueUnsafe();
                }

                // ">=" versus ">".
                yield return source.Peek()
                    .Filter(c => c == '=')
                    .Some<IToken>(_ =>
                    {
                        var result = new GeOperatorToken(
                            absolutePosition,
                            lineNumber,
                            lexemeStartPositionInLine
                        );
                        source.Read();
                        absolutePosition += 1;
                        lexemeStartPositionInLine += 1;
                        return result;
                    })
                    .None(new GtOperatorToken(
                        absolutePosition,
                        lineNumber,
                        lexemeStartPositionInLine
                    ));
                absolutePosition += 1;
                lexemeStartPositionInLine += 1;
                break;

            case '<':
                maybeToken = FlushBuffer(
                    currentLexemeBuffer,
                    ref absolutePosition,
                    lineNumber,
                    ref lexemeStartPositionInLine
                );
                if (maybeToken.IsSome)
                {
                    yield return maybeToken.ValueUnsafe();
                }

                // "<=" versus "<".
                yield return source.Peek()
                    .Filter(c => c == '=')
                    .Some<IToken>(_ =>
                    {
                        var result = new LeOperatorToken(
                            absolutePosition,
                            lineNumber,
                            lexemeStartPositionInLine
                        );
                        source.Read();
                        absolutePosition += 1;
                        lexemeStartPositionInLine += 1;
                        return result;
                    })
                    .None(new LtOperatorToken(
                        absolutePosition,
                        lineNumber,
                        lexemeStartPositionInLine
                    ));
                absolutePosition += 1;
                lexemeStartPositionInLine += 1;
                break;

            case '*':
            case '%':
            case '+':
            case '-':
            case '=':
            case ',':
            case '[':
            case ']':
            case '(':
            case ')':
            case ';':
                maybeToken = FlushBuffer(
                    currentLexemeBuffer,
                    ref absolutePosition,
                    lineNumber,
                    ref lexemeStartPositionInLine
                );
                if (maybeToken.IsSome)
                {
                    yield return maybeToken.ValueUnsafe();
                }

                // Single-character symbols are built through the SymbolLexemes lookup.
                var symbolText = ((char)currentChar).ToString();
                yield return SymbolLexemes
                    .TryGetValue(symbolText)
                    .Some(cons => cons(
                        absolutePosition,
                        lineNumber,
                        lexemeStartPositionInLine
                    ))
                    // Report the character itself, not the digits of its numeric code.
                    .None(() => new UnrecognizedToken(
                        symbolText,
                        absolutePosition,
                        lineNumber,
                        lexemeStartPositionInLine
                    ));
                absolutePosition += 1;
                lexemeStartPositionInLine += 1;
                break;

            default:
                // Part of a longer lexeme: keep accumulating.
                currentLexemeBuffer.Append(char.ConvertFromUtf32(currentChar));
                break;
        }
    }

    // End of input: emit whatever is still buffered.
    maybeToken = FlushBuffer(
        currentLexemeBuffer,
        ref absolutePosition,
        lineNumber,
        ref lexemeStartPositionInLine
    );
    if (maybeToken.IsSome)
    {
        yield return maybeToken.ValueUnsafe();
    }
}