/// <summary>
/// Turns any text accumulated in <paramref name="buffer"/> into a token, empties the
/// buffer, and advances both position counters by the length of the consumed text.
/// </summary>
/// <param name="buffer">The pending lexeme characters; cleared when a token is produced.</param>
/// <param name="absolutePosition">Passed to the token factories, then advanced by the lexeme length.</param>
/// <param name="lineNumber">Passed unchanged to the token factories.</param>
/// <param name="lexemeStartPositionInLine">Passed to the token factories, then advanced by the lexeme length.</param>
/// <returns>
/// <c>Option&lt;IToken&gt;.None</c> when the buffer is empty; otherwise the result of the
/// token factory chain below.
/// </returns>
private Option<IToken> FlushBuffer(
    StringBuilder buffer,
    ref uint absolutePosition,
    uint lineNumber,
    ref uint lexemeStartPositionInLine
)
{
    // nothing buffered, so there is no token to emit
    if (buffer.Length <= 0)
    {
        return Option<IToken>.None;
    }

    var text = buffer.ToString();
    var consumedLength = (uint)text.Length;

    // the candidates are combined with the option type's "||" operator in this fixed order
    // (presumably first-match-wins - confirm against the Option implementation), with
    // UnrecognizedToken as the final fallback
    var token =
        KeywordToken.FromString(text, absolutePosition, lineNumber, lexemeStartPositionInLine)
        || IdentifierToken.FromString(text, absolutePosition, lineNumber, lexemeStartPositionInLine)
        || IntegerLiteralToken.FromString(text, absolutePosition, lineNumber, lexemeStartPositionInLine)
        || RealLiteralToken.FromString(text, absolutePosition, lineNumber, lexemeStartPositionInLine)
        || new UnrecognizedToken(text, absolutePosition, lineNumber, lexemeStartPositionInLine);

    // the positions are only moved after the token has captured its start offsets
    buffer.Clear();
    absolutePosition += consumedLength;
    lexemeStartPositionInLine += consumedLength;

    return token;
}
/// <summary>
/// Reads a single numeric literal (an integerValue or a realValue, as defined in the
/// remarks) from the current position of <paramref name="reader"/>.
/// </summary>
/// <param name="reader">The source reader positioned at the first character of the literal.</param>
/// <returns>
/// The literal token that was read (an <see cref="IntegerLiteralToken"/> or a
/// <see cref="RealLiteralToken"/>) together with a new <see cref="Lexer"/> wrapping the
/// reader state after the consumed characters.
/// </returns>
/// <exception cref="UnexpectedCharacterException">
/// Thrown when the characters read do not form a valid literal of the detected kind.
/// </exception>
/// <exception cref="InvalidOperationException">
/// Thrown when a realValue has an exponent part ("e" / "E"), which is not implemented here.
/// </exception>
/// <remarks>
///
/// See https://www.dmtf.org/sites/default/files/standards/documents/DSP0221_3.0.1.pdf
///
/// 7.6.1.1 Integer value
///
/// No whitespace is allowed between the elements of the rules in this ABNF section.
///
///     integerValue         = binaryValue / octalValue / hexValue / decimalValue
///
///     binaryValue          = [ "+" / "-" ] 1*binaryDigit ( "b" / "B" )
///     binaryDigit          = "0" / "1"
///
///     octalValue           = [ "+" / "-" ] unsignedOctalValue
///     unsignedOctalValue   = "0" 1*octalDigit
///     octalDigit           = "0" / "1" / "2" / "3" / "4" / "5" / "6" / "7"
///
///     hexValue             = [ "+" / "-" ] ( "0x" / "0X" ) 1*hexDigit
///     hexDigit             = decimalDigit / "a" / "A" / "b" / "B" / "c" / "C" /
///                                           "d" / "D" / "e" / "E" / "f" / "F"
///
///     decimalValue         = [ "+" / "-" ] unsignedDecimalValue
///     unsignedDecimalValue = "0" / positiveDecimalDigit *decimalDigit
///
///     decimalDigit         = "0" / positiveDecimalDigit
///     positiveDecimalDigit = "1"..."9"
///
/// 7.6.1.2 Real value
///
/// No whitespace is allowed between the elements of the rules in this ABNF section.
///
///     realValue            = [ "+" / "-" ] *decimalDigit "." 1*decimalDigit
///                            [ ( "e" / "E" ) [ "+" / "-" ] 1*decimalDigit ]
///
///     decimalDigit         = "0" / positiveDecimalDigit
///     positiveDecimalDigit = "1"..."9"
///
/// </remarks>
public static (Token, Lexer) ReadNumericLiteralToken(SourceReader reader)
{

    int ParseBinaryValueDigits(IEnumerable<SourceChar> binaryChars, SourceChar sign)
    {
        return ParseIntegerValueDigits(
            new Dictionary<char, int>
            {
                { '0', 0 }, { '1', 1 }
            },
            2, binaryChars, sign
        );
    }

    int ParseOctalValueDigits(IEnumerable<SourceChar> octalChars, SourceChar sign)
    {
        return ParseIntegerValueDigits(
            new Dictionary<char, int>
            {
                { '0', 0 }, { '1', 1 }, { '2', 2 }, { '3', 3 }, { '4', 4 }, { '5', 5 }, { '6', 6 }, { '7', 7 }
            },
            8, octalChars, sign
        );
    }

    int ParseHexValueDigits(IEnumerable<SourceChar> hexChars, SourceChar sign)
    {
        return ParseIntegerValueDigits(
            new Dictionary<char, int>
            {
                { '0', 0 }, { '1', 1 }, { '2', 2 }, { '3', 3 }, { '4', 4 }, { '5', 5 }, { '6', 6 }, { '7', 7 }, { '8', 8 }, { '9', 9 },
                { 'a', 10 }, { 'b', 11 }, { 'c', 12 }, { 'd', 13 }, { 'e', 14 }, { 'f', 15 },
                { 'A', 10 }, { 'B', 11 }, { 'C', 12 }, { 'D', 13 }, { 'E', 14 }, { 'F', 15 }
            },
            16, hexChars, sign
        );
    }

    int ParseDecimalValueDigits(IEnumerable<SourceChar> decimalChars, SourceChar sign)
    {
        return ParseIntegerValueDigits(
            new Dictionary<char, int>
            {
                { '0', 0 }, { '1', 1 }, { '2', 2 }, { '3', 3 }, { '4', 4 }, { '5', 5 }, { '6', 6 }, { '7', 7 }, { '8', 8 }, { '9', 9 }
            },
            10, decimalChars, sign
        );
    }

    // accumulates the digits most-significant-first in the given radix, then applies the sign.
    // a character that is not in the alphabet makes the dictionary indexer throw.
    // NOTE(review): the accumulator is a plain int, so very long digit runs wrap silently
    int ParseIntegerValueDigits(Dictionary<char, int> alphabet, int radix, IEnumerable<SourceChar> chars, SourceChar sign)
    {
        var literalValue = 0;
        foreach (var digit in chars)
        {
            var digitValue = alphabet[digit.Value];
            literalValue = (literalValue * radix) + digitValue;
        }
        if (sign?.Value == '-')
        {
            literalValue = -literalValue;
        }
        return literalValue;
    }

    const int stateLeadingSign = 1;
    const int stateFirstDigitBlock = 2;
    const int stateOctalOrDecimalValue = 3;
    const int stateBinaryValue = 4;
    const int stateOctalValue = 5;
    const int stateHexValue = 6;
    const int stateDecimalValue = 7;
    const int stateRealValue = 8;
    const int stateRealValueFraction = 9;
    const int stateRealValueExponent = 10;
    const int stateDone = 99;

    var thisReader = reader;
    var sourceChar = default(SourceChar);
    var sourceChars = new List<SourceChar>();

    var token = default(Token);
    var signChar = default(SourceChar);
    var firstDigitBlock = new List<SourceChar>();

    var currentState = stateLeadingSign;

    while (currentState != stateDone)
    {
        switch (currentState)
        {

            case stateLeadingSign:
                // we're reading the initial optional leading sign
                // [ "+" / "-" ]
                sourceChar = thisReader.Peek();
                switch (sourceChar.Value)
                {
                    case '+':
                    case '-':
                        (signChar, thisReader) = thisReader.Read();
                        sourceChars.Add(signChar);
                        break;
                }
                currentState = stateFirstDigitBlock;
                break;

            case stateFirstDigitBlock:
                // we're reading the first block of digits in the value,
                // but we won't necessarily know which type we're reading
                // until we've consumed more of the input stream
                //
                //     binaryValue  => 1*binaryDigit
                //     octalValue   => "0" 1*octalDigit
                //     hexValue     => ( "0x" / "0X" )
                //     decimalValue => positiveDecimalDigit *decimalDigit
                //     realValue    => *decimalDigit
                //
                if (thisReader.Peek('.'))
                {
                    // we're reading a realValue with no "*decimalDigit" characters before the "."
                    // e.g. ".45", "+.45", "-.45", so consume the decimal point
                    (sourceChar, thisReader) = thisReader.Read();
                    sourceChars.Add(sourceChar);
                    // and go to the next state
                    currentState = stateRealValueFraction;
                    break;
                }
                // we don't know which base the value is in yet, but if it's hexadecimal then
                // we should be reading the "0x" part here, so restrict digits to decimal in
                // all cases
                (firstDigitBlock, thisReader) = thisReader.ReadWhile(StringValidator.IsDecimalDigit);
                sourceChars.AddRange(firstDigitBlock);
                // now we can do some validation
                if (firstDigitBlock.Count == 0)
                {
                    // only realValue allows no digits in the first block, and
                    // we've already handled that at the start of this case
                    // NOTE(review): sourceChar still holds the character peeked in
                    // stateLeadingSign, so after a leading sign the sign itself is reported
                    throw new UnexpectedCharacterException(sourceChar);
                }
                // if we've reached the end of the stream then there's no suffix
                // (e.g. b, B, x, X, .) so this must be an octalValue or decimalValue
                if (thisReader.Eof())
                {
                    currentState = stateOctalOrDecimalValue;
                    break;
                }
                // check the next character to see if it tells us anything
                // about which type of literal we're reading
                sourceChar = thisReader.Peek();
                switch (sourceChar.Value)
                {
                    case 'b':
                    case 'B':
                        // binaryValue
                        currentState = stateBinaryValue;
                        break;
                    case 'x':
                    case 'X':
                        // hexValue
                        currentState = stateHexValue;
                        break;
                    case '.':
                        // realValue
                        currentState = stateRealValue;
                        break;
                    default:
                        // by elimination, this must be an octalValue or decimalValue
                        currentState = stateOctalOrDecimalValue;
                        break;
                }
                break;

            case stateOctalOrDecimalValue:
                // we're reading an octalValue or decimalValue, but we're not sure which yet...
                if ((firstDigitBlock.First().Value == '0') && (firstDigitBlock.Count > 1))
                {
                    currentState = stateOctalValue;
                }
                else
                {
                    currentState = stateDecimalValue;
                }
                break;

            case stateBinaryValue:
                // we're trying to read a binaryValue, so check all the characters in the digit block are valid,
                // i.e. 1*binaryDigit
                if (firstDigitBlock.Any(c => !StringValidator.IsBinaryDigit(c.Value)))
                {
                    throw new UnexpectedCharacterException(sourceChar);
                }
                // all the characters are valid, so consume the suffix
                (sourceChar, thisReader) = thisReader.Read(c => (c == 'b') || (c == 'B'));
                sourceChars.Add(sourceChar);
                // now build the return value
                var binaryValue = ParseBinaryValueDigits(firstDigitBlock, signChar);
                token = new IntegerLiteralToken(SourceExtent.From(sourceChars), IntegerKind.BinaryValue, binaryValue);
                // and we're done
                currentState = stateDone;
                break;

            case stateOctalValue:
                // we're trying to read an octalValue (since decimalValue can't start with a
                // leading '0') so check all the characters in the digit block are valid,
                // i.e. "0" 1*octalDigit
                if ((firstDigitBlock.Count < 2) || (firstDigitBlock.First().Value != '0'))
                {
                    throw new UnexpectedCharacterException(sourceChar);
                }
                if (firstDigitBlock.Skip(1).Any(c => !StringValidator.IsOctalDigit(c.Value)))
                {
                    throw new UnexpectedCharacterException(sourceChar);
                }
                // now build the return value
                var octalValue = ParseOctalValueDigits(firstDigitBlock, signChar);
                token = new IntegerLiteralToken(SourceExtent.From(sourceChars), IntegerKind.OctalValue, octalValue);
                // and we're done
                currentState = stateDone;
                break;

            case stateHexValue:
                // we're trying to read a hexValue, so we should have just read a leading zero
                if ((firstDigitBlock.Count != 1) || (firstDigitBlock.First().Value != '0'))
                {
                    throw new UnexpectedCharacterException(sourceChar);
                }
                // all the characters are valid, so consume the suffix
                (sourceChar, thisReader) = thisReader.Read(c => (c == 'x') || (c == 'X'));
                sourceChars.Add(sourceChar);
                // 1*hexDigit
                var hexDigits = default(List<SourceChar>);
                (hexDigits, thisReader) = thisReader.ReadWhile(StringValidator.IsHexDigit);
                if (hexDigits.Count == 0)
                {
                    throw new UnexpectedCharacterException(thisReader.Peek());
                }
                sourceChars.AddRange(hexDigits);
                // build the return value
                var hexValue = ParseHexValueDigits(hexDigits, signChar);
                token = new IntegerLiteralToken(SourceExtent.From(sourceChars), IntegerKind.HexValue, hexValue);
                // and we're done
                currentState = stateDone;
                break;

            case stateDecimalValue:
                // we're trying to read a decimalValue (since that's the only remaining option),
                // so check all the characters in the digit block are valid,
                // i.e. "0" / positiveDecimalDigit *decimalDigit
                if ((firstDigitBlock.Count == 1) && (firstDigitBlock.First().Value == '0'))
                {
                    // "0"
                }
                else if (!StringValidator.IsPositiveDecimalDigit(firstDigitBlock.First().Value))
                {
                    throw new UnexpectedCharacterException(sourceChar);
                }
                else if (firstDigitBlock.Skip(1).Any(c => !StringValidator.IsDecimalDigit(c.Value)))
                {
                    throw new UnexpectedCharacterException(sourceChar);
                }
                // build the return value
                var decimalValue = ParseDecimalValueDigits(firstDigitBlock, signChar);
                token = new IntegerLiteralToken(SourceExtent.From(sourceChars), IntegerKind.DecimalValue, decimalValue);
                // and we're done
                currentState = stateDone;
                break;

            case stateRealValue:
                // we're trying to read a realValue, so check all the characters in the digit block are valid,
                // i.e. *decimalDigit
                if (firstDigitBlock.Any(c => !StringValidator.IsDecimalDigit(c.Value)))
                {
                    throw new UnexpectedCharacterException(sourceChar);
                }
                // all the characters are valid, so consume the decimal point
                (sourceChar, thisReader) = thisReader.Read('.');
                sourceChars.Add(sourceChar);
                // and go to the next state
                currentState = stateRealValueFraction;
                break;

            case stateRealValueFraction:
                // 1*decimalDigit
                // (the fraction must be read with the decimal digit validator - a hex digit
                // validator would also swallow 'a'-'f' / 'A'-'F', including the exponent
                // marker 'e' / 'E', and the decimal digit parser below would then fail)
                var realFractionDigits = default(List<SourceChar>);
                (realFractionDigits, thisReader) = thisReader.ReadWhile(StringValidator.IsDecimalDigit);
                if (realFractionDigits.Count == 0)
                {
                    throw new UnexpectedCharacterException(thisReader.Peek());
                }
                sourceChars.AddRange(realFractionDigits);
                // ( "e" / "E" )
                if (!thisReader.Eof())
                {
                    sourceChar = thisReader.Peek();
                    if ((sourceChar.Value == 'e') || (sourceChar.Value == 'E'))
                    {
                        currentState = stateRealValueExponent;
                        break;
                    }
                }
                // build the return value - both parts carry the sign, so they are simply added
                var realIntegerValue = ParseDecimalValueDigits(firstDigitBlock, signChar);
                var realFractionValue = (double)ParseDecimalValueDigits(realFractionDigits, signChar);
                if (realFractionDigits.Any())
                {
                    // scale the fraction digits down to their position after the decimal point
                    realFractionValue = realFractionValue / Math.Pow(10, realFractionDigits.Count);
                }
                token = new RealLiteralToken(
                    SourceExtent.From(sourceChars),
                    realIntegerValue + realFractionValue
                );
                // and we're done
                currentState = stateDone;
                break;

            case stateRealValueExponent:
                // the exponent part of a realValue is not implemented
                throw new InvalidOperationException();

            case stateDone:
                // the main while loop should exit before we ever get here
                throw new InvalidOperationException();

            default:
                throw new InvalidOperationException();

        }
    }

    return (token, new Lexer(thisReader));

}
/// <summary>
/// Asserts that two tokens are equivalent: both null, or the same runtime type with
/// matching extents (start/end position, line, column and text) and a passing
/// type-specific <c>AreEqual</c> comparison.
/// </summary>
/// <param name="expectedToken">The token the test expects; may be null.</param>
/// <param name="actualToken">The token that was actually produced; may be null.</param>
/// <param name="index">Forwarded to <c>LexerHelper.GetAssertErrorMessage</c> with each failure message.</param>
/// <exception cref="NotImplementedException">
/// Thrown when <paramref name="expectedToken"/> is of a type that has no comparison case below.
/// </exception>
private static void AssertAreEqualInternal(Token expectedToken, Token actualToken, int index = -1)
{
    // builds the failure message for the given description
    string Describe(string description) => LexerHelper.GetAssertErrorMessage(description, index);

    // null handling: two nulls are equal, a single null is a failure
    if (expectedToken == null)
    {
        if (actualToken == null)
        {
            return;
        }
        Assert.Fail(Describe("expected is null, but actual is not null"));
    }
    if (actualToken == null)
    {
        Assert.Fail(Describe("expected is not null, but actual is null"));
    }

    // the runtime types must match exactly before anything else is compared
    Assert.AreEqual(expectedToken.GetType(), actualToken.GetType(), Describe("actual type does not match expected value"));

    // compare the source extents field by field so a failure pinpoints the mismatch
    var expectedExtent = expectedToken.Extent;
    var actualExtent = actualToken.Extent;
    Assert.AreEqual(expectedExtent.StartPosition.Position, actualExtent.StartPosition.Position, Describe("actual Start Position does not match expected value"));
    Assert.AreEqual(expectedExtent.StartPosition.LineNumber, actualExtent.StartPosition.LineNumber, Describe("actual Start Line does not match expected value"));
    Assert.AreEqual(expectedExtent.StartPosition.ColumnNumber, actualExtent.StartPosition.ColumnNumber, Describe("actual Start Column does not match expected value"));
    Assert.AreEqual(expectedExtent.EndPosition.Position, actualExtent.EndPosition.Position, Describe("actual End Position does not match expected value"));
    Assert.AreEqual(expectedExtent.EndPosition.LineNumber, actualExtent.EndPosition.LineNumber, Describe("actual End Line does not match expected value"));
    Assert.AreEqual(expectedExtent.EndPosition.ColumnNumber, actualExtent.EndPosition.ColumnNumber, Describe("actual End Column does not match expected value"));
    Assert.AreEqual(expectedExtent.Text, actualExtent.Text, Describe("actual Text does not match expected value"));

    // finally run the comparison that belongs to the concrete token type; the cases are
    // tested in this order, and the cast of actualToken mirrors the matched case
    bool tokensMatch;
    switch (expectedToken)
    {
        case AliasIdentifierToken expected:
            tokensMatch = AliasIdentifierToken.AreEqual(expected, (AliasIdentifierToken)actualToken);
            break;
        case AttributeCloseToken expected:
            tokensMatch = AttributeCloseToken.AreEqual(expected, (AttributeCloseToken)actualToken);
            break;
        case AttributeOpenToken expected:
            tokensMatch = AttributeOpenToken.AreEqual(expected, (AttributeOpenToken)actualToken);
            break;
        case BlockCloseToken expected:
            tokensMatch = BlockCloseToken.AreEqual(expected, (BlockCloseToken)actualToken);
            break;
        case BlockOpenToken expected:
            tokensMatch = BlockOpenToken.AreEqual(expected, (BlockOpenToken)actualToken);
            break;
        case BooleanLiteralToken expected:
            tokensMatch = BooleanLiteralToken.AreEqual(expected, (BooleanLiteralToken)actualToken);
            break;
        case ColonToken expected:
            tokensMatch = ColonToken.AreEqual(expected, (ColonToken)actualToken);
            break;
        case CommaToken expected:
            tokensMatch = CommaToken.AreEqual(expected, (CommaToken)actualToken);
            break;
        case CommentToken expected:
            tokensMatch = CommentToken.AreEqual(expected, (CommentToken)actualToken);
            break;
        case DotOperatorToken expected:
            tokensMatch = DotOperatorToken.AreEqual(expected, (DotOperatorToken)actualToken);
            break;
        case EqualsOperatorToken expected:
            tokensMatch = EqualsOperatorToken.AreEqual(expected, (EqualsOperatorToken)actualToken);
            break;
        case IdentifierToken expected:
            tokensMatch = IdentifierToken.AreEqual(expected, (IdentifierToken)actualToken);
            break;
        case IntegerLiteralToken expected:
            tokensMatch = IntegerLiteralToken.AreEqual(expected, (IntegerLiteralToken)actualToken);
            break;
        case NullLiteralToken expected:
            tokensMatch = NullLiteralToken.AreEqual(expected, (NullLiteralToken)actualToken);
            break;
        case ParenthesisCloseToken expected:
            tokensMatch = ParenthesisCloseToken.AreEqual(expected, (ParenthesisCloseToken)actualToken);
            break;
        case ParenthesisOpenToken expected:
            tokensMatch = ParenthesisOpenToken.AreEqual(expected, (ParenthesisOpenToken)actualToken);
            break;
        case PragmaToken expected:
            tokensMatch = PragmaToken.AreEqual(expected, (PragmaToken)actualToken);
            break;
        case RealLiteralToken expected:
            tokensMatch = RealLiteralToken.AreEqual(expected, (RealLiteralToken)actualToken);
            break;
        case StatementEndToken expected:
            tokensMatch = StatementEndToken.AreEqual(expected, (StatementEndToken)actualToken);
            break;
        case StringLiteralToken expected:
            tokensMatch = StringLiteralToken.AreEqual(expected, (StringLiteralToken)actualToken);
            break;
        case WhitespaceToken expected:
            tokensMatch = WhitespaceToken.AreEqual(expected, (WhitespaceToken)actualToken);
            break;
        default:
            throw new NotImplementedException($"Cannot compare type '{expectedToken.GetType().Name}'");
    }

    Assert.IsTrue(tokensMatch, Describe("actual token does not match expected token"));
}
/// <summary>
/// Initializes a new <see cref="RealValueAst"/> from a real literal token, storing the
/// token itself and copying its <c>Value</c>.
/// </summary>
/// <param name="realLiteralToken">The token this node is built from; must not be null.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="realLiteralToken"/> is null.
/// </exception>
public RealValueAst(RealLiteralToken realLiteralToken)
{
    // name the parameter (not the property / type) so that ParamName identifies the
    // argument the caller actually passed
    this.RealLiteralToken = realLiteralToken ?? throw new ArgumentNullException(nameof(realLiteralToken));
    this.Value = realLiteralToken.Value;
}