}//Run

/// <summary>
/// Turns whatever has been accumulated in <c>buffer</c> into one token and appends it to <c>tokens</c>.
/// Does nothing when the buffer is empty and the lexer is neither inside a string nor inside a comment,
/// so an empty string literal or an empty comment still produces a token.
/// The buffer is cleared before the text is classified; when an error message is emitted
/// (bad string escape, numeric value too big, invalid identifier) no token is added.
/// </summary>
private void flush()
{
    if ((!isString) && (!isCommentBlock) && (!isCommentLine) && (buffer.Length == 0))
    {
        return;
    }

    string text = buffer.ToString();
    object value = null;
    buffer.Length = 0;

    var type = JSONTokenType.tUnknown;

    if (isString)
    {
        type = JSONTokenType.tStringLiteral;

        if (!isVerbatim)
        {
            try //expand escapes
            {
                text = JSONStrings.UnescapeString(text);
            }
            catch (StringEscapeErrorException err)
            {
                lexer.EmitMessage(MessageType.Error, (int)JSONMsgCode.eInvalidStringEscape, tagStartPos, null, err.ErroredEscape);
                return;
            }
        }
    }
    else if (isCommentLine && isDirective) //directives are treated similar to line comments
    {
        type = JSONTokenType.tDirective;
    }
    else if (isCommentBlock || isCommentLine)
    {
        type = JSONTokenType.tComment;
    }
    else
    {
        try
        {
            value = JSONNumbers.Convert(text, out type);
        }
        catch (ArgumentException err)
        {
            lexer.EmitMessage(MessageType.Error, (int)JSONMsgCode.eValueTooBig, tagStartPos, null, err.Message);
            return;
        }

        if (value == null) //not a number
        {
            type = JSONKeywords.Resolve(text);

            if (type == JSONTokenType.tIdentifier)
            {
                // Ordinal comparison: the prefix test must look at the first UTF-16 unit only.
                // A culture-sensitive StartsWith may ignore leading zero-width characters and
                // report a match even though Remove(0, 1) would then strip a different character.
                if (text.StartsWith("$", StringComparison.Ordinal))
                {
                    text = text.Remove(0, 1); //take care of verbatim names like: $class, $method, $var etc..

                    // the token now starts one character further, right after the removed '$'
                    tagStartPos = new SourcePosition(tagStartPos.LineNumber, tagStartPos.ColNumber + 1, tagStartPos.CharNumber + 1);
                }

                if (!JSONIdentifiers.Validate(text))
                {
                    lexer.EmitMessage(MessageType.Error, (int)JSONMsgCode.eInvalidIdentifier, tagStartPos, null, text);
                    return;
                }
            }
        } //not a number
    } //not string and not comment

    // for string literals the (possibly unescaped) text is also the token value
    if (type == JSONTokenType.tStringLiteral)
    {
        value = text;
    }

    tokens.Add(new JSONToken(lexer, type, tagStartPos, tagEndPos, text, value));
}
/// <summary>
/// Walks the source character by character and fills <c>tokens</c>, starting with a tBOF token.
/// The method is an iterator: it yields <c>true</c> each time more than BATCH_SIZE tokens
/// have been added since the previous yield, and once more after the tEOF token is appended.
/// When an unterminated string is detected during the walk, an error message is emitted and
/// iteration stops immediately without appending tEOF.
/// </summary>
/// <returns>A sequence of <c>true</c> values, one per produced batch of tokens.</returns>
public IEnumerable<bool> Run()
{
    const int BATCH_SIZE = 5;
    var countAtLastYield = 0;

    tokens.Add(new JSONToken(lexer, JSONTokenType.tBOF, srcPos(), srcPos(), String.Empty));

    // ---------------------------------------------------------------- main walk
    while (!source.EOF)
    {
        // NOTE(review): moveNext() is defined elsewhere; it is expected to refresh chr (current)
        // and nchr (look-ahead) - confirm against its definition.
        moveNext();

        // ---- line breaks
        if (chr == '\n' || chr == '\r')
        {
            if (isString)
            {
                if (!isVerbatim)
                {
                    // a regular string may not span lines
                    lexer.EmitMessage(MessageType.Error, (int)JSONMsgCode.eUnterminatedString, srcPos());
                    yield break; //no further parsing
                }

                bufferAdd(chr); // verbatim strings keep their line breaks
            }
            else if (isCommentBlock)
            {
                bufferAdd(chr); // block comments keep their line breaks
            }

            if (chr == '\n')
            {
                if (!(isString || isCommentBlock))
                {
                    // end of line terminates the pending token, including a line comment/directive
                    flush();

                    if (isCommentLine)
                    {
                        isCommentLine = false;
                        isDirective = false;
                    }

                    freshLine = true;
                }

                posLine++;
            }

            posCol = 0;
            continue;
        }

        if (isString)
        {
            // ---- inside a string literal
            if (!isVerbatim && chr == '\\' && nchr == '\\')
            {
                // escaped backslash, as in 'c:\\dir\\': keep both characters,
                // the second one is appended by the common bufferAdd below
                bufferAdd(chr);
                moveNext();
            }
            else
            {
                // Verbatim: doubled terminator: $"string ""test"" syntax" == string "test" syntax
                // Regular:  backslash escape:   "string \"test\" syntax"  == string "test" syntax
                var isEscapedTerminator = (isVerbatim ? chr == stringEnding : chr == '\\') && nchr == stringEnding;

                if (isEscapedTerminator)
                {
                    moveNext(); // skip the escaping character, the terminator itself is buffered below

                    if (source.EOF)
                    {
                        lexer.EmitMessage(MessageType.Error, (int)JSONMsgCode.eUnterminatedString, srcPos());
                        yield break; //string did not terminate but EOF was reached
                    }
                }
                else if (chr == stringEnding)
                {
                    // flush while isString is still set so the token is typed as a string literal
                    flush();
                    isString = false;
                    continue; // eat the terminating string char
                }
            }
        }
        else if (!isCommentLine)
        {
            if (isCommentBlock)
            {
                // ---- inside a block comment: look only for its terminator
                if (chr == '*' && nchr == commentBlockEnding)
                {
                    // flush while isCommentBlock is still set so the token is typed as a comment
                    flush();
                    isCommentBlock = false;
                    moveNext();
                    continue;
                }
            }
            else
            {
                // ---- plain code: not in a string, not in any comment

                // block comment opener: "/*" or "|*"
                if ((chr == '/' || chr == '|') && nchr == '*')
                {
                    flush();
                    isCommentBlock = true;
                    commentBlockEnding = chr;
                    moveNext();
                    continue;
                }

                // line comment opener: "//"
                if (chr == '/' && nchr == '/')
                {
                    flush();
                    isCommentLine = true;
                    moveNext();
                    continue;
                }

                // directive: handled like a line comment; '#' MUST be the first non-white char on the line
                if (freshLine && chr == '#')
                {
                    flush();
                    isCommentLine = true;
                    isDirective = true;
                    continue;
                }

                // verbatim string opener: $" or $'
                if (chr == '$' && (nchr == '"' || nchr == '\''))
                {
                    flush();
                    isString = true;
                    isVerbatim = true;
                    stringEnding = nchr;
                    moveNext();
                    continue;
                }

                // regular string opener: " or '
                if (chr == '"' || chr == '\'')
                {
                    flush();
                    isString = true;
                    isVerbatim = false;
                    stringEnding = chr;
                    continue;
                }

                // space or TAB separates tokens and is never buffered
                if (chr == ' ' || chr == '\t')
                {
                    flush();
                    continue; //eat it
                }

                // single-character separators; '.' counts only when it is not followed by a digit
                var isSeparator =
                    chr == ';' ||
                    chr == '{' ||
                    chr == '}' ||
                    chr == '(' ||
                    chr == ')' ||
                    chr == '[' ||
                    chr == ']' ||
                    chr == ',' ||
                    chr == ':' ||
                    (chr == '.' && !JSONIdentifiers.ValidateDigit(nchr));

                if (isSeparator)
                {
                    flush();         // whatever preceded the separator
                    bufferAdd(chr);
                    flush();         // the separator as its own token
                    continue;
                }

                // scientific numbers like: 2e+30, 45E-10
                if (buffer.Length > 0 &&
                    JSONIdentifiers.ValidateDigit(buffer[0]) &&
                    (chr == 'e' || chr == 'E') &&
                    (nchr == '+' || nchr == '-'))
                {
                    bufferAdd(chr); // e
                    moveNext();
                    bufferAdd(chr); // + or -
                    moveNext();
                    bufferAdd(chr); // the character right after + or -
                    continue;
                }

                // a switch between symbol and non-symbol characters ends the pending token;
                // this keeps multi-char operators like -- /= += together
                if (buffer.Length > 0 && isSymbol(chr) != isSymbol(buffer[0]))
                {
                    flush();
                }
            }
        }

        bufferAdd(chr);
        freshLine = false;

        // hand control back to the caller once a batch of new tokens has accumulated
        if (tokens.Count > countAtLastYield + BATCH_SIZE)
        {
            countAtLastYield = tokens.Count;
            yield return true;
        }
    } //while

    flush(); //flush any remains

    // ---------------------------------------------------------------- post-walk checks
    if (tokens.Count < 2)
    {
        lexer.EmitMessage(MessageType.Error, (int)JSONMsgCode.ePrematureEOF, srcPos());
    }

    if (isCommentBlock)
    {
        lexer.EmitMessage(MessageType.Error, (int)JSONMsgCode.eUnterminatedComment, srcPos());
    }

    if (isString)
    {
        lexer.EmitMessage(MessageType.Error, (int)JSONMsgCode.eUnterminatedString, srcPos());
    }

    tokens.Add(new JSONToken(lexer,
                             JSONTokenType.tEOF,
                             new SourcePosition(posLine, posCol, posChar),
                             new SourcePosition(posLine, posCol, posChar),
                             String.Empty));

    yield return true;
}//Run