/// <summary>
/// Reads the next token from <c>Input</c>, starting at <c>ParserPos</c>, and advances
/// <c>ParserPos</c> past it.
/// </summary>
/// <remarks>
/// Besides moving <c>ParserPos</c>, this method resets and updates <c>NNewlines</c> and
/// <c>WantedNewline</c>, may set <c>JustAddedNewline</c> and <c>Flags.InHtmlComment</c>,
/// and may write to <c>Output</c> (preserved newlines/indentation, and a leading she-bang line).
/// </remarks>
/// <returns>
/// A tuple of (token text, token type). The type is one of <c>TK_EOF</c>, <c>TK_WORD</c>,
/// <c>TK_OPERATOR</c>, <c>TK_START_EXPR</c>, <c>TK_END_EXPR</c>, <c>TK_START_BLOCK</c>,
/// <c>TK_END_BLOCK</c>, <c>TK_SEMICOLON</c>, <c>TK_INLINE_COMMENT</c>, <c>TK_BLOCK_COMMENT</c>,
/// <c>TK_COMMENT</c>, <c>TK_STRING</c>, <c>TK_DOT</c>, <c>TK_EQUALS</c>, <c>TK_COMMA</c>
/// or <c>TK_UNKNOWN</c>.
/// </returns>
private Tuple<string, string> GetNextToken()
{
    NNewlines = 0;

    if (ParserPos >= Input.Length)
    {
        return Tuple.Create("", "TK_EOF");
    }

    WantedNewline = false;

    var c = Input[ParserPos];
    ParserPos += 1;

    var keepWhitespace = Opts.KeepArrayIndentation && IsArray(Flags.Mode);
    if (keepWhitespace)
    {
        // Inside an array with indentation preservation: replay newlines and the
        // indentation of the last line (tab counts as 4 columns, '\r' is ignored).
        var whitespaceCount = 0;
        while (Whitespace.Contains(c))
        {
            if (c == '\n')
            {
                TrimOutput();
                Output.Add("\n");
                JustAddedNewline = true;
                whitespaceCount = 0;
            }
            else if (c == '\t')
            {
                whitespaceCount += 4;
            }
            else if (c != '\r')
            {
                whitespaceCount += 1;
            }

            if (ParserPos >= Input.Length)
            {
                return Tuple.Create("", "TK_EOF");
            }

            c = Input[ParserPos];
            ParserPos += 1;
        }

        if (JustAddedNewline)
        {
            for (var i = 0; i < whitespaceCount; i++)
            {
                Output.Add(" ");
            }
        }
    }
    else
    {
        // Skip whitespace, counting newlines up to the configured maximum (0 = unlimited).
        while (Whitespace.Contains(c))
        {
            if (c == '\n' && (Opts.MaxPreserveNewlines == 0 || Opts.MaxPreserveNewlines > NNewlines))
            {
                NNewlines += 1;
            }

            if (ParserPos >= Input.Length)
            {
                return Tuple.Create("", "TK_EOF");
            }

            c = Input[ParserPos];
            ParserPos += 1;
        }

        if (Opts.PreserveNewlines && NNewlines > 1)
        {
            // Capture the count first so the number of iterations is fixed up front.
            var newlineCount = NNewlines;
            for (var i = 0; i < newlineCount; i++)
            {
                AppendNewline(i == 0);
                JustAddedNewline = true;
            }
        }

        WantedNewline = NNewlines > 0;
    }

    // ---- words, numbers, keywords ----
    if (Wordchar.Contains(c))
    {
        var wordStart = ParserPos - 1;
        while (ParserPos < Input.Length && Wordchar.Contains(Input[ParserPos]))
        {
            ParserPos += 1;
        }

        var word = Input.Substring(wordStart, ParserPos - wordStart);

        // Exponent notation such as 1E-10: glue the sign and the following token onto the mantissa.
        if (ParserPos < Input.Length && "+-".Contains(Input[ParserPos]) && Regex.IsMatch(word, "^[0-9]+[Ee]$"))
        {
            var sign = Input[ParserPos];
            ParserPos++;
            var exponent = GetNextToken();
            return Tuple.Create(word + sign + exponent.Item1, "TK_WORD");
        }

        if (word == "in")
        {
            // "in" is made of word characters but behaves as an operator.
            return Tuple.Create(word, "TK_OPERATOR");
        }

        if (WantedNewline
            && LastType != "TK_OPERATOR"
            && LastType != "TK_EQUALS"
            && !Flags.IfLine
            && (Opts.PreserveNewlines || LastText != "var"))
        {
            AppendNewline();
        }

        return Tuple.Create(word, "TK_WORD");
    }

    // ---- single-character structural tokens ----
    if ("([".Contains(c))
    {
        return Tuple.Create(c.ToString(), "TK_START_EXPR");
    }

    if (")]".Contains(c))
    {
        return Tuple.Create(c.ToString(), "TK_END_EXPR");
    }

    if (c == '{')
    {
        return Tuple.Create(c.ToString(), "TK_START_BLOCK");
    }

    if (c == '}')
    {
        return Tuple.Create(c.ToString(), "TK_END_BLOCK");
    }

    if (c == ';')
    {
        return Tuple.Create(c.ToString(), "TK_SEMICOLON");
    }

    // ---- comments ----
    // The bounds checks matter: a '/' may be the very last character of the input.
    if (c == '/')
    {
        if (ParserPos < Input.Length && Input[ParserPos] == '*')
        {
            // /* ... */ comment; it is a block comment as soon as it spans a line break.
            var commentMode = "TK_INLINE_COMMENT";
            ParserPos += 1;
            var bodyStart = ParserPos;
            while (ParserPos < Input.Length
                   && !(Input[ParserPos] == '*' && ParserPos + 1 < Input.Length && Input[ParserPos + 1] == '/'))
            {
                if (Input[ParserPos] == '\r' || Input[ParserPos] == '\n')
                {
                    commentMode = "TK_BLOCK_COMMENT";
                }

                ParserPos += 1;
            }

            var body = Input.Substring(bodyStart, ParserPos - bodyStart);

            // Step over the closing "*/" (an unterminated comment is closed implicitly).
            ParserPos += 2;
            return Tuple.Create("/*" + body + "*/", commentMode);
        }

        if (ParserPos < Input.Length && Input[ParserPos] == '/')
        {
            // // comment: runs up to, but not including, the line break.
            var lineStart = ParserPos - 1;
            while (ParserPos < Input.Length && Input[ParserPos] != '\r' && Input[ParserPos] != '\n')
            {
                ParserPos += 1;
            }

            var lineComment = Input.Substring(lineStart, ParserPos - lineStart);
            if (WantedNewline)
            {
                AppendNewline();
            }

            return Tuple.Create(lineComment, "TK_COMMENT");
        }
    }

    // ---- string and regular-expression literals ----
    var startsString = c == '\'' || c == '"';

    // A '/' starts a regexp only where an expression may begin, judged from the previous token.
    var startsRegex = c == '/'
        && (LastType == "TK_WORD" && IsSpecialWord(LastText)
            || LastType == "TK_END_EXPR"
                && (Flags.PreviousMode == "(FOR-EXPRESSION)" || Flags.PreviousMode == "(COND-EXPRESSION)")
            || new[]
               {
                   "TK_COMMENT", "TK_START_EXPR", "TK_START_BLOCK", "TK_END_BLOCK", "TK_OPERATOR",
                   "TK_EQUALS", "TK_EOF", "TK_SEMICOLON", "TK_COMMA"
               }.Contains(LastType));

    if (startsString || startsRegex)
    {
        var sep = c;
        var esc = false;
        var literalStart = ParserPos - 1;

        if (ParserPos < Input.Length)
        {
            if (sep == '/')
            {
                // Regexp: a '/' inside a [...] character class does not terminate the literal.
                var inCharClass = false;
                while (esc || inCharClass || Input[ParserPos] != sep)
                {
                    var current = Input[ParserPos];
                    if (esc)
                    {
                        esc = false;
                    }
                    else
                    {
                        esc = current == '\\';
                        if (current == '[')
                        {
                            inCharClass = true;
                        }
                        else if (current == ']')
                        {
                            inCharClass = false;
                        }
                    }

                    ParserPos += 1;
                    if (ParserPos >= Input.Length)
                    {
                        // Incomplete regex when end-of-file reached:
                        // bail out with what has been received so far.
                        return Tuple.Create(Input.Substring(literalStart), "TK_STRING");
                    }
                }
            }
            else
            {
                // String: escape sequences are copied verbatim; no unescaping is performed.
                while (esc || Input[ParserPos] != sep)
                {
                    esc = !esc && Input[ParserPos] == '\\';

                    ParserPos += 1;
                    if (ParserPos >= Input.Length)
                    {
                        // Incomplete string when end-of-file reached:
                        // bail out with what has been received so far.
                        return Tuple.Create(Input.Substring(literalStart), "TK_STRING");
                    }
                }
            }
        }

        // Everything consumed so far plus the closing separator (appended even when the
        // opening separator was the last character of the input).
        var resultingString = Input.Substring(literalStart, ParserPos - literalStart) + sep;
        ParserPos += 1;

        if (sep == '/')
        {
            // Regexp flags (e.g. /foo/gi) are part of the literal.
            while (ParserPos < Input.Length && Wordchar.Contains(Input[ParserPos]))
            {
                resultingString += Input[ParserPos];
                ParserPos += 1;
            }
        }

        return Tuple.Create(resultingString, "TK_STRING");
    }

    if (c == '#')
    {
        // She-bang: only when nothing has been written yet and '!' follows immediately.
        if (Output.Count == 0 && ParserPos < Input.Length && Input[ParserPos] == '!')
        {
            var sheBang = c.ToString();
            while (ParserPos < Input.Length && c != '\n')
            {
                c = Input[ParserPos];
                sheBang += c;
                ParserPos += 1;
            }

            Output.Add(sheBang.Trim() + '\n');
            AppendNewline();
            return GetNextToken();
        }

        // Spidermonkey-specific sharp variables for circular references
        // https://developer.mozilla.org/En/Sharp_variables_in_JavaScript
        // http://mxr.mozilla.org/mozilla-central/source/js/src/jsscan.cpp around line 1935
        var sharp = "#";
        if (ParserPos < Input.Length && Digits.Contains(Input[ParserPos]))
        {
            while (true)
            {
                c = Input[ParserPos];
                sharp += c;
                ParserPos += 1;
                if (ParserPos >= Input.Length || c == '#' || c == '=')
                {
                    break;
                }
            }
        }

        // An empty "[]" / "{}" right after "#n=" belongs to the sharp variable.
        // Both characters must exist before peeking at them.
        if (c != '#' && ParserPos + 1 < Input.Length)
        {
            if (Input[ParserPos] == '[' && Input[ParserPos + 1] == ']')
            {
                sharp += "[]";
                ParserPos += 2;
            }
            else if (Input[ParserPos] == '{' && Input[ParserPos + 1] == '}')
            {
                sharp += "{}";
                ParserPos += 2;
            }
        }

        return Tuple.Create(sharp, "TK_WORD");
    }

    // ---- HTML comment markers embedded in script ----
    if (c == '<' && Input.Substring(ParserPos - 1, Math.Min(4, Input.Length - ParserPos + 1)) == "<!--")
    {
        ParserPos += 3;
        var htmlComment = "<!--";
        while (ParserPos < Input.Length && Input[ParserPos] != '\n')
        {
            htmlComment += Input[ParserPos];
            ParserPos += 1;
        }

        Flags.InHtmlComment = true;
        return Tuple.Create(htmlComment, "TK_COMMENT");
    }

    // Only take the substring when all three characters of "-->" are available.
    if (c == '-'
        && Flags.InHtmlComment
        && ParserPos + 1 < Input.Length
        && Input.Substring(ParserPos - 1, 3) == "-->")
    {
        Flags.InHtmlComment = false;
        ParserPos += 2;
        if (WantedNewline)
        {
            AppendNewline();
        }

        return Tuple.Create("-->", "TK_COMMENT");
    }

    if (c == '.')
    {
        return Tuple.Create(".", "TK_DOT");
    }

    // ---- punctuation / operators: greedily extend while the result is still a known operator ----
    if (Punct.Contains(c.ToString()))
    {
        var op = c.ToString();
        while (ParserPos < Input.Length && Punct.Contains(op + Input[ParserPos]))
        {
            op += Input[ParserPos];
            ParserPos += 1;
        }

        if (op == "=")
        {
            return Tuple.Create("=", "TK_EQUALS");
        }

        if (op == ",")
        {
            return Tuple.Create(",", "TK_COMMA");
        }

        return Tuple.Create(op, "TK_OPERATOR");
    }

    return Tuple.Create(c.ToString(), "TK_UNKNOWN");
}