/// <summary>
/// Reads one line from the client and splits it into tokens, treating
/// double quotes as token delimiters.
/// </summary>
/// <returns>None when the input stream is exhausted; otherwise the
/// (possibly empty) list of tokens parsed from the line.</returns>
private Maybe<IReadOnlyList<string>> ReadInput()
{
    var line = _client.ReadLine();

    // A null line signals end of the input stream.
    if (line == null)
    {
        return new None();
    }

    // Apply any line-level preprocessing before tokenizing.
    line = Preprocess(line);

    try
    {
        return StringUtilities
            .Tokenize(line, TokenizerOptions.HandleDoubleQuoteAsTokenDelimiter)
            .Select(token => token.ToString())
            .ToArray();
    }
    catch (ArgumentException ex)
    {
        // Report the parse failure to the client and treat the line as
        // yielding no tokens rather than propagating the exception.
        _client.OnError(string.Format(
            CultureInfo.CurrentCulture,
            Strings.ExceptionWasThrownParsingInputLine,
            ex));

        return Array.Empty<string>();
    }
}
/// <summary>
/// Generate possible completions for the specified command line.
/// </summary>
/// <param name="type">Type of the parsed arguments object.</param>
/// <param name="commandLineToComplete">The command line to complete. Both
/// single and double quote characters are interpreted as denoting single
/// tokens that may contain embedded whitespace.</param>
/// <param name="charIndexOfCursor">Character index of the completion
/// cursor.</param>
/// <param name="tokensToSkip">Number of tokens to skip from the start of
/// the command line.</param>
/// <param name="options">Parsing options.</param>
/// <returns>The candidate completions for the specified token.</returns>
public static IEnumerable<string> GetCompletions(Type type, string commandLineToComplete, int charIndexOfCursor, int tokensToSkip, CommandLineParserOptions options)
{
    const TokenizerOptions tokenizerOptions =
        TokenizerOptions.AllowPartialInput |
        TokenizerOptions.HandleDoubleQuoteAsTokenDelimiter |
        TokenizerOptions.HandleSingleQuoteAsTokenDelimiter;

    var tokens = StringUtilities.Tokenize(commandLineToComplete, tokenizerOptions).ToList();

    // Locate the token whose outer extent contains the cursor; if none
    // does, cursorTokenIndex ends up equal to tokens.Count.
    var cursorTokenIndex = 0;
    while (cursorTokenIndex < tokens.Count)
    {
        var candidate = tokens[cursorTokenIndex];
        if ((charIndexOfCursor >= candidate.OuterStartingOffset) &&
            (charIndexOfCursor <= candidate.OuterEndingOffset))
        {
            break;
        }

        ++cursorTokenIndex;
    }

    // The cursor sits inside the tokens we were asked to ignore; there is
    // nothing to complete.
    if (cursorTokenIndex < tokensToSkip)
    {
        return Enumerable.Empty<string>();
    }

    return GetCompletions(
        type,
        tokens.Skip(tokensToSkip).Select(t => t.ToString()),
        cursorTokenIndex - tokensToSkip,
        options);
}
/// <summary>
/// Creates a tokenizer over the given reader, initialized at the supplied
/// source location (or <see cref="SourceLocation.MinValue"/> by default).
/// </summary>
private Tokenizer MakeTokenizer(PythonLanguageVersion version, TokenizerOptions optionSet, StringReader reader, SourceLocation? initialSourceLocation = null)
{
    var result = new Tokenizer(version, options: optionSet);
    result.Initialize(null, reader, initialSourceLocation ?? SourceLocation.MinValue);
    return result;
}
/// <summary>
/// Initializes a new instance of the <see cref="StringTokenizer"/> class
/// over the half-open range [start, end) of the given text.
/// </summary>
/// <param name="text">The text for tokenizing.</param>
/// <param name="start">The start index.</param>
/// <param name="end">The end index.</param>
/// <param name="localTextOffset">The local text offset.</param>
/// <param name="options">The options.</param>
public StringTokenizer(string text, int start, int end, int localTextOffset, TokenizerOptions options)
    : base(localTextOffset, options)
{
    Text = text;
    End = end;
    Start = start;

    // Tokenization begins at the start of the range.
    Next = Start;
}
/// <summary>
/// Initializes a new instance of the <see cref="TokenizingEnumerator"/>
/// struct, using default options when none are supplied.
/// </summary>
/// <param name="value">The value to tokenize.</param>
/// <param name="tokenizerOptions">The tokenizer options.</param>
public TokenizingEnumerator(ReadOnlySpan<char> value, TokenizerOptions? tokenizerOptions = null)
{
    var resolvedOptions = tokenizerOptions ?? new TokenizerOptions();

    _tokenizerOptions = resolvedOptions;
    _splitEnumerator = new SpanSplitEnumerator(value, resolvedOptions);

    // Remaining fields start in their default (not-yet-enumerating) state.
    _isInCombinedShortNameSegment = default;
    _segment = default;
    _current = default;
}
public void Test1()
{
    // Longest-match segmentation over a Thai sentence; the expected
    // segmentation comes from the shared fixture.
    const string input = "ปลาที่ใหญ่ที่สุดในโลกคือปารีสชุบแป้งทอด";
    var expected = GlobalExpectedResult.GetExpectedResult1();

    var tokenizer = new ThaiTokenizer(new TokenizerOptions { MatchingMode = MatchingMode.Longest });

    Verify(tokenizer, input, expected);
}
public void Test2()
{
    // A single decodable word should come back unsplit under
    // shortest-match segmentation with PreferDecodableWord enabled.
    const string input = "เจริญ";
    var expected = new List<string> { "เจริญ" };

    var tokenizer = new ThaiTokenizer(new TokenizerOptions
    {
        MatchingMode = MatchingMode.Shortest,
        PreferDecodableWord = true
    });

    Verify(tokenizer, input, expected);
}
/// <summary>
/// Initializes a new instance of the <see cref="ConsentCheckingPreExecutionEvent"/> class.
/// </summary>
/// <param name="privacy">The privacy service.</param>
/// <param name="commandService">The command service.</param>
/// <param name="options">The responder options.</param>
/// <param name="interactionAPI">The interaction API.</param>
/// <param name="tokenizerOptions">The tokenizer options.</param>
/// <param name="treeSearchOptions">The tree search options.</param>
/// <param name="feedback">The feedback service.</param>
public ConsentCheckingPreExecutionEvent
(
    PrivacyService privacy,
    CommandService commandService,
    IOptions<CommandResponderOptions> options,
    IDiscordRestInteractionAPI interactionAPI,
    IOptions<TokenizerOptions> tokenizerOptions,
    IOptions<TreeSearchOptions> treeSearchOptions,
    FeedbackService feedback
)
{
    // Services are stored as-is; option wrappers are unwrapped eagerly.
    _privacy = privacy;
    _commandService = commandService;
    _interactionAPI = interactionAPI;
    _feedback = feedback;

    _options = options.Value;
    _tokenizerOptions = tokenizerOptions.Value;
    _treeSearchOptions = treeSearchOptions.Value;
}
/// <summary>
/// Initializes a new parser with the given options and registers the
/// default set of transformers and validators.
/// </summary>
/// <param name="options">Tokenizer options to use for parsing.</param>
public TokenParser(TokenizerOptions options)
{
    log = LogProvider.For<TokenParser>();
    Options = options;
    transformers = new List<Type>();
    validators = new List<Type>();

    // Add default transformers/validators. NOTE(review): registration
    // order is preserved as written; it is not clear from this block
    // whether lookup depends on it — confirm before reordering.
    RegisterTransformer<ToDateTimeTransformer>();
    RegisterTransformer<ToDateTimeUtcTransformer>();
    RegisterTransformer<ToLowerTransformer>();
    RegisterTransformer<ToUpperTransformer>();
    RegisterTransformer<TrimTransformer>();
    RegisterTransformer<SubstringAfterTransformer>();
    RegisterTransformer<SubstringBeforeTransformer>();
    RegisterTransformer<SetTransformer>();
    RegisterTransformer<ReplaceTransformer>();
    RegisterTransformer<RemoveTransformer>();
    RegisterTransformer<SubstringAfterLastTransformer>();
    RegisterTransformer<SubstringBeforeLastTransformer>();
    RegisterTransformer<RemoveEndTransformer>();
    RegisterTransformer<RemoveStartTransformer>();
    RegisterTransformer<SplitTransformer>();

    RegisterValidator<IsNumericValidator>();
    RegisterValidator<MaxLengthValidator>();
    RegisterValidator<MinLengthValidator>();
    RegisterValidator<IsDomainNameValidator>();
    RegisterValidator<IsPhoneNumberValidator>();
    RegisterValidator<IsEmailValidator>();
    RegisterValidator<IsUrlValidator>();
    RegisterValidator<IsLooseUrlValidator>();
    RegisterValidator<IsLooseAbsoluteUrlValidator>();
    RegisterValidator<IsDateTimeValidator>();
    RegisterValidator<IsNotEmptyValidator>();
    RegisterValidator<IsNotValidator>();
    RegisterValidator<StartsWithValidator>();
    RegisterValidator<EndsWithValidator>();
    RegisterValidator<ContainsValidator>();
}
// Verifies that tokenizing with RetainQuotationMarks enabled yields the
// expected token types and values for the given input.
internal void RetainsQuotationMarksCorrectly
(
    string value,
    IEnumerable<TokenType> expectedTokenTypes,
    IEnumerable<string> expectedTokenValues
)
{
    var observedTypes = new List<TokenType>();
    var observedValues = new List<string>();

    var options = new TokenizerOptions(RetainQuotationMarks: true);
    foreach (var token in new TokenizingEnumerator(value, options))
    {
        observedTypes.Add(token.Type);
        observedValues.Add(token.Value.ToString());
    }

    Assert.Equal(expectedTokenTypes, observedTypes);
    Assert.Equal(expectedTokenValues, observedValues);
}
/// <summary>
/// Initializes a new parser with the given options and registers the
/// default transformers and validators.
/// </summary>
/// <param name="options">Tokenizer options to use for parsing.</param>
public TokenParser(TokenizerOptions options)
{
    log = LogProvider.For<TokenParser>();
    Options = options;
    transformers = new List<Type>();
    validators = new List<Type>();

    // Add default transformers/validators
    RegisterTransformer<ToDateTimeTransformer>();
    RegisterTransformer<ToDateTimeUtcTransformer>();
    RegisterTransformer<ToLowerTransformer>();
    RegisterTransformer<ToUpperTransformer>();
    RegisterTransformer<TrimTransformer>();
    RegisterTransformer<SubstringAfterTransformer>();
    RegisterTransformer<SubstringBeforeTransformer>();

    RegisterValidator<IsNumericValidator>();
    RegisterValidator<MaxLengthValidator>();
    RegisterValidator<MinLengthValidator>();
}
/// <summary>
/// Tokenizes <paramref name="originalText"/> and verifies that the
/// verbatim token images plus preceding whitespace reconstruct the input
/// exactly, printing a character-level diff context on any mismatch.
/// </summary>
/// <returns>The tokens produced, paired with their spans.</returns>
private static List<TokenWithSpan> TestOneString(PythonLanguageVersion version, TokenizerOptions optionSet, string originalText)
{
    // Accumulates the round-tripped text rebuilt from the tokens.
    StringBuilder output = new StringBuilder();

    var tokenizer = new Tokenizer(version, options: optionSet);
    tokenizer.Initialize(new StringReader(originalText));

    Token token;
    int prevOffset = 0;

    List<TokenWithSpan> tokens = new List<TokenWithSpan>();
    while ((token = tokenizer.GetNextToken()) != Tokens.EndOfFileToken)
    {
        tokens.Add(new TokenWithSpan(token, tokenizer.TokenSpan));

        // Each token contributes its leading whitespace plus its verbatim
        // image; together these should reproduce the input verbatim.
        output.Append(tokenizer.PrecedingWhiteSpace);
        output.Append(token.VerbatimImage);

        const int contextSize = 50;

        // Only compare the characters appended since the previous token.
        for (int i = prevOffset; i < originalText.Length && i < output.Length; i++)
        {
            if (originalText[i] != output[i])
            {
                // output some context around the mismatch
                StringBuilder x = new StringBuilder();
                StringBuilder y = new StringBuilder();
                StringBuilder z = new StringBuilder();
                for (int j = Math.Max(0, i - contextSize); j < Math.Min(Math.Min(originalText.Length, output.Length), i + contextSize); j++)
                {
                    x.AppendRepr(originalText[j]);
                    y.AppendRepr(output[j]);
                    if (j == i)
                    {
                        z.Append("^");
                    }
                    else
                    {
                        z.Append(" ");
                    }
                }

                Console.WriteLine("Mismatch context at {0}:", i);
                Console.WriteLine("Original: {0}", x.ToString());
                Console.WriteLine("New : {0}", y.ToString());
                Console.WriteLine("Differs : {0}", z.ToString());
                Console.WriteLine("Token : {0}", token);

                Assert.AreEqual(originalText[i], output[i],
                    String.Format("Characters differ at {0}, got {1}, expected {2}", i, output[i], originalText[i]));
            }
        }

        prevOffset = output.Length;
    }

    // Trailing whitespace after the last token.
    output.Append(tokenizer.PrecedingWhiteSpace);

    Assert.AreEqual(originalText.Length, output.Length);

    return (tokens);
}
/// <summary>
/// Consumes one character while inside a token name ("{name...}") and
/// updates the token's flags, the parser state, or the accumulated name.
/// Modifier characters ($, ?, *, !) validate the following character
/// against the set of legal successors before continuing.
/// </summary>
private void ParseTokenName(PreTemplate template, ref PreToken token, PreTokenEnumerator enumerator, ref FlatTokenParserState state, ref bool inFrontMatterToken, ref StringBuilder tokenContent, TokenizerOptions options)
{
    var next = enumerator.Next();
    var peek = enumerator.Peek();

    // Track the raw text of the token for PreToken.Content.
    tokenContent.Append(next);

    switch (next)
    {
        // Nested '{' is never legal inside a token.
        case "{":
            throw new ParsingException($"Unexpected character '{{' in token '{token.Name}'", enumerator);

        // '}' ends a body token; front-matter tokens end on newline instead.
        case "}":
            if (inFrontMatterToken)
            {
                throw new ParsingException($"Invalid character '{next}' in token '{token.Name}'", enumerator);
            }
            else
            {
                AppendToken(template, token, ref tokenContent, options);
                token = new PreToken();
                state = FlatTokenParserState.InPreamble;
            }
            break;

        // '$': terminate-on-newline modifier.
        case "$":
            token.TerminateOnNewline = true;
            switch (peek)
            {
                case " ":
                case "?":
                case "*":
                case "}":
                case ":":
                case "!":
                    break;
                default:
                    throw new ParsingException($"Invalid character '{peek}' in token '{token.Name}'", enumerator);
            }
            break;

        // '?': optional modifier; mutually exclusive with '!'.
        case "?":
            token.Optional = true;
            switch (peek)
            {
                case " ":
                case "$":
                case "*":
                case "}":
                case ":":
                case "!":
                    break;
                default:
                    throw new ParsingException($"Invalid character '{peek}' in token '{token.Name}'", enumerator);
            }
            if (token.Required)
            {
                throw new ParsingException($"Required token {token.Name} can't be Optional", enumerator);
            }
            break;

        // '*': repeating modifier (implies optional).
        case "*":
            token.Repeating = true;
            token.Optional = true;
            switch (peek)
            {
                case " ":
                case "$":
                case "?":
                case "}":
                case ":":
                case "!":
                    break;
                default:
                    throw new ParsingException($"Invalid character '{peek}' in token '{token.Name}'", enumerator);
            }
            break;

        // '!': required modifier; mutually exclusive with '?'.
        case "!":
            token.Required = true;
            switch (peek)
            {
                case " ":
                case "*":
                case "$":
                case "?":
                case "}":
                case ":":
                    break;
                default:
                    throw new ParsingException($"Invalid character '{peek}' in token '{token.Name}'", enumerator);
            }
            if (token.Optional)
            {
                throw new ParsingException($"Optional token {token.Name} can't be Required", enumerator);
            }
            break;

        // ':' introduces a decorator; '=' introduces an inline value.
        case ":":
            state = FlatTokenParserState.InDecorator;
            break;

        case "=":
            state = FlatTokenParserState.InTokenValue;
            break;

        // A space is only legal before a modifier/terminator, or before the
        // name has started.
        case " ":
            switch (peek)
            {
                case " ":
                case "*":
                case "$":
                case "?":
                case "}":
                case ":":
                case "!":
                case "=":
                    break;
                case "\n" when inFrontMatterToken:
                    break;
                default:
                    if (string.IsNullOrWhiteSpace(token.Name) == false)
                    {
                        throw new ParsingException($"Invalid character '{peek}' in token '{token.Name}'", enumerator);
                    }
                    break;
            }
            break;

        // Newline terminates a front-matter token; illegal elsewhere.
        case "\n":
            if (inFrontMatterToken)
            {
                token.IsFrontMatterToken = true;
                AppendToken(template, token, ref tokenContent, options);
                token = new PreToken();
                inFrontMatterToken = false;
                state = FlatTokenParserState.InFrontMatter;
            }
            else
            {
                throw new ParsingException($"Invalid character '{next}' in token '{token.Name}'", enumerator);
            }
            break;

        // Anything else must be a legal name character.
        default:
            if (ValidTokenNameCharacters.Contains(next))
            {
                token.AppendName(next);
            }
            else
            {
                throw new ParsingException($"Invalid character '{next}' in token '{token.Name}'", enumerator);
            }
            break;
    }
}
/// <summary>
/// Consumes one character while inside a token decorator. Handles
/// decorator termination ('}' in the body, newline in front matter),
/// chaining (','), argument start ('('), and negation ('!').
/// </summary>
private void ParseDecorator(PreTemplate template, ref PreToken token, PreTokenEnumerator enumerator, ref FlatTokenParserState state, ref PreTokenDecorator decorator, ref bool inFrontMatterToken, ref StringBuilder tokenContent, TokenizerOptions options)
{
    var next = enumerator.Next();
    tokenContent.Append(next);

    // Skip insignificant whitespace: everything except a newline while in
    // a front-matter token (which terminates the token below).
    if (string.IsNullOrWhiteSpace(next))
    {
        if (inFrontMatterToken == false)
        {
            return;
        }
        if (next != "\n")
        {
            return;
        }
    }

    switch (next)
    {
        // End of token: '}' in the body, or newline in front matter.
        case "}" when inFrontMatterToken == false:
        case "\n" when inFrontMatterToken:
            token.IsFrontMatterToken = inFrontMatterToken;
            AppendDecorator(enumerator, token, decorator);
            AppendToken(template, token, ref tokenContent, options);
            token = new PreToken();
            decorator = new PreTokenDecorator();
            if (inFrontMatterToken)
            {
                inFrontMatterToken = false;
                state = FlatTokenParserState.InFrontMatter;
            }
            else
            {
                state = FlatTokenParserState.InPreamble;
            }
            break;

        // ',' finishes the current decorator and starts another.
        case ",":
            AppendDecorator(enumerator, token, decorator);
            decorator = new PreTokenDecorator();
            break;

        // '(' begins the decorator's argument list.
        case "(":
            state = FlatTokenParserState.InDecoratorArgument;
            break;

        // The terminator for the wrong context is an error.
        case "}" when inFrontMatterToken:
        case "\n" when inFrontMatterToken == false:
            throw new ParsingException($"'{decorator.Name}' unexpected character: {next}", enumerator);

        // '!' negates the decorator, but only before its name starts.
        case "!":
            if (string.IsNullOrWhiteSpace(decorator.Name))
            {
                decorator.IsNotDecorator = true;
            }
            else
            {
                throw new ParsingException($"'{decorator.Name}' unexpected character: {next}", enumerator);
            }
            break;

        default:
            decorator.AppendName(next);
            break;
    }
}
// Runs the round-trip tokenizer test over the full contents of a file.
private static void TestOneFile(string filename, PythonLanguageVersion version, TokenizerOptions optionSet)
{
    var contents = File.ReadAllText(filename);
    TestOneString(version, optionSet, contents);
}
/// <summary>
/// Generate completions for the "current" token in the specified input
/// text.
/// </summary>
/// <param name="inputText">The input text string.</param>
/// <param name="cursorIndex">The current cursor index into the string.
/// </param>
/// <param name="tokenCompleter">Token completion handler to invoke.
/// </param>
/// <param name="existingTokenStartIndex">Receives the start index of
/// the current token.</param>
/// <param name="existingTokenLength">Receives the length of the current
/// token.</param>
/// <returns>The generated completions.</returns>
private static IReadOnlyList<string> Create(string inputText, int cursorIndex, ITokenCompleter tokenCompleter, out int existingTokenStartIndex, out int existingTokenLength)
{
    const TokenizerOptions tokenizerOptions =
        TokenizerOptions.AllowPartialInput |
        TokenizerOptions.HandleDoubleQuoteAsTokenDelimiter;

    //
    // Try to parse the line. If we fail to parse it, then just
    // return immediately.
    //
    var tokens = StringUtilities.Tokenize(inputText, tokenizerOptions).ToList();

    //
    // Figure out which token the cursor is in. If the cursor falls in a
    // gap between tokens, insert an empty token there so the completer
    // sees a token at the cursor position.
    //
    int tokenIndex;
    for (tokenIndex = 0; tokenIndex < tokens.Count; ++tokenIndex)
    {
        var token = tokens[tokenIndex];

        // Cursor is past this token entirely; keep scanning.
        if (cursorIndex > token.OuterEndingOffset)
        {
            continue;
        }

        // Cursor is within this token's extent.
        if (cursorIndex >= token.OuterStartingOffset)
        {
            break;
        }

        // Cursor is before this token: insert an empty token here.
        tokens.Insert(
            tokenIndex,
            new Token(new Substring(inputText, cursorIndex, 0)));

        break;
    }

    // Report the extent of the token under the cursor; if the cursor is
    // past all tokens, report a zero-length token at the cursor.
    if (tokenIndex < tokens.Count)
    {
        var token = tokens[tokenIndex];
        existingTokenStartIndex = token.OuterStartingOffset;
        existingTokenLength = token.OuterLength;
    }
    else
    {
        existingTokenStartIndex = cursorIndex;
        existingTokenLength = 0;
    }

    //
    // Ask for completions, passing unquoted token text.
    //
    var tokenStrings = tokens.Select(token => RemoveQuotes(token.ToString())).ToArray();

    var completions = tokenCompleter.GetCompletions(tokenStrings, tokenIndex).ToList();

    // Re-quote any completion that needs it and isn't already quoted.
    for (var j = 0; j < completions.Count; j++)
    {
        var completion = completions[j];
        if (!completion.StartsWith(QuoteStr, StringComparison.OrdinalIgnoreCase))
        {
            completions[j] = StringUtilities.QuoteIfNeeded(completions[j], QuoteChar);
        }
    }

    return (completions);
}
/// <summary>
/// Parses a template pattern into a <see cref="RawTemplate"/> by driving
/// a character-at-a-time state machine; each state delegates to the
/// matching Parse* helper until the input is exhausted.
/// </summary>
/// <param name="pattern">The template pattern text.</param>
/// <param name="options">Tokenizer options; cloned onto the template.</param>
/// <returns>The parsed template.</returns>
public RawTemplate Parse(string pattern, TokenizerOptions options)
{
    var template = new RawTemplate();
    template.Options = options.Clone();

    var enumerator = new RawTokenEnumerator(pattern);

    if (enumerator.IsEmpty)
    {
        return (template);
    }

    // Shared mutable state threaded by-ref through the helpers.
    var state = FlatTokenParserState.AtStart;
    var token = new RawToken();
    var decorator = new RawTokenDecorator();
    var argument = string.Empty;
    var frontMatterName = new StringBuilder();
    var frontMatterValue = new StringBuilder();

    while (enumerator.IsEmpty == false)
    {
        switch (state)
        {
            case FlatTokenParserState.AtStart:
                ParseStart(enumerator, ref state);
                break;
            case FlatTokenParserState.InFrontMatter:
                ParseFrontMatter(enumerator, ref frontMatterName, ref state);
                break;
            case FlatTokenParserState.InFrontMatterComment:
                ParseFrontMatterComment(enumerator, ref state);
                break;
            case FlatTokenParserState.InFrontMatterOption:
                ParseFrontMatterOption(enumerator, ref frontMatterName, ref state);
                break;
            case FlatTokenParserState.InFrontMatterOptionValue:
                ParseFrontMatterOptionValue(template, enumerator, ref frontMatterName, ref frontMatterValue, ref state);
                break;
            case FlatTokenParserState.InPreamble:
                ParsePreamble(ref token, enumerator, ref state);
                break;
            case FlatTokenParserState.InTokenName:
                ParseTokenName(template, ref token, enumerator, ref state);
                break;
            case FlatTokenParserState.InDecorator:
                ParseDecorator(template, ref token, enumerator, ref state, ref decorator);
                break;
            case FlatTokenParserState.InDecoratorArgument:
                ParseDecoratorArgument(enumerator, ref state, ref decorator, ref argument);
                break;
            case FlatTokenParserState.InDecoratorArgumentSingleQuotes:
                ParseDecoratorArgumentInSingleQuotes(enumerator, ref state, ref decorator, ref argument);
                break;
            case FlatTokenParserState.InDecoratorArgumentDoubleQuotes:
                ParseDecoratorArgumentInDoubleQuotes(enumerator, ref state, ref decorator, ref argument);
                break;
            case FlatTokenParserState.InDecoratorArgumentRunOff:
                ParseDecoratorArgumentRunOff(enumerator, ref state, ref decorator, ref argument);
                break;
            default:
                throw new TokenizerException($"Unknown FlatTokenParserState: {state}");
        }
    }

    // Append current token if it has contents
    // Note: allow empty token values, as these will serve to truncate the last
    // token in the template
    if (string.IsNullOrWhiteSpace(token.Preamble) == false)
    {
        template.Tokens.Add(token);
    }

    return (template);
}
/// <summary>
/// Finalizes a parsed token and appends it to the template. A repeating
/// token with a multiline preamble is expanded into a non-repeating base
/// token plus a dependent repeat token so the first occurrence and the
/// continuations can match different preambles.
/// </summary>
/// <param name="template">Template receiving the token(s).</param>
/// <param name="token">The token being finalized.</param>
/// <param name="tokenContent">Raw source text of the token; consumed and
/// cleared by this call.</param>
/// <param name="options">Tokenizer options applied to the token.</param>
private void AppendToken(PreTemplate template, PreToken token, ref StringBuilder tokenContent, TokenizerOptions options)
{
    token.Content = tokenContent.ToString();
    token.Id = template.Tokens.Count + 1;

    // "null" (any casing) marks a throwaway token. Ordinal comparison is
    // the right tool for a keyword match (CA1309); token names are drawn
    // from ValidTokenNameCharacters, so this matches the prior
    // invariant-culture comparison for all reachable inputs.
    token.IsNull = string.Equals(token.Name, "null", StringComparison.OrdinalIgnoreCase);

    if (options.TrimPreambleBeforeNewLine)
    {
        token.TrimPreambleBeforeNewLine();
    }

    if (options.TerminateOnNewline)
    {
        token.TerminateOnNewline = true;
    }

    // Content has been captured; reset the accumulator for the next token.
    tokenContent.Clear();

    var preamble = GetRepeatingMultilinePreamble(token);
    if (string.IsNullOrEmpty(preamble) == false && token.Repeating)
    {
        // Expand: the base token matches once, the repeat token (optional,
        // repeating, tied back via DependsOnId) matches the continuations.
        token.Repeating = false;
        template.Tokens.Add(token);

        var repeat = new PreToken
        {
            Optional = true,
            Repeating = true,
            TerminateOnNewline = token.TerminateOnNewline,
            Content = token.Content
        };

        repeat.AppendName(token.Name);
        repeat.AppendPreamble(preamble);
        repeat.AppendDecorators(token.Decorators);
        repeat.Id = template.Tokens.Count + 1;
        repeat.DependsOnId = token.Id;

        template.Tokens.Add(repeat);
    }
    else
    {
        template.Tokens.Add(token);
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="StringTokenizer"/> class
/// covering the entire text (delegates to the range-based constructor
/// with start 0 and end text.Length).
/// </summary>
/// <param name="text">The text for tokenizing.</param>
/// <param name="localTextOffset">The local text offset.</param>
/// <param name="options">The options.</param>
public StringTokenizer(string text, int localTextOffset, TokenizerOptions options)
    : this(text, 0, text.Length, localTextOffset, options)
{
}
/// <summary>
/// Tokenizes the provided input text line, observing quotes.
/// </summary>
/// <param name="line">Input line to parse.</param>
/// <param name="options">Options for tokenizing.</param>
/// <returns>Enumeration of tokens.</returns>
/// <exception cref="ArgumentOutOfRangeException">Thrown (unless
/// <see cref="TokenizerOptions.AllowPartialInput"/> is set) when a closing
/// quote is not followed by whitespace/end-of-line, or when the line ends
/// inside an unterminated quoted token.</exception>
public static IEnumerable<Token> Tokenize(string line, TokenizerOptions options)
{
    //
    // State variables.
    //

    // True if the current token started with a quote character,
    // regardless of whether we're still "inside" the quotes.
    var quoted = false;

    // Non-null only while inside an unterminated quoted token; holds the
    // specific quote character that opened the token.
    char? inQuotes = null;

    // True if an end quote was seen for this token. Stays false when
    // "partial input" is allowed and the token has no closing quote.
    var endQuotePresent = false;

    // Start index of the current token, or null before one begins.
    int? tokenStartIndex = null;

    // End index of the current token, or null until it is known.
    int? tokenEndIndex = null;

    //
    // Main loop: iterate one index past the end of the string so any
    // final in-progress token gets finalized.
    //
    for (var index = 0; index <= line.Length; ++index)
    {
        // End-of-string or whitespace may end the token — but whitespace
        // embedded inside quotes is part of the token and is skipped here.
        if ((index == line.Length) || char.IsWhiteSpace(line[index]))
        {
            var completeToken = false;

            // A token in progress that is not inside quotes ends here.
            if (tokenStartIndex.HasValue && !inQuotes.HasValue)
            {
                completeToken = true;
                endQuotePresent = quoted;
            }

            // At end-of-string while still inside quotes: with partial
            // input allowed, end the token without its closing quote
            // (endQuotePresent stays false).
            else if ((index == line.Length) &&
                inQuotes.HasValue &&
                options.HasFlag(TokenizerOptions.AllowPartialInput))
            {
                Debug.Assert(tokenStartIndex.HasValue);
                completeToken = true;
            }

            // Yield the finished token and reset per-token state.
            if (completeToken)
            {
                if (!tokenEndIndex.HasValue)
                {
                    tokenEndIndex = index;
                }

                yield return (new Token(
                    new Substring(line, tokenStartIndex.Value, tokenEndIndex.Value - tokenStartIndex.Value),
                    quoted,
                    endQuotePresent));

                tokenStartIndex = null;
                tokenEndIndex = null;
                quoted = false;
                inQuotes = null;
                endQuotePresent = false;
            }
        }

        // A quote character enabled by options: it may open a quoted
        // token, close one, or be an ordinary embedded character.
        else if ((line[index] == '\"' && options.HasFlag(TokenizerOptions.HandleDoubleQuoteAsTokenDelimiter)) ||
            (line[index] == '\'' && options.HasFlag(TokenizerOptions.HandleSingleQuoteAsTokenDelimiter)))
        {
            // No token in progress: this quote opens a new quoted token
            // whose content starts just after the quote.
            if (!tokenStartIndex.HasValue)
            {
                Debug.Assert(!inQuotes.HasValue);
                inQuotes = line[index];
                quoted = true;
                tokenStartIndex = index + 1;
            }

            // Inside a quoted token: this may be the closing quote. For
            // an unquoted token, fall through — the quote is embedded.
            else if (quoted)
            {
                Debug.Assert(inQuotes.HasValue);

                // A different quote character than the opener is ordinary
                // content.
                if (inQuotes.Value != line[index])
                {
                    // Nothing to do here.
                }

                // A closing quote must be followed by whitespace or
                // end-of-line; otherwise it's an error unless partial
                // input is allowed.
                else if ((index + 1 != line.Length) &&
                    !char.IsWhiteSpace(line[index + 1]))
                {
                    if (!options.HasFlag(TokenizerOptions.AllowPartialInput))
                    {
                        throw new ArgumentOutOfRangeException(nameof(line), Strings.TerminatingQuotesNotEndOfToken);
                    }
                }
                else
                {
                    // This was the end quote for the token.
                    inQuotes = null;
                    endQuotePresent = true;
                    tokenEndIndex = index;
                }
            }
        }

        // An ordinary character: start a new token if none is in
        // progress; otherwise it simply extends the current token.
        else if (!tokenStartIndex.HasValue)
        {
            tokenStartIndex = index;
        }
    }

    // Past the end of input: a token still in progress means unterminated
    // quotes, which is an error unless partial input is allowed (in which
    // case the loop above already yielded it).
    if (tokenStartIndex.HasValue)
    {
        Debug.Assert(inQuotes.HasValue);
        Debug.Assert(!options.HasFlag(TokenizerOptions.AllowPartialInput));

        throw new ArgumentOutOfRangeException(nameof(line), Strings.UnterminatedQuotes);
    }
}
/// <summary>
/// Consumes one character after a quoted token value has been closed.
/// Only a decorator (':') or the token terminator ('}' in the body,
/// newline in front matter) is legal here.
/// </summary>
private void ParseTokenValueRunOff(PreTokenEnumerator enumerator, ref PreTemplate template, ref PreToken token, ref FlatTokenParserState state, ref bool inFrontMatterToken, ref StringBuilder tokenContent, TokenizerOptions options)
{
    var next = enumerator.Next();
    tokenContent.Append(next);

    // Skip insignificant whitespace: everything except a newline while in
    // a front-matter token (which terminates the token below).
    if (string.IsNullOrWhiteSpace(next))
    {
        if (inFrontMatterToken == false)
        {
            return;
        }
        if (next != "\n")
        {
            return;
        }
    }

    switch (next)
    {
        case ":":
            state = FlatTokenParserState.InDecorator;
            break;

        // End of token: '}' in the body, or newline in front matter.
        case "}" when inFrontMatterToken == false:
        case "\n" when inFrontMatterToken:
            token.IsFrontMatterToken = inFrontMatterToken;
            AppendToken(template, token, ref tokenContent, options);
            token = new PreToken();
            if (inFrontMatterToken)
            {
                inFrontMatterToken = false;
                state = FlatTokenParserState.InFrontMatter;
            }
            else
            {
                state = FlatTokenParserState.InPreamble;
            }
            break;

        default:
            throw new TokenizerException($"Unexpected character: '{next}'");
    }
}
/// <summary>
/// Convenience overload: wraps the text in a <see cref="StringReader"/>
/// and delegates to the reader-based overload.
/// </summary>
private Tokenizer MakeTokenizer(PythonLanguageVersion version, TokenizerOptions optionSet, string text, SourceLocation? initialSourceLocation = null)
{
    var reader = new StringReader(text);
    return MakeTokenizer(version, optionSet, reader, initialSourceLocation);
}
/// <summary>
/// Tokenizes <paramref name="originalText"/> and verifies that the
/// verbatim token images plus preceding whitespace reconstruct the input
/// exactly, printing a character-level diff context on any mismatch.
/// NOTE(review): "PreceedingWhiteSpace" is the spelling this Tokenizer
/// API exposes; do not "fix" it here.
/// </summary>
/// <returns>The tokens produced, paired with their spans.</returns>
private static List<TokenWithSpan> TestOneString(PythonLanguageVersion version, TokenizerOptions optionSet, string originalText)
{
    // Accumulates the round-tripped text rebuilt from the tokens.
    StringBuilder output = new StringBuilder();

    var tokenizer = new Tokenizer(version, options: optionSet);
    tokenizer.Initialize(new StringReader(originalText));

    Token token;
    int prevOffset = 0;
    List<TokenWithSpan> tokens = new List<TokenWithSpan>();
    while ((token = tokenizer.GetNextToken()) != Tokens.EndOfFileToken)
    {
        tokens.Add(new TokenWithSpan(token, tokenizer.TokenSpan));

        // Each token contributes its leading whitespace plus its verbatim
        // image; together these should reproduce the input verbatim.
        output.Append(tokenizer.PreceedingWhiteSpace);
        output.Append(token.VerbatimImage);

        const int contextSize = 50;

        // Only compare the characters appended since the previous token.
        for (int i = prevOffset; i < originalText.Length && i < output.Length; i++)
        {
            if (originalText[i] != output[i])
            {
                // output some context around the mismatch
                StringBuilder x = new StringBuilder();
                StringBuilder y = new StringBuilder();
                StringBuilder z = new StringBuilder();
                for (int j = Math.Max(0, i - contextSize); j < Math.Min(Math.Min(originalText.Length, output.Length), i + contextSize); j++)
                {
                    x.AppendRepr(originalText[j]);
                    y.AppendRepr(output[j]);
                    if (j == i)
                    {
                        z.Append("^");
                    }
                    else
                    {
                        z.Append(" ");
                    }
                }

                Console.WriteLine("Mismatch context at {0}:", i);
                Console.WriteLine("Original: {0}", x.ToString());
                Console.WriteLine("New     : {0}", y.ToString());
                Console.WriteLine("Differs : {0}", z.ToString());
                Console.WriteLine("Token : {0}", token);

                Assert.AreEqual(originalText[i], output[i],
                    String.Format("Characters differ at {0}, got {1}, expected {2}", i, output[i], originalText[i]));
            }
        }

        prevOffset = output.Length;
    }

    // Trailing whitespace after the last token.
    output.Append(tokenizer.PreceedingWhiteSpace);

    Assert.AreEqual(originalText.Length, output.Length);

    return tokens;
}
/// <summary>
/// Initializes a new instance of the <see cref="ITokenizerBase"/> class.
/// </summary>
/// <param name="localTokenOffset">The local token offset.</param>
/// <param name="options">The options.</param>
protected ITokenizerBase(int localTokenOffset, TokenizerOptions options)
{
    Options = options;
    LocalTokenOffset = localTokenOffset;
}
/// <summary>
/// Consumes one character while inside an inline token value
/// ("{name=value}"). Handles quoted values, decorator start (':'), and
/// token termination ('}' in the body, newline in front matter).
/// </summary>
private void ParseTokenValue(PreTemplate template, ref PreToken token, PreTokenEnumerator enumerator, ref FlatTokenParserState state, ref bool inFrontMatterToken, ref StringBuilder tokenContent, TokenizerOptions options)
{
    var next = enumerator.Next();
    var peek = enumerator.Peek();

    // Track the raw text of the token for PreToken.Content.
    tokenContent.Append(next);

    switch (next)
    {
        // Nested '{' is never legal inside a token.
        case "{":
            throw new ParsingException($"Unexpected character '{{' in token '{token.Name}'", enumerator);

        // End of token: '}' in the body, or newline in front matter.
        case "}" when inFrontMatterToken == false:
        case "\n" when inFrontMatterToken:
            token.IsFrontMatterToken = inFrontMatterToken;
            AppendToken(template, token, ref tokenContent, options);
            token = new PreToken();
            if (inFrontMatterToken)
            {
                inFrontMatterToken = false;
                state = FlatTokenParserState.InFrontMatter;
            }
            else
            {
                state = FlatTokenParserState.InPreamble;
            }
            break;

        // ':' introduces a decorator.
        case ":":
            state = FlatTokenParserState.InDecorator;
            break;

        // Quote characters switch to the corresponding quoted-value state.
        case "'":
            state = FlatTokenParserState.InTokenValueSingleQuotes;
            break;

        case "\"":
            state = FlatTokenParserState.InTokenValueDoubleQuotes;
            break;

        // A space is only legal before a terminator/decorator, or before
        // any value characters have been accumulated.
        case " ":
            switch (peek)
            {
                case " ":
                case "}" when inFrontMatterToken == false:
                case "\n" when inFrontMatterToken:
                case ":":
                    break;
                default:
                    if (token.HasValue)
                    {
                        throw new ParsingException($"Invalid character '{peek}' in token '{token.Name}'", enumerator);
                    }
                    break;
            }
            break;

        // The terminator for the wrong context is an error.
        case "}" when inFrontMatterToken:
        case "\n" when inFrontMatterToken == false:
            throw new ParsingException($"'{token.Name}' unexpected character: {next}", enumerator);

        default:
            token.AppendValue(next);
            break;
    }
}