/// <summary>
/// Converts an R code block as it appears in R Markdown to legal R.
/// Drops the options block '{r, x = FALSE, ...}' (curly braces inside it may nest)
/// and removes every line that contains 'params$'.
/// </summary>
/// <param name="content">Text of the R Markdown code block.</param>
/// <returns>
/// The content without the options block and without 'params$' lines. In that case
/// empty lines are dropped and every remaining line is terminated with
/// <see cref="Environment.NewLine"/>. When there is no '{r' block, or its closing
/// brace is missing, <paramref name="content"/> is returned unchanged.
/// </returns>
/// <remarks>
/// http://rmarkdown.rstudio.com/developer_parameterized_reports.html#accessing_from_r
/// </remarks>
public static string GetRContentFromMarkdownCodeBlock(string content) {
    // Locate start of the block
    var start = content.IndexOfIgnoreCase("{r");
    if (start < 0) {
        return content;
    }

    // Locate the closing curly brace. 'end' is incremented before it is used
    // as an index, so the loop must stop at the last valid index; otherwise
    // an unbalanced block would read past the end of the string.
    var bc = new BraceCounter<char>('{', '}');
    var end = start;
    bc.CountBrace(content[end]);
    while (bc.Count > 0 && end < content.Length - 1) {
        end++;
        bc.CountBrace(content[end]);
    }

    // No matching closing brace: leave the content as it is
    if (bc.Count > 0 || end <= start) {
        return content;
    }

    // Remove {r ... }
    content = content.Remove(start, end - start + 1);

    // Remove parameter lines like params$x as well
    var lines = content.Split(CharExtensions.LineBreakChars, StringSplitOptions.RemoveEmptyEntries);
    var sb = new StringBuilder();
    foreach (var line in lines) {
        if (line.IndexOfOrdinal("params$") < 0) {
            sb.Append(line);
            sb.Append(Environment.NewLine);
        }
    }
    return sb.ToString();
}
/// <summary>
/// Converts an R code block as it appears in R Markdown to legal R.
/// Drops the options block '{r, x = FALSE, ...}' (curly braces inside it may nest)
/// and removes every line that contains 'params$'.
/// </summary>
/// <param name="content">Text of the R Markdown code block.</param>
/// <returns>
/// The content without the options block and without 'params$' lines. In that case
/// empty lines are dropped and every remaining line is terminated with
/// <see cref="Environment.NewLine"/>. When no block start is reported, or the
/// closing brace is missing, <paramref name="content"/> is returned unchanged.
/// </returns>
/// <remarks>
/// http://rmarkdown.rstudio.com/developer_parameterized_reports.html#accessing_from_r
/// </remarks>
public static string GetRContentFromMarkdownCodeBlock(string content) {
    // Locate start of the block
    if (!GetRCodeBlockSeparatorLength(content, out int start)) {
        return content;
    }

    // Defensive: 'start' comes from a helper and is used as an index below.
    // NOTE(review): presumably it is the position of the opening '{' - confirm.
    if (start < 0 || start >= content.Length) {
        return content;
    }

    // Locate the closing curly brace. 'end' is incremented before it is used
    // as an index, so the loop must stop at the last valid index; otherwise
    // an unbalanced block would read past the end of the string.
    var bc = new BraceCounter<char>('{', '}');
    var end = start;
    bc.CountBrace(content[end]);
    while (bc.Count > 0 && end < content.Length - 1) {
        end++;
        bc.CountBrace(content[end]);
    }

    // No matching closing brace: leave the content as it is
    if (bc.Count > 0 || end <= start) {
        return content;
    }

    // Remove {r ... }
    content = content.Remove(start, end - start + 1);

    // Remove parameter lines like params$x as well
    var lines = content.Split(CharExtensions.LineBreakChars, StringSplitOptions.RemoveEmptyEntries);
    var sb = new StringBuilder();
    foreach (var line in lines) {
        if (line.IndexOfOrdinal("params$") < 0) {
            sb.Append(line);
            sb.Append(Environment.NewLine);
        }
    }
    return sb.ToString();
}
/// <summary>
/// Feeds a string that mixes curly and square braces to a single counter, one
/// character at a time, and verifies the reported Count after every character.
/// </summary>
public void BraceCounterTest_MultipleBraces() {
    const string input = " {[ { ] } } ";
    // One expected Count value per character of the input
    var countAfterEachChar = new[] { 0, 1, 2, 2, 3, 3, 2, 2, 1, 1, 0, 0 };
    var counter = new BraceCounter<char>(new[] { '{', '}', '[', ']' });

    var position = 0;
    foreach (var ch in input) {
        counter.CountBrace(ch);
        counter.Count.Should().Be(countAfterEachChar[position]);
        position++;
    }
}
/// <summary>
/// Handles verbatim text content, where nothing is special apart from
/// braces, backslash sequences, pragmas and line-leading % comments.
///
/// Per https://developer.r-project.org/parseRd.pdf, verbatim text within an Rd
/// file is a pure stream of text, uninterpreted by the parser, with the exceptions
/// that braces must balance or be escaped, % comments are recognized, and
/// backslashes that could be interpreted as escapes must themselves be escaped.
/// No markup macros are recognized within verbatim text.
///
/// Since % can also be a C-like format specification (for example inside
/// \examples{ }, where it does not appear to get escaped), % is only treated as
/// a comment when it is the first character of a line. A backslash is handled
/// either as an escape or as a keyword.
///
/// The method returns as soon as the brace count drops back to zero, or when
/// the end of the stream is reached.
/// </summary>
private void HandleVerbatimContent() {
    var counter = new BraceCounter<char>(new[] { '{', '}', '[', ']' });

    while (!_cs.IsEndOfStream()) {
        var consumed = false;
        var ch = _cs.CurrentChar;

        if (ch == '\\') {
            if (IsEscape()) {
                // Step over the two-character escape
                _cs.Advance(2);
                consumed = true;
            } else {
                consumed = HandleKeyword();
            }
        } else if (ch == '%') {
            // In 'verbatim' text % is a comment only when it is the first
            // character of the stream or directly follows a line break
            var atLineStart = _cs.Position == 0 || _cs.PrevChar == '\r' || _cs.PrevChar == '\n';
            consumed = atLineStart && HandleComment();
        } else if (counter.CountBrace(ch)) {
            consumed = AddBraceToken();
            if (counter.Count == 0) {
                // Braces are balanced again: verbatim content is complete
                return;
            }
        } else if (ch == '#' && HandlePragma()) {
            // Pragma was processed; go straight to the next iteration
            continue;
        }

        if (!consumed) {
            _cs.MoveToNextChar();
        }
    }
}
/// <summary>
/// Runs a counter configured for both '{ }' and '[ ]' over a mixed-brace string
/// and checks the Count it reports after each individual character.
/// </summary>
public void BraceCounterTest_MultipleBraces() {
    var braces = new[] { '{', '}', '[', ']' };
    var counter = new BraceCounter<char>(braces);

    var text = " {[ { ] } } ";
    // expected[k] is the Count expected right after text[k] was processed
    var expected = new[] { 0, 1, 2, 2, 3, 3, 2, 2, 1, 1, 0, 0 };

    var k = 0;
    while (k < text.Length) {
        counter.CountBrace(text[k]);
        counter.Count.Should().Be(expected[k]);
        k++;
    }
}
/// <summary>
/// Handles regular, LaTeX-like content: % comments, backslash escapes and
/// keywords, and # pragmas.
/// </summary>
/// <param name="block">
/// When true, '{ }' and '[ ]' are counted and emitted as brace tokens, and the
/// method returns once the brace count drops back to zero. When false, braces
/// are not tracked and processing continues until the end of the stream.
/// </param>
private void HandleLatexContent(bool block) {
    // Brace tracking is only needed for block content
    BraceCounter<char> counter = null;
    if (block) {
        counter = new BraceCounter<char>(new[] { '{', '}', '[', ']' });
    }

    while (!_cs.IsEndOfStream()) {
        var consumed = false;
        var ch = _cs.CurrentChar;

        if (ch == '%') {
            consumed = HandleComment();
        } else if (ch == '\\') {
            consumed = IsEscape();
            if (consumed) {
                // Step over the two-character escape
                _cs.Advance(2);
            } else {
                consumed = HandleKeyword();
            }
        } else if (ch == '#') {
            consumed = HandlePragma();
        } else if (counter != null && counter.CountBrace(ch)) {
            consumed = AddBraceToken();
            if (counter.Count == 0) {
                // Braces are balanced again: the block is complete
                return;
            }
        }

        if (!consumed) {
            _cs.MoveToNextChar();
        }
    }
}
/// <summary>
/// Finds the range of the first '{ }' block in the text, honoring nested
/// curly braces.
/// </summary>
/// <param name="text">Text to look into</param>
/// <returns>
/// Range that starts at the first '{' and ends right after its matching '}'.
/// If the braces never balance, the range extends to the end of the text.
/// If the text has no '{' at all, an empty range at position 0 is returned.
/// </returns>
public static ITextRange GetScopeBlockRange(this string text) {
    var open = text.IndexOf('{');
    if (open < 0) {
        return TextRange.FromBounds(0, 0);
    }

    var counter = new BraceCounter<char>('{', '}');
    counter.CountBrace(text[open]);

    // Walk forward until the opening brace is balanced or the text runs out
    var last = open;
    while (counter.Count > 0 && last < text.Length - 1) {
        last++;
        counter.CountBrace(text[last]);
    }

    // Bounds are [start, end), hence the + 1
    return TextRange.FromBounds(open, last + 1);
}
/// <summary>
/// Skips an S3 method declaration of the form \method{...}{...}(signature).
/// </summary>
/// <param name="context">Parse context that supplies the tokens and the text.</param>
/// <param name="index">
/// On entry, index of the \method keyword token. On exit, when the signature
/// parentheses balance: index of the first token that starts at or after the
/// closing ')' (or of the last token if there is none). Otherwise: the index
/// reached after stepping over the curly brace tokens of \method{...}{...}.
/// </param>
/// <returns>
/// Text position right after the closing ')' of the signature. If the
/// parentheses never balance, the end of the token at <paramref name="index"/>.
/// </returns>
private static int SkipS3Method(RdParseContext context, ref int index) {
    var token = context.Tokens[index];
    Debug.Assert(token.TokenType == RdTokenType.Keyword && context.TextProvider.GetText(token) == "\\method");

    index++;
    // Step over up to two '{' '}' token pairs
    for (var i = 0; i < 2; i++) {
        if (context.Tokens[index].TokenType == RdTokenType.OpenCurlyBrace) {
            index++;
        }
        if (context.Tokens[index].TokenType == RdTokenType.CloseCurlyBrace) {
            index++;
        }
    }

    // Should be past \method{...}{...}. Now skip signature
    var bc = new BraceCounter<char>(new[] { '(', ')' });
    for (var i = context.Tokens[index - 1].End; i < context.TextProvider.Length; i++) {
        if (bc.CountBrace(context.TextProvider[i]) && bc.Count == 0) {
            // Calculate index of the next token after text position 'i'.
            // The search has to start from the current token: starting it
            // from the fallback value (the last token) would make the loop
            // look at the last token only.
            var searchFrom = index;
            index = context.Tokens.Length - 1;
            for (var j = searchFrom; j < context.Tokens.Length; j++) {
                if (context.Tokens[j].Start >= i) {
                    index = j;
                    break;
                }
            }
            return i + 1;
        }
    }
    return context.Tokens[index].End;
}
/// <summary>
/// Handles R-like content in Rd: # pragmas and comments are recognized, brace
/// nesting is counted, quoted text is handled as R strings (which is not the
/// case in plain LaTeX-like Rd content) and numbers are detected with the
/// actual R tokenizer. The method returns when the brace count drops back to
/// zero or when the end of the stream is reached.
///
/// A confusing part of the Rd specification: "There are two types of comments
/// in R-like mode. As elsewhere in Rd files, Rd comments start with %, and run
/// to the end of the line." and "% characters must be escaped even within
/// strings, or they will be taken as Rd comments." That would make % in a
/// sprintf format begin an Rd comment, which makes little sense from the
/// authoring/editing point of view, and the R engine does not escape % when
/// help is requested in Rd format. Therefore % is not handled as a comment here.
/// </summary>
private void HandleRContent() {
    var counter = new BraceCounter<char>(new[] { '{', '}', '[', ']' });

    while (!_cs.IsEndOfStream()) {
        var consumed = false;

        switch (_cs.CurrentChar) {
            case '\"':
            case '\'':
                consumed = HandleRString(_cs.CurrentChar);
                break;

            case '\\':
                if (IsEscape()) {
                    // Step over the two-character escape
                    _cs.Advance(2);
                    consumed = true;
                } else {
                    consumed = HandleKeyword();
                }
                break;

            case '#':
                if (HandlePragma()) {
                    consumed = true;
                } else if (_cs.NextChar == '#') {
                    // ## is always a comment in R-like content
                    consumed = HandleComment();
                } else {
                    // A single # may or may not start a comment; there are
                    // statements like \code{#}. The heuristic is to treat the
                    // rest of the line as NOT a comment if it contains { } or \.
                    // Peek at the rest of the line, then restore the position.
                    var lineStart = _cs.Position;
                    _cs.SkipToEol();
                    var restOfLine = _cs.Text.GetText(TextRange.FromBounds(lineStart, _cs.Position));
                    _cs.Position = lineStart;

                    consumed = restOfLine.IndexOfAny(new[] { '{', '\\', '}' }) < 0 && HandleComment();
                }
                break;

            default:
                if (counter.CountBrace(_cs.CurrentChar)) {
                    consumed = AddBraceToken();
                    if (counter.Count == 0) {
                        // Braces are balanced again: R content is complete
                        return;
                    }
                } else if (!char.IsLetter(_cs.PrevChar) && (_cs.IsDecimal() || _cs.CurrentChar == '-' || _cs.CurrentChar == '.')) {
                    // The sequence is a candidate for a number. The check is
                    // not perfect, but numbers in R-like content are typically
                    // very simple since R blocks are usually code examples.
                    var numberStart = _cs.Position;
                    _cs.SkipToWhitespace();

                    if (_cs.Position > numberStart) {
                        var text = _cs.Text.GetText(TextRange.FromBounds(numberStart, _cs.Position));
                        var tokens = new RTokenizer().Tokenize(text);

                        if (tokens.Count > 0 && tokens[0].TokenType == RTokenType.Number) {
                            if (_tokenizeRContent) {
                                AddToken(RdTokenType.Number, numberStart + tokens[0].Start, tokens[0].Length);
                            }
                            // Resume right after the number
                            _cs.Position = numberStart + tokens[0].End;
                            continue;
                        }
                    }

                    // Not a number: rewind to where the candidate started
                    _cs.Position = numberStart;
                }
                break;
        }

        if (!consumed) {
            _cs.MoveToNextChar();
        }
    }
}
/// <summary>
/// Splits a line into render commands. The first non-space character of the
/// line (the character that selected this parser) is skipped; whitespace that
/// follows it is turned into a push/pop indentation command pair around the
/// rest of the line. Within the rest of the line:
/// '##' emits the second '#' as text (escape),
/// '#{expr}' emits an expression command for 'expr',
/// everything else, including '#' followed by any other character, is text.
/// </summary>
/// <param name="line">Line to parse.</param>
/// <param name="messageHandler">
/// Receives the "Missing '}'." error when a '#{' has no matching '}'. In that
/// case the remainder of the line, starting at '#{', produces no commands.
/// </param>
/// <returns>Commands in the order they appear in the line.</returns>
public virtual List<AsbtractRenderCommand> Parse(FileLine line, IMessageHandler messageHandler) {
    var commands = new List<AsbtractRenderCommand>();

    var beginSpan = line.CreateOriginalTextSpan(line.BeginNonSpace, line.BeginNonSpace + 1); // skip matched character
    var index = line.Text.IndexOfNot(beginSpan.End, line.End, CharExtensions.IsSpace);
    var indentationSpan = line.CreateOriginalTextSpan(beginSpan.End, index);
    if (indentationSpan.IsEmpty == false) {
        var endSpan = line.CreateOriginalTextSpan(indentationSpan.End, indentationSpan.End);
        commands.Add(new PushIndentationCommand(beginSpan, indentationSpan, endSpan));
    }

    // 'end' marks the start of literal text that has not been emitted yet;
    // 'index' is the scan position.
    var end = index;
    while (index < line.End) {
        index = line.Text.IndexOf(index, line.End, (ch) => ch == '#');
        if (index == line.End) {
            // reached line end
            break;
        }

        if ((index + 1) >= line.End) {
            // '#' is the last character; it stays part of the trailing text
            break;
        }

        switch (line.Text[index + 1]) {
            case '#': {
                // Flush the text that precedes the escape
                var textSpan = line.CreateOriginalTextSpan(end, index);
                if (textSpan.IsEmpty == false) {
                    var textEndSpan = line.CreateOriginalTextSpan(index, index);
                    commands.Add(new TextCommand(textSpan, textEndSpan));
                }

                var escapeBegin = (index + 1); // skip #
                var escapeSpan = line.CreateOriginalTextSpan(escapeBegin, escapeBegin + 1);
                if (escapeSpan.IsEmpty == false) {
                    var escapeEndSpan = line.CreateOriginalTextSpan(escapeSpan.End, escapeSpan.End);
                    commands.Add(new TextCommand(escapeSpan, escapeEndSpan));
                }

                index = end = (escapeBegin + 1);
                continue;
            }

            case '{': {
                // Flush the text that precedes the expression
                var textSpan = line.CreateOriginalTextSpan(end, index);
                if (textSpan.IsEmpty == false) {
                    var textEndSpan = line.CreateOriginalTextSpan(index, index);
                    commands.Add(new TextCommand(textSpan, textEndSpan));
                }

                var expressionBegin = (index + 1);
                var expressionEnd = BraceCounter.MatchBraces(line.Text, expressionBegin, line.End);
                if (expressionEnd == line.End) {
                    // Moving both positions to the line end terminates the
                    // loop and leaves no trailing text to emit.
                    index = end = line.End;
                    var errorPosition = new TextPosition(line.Position.Line, 1 + (line.End - line.Begin));
                    messageHandler.Message(TraceLevel.Error, "Missing '}'.", line.Position.Name, errorPosition);
                    break;
                }

                // '#{' | expression | '}'
                var expressionBeginSpan = line.CreateOriginalTextSpan(index, expressionBegin + 1);
                var expressionSpan = line.CreateOriginalTextSpan(expressionBeginSpan.End, expressionEnd);
                if (expressionSpan.IsEmpty == false) {
                    var expressionEndSpan = line.CreateOriginalTextSpan(expressionSpan.End, expressionEnd + 1);
                    commands.Add(new ExpressionCommand(expressionBeginSpan, expressionSpan, expressionEndSpan));
                }

                index = end = (expressionEnd + 1);
                continue;
            }

            default: {
                // '#' followed by any other character is literal text. Only
                // the scan position moves on; 'end' must stay where it is,
                // otherwise the pending text up to and including these two
                // characters would never be emitted.
                index = (index + 2);
                continue;
            }
        }
    }

    var lastTextSpan = line.CreateOriginalTextSpan(end, line.End);
    if (lastTextSpan.IsEmpty == false) {
        var textEndSpan = line.CreateOriginalTextSpan(line.End, line.End);
        commands.Add(new TextCommand(lastTextSpan, textEndSpan));
    }

    if (indentationSpan.IsEmpty == false) {
        var popIndentationSpan = line.CreateOriginalTextSpan(line.End, line.End);
        commands.Add(new PopIndentationCommand(popIndentationSpan));
    }

    return commands;
}
/// <summary>
/// Parses a line whose first non-space character is skipped (presumably '#').
/// If the line has the form 'pragma include "Filename"' after that character,
/// a single pragma command carrying the pragma name and the file name is
/// produced for the whole line. Any other line is passed through unchanged as
/// a single script command.
/// </summary>
/// <param name="line">Line to parse.</param>
/// <param name="messageHandler">Not used by this method.</param>
/// <returns>List with at most one command; empty when the line span is empty.</returns>
public List<AsbtractRenderCommand> Parse(FileLine line, IMessageHandler messageHandler) {
    var commands = new List<AsbtractRenderCommand>();

    // Single-pass block: the first check that fails abandons pragma detection
    do {
        var cursor = (line.BeginNonSpace + 1); // Skip #

        // Expect 'pragma'
        string directive;
        cursor = this.MatchNextToken(line.Text, cursor, line.End, out directive);
        if (cursor == line.End || !string.Equals(directive, "pragma", StringComparison.Ordinal)) {
            break;
        }

        // Expect 'include'
        string pragmaName;
        cursor = this.MatchNextToken(line.Text, cursor, line.End, out pragmaName);
        if (cursor == line.End || !string.Equals(pragmaName, "include", StringComparison.Ordinal)) {
            break;
        }

        // Expect '"<Filename>"'
        var quoteOpen = line.Text.IndexOf(cursor, line.End, ch => ch == '"');
        if (quoteOpen == line.End) {
            break;
        }

        // NOTE(review): the closing quote search starts at 'cursor', not at the
        // opening quote found above - confirm this matches what FindQuoteEnd expects.
        var quoteClose = BraceCounter.FindQuoteEnd(line.Text, cursor, line.End);
        if (quoteClose == line.End) {
            break;
        }

        // Take the text between the quotes
        var fileName = line.Text.Substring(quoteOpen + 1, (quoteClose - quoteOpen - 1));

        var pragmaSpan = line.CreateOriginalTextSpan(line.Begin, line.End);
        if (pragmaSpan.IsEmpty == false) {
            var pragmaEndSpan = line.CreateOriginalTextSpan(line.End, line.End);
            commands.Add(new PragmaCommand(pragmaSpan, pragmaEndSpan, pragmaName, fileName));
        }
    } while (false);

    // No pragma detected, pass line through
    if (commands.Count == 0) {
        var scriptSpan = line.CreateOriginalTextSpan(line.Begin, line.End);
        if (scriptSpan.IsEmpty == false) {
            var scriptEndSpan = line.CreateOriginalTextSpan(line.End, line.End);
            commands.Add(new ScriptCommand(scriptSpan, scriptEndSpan));
        }
    }

    return commands;
}
/// <summary>
/// Skips an S3 method declaration of the form \method{...}{...}(signature).
/// </summary>
/// <param name="context">Parse context that supplies the tokens and the text.</param>
/// <param name="index">
/// On entry, index of the \method keyword token. On exit, when the signature
/// parentheses balance: index of the first token that starts at or after the
/// closing ')' (or of the last token if there is none). Otherwise: the index
/// reached after stepping over the curly brace tokens of \method{...}{...}.
/// </param>
/// <returns>
/// Text position right after the closing ')' of the signature. If the
/// parentheses never balance, the end of the token at <paramref name="index"/>.
/// </returns>
private static int SkipS3Method(RdParseContext context, ref int index) {
    RdToken token = context.Tokens[index];
    Debug.Assert(token.TokenType == RdTokenType.Keyword && context.TextProvider.GetText(token) == "\\method");

    index++;
    // Step over up to two '{' '}' token pairs
    for (int i = 0; i < 2; i++) {
        if (context.Tokens[index].TokenType == RdTokenType.OpenCurlyBrace) {
            index++;
        }
        if (context.Tokens[index].TokenType == RdTokenType.CloseCurlyBrace) {
            index++;
        }
    }

    // Should be past \method{...}{...}. Now skip signature
    BraceCounter<char> bc = new BraceCounter<char>(new char[] { '(', ')' });
    for (int i = context.Tokens[index - 1].End; i < context.TextProvider.Length; i++) {
        if (bc.CountBrace(context.TextProvider[i]) && bc.Count == 0) {
            // Calculate index of the next token after text position 'i'.
            // Remember where to start before installing the fallback value:
            // a search that starts at the fallback (the last token) would
            // never look at any other token.
            int firstCandidate = index;
            index = context.Tokens.Length - 1;
            for (int j = firstCandidate; j < context.Tokens.Length; j++) {
                if (context.Tokens[j].Start >= i) {
                    index = j;
                    break;
                }
            }
            return i + 1;
        }
    }
    return context.Tokens[index].End;
}