/// <summary>
/// Returns the plain text of the keyword argument found at the current token
/// position and moves the token stream to the end of that argument.
/// If the text still contains a '\href{url}{name}' construct, the first such
/// construct is replaced by its display name.
/// </summary>
/// <param name="context">Parse context that supplies the tokens and the RD text.</param>
/// <returns>
/// Trimmed text; an empty string when no keyword argument bounds are found.
/// </returns>
public static string GetText(RdParseContext context) {
    var text = string.Empty;
    if (RdParseUtility.GetKeywordArgumentBounds(context.Tokens, out var startTokenIndex, out var endTokenIndex)) {
        text = RdText.FromTokens(context, startTokenIndex, endTokenIndex);
        context.Tokens.Position = endTokenIndex;
    }
    text = text.Trim();

    var hrefIndex = text.IndexOf(@"\href", StringComparison.Ordinal);
    if (hrefIndex < 0) {
        return text;
    }

    // \href{url}{name}: the first {...} group is the URL, the second one is the name.
    // Every search runs forward from the previous match and is guarded, so a
    // malformed or truncated link leaves the text untouched instead of throwing.
    var urlOpenIndex = text.IndexOf('{', hrefIndex);
    var urlCloseIndex = urlOpenIndex >= 0 ? text.IndexOf('}', urlOpenIndex + 1) : -1;
    var nameOpenIndex = urlCloseIndex >= 0 ? text.IndexOf('{', urlCloseIndex + 1) : -1;
    var nameCloseIndex = nameOpenIndex >= 0 ? text.IndexOf('}', nameOpenIndex + 1) : -1;
    if (nameCloseIndex < 0) {
        return text;
    }

    var name = text.Substring(nameOpenIndex + 1, nameCloseIndex - nameOpenIndex - 1);
    return text.Substring(0, hrefIndex) + name + text.Substring(nameCloseIndex + 1);
}
/// <summary>
/// Tokenizes the supplied RD help text and parses it into structured function
/// information. A single RD document frequently describes several related
/// functions, so all of them are extracted in one pass rather than
/// re-processing the same data for each function.
/// </summary>
/// <param name="packageName">Name of the package the RD data belongs to.</param>
/// <param name="rdHelpData">RD help text to parse.</param>
/// <returns>Information on every function found in the RD data.</returns>
public static IReadOnlyList<IFunctionInfo> GetFunctionInfos(string packageName, string rdHelpData) {
    var rdTokenizer = new RdTokenizer(false);
    var rdText = new TextStream(rdHelpData);
    var parseContext = new RdParseContext(
        packageName,
        rdTokenizer.Tokenize(rdText, 0, rdText.Length),
        rdText);
    return ParseFunctions(parseContext);
}
/// <summary>
/// Returns the content of the last {...} group in the text of the keyword
/// argument at the current position, which for a hyperlink is its display name.
/// </summary>
/// <param name="context">Parse context positioned at the hyperlink.</param>
/// <returns>The hyperlink name, or an empty string when no {...} group is found.</returns>
public static string GetHyperlinkName(RdParseContext context) {
    // Input:    \href{{http://rlang.tidyverse.org/articles/tidy-evaluation.html}{tidy evaluationframework}}
    // GetText:  {http://rlang.tidyverse.org/articles/tidy-evaluation.html}{tidy evaluationframework}
    var linkText = GetText(context);

    var nameOpen = linkText.LastIndexOf('{');
    var nameClose = linkText.LastIndexOf('}');
    var hasName = nameOpen >= 0 && nameClose >= 0 && nameOpen < nameClose;

    return hasName
        ? linkText.Substring(nameOpen + 1, nameClose - nameOpen - 1)
        : string.Empty;
}
/// <summary>
/// Returns the trimmed plain text of the keyword argument found at the current
/// token position and moves the token stream to the end of that argument.
/// </summary>
/// <param name="context">Parse context that supplies the tokens and the RD text.</param>
/// <returns>Trimmed text; an empty string when no keyword argument bounds are found.</returns>
public static string GetText(RdParseContext context) {
    int firstTokenIndex, lastTokenIndex;
    if (!RdParseUtility.GetKeywordArgumentBounds(context.Tokens, out firstTokenIndex, out lastTokenIndex)) {
        return string.Empty;
    }

    string rawText = RdText.FromTokens(context, firstTokenIndex, lastTokenIndex);
    context.Tokens.Position = lastTokenIndex;
    return rawText.Trim();
}
/// <summary>
/// Parses a single '\item{names}{description}' entry. The names part may list
/// several comma-separated arguments which all share the same description.
/// </summary>
/// <param name="context">Parse context positioned at the '\item' keyword.</param>
/// <returns>
/// One entry per argument name; an empty list when the description block is
/// missing; null when the names block cannot be located.
/// </returns>
private static IEnumerable<IArgumentInfo> ParseArgumentItem(RdParseContext context) {
    TokenStream<RdToken> stream = context.Tokens;

    // Step over '\item'. The following { } may contain any number of '\dots',
    // which the tokenizer reports as keywords.
    stream.Advance(1);
    Debug.Assert(stream.CurrentToken.TokenType == RdTokenType.OpenCurlyBrace);
    if (stream.CurrentToken.TokenType != RdTokenType.OpenCurlyBrace) {
        return null;
    }

    int namesStartIndex, namesEndIndex;
    if (!RdParseUtility.GetKeywordArgumentBounds(stream, out namesStartIndex, out namesEndIndex)) {
        return null;
    }

    // Raw text between the braces of the names block.
    TextRange namesRange = TextRange.FromBounds(stream[namesStartIndex].End, stream[namesEndIndex].Start);
    string[] rawNames = context.TextProvider
        .GetText(namesRange)
        .Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);

    List<IArgumentInfo> result = new List<IArgumentInfo>();

    // Continue right after the closing brace of \item{...}
    stream.Position = namesEndIndex + 1;
    Debug.Assert(stream.CurrentToken.TokenType == RdTokenType.OpenCurlyBrace);
    if (stream.CurrentToken.TokenType != RdTokenType.OpenCurlyBrace) {
        return result;
    }

    string description = RdText.GetText(context);
    foreach (string rawName in rawNames) {
        string trimmedName = rawName.Trim();
        // RD spells ellipsis as \dots while R itself uses '...'
        string argumentName = trimmedName == @"\dots" ? "..." : trimmedName;
        result.Add(new ArgumentInfo(argumentName, description.Trim()));
    }
    return result;
}
/// <summary>
/// Builds plain text from the pieces of RD source located between consecutive
/// tokens in the given token range. The text of the tokens themselves is not
/// included; every piece is passed through CleanRawRdText before being appended.
/// </summary>
/// <param name="context">Parse context that supplies the tokens and the RD text.</param>
/// <param name="startTokenIndex">Index of the first token of the range.</param>
/// <param name="endTokenIndex">Index of the last token of the range.</param>
/// <returns>Concatenation of the cleaned text pieces.</returns>
public static string FromTokens(RdParseContext context, int startTokenIndex, int endTokenIndex) {
    Debug.Assert(startTokenIndex >= 0 && startTokenIndex < endTokenIndex);

    var plainText = new StringBuilder();
    var tokenIndex = startTokenIndex;
    while (tokenIndex < endTokenIndex) {
        // The gap between this token and the following one
        var gapStart = context.Tokens[tokenIndex].End;
        var gapEnd = context.Tokens[tokenIndex + 1].Start;
        var rawPiece = context.TextProvider.GetText(TextRange.FromBounds(gapStart, gapEnd));
        plainText.Append(CleanRawRdText(rawPiece));
        tokenIndex++;
    }
    return plainText.ToString();
}
/// <summary>
/// Extracts R-parseable text from RD \usage{...} block.
/// RD text may contain \dots sequence which denotes ellipsis.
/// R parser does not know about it and hence we must replace \dots by ...
/// Also, signatures may contain S3 method info like
/// '\method{as.matrix}{data.frame}(x, rownames.force = NA, \dots)'
/// which we need to filter out since they are irrelevant to intellisense.
/// </summary>
/// <param name="context">Parse context that supplies the tokens and the RD text.</param>
/// <param name="startTokenIndex">Index of the token that opens the usage block.</param>
/// <param name="endTokenIndex">Index of the token that closes the usage block.</param>
/// <returns>Trimmed usage text with \dots replaced and S3 method entries removed.</returns>
private static string GetRText(RdParseContext context, int startTokenIndex, int endTokenIndex) {
    var sb = new StringBuilder();
    var i = startTokenIndex;
    while (i < endTokenIndex) {
        int fragmentStart;
        // Index of the token to visit on the next iteration. The text fragment
        // appended below always ends where that token starts.
        int nextTokenIndex;

        var token = context.Tokens[i];
        var keywordText = token.TokenType == RdTokenType.Keyword
            ? context.TextProvider.GetText(token)
            : null;

        if (keywordText == "\\method") {
            nextTokenIndex = i;
            fragmentStart = SkipS3Method(context, ref nextTokenIndex);
            // Stay inside the usage block and always make progress, whatever
            // the skip helper reports for malformed input.
            if (nextTokenIndex > endTokenIndex) {
                nextTokenIndex = endTokenIndex;
            }
            if (nextTokenIndex <= i) {
                nextTokenIndex = i + 1;
            }
        } else {
            if (keywordText == "\\dots") {
                sb.Append("...");
            } else if (token.TokenType == RdTokenType.OpenSquareBracket || token.TokenType == RdTokenType.CloseSquareBracket) {
                // Copy verbatim
                sb.Append(context.TextProvider.GetText(token));
            }
            fragmentStart = token.End;
            nextTokenIndex = i + 1;
        }

        var fragmentEnd = context.Tokens[nextTokenIndex].Start;
        Debug.Assert(fragmentStart <= fragmentEnd);
        if (fragmentStart > fragmentEnd) {
            break; // Something went wrong
        }

        sb.Append(context.TextProvider.GetText(TextRange.FromBounds(fragmentStart, fragmentEnd)));
        i = nextTokenIndex;
    }
    return sb.ToString().Trim();
}

/// <summary>
/// Skips an S3 method entry of the form '\method{generic}{class}(signature)'.
/// </summary>
/// <param name="context">Parse context that supplies the tokens and the RD text.</param>
/// <param name="index">
/// On entry, index of the '\method' keyword token. On exit, index of the first
/// token that starts at or after the closing parenthesis of the signature
/// (the last token when there is no such token). When no balanced signature
/// is found, the index of the token following the '{...}{...}' groups.
/// </param>
/// <returns>
/// Text position right after the closing parenthesis of the signature, or
/// the end of the token at <paramref name="index"/> when no balanced
/// signature is found.
/// </returns>
private static int SkipS3Method(RdParseContext context, ref int index) {
    var token = context.Tokens[index];
    Debug.Assert(token.TokenType == RdTokenType.Keyword && context.TextProvider.GetText(token) == "\\method");

    index++;
    for (var i = 0; i < 2; i++) {
        if (context.Tokens[index].TokenType == RdTokenType.OpenCurlyBrace) {
            index++;
        }
        if (context.Tokens[index].TokenType == RdTokenType.CloseCurlyBrace) {
            index++;
        }
    }

    // Should be past \method{...}{...}. Now skip signature.
    // NOTE(review): assumes BraceCounter.CountBrace returns true for '(' / ')'
    // and that Count is the current nesting depth - confirm against BraceCounter.
    var bc = new BraceCounter<char>(new[] { '(', ')' });
    for (var i = context.Tokens[index - 1].End; i < context.TextProvider.Length; i++) {
        if (bc.CountBrace(context.TextProvider[i]) && bc.Count == 0) {
            // Find the next token after text position 'i'. The scan must start
            // at the token following \method{...}{...}; starting it at the last
            // token would make every lookup resolve to the last token.
            var firstCandidate = index;
            index = context.Tokens.Length - 1;
            for (var j = firstCandidate; j < context.Tokens.Length; j++) {
                if (context.Tokens[j].Start >= i) {
                    index = j;
                    break;
                }
            }
            return i + 1;
        }
    }
    return context.Tokens[index].End;
}
/// <summary>
/// Extracts function signatures from the RD '\usage{...}' block at the current
/// token position. When the block bounds are found, the token stream is moved
/// to the end of the block.
/// </summary>
/// <param name="context">Parse context positioned at '\usage{'.</param>
/// <returns>Signatures found in the block; an empty list when there are none.</returns>
public static IReadOnlyList<ISignatureInfo> ExtractSignatures(RdParseContext context) {
    // A usage block may list S3 methods next to the generic:
    //
    //   \usage{
    //     loglm1(formula, data, \dots)
    //     \method{loglm1}{xtabs}(formula, data, \dots)
    //     \method{loglm1}{data.frame}(formula, data, \dots)
    //     \method{loglm1}{default}(formula, data, start = rep(1, length(data)), fitted = FALSE,
    //                              keep.frequencies = fitted, param = TRUE, eps = 1 / 10,
    //                              iter = 40, print = FALSE, \dots)
    //   }
    //
    // or signatures of several related functions:
    //
    //   \usage{
    //     lockEnvironment(env, bindings = FALSE)
    //     environmentIsLocked(env)
    //     lockBinding(sym, env)
    //     unlockBinding(sym, env)
    //     bindingIsLocked(sym, env)
    //   }
    var stream = context.Tokens;
    var result = new List<ISignatureInfo>();

    // Must be at '\usage{'
    int usageStartIndex, usageEndIndex;
    if (!RdParseUtility.GetKeywordArgumentBounds(stream, out usageStartIndex, out usageEndIndex)) {
        return result;
    }

    // Inner content of the \usage{...} block cleaned up for R parsing
    var usageText = GetRText(context, usageStartIndex, usageEndIndex);
    var parsed = ParseSignatures(usageText);
    if (parsed != null) {
        result.AddRange(parsed);
    }

    stream.Position = usageEndIndex;
    return result;
}
/// <summary>
/// Walks the RD token stream collecting the description, return value,
/// argument descriptions, usage signatures, aliases, primary name and the
/// 'internal' keyword, then combines them into one function info per
/// function name. Scanning stops at the end of the stream or as soon as the
/// description, arguments, usage and value sections have all been read.
/// </summary>
/// <param name="context">Parse context that supplies the tokens and the RD text.</param>
/// <returns>Function infos built from the usage signatures and the aliases.</returns>
private static IReadOnlyList<IFunctionInfo> ParseFunctions(RdParseContext context) {
    string primaryName = null;
    string description = null; // Normally shared by all similar functions
    string returnValue = null;
    var isInternal = false;
    var aliases = new List<string>();
    IReadOnlyList<ISignatureInfo> signatures = null;
    IReadOnlyDictionary<string, string> argumentDescriptions = null;

    while (!context.Tokens.IsEndOfStream()) {
        var allSectionsRead = description != null && argumentDescriptions != null
                              && signatures != null && returnValue != null;
        if (allSectionsRead) {
            break;
        }

        var tokenBefore = context.Tokens.CurrentToken;

        if (context.IsAtKeywordWithParameters()) {
            if (string.IsNullOrEmpty(description) && context.IsAtKeyword(@"\description")) {
                description = RdText.GetText(context);
            } else if (context.IsAtKeyword(@"\keyword")) {
                var keyword = RdText.GetText(context);
                isInternal |= !string.IsNullOrEmpty(keyword) && keyword.Contains("internal");
            } else if (string.IsNullOrEmpty(returnValue) && context.IsAtKeyword(@"\value")) {
                returnValue = RdText.GetText(context);
            } else if (argumentDescriptions == null && context.IsAtKeyword(@"\arguments")) {
                // Argument names together with their descriptions
                argumentDescriptions = RdArgumentDescription.ExtractArgumentDecriptions(context);
            } else if (signatures == null && context.IsAtKeyword(@"\usage")) {
                // Signatures together with function names
                signatures = RdFunctionSignature.ExtractSignatures(context);
            } else if (context.IsAtKeyword(@"\alias")) {
                var alias = RdText.GetText(context);
                if (!string.IsNullOrWhiteSpace(alias)) {
                    aliases.Add(alias);
                }
            } else if (primaryName == null && context.IsAtKeyword(@"\name")) {
                primaryName = RdText.GetText(context);
            } else {
                context.Tokens.Advance(2);
            }
        }

        // Nothing consumed the token: step over it
        if (tokenBefore == context.Tokens.CurrentToken) {
            context.Tokens.MoveToNextToken();
        }
    }

    // Attach descriptions from the \arguments{} section to signature arguments
    if (argumentDescriptions != null && signatures != null) {
        foreach (var signature in signatures) {
            foreach (var argument in signature.Arguments) {
                if (argumentDescriptions.TryGetValue(argument.Name, out var argumentDescription)) {
                    ((NamedItemInfo)argument).Description = argumentDescription ?? string.Empty;
                }
            }
        }
    }

    // Group signatures by function name, one function info per name
    var infosByName = new Dictionary<string, FunctionInfo>();
    if (signatures != null) {
        var signaturesByName = new Dictionary<string, List<ISignatureInfo>>();
        foreach (var signature in signatures) {
            var functionName = signature.FunctionName;
            if (!infosByName.TryGetValue(functionName, out FunctionInfo existingInfo)) {
                var createdInfo = CreateFunctionInfo(functionName, context.PackageName, description, returnValue, isInternal);
                infosByName[functionName] = createdInfo;

                // The same list instance is handed to the function info and
                // keeps receiving the signatures added below.
                var createdList = new List<ISignatureInfo>();
                signaturesByName[functionName] = createdList;
                createdInfo.Signatures = createdList;
            }
            signaturesByName[functionName].Add(signature);
        }
    }

    // Propagate the primary function info to aliases that have no info of their own
    if (!string.IsNullOrWhiteSpace(primaryName) && infosByName.TryGetValue(primaryName, out FunctionInfo primaryInfo)) {
        foreach (var alias in aliases) {
            if (infosByName.ContainsKey(alias)) {
                continue;
            }
            infosByName[alias] = new FunctionInfo(alias, primaryInfo);
        }
    }

    return infosByName.Values.ToList();
}
/// <summary>
/// Extracts argument names and descriptions from
/// the RD '\arguments{...}' construct.
/// </summary>
/// <param name="context">Parse context positioned at the '\arguments' keyword.</param>
/// <returns>
/// Map of argument name to description. Empty when the keyword is not followed
/// by an opening curly brace or when the block bounds cannot be determined.
/// </returns>
public static IReadOnlyDictionary<string, string> ExtractArgumentDecriptions(RdParseContext context) {
    // \arguments{
    //   \item{formula}{
    //     A linear model formula specifying the log - linear model.
    //     See \code{\link{ loglm} } for its interpretation.
    //   }
    //   \item{data}{
    //     Numeric array or data frame. In the first case it specifies the
    //     array of frequencies; in the second it provides the data frame
    //     from which the variables occurring in the formula are
    //     preferentially obtained in the usual way.
    //   }
    //   \item{start, param, eps, iter, print}{
    //     Arguments passed to \code{\link{ loglin} }.
    //   }
    //   \item{\dots}{
    //     arguments passed to the default method.
    //   }
    // }
    Dictionary<string, string> argumentDescriptions = new Dictionary<string, string>();
    TokenStream<RdToken> tokens = context.Tokens;

    // '\arguments{' is expected
    Debug.Assert(tokens.NextToken.TokenType == RdTokenType.OpenCurlyBrace);
    if (tokens.NextToken.TokenType != RdTokenType.OpenCurlyBrace) {
        return argumentDescriptions;
    }

    // Move past '\arguments'
    tokens.MoveToNextToken();

    int startTokenIndex, endTokenIndex;
    if (!RdParseUtility.GetKeywordArgumentBounds(tokens, out startTokenIndex, out endTokenIndex)) {
        // Bounds are unknown, so the out values carry no meaningful position.
        // Leave the stream where it is rather than repositioning it from them.
        return argumentDescriptions;
    }

    // Now that we know bounds of \arguments{...} go through
    // inner '\item' elements and fetch description and all
    // argument names the description applies to.
    //
    // Example:
    //
    //   \item{start, param, eps, iter, print}{Arguments
    //   passed to \code{\link{ loglin} }.}
    //
    while (!tokens.IsEndOfStream() && tokens.Position < endTokenIndex) {
        if (!context.IsAtKeyword(@"\item")) {
            tokens.MoveToNextToken();
            continue;
        }

        IEnumerable<IArgumentInfo> args = ParseArgumentItem(context);
        if (args == null) {
            break; // Malformed item, stop collecting
        }

        foreach (var a in args) {
            argumentDescriptions[a.Name] = a.Description;
        }
    }

    // Continue parsing from the end of the \arguments{...} block
    tokens.Position = endTokenIndex;
    return argumentDescriptions;
}