/// <summary>
/// Reads the complete text from the reader, runs the pattern regex over it and yields a token
/// for every numbered group (group 1 and above) that participated in a match.
/// </summary>
/// <remarks>
/// Each token carries the character index of its group inside the text; line number and column
/// information is not provided. A <c>null</c> reader results in an empty enumeration.
/// </remarks>
/// <param name="inputReader">Reader supplying the textual data, which should be searched for regex patterns.</param>
/// <returns>Enumeration of tokens, matching the specified regex patterns.</returns>
public IEnumerable<IToken> EnumerateTokens(System.IO.TextReader inputReader)
{
    if (inputReader != null)
    {
        // The whole input is consumed up front; matching works on the complete text.
        string content = inputReader.ReadToEnd();

        foreach (System.Text.RegularExpressions.Match hit in mPatternRegEx.Matches(content))
        {
            System.Text.RegularExpressions.GroupCollection groups = hit.Groups;

            // Group 0 is the whole match, so inspection starts at group 1.
            // Every successful group yields a token (there is no early exit).
            int groupNumber = 1;
            while (groupNumber < groups.Count)
            {
                System.Text.RegularExpressions.Group candidate = groups[groupNumber];
                if (candidate.Success)
                {
                    // The token type is the name of the regex group that matched.
                    yield return new Token(candidate.Value, mPatternRegEx.GroupNameFromNumber(groupNumber), candidate.Index);
                }

                ++groupNumber;
            }
        }
    }
}
/// <summary>
/// Returns one "COMMENT" token for every multi line comment block that is both opened and closed
/// inside the given text.
/// </summary>
/// <remarks>
/// Nothing is returned when no multi line block regex is configured or when <paramref name="text"/>
/// is <c>null</c>. A comment that is opened but never closed does not produce a token here.
/// </remarks>
/// <param name="text">Text which should be scanned for multi line comment blocks.</param>
/// <returns>Enumeration of all multi line comment blocks.</returns>
public IEnumerable<IHighlighterToken> HighlightMultiLineBlocks(string text)
{
    if (mMultiLineBlockRegEx == null || text == null)
    {
        yield break;
    }

    bool blockOpen = false;
    int blockStart = -1;
    uint blockColor = 0;

    foreach (System.Text.RegularExpressions.Match blockMatch in mMultiLineBlockRegEx.Matches(text))
    {
        // Only the first successful group (group 0 excluded) of each match is evaluated.
        for (int groupNumber = 1; groupNumber < blockMatch.Groups.Count; ++groupNumber)
        {
            var matchedGroup = blockMatch.Groups[groupNumber];
            if (!matchedGroup.Success)
            {
                continue;
            }

            string groupName = mMultiLineBlockRegEx.GroupNameFromNumber(groupNumber);

            if (blockOpen)
            {
                // While a block is open only its end marker is of interest.
                bool closesBlock = groupName.Equals("COMMENT_END", StringComparison.OrdinalIgnoreCase)
                                   || groupName.Equals("CommentEnd", StringComparison.OrdinalIgnoreCase);
                if (closesBlock)
                {
                    blockOpen = false;
                    int blockEnd = matchedGroup.Index + matchedGroup.Length;
                    yield return new Interfaces.HighlighterToken(blockColor, "COMMENT", blockStart, blockEnd);
                }
            }
            else
            {
                // Outside of a block only a start marker changes the state.
                bool opensBlock = groupName.Equals("COMMENT_START", StringComparison.OrdinalIgnoreCase)
                                  || groupName.Equals("CommentStart", StringComparison.OrdinalIgnoreCase);
                if (opensBlock)
                {
                    blockOpen = true;
                    blockStart = matchedGroup.Index;
                    // The block is colored like the first pattern registered for the start marker type.
                    blockColor = mTokenTypeToPattern[groupName].FirstOrDefault().Color;
                }
            }

            break;
        }
    }
}
/// <summary>
/// Extracts the &lt;script&gt; elements found in an HTML fragment.
/// </summary>
/// <remarks>
/// For every regex match an <c>HTMLElement</c> is created from the "element" capture, its <c>Raw</c>
/// property is set to the complete match, its attributes are filled from the paired "attr"/"attrv"
/// captures and its <c>Text</c> from the "text" capture. Values of the <c>src</c> attribute are HTML-decoded.
/// </remarks>
/// <param name="HTML">HTML to search; <c>null</c> or empty input yields an empty collection.</param>
/// <returns>Collection with one element per script tag found, in order of appearance.</returns>
private HTMLElementCollection GetScripts(string HTML)
{
    HTMLElementCollection oCol = new HTMLElementCollection();
    if (string.IsNullOrEmpty(HTML))
    {
        // Nothing to scan (Regex.Matches would throw on null).
        return oCol;
    }

    // Readable form of the pattern below (compiled with IgnorePatternWhitespace):
    //(?i:
    //  (?<element>(?:<script
    //      (?:\s*
    //      (?:
    //          (?<attr>[^=>]*?)
    //          =(?:"|')
    //          (?<attrv>[^"|']*?)
    //          (?:"|')
    //      ))*
    //  )
    //  (
    //(?(?=\s*?/>)\s*?/>
    //|
    //  (?:\s*?>
    //  (?:[\s\r\n]*?<!--)?(?<text>[\s\S]*?)
    //  </script>))
    //  )
    //))
    string strExpression = "(?i:" +
                           "\t(?<element>(?:<script" +
                           "\t\t(?:\\s*" +
                           "\t\t(?:" +
                           "\t\t\t(?<attr>[^=>]*?)" +
                           "\t\t\t=(?:\"|')" +
                           "\t\t\t(?<attrv>[^\"|']*?)" +
                           "\t\t\t(?:\"|')" +
                           "\t\t))*" +
                           " )" +
                           "\t(" +
                           "(?(?=\\s*?/>)\\s*?/>" +
                           "|" +
                           " (?:\\s*?>" +
                           "\t(?:[\\s\\r\\n]*?<!--)?(?<text>[\\s\\S]*?)" +
                           " </script>))" +
                           "\t)" +
                           "))";
    System.Text.RegularExpressions.Regex oRE = new System.Text.RegularExpressions.Regex(
        strExpression,
        System.Text.RegularExpressions.RegexOptions.IgnorePatternWhitespace
        | System.Text.RegularExpressions.RegexOptions.IgnoreCase
        | System.Text.RegularExpressions.RegexOptions.Singleline);

    // Todo: the association between attr and attrv is a minor hack here... think of something better!
    // Attribute names are collected while the "attr" group is processed and are then paired
    // by position with the captures of the "attrv" group.
    ArrayList objAttr = new ArrayList();

    foreach (System.Text.RegularExpressions.Match oMatch in oRE.Matches(HTML))
    {
        HTMLElement objElement = null;

        for (int iGroup = 0; iGroup < oMatch.Groups.Count; iGroup++)
        {
            string strGroup = oRE.GroupNameFromNumber(iGroup);
            if (strGroup == "attr")
            {
                // Start a fresh name list for this match.
                objAttr = new ArrayList();
            }

            // ArrayList is zero-based: the first value capture belongs to the first name capture.
            // (Starting at 1 paired values with the wrong name and ran past the end of the list.)
            int intAttr = 0;

            foreach (System.Text.RegularExpressions.Capture oCapture in oMatch.Groups[iGroup].Captures)
            {
                switch (strGroup)
                {
                    case "element":
                        objElement = new HTMLElement(oCapture.Value);
                        objElement.Raw = oMatch.Value;
                        break;

                    case "attr":
                        objAttr.Add(oCapture.Value);
                        break;

                    case "attrv":
                        if ((string)objAttr[intAttr] == "src")
                        {
                            // need to replace &amp; with & (webresource.axd for IE6)
                            objElement.Attributes.Add(objAttr[intAttr], System.Web.HttpUtility.HtmlDecode(oCapture.Value));
                        }
                        else
                        {
                            objElement.Attributes.Add(objAttr[intAttr], oCapture.Value);
                        }
                        intAttr += 1;
                        break;

                    case "text":
                        objElement.Text = oCapture.Value;
                        break;
                }
            }
        }

        if (objElement != null)
        {
            oCol.Add(objElement);
        }
    }

    return oCol;
}
/// <summary>
/// Highlights elements in the specified text.
///
/// The highlighter tries to find matches of the patterns specified by the Patterns property in the provided text.
/// For each match an IHighlighterToken instance is returned and the color is used as defined in the matched pattern
/// definition. Except for multi line comments and keywords/identifiers the Patterns list also defines a token type
/// precedence, i.e. a highlighter token instance is always returned for the first token type matched by the regex,
/// even if multiple token types are matched simultaneously.
/// </summary>
/// <remarks>
/// An identifier that is matched completely by the keyword regex is reported once, with the keyword's token type.
/// Line comments are split into comment and "TAG" tokens when a tag regex is configured and finds tags in them.
/// A multi line comment that is still open at the end of the text is reported up to the end of the text.
/// </remarks>
/// <param name="text">Text, which should be highlighted. <c>null</c> yields an empty enumeration.</param>
/// <param name="activeBlock">
/// Active multi line block. When it is not <c>null</c> and its type is "COMMENT", the text is treated as
/// starting inside a multi line comment (from offset 0, using the block's color).
/// </param>
/// <returns>Enumeration of IHighlighterToken instances.</returns>
public IEnumerable <Interfaces.IHighlighterToken> Highlight(string text, IHighlighterToken activeBlock = null)
{
    if (text == null)
    {
        yield break;
    }

    bool insideComment = activeBlock != null && activeBlock.Type.Equals("COMMENT", StringComparison.OrdinalIgnoreCase);
    int commentStart = insideComment ? 0 : -1;
    uint commentColor = insideComment ? activeBlock.Color : 0;

    var matches = mPatternRegEx.Matches(text);

    // Perform regex matching
    foreach (System.Text.RegularExpressions.Match match in matches)
    {
        // Find first group, containing a match (group 0 is the whole match and is skipped)
        for (int i = 1; i < match.Groups.Count; ++i)
        {
            var currentGroup = match.Groups[i];
            if (!currentGroup.Success)
            {
                continue;
            }

            string tokenType = mPatternRegEx.GroupNameFromNumber(i);
            int tokenStart = currentGroup.Index;
            int tokenEnd = currentGroup.Index + currentGroup.Length;

            if (!insideComment)
            {
                // Special treatment for identifiers and keywords...
                if (tokenType.Equals("IDENTIFIER", StringComparison.OrdinalIgnoreCase) && mKeywordRegEx != null)
                {
                    string identifier = currentGroup.Value;
                    var keywordMatch = mKeywordRegEx.Match(identifier);

                    // Keywords have precedence over identifiers, but identifiers have precedence over keywords,
                    // if the keyword is only contained in the identifier.
                    if (keywordMatch.Success && keywordMatch.Length == identifier.Length)
                    {
                        // Determine keyword token type. The token itself is emitted exactly once
                        // by the dispatch below, using the replaced token type.
                        for (int g = 1; g < keywordMatch.Groups.Count; ++g)
                        {
                            if (keywordMatch.Groups[g].Success)
                            {
                                tokenType = mKeywordRegEx.GroupNameFromNumber(g);
                                break;
                            }
                        }
                    }
                }

                // Multiline comment handling...
                if (tokenType.Equals("COMMENT_START", StringComparison.OrdinalIgnoreCase) ||
                    tokenType.Equals("CommentStart", StringComparison.OrdinalIgnoreCase))
                {
                    insideComment = true;
                    commentStart = tokenStart;
                    commentColor = mTokenTypeToPattern[tokenType].FirstOrDefault().Color;
                }
                // Single line comment handling...
                else if (tokenType.Equals("LINE_COMMENT", StringComparison.OrdinalIgnoreCase) ||
                         tokenType.Equals("LineComment", StringComparison.OrdinalIgnoreCase))
                {
                    var lineColor = mTokenTypeToPattern[tokenType].FirstOrDefault().Color;

                    // Handle tags...
                    var tags = mTagRegEx != null ? mTagRegEx.Matches(currentGroup.Value) : null;
                    if (tags != null && tags.Count > 0)
                    {
                        // The tag color is only looked up when a tag was actually found, so
                        // configurations without a "TAG" pattern can still highlight line comments.
                        var tagColor = mTokenTypeToPattern["TAG"].FirstOrDefault().Color;

                        // Start of the comment text not yet emitted.
                        int segmentStart = tokenStart;
                        foreach (System.Text.RegularExpressions.Match tag in tags)
                        {
                            // Tag positions are relative to the comment, tokens are relative to the text.
                            int tagStart = tag.Index + tokenStart;
                            int tagEnd = tagStart + tag.Length;

                            if (tagStart > segmentStart)
                            {
                                yield return(new Interfaces.HighlighterToken(lineColor, tokenType, segmentStart, tagStart));
                            }

                            if (tagEnd > tagStart)
                            {
                                yield return(new Interfaces.HighlighterToken(tagColor, "TAG", tagStart, tagEnd));
                            }

                            segmentStart = tagEnd;
                        }

                        // Remaining comment text behind the last tag.
                        if (tokenEnd > segmentStart)
                        {
                            yield return(new Interfaces.HighlighterToken(lineColor, tokenType, segmentStart, tokenEnd));
                        }
                    }
                    else
                    {
                        yield return(new Interfaces.HighlighterToken(lineColor, tokenType, tokenStart, tokenEnd));
                    }
                }
                // Normal match...
                else
                {
                    yield return(new Interfaces.HighlighterToken(mTokenTypeToPattern[tokenType].FirstOrDefault().Color, tokenType, tokenStart, tokenEnd));
                }
            }
            else
            {
                // Handle tag: close the comment part in front of the tag, emit the tag
                // and continue the comment behind it.
                if (tokenType.Equals("TAG", StringComparison.OrdinalIgnoreCase))
                {
                    yield return(new Interfaces.HighlighterToken(commentColor, "COMMENT", commentStart, tokenStart));
                    yield return(new Interfaces.HighlighterToken(mTokenTypeToPattern[tokenType].FirstOrDefault().Color, tokenType, tokenStart, tokenEnd));
                    commentStart = tokenEnd;
                }
                // Multiline comment handling...
                else if (tokenType.Equals("COMMENT_END", StringComparison.OrdinalIgnoreCase) ||
                         tokenType.Equals("CommentEnd", StringComparison.OrdinalIgnoreCase))
                {
                    insideComment = false;
                    yield return(new Interfaces.HighlighterToken(commentColor, "COMMENT", commentStart, tokenEnd));
                }
            }

            // Only the first successful group of a match is evaluated.
            break;
        }
    }

    // Emit comment token if not closed
    if (insideComment && commentStart >= 0)
    {
        yield return(new Interfaces.HighlighterToken(commentColor, "COMMENT", commentStart, text.Length));
    }
}
/// <summary>
/// Executes the native implementation behind the two regex functions known to the environment:
/// the "contains" function (regex test against the argument string) and the "match" function
/// (all matches together with their captures).
/// </summary>
/// <param name="ctx">Execution context providing the environment, the heap and the call arguments.</param>
/// <param name="func">Function being executed; compared against the environment's regex functions.</param>
/// <param name="thisValue">Regex object the function is called on; its pattern field is read.</param>
/// <returns>
/// A return value wrapping the result object, or the throwing <c>ExecValue</c> of the first nested
/// call that reported a throw.
/// </returns>
/// <exception cref="NotImplementedException">Thrown when <paramref name="func"/> is neither of the two regex functions.</exception>
private async Task<ExecValue> executeNativeRegexFunctionAsync(ExecutionContext ctx, FunctionDefinition func, ObjectData thisValue)
{
    if (func == ctx.Env.RegexContainsFunction)
    {
        // The single call argument is the text to test, the pattern comes from the regex object itself.
        string input = ctx.FunctionArguments.Single().DereferencedOnce().NativeString;
        string patternText = thisValue.GetField(ctx.Env.RegexPatternField).DereferencedOnce().NativeString;

        bool isMatch = new System.Text.RegularExpressions.Regex(patternText).IsMatch(input);

        ObjectData boolInstance = await ObjectData.CreateInstanceAsync(ctx, func.ResultTypeName.Evaluation.Components, isMatch)
            .ConfigureAwait(false);
        return ExecValue.CreateReturn(boolInstance);
    }

    if (!(func == ctx.Env.RegexMatchFunction))
    {
        throw new NotImplementedException($"{ExceptionCode.SourceInfo()}");
    }

    string text = ctx.FunctionArguments.Single().DereferencedOnce().NativeString;
    string pattern = thisValue.GetField(ctx.Env.RegexPatternField).DereferencedOnce().NativeString;

    System.Text.RegularExpressions.Regex regex = new System.Text.RegularExpressions.Regex(pattern);
    System.Text.RegularExpressions.MatchCollection allMatches = regex.Matches(text);

    var matchObjects = new List<ObjectData>();

    foreach (System.Text.RegularExpressions.Match match in allMatches)
    {
        ObjectData matchStart = await createNat64Async(ctx, (UInt64)match.Index).ConfigureAwait(false);
        ObjectData matchEnd = await createNat64Async(ctx, (UInt64)(match.Index + match.Length)).ConfigureAwait(false);

        // Build the array which receives the captures of this match.
        if (!ctx.Env.DereferencedOnce(ctx.Env.MatchCapturesProperty.TypeName.Evaluation.Components,
                out IEntityInstance capturesArrayType, out bool ignored))
        {
            throw new Exception($"Internal error {ExceptionCode.SourceInfo()}");
        }

        ExecValue arrayExec = await createObject(ctx, true, capturesArrayType, ctx.Env.ArrayDefaultConstructor, null)
            .ConfigureAwait(false);
        if (arrayExec.IsThrow)
        {
            return arrayExec;
        }

        ObjectData capturesArrayPtr = arrayExec.ExprValue;
        // Paired with the TryRelease call after the match object has been constructed.
        ctx.Heap.TryInc(ctx, capturesArrayPtr, RefCountIncReason.StoringLocalPointer, "");

        // skipping implicit "everything" group
        for (int groupNumber = 1; groupNumber < match.Groups.Count; ++groupNumber)
        {
            System.Text.RegularExpressions.Group group = match.Groups[groupNumber];

            string groupName = regex.GroupNameFromNumber(groupNumber);
            if (groupName == $"{groupNumber}") // hack for anonymous captures
            {
                groupName = null;
            }

            foreach (System.Text.RegularExpressions.Capture capture in group.Captures)
            {
                ObjectData captureStart = await createNat64Async(ctx, (UInt64)capture.Index).ConfigureAwait(false);
                ObjectData captureEnd = await createNat64Async(ctx, (UInt64)(capture.Index + capture.Length)).ConfigureAwait(false);

                // Optional group name: filled for named groups, empty for anonymous ones.
                Option<ObjectData> optionalName;
                if (groupName == null)
                {
                    optionalName = new Option<ObjectData>();
                }
                else
                {
                    ObjectData namePtr = await createStringAsync(ctx, groupName).ConfigureAwait(false);
                    optionalName = new Option<ObjectData>(namePtr);
                }

                // The option type is taken from the last parameter of the capture constructor.
                IEntityInstance optionalNameType = ctx.Env.CaptureConstructor.Parameters.Last().TypeName.Evaluation.Components;
                ExecValue optionExec = await createOption(ctx, optionalNameType, optionalName).ConfigureAwait(false);
                if (optionExec.IsThrow)
                {
                    return optionExec;
                }

                ObjectData captureName = optionExec.ExprValue;

                ExecValue captureExec = await createObject(ctx, false, ctx.Env.CaptureType.InstanceOf, ctx.Env.CaptureConstructor, null,
                    captureStart, captureEnd, captureName).ConfigureAwait(false);
                if (captureExec.IsThrow)
                {
                    return captureExec;
                }

                ObjectData captureRef = await captureExec.ExprValue.ReferenceAsync(ctx).ConfigureAwait(false);

                ExecValue appendExec = await callNonVariadicFunctionDirectly(ctx, ctx.Env.ArrayAppendFunction, null,
                    capturesArrayPtr, captureRef).ConfigureAwait(false);
                if (appendExec.IsThrow)
                {
                    return appendExec;
                }
            }
        }

        ExecValue matchExec = await createObject(ctx, false, ctx.Env.MatchType.InstanceOf, ctx.Env.MatchConstructor, null,
            matchStart, matchEnd, capturesArrayPtr).ConfigureAwait(false);
        // The local pointer is dropped before the outcome of the construction is inspected.
        ctx.Heap.TryRelease(ctx, capturesArrayPtr, null, false, RefCountDecReason.DroppingLocalPointer, "");
        if (matchExec.IsThrow)
        {
            return matchExec;
        }

        matchObjects.Add(matchExec.ExprValue);
    }

    ObjectData heapChunk = await createChunkOnHeap(ctx, ctx.Env.MatchType.InstanceOf, matchObjects).ConfigureAwait(false);
    return ExecValue.CreateReturn(heapChunk);
}