/// <summary>
        /// Searches for regex patterns in the textual input and returns them as a list of tokens.
        /// </summary>
        /// <remarks>
        /// This tokenizer does not return line number and column information; only the character offset of the
        /// matched group is passed to the created token. Every regex match produces at most one token, typed
        /// after the first named group that took part in the match.
        /// </remarks>
        /// <param name="inputReader">Reader providing the textual data, which should be searched for regex patterns.
        /// The reader is read to its end. If it is <c>null</c>, an empty enumeration is returned.</param>
        /// <returns>Enumeration of tokens, matching the specified regex patterns.</returns>
        public IEnumerable <IToken> EnumerateTokens(System.IO.TextReader inputReader)
        {
            if (inputReader == null)
            {
                yield break;
            }

            string inputText = inputReader.ReadToEnd();

            // Perform regex matching
            foreach (System.Text.RegularExpressions.Match match in mPatternRegEx.Matches(inputText))
            {
                // Find first group, containing a match (group 0 is the whole match and is skipped)
                for (int i = 1; i < match.Groups.Count; ++i)
                {
                    var currentGroup = match.Groups[i];
                    if (!currentGroup.Success)
                    {
                        continue;
                    }

                    string tokenType = mPatternRegEx.GroupNameFromNumber(i);
                    yield return(new Token(currentGroup.Value, tokenType, currentGroup.Index));

                    // Only the first successful group defines the token; without this break a pattern with
                    // several simultaneously successful groups would emit multiple tokens for one match.
                    break;
                }
            }
        }
 /// <summary>
 /// Scans the text for multi line comment delimiters and returns one token per completed comment block.
 /// </summary>
 /// <param name="text">Text to scan. No tokens are returned if it is <c>null</c>.</param>
 /// <returns>Enumeration of all multi line comment blocks.</returns>
 public IEnumerable <IHighlighterToken> HighlightMultiLineBlocks(string text)
 {
     // Nothing to report without a delimiter regex or without input.
     if (mMultiLineBlockRegEx == null || text == null)
     {
         yield break;
     }

     bool isOpen     = false;
     int  blockStart = -1;
     uint blockColor = 0;

     foreach (System.Text.RegularExpressions.Match delimiter in mMultiLineBlockRegEx.Matches(text))
     {
         // Locate the first pattern group that took part in this match (group 0 is the whole match).
         int groupNumber = 1;
         while (groupNumber < delimiter.Groups.Count && !delimiter.Groups[groupNumber].Success)
         {
             ++groupNumber;
         }

         if (groupNumber >= delimiter.Groups.Count)
         {
             // No pattern group succeeded for this match.
             continue;
         }

         var    hit     = delimiter.Groups[groupNumber];
         string hitType = mMultiLineBlockRegEx.GroupNameFromNumber(groupNumber);

         if (isOpen)
         {
             // Inside a block only an end delimiter is of interest; it closes the block and emits it.
             if (hitType.Equals("COMMENT_END", StringComparison.OrdinalIgnoreCase) || hitType.Equals("CommentEnd", StringComparison.OrdinalIgnoreCase))
             {
                 isOpen = false;
                 yield return(new Interfaces.HighlighterToken(blockColor, "COMMENT", blockStart, hit.Index + hit.Length));
             }
         }
         else if (hitType.Equals("COMMENT_START", StringComparison.OrdinalIgnoreCase) || hitType.Equals("CommentStart", StringComparison.OrdinalIgnoreCase))
         {
             // Outside a block only a start delimiter is of interest; remember where the block begins.
             isOpen     = true;
             blockStart = hit.Index;
             blockColor = mTokenTypeToPattern[hitType].FirstOrDefault().Color;
         }
     }
 }
Example #3
0
		/// <summary>
		/// Extracts all script elements found in the given markup.
		/// </summary>
		/// <param name="HTML">Markup to scan for script tags.</param>
		/// <returns>
		/// Collection with one HTMLElement per matched script tag. Each element carries the raw matched markup,
		/// the parsed attributes (the value of a "src" attribute is HTML-decoded) and the inline script text.
		/// </returns>
		private HTMLElementCollection GetScripts(string HTML)
		{
			string strExpression;
			// Readable form of the expression built below (pattern whitespace is ignored by the regex options):
			//(?i:
			//	(?<element>(?:<script
			//		(?:\s*
			//		(?:
			//			(?<attr>[^=>]*?)
			//			=(?:"|')
			//			(?<attrv>[^"|']*?)
			//			(?:"|')
			//		))*
			//        )
			//	(
			//(?(?=\s*?/>)\s*?/>
			//|
			//                (?:\s*?>
			//	(?:[\s\r\n]*?<!--)?(?<text>[\s\S]*?)
			//                </script>))
			//	))
			//)
			strExpression = "(?i:" + "\t(?<element>(?:<script" + "\t\t(?:\\s*" + "\t\t(?:" + "\t\t\t(?<attr>[^=>]*?)" + "\t\t\t=(?:\"|')" + "\t\t\t(?<attrv>[^\"|']*?)" + "\t\t\t(?:\"|')" + "\t\t))*" + "        )" + "\t(" + "(?(?=\\s*?/>)\\s*?/>" + "|" + "                (?:\\s*?>" + "\t(?:[\\s\\r\\n]*?<!--)?(?<text>[\\s\\S]*?)" + "                </script>))" + "\t)" + "))";

			System.Text.RegularExpressions.Regex oRE = new System.Text.RegularExpressions.Regex(strExpression, System.Text.RegularExpressions.RegexOptions.IgnorePatternWhitespace | System.Text.RegularExpressions.RegexOptions.IgnoreCase | System.Text.RegularExpressions.RegexOptions.Singleline);

			string strGroup;
			HTMLElementCollection oCol = new HTMLElementCollection();
			HTMLElement objElement;
			ArrayList objAttr = new ArrayList();
			//Todo: the association between attr and attrv is a minor hack here... think of something better!
			// The n-th capture of "attrv" is paired with the n-th capture of "attr" collected earlier.
			int intAttr;
			foreach (System.Text.RegularExpressions.Match oMatch in oRE.Matches(HTML)) {
				objElement = null;

				// Groups are visited in numeric order, so "element" and "attr" are processed before "attrv".
				for (int iGroup = 0; iGroup <= oMatch.Groups.Count - 1; iGroup++) {
					strGroup = oRE.GroupNameFromNumber(iGroup);
					if (strGroup == "attr")
					{
						// Start a fresh attribute name list for every match.
						objAttr = new ArrayList();
					}
					// ArrayList is zero-based: the first attribute value belongs to objAttr[0].
					// Starting at 1 paired values with the wrong names and threw for single-attribute tags.
					intAttr = 0;
					foreach (System.Text.RegularExpressions.Capture oCapture in oMatch.Groups[iGroup].Captures) {
						switch (strGroup) {
							case "element":
								objElement = new HTMLElement(oCapture.Value);
								objElement.Raw = oMatch.Value;
								break;
							case "attr":
								objAttr.Add(oCapture.Value);
								break;
							case "attrv":
								if ((string)objAttr[intAttr] == "src")
								{
									//need to replace &amp; with &  (webresource.axd for IE6)
									objElement.Attributes.Add(objAttr[intAttr], System.Web.HttpUtility.HtmlDecode(oCapture.Value));
								}
								else
								{
									objElement.Attributes.Add(objAttr[intAttr], oCapture.Value);
								}

								intAttr += 1;
								break;
							case "text":
								objElement.Text = oCapture.Value;
								break;
						}

					}
				}
				if ((objElement != null))
				{
					oCol.Add(objElement);
				}
			}
			return oCol;
		}
        /// <summary>
        /// Highlights elements in the specified text.
        ///
        /// The highlighter tries to find matches of the patterns specified by the Patterns property in the provided text.
        /// For each match an IHighlighterToken instance is returned and the color is used as defined in the matched pattern
        /// definition. Except for multi line comments and keywords/identifiers the Patterns list also defines a token type
        /// precedence, i.e. a highlighter token instance is always returned for the first token type matched by the regex,
        /// even if multiple token types are matched simultaneously.
        /// </summary>
        /// <param name="text">Text, which should be highlighted. If <c>null</c>, no tokens are returned.</param>
        /// <param name="activeBlock">Active multi line block. If its type is "COMMENT", the text is treated as starting
        /// inside that comment and the color of the block is used for the comment token.</param>
        /// <returns>Enumeration of IHighlighterToken instances.</returns>
        public IEnumerable <Interfaces.IHighlighterToken> Highlight(string text, IHighlighterToken activeBlock = null)
        {
            if (text == null)
            {
                yield break;
            }

            bool insideComment = activeBlock != null && activeBlock.Type.Equals("COMMENT", StringComparison.OrdinalIgnoreCase);
            int  commentStart  = insideComment ? 0 : -1;
            int  commentEnd    = -1;
            uint commentColor  = insideComment ? activeBlock.Color : 0;

            var matches = mPatternRegEx.Matches(text);
            // Perform regex matching
            foreach (System.Text.RegularExpressions.Match match in matches)
            {
                // Find first group, containing a match (group 0 is the whole match and is skipped)
                for (int i = 1; i < match.Groups.Count; ++i)
                {
                    var currentGroup = match.Groups[i];
                    if (currentGroup.Success)
                    {
                        string tokenType = mPatternRegEx.GroupNameFromNumber(i);
                        // Not in multi-line comment?
                        if (!insideComment)
                        {
                            // Special treatment for identifiers and keywords...
                            if (tokenType.Equals("IDENTIFIER", StringComparison.OrdinalIgnoreCase) && mKeywordRegEx != null)
                            {
                                string identifier   = currentGroup.Value;
                                var    keywordMatch = mKeywordRegEx.Match(identifier);
                                // Keywords have precedence over identifiers, but identifiers have precedence over keywords,
                                // if the keyword is only contained in the identifier.
                                if (keywordMatch.Success && keywordMatch.Length == identifier.Length)
                                {
                                    // Determine keyword token type
                                    for (int g = 1; g < keywordMatch.Groups.Count; ++g)
                                    {
                                        var currentKeywordGroup = keywordMatch.Groups[g];
                                        if (currentKeywordGroup.Success)
                                        {
                                            tokenType = mKeywordRegEx.GroupNameFromNumber(g);
                                            break;
                                        }
                                    }
                                    yield return(new Interfaces.HighlighterToken(mTokenTypeToPattern[tokenType].FirstOrDefault().Color, tokenType, currentGroup.Index, currentGroup.Index + currentGroup.Length));

                                    // The keyword has been emitted; stop processing this match so that it does not
                                    // fall through to the generic branches below and get emitted a second time.
                                    break;
                                }
                            }
                            // Multiline comment handling...
                            if (tokenType.Equals("COMMENT_START", StringComparison.OrdinalIgnoreCase) || tokenType.Equals("CommentStart", StringComparison.OrdinalIgnoreCase))
                            {
                                insideComment = true;
                                commentStart  = currentGroup.Index;
                                commentColor  = mTokenTypeToPattern[tokenType].FirstOrDefault().Color;
                            }
                            else
                            // Single line comment handling...
                            if (tokenType.Equals("LINE_COMMENT", StringComparison.OrdinalIgnoreCase) || tokenType.Equals("LineComment", StringComparison.OrdinalIgnoreCase))
                            {
                                commentColor = mTokenTypeToPattern[tokenType].FirstOrDefault().Color;

                                // Handle tags...
                                if (mTagRegEx != null)
                                {
                                    commentStart = currentGroup.Index;
                                    string lineComment = currentGroup.Value;
                                    var    tags        = mTagRegEx.Matches(lineComment);
                                    if (tags.Count > 0)
                                    {
                                        // The tag color is only looked up when a tag is actually present, so a
                                        // configuration without tag handling never touches the "TAG" entry.
                                        var tagColor = mTokenTypeToPattern["TAG"].FirstOrDefault().Color;

                                        // Split the line comment into alternating comment and tag tokens.
                                        // Tag positions are relative to the comment and shifted by its start index.
                                        foreach (System.Text.RegularExpressions.Match tag in tags)
                                        {
                                            commentEnd = tag.Index + currentGroup.Index;
                                            int tagStart = tag.Index + currentGroup.Index;
                                            int tagEnd   = tagStart + tag.Length;
                                            if (commentEnd > commentStart)
                                            {
                                                yield return(new Interfaces.HighlighterToken(commentColor, tokenType, commentStart, commentEnd));
                                            }
                                            if (tagEnd > tagStart)
                                            {
                                                yield return(new Interfaces.HighlighterToken(tagColor, "TAG", tagStart, tagEnd));
                                            }
                                            commentStart = tagEnd;
                                        }
                                        // Remainder of the comment behind the last tag
                                        commentEnd = currentGroup.Index + currentGroup.Length;
                                        if (commentEnd > commentStart)
                                        {
                                            yield return(new Interfaces.HighlighterToken(commentColor, tokenType, commentStart, commentEnd));
                                        }
                                    }
                                    else
                                    {
                                        yield return(new Interfaces.HighlighterToken(mTokenTypeToPattern[tokenType].FirstOrDefault().Color, tokenType, currentGroup.Index, currentGroup.Index + currentGroup.Length));
                                    }
                                }
                                else
                                {
                                    yield return(new Interfaces.HighlighterToken(mTokenTypeToPattern[tokenType].FirstOrDefault().Color, tokenType, currentGroup.Index, currentGroup.Index + currentGroup.Length));
                                }
                            }
                            else
                            // Normal match...
                            {
                                yield return(new Interfaces.HighlighterToken(mTokenTypeToPattern[tokenType].FirstOrDefault().Color, tokenType, currentGroup.Index, currentGroup.Index + currentGroup.Length));
                            }
                        }
                        else
                        {
                            // Inside a multi line comment only tags and the comment end are of interest.
                            // Handle tag
                            if (tokenType.Equals("TAG", StringComparison.OrdinalIgnoreCase))
                            {
                                // Emit the comment part in front of the tag, then the tag itself
                                commentEnd = currentGroup.Index;
                                yield return(new Interfaces.HighlighterToken(commentColor, "COMMENT", commentStart, commentEnd));

                                yield return(new Interfaces.HighlighterToken(mTokenTypeToPattern[tokenType].FirstOrDefault().Color, tokenType, currentGroup.Index, currentGroup.Index + currentGroup.Length));

                                // The comment continues behind the tag
                                commentStart  = currentGroup.Index + currentGroup.Length;
                                insideComment = true;
                            }
                            // Multiline comment handling...
                            if (tokenType.Equals("COMMENT_END", StringComparison.OrdinalIgnoreCase) || tokenType.Equals("CommentEnd", StringComparison.OrdinalIgnoreCase))
                            {
                                insideComment = false;
                                commentEnd    = currentGroup.Index + currentGroup.Length;
                                yield return(new Interfaces.HighlighterToken(commentColor, "COMMENT", commentStart, commentEnd));
                            }
                        }
                        break;
                    }
                }
            }

            // Emit comment token if not closed
            if (insideComment && commentStart >= 0)
            {
                yield return(new Interfaces.HighlighterToken(commentColor, "COMMENT", commentStart, text.Length));
            }
        }
Example #5
0
        /// <summary>
        /// Executes the native implementation of a regex function of the interpreted language.
        /// Two functions are handled: <c>ctx.Env.RegexContainsFunction</c> (evaluated with <c>Regex.IsMatch</c>) and
        /// <c>ctx.Env.RegexMatchFunction</c> (evaluated with <c>Regex.Matches</c>, converted into match objects).
        /// </summary>
        /// <param name="ctx">Execution context providing the function arguments, the environment and the heap.</param>
        /// <param name="func">Function to execute; compared against the regex functions of the environment.</param>
        /// <param name="thisValue">Regex object whose pattern field supplies the pattern string.</param>
        /// <returns>
        /// A return value wrapping the result, or the throwing <c>ExecValue</c> of the first helper call
        /// that reported <c>IsThrow</c>.
        /// </returns>
        /// <exception cref="NotImplementedException">Thrown if <paramref name="func"/> is neither of the two handled functions.</exception>
        private async Task <ExecValue> executeNativeRegexFunctionAsync(ExecutionContext ctx, FunctionDefinition func,
                                                                       ObjectData thisValue)
        {
            if (func == ctx.Env.RegexContainsFunction)
            {
                // The single argument is the input string to test.
                ObjectData arg     = ctx.FunctionArguments.Single();
                ObjectData arg_val = arg.DereferencedOnce();
                string     arg_str = arg_val.NativeString;

                // The pattern is stored in a field of the regex object.
                ObjectData pattern_obj = thisValue.GetField(ctx.Env.RegexPatternField);
                ObjectData pattern_val = pattern_obj.DereferencedOnce();
                string     pattern     = pattern_val.NativeString;

                bool val = new System.Text.RegularExpressions.Regex(pattern).IsMatch(arg_str);

                // Wrap the native bool into an instance of the function's declared result type.
                ExecValue result = ExecValue.CreateReturn(await ObjectData.CreateInstanceAsync(ctx,
                                                                                               func.ResultTypeName.Evaluation.Components, val).ConfigureAwait(false));
                return(result);
            }
            else if (func == ctx.Env.RegexMatchFunction)
            {
                // Same argument and pattern extraction as in the branch above.
                ObjectData arg     = ctx.FunctionArguments.Single();
                ObjectData arg_val = arg.DereferencedOnce();
                string     arg_str = arg_val.NativeString;

                ObjectData pattern_obj = thisValue.GetField(ctx.Env.RegexPatternField);
                ObjectData pattern_val = pattern_obj.DereferencedOnce();
                string     pattern     = pattern_val.NativeString;

                System.Text.RegularExpressions.Regex           regex   = new System.Text.RegularExpressions.Regex(pattern);
                System.Text.RegularExpressions.MatchCollection matches = regex.Matches(arg_str);

                // One match object per regex match, collected in match order.
                var elements = new List <ObjectData>();
                for (int match_idx = 0; match_idx < matches.Count; ++match_idx)
                {
                    System.Text.RegularExpressions.Match match = matches[match_idx];
                    // Start offset and exclusive end offset (Index + Length) of the whole match.
                    ObjectData match_start_val = await createNat64Async(ctx, (UInt64)match.Index).ConfigureAwait(false);

                    ObjectData match_end_val = await createNat64Async(ctx, (UInt64)(match.Index + match.Length)).ConfigureAwait(false);

                    ObjectData array_captures_ptr;

                    // Build the array holding the captures of this match.
                    {
                        // Resolve the element type of the array from the declared type of the captures property.
                        if (!ctx.Env.DereferencedOnce(ctx.Env.MatchCapturesProperty.TypeName.Evaluation.Components,
                                                      out IEntityInstance array_captures_type, out bool dummy))
                        {
                            throw new Exception($"Internal error {ExceptionCode.SourceInfo()}");
                        }

                        ExecValue ret = await createObject(ctx, true, array_captures_type, ctx.Env.ArrayDefaultConstructor, null)
                                        .ConfigureAwait(false);

                        if (ret.IsThrow)
                        {
                            return(ret);
                        }

                        array_captures_ptr = ret.ExprValue;
                        // Reference count is incremented for the local pointer; the matching release follows the
                        // creation of the match object below.
                        // NOTE(review): the early returns on IsThrow inside the loops below leave this method
                        // without that release - confirm whether the array can leak on those paths.
                        ctx.Heap.TryInc(ctx, array_captures_ptr, RefCountIncReason.StoringLocalPointer, "");

                        // skipping implicit "everything" group
                        for (int grp_idx = 1; grp_idx < match.Groups.Count; ++grp_idx)
                        {
                            System.Text.RegularExpressions.Group group = match.Groups[grp_idx];
                            string group_name = regex.GroupNameFromNumber(grp_idx);
                            if (group_name == $"{grp_idx}") // hack for anonymous captures
                            {
                                group_name = null;
                            }

                            // A group can capture several times; every capture becomes its own object.
                            for (int cap_idx = 0; cap_idx < group.Captures.Count; ++cap_idx)
                            {
                                System.Text.RegularExpressions.Capture cap = group.Captures[cap_idx];

                                ObjectData cap_start_val = await createNat64Async(ctx, (UInt64)cap.Index).ConfigureAwait(false);

                                ObjectData cap_end_val = await createNat64Async(ctx, (UInt64)(cap.Index + cap.Length)).ConfigureAwait(false);

                                // Optional group name: filled for named groups, empty for anonymous ones.
                                ObjectData cap_opt_name_val;
                                {
                                    Option <ObjectData> opt_group_name_obj;
                                    if (group_name != null)
                                    {
                                        ObjectData str_ptr = await createStringAsync(ctx, group_name).ConfigureAwait(false);

                                        opt_group_name_obj = new Option <ObjectData>(str_ptr);
                                    }
                                    else
                                    {
                                        opt_group_name_obj = new Option <ObjectData>();
                                    }

                                    // The option type is taken from the last parameter of the capture constructor.
                                    IEntityInstance opt_cap_type = ctx.Env.CaptureConstructor.Parameters.Last().TypeName.Evaluation.Components;
                                    ExecValue       opt_exec     = await createOption(ctx, opt_cap_type, opt_group_name_obj).ConfigureAwait(false);

                                    if (opt_exec.IsThrow)
                                    {
                                        return(opt_exec);
                                    }
                                    cap_opt_name_val = opt_exec.ExprValue;
                                }
                                // Construct the capture object from start, end and optional name.
                                ExecValue capture_obj_exec = await createObject(ctx, false, ctx.Env.CaptureType.InstanceOf,
                                                                                ctx.Env.CaptureConstructor, null, cap_start_val, cap_end_val, cap_opt_name_val).ConfigureAwait(false);

                                if (capture_obj_exec.IsThrow)
                                {
                                    return(capture_obj_exec);
                                }
                                ObjectData capture_ref = await capture_obj_exec.ExprValue.ReferenceAsync(ctx).ConfigureAwait(false);

                                // Append a reference to the capture to the captures array.
                                ExecValue append_exec = await callNonVariadicFunctionDirectly(ctx, ctx.Env.ArrayAppendFunction, null,
                                                                                              array_captures_ptr, capture_ref).ConfigureAwait(false);

                                if (append_exec.IsThrow)
                                {
                                    return(append_exec);
                                }
                            }
                        }
                    }
                    // Construct the match object from start, end and the captures array.
                    ObjectData match_val;
                    {
                        ExecValue ret = await createObject(ctx, false, ctx.Env.MatchType.InstanceOf,
                                                           ctx.Env.MatchConstructor, null, match_start_val, match_end_val, array_captures_ptr).ConfigureAwait(false);

                        // Drop the local pointer taken above; this happens before the IsThrow check, so it runs
                        // regardless of whether the constructor call succeeded.
                        ctx.Heap.TryRelease(ctx, array_captures_ptr, null, false, RefCountDecReason.DroppingLocalPointer, "");

                        if (ret.IsThrow)
                        {
                            return(ret);
                        }

                        match_val = ret.ExprValue;
                    }

                    elements.Add(match_val);
                }

                // Pack all match objects into a chunk on the heap and return it.
                ObjectData heap_chunk = await createChunkOnHeap(ctx, ctx.Env.MatchType.InstanceOf, elements).ConfigureAwait(false);

                ExecValue result = ExecValue.CreateReturn(heap_chunk);
                return(result);
            }
            else
            {
                // Any other function has no native implementation here.
                throw new NotImplementedException($"{ExceptionCode.SourceInfo()}");
            }
        }