Beispiel #1
0
        /// <summary>
        /// Parses the given html text and extracts only its character data (inner text),
        /// returning it as an array.
        /// </summary>
        /// <param name="webResponseText">
        /// Raw html text to parse. Null, empty or whitespace-only text yields <c>false</c>.
        /// </param>
        /// <param name="filter">Optional filter, pass in null to skip</param>
        /// <param name="cdata">
        /// The extracted (and, when a filter is supplied, filtered) character data.
        /// Null when the input is blank or the parse produced no character data;
        /// an empty array when the filter rejected every entry.
        /// </param>
        /// <returns>
        /// <c>true</c> when at least one character-data entry remains; otherwise <c>false</c>.
        /// </returns>
        public static bool TryGetCdata(string webResponseText, Func<string, bool> filter, out string[] cdata)
        {
            cdata = null;
            if (string.IsNullOrWhiteSpace(webResponseText))
            {
                return false;
            }

            // The stream is only a temporary carrier for the text, so release it as soon
            // as the parse results have been copied out.
            using (var ms = new MemoryStream(Encoding.UTF8.GetBytes(webResponseText)))
            {
                var antlrHtml = AspNetParseTree.InvokeParse(ms);
                if (antlrHtml == null)
                {
                    return false;
                }

                // Guard against a missing collection as well as an empty one.
                var innerText = antlrHtml.CharData;
                if (innerText == null || innerText.Count <= 0)
                {
                    return false;
                }

                cdata = innerText.ToArray();
            }

            if (filter != null)
            {
                cdata = cdata.Where(filter).ToArray();
            }

            return cdata.Length > 0;
        }
Beispiel #2
0
        /// <summary>
        /// Runs the HTML lexer and parser over the given stream, walks the resulting
        /// document tree with a new <see cref="AspNetParseTree"/> instance and returns
        /// what that instance collected.
        /// </summary>
        /// <param name="stream">Stream containing the html text to parse.</param>
        /// <returns>The <c>Results</c> of the <see cref="AspNetParseTree"/> used for the walk.</returns>
        public static HtmlParseResults InvokeParse(Stream stream)
        {
            // Character stream -> tokens -> parse tree.
            var charStream   = new AntlrInputStream(stream);
            var tokenStream  = new CommonTokenStream(new HTMLLexer(charStream));
            var documentTree = new HTMLParser(tokenStream).htmlDocument();

            // Walk the tree, letting the listener gather its results along the way.
            var listener = new AspNetParseTree();
            new ParseTreeWalker().Walk(listener, documentTree);

            return listener.Results;
        }