/// <summary>
/// Gets only the cdata (inner text) for the html page, returning it as an array.
/// </summary>
/// <param name="webResponseText">
/// The HTML text to parse. It is encoded as UTF-8 bytes and handed to
/// <c>AspNetParseTree.InvokeParse</c> as a stream.
/// </param>
/// <param name="filter">Optional filter, pass in null to skip</param>
/// <param name="cdata">
/// Receives the parser's <c>CharData</c> entries (after <paramref name="filter"/> is applied, if given).
/// It is null when the input is null/whitespace, when parsing yields no result, or when
/// no char data was collected. When the filter removes every entry it is an empty array.
/// </param>
/// <returns>
/// True when <paramref name="cdata"/> contains at least one entry; otherwise false.
/// </returns>
public static bool TryGetCdata(string webResponseText, Func<string, bool> filter, out string[] cdata)
{
    cdata = null;
    if (string.IsNullOrWhiteSpace(webResponseText))
    {
        return false;
    }

    // The stream is only needed while the parser consumes it, so release it
    // deterministically once InvokeParse has returned.
    using (var ms = new MemoryStream(Encoding.UTF8.GetBytes(webResponseText)))
    {
        var antlrHtml = AspNetParseTree.InvokeParse(ms);
        if (antlrHtml == null)
        {
            return false;
        }

        // Read the collection once; guard against it never having been populated.
        var innerText = antlrHtml.CharData;
        if (innerText == null || innerText.Count <= 0)
        {
            return false;
        }

        cdata = innerText.ToArray();
    }

    if (filter != null)
    {
        cdata = cdata.Where(filter).ToArray();
    }

    return cdata.Length > 0;
}
/// <summary>
/// Runs the HTML lexer and parser over <paramref name="stream"/>, walks the resulting
/// parse tree with a fresh <c>AspNetParseTree</c> listener, and returns that listener's
/// <c>Results</c>.
/// </summary>
/// <param name="stream">The stream containing the HTML text to parse.</param>
/// <returns>The <c>Results</c> collected by the listener during the tree walk.</returns>
public static HtmlParseResults InvokeParse(Stream stream)
{
    // Character stream -> lexer -> token stream -> parser.
    var tokenStream = new CommonTokenStream(new HTMLLexer(new AntlrInputStream(stream)));
    var htmlParser = new HTMLParser(tokenStream);

    // htmlDocument is the grammar rule invoked to produce the tree.
    var documentTree = htmlParser.htmlDocument();

    var treeWalker = new ParseTreeWalker();
    var listener = new AspNetParseTree();
    treeWalker.Walk(listener, documentTree);

    return listener.Results;
}