Exemple #1
0
        /// <summary>
        /// Parses the web response content and gets only the bodies of its script tags.
        /// </summary>
        /// <param name="webResponseText">The raw text of the web response.</param>
        /// <param name="filter">
        /// Optional predicate; when it is not null only the script bodies for
        /// which it returns true are kept.
        /// </param>
        /// <param name="scriptBodies">
        /// The script bodies which remain after filtering. It is left null when the
        /// input is blank, the parser returns null, or the parse result has no
        /// character data or no script bodies.
        /// </param>
        /// <returns>
        /// True when <paramref name="scriptBodies"/> holds at least one entry, otherwise false.
        /// </returns>
        public static bool TryGetScriptBodies(string webResponseText, Func <string, bool> filter, out string[] scriptBodies)
        {
            scriptBodies = null;
            if (String.IsNullOrWhiteSpace(webResponseText))
            {
                return false;
            }

            // the stream is only needed for the duration of the parse
            using (var ms = new MemoryStream(Encoding.UTF8.GetBytes(webResponseText)))
            {
                var antlrHtml = AspNetParseTree.InvokeParse(ms);

                // a result with no character data at all is treated as a failed parse
                if (antlrHtml?.CharData == null || antlrHtml.CharData.Count <= 0)
                {
                    return false;
                }

                // guard the collection so a missing one is a 'false' and not an exception
                if (antlrHtml.ScriptBodies == null)
                {
                    return false;
                }

                scriptBodies = antlrHtml.ScriptBodies.ToArray();

                if (filter != null)
                {
                    scriptBodies = scriptBodies.Where(filter).ToArray();
                }

                return scriptBodies.Length > 0;
            }
        }
Exemple #2
0
        /// <summary>
        /// Strips the content of <paramref name="rawHtml"/> down to
        /// just its html with no scripts, css styles, doc-types, etc.
        /// </summary>
        /// <param name="rawHtml">The raw markup which is handed to the parser as UTF-8 bytes.</param>
        /// <returns>
        /// The HtmlOnly value of the parse result; null when <paramref name="rawHtml"/>
        /// is null or whitespace or when the parser returns null.
        /// </returns>
        public static string GetHtmlOnly(string rawHtml)
        {
            if (String.IsNullOrWhiteSpace(rawHtml))
            {
                return null;
            }

            // the stream is only needed for the duration of the parse
            using (var ms = new MemoryStream(Encoding.UTF8.GetBytes(rawHtml)))
            {
                var antlrRslts = AspNetParseTree.InvokeParse(ms);

                return antlrRslts?.HtmlOnly;
            }
        }
Exemple #3
0
        /// <summary>
        /// Parses the web response body of <paramref name="content"/> and collects the
        /// distinct, sorted inner text of the anchors found at
        /// <c>ul/li/a</c> following a div whose text contains 'Main article'.
        /// </summary>
        /// <param name="content">Passed as-is to GetWebResponseBody to obtain the response text.</param>
        /// <returns>
        /// A list with one item whose UsInsComNames property is the array of names;
        /// null when there is no response body, no html from the parser, or no matching nodes.
        /// </returns>
        /// <exception cref="XmlException">
        /// Raised by <see cref="XmlDocument.LoadXml"/> when the parsed html is not well-formed xml.
        /// </exception>
        public override IEnumerable <dynamic> ParseContent(object content)
        {
            var webResponseBody = GetWebResponseBody(content);

            if (webResponseBody == null)
            {
                return null;
            }

            // the stream is only needed for the duration of the parse
            string htmlOnly;
            using (var ms = new MemoryStream(Encoding.UTF8.GetBytes(webResponseBody)))
            {
                htmlOnly = AspNetParseTree.InvokeParse(ms)?.HtmlOnly;
            }

            if (string.IsNullOrWhiteSpace(htmlOnly))
            {
                return null;
            }

            // the markup comes from a web response - never resolve external resources for it
            var xml = new XmlDocument {
                XmlResolver = null
            };

            xml.LoadXml(htmlOnly);

            var insComNameNodes = xml.SelectNodes("//div[contains(text(),'Main article')]/following-sibling::ul/li/a");

            if (insComNameNodes == null || insComNameNodes.Count <= 0)
            {
                return null;
            }

            // the set gives constant-time duplicate detection; the list keeps encounter order for the sort
            var seenNames   = new HashSet <string>();
            var insComNames = new List <string>();

            foreach (var node in insComNameNodes)
            {
                var name = (node as XmlElement)?.InnerText;

                if (string.IsNullOrWhiteSpace(name) || _skipThese.Contains(name))
                {
                    continue;
                }

                // Add returns false when the name was already collected
                if (seenNames.Add(name))
                {
                    insComNames.Add(name);
                }
            }

            insComNames.Sort();
            return new List <dynamic> {
                new { UsInsComNames = insComNames.ToArray() }
            };
        }
Exemple #4
0
        /// <summary>
        /// Proof-of-concept: parses the 'AccountEdit_aspx.eg' test file and expects the
        /// result to have both tag names and script bodies, printing each to the console.
        /// </summary>
        public void TestPoc()
        {
            var testFile = PutTestFileOnDisk("AccountEdit_aspx.eg");
            var isOnDisk = System.IO.File.Exists(testFile);

            Assert.IsTrue(isOnDisk);

            var parsed = AspNetParseTree.InvokeParse(testFile);

            Assert.IsNotNull(parsed);

            // tag names
            var tags2Attrs = parsed.Tags2Attrs;

            Assert.IsNotNull(tags2Attrs);

            var tagNames = tags2Attrs.Keys;

            Assert.AreNotEqual(0, tagNames.Count);
            foreach (var tagName in tagNames)
            {
                Console.WriteLine(tagName);
            }

            // script bodies
            var scriptBodies = parsed.ScriptBodies;

            Assert.IsNotNull(scriptBodies);
            Assert.AreNotEqual(0, scriptBodies.Count);
            foreach (var scriptBody in scriptBodies)
            {
                Console.WriteLine(scriptBody);
            }
        }
Exemple #5
0
        /// <summary>
        /// Parses the web response body of <paramref name="content"/>, derives a partial
        /// XBRL uri from the attributes of its anchor ('a') tags and pairs it with an IRS id.
        /// </summary>
        /// <param name="content">Passed as-is to GetWebResponseBody to obtain the response text.</param>
        /// <returns>
        /// A list with one item having XrblUri (prefixed with Edgar.SEC_ROOT_URL) and IrsId;
        /// null when there is no response body, no anchor tags in the parse result,
        /// or no partial uri could be derived from them.
        /// </returns>
        public override IEnumerable <dynamic> ParseContent(object content)
        {
            var webResponseBody = GetWebResponseBody(content);

            if (webResponseBody == null)
            {
                return null;
            }

            // the stream is only needed for the duration of the parse
            using (var ms = new MemoryStream(Encoding.UTF8.GetBytes(webResponseBody)))
            {
                var htmlRslts = AspNetParseTree.InvokeParse(ms);

                if (htmlRslts?.Tags2Attrs == null || !htmlRslts.Tags2Attrs.ContainsKey("a"))
                {
                    return null;
                }

                var xrblUri = GetXbrlXmlPartialUri(htmlRslts.Tags2Attrs["a"]);

                if (string.IsNullOrWhiteSpace(xrblUri))
                {
                    return null;
                }

                // the helper's return value is not inspected, so IrsId carries
                // whatever it assigned to the out parameter
                string irsId;

                TryGetGetIrsId(htmlRslts, out irsId);

                return new List <dynamic>
                {
                    new
                    {
                        XrblUri = Edgar.SEC_ROOT_URL + xrblUri,
                        IrsId = irsId
                    }
                };
            }
        }