コード例 #1
0
        /// <summary>
        /// Locates the node whose <c>href</c> attribute contains <c>QueryHashSource</c> and
        /// returns that attribute's value prefixed with <c>Utility.BaseUrl</c>.
        /// </summary>
        /// <returns><c>Utility.BaseUrl</c> concatenated with the matched node's <c>href</c> value.</returns>
        public string ExtractQueryHashScriptLink()
        {
            // Attribute-substring selector: matches on href containing QueryHashSource.
            string selector = "[href*=\"" + QueryHashSource + "\"]";
            HtmlNode matchedNode = _documentNode.QuerySelector(selector);

            // NOTE(review): neither the node nor the attribute is null-checked, so a page
            // without a matching element surfaces here as a NullReferenceException.
            HtmlAttribute hrefAttribute = matchedNode.Attributes.FirstOrDefault(candidate => candidate.Name == "href");
            string relativePath = hrefAttribute.Value;

            return Utility.BaseUrl + relativePath;
        }
コード例 #2
0
        /// <summary>
        /// Returns the value of the first attribute in the collection whose name equals
        /// <paramref name="name"/>, ignoring case.
        /// </summary>
        /// <param name="dictionary">Attribute collection to search; may be <c>null</c>.</param>
        /// <param name="name">Attribute name to look for; may be <c>null</c>.</param>
        /// <returns>
        /// The matching attribute's value, or <c>null</c> when the collection is <c>null</c>,
        /// the name is <c>null</c>, or no attribute matches.
        /// </returns>
        public static string GetIgnoreCase(this HtmlAttributeCollection dictionary, string name)
        {
            if (dictionary == null || name == null)
            {
                return null;
            }

            // Attribute names are identifiers rather than linguistic text, so they are
            // compared ordinally instead of with culture-aware rules.
            var match = dictionary.FirstOrDefault(
                attribute => string.Equals(attribute.Name, name, StringComparison.OrdinalIgnoreCase));

            // Return the value of the attribute that was found; no second lookup by name is needed.
            return match == null ? null : match.Value;
        }
コード例 #3
0
        /// <summary>
        /// Downloads the page at <paramref name="url"/> with an HTTP GET, rewrites the URLs it
        /// contains, and renders the resulting HTML through the <c>GetUrl</c> view.
        /// </summary>
        /// <remarks>
        /// <c>script[src]</c>, <c>link[href]</c> and <c>img[src]</c> values that are not already
        /// absolute are resolved against the fetched page's URL. <c>a[href]</c> values that
        /// resolve to an http/https address are redirected to <c>/Scrapping/GetUrl</c>, carrying
        /// the resolved address in the <c>url</c> query parameter.
        /// </remarks>
        /// <param name="encoding">Value copied (URL-escaped) into the <c>encoding</c> query parameter of every rewritten anchor.</param>
        /// <param name="url">Absolute URL of the page to fetch.</param>
        /// <returns>
        /// The <c>GetUrl</c> view with the rewritten HTML as its model, or the
        /// <c>GetUrlExeption</c> view with the exception message when any step fails.
        /// </returns>
        public ActionResult GetUrl(string encoding, string url)
        {
            try
            {
                var pageUri = new Uri(url);

                string responseString;
                using (var handler = new HttpClientHandler())
                using (var httpClient = new HttpClient(handler))
                using (var request = new HttpRequestMessage(HttpMethod.Get, url))
                {
                    httpClient.DefaultRequestHeaders.Add("User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:14.0) Gecko/20100101 Firefox/14.0.1");

                    // The action signature is synchronous, so the async calls are blocked on.
                    // GetAwaiter().GetResult() rethrows the original exception instead of an
                    // AggregateException, so the error view receives the real failure message.
                    using (var response = httpClient.SendAsync(request).GetAwaiter().GetResult())
                    {
                        responseString = response.Content.ReadAsStringAsync().GetAwaiter().GetResult();
                    }
                }

                var htmlDoc = new HtmlDocument();
                htmlDoc.LoadHtml(responseString);

                RewriteResourceUrls(htmlDoc, "script", "src", pageUri);
                RewriteResourceUrls(htmlDoc, "link", "href", pageUri);
                RewriteResourceUrls(htmlDoc, "img", "src", pageUri);
                RewriteAnchorUrls(htmlDoc, pageUri, encoding);

                return View("GetUrl", (object)htmlDoc.DocumentNode.OuterHtml);
            }
            catch (Exception ex)
            {
                // Best-effort action: any failure is reported to the user through the error view.
                return View("GetUrlExeption", (object)ex.Message);
            }
        }

        /// <summary>
        /// Resolves the given attribute of every <paramref name="tagName"/> element against
        /// <paramref name="pageUri"/>, leaving values that are already absolute untouched.
        /// </summary>
        private static void RewriteResourceUrls(HtmlDocument htmlDoc, string tagName, string attributeName, Uri pageUri)
        {
            foreach (HtmlNode node in htmlDoc.DocumentNode.Descendants(tagName))
            {
                HtmlAttribute attribute = node.Attributes.FirstOrDefault(x => x.Name == attributeName);
                if (attribute == null || attribute.Value == null)
                {
                    continue;
                }

                string value = attribute.Value;
                bool alreadyAbsolute = value.StartsWith("//", StringComparison.Ordinal)
                                       || value.StartsWith("http://", StringComparison.Ordinal)
                                       || value.StartsWith("https://", StringComparison.Ordinal);
                if (alreadyAbsolute)
                {
                    continue;
                }

                // Resolve relative to the page itself (not the site root) so that values
                // such as "../x.js" or "dir/x.js" point at the right location.
                Uri resolved;
                if (Uri.TryCreate(pageUri, value, out resolved))
                {
                    attribute.Value = resolved.AbsoluteUri;
                }
            }
        }

        /// <summary>
        /// Points every <c>a[href]</c> that resolves to an http/https address back at
        /// <c>/Scrapping/GetUrl</c>, passing the resolved address in the <c>url</c> parameter.
        /// </summary>
        private static void RewriteAnchorUrls(HtmlDocument htmlDoc, Uri pageUri, string encoding)
        {
            // Both query values are escaped so that '&', '#', '{' and similar characters
            // cannot break the generated link.
            string linkPrefix = "/Scrapping/GetUrl?encoding=" + Uri.EscapeDataString(encoding ?? string.Empty) + "&url=";

            foreach (HtmlNode anchor in htmlDoc.DocumentNode.Descendants("a"))
            {
                HtmlAttribute attribute = anchor.Attributes.FirstOrDefault(x => x.Name == "href");
                if (attribute == null || attribute.Value == null)
                {
                    continue;
                }

                Uri target;
                if (!Uri.TryCreate(pageUri, attribute.Value, out target))
                {
                    continue;
                }

                // Links such as mailto: or javascript: cannot be fetched by this action,
                // so they are left exactly as they appear in the page.
                if (target.Scheme != Uri.UriSchemeHttp && target.Scheme != Uri.UriSchemeHttps)
                {
                    continue;
                }

                attribute.Value = linkPrefix + Uri.EscapeDataString(target.AbsoluteUri);
            }
        }