Exemplo n.º 1
0
        /// <summary>
        /// Downloads the document at <paramref name="url"/> and parses it into <c>Html</c>
        /// through an <c>SgmlReader</c>, recording the source URL and the encoding used.
        /// </summary>
        /// <param name="url">URL of the document to fetch.</param>
        /// <param name="follow">
        /// When true, <c>Robots.Create</c> is consulted before the request is sent and the
        /// request is refused if the robots rules reject <paramref name="url"/>.
        /// </param>
        /// <param name="agent">User agent sent with the request; when null the header is left unset.</param>
        /// <param name="encoding">
        /// Encoding used to decode the response. When null, the charset reported by the response
        /// is used, falling back to UTF-8 if that charset is empty or not recognized.
        /// </param>
        /// <exception cref="RobotsDisallowException">
        /// Thrown when <paramref name="follow"/> is true and <c>Robots.Parse</c> returns false for the URL.
        /// </exception>
        public HtmlReader(string url, bool follow = true, UserAgent agent = null, Encoding encoding = null)
        {
            // Build the HTTP request.
            HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);

            // Apply the caller-supplied user agent, if any.
            if (agent != null) {
                req.UserAgent = agent.ToString();
            }

            // Honor robots.txt only when the caller asked for it.
            Robots robots = (follow) ? Robots.Create(new Uri(url)) : null;
            if (robots != null) {
                if (!robots.Parse(url)) {
                    throw new RobotsDisallowException("Robots Disallow [" + url + "]");
                }
                // NOTE(review): CrawlDelay is presumably expressed in seconds (scaled to ms here) - confirm.
                if (robots.CrawlDelay != 0) {
                    System.Threading.Thread.Sleep(robots.CrawlDelay * 1000);
                }
            }

            using (HttpWebResponse res = (HttpWebResponse)req.GetResponse())
            using (Stream stream = res.GetResponseStream()) {
                // An explicit encoding always wins. Otherwise trust the response charset, but do not
                // let an empty, quoted, or unknown charset name abort the whole load.
                Encoding enc = encoding;
                if (enc == null) {
                    string charset = res.CharacterSet;
                    if (!string.IsNullOrWhiteSpace(charset)) {
                        try {
                            enc = Encoding.GetEncoding(charset.Trim().Trim('"', '\''));
                        } catch (ArgumentException) {
                            // Charset name not supported by this runtime; use the fallback below.
                            enc = null;
                        }
                    }
                    if (enc == null) {
                        enc = Encoding.UTF8;
                    }
                }

                using (StreamReader reader = new StreamReader(stream, enc))
                using (SgmlReader sgml = new SgmlReader {
                    DocType = "HTML",
                    InputStream = reader,
                    CaseFolding = CaseFolding.ToLower,
                    IgnoreDtd = true
                }) {
                    Html = XDocument.Load(sgml, LoadOptions.None);
                    Uri = url;
                    Encoding = enc;
                }
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Demo entry point: loads a video sitemap through <c>HtmlReader</c>, finds the
        /// <c>video:content_loc</c> element whose value contains a fixed id, and collects its
        /// enclosing <c>url</c> and <c>video:video</c> ancestors.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string url = @"http://www.1pondo.tv/static-seo/sitemap-videos.xml";
            UserAgent agent = new UserAgent("hoge", 1, "*****@*****.**", DefaultAgents.Firefox14);
            var reader = new HtmlReader(url, true, agent);
            var xml = reader.Html;
            XNamespace ns = reader.Namespace;

            // The video namespace URI is read from the xmlns:video declaration on the root element;
            // fail with a clear message instead of a NullReferenceException when it is absent.
            var videoDeclaration = xml.Root.Attribute(XNamespace.Xmlns + "video");
            if (videoDeclaration == null) {
                throw new System.InvalidOperationException("The document root does not declare the 'video' namespace prefix.");
            }
            XNamespace video = videoDeclaration.Value;

            // Resolve the matching element once rather than re-running the deferred query per use.
            var match = xml.Descendants(video + "content_loc")
                           .FirstOrDefault(a => a.Value.Contains("092615_161"));
            if (match == null) {
                throw new System.InvalidOperationException("No content_loc element contains the requested id.");
            }

            // Kept for inspection (e.g. in a debugger); the demo does not use them further.
            var item = match.Ancestors(ns + "url");
            var vitem = match.Ancestors(video + "video");
        }