Esempio n. 1
0
        /// <summary>
        /// Loads the page at <paramref name="url"/> through the transport selected by
        /// <paramref name="type"/> and parses the result into an HtmlAgilityPack document.
        /// </summary>
        /// <param name="url">Address of the page to load.</param>
        /// <param name="type">
        /// Connection mode: <c>Direct</c> uses <see cref="HtmlWeb"/>; the <c>IE_*_sec</c> and
        /// <c>CHR_*_sec</c> members go through <c>SiteManagerIE</c> / <c>SiteManagerCHR</c>
        /// with a wait value taken from the numeric part of the enum member name.
        /// </param>
        /// <param name="urlResponse">
        /// Receives the absolute URI reported by the transport after loading; stays equal to
        /// <paramref name="url"/> when no load was performed.
        /// </param>
        /// <returns>
        /// The parsed document, or <c>null</c> when <paramref name="type"/> is not one of the
        /// handled members or no wait value could be derived from its name.
        /// </returns>
        /// <exception cref="InvalidOperationException">Thrown when <c>IsInited</c> is false.</exception>
        public static HtmlAgilityPack.HtmlDocument GetContent(string url, ParseRuleConnectionType type, out string urlResponse)
        {
            // Until a transport reports otherwise, the response URL is the requested one.
            urlResponse = url;

            if (!IsInited)
            {
                throw new InvalidOperationException("SiteManager not inited. Use SiteManager.Init() to initialize components");
            }

            HtmlAgilityPack.HtmlDocument document = null;

            switch (type)
            {
                case ParseRuleConnectionType.Direct:
                {
                    HtmlWeb htmlWeb = new HtmlWeb() { AutoDetectEncoding = true, UserAgent = "Other" };
                    document = htmlWeb.Load(url);

                    // Encoding does not overload ==/!=, so compare by value: a reference
                    // comparison would force a second download even when both encodings
                    // are equivalent instances.
                    if (!object.Equals(document.StreamEncoding, document.Encoding))
                    {
                        // The encoding used to read the stream differs from the one the
                        // document reports: reload with the document's encoding forced.
                        htmlWeb.AutoDetectEncoding = false;
                        htmlWeb.OverrideEncoding = document.Encoding;
                        document = htmlWeb.Load(url);
                    }

                    urlResponse = htmlWeb.ResponseUri.AbsoluteUri;
                    break;
                }

                case ParseRuleConnectionType.IE_00_sec:
                case ParseRuleConnectionType.IE_05_sec:
                case ParseRuleConnectionType.IE_10_sec:
                {
                    int wait;
                    if (TryGetWaitFromName(type, out wait))
                    {
                        // NOTE(review): the value is passed as parsed here, while the CHR
                        // branch multiplies it by 1000. The unit expected by
                        // SiteManagerIE.Navigate is not visible from this method - confirm.
                        SiteManagerIE mgr = new SiteManagerIE();
                        var res = mgr.Navigate(new Uri(url), wait);
                        document = new HtmlAgilityPack.HtmlDocument();
                        // Guard against a null payload, consistent with the CHR branch.
                        document.LoadHtml(res.Content ?? string.Empty);
                        urlResponse = res.ResponseUri.AbsoluteUri;
                    }
                    break;
                }

                case ParseRuleConnectionType.CHR_00_sec:
                case ParseRuleConnectionType.CHR_05_sec:
                case ParseRuleConnectionType.CHR_10_sec:
                {
                    int wait;
                    if (TryGetWaitFromName(type, out wait))
                    {
                        SiteManagerCHR mgr = new SiteManagerCHR();
                        var res = mgr.Navigate(new Uri(url), wait * 1000);
                        document = new HtmlAgilityPack.HtmlDocument();
                        document.LoadHtml(res.Content ?? string.Empty);
                        urlResponse = res.ResponseUri.AbsoluteUri;
                    }
                    break;
                }
            }

            return document;
        }

        /// <summary>
        /// Extracts the integer found between the first and the last underscore of the enum
        /// member name of <paramref name="type"/> (for example <c>IE_05_sec</c> yields 5).
        /// </summary>
        /// <param name="type">Enum member whose name carries the wait value.</param>
        /// <param name="wait">Receives the parsed value, or 0 when parsing fails.</param>
        /// <returns><c>true</c> when the name contained a parsable number; otherwise <c>false</c>.</returns>
        private static bool TryGetWaitFromName(ParseRuleConnectionType type, out int wait)
        {
            wait = 0;

            string name = type.ToString();
            int firstSeparator = name.IndexOf('_');
            int lastSeparator = name.LastIndexOf('_');

            // Two distinct separators are required to delimit the numeric part.
            if (firstSeparator < 0 || lastSeparator <= firstSeparator)
            {
                return false;
            }

            string digits = name.Substring(firstSeparator + 1, lastSeparator - firstSeparator - 1);

            // Enum names are machine-readable: accept plain digits only, culture-independent.
            return int.TryParse(
                digits,
                System.Globalization.NumberStyles.None,
                System.Globalization.CultureInfo.InvariantCulture,
                out wait);
        }