示例#1
0
        /// <summary>
        /// Loads a single HTML page and runs one microformat parse per supplied format describer.
        /// </summary>
        /// <param name="url">A full web page address</param>
        /// <param name="formatDescriberArray">An array of format describers</param>
        /// <exception cref="Exception">
        /// Thrown when <paramref name="url"/> is null or empty, when the page status code
        /// is not 200, or when loading or parsing fails. The originating exception, if any,
        /// is attached as the inner exception.
        /// </exception>
        public void Load(string url, ArrayList formatDescriberArray)
        {
            this.formatDescriberArray = formatDescriberArray;

            try
            {
                // Treat null like empty so the caller gets "No Url given" instead of a
                // null-reference message from url.Trim().
                if (string.IsNullOrEmpty(url))
                {
                    throw new Exception("No Url given");
                }

                url = url.Trim();
                UfWebPage webPage = LoadHtmlDoc(url);
                if (webPage == null)
                {
                    return;
                }

                Url urlReport = new Url();
                urlReport.Address = webPage.Url;
                urlReport.Status = webPage.StatusCode;

                // Timing starts after the page has been fetched, so LoadTime covers the
                // work done below. UTC is used so the elapsed span cannot be skewed by a
                // daylight-saving or time-zone change.
                DateTime started = DateTime.UtcNow;

                // Record the report once per page, not once per format describer.
                parsedUrls.Add(urlReport);

                // The status check does not depend on the format, so it runs before the
                // loop; this also reports a failed load when the describer list is empty.
                if (webPage.StatusCode != 200)
                {
                    throw new Exception("Could not load url: " + url + " " + webPage.StatusCode);
                }

                // Process many times: the same downloaded page is parsed once per format.
                foreach (UfFormatDescriber format in formatDescriberArray)
                {
                    if (webPage.Html != null)
                    {
                        ParseUf(webPage.Html, webPage.Url, format, true, urlReport);
                    }
                }

                urlReport.LoadTime = DateTime.UtcNow.Subtract(started);

                // This overload reports exactly one url, so earlier entries are discarded.
                Urls.Clear();
                Urls.Add(urlReport);
            }
            catch (Exception ex)
            {
                // The thrown type and message are the same as before; the original
                // exception is chained so its type and stack trace are not lost.
                string message = ex.Message != string.Empty
                    ? ex.Message
                    : "Could not load Url: " + url;
                throw new Exception(message, ex);
            }
        }
示例#2
0
        /// <summary>
        /// Loads a single HTML page and does a microformat parse.
        /// </summary>
        /// <param name="url">The Url of the webpage to be parsed</param>
        /// <param name="formatDescriber">A format describer for the microformat to be parsed</param>
        /// <exception cref="Exception">
        /// Thrown when <paramref name="url"/> is null or empty, when the page status code
        /// is not 200, or when loading or parsing fails. The originating exception, if any,
        /// is attached as the inner exception.
        /// </exception>
        public void Load(string url, UfFormatDescriber formatDescriber)
        {
            this.formatDescriber = formatDescriber;

            try
            {
                // Treat null like empty so the caller gets "No Url given" instead of a
                // null-reference message from url.Trim().
                if (string.IsNullOrEmpty(url))
                {
                    throw new Exception("No Url given");
                }

                // Check for issues with url
                url = url.Trim();
                url = HttpUtility.UrlDecode(url);

                UfWebPage webPage = LoadHtmlDoc(url);
                if (webPage == null)
                {
                    return;
                }

                Url urlReport = new Url();
                urlReport.Address = webPage.Url;
                urlReport.Status = webPage.StatusCode;
                parsedUrls.Add(urlReport);

                // Timing starts after the page has been fetched. UTC is used so the
                // elapsed span cannot be skewed by a daylight-saving or time-zone change.
                DateTime started = DateTime.UtcNow;

                // A non-200 page is never parsed, so fail before attempting the parse.
                if (webPage.StatusCode != 200)
                {
                    throw new Exception("Could not load url: " + url + " " + webPage.StatusCode);
                }

                if (webPage.Html != null)
                {
                    ParseUf(webPage.Html, url, formatDescriber, false, urlReport);
                }

                urlReport.LoadTime = DateTime.UtcNow.Subtract(started);
                Urls.Add(urlReport);
            }
            catch (Exception ex)
            {
                // The thrown type and message are the same as before; the original
                // exception is chained so its type and stack trace are not lost.
                string message = ex.Message != string.Empty
                    ? ex.Message
                    : "Could not load Url: " + url;
                throw new Exception(message, ex);
            }
        }
示例#3
0
        /// <summary>
        /// Loads an external HTML document using a new <see cref="UfWebPage"/>.
        /// </summary>
        /// <param name="url">
        /// A full web page address. "http://" is prepended when the value does not start
        /// with an http, https or file scheme.
        /// </param>
        /// <returns>
        /// The web page object. No load is attempted when <paramref name="url"/> is null
        /// or empty, so the returned object is then as constructed.
        /// </returns>
        /// <exception cref="Exception">
        /// Thrown when the address cannot be turned into a Uri or the load fails; the
        /// originating exception is attached as the inner exception.
        /// </exception>
        private UfWebPage LoadHtmlDoc(string url)
        {
            UfWebPage webPage = new UfWebPage();

            // Only override the page's user agent when one has actually been configured;
            // a null value is treated like an empty one.
            if (!string.IsNullOrEmpty(userAgent))
            {
                webPage.UserAgent = userAgent;
            }

            try
            {
                if (!string.IsNullOrEmpty(url))
                {
                    // Check for issues with url
                    url = url.Trim();

                    // Schemes are case-insensitive, so "HTTP://host" must not be turned
                    // into "http://HTTP://host". Ordinal comparison also keeps the check
                    // independent of the current culture.
                    bool hasScheme =
                        url.StartsWith("http://", StringComparison.OrdinalIgnoreCase) ||
                        url.StartsWith("https://", StringComparison.OrdinalIgnoreCase) ||
                        url.StartsWith("file://", StringComparison.OrdinalIgnoreCase);
                    if (!hasScheme)
                    {
                        url = "http://" + url;
                    }

                    // Load page once
                    Uri uri = new Uri(url);
                    webPage.DocumentContentType = UfWebPage.ContentType.Html;
                    webPage.DocumentRequestType = UfWebPage.RequestType.Get;
                    webPage.Load(uri);
                }
            }
            catch (Exception ex)
            {
                // The thrown type and message are the same as before; the original
                // exception is chained so its type and stack trace are not lost.
                string message = ex.Message != string.Empty
                    ? ex.Message
                    : "Could not load Url: " + url;
                throw new Exception(message, ex);
            }

            return webPage;
        }
示例#4
0
        /// <summary>
        /// Loads an external HTML document using a new <see cref="UfWebPage"/>.
        /// </summary>
        /// <param name="url">
        /// A full web page address. "http://" is prepended when the value does not start
        /// with an http, https or file scheme.
        /// </param>
        /// <returns>
        /// The web page object. No load is attempted when <paramref name="url"/> is null
        /// or empty, so the returned object is then as constructed.
        /// </returns>
        /// <exception cref="Exception">
        /// Thrown when the address cannot be turned into a Uri or the load fails; the
        /// originating exception is attached as the inner exception.
        /// </exception>
        private UfWebPage LoadHtmlDoc(string url)
        {
            UfWebPage webPage = new UfWebPage();

            // Only override the page's user agent when one has actually been configured;
            // a null value is treated like an empty one.
            if (!string.IsNullOrEmpty(_userAgent))
                webPage.UserAgent = _userAgent;

            try
            {
                if (!string.IsNullOrEmpty(url))
                {
                    // Check for issues with url
                    url = url.Trim();

                    // Schemes are case-insensitive, so "HTTP://host" must not be turned
                    // into "http://HTTP://host". Ordinal comparison also keeps the check
                    // independent of the current culture.
                    bool hasScheme =
                        url.StartsWith("http://", StringComparison.OrdinalIgnoreCase) ||
                        url.StartsWith("https://", StringComparison.OrdinalIgnoreCase) ||
                        url.StartsWith("file://", StringComparison.OrdinalIgnoreCase);
                    if (!hasScheme)
                        url = "http://" + url;

                    // Load page once
                    Uri uri = new Uri(url);
                    webPage.DocumentContentType = UfWebPage.ContentType.Html;
                    webPage.DocumentRequestType = UfWebPage.RequestType.Get;
                    webPage.Load(uri);
                }
            }
            catch (Exception ex)
            {
                // The thrown type and message are the same as before; the original
                // exception is chained so its type and stack trace are not lost.
                string message = ex.Message != string.Empty
                    ? ex.Message
                    : "Could not load Url: " + url;
                throw new Exception(message, ex);
            }

            return webPage;
        }