Esempio n. 1
0
        /// <summary>
        /// Loads a single Html page and runs one microformat parse over it.
        /// </summary>
        /// <remarks>
        /// The url is trimmed and url-decoded before it is handed to <c>LoadHtmlDoc</c>.
        /// When a page object comes back, a <c>Url</c> report carrying its address and
        /// status code is added to <c>parsedUrls</c>. The report is added to <c>Urls</c>,
        /// with its <c>LoadTime</c> set, only when the status code is 200. When
        /// <c>LoadHtmlDoc</c> returns null nothing is recorded and no exception is thrown.
        /// </remarks>
        /// <param name="url">The Url of the web page to be parsed</param>
        /// <param name="formatDescriber">A format describer for the microformat to be parsed</param>
        /// <exception cref="System.Exception">
        /// Thrown when <paramref name="url"/> is null or empty, when the page status code
        /// is not 200, or when loading/parsing fails. For failures raised while loading or
        /// parsing, the original exception is available through <c>InnerException</c>.
        /// </exception>
        public void Load(string url, UfFormatDescriber formatDescriber)
        {
            this.formatDescriber = formatDescriber;

            // Reject a missing url up front; a null url used to surface as a
            // NullReferenceException message from url.Trim().
            if (string.IsNullOrEmpty(url))
            {
                throw new Exception("No Url given");
            }

            try
            {
                // Check for issues with url
                url = url.Trim();
                url = HttpUtility.UrlDecode(url);

                UfWebPage webPage = LoadHtmlDoc(url);
                if (webPage == null)
                {
                    return;
                }

                Url urlReport = new Url();
                urlReport.Address = webPage.Url;
                urlReport.Status = webPage.StatusCode;
                parsedUrls.Add(urlReport);

                // A failed load is recorded in parsedUrls (above) but never reaches Urls.
                if (webPage.StatusCode != 200)
                {
                    throw new Exception("Could not load url: " + url + " " + webPage.StatusCode);
                }

                // Timing starts after the page has been loaded, so LoadTime covers the
                // parse step only. UtcNow keeps the interval immune to local clock shifts.
                DateTime started = DateTime.UtcNow;

                if (webPage.Html != null)
                {
                    ParseUf(webPage.Html, url, formatDescriber, false, urlReport);
                }

                urlReport.LoadTime = DateTime.UtcNow.Subtract(started);
                Urls.Add(urlReport);
            }
            catch (Exception ex)
            {
                // Keep the message contract callers already see, but chain the original
                // exception so its type and stack trace are not lost.
                string message = ex.Message != string.Empty
                    ? ex.Message
                    : "Could not load Url: " + url;
                throw new Exception(message, ex);
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Loads a single Html page and runs multiple microformat parses over it.
        /// </summary>
        /// <remarks>
        /// The url is trimmed and handed to <c>LoadHtmlDoc</c>. When a page object comes
        /// back, one <c>Url</c> report is created and every entry of
        /// <paramref name="formatDescriberArray"/> is parsed against the loaded Html.
        /// On success <c>Urls</c> is cleared and left holding only this report. When
        /// <c>LoadHtmlDoc</c> returns null nothing is recorded and no exception is thrown.
        /// </remarks>
        /// <param name="url">A full web page address</param>
        /// <param name="formatDescriberArray">An array of format describers</param>
        /// <exception cref="System.Exception">
        /// Thrown when <paramref name="url"/> is null or empty, when the page status code
        /// is not 200 (and at least one format describer was supplied), or when
        /// loading/parsing fails. For failures raised while loading or parsing, the
        /// original exception is available through <c>InnerException</c>.
        /// </exception>
        public void Load(string url, ArrayList formatDescriberArray)
        {
            this.formatDescriberArray = formatDescriberArray;

            // Reject a missing url up front; a null url used to surface as a
            // NullReferenceException message from url.Trim().
            if (string.IsNullOrEmpty(url))
            {
                throw new Exception("No Url given");
            }

            try
            {
                url = url.Trim();

                UfWebPage webPage = LoadHtmlDoc(url);
                if (webPage == null)
                {
                    return;
                }

                Url urlReport = new Url();
                urlReport.Address = webPage.Url;
                urlReport.Status = webPage.StatusCode;

                // Timing starts after the page has been loaded, so LoadTime covers the
                // parse passes only. UtcNow keeps the interval immune to local clock shifts.
                DateTime started = DateTime.UtcNow;

                // Process many times: one parse pass per format describer.
                foreach (UfFormatDescriber format in formatDescriberArray)
                {
                    // NOTE(review): the same report is added to parsedUrls once per
                    // format describer; the single-format overload adds it once.
                    // Kept as-is - confirm whether the duplicates are intended.
                    parsedUrls.Add(urlReport);

                    // NOTE(review): this check is loop-invariant and is skipped entirely
                    // when the array is empty, so a failed load with no describers does
                    // not throw. Kept as-is to preserve behavior.
                    if (webPage.StatusCode != 200)
                    {
                        throw new Exception("Could not load url: " + url + " " + webPage.StatusCode);
                    }

                    if (webPage.Html != null)
                    {
                        ParseUf(webPage.Html, webPage.Url, format, true, urlReport);
                    }
                }

                urlReport.LoadTime = DateTime.UtcNow.Subtract(started);

                // Unlike the single-format overload, previous reports are discarded here.
                Urls.Clear();
                Urls.Add(urlReport);
            }
            catch (Exception ex)
            {
                // Keep the message contract callers already see, but chain the original
                // exception so its type and stack trace are not lost.
                string message = ex.Message != string.Empty
                    ? ex.Message
                    : "Could not load Url: " + url;
                throw new Exception(message, ex);
            }
        }