/// <summary>
/// Loads a single HTML page and runs one microformat parse over it.
/// </summary>
/// <remarks>
/// The url is trimmed and URL-decoded before it is passed to <c>LoadHtmlDoc</c>.
/// If <c>LoadHtmlDoc</c> returns null the method returns without recording anything.
/// Otherwise a report for the page is appended to <c>parsedUrls</c> as soon as the
/// page has been fetched, and to <c>Urls</c> only when the status code is 200.
/// <c>LoadTime</c> on the report covers the interval that starts after
/// <c>LoadHtmlDoc</c> has returned and ends after <c>ParseUf</c> has finished.
/// </remarks>
/// <param name="url">The url of the web page to be parsed</param>
/// <param name="formatDescriber">A format describer for the microformat to be parsed</param>
/// <exception cref="Exception">
/// Thrown when <paramref name="url"/> is null, empty or whitespace ("No Url given"),
/// when the page status code is not 200, or when loading/parsing fails. Failures raised
/// while loading or parsing are re-thrown with the same message and the original
/// exception attached as <c>InnerException</c>.
/// </exception>
public void Load(string url, UfFormatDescriber formatDescriber)
{
    this.formatDescriber = formatDescriber;

    // Trim before the emptiness check so that null and whitespace-only input are
    // reported as "No Url given" rather than surfacing as a NullReferenceException
    // or being handed to the loader as an empty address.
    if (url != null)
    {
        url = url.Trim();
    }

    if (string.IsNullOrEmpty(url))
    {
        throw new Exception("No Url given");
    }

    try
    {
        url = HttpUtility.UrlDecode(url);

        UfWebPage webPage = LoadHtmlDoc(url);
        if (webPage == null)
        {
            return;
        }

        Url urlReport = new Url();
        urlReport.Address = webPage.Url;
        urlReport.Status = webPage.StatusCode;

        // Recorded before the status check, so failed loads are still listed here.
        parsedUrls.Add(urlReport);

        // UtcNow is not affected by daylight-saving or time-zone changes, which
        // would distort an elapsed-time measurement taken with DateTime.Now.
        DateTime started = DateTime.UtcNow;

        if (webPage.StatusCode != 200)
        {
            throw new Exception("Could not load url: " + url + " " + webPage.StatusCode);
        }

        if (webPage.Html != null)
        {
            ParseUf(webPage.Html, url, formatDescriber, false, urlReport);
        }

        urlReport.LoadTime = DateTime.UtcNow.Subtract(started);
        Urls.Add(urlReport);
    }
    catch (Exception ex)
    {
        // Keep the public contract (a plain Exception carrying the same message) but
        // attach the original exception so its type and stack trace are not lost.
        string message = string.IsNullOrEmpty(ex.Message)
            ? "Could not load Url: " + url
            : ex.Message;
        throw new Exception(message, ex);
    }
}
/// <summary>
/// Loads a single HTML page and runs several microformat parses over it.
/// </summary>
/// <remarks>
/// The url is trimmed before it is passed to <c>LoadHtmlDoc</c>. If <c>LoadHtmlDoc</c>
/// returns null the method returns without recording anything. Otherwise a single
/// report for the page is appended to <c>parsedUrls</c>, the page is parsed once per
/// entry in <paramref name="formatDescriberArray"/>, and on success <c>Urls</c> is
/// cleared and then receives that report. <c>LoadTime</c> on the report covers the
/// interval that starts after <c>LoadHtmlDoc</c> has returned and ends after the
/// last <c>ParseUf</c> call.
/// </remarks>
/// <param name="url">A full web page address</param>
/// <param name="formatDescriberArray">An array of format describers</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="formatDescriberArray"/> is null.
/// </exception>
/// <exception cref="Exception">
/// Thrown when <paramref name="url"/> is null, empty or whitespace ("No Url given"),
/// when the page status code is not 200, or when loading/parsing fails. Failures raised
/// while loading or parsing are re-thrown with the same message and the original
/// exception attached as <c>InnerException</c>.
/// </exception>
public void Load(string url, ArrayList formatDescriberArray)
{
    this.formatDescriberArray = formatDescriberArray;

    // Reject a missing describer list up front instead of failing with a
    // NullReferenceException after the page has already been fetched.
    if (formatDescriberArray == null)
    {
        throw new ArgumentNullException("formatDescriberArray");
    }

    // Trim before the emptiness check so that null and whitespace-only input are
    // reported as "No Url given".
    if (url != null)
    {
        url = url.Trim();
    }

    if (string.IsNullOrEmpty(url))
    {
        throw new Exception("No Url given");
    }

    try
    {
        UfWebPage webPage = LoadHtmlDoc(url);
        if (webPage == null)
        {
            return;
        }

        Url urlReport = new Url();
        urlReport.Address = webPage.Url;
        urlReport.Status = webPage.StatusCode;

        // One report per page: adding it inside the per-format loop would append
        // the same object once for every describer.
        parsedUrls.Add(urlReport);

        // UtcNow is not affected by daylight-saving or time-zone changes, which
        // would distort an elapsed-time measurement taken with DateTime.Now.
        DateTime started = DateTime.UtcNow;

        // Checked once, outside the loop, so that a failed load is reported even
        // when the describer list is empty.
        if (webPage.StatusCode != 200)
        {
            throw new Exception("Could not load url: " + url + " " + webPage.StatusCode);
        }

        // Run every requested microformat parse against the same page.
        foreach (UfFormatDescriber format in formatDescriberArray)
        {
            if (webPage.Html != null)
            {
                ParseUf(webPage.Html, webPage.Url, format, true, urlReport);
            }
        }

        urlReport.LoadTime = DateTime.UtcNow.Subtract(started);

        Urls.Clear();
        Urls.Add(urlReport);
    }
    catch (Exception ex)
    {
        // Keep the public contract (a plain Exception carrying the same message) but
        // attach the original exception so its type and stack trace are not lost.
        string message = string.IsNullOrEmpty(ex.Message)
            ? "Could not load Url: " + url
            : ex.Message;
        throw new Exception(message, ex);
    }
}