/// <summary>
/// Looks up Page rows by primary key through the [dbo].[Page_GetByPK] stored procedure.
/// </summary>
/// <param name="PageId">Value sent to the procedure as the PageId parameter.</param>
/// <returns>One PageDO per row the reader yields; an empty array when it yields none.</returns>
public static PageDO[] GetByPK(Int32 PageId)
{
    SqlParameter[] args =
    {
        new SqlParameter("PageId", SqlDbType.Int) { Value = PageId }
    };

    // NOTE(review): the reader is never closed in this method — confirm whether
    // SafeReader needs explicit disposal.
    SafeReader reader = DataCommon.ExecuteSafeReader("[dbo].[Page_GetByPK]", args, "dbo");

    List<PageDO> pages = new List<PageDO>();
    while (reader.Read())
    {
        // Columns are resolved by name on every row and read in the order
        // PageId, Url, DownloadDate, Html.
        pages.Add(new PageDO
        {
            PageId = reader.GetInt32(reader.GetOrdinal("PageId")),
            Url = reader.GetString(reader.GetOrdinal("Url")),
            DownloadDate = reader.GetDateTime(reader.GetOrdinal("DownloadDate")),
            Html = reader.GetString(reader.GetOrdinal("Html"))
        });
    }

    return pages.ToArray();
}
/// <summary>
/// Returns the stored page whose Url equals <paramref name="url"/>; when none is
/// stored, downloads the url, parses the JSON response, saves the page and returns it.
/// </summary>
/// <param name="url">Address compared against stored pages and, on a miss, downloaded.</param>
/// <returns>
/// The matching stored page, or a newly created page whose PageId is the value
/// returned by Page.Create.
/// </returns>
PageDO GetPageHtml(string url)
{
    // Every stored page is loaded and the match is done in memory.
    PageDO stored = Page.GetAll().FirstOrDefault(candidate => candidate.Url == url);
    if (stored != null)
    {
        return stored;
    }

    using (WebClient web = new WebClient())
    {
        JObject payload = JObject.Parse(web.DownloadString(url));
        JToken list = payload["list"];

        // The page count is only refreshed on this download path, and it is
        // updated before the HTML fragment is read.
        _totalPages = Convert.ToInt32(list["numPages"].ToString());
        string markup = list["listHTML"].ToString();

        // Persist the downloaded page so later lookups can find it in the database.
        PageDO fresh = new PageDO() { DownloadDate = DateTime.Now, Html = markup, Url = url };
        fresh.PageId = Page.Create(fresh);
        return fresh;
    }
}
/// <summary>
/// Removes a Page row by running [dbo].[Page_Delete] with the record's PageId.
/// </summary>
/// <param name="DO">Record whose PageId is sent to the procedure.</param>
/// <returns>The integer produced by DataCommon.ExecuteScalar for the procedure call.</returns>
public static int Delete(PageDO DO)
{
    SqlParameter idParam = new SqlParameter("PageId", SqlDbType.Int) { Value = DO.PageId };

    return DataCommon.ExecuteScalar("[dbo].[Page_Delete]", new SqlParameter[] { idParam }, "dbo");
}
/// <summary>
/// Inserts a Page row by running [dbo].[Page_Insert] with the record's Url,
/// DownloadDate and Html.
/// </summary>
/// <param name="DO">Record supplying the three column values.</param>
/// <returns>
/// The integer produced by DataCommon.ExecuteScalar; GetPageHtml stores it as the
/// new record's PageId.
/// </returns>
public static int Create(PageDO DO)
{
    // Parameter order matches the original call: Url, DownloadDate, Html.
    SqlParameter[] args =
    {
        new SqlParameter("Url", SqlDbType.VarChar) { Value = DO.Url },
        new SqlParameter("DownloadDate", SqlDbType.DateTime) { Value = DO.DownloadDate },
        new SqlParameter("Html", SqlDbType.VarChar) { Value = DO.Html }
    };

    return DataCommon.ExecuteScalar("[dbo].[Page_Insert]", args, "dbo");
}
/// <summary>
/// Processes pages one after another, starting at the current value of _page and
/// continuing while _page is less than or equal to _totalPages. For each page it
/// obtains the HTML via GetPageHtml, extracts the article documents, and creates a
/// Property record for every article.
/// </summary>
/// <remarks>
/// Implemented as a loop instead of self-recursion so that stack depth does not
/// grow with the number of pages. The steps and their order are unchanged: the
/// current page is always processed once before the page count is checked.
/// </remarks>
public void Run()
{
    do
    {
        string url = string.Format(_urlFormat, _page);
        PageDO page = GetPageHtml(url);

        XmlDocument[] articleDocs = GetArticles(page.Html);
        foreach (XmlDocument doc in articleDocs)
        {
            Article article = new Article(doc, page.PageId);
            PropertyDO property = article.GetProperty();
            Property.Create(property);
        }

        _page += 1;
    }
    // _totalPages is re-read on every pass because GetPageHtml may update it
    // when it downloads a page.
    while (_page <= _totalPages);
}
/// <summary>
/// Reads every Page row returned by the [dbo].[Page_GetAll] stored procedure.
/// </summary>
/// <returns>One PageDO per row the reader yields; an empty array when it yields none.</returns>
public static PageDO[] GetAll()
{
    // The procedure is called with an empty parameter list.
    SqlParameter[] noArgs = new SqlParameter[0];

    // NOTE(review): the reader is never closed in this method — confirm whether
    // SafeReader needs explicit disposal.
    SafeReader reader = DataCommon.ExecuteSafeReader("[dbo].[Page_GetAll]", noArgs, "dbo");

    List<PageDO> pages = new List<PageDO>();
    while (reader.Read())
    {
        // Columns are resolved by name on every row and read in the order
        // PageId, Url, DownloadDate, Html.
        PageDO row = new PageDO
        {
            PageId = reader.GetInt32(reader.GetOrdinal("PageId")),
            Url = reader.GetString(reader.GetOrdinal("Url")),
            DownloadDate = reader.GetDateTime(reader.GetOrdinal("DownloadDate")),
            Html = reader.GetString(reader.GetOrdinal("Html"))
        };
        pages.Add(row);
    }

    return pages.ToArray();
}