/// <summary>
/// Fetches the HTML for <paramref name="url"/> via <c>GetWebHTML</c> and builds a
/// <see cref="WebPage"/> from it.
/// </summary>
/// <param name="url">Address handed to <c>GetWebHTML</c> and stored on the result.</param>
/// <returns>
/// A populated <see cref="WebPage"/>, or <c>null</c> when <c>GetWebHTML</c> returns <c>null</c>.
/// </returns>
/// <remarks>
/// <para>
/// The title is the text between the first <c>&lt;title&gt;</c> and the following
/// <c>&lt;/title&gt;</c>; when either tag is missing the title is the empty string
/// instead of an <see cref="ArgumentOutOfRangeException"/>.
/// </para>
/// <para>
/// The published time is parsed from the 10 characters that follow the
/// "&gt;发布时间:" marker. When the marker is missing, the document is too short,
/// or the text is not a valid date, the current local time is used as a fallback.
/// </para>
/// <para>Content extraction is not implemented; <c>content</c> is always the empty string.</para>
/// </remarks>
static WebPage GetWebPage(string url)
{
    const string titleOpenTag = "<title>";
    const string titleCloseTag = "</title>";
    const string publishedMarker = ">发布时间:";
    // Number of characters read after the marker; presumably a "yyyy-MM-dd" date — confirm against real pages.
    const int publishedDateLength = 10;

    string html = GetWebHTML(url);
    if (html == null)
    {
        return null;
    }

    // Title: only slice when both tags are present, and look for the closing
    // tag after the opening one so an earlier stray "</title>" cannot produce a negative length.
    string title = "";
    int titleOpenPos = html.IndexOf(titleOpenTag, StringComparison.Ordinal);
    if (titleOpenPos >= 0)
    {
        int titleStartPos = titleOpenPos + titleOpenTag.Length;
        int titleEndPos = html.IndexOf(titleCloseTag, titleStartPos, StringComparison.Ordinal);
        if (titleEndPos >= 0)
        {
            title = html.Substring(titleStartPos, titleEndPos - titleStartPos);
        }
    }

    // Published time: fall back to "now" unless the marker exists, enough text
    // follows it, and that text parses as a date.
    DateTime publishedTime = DateTime.Now;
    int markerPos = html.IndexOf(publishedMarker, StringComparison.Ordinal);
    if (markerPos >= 0)
    {
        int publishedStartPos = markerPos + publishedMarker.Length;
        if (publishedStartPos + publishedDateLength <= html.Length)
        {
            DateTime parsed;
            if (DateTime.TryParse(html.Substring(publishedStartPos, publishedDateLength), out parsed))
            {
                publishedTime = parsed;
            }
        }
    }

    WebPage page = new WebPage();
    page.url = url;
    page.html = html;
    page.title = title;
    page.published_time = publishedTime;
    page.content = "";
    return page;
}
/// <summary>
/// Inserts <paramref name="page"/> into the "webpages" collection of <c>mongo_database</c>.
/// </summary>
/// <param name="page">The document to insert.</param>
public void InsertWebPage(WebPage page)
{
    // The value returned by Insert is not inspected.
    mongo_database.GetCollection<WebPage>("webpages").Insert<WebPage>(page);
}