Ejemplo n.º 1
0
 /// <summary>
 /// Obtains the HTML for <paramref name="url"/> from GetWebHTML and builds a
 /// WebPage carrying the url, the raw HTML, the text between the first
 /// &lt;title&gt; and the following &lt;/title&gt;, and a publication time.
 /// </summary>
 /// <param name="url">Address handed to GetWebHTML and stored on the result.</param>
 /// <returns>
 /// The populated WebPage, or null when GetWebHTML returns null. The title is
 /// an empty string when no complete title element is found. The publication
 /// time is DateTime.Now when the marker is missing or the 10 characters
 /// following it do not parse as a date. The content is always left empty.
 /// </returns>
 static WebPage GetWebPage(string url)
 {
     const string titleOpenTag = "<title>";
     const string titleCloseTag = "</title>";
     const string publishedMarker = ">发布时间:";
     // Number of characters read after the marker (e.g. "2012-03-04").
     const int publishedDateLength = 10;

     string html = GetWebHTML(url);
     if (html == null)
     {
         return null;
     }

     WebPage page = new WebPage();
     page.url = url;
     page.html = html;

     // Title: only extract when both tags exist, and look for the closing tag
     // after the opening one so Substring never receives a negative length.
     page.title = "";
     int titleOpenPos = html.IndexOf(titleOpenTag, StringComparison.Ordinal);
     if (titleOpenPos >= 0)
     {
         int titleStart = titleOpenPos + titleOpenTag.Length;
         int titleEnd = html.IndexOf(titleCloseTag, titleStart, StringComparison.Ordinal);
         if (titleEnd >= 0)
         {
             page.title = html.Substring(titleStart, titleEnd - titleStart);
         }
     }

     // Publication time: parse only when the marker is present and enough
     // characters follow it; otherwise fall back to the current time instead
     // of parsing unrelated text or relying on a catch-all exception handler.
     DateTime parsedTime = default(DateTime);
     bool hasParsedTime = false;
     int markerPos = html.IndexOf(publishedMarker, StringComparison.Ordinal);
     if (markerPos >= 0)
     {
         int dateStart = markerPos + publishedMarker.Length;
         if (dateStart + publishedDateLength <= html.Length)
         {
             hasParsedTime = DateTime.TryParse(
                 html.Substring(dateStart, publishedDateLength), out parsedTime);
         }
     }
     page.published_time = hasParsedTime ? parsedTime : DateTime.Now;

     // Body extraction is not implemented; content is intentionally empty.
     page.content = "";
     return page;
 }
Ejemplo n.º 2
0
 /// <summary>
 /// Inserts <paramref name="page"/> into the "webpages" collection obtained
 /// from mongo_database.
 /// </summary>
 /// <param name="page">The page document to insert.</param>
 public void InsertWebPage(WebPage page)
 {
     // The value returned by Insert is not inspected by this method.
     mongo_database.GetCollection<WebPage>("webpages").Insert<WebPage>(page);
 }