/// <summary>
/// Runs a <c>WebSpider</c> starting from <paramref name="source"/>, stepping it
/// until it reports nothing left to visit or its count reaches the limit.
/// </summary>
/// <param name="source">Value handed to <c>SetSourcePage</c> as the starting page.</param>
/// <param name="max_count">
/// Upper bound compared against <c>spider.Count</c>; stepping stops once
/// <c>Count</c> is no longer below this value. Defaults to 5000.
/// </param>
static void LaunchSpider(string source, int max_count = 5000)
{
    WebSpider crawler = new WebSpider();
    crawler.SetSourcePage(source);

    for (;;)
    {
        // Evaluate the queue check first and the count check only if the
        // queue is non-empty, mirroring a short-circuit "&&".
        bool keepGoing = crawler.CountToVisit > 0 && crawler.Count < max_count;
        if (!keepGoing)
        {
            break;
        }

        // The page returned by each step is not needed here.
        crawler.SingleStep();
    }
}
/// <summary>
/// Builds a <c>DisplayItem</c> for the page that <paramref name="reader"/>
/// returns for <paramref name="pageid"/>.
/// </summary>
/// <param name="reader">Reader used to fetch the page via <c>GetPageByIndex</c>.</param>
/// <param name="pageid">Index passed to the reader; also stored as the item's <c>page_id</c>.</param>
/// <returns>
/// An item carrying the page's title and URL, plus a description made of the
/// page's plain text with every whitespace run collapsed to a single space and
/// truncated to at most 100 characters.
/// </returns>
public static DisplayItem GetDisplayItem(DBReader reader, int pageid)
{
    // Maximum number of characters kept in the description snippet.
    const int MaxDescLength = 100;

    WEBPAGE page = reader.GetPageByIndex(pageid);

    DisplayItem item = new DisplayItem();
    item.title = page.TITLE;
    item.url = page.URL;
    item.page_id = pageid;

    // NOTE(review): Html2PlainText is treated as possibly returning null;
    // in that case the description is left empty rather than throwing.
    string text = htmlParser.Html2PlainText(page.raw_source) ?? string.Empty;

    // One pass: any run of whitespace (spaces, tabs, newlines) becomes a single space.
    string collapsed = Regex.Replace(text, @"\s+", " ");

    // Substring(0, n) throws when the string is shorter than n, so only
    // truncate when there is actually more text than the limit.
    item.desc = collapsed.Length > MaxDescLength
        ? collapsed.Substring(0, MaxDescLength)
        : collapsed;

    return item;
}