Example #1
0
        /// <summary>
        /// Creates a <c>WebSpider</c>, seeds it with <paramref name="source"/> and
        /// keeps calling <c>SingleStep</c> until the spider has nothing left to
        /// visit or its <c>Count</c> reaches <paramref name="max_count"/>.
        /// </summary>
        /// <param name="source">Value passed to <c>SetSourcePage</c> as the starting page.</param>
        /// <param name="max_count">Upper bound compared against the spider's <c>Count</c>; defaults to 5000.</param>
        static void LaunchSpider(string source, int max_count = 5000)
        {
            WebSpider crawler = new WebSpider();
            crawler.SetSourcePage(source);

            for (;;)
            {
                // Stop as soon as the queue is drained or the limit is reached.
                bool keepGoing = crawler.CountToVisit > 0 && crawler.Count < max_count;
                if (!keepGoing)
                {
                    break;
                }

                // The page returned by each step is not needed here.
                crawler.SingleStep();
            }
        }
Example #2
0
        /// <summary>
        /// Builds a <c>DisplayItem</c> for the page stored at the given index.
        /// </summary>
        /// <param name="reader">Reader used to load the page via <c>GetPageByIndex</c>.</param>
        /// <param name="pageid">Index of the page to load; also stored in the item's <c>page_id</c>.</param>
        /// <returns>
        /// An item carrying the page's title, URL and id, plus a description made of
        /// the page's plain text with whitespace runs collapsed to single spaces and
        /// limited to at most 100 characters.
        /// </returns>
        public static DisplayItem GetDisplayItem(DBReader reader, int pageid)
        {
            // Maximum number of characters kept in the description.
            const int maxDescLength = 100;

            WEBPAGE page = reader.GetPageByIndex(pageid);

            DisplayItem item = new DisplayItem();

            item.title   = page.TITLE;
            item.url     = page.URL;
            item.page_id = pageid;

            string text = htmlParser.Html2PlainText(page.raw_source);

            // One pass: every run of whitespace (spaces, tabs, newlines) becomes a single space.
            string collapsed = Regex.Replace(text, @"\s+", " ");

            // Truncate only when needed: Substring(0, 100) throws
            // ArgumentOutOfRangeException on text shorter than 100 characters.
            item.desc = collapsed.Length > maxDescLength
                ? collapsed.Substring(0, maxDescLength)
                : collapsed;

            return item;
        }