Пример #1
0
 /// <summary>
 /// Sends one page task per list-page URL to a <c>MongoQueue</c> constructed from the
 /// "mongodbqueue" app setting.
 /// </summary>
 /// <remarks>
 /// When <c>Strategy</c> is <c>MiningStrategy.UntilLastPageNum</c>, page numbers start at 1 and
 /// advance by <c>Step</c> while they do not exceed <c>LastPageNum</c>. For any other strategy,
 /// each URL is first requested through <c>Goto</c>, and enqueuing stops at the first URL for
 /// which <c>Goto</c> returns false.
 /// </remarks>
 public sealed override void DoStuff()
 {
     // NOTE(review): the meaning of the 1000000 constructor argument is not visible here — confirm.
     using (var taskQueue = new MongoQueue<TaskMessage>(ConfigurationManager.AppSettings["mongodbqueue"], 1000000))
     {
         int pageNumber = 1;

         if (Strategy == MiningStrategy.UntilLastPageNum)
         {
             // Fixed range: enqueue every page number up to the configured last page.
             while (pageNumber <= LastPageNum)
             {
                 string pageUrl = string.Format(ListPageUrl, pageNumber);
                 taskQueue.Send(CreatePageTask(this.Name, pageUrl));
                 pageNumber += Step;
             }

             return;
         }

         // Open-ended range: keep going for as long as the list page can be fetched.
         string probeUrl = string.Format(ListPageUrl, pageNumber);
         var extractor = new HttpRequestContentExtractor(HttpMethod.GET, null);
         while (extractor.Goto(probeUrl))
         {
             taskQueue.Send(CreatePageTask(this.Name, probeUrl));
             pageNumber += Step;
             probeUrl = string.Format(ListPageUrl, pageNumber);
         }
     }
 }
Пример #2
0
        /// <summary>
        /// Requests <c>Url</c>, locates the main container by trying each '|'-separated class name
        /// from <c>MainContainerClassName</c> in order, and passes every
        /// <c>li</c> node whose class equals <c>ListItemContainerClassname</c> to <c>HandleListItem</c>.
        /// </summary>
        /// <remarks>
        /// Returns without processing when the request fails, when no container is found
        /// (an error is logged in that case), when no HTML document object is available,
        /// or when the XPath query yields no nodes.
        /// </remarks>
        public sealed override void DoStuff()
        {
            HttpRequestContentExtractor hqExtractor = new HttpRequestContentExtractor(HttpMethod.GET, null);

            if (!hqExtractor.Goto(this.Url, this.Referer, this.DefaultEncodingName))
            {
                return;
            }

            // The first class name that yields content wins.
            HtmlContent htmldesc = null;
            foreach (string classname in MainContainerClassName.Split(new char[] { '|' }))
            {
                htmldesc = hqExtractor.GetContent(classname);
                if (htmldesc != null)
                {
                    break;
                }
            }

            if (htmldesc == null)
            {
                logger.Error("class=" + MainContainerClassName + " div not found");
                // Stop here: continuing would dereference the null container below.
                return;
            }

            var htmldoc = htmldesc.GetHtmlDocumentObject();
            if (htmldoc == null)
            {
                return;
            }

            var divSet = htmldoc.DocumentNode.SelectNodes("//li[@class='" + ListItemContainerClassname + "']");
            if (divSet == null)
            {
                // Presumably HtmlAgilityPack: SelectNodes returns null (not an empty
                // collection) when nothing matches, so there is nothing to iterate.
                return;
            }

            foreach (HtmlNode node in divSet)
            {
                HandleListItem(node);
            }
        }
Пример #3
0
        /// <summary>
        /// Requests <c>Url</c>; on success, extracts the content identified by <c>DivClassName</c>,
        /// logs the URL, and passes the content to <c>Save</c>.
        /// </summary>
        /// <remarks>
        /// Writes "file exists" to the console when <c>Save</c> returns false.
        /// Does nothing when the request fails.
        /// </remarks>
        public override void DoStuff()
        {
            var extractor = new HttpRequestContentExtractor(HttpMethod.GET, null);

            // Nothing to extract if the page could not be fetched.
            if (!extractor.Goto(this.Url))
            {
                return;
            }

            var content = extractor.GetContent(DivClassName);
            logger.Info(this.Url);

            if (!Save(content))
            {
                Console.Write("file exists");
            }
        }