/// <summary>
/// Enqueues one page task per list-page URL produced from <c>ListPageUrl</c>.
/// </summary>
/// <remarks>
/// With <c>MiningStrategy.UntilLastPageNum</c> the page numbers run from 1 up to
/// <c>LastPageNum</c> in increments of <c>Step</c>. With any other strategy the pages
/// are probed one by one and enqueued until the extractor's <c>Goto</c> returns false.
/// </remarks>
public sealed override void DoStuff()
{
    using (MongoQueue<TaskMessage> taskQueue = new MongoQueue<TaskMessage>(ConfigurationManager.AppSettings["mongodbqueue"], 1000000))
    {
        if (Strategy != MiningStrategy.UntilLastPageNum)
        {
            // Open-ended crawl: the first URL is formatted before the extractor is created,
            // and each later URL is formatted only after the previous page was enqueued.
            int pageNumber = 1;
            string pageUrl = string.Format(ListPageUrl, pageNumber);
            HttpRequestContentExtractor probe = new HttpRequestContentExtractor(HttpMethod.GET, null);

            for (; probe.Goto(pageUrl); pageUrl = string.Format(ListPageUrl, pageNumber))
            {
                taskQueue.Send(CreatePageTask(this.Name, pageUrl));
                pageNumber += Step;
            }
        }
        else
        {
            // Bounded crawl: the upper limit is re-read on every iteration.
            int pageNumber = 1;
            while (pageNumber <= LastPageNum)
            {
                string pageUrl = string.Format(ListPageUrl, pageNumber);
                taskQueue.Send(CreatePageTask(this.Name, pageUrl));
                pageNumber += Step;
            }
        }
    }
}
/// <summary>
/// Loads <c>Url</c>, locates the main container, and hands every list item inside it
/// to <c>HandleListItem</c>.
/// </summary>
/// <remarks>
/// <c>MainContainerClassName</c> may hold several candidate names separated by '|';
/// the first one for which the extractor returns content is used. The method returns
/// without processing anything when the page cannot be loaded, when no container is
/// found (an error is logged in that case), when no HTML document object is available,
/// or when the container holds no matching <c>li</c> nodes.
/// </remarks>
public sealed override void DoStuff()
{
    HttpRequestContentExtractor hqExtractor = new HttpRequestContentExtractor(HttpMethod.GET, null);
    if (!hqExtractor.Goto(this.Url, this.Referer, this.DefaultEncodingName))
    {
        return;
    }

    // Try each candidate container name in order and keep the first hit.
    HtmlContent htmldesc = null;
    foreach (string classname in MainContainerClassName.Split(new char[] { '|' }))
    {
        htmldesc = hqExtractor.GetContent(classname);
        if (htmldesc != null)
        {
            break;
        }
    }

    if (htmldesc == null)
    {
        logger.Error("class=" + MainContainerClassName + " div not found");
        // Stop here: continuing would dereference the null container below.
        return;
    }

    var htmldoc = htmldesc.GetHtmlDocumentObject();
    if (htmldoc == null)
    {
        return;
    }

    var divSet = htmldoc.DocumentNode.SelectNodes("//li[@class='" + ListItemContainerClassname + "']");
    if (divSet == null)
    {
        // SelectNodes yields null rather than an empty collection when nothing
        // matches (HtmlAgilityPack behavior), so there is nothing to enumerate.
        return;
    }

    foreach (HtmlNode node in divSet)
    {
        HandleListItem(node);
    }
}
/// <summary>
/// Fetches <c>Url</c>, extracts the content identified by <c>DivClassName</c>,
/// logs the URL, and passes the content to <c>Save</c>.
/// </summary>
/// <remarks>
/// Nothing happens when the page cannot be loaded. When <c>Save</c> returns false,
/// the text "file exists" is written to the console.
/// </remarks>
public override void DoStuff()
{
    HttpRequestContentExtractor extractor = new HttpRequestContentExtractor(HttpMethod.GET, null);

    // Guard clause: bail out when the request fails.
    if (!ce_Goto(extractor))
    {
        return;
    }

    var content = extractor.GetContent(DivClassName);
    logger.Info(this.Url);

    bool saved = Save(content);
    if (saved)
    {
        return;
    }

    Console.Write("file exists");
}

// Navigates the given extractor to this instance's Url and reports whether it succeeded.
private bool ce_Goto(HttpRequestContentExtractor extractor)
{
    return extractor.Goto(this.Url);
}