/// <summary>
/// Queues every link on the page that matches the blog-post URL pattern as a
/// target request, then stores the XPath-extracted "title", "content", "tags"
/// and "artical" fields on the page.
/// </summary>
/// <param name="page">The page whose HTML is inspected and which receives the extracted fields.</param>
public void Process(Page page)
		{
			// Follow-up requests: all links matching the blog-post URL pattern.
			IList<string> blogLinks = page.GetHtml()
				.Links()
				.Regex("http://my\\.oschina\\.net/flashsword/blog/\\d+")
				.GetAll();
			page.AddTargetRequests(blogLinks);

			string title = page.GetHtml()
				.XPath("//div[@class='BlogEntity']/div[@class='BlogTitle']/h1/text()")
				.ToString();
			page.PutField("title", title);

			string content = page.GetHtml()
				.XPath("//div[@class='BlogContent']/tidyText()")
				.ToString();
			page.PutField("content", content);

			// Tags are stored as the full list of matches rather than a single string.
			var tags = page.GetHtml()
				.XPath("//div[@class='BlogTags']/a/text()")
				.GetAll();
			page.PutField("tags", tags);

			// NOTE(review): this selector uses '@Class' while the others use '@class',
			// and the field key is spelled "artical". Both are kept unchanged here;
			// confirm whether they are intended before altering them.
			string article = page.GetHtml()
				.XPath("//*[@Class='Blog']/div[1]/div/h2/a")
				.ToString();
			page.PutField("artical", article);
		}
Exemplo n.º 2
0
 /// <summary>
 /// Runs every registered page model extractor against the page: extracts help
 /// and target links for each extractor, stores each non-empty extraction result
 /// under the model type's full name, and flags the page's result items as
 /// skipped when no field was stored at all.
 /// </summary>
 /// <param name="page">The page to extract links and model objects from.</param>
 public void Process(Page page)
 {
     foreach (PageModelExtractor extractor in _pageModelExtractorList)
     {
         // Help URLs first, then target URLs, each with its own region selector and patterns.
         ExtractLinks(page, extractor.GetHelpUrlRegionSelector(), extractor.GetHelpUrlPatterns());
         ExtractLinks(page, extractor.GetTargetUrlRegionSelector(), extractor.GetTargetUrlPatterns());

         object model = extractor.Process(page);

         // Nothing extracted for this model type.
         if (model == null)
         {
             continue;
         }

         // An empty list result is treated the same as no result.
         IList modelList = model as IList;
         if (modelList != null && modelList.Count == 0)
         {
             continue;
         }

         PostProcessPageModel(model);
         page.PutField(extractor.GetModelType().FullName, model);
     }

     // If no extractor stored anything, mark the result items as skipped.
     bool nothingExtracted = page.GetResultItems().GetAll().Count == 0;
     if (nothingExtracted)
     {
         page.GetResultItems().IsSkip = true;
     }
 }