public ZhaopinLagouJobPageAnalyzer() { Site = SiteConfiguration.Instance.GetSite(SiteId); _fieldSelectors = new List <FieldSelector>(); _fieldSelectors.Add(new FieldSelector() { FieldName = "JobName", Selector = "/html/body/div[2]/div/div[1]/div/span" }); _fieldSelectors.Add(new FieldSelector() { FieldName = "Salary", Selector = "/html/body/div[2]/div/div[1]/dd/p[1]/span[1]" }); _fieldSelectors.Add(new FieldSelector() { FieldName = "Lightspot", Selector = "//*[@id=\"job_detail\"]/dd[1]/p" }); _fieldSelectors.Add(new FieldSelector() { FieldName = "PositionInfo", Selector = "//*[@id=\"job_detail\"]/dd[2]/div" }); _fieldSelectors.Add(new FieldSelector() { FieldName = "Address", Selector = "//*[@id=\"job_detail\"]/dd[3]/div[1]" }); _fieldSelectors.Add(new FieldSelector() { FieldName = "Company", Selector = "//*[@id=\"job_company\"]/dd/ul/li[3]" }); _fieldSelectors.Add(new FieldSelector() { FieldName = "SiteUrl", Selector = "//*[@id=\"job_company\"]/dd/ul/li[4]/a[@href]" }); }
public void ProductionTest() { var site = new Site() { Domain = "www.usashopcn.com" }; var requestMessage = NewTestRequestMessage(site); var bloomFilter = new MemoryBloomFilter<string>(1000 * 10, 1000 * 10 * 20); var lineScheduler = new SequenceScheduler(bloomFilter); lineScheduler.Push(requestMessage); Trace.WriteLine("CurrentQueueCount:" + lineScheduler.CurrentQueueCount()); Assert.IsTrue(lineScheduler.CurrentQueueCount() == 1, "添加消息失败"); lineScheduler.Dispose(); }
public PlantCsdbPhotoPageAnalyzer() { Site = SiteConfiguration.Instance.GetSite(SiteId); latinSelector = new FieldSelector() { FieldName = "LatinName", Selector = "//*[@id=\"content\"]/h1" }; thumbImgUrlSelector = new FieldSelector() { FieldName = "ThumbImgUrl", Selector = "//*[@id=\"quicktabs_tabpage__fourth\"]/a/img" }; }
public RequestMessage NewTestRequestMessage(Site site) { return new RequestMessage() { Topic = TopicType.StaticHtml, Request = new Request() { SiteId = site.Id, Url = "http://www.usashopcn.com/Product/Details/126993" } }; }
public DevTestPageAnalyzer() { Site = SiteConfiguration.Instance.GetSite(SiteId); _fieldXPaths = new Dictionary <string, string> { { "Title", "//*[@id=\"p_mid\"]/div[2]/h2" }, { "PicUrl", "//img[@id=\"productImage\"]/@src" }, { "Price", "//*[@id=\"p_mid\"]/div[2]/div[1]/div[2]/span" }, { "Description", "//*[@id=\"intro_0\"]" } }.Select(e => new FieldSelector() { FieldName = e.Key, Selector = e.Value }).ToList(); }
public List<RequestMessage> NewTestRequestMessages(Site site) { var list = new List<RequestMessage>(); list.Add(new RequestMessage() { Topic = TopicType.StaticHtml, Request = new Request() { SiteId = site.Id, Url = "http://www.usashopcn.com/Product/Details/126334" } }); list.Add(new RequestMessage() { Topic = TopicType.StaticHtml, Request = new Request() { SiteId = site.Id, Url = "http://www.usashopcn.com/Product/Details/127698" } }); list.Add(new RequestMessage() { Topic = TopicType.StaticHtml, Request = new Request() { SiteId = site.Id, Url = "http://www.usashopcn.com/Product/Details/127593" } }); list.Add(new RequestMessage() { Topic = TopicType.StaticHtml, Request = new Request() { SiteId = site.Id, Url = "http://www.usashopcn.com/Product/Details/126855" } }); return list; }
public PlantCsdbListPageAnalyzer() { Site = SiteConfiguration.Instance.GetSite(SiteId); }
public void ConsumeTest() { var site = new Site() {Domain = "www.usashopcn.com"}; var requestMessages = NewTestRequestMessages(site); var bloomFilter = new MemoryBloomFilter<string>(1000 * 10, 1000 * 10 * 20); var lineScheduler = new SequenceScheduler(bloomFilter); lineScheduler.Push(requestMessages); Trace.WriteLine("CurrentQueueCount:" + lineScheduler.CurrentQueueCount()); var resultPipeline = new ResultPipeline(); resultPipeline.RegisterModule(new TestPipelineMoudle()); var pageAnalyzers = new List<KeyValuePair<string, Type>>(); pageAnalyzers.Add(new KeyValuePair<string, Type>(site.Domain, typeof(TestPageAnalyzer))); var consumerBroker = new ConsumerBroker(2, lineScheduler, new List<IDownloader>() {new TestDownloader()}, new ResultPipeline(), pageAnalyzers); consumerBroker.Start(); var timer = new Timer((state) => { Trace.WriteLine("Timer结束:"); Assert.IsTrue(lineScheduler.CurrentQueueCount() == 0, "添加消息失败"); }, null, 20000, 0); //lineScheduler.Dispose(); }
public ZhaopinLagouListPageAnalyzer() { Site = SiteConfiguration.Instance.GetSite(SiteId); }