示例#1
0
        /// <summary>
        /// Looks up the site configuration for <c>SiteId</c> and populates
        /// <c>_fieldSelectors</c> with one selector (field name plus XPath expression)
        /// per field extracted from a job page.
        /// </summary>
        public ZhaopinLagouJobPageAnalyzer()
        {
            Site = SiteConfiguration.Instance.GetSite(SiteId);

            // Order matches the original registration order.
            _fieldSelectors = new List<FieldSelector>
            {
                new FieldSelector { FieldName = "JobName",      Selector = "/html/body/div[2]/div/div[1]/div/span" },
                new FieldSelector { FieldName = "Salary",       Selector = "/html/body/div[2]/div/div[1]/dd/p[1]/span[1]" },
                new FieldSelector { FieldName = "Lightspot",    Selector = "//*[@id=\"job_detail\"]/dd[1]/p" },
                new FieldSelector { FieldName = "PositionInfo", Selector = "//*[@id=\"job_detail\"]/dd[2]/div" },
                new FieldSelector { FieldName = "Address",      Selector = "//*[@id=\"job_detail\"]/dd[3]/div[1]" },
                new FieldSelector { FieldName = "Company",      Selector = "//*[@id=\"job_company\"]/dd/ul/li[3]" },
                new FieldSelector { FieldName = "SiteUrl",      Selector = "//*[@id=\"job_company\"]/dd/ul/li[4]/a[@href]" }
            };
        }
示例#2
0
 /// <summary>
 /// Pushes a single request message into a <c>SequenceScheduler</c> and checks
 /// that the scheduler reports exactly one queued item afterwards.
 /// </summary>
 public void ProductionTest()
 {
     var site = new Site() { Domain = "www.usashopcn.com" };
     var requestMessage = NewTestRequestMessage(site);
     var bloomFilter = new MemoryBloomFilter<string>(1000 * 10, 1000 * 10 * 20);
     var lineScheduler = new SequenceScheduler(bloomFilter);
     try
     {
         lineScheduler.Push(requestMessage);

         // Read the count once so the trace output and the assertion see the same value.
         var queuedCount = lineScheduler.CurrentQueueCount();
         Trace.WriteLine("CurrentQueueCount:" + queuedCount);
         Assert.IsTrue(queuedCount == 1, "添加消息失败");
     }
     finally
     {
         // Dispose even when Push or the assertion throws, so a failing test
         // does not leak the scheduler.
         lineScheduler.Dispose();
     }
 }
示例#3
0
 /// <summary>
 /// Looks up the site configuration for <c>SiteId</c> and creates the two field
 /// selectors used by this analyzer: <c>LatinName</c> and <c>ThumbImgUrl</c>.
 /// </summary>
 public PlantCsdbPhotoPageAnalyzer()
 {
     const string latinNameXPath = "//*[@id=\"content\"]/h1";
     const string thumbImgXPath  = "//*[@id=\"quicktabs_tabpage__fourth\"]/a/img";

     Site = SiteConfiguration.Instance.GetSite(SiteId);

     latinSelector       = new FieldSelector { FieldName = "LatinName",   Selector = latinNameXPath };
     thumbImgUrlSelector = new FieldSelector { FieldName = "ThumbImgUrl", Selector = thumbImgXPath };
 }
示例#4
0
        /// <summary>
        /// Builds a single <c>StaticHtml</c> request message for a fixed product
        /// detail URL, tagged with the id of the given site.
        /// </summary>
        /// <param name="site">Site whose <c>Id</c> is copied into the request.</param>
        /// <returns>A new <c>RequestMessage</c> wrapping the request.</returns>
        public RequestMessage NewTestRequestMessage(Site site)
        {
            var productRequest = new Request
            {
                SiteId = site.Id,
                Url    = "http://www.usashopcn.com/Product/Details/126993"
            };

            var message = new RequestMessage
            {
                Topic   = TopicType.StaticHtml,
                Request = productRequest
            };
            return message;
        }
示例#5
0
 /// <summary>
 /// Looks up the site configuration for <c>SiteId</c> and converts a
 /// field-name-to-XPath table into the <c>FieldSelector</c> list stored in
 /// <c>_fieldXPaths</c>.
 /// </summary>
 public DevTestPageAnalyzer()
 {
     Site = SiteConfiguration.Instance.GetSite(SiteId);

     // Field name -> XPath expression.
     var xpathByField = new Dictionary<string, string>
     {
         { "Title", "//*[@id=\"p_mid\"]/div[2]/h2" },
         { "PicUrl", "//img[@id=\"productImage\"]/@src" },
         { "Price", "//*[@id=\"p_mid\"]/div[2]/div[1]/div[2]/span" },
         { "Description", "//*[@id=\"intro_0\"]" }
     };

     // One selector per table entry, in the dictionary's enumeration order.
     var selectors = xpathByField.Select(
         entry => new FieldSelector { FieldName = entry.Key, Selector = entry.Value });

     _fieldXPaths = selectors.ToList();
 }
示例#6
0
        /// <summary>
        /// Builds four <c>StaticHtml</c> request messages, one per fixed product
        /// detail URL, each tagged with the id of the given site.
        /// </summary>
        /// <param name="site">Site whose <c>Id</c> is copied into every request.</param>
        /// <returns>The messages, in the order the URLs are listed below.</returns>
        public List<RequestMessage> NewTestRequestMessages(Site site)
        {
            string[] productUrls =
            {
                "http://www.usashopcn.com/Product/Details/126334",
                "http://www.usashopcn.com/Product/Details/127698",
                "http://www.usashopcn.com/Product/Details/127593",
                "http://www.usashopcn.com/Product/Details/126855"
            };

            var messages = new List<RequestMessage>();
            foreach (var productUrl in productUrls)
            {
                var message = new RequestMessage
                {
                    Topic   = TopicType.StaticHtml,
                    Request = new Request { SiteId = site.Id, Url = productUrl }
                };
                messages.Add(message);
            }

            return messages;
        }
示例#7
0
 /// <summary>
 /// Looks up the site configuration for <c>SiteId</c> and stores it in <c>Site</c>.
 /// </summary>
 public PlantCsdbListPageAnalyzer()
 {
     var configuredSite = SiteConfiguration.Instance.GetSite(SiteId);
     Site = configuredSite;
 }
示例#8
0
        /// <summary>
        /// Pushes a batch of request messages into a <c>SequenceScheduler</c>, starts a
        /// <c>ConsumerBroker</c> over it, and checks that the queue is drained within
        /// 20 seconds.
        /// </summary>
        public void ConsumeTest()
        {
            var site = new Site() {Domain = "www.usashopcn.com"};

            var requestMessages = NewTestRequestMessages(site);
            var bloomFilter = new MemoryBloomFilter<string>(1000 * 10, 1000 * 10 * 20);
            var lineScheduler = new SequenceScheduler(bloomFilter);
            lineScheduler.Push(requestMessages);
            Trace.WriteLine("CurrentQueueCount:" + lineScheduler.CurrentQueueCount());

            var resultPipeline = new ResultPipeline();
            resultPipeline.RegisterModule(new TestPipelineMoudle());
            var pageAnalyzers = new List<KeyValuePair<string, Type>>();
            pageAnalyzers.Add(new KeyValuePair<string, Type>(site.Domain, typeof(TestPageAnalyzer)));

            // Pass the pipeline that actually has the test module registered; the
            // previous code handed the broker a fresh, empty ResultPipeline instead.
            var consumerBroker = new ConsumerBroker(2, lineScheduler, new List<IDownloader>() {new TestDownloader()}, resultPipeline, pageAnalyzers);
            consumerBroker.Start();

            // Wait on the test thread (bounded to 20 s) so the assertion below runs
            // before the test method returns. Asserting inside a timer callback that
            // fires after the method has returned cannot fail the test.
            const int timeoutMilliseconds = 20000;
            const int pollIntervalMilliseconds = 100;
            var elapsed = Stopwatch.StartNew();
            while (lineScheduler.CurrentQueueCount() != 0 && elapsed.ElapsedMilliseconds < timeoutMilliseconds)
            {
                Thread.Sleep(pollIntervalMilliseconds);
            }

            var remaining = lineScheduler.CurrentQueueCount();
            Trace.WriteLine("CurrentQueueCount:" + remaining);
            Assert.IsTrue(remaining == 0, "添加消息失败");

            // NOTE(review): the scheduler is intentionally not disposed here, as in the
            // original; the broker was started and may still be using it. Confirm how
            // ConsumerBroker is meant to be shut down before adding cleanup.
        }
 /// <summary>
 /// Looks up the site configuration for <c>SiteId</c> and stores it in <c>Site</c>.
 /// </summary>
 public ZhaopinLagouListPageAnalyzer()
 {
     var configuredSite = SiteConfiguration.Instance.GetSite(SiteId);
     Site = configuredSite;
 }