Пример #1
0
 /// <summary>
 /// Builds a <see cref="ModelPageProcessor"/> for the site and registers each supplied type on it.
 /// </summary>
 /// <param name="site">Site passed to the processor's constructor.</param>
 /// <param name="types">Types handed, in order, to <c>AddPageModel</c>.</param>
 /// <returns>The newly created processor.</returns>
 public static ModelPageProcessor Create(Site site, params Type[] types)
 {
     var processor = new ModelPageProcessor(site);

     for (int index = 0; index < types.Length; index++)
     {
         processor.AddPageModel(types[index]);
     }

     return processor;
 }
Пример #2
0
 /// <summary>
 /// Starts a 20-thread spider for the 36kr start URL and registers it with the spider monitor.
 /// </summary>
 public static void Run()
 {
     var startSite = new Site();
     startSite.AddStartUrl("http://www.36kr.com/");

     Core.Spider spider = OoSpider
         .Create(startSite, new CollectorPageModelToDbPipeline(), typeof(Kr36NewsModel))
         .SetThreadNum(20);
     spider.Start();

     // The monitor instance is fetched only after the spider has been started, as before.
     SpiderMonitor.Instance.Register(spider);
 }
Пример #3
0
        //[ExtractBy(Value = "//div[@class='BlogStat']/regex('\\d+-\\d+-\\d+\\s+\\d+:\\d+')")]
        //public DateTime Date { get; set; }
        /// <summary>
        /// Configures a site for the oschina blog start URL and runs a single-threaded spider over it.
        /// </summary>
        public static void Run()
        {
            var blogSite = new Site();
            blogSite.UserAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36";
            blogSite.AddStartUrl("http://my.oschina.net/flashsword/blog");
            blogSite.SleepTime = 0;
            blogSite.RetryTimes = 3;

            var spider = OoSpider.Create(blogSite, new CollectorPageModelToDbPipeline(), typeof(OschinaBlog));
            spider.SetThreadNum(1).Run();
        }
Пример #4
0
        /// <summary>
        /// Creates a spider whose page processor is built from the given page model types.
        /// </summary>
        /// <param name="identify">Identifier forwarded to the chained constructor.</param>
        /// <param name="site">Site used to create the <see cref="ModelPageProcessor"/>.</param>
        /// <param name="pageModelPipeline">Pipeline registered for every page model; nothing is registered when it is null.</param>
        /// <param name="pageModels">Page model types to process.</param>
        public OoSpider(string identify, Site site, IPageModelPipeline pageModelPipeline, params Type[] pageModels)
            : this(identify, ModelPageProcessor.Create(site, pageModels))
        {
            // NOTE(review): the original comment here was mis-encoded; it appears to ask
            // why an extra ModelPipeline has to be added. Left as an open question.
            _modelPipeline = new ModelPipeline();
            AddPipeline(_modelPipeline);

            bool hasModelPipeline = pageModelPipeline != null;

            foreach (Type modelType in pageModels)
            {
                if (hasModelPipeline)
                {
                    _modelPipeline.Put(modelType, pageModelPipeline);
                }

                // The model type is recorded whether or not a pipeline was supplied.
                _pageModelTypes.Add(modelType);
            }
        }
		/// <summary>
		/// Adds one start request per format string for every JSON object enumerated from <paramref name="obj"/>.
		/// </summary>
		/// <param name="site">Site that receives the generated start requests.</param>
		/// <param name="obj">Sequence whose items are enumerated as <see cref="JObject"/>.</param>
		public override void Build(Site site, dynamic obj)
		{
			foreach (JObject jobject in obj)
			{
				// Each JSON object gets its own dictionary. Previously the enumerated sequence
				// itself (obj) was assigned here, which cannot be both a sequence of JObject
				// and a Dictionary<string, object> for any non-empty input.
				Dictionary<string, object> tmp = new Dictionary<string, object>();

				if (Extras != null)
				{
					foreach (var extra in Extras)
					{
						tmp.Add(extra.Key, extra.Value);
					}
				}

				// Key each property by its name. Previously every child was added under the same
				// empty key, so the second property threw and the column lookup below could never
				// find a value.
				// NOTE(review): assumes Columns refer to JSON property names, and that a property
				// should override an extra with the same key - confirm against callers.
				foreach (var property in jobject.Properties())
				{
					tmp[property.Name] = property.Value.ToString();
				}

				// Format arguments are the column values, in column order, after their formatters.
				List<string> arguments = new List<string>();
				foreach (var column in Columns)
				{
					string value = tmp[column.Name]?.ToString();

					value = column.Formatters.Aggregate(value, (current, formatter) => formatter.Formate(current));
					arguments.Add(value);
				}

				foreach (var formate in FormateStrings)
				{
					string tmpUrl = string.Format(formate, arguments.Cast<object>().ToArray());
					site.AddStartRequest(new Request(tmpUrl, 0, tmp)
					{
						Method = Method,
						Origin = Origin,
						PostBody = GetPostBody(PostBody, tmp),
						Referer = Referer
					});
				}
			}
		}
		/// <summary>
		/// Adds start requests whose single format argument is a batch of rows. Each row is the
		/// formatted column values joined with ColumnSeparator; up to Interval rows are joined
		/// with RowSeparator to form one batch.
		/// </summary>
		/// <param name="site">Site that receives the generated start requests.</param>
		/// <param name="obj">Not used by this implementation.</param>
		public override void Build(Site site, dynamic obj)
		{
			BuildQueryString();

			var datas = PrepareDatas();
			List<string> rows = new List<string>();

			foreach (var data in datas)
			{
				// A full batch is flushed before the next row is added. The Count > 0 guard
				// keeps a non-positive Interval from emitting a request for an empty batch.
				if (rows.Count > 0 && rows.Count == Interval)
				{
					AddBatchRequests(site, rows);
					rows = new List<string>();
				}

				Dictionary<string, object> tmp = data;

				List<string> values = new List<string>();
				foreach (var column in Columns)
				{
					string value = tmp[column.Name]?.ToString();

					value = column.Formatters.Aggregate(value, (current, formatter) => formatter.Formate(current));

					values.Add(value);
				}

				// string.Join puts separators only between items, so no trailing separator
				// has to be trimmed afterwards.
				rows.Add(string.Join(ColumnSeparator, values));
			}

			// Flush the last (possibly partial) batch once, after the loop. Previously this
			// flush sat inside the loop, so a request was emitted after every single row, and
			// it trimmed one character regardless of the RowSeparator length.
			if (rows.Count != 0)
			{
				AddBatchRequests(site, rows);
			}
		}

		/// <summary>
		/// Joins the rows with RowSeparator and adds one start request per format string.
		/// </summary>
		private void AddBatchRequests(Site site, List<string> rows)
		{
			string batch = string.Join(RowSeparator, rows);

			foreach (var formate in FormateStrings)
			{
				string tmpUrl = string.Format(formate, batch);
				site.AddStartRequest(new Request(tmpUrl, 0, null)
				{
					Method = Method,
					Origin = Origin,
					Referer = Referer
				});
			}
		}
		/// <summary>
		/// Adds start requests for every prepared data row, every value in the From..To range
		/// (step Interval), every value in the PostFrom..PostTo range (step PostInterval)
		/// and every format string.
		/// </summary>
		/// <param name="site">Site that receives the generated start requests.</param>
		/// <param name="obj">Not used by this implementation.</param>
		public override void Build(Site site, dynamic obj)
		{
			BuildQueryString();

			foreach (var row in PrepareDatas())
			{
				for (int page = From; page <= To; page += Interval)
				{
					// The range value is appended as the last format argument.
					var formatArgs = PrepareArguments(row);
					formatArgs.Add(page.ToString());

					for (int postIndex = PostFrom; postIndex <= PostTo; postIndex += PostInterval)
					{
						foreach (var template in FormateStrings)
						{
							string url = string.Format(template, formatArgs.Cast<object>().ToArray());

							var request = new Request(url, 0, row);
							request.Method = Method;
							request.Origin = Origin;
							request.PostBody = GetPostBody(PostBody, row, postIndex);
							request.Referer = Referer;

							site.AddStartRequest(request);
						}
					}
				}
			}
		}
Пример #8
0
 /// <summary>
 /// Creates an entity spider; every argument is forwarded unchanged to the base constructor.
 /// </summary>
 public EntityGeneralSpider(Site site, string identify, string userid, string taskGroup, IPageProcessor pageProcessor, IScheduler scheduler)
     : base(site, identify, userid, taskGroup, pageProcessor, scheduler)
 {
 }
Пример #9
0
 /// <summary>
 /// Runs a spider for the oschina question start URL using a UTF-8 site and the OschinaAnswer model.
 /// </summary>
 public static void Run()
 {
     var questionSite = new Site();
     questionSite.Encoding = Encoding.UTF8;
     questionSite.AddStartUrl("http://www.oschina.net/question/1995445_2136783");

     var spider = OoSpider.Create(questionSite, typeof(OschinaAnswer));
     spider.Run();
 }
Пример #10
0
 /// <summary>
 /// Creates a composite page processor and stores the given site in the <c>_site</c> field.
 /// </summary>
 /// <param name="site">Site assigned to <c>_site</c>.</param>
 public CompositePageProcessor(Site site)
 {
     _site = site;
 }
Пример #11
0
 /// <summary>
 /// Protected constructor for derived spiders; every argument is forwarded unchanged to the base constructor.
 /// </summary>
 protected BaseModelSpider(Site site, string identify, string userid, string taskGroup, IPageProcessor pageProcessor, IScheduler scheduler)
     : base(site, identify, userid, taskGroup, pageProcessor, scheduler)
 {
 }
Пример #12
0
 /// <summary>
 /// Private constructor; assigns the given site to <c>Site</c>.
 /// </summary>
 /// <param name="site">Site assigned to <c>Site</c>.</param>
 private ModelPageProcessor(Site site)
 {
     Site = site;
 }
Пример #13
0
 /// <summary>
 /// Creates an <see cref="OoSpider"/> with a null identify for the given site, pipeline and page models.
 /// </summary>
 /// <param name="site">Site passed to the spider constructor.</param>
 /// <param name="pageModelPipeline">Pipeline passed to the spider constructor.</param>
 /// <param name="pageModels">Page model types passed to the spider constructor.</param>
 /// <returns>The newly created spider.</returns>
 public static OoSpider Create(Site site, IPageModelPipeline pageModelPipeline, params Type[] pageModels)
 {
     var spider = new OoSpider(null, site, pageModelPipeline, pageModels);
     return spider;
 }
Пример #14
0
 /// <summary>
 /// Creates an <see cref="OoSpider"/> with the given identify and no page model pipeline.
 /// </summary>
 /// <param name="identify">Identifier passed to the spider constructor.</param>
 /// <param name="site">Site passed to the spider constructor.</param>
 /// <param name="pageModels">Page model types passed to the spider constructor.</param>
 /// <returns>The newly created spider.</returns>
 public static OoSpider Create(string identify, Site site, params Type[] pageModels)
 {
     var spider = new OoSpider(identify, site, null, pageModels);
     return spider;
 }
		/// <summary>
		/// Implemented by derived types to build against the given site.
		/// NOTE(review): the meaning of <paramref name="obj"/> is not visible from this declaration - confirm with implementers.
		/// </summary>
		/// <param name="site">Site handed to the implementation.</param>
		/// <param name="obj">Dynamically typed input handed to the implementation.</param>
		public abstract void Build(Site site, dynamic obj);
Пример #16
0
 /// <summary>
 /// Creates an entity processor and assigns the given site to <c>Site</c>.
 /// </summary>
 /// <param name="site">Site assigned to <c>Site</c>.</param>
 public EntityProcessor(Site site)
 {
     Site = site;
 }
		/// <summary>
		/// Adds one start request for every value in the From..To range (step Interval),
		/// formatting FormateString with that value. All requests share one dictionary
		/// that holds a copy of Extras.
		/// </summary>
		/// <param name="site">Site that receives the generated start requests.</param>
		/// <param name="obj">Not used by this implementation.</param>
		public override void Build(Site site, dynamic obj)
		{
			var sharedExtras = new Dictionary<string, object>();

			if (Extras != null)
			{
				foreach (var pair in Extras)
				{
					sharedExtras.Add(pair.Key, pair.Value);
				}
			}

			int current = From;
			while (current <= To)
			{
				var request = new Request(string.Format(FormateString, current), 1, sharedExtras);
				request.PostBody = PostBody;
				request.Origin = Origin;
				request.Method = Method;
				request.Referer = Referer;

				site.AddStartRequest(request);

				current += Interval;
			}
		}
 /// <summary>
 /// Creates a configurable page processor, storing the site in <c>Site</c> and the rules in <c>_extractRules</c>.
 /// </summary>
 /// <param name="site">Site assigned to <c>Site</c>.</param>
 /// <param name="extractRules">Rule list stored as-is (not copied) in <c>_extractRules</c>.</param>
 public ConfigurablePageProcessor(Site site, List<ExtractRule> extractRules)
 {
     Site = site;
     _extractRules = extractRules;
 }
Пример #19
0
 /// <summary>
 /// Replaces the stored site and returns this instance so calls can be chained.
 /// </summary>
 /// <param name="site">Site assigned to <c>_site</c>.</param>
 /// <returns>This processor.</returns>
 public CompositePageProcessor SetSite(Site site)
 {
     _site = site;
     return this;
 }
Пример #20
0
 /// <summary>
 /// Runs a spider for the iteye blog start URL using the IteyeBlog model.
 /// </summary>
 public static void Run()
 {
     var blogSite = new Site();
     blogSite.AddStartUrl("http://flashsword20.iteye.com/blog");

     var spider = OoSpider.Create(blogSite, typeof(IteyeBlog));
     spider.Run();
 }