/// <summary>
/// Creates a <see cref="ModelPageProcessor"/> for the given site and registers
/// every supplied page model type on it.
/// </summary>
/// <param name="site">Site handed to the processor's constructor.</param>
/// <param name="types">Page model types, each passed to <c>AddPageModel</c> in order.</param>
/// <returns>The newly created processor.</returns>
public static ModelPageProcessor Create(Site site, params Type[] types)
{
    var processor = new ModelPageProcessor(site);

    for (int index = 0; index < types.Length; index++)
    {
        processor.AddPageModel(types[index]);
    }

    return processor;
}
/// <summary>
/// Sample entry point: starts a spider on http://www.36kr.com/ for
/// <c>Kr36NewsModel</c> with the thread number set to 20, then registers
/// the spider with the <c>SpiderMonitor</c> singleton.
/// </summary>
public static void Run()
{
    Site targetSite = new Site();
    targetSite.AddStartUrl("http://www.36kr.com/");

    Core.Spider spider = OoSpider
        .Create(targetSite, new CollectorPageModelToDbPipeline(), typeof(Kr36NewsModel))
        .SetThreadNum(20);

    // The spider is started first and only afterwards registered with the monitor.
    spider.Start();
    SpiderMonitor.Instance.Register(spider);
}
// Disabled model member, kept for reference:
//[ExtractBy(Value = "//div[@class='BlogStat']/regex('\\d+-\\d+-\\d+\\s+\\d+:\\d+')")]
//public DateTime Date { get; set; }

/// <summary>
/// Sample entry point: runs a single-threaded spider over
/// http://my.oschina.net/flashsword/blog for <c>OschinaBlog</c>, using a fixed
/// desktop Chrome user agent, no sleep time and three retries.
/// </summary>
public static void Run()
{
    var blogSite = new Site();
    blogSite.UserAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36";

    blogSite.AddStartUrl("http://my.oschina.net/flashsword/blog");
    blogSite.SleepTime = 0;
    blogSite.RetryTimes = 3;

    var spider = OoSpider.Create(blogSite, new CollectorPageModelToDbPipeline(), typeof(OschinaBlog));
    spider.SetThreadNum(1).Run();
}
/// <summary>
/// Creates a spider whose page processor is built from the given page models.
/// </summary>
/// <param name="identify">Identifier forwarded to the chained constructor.</param>
/// <param name="site">Site used to create the <see cref="ModelPageProcessor"/>.</param>
/// <param name="pageModelPipeline">
/// Pipeline bound to every page model; when <c>null</c>, no binding is made.
/// </param>
/// <param name="pageModels">Page model types handled by this spider.</param>
public OoSpider(string identify, Site site, IPageModelPipeline pageModelPipeline, params Type[] pageModels)
    : this(identify, ModelPageProcessor.Create(site, pageModels))
{
    // TODO(review): the original (garbled) note here questioned why a ModelPipeline has to be added.
    _modelPipeline = new ModelPipeline();
    AddPipeline(_modelPipeline);

    bool hasPipeline = pageModelPipeline != null;

    foreach (Type modelType in pageModels)
    {
        if (hasPipeline)
        {
            _modelPipeline.Put(modelType, pageModelPipeline);
        }

        _pageModelTypes.Add(modelType);
    }
}
/// <summary>
/// Adds one start request per format string for every JSON object found in
/// <paramref name="obj"/>.
/// </summary>
/// <remarks>
/// For each JSON object a fresh data dictionary is built from <c>Extras</c>
/// (when present) plus the object's own properties, keyed by property name.
/// The previous implementation assigned <paramref name="obj"/> itself to the
/// dictionary and added every child under the same empty key, which could not
/// succeed for more than one child and never exposed the column names that are
/// looked up below.
/// NOTE(review): keying by JSON property name is the assumed intent — confirm
/// against the producers of <paramref name="obj"/>.
/// </remarks>
/// <param name="site">Site that receives the generated start requests.</param>
/// <param name="obj">An enumerable of <c>JObject</c> rows.</param>
public override void Build(Site site, dynamic obj)
{
    foreach (JObject jobject in obj)
    {
        // One dictionary per row so that rows never share or overwrite each other's data.
        Dictionary<string, object> tmp = new Dictionary<string, object>();

        if (Extras != null)
        {
            foreach (var extra in Extras)
            {
                tmp.Add(extra.Key, extra.Value);
            }
        }

        // Indexer assignment: a row value replaces an extra with the same key instead of throwing.
        foreach (JProperty property in jobject.Properties())
        {
            tmp[property.Name] = property.Value.ToString();
        }

        List<string> arguments = new List<string>();
        foreach (var column in Columns)
        {
            string value = tmp[column.Name]?.ToString();
            value = column.Formatters.Aggregate(value, (current, formatter) => formatter.Formate(current));
            arguments.Add(value);
        }

        foreach (var formate in FormateStrings)
        {
            string tmpUrl = string.Format(formate, arguments.Cast<object>().ToArray());
            site.AddStartRequest(new Request(tmpUrl, 0, tmp)
            {
                Method = Method,
                Origin = Origin,
                PostBody = GetPostBody(PostBody, tmp),
                Referer = Referer
            });
        }
    }
}
/// <summary>
/// Groups the prepared data rows into batches of <c>Interval</c> rows and adds
/// one start request per format string for every batch.
/// </summary>
/// <remarks>
/// Within a row the formatted column values are joined with
/// <c>ColumnSeparator</c>; rows inside a batch are joined with
/// <c>RowSeparator</c>. The joined batch is the single argument passed to each
/// format string. A final, possibly smaller, batch is emitted once after the
/// last row. The previous implementation performed that final flush inside the
/// row loop, so it emitted an extra cumulative request for every row, and it
/// trimmed a hard-coded single character instead of the row separator.
/// </remarks>
/// <param name="site">Site that receives the generated start requests.</param>
/// <param name="obj">Not used by this implementation.</param>
public override void Build(Site site, dynamic obj)
{
    BuildQueryString();

    var datas = PrepareDatas();
    List<string> rows = new List<string>();

    foreach (var data in datas)
    {
        Dictionary<string, object> tmp = data;

        List<string> values = new List<string>();
        foreach (var column in Columns)
        {
            string value = tmp[column.Name]?.ToString();
            value = column.Formatters.Aggregate(value, (current, formatter) => formatter.Formate(current));
            values.Add(value);
        }

        // string.Join never leaves a trailing separator, so no manual trimming is needed.
        rows.Add(string.Join(ColumnSeparator, values));

        if (rows.Count == Interval)
        {
            AddBatchRequests(site, rows);
            rows.Clear();
        }
    }

    // Flush whatever is left over after the last full batch.
    if (rows.Count != 0)
    {
        AddBatchRequests(site, rows);
    }
}

/// <summary>
/// Adds one start request per format string for the given batch of rows.
/// </summary>
private void AddBatchRequests(Site site, List<string> rows)
{
    string batch = string.Join(RowSeparator, rows);

    foreach (var formate in FormateStrings)
    {
        string tmpUrl = string.Format(formate, batch);
        site.AddStartRequest(new Request(tmpUrl, 0, null)
        {
            Method = Method,
            Origin = Origin,
            Referer = Referer
        });
    }
}
/// <summary>
/// Adds start requests for every combination of prepared data row, value in
/// <c>From..To</c> (step <c>Interval</c>), value in <c>PostFrom..PostTo</c>
/// (step <c>PostInterval</c>) and format string.
/// </summary>
/// <param name="site">Site that receives the generated start requests.</param>
/// <param name="obj">Not used by this implementation.</param>
public override void Build(Site site, dynamic obj)
{
    BuildQueryString();

    foreach (var row in PrepareDatas())
    {
        for (int urlIndex = From; urlIndex <= To; urlIndex += Interval)
        {
            // The URL arguments are the row's prepared arguments followed by the current index.
            var urlArguments = PrepareArguments(row);
            urlArguments.Add(urlIndex.ToString());

            for (int postIndex = PostFrom; postIndex <= PostTo; postIndex += PostInterval)
            {
                foreach (var formate in FormateStrings)
                {
                    object[] formatArgs = urlArguments.Cast<object>().ToArray();
                    string url = string.Format(formate, formatArgs);

                    var request = new Request(url, 0, row)
                    {
                        Method = Method,
                        Origin = Origin,
                        PostBody = GetPostBody(PostBody, row, postIndex),
                        Referer = Referer
                    };
                    site.AddStartRequest(request);
                }
            }
        }
    }
}
/// <summary>
/// Creates an <see cref="EntityGeneralSpider"/>; every argument is passed
/// unchanged to the base constructor.
/// </summary>
public EntityGeneralSpider(
    Site site,
    string identify,
    string userid,
    string taskGroup,
    IPageProcessor pageProcessor,
    IScheduler scheduler)
    : base(site, identify, userid, taskGroup, pageProcessor, scheduler)
{
    // No additional initialization.
}
/// <summary>
/// Sample entry point: runs a spider over a single oschina.net question page
/// for <c>OschinaAnswer</c>, with the site encoding set to UTF-8.
/// </summary>
public static void Run()
{
    var questionSite = new Site();
    questionSite.Encoding = Encoding.UTF8;
    questionSite.AddStartUrl("http://www.oschina.net/question/1995445_2136783");

    var spider = OoSpider.Create(questionSite, typeof(OschinaAnswer));
    spider.Run();
}
/// <summary>
/// Creates a composite page processor bound to the given site.
/// </summary>
/// <param name="site">Site stored in the processor's <c>_site</c> field.</param>
public CompositePageProcessor(Site site)
{
    _site = site;
}
/// <summary>
/// Initializes the model spider base; every argument is passed unchanged to
/// the base constructor.
/// </summary>
protected BaseModelSpider(
    Site site,
    string identify,
    string userid,
    string taskGroup,
    IPageProcessor pageProcessor,
    IScheduler scheduler)
    : base(site, identify, userid, taskGroup, pageProcessor, scheduler)
{
    // No additional initialization.
}
/// <summary>
/// Private constructor: stores the site on the processor's <c>Site</c> member.
/// </summary>
/// <param name="site">Site assigned to <c>Site</c>.</param>
private ModelPageProcessor(Site site)
{
    Site = site;
}
/// <summary>
/// Creates an <see cref="OoSpider"/> with a <c>null</c> identifier for the
/// given site, page model pipeline and page models.
/// </summary>
/// <param name="site">Site to crawl.</param>
/// <param name="pageModelPipeline">Pipeline passed to the spider constructor.</param>
/// <param name="pageModels">Page model types passed to the spider constructor.</param>
/// <returns>The newly created spider.</returns>
public static OoSpider Create(Site site, IPageModelPipeline pageModelPipeline, params Type[] pageModels)
    => new OoSpider(null, site, pageModelPipeline, pageModels);
/// <summary>
/// Creates an <see cref="OoSpider"/> with the given identifier and page models,
/// passing <c>null</c> as the page model pipeline.
/// </summary>
/// <param name="identify">Identifier passed to the spider constructor.</param>
/// <param name="site">Site to crawl.</param>
/// <param name="pageModels">Page model types passed to the spider constructor.</param>
/// <returns>The newly created spider.</returns>
public static OoSpider Create(string identify, Site site, params Type[] pageModels)
    => new OoSpider(identify, site, null, pageModels);
/// <summary>
/// Builds start data for <paramref name="site"/>; the concrete behavior is
/// defined by each derived class.
/// </summary>
/// <param name="site">
/// The site to operate on. NOTE(review): implementations are presumably
/// expected to add start requests to it — confirm against the overrides.
/// </param>
/// <param name="obj">
/// Implementation-defined input; its expected runtime type is not fixed by
/// this declaration.
/// </param>
public abstract void Build(Site site, dynamic obj);
/// <summary>
/// Creates an entity processor for the given site.
/// </summary>
/// <param name="site">Site assigned to the processor's <c>Site</c> member.</param>
public EntityProcessor(Site site)
{
    Site = site;
}
/// <summary>
/// Adds one start request for every value from <c>From</c> to <c>To</c>
/// (inclusive, step <c>Interval</c>), formatting <c>FormateString</c> with
/// that value.
/// </summary>
/// <param name="site">Site that receives the generated start requests.</param>
/// <param name="obj">Not used by this implementation.</param>
public override void Build(Site site, dynamic obj)
{
    // A single dictionary, seeded from Extras, is shared by every generated request.
    var sharedData = new Dictionary<string, object>();
    if (Extras != null)
    {
        foreach (var entry in Extras)
        {
            sharedData.Add(entry.Key, entry.Value);
        }
    }

    for (int current = From; current <= To; current += Interval)
    {
        string url = string.Format(FormateString, current);

        var request = new Request(url, 1, sharedData)
        {
            PostBody = PostBody,
            Origin = Origin,
            Method = Method,
            Referer = Referer
        };
        site.AddStartRequest(request);
    }
}
/// <summary>
/// Creates a page processor configured with the given site and extract rules.
/// </summary>
/// <param name="site">Site assigned to the processor's <c>Site</c> member.</param>
/// <param name="extractRules">Rules stored in <c>_extractRules</c>.</param>
public ConfigurablePageProcessor(Site site, List<ExtractRule> extractRules)
{
    _extractRules = extractRules;
    Site = site;
}
/// <summary>
/// Replaces the site stored on this processor.
/// </summary>
/// <param name="site">The new site stored in <c>_site</c>.</param>
/// <returns>This instance, so calls can be chained.</returns>
public CompositePageProcessor SetSite(Site site)
{
    _site = site;

    return this;
}
/// <summary>
/// Sample entry point: runs a spider over http://flashsword20.iteye.com/blog
/// for <c>IteyeBlog</c>.
/// </summary>
public static void Run()
{
    Site blogSite = new Site();
    blogSite.AddStartUrl("http://flashsword20.iteye.com/blog");

    var spider = OoSpider.Create(blogSite, typeof(IteyeBlog));
    spider.Run();
}