/// <summary>
/// Decides whether paging should stop by comparing the current page number with the
/// total page count, both read from the page through their configured selectors.
/// </summary>
/// <param name="page">The page whose content is queried.</param>
/// <returns>
/// <c>true</c> when the current page number equals the total page count; otherwise <c>false</c>.
/// A value that is missing, empty or not a valid integer is treated as "unknown" and never matches.
/// </returns>
public override bool CanStop(Page page)
{
    // Two different sentinels guarantee that "total unknown" never equals "current unknown".
    int totalPage = -2000;
    if (TotalPageSelector != null)
    {
        string totalStr = page.Selectable.Select(SelectorUtil.GetSelector(TotalPageSelector))?.GetValue();
        int parsedTotal;
        // Parse into a temporary: TryParse writes 0 on failure, which must not overwrite the sentinel.
        if (!string.IsNullOrEmpty(totalStr) && int.TryParse(totalStr, out parsedTotal))
        {
            totalPage = parsedTotal;
        }
    }

    int currentPage = -1000;
    if (CurrenctPageSelector != null)
    {
        string currentStr = page.Selectable.Select(SelectorUtil.GetSelector(CurrenctPageSelector))?.GetValue();
        int parsedCurrent;
        if (!string.IsNullOrEmpty(currentStr) && int.TryParse(currentStr, out parsedCurrent))
        {
            currentPage = parsedCurrent;
        }
    }

    return currentPage == totalPage;
}
/// <summary>
/// Decides whether paging should stop by comparing dates found on the page with the
/// configured stop dates in <c>Stoppers</c>.
/// </summary>
/// <param name="page">The page whose content is queried.</param>
/// <returns>
/// <c>true</c> when the selector yields no value list at all, or when any extracted date is
/// earlier than (if <c>IsBefore</c>) or later than (otherwise) any stop date; otherwise <c>false</c>.
/// </returns>
public override bool CanStop(Page page)
{
    var rawValues = page.Selectable.SelectList(SelectorUtil.GetSelector(CurrenctPageSelector)).GetValues();
    if (rawValues == null)
    {
        return true;
    }

    // Phase 1: run every extracted value through the optional formatter chain.
    List<string> formattedValues = new List<string>();
    foreach (var rawValue in rawValues)
    {
        var text = rawValue;
        if (CurrenctPageFormatters != null)
        {
            foreach (var formatter in CurrenctPageFormatters)
            {
                text = formatter.Formate(text);
            }
        }

        formattedValues.Add(text);
    }

    // Phase 2: compare each formatted value, parsed as a date, with every stop date.
    foreach (var formattedValue in formattedValues)
    {
        DateTime pageDate = DateTime.Parse(formattedValue.ToString());
        bool stopWhenEarlier = IsBefore;

        foreach (var stopper in Stoppers)
        {
            DateTime boundary = DateTime.Parse(stopper);
            bool boundaryCrossed = stopWhenEarlier ? pageDate < boundary : pageDate > boundary;
            if (boundaryCrossed)
            {
                return true;
            }
        }
    }

    return false;
}
/// <summary>
/// Builds the runtime <c>Model.TargetUrlExtractor</c> from this configuration object.
/// </summary>
/// <returns>
/// An extractor carrying this object's <c>Formatters</c>, the selector resolved from
/// <c>Region</c>, and one <see cref="Regex"/> per entry of <c>Patterns</c> that is not
/// null, empty or whitespace-only.
/// </returns>
internal Model.TargetUrlExtractor GetTargetUrlExtractInfo()
{
    var extractor = new Model.TargetUrlExtractor();
    extractor.Formatters = Formatters;
    extractor.Region = SelectorUtil.GetSelector(Region);

    foreach (var pattern in Patterns)
    {
        var trimmed = pattern?.Trim();
        if (string.IsNullOrEmpty(trimmed))
        {
            continue;
        }

        // The regex is built from the pattern as configured, not from the trimmed copy.
        extractor.Patterns.Add(new Regex(pattern));
    }

    return extractor;
}
/// <summary>
/// Decides whether paging should stop by comparing dates found on the page with the
/// configured stop dates in <c>Stoppers</c>.
/// </summary>
/// <param name="page">The page whose content is queried.</param>
/// <returns>
/// <c>true</c> when the selector yields no value list at all, or when any extracted date is
/// earlier than (if <c>IsBefore</c>) or later than (otherwise) any stop date; otherwise <c>false</c>.
/// </returns>
public override bool CanStop(Page page)
{
    var rawValues = page.Selectable.SelectList(SelectorUtil.GetSelector(CurrenctPageSelector)).GetValues();
    if (rawValues == null)
    {
        return true;
    }

    var dateTexts = (List<string>)rawValues;
    foreach (var dateText in dateTexts)
    {
        DateTime pageDate = DateTime.Parse(dateText.ToString());
        bool stopWhenEarlier = IsBefore;

        foreach (var stopper in Stoppers)
        {
            DateTime boundary = DateTime.Parse(stopper);
            bool boundaryCrossed = stopWhenEarlier ? pageDate < boundary : pageDate > boundary;
            if (boundaryCrossed)
            {
                return true;
            }
        }
    }

    return false;
}
/// <summary>
/// Extracts entity data from a page according to <c>_entityDefine</c>.
/// </summary>
/// <remarks>
/// Every configured environment value is first selected from the page and stored on the
/// request through <c>PutExtra</c>. The entity is then extracted either once, or once per
/// matched node when the definition has a selector and is flagged <c>Multi</c>.
/// </remarks>
/// <param name="page">The page to extract from.</param>
/// <returns>
/// In multi mode a <c>List&lt;JObject&gt;</c> with the non-null results, or <c>null</c> when no
/// node matched. In single mode the result of <c>ProcessSingle</c>, or <c>null</c> when the
/// entity selector matched nothing.
/// </returns>
public dynamic Process(Page page)
{
    if (_enviromentValues != null && _enviromentValues.Count > 0)
    {
        foreach (var enviromentValue in _enviromentValues)
        {
            string name = enviromentValue.Name;
            // Select may yield null (the single-entity path below checks for it), so a miss
            // is stored as a null value instead of failing the whole page.
            var value = page.Selectable.Select(SelectorUtil.GetSelector(enviromentValue.Selector))?.GetValue();
            page.Request.PutExtra(name, value);
        }
    }

    ISelector selector = SelectorUtil.GetSelector(_entityDefine.Selector);
    // Without a selector there is nothing to enumerate, so multi mode is impossible.
    bool isMulti = selector != null && _entityDefine.Multi;

    if (isMulti)
    {
        var list = page.Selectable.SelectList(selector).Nodes();
        if (list == null || list.Count == 0)
        {
            return null;
        }

        var countToken = _entityDefine.Limit;
        if (countToken != null)
        {
            list = list.Take(countToken.Value).ToList();
        }

        List<JObject> result = new List<JObject>();
        int index = 0;
        foreach (var item in list)
        {
            try
            {
                JObject obj = ProcessSingle(page, item, _entityDefine, index);
                if (obj != null)
                {
                    result.Add(obj);
                }
            }
            catch (Exception)
            {
                // Best effort: one node that fails to extract must not discard the others.
            }

            // Advance outside the try block so the index always reflects the node's position
            // in the list, even when extraction of an earlier node threw.
            index++;
        }

        return result;
    }
    else
    {
        ISelectable select;
        if (selector == null)
        {
            select = page.Selectable;
        }
        else
        {
            select = page.Selectable.Select(selector);
            if (select == null)
            {
                return null;
            }
        }

        return ProcessSingle(page, select, _entityDefine, 0);
    }
}
/// <summary>
/// Extracts the value of one data token (a plain field or a nested entity) from a selectable node.
/// </summary>
/// <param name="item">The node the field selector is applied to.</param>
/// <param name="page">The page being processed; used to resolve environment values.</param>
/// <param name="field">The field or entity definition to extract.</param>
/// <param name="index">Index of the current item, forwarded to <c>GetEnviromentValue</c>.</param>
/// <returns>
/// <c>null</c> when the field has no selector. For a plain field: a formatted string, a
/// <c>JArray</c> of formatted strings (multi), or a count (a string for multi, 0/1 for single)
/// when the option is <c>Count</c>. For an entity: a <c>JObject</c>, or a <c>JArray</c> of
/// <c>JObject</c> when multi.
/// </returns>
private dynamic ExtractField(ISelectable item, Page page, DataToken field, int index)
{
    ISelector selector = SelectorUtil.GetSelector(field.Selector);
    if (selector == null)
    {
        return null;
    }

    var f = field as Field;
    List<Formatter.Formatter> formatters = GenerateFormatter(f?.Formatters);

    bool isEntity = field is Entity;
    if (!isEntity)
    {
        string tmpValue;
        if (selector is EnviromentSelector)
        {
            var enviromentSelector = selector as EnviromentSelector;
            tmpValue = GetEnviromentValue(enviromentSelector.Field, page, index);
            foreach (var formatter in formatters)
            {
                tmpValue = formatter.Formate(tmpValue);
            }

            return tmpValue;
        }

        // Everything below needs the extraction option, which only a Field carries.
        var valueField = (Field)field;

        if (field.Multi)
        {
            var propertyValues = item.SelectList(selector).Nodes();
            if (valueField.Option == PropertyExtractBy.ValueOption.Count)
            {
                var tempValue = propertyValues != null ? propertyValues.Count.ToString() : "-1";
                return tempValue;
            }

            List<string> results = new List<string>();
            // The Count branch above treats a null node list as possible; mirror that here
            // and produce an empty array instead of failing.
            if (propertyValues != null)
            {
                foreach (var propertyValue in propertyValues)
                {
                    string tmp = propertyValue.GetValue(valueField.Option == PropertyExtractBy.ValueOption.PlainText);
                    foreach (var formatter in formatters)
                    {
                        tmp = formatter.Formate(tmp);
                    }

                    results.Add(tmp);
                }
            }

            return new JArray(results);
        }

        tmpValue = item.Select(selector)?.GetValue(valueField.Option == PropertyExtractBy.ValueOption.PlainText);
        if (valueField.Option == PropertyExtractBy.ValueOption.Count)
        {
            return tmpValue == null ? 0 : 1;
        }

        tmpValue = formatters.Aggregate(tmpValue, (current, formatter) => formatter.Formate(current));
        return tmpValue;
    }

    var entity = (Entity)field;
    if (field.Multi)
    {
        JArray objs = new JArray();
        var selectables = item.SelectList(selector).Nodes();
        if (selectables != null)
        {
            foreach (var selectable in selectables)
            {
                JObject obj = new JObject();
                foreach (var child in entity.Fields)
                {
                    obj.Add(child.Name, ExtractField(selectable, page, child, 0));
                }

                objs.Add(obj);
            }
        }

        return objs;
    }
    else
    {
        JObject obj = new JObject();
        var selectable = item.Select(selector);
        foreach (var child in entity.Fields)
        {
            // Recurse into the child definition. Passing the parent entity here would
            // re-enter this same branch and never reach the children.
            obj.Add(child.Name, ExtractField(selectable, page, child, 0));
        }

        return obj;
    }
}
/// <summary>
/// Extracts the value of one field definition from a selectable node. A field that has
/// child fields is treated as a composite and extracted recursively.
/// </summary>
/// <param name="item">The node the field selector is applied to.</param>
/// <param name="page">The page being processed; used to resolve environment values.</param>
/// <param name="field">The field definition to extract.</param>
/// <param name="index">Index of the current item, forwarded to <c>GetEnviromentValue</c>.</param>
/// <returns>
/// <c>null</c> when the field has no selector. For a composite field a <c>JObject</c>.
/// Otherwise a formatted string, a <c>JArray</c> of formatted strings (multi), or a count
/// (a string for multi, 0/1 for single) when the option is <c>Count</c>.
/// </returns>
private dynamic ExtractField(ISelectable item, Page page, Field field, int index)
{
    ISelector fieldSelector = SelectorUtil.GetSelector(field.Selector);
    if (fieldSelector == null)
    {
        return null;
    }

    List<Formatter.Formatter> formatterChain = GenerateFormatter(field.Formatters);

    bool hasChildren = field.Fields != null && field.Fields.Count > 0;
    if (hasChildren)
    {
        JObject composite = new JObject();
        foreach (var child in field.Fields)
        {
            if (!child.Multi)
            {
                var singleNode = item.Select(SelectorUtil.GetSelector(child.Selector));
                composite.Add(child.Name, ExtractField(singleNode, page, child, 0));
                continue;
            }

            var childNodes = item.SelectList(SelectorUtil.GetSelector(child.Selector)).Nodes();
            foreach (var childNode in childNodes)
            {
                // NOTE(review): the same property name is added once per matched node, so a
                // second match presumably fails inside JObject.Add; confirm the intended shape.
                composite.Add(child.Name, ExtractField(childNode, page, child, childNodes.IndexOf(childNode)));
            }
        }

        return composite;
    }

    var environmentSelector = fieldSelector as EnviromentSelector;
    if (environmentSelector != null)
    {
        string environmentValue = GetEnviromentValue(environmentSelector.Field, page, index);
        foreach (var formatter in formatterChain)
        {
            environmentValue = formatter.Formate(environmentValue);
        }

        return environmentValue;
    }

    if (field.Multi)
    {
        var matchedNodes = item.SelectList(fieldSelector).Nodes();
        if (field.Option == PropertyExtractBy.ValueOption.Count)
        {
            string countText;
            if (matchedNodes != null)
            {
                countText = matchedNodes.Count.ToString();
            }
            else
            {
                countText = "-1";
            }

            return countText;
        }

        List<string> formattedValues = new List<string>();
        foreach (var node in matchedNodes)
        {
            string nodeValue = node.GetValue(field.Option == PropertyExtractBy.ValueOption.PlainText);
            foreach (var formatter in formatterChain)
            {
                nodeValue = formatter.Formate(nodeValue);
            }

            formattedValues.Add(nodeValue);
        }

        return new JArray(formattedValues);
    }

    string singleValue = item.Select(fieldSelector)?.GetValue(field.Option == PropertyExtractBy.ValueOption.PlainText);
    if (field.Option == PropertyExtractBy.ValueOption.Count)
    {
        // In Count mode a single field reports only presence: 1 when a value exists, else 0.
        if (singleValue == null)
        {
            return 0;
        }

        return 1;
    }

    singleValue = formatterChain.Aggregate(singleValue, (current, formatter) => formatter.Formate(current));
    return singleValue;
}
/// <summary>
/// Extracts entity data from a page according to the JSON entity definition in <c>_entityDefine</c>.
/// </summary>
/// <remarks>
/// Every configured environment value is first selected from the page and stored on the
/// request through <c>PutExtra</c>. The definition's <c>$.Multi</c> flag then decides whether
/// one entity or a list of entities is extracted. An absent flag counts as <c>false</c>.
/// </remarks>
/// <param name="page">The page to extract from.</param>
/// <returns>
/// In multi mode a <c>List&lt;JObject&gt;</c> with the non-null results, or <c>null</c> when no
/// node matched. In single mode the result of <c>ProcessSingle</c>, or <c>null</c> when the
/// entity selector matched nothing.
/// </returns>
/// <exception cref="SpiderExceptoin">The definition is multi but has no selector.</exception>
public dynamic Process(Page page)
{
    if (_enviromentValues != null && _enviromentValues.Count > 0)
    {
        foreach (var enviromentValue in _enviromentValues)
        {
            string name = enviromentValue.Name;
            // Select may yield null (the single-entity path below checks for it), so a miss
            // is stored as a null value instead of failing the whole page.
            var value = page.Selectable.Select(SelectorUtil.GetSelector(enviromentValue.Selector))?.GetValue();
            page.Request.PutExtra(name, value);
        }
    }

    // Null-safe token reads: a definition that omits "Multi" or "Selector" is handled the
    // same way as one that sets them to false / null.
    bool isMulti = _entityDefine.SelectToken("$.Multi")?.ToObject<bool>() ?? false;
    ISelector selector = SelectorUtil.GetSelector(_entityDefine.SelectToken("$.Selector")?.ToObject<Selector>());

    if (isMulti)
    {
        if (selector == null)
        {
            throw new SpiderExceptoin("Selector can't be null when set isMulti true.");
        }

        var list = page.Selectable.SelectList(selector).Nodes();
        if (list == null || list.Count == 0)
        {
            return null;
        }

        var countToken = _entityDefine.SelectToken("$.Count");
        if (countToken != null)
        {
            int count = countToken.ToObject<int>();
            list = list.Take(count).ToList();
        }

        List<JObject> result = new List<JObject>();
        int index = 0;
        foreach (var item in list)
        {
            JObject obj = ProcessSingle(page, item, _entityDefine, index);
            if (obj != null)
            {
                result.Add(obj);
            }

            index++;
        }

        return result;
    }
    else
    {
        ISelectable select;
        if (selector == null)
        {
            select = page.Selectable;
        }
        else
        {
            select = page.Selectable.Select(selector);
            if (select == null)
            {
                return null;
            }
        }

        return ProcessSingle(page, select, _entityDefine, 0);
    }
}
/// <summary>
/// Extracts one entity from a selectable node, driven by a JSON entity definition.
/// </summary>
/// <remarks>
/// For every token in <c>$.Fields[*]</c> of <paramref name="entityDefine"/> a property named
/// after the field's <c>$.Name</c> is added to the result. Fields whose selector resolves to
/// null are skipped. <c>$.Multi</c> defaults to false and <c>$.Option</c> to
/// <c>ValueOption.None</c> when absent. Whether a field is a nested entity is decided by
/// <c>VerifyIfEntity</c> on its <c>$.DataType</c> token.
///
/// Plain fields: an environment selector is resolved through <c>GetEnviromentValue</c> and
/// formatted. A multi field stores either the number of values as a string ("ERROR" when the
/// value list is null) in Count mode, or a <c>JArray</c> of formatted values. A single field
/// stores 0/1 in Count mode or the formatted value otherwise.
///
/// Entity fields: a multi field recurses into every matched node with the field's data type as
/// the definition and stores the non-null results as a <c>JArray</c>. A single field recurses
/// once and stores a copy of the result.
///
/// Finally, when the definition has a <c>$.Stopping</c> token, the value of the named property
/// is checked and <c>page.MissTargetUrls</c> is set to true when that value is empty or
/// <c>NeedStop</c> returns true for it.
/// </remarks>
/// <param name="page">The page being processed; receives the <c>MissTargetUrls</c> flag.</param>
/// <param name="item">The node field selectors are applied to.</param>
/// <param name="entityDefine">The JSON definition of the entity to extract.</param>
/// <param name="index">Index of the current item, forwarded to <c>GetEnviromentValue</c>.</param>
/// <returns>
/// The extracted object, or <c>null</c> as soon as a single (non-multi) nested entity's selector
/// matches nothing; properties collected up to that point are discarded.
/// </returns>
/// <exception cref="SpiderExceptoin">The stopping property refers to a nested entity.</exception>
// NOTE(review): the "$.Count" limit for multi fields is read from the _entityDefine member,
// not from the entityDefine parameter, so nested entities use the root definition's limit;
// confirm this is intended.
// NOTE(review): in the multi, non-Count path propertyValues is used without a null check,
// although the Count path treats null as possible.
// NOTE(review): the `if (tempValue == "ERROR") { }` branch is empty; presumably logging was
// intended there.
// NOTE(review): the result of the single nested-entity recursion is passed to `new JObject(...)`
// without a null check, although this method can return null.
// NOTE(review): in the stopping section `field` and `datatype` are dereferenced without null
// checks; presumably this fails when no field matches stopping.PropertyName. Verify.
private JObject ProcessSingle(Page page, ISelectable item, JToken entityDefine, int index) { JObject dataItem = new JObject(); foreach (var field in entityDefine.SelectTokens("$.Fields[*]")) { ISelector selector = SelectorUtil.GetSelector(field.SelectToken("$.Selector").ToObject <Selector>()); if (selector == null) { continue; } var datatype = field.SelectToken("$.DataType"); bool isEntity = VerifyIfEntity(datatype); var multiToken = field.SelectToken("$.Multi"); bool isMulti = multiToken?.ToObject <bool>() ?? false; var optionToken = field.SelectToken("$.Option"); var option = optionToken?.ToObject <PropertyExtractBy.ValueOption>() ?? PropertyExtractBy.ValueOption.None; string propertyName = field.SelectToken("$.Name").ToString(); List <Formatter.Formatter> formatters = GenerateFormatter(field.SelectTokens("$.Formatters[*]")); if (!isEntity) { string tmpValue; if (selector is EnviromentSelector) { var enviromentSelector = selector as EnviromentSelector; tmpValue = GetEnviromentValue(enviromentSelector.Field, page, index); foreach (var formatter in formatters) { tmpValue = formatter.Formate(tmpValue); } dataItem.Add(propertyName, tmpValue); } else { if (isMulti) { var propertyValues = item.SelectList(selector).GetValue(option == PropertyExtractBy.ValueOption.PlainText); if (option == PropertyExtractBy.ValueOption.Count) { var tempValue = propertyValues != null?propertyValues.Count.ToString() : "ERROR"; if (tempValue == "ERROR") { } dataItem.Add(propertyName, tempValue); } else { var countToken = _entityDefine.SelectToken("$.Count"); if (countToken != null) { int count = countToken.ToObject <int>(); propertyValues = propertyValues.Take(count).ToList(); } List <string> results = new List <string>(); foreach (var propertyValue in propertyValues) { string tmp = propertyValue; foreach (var formatter in formatters) { tmp = formatter.Formate(tmp); } results.Add(tmp); } dataItem.Add(propertyName, new JArray(results)); } } else { tmpValue = 
item.Select(selector)?.GetValue(option == PropertyExtractBy.ValueOption.PlainText); if (option == PropertyExtractBy.ValueOption.Count) { dataItem.Add(propertyName, tmpValue == null ? 0 : 1); } else { tmpValue = formatters.Aggregate(tmpValue, (current, formatter) => formatter.Formate(current)); dataItem.Add(propertyName, tmpValue); } } } } else { if (isMulti) { var propertyValues = item.SelectList(selector).Nodes(); var countToken = _entityDefine.SelectToken("$.Count"); if (countToken != null) { int count = countToken.ToObject <int>(); propertyValues = propertyValues.Take(count).ToList(); } List <JObject> result = new List <JObject>(); int index1 = 0; foreach (var entity in propertyValues) { JObject obj = ProcessSingle(page, entity, datatype, index1); if (obj != null) { result.Add(obj); } index1++; } dataItem.Add(propertyName, new JArray(result)); } else { var select = item.Select(selector); if (select == null) { return(null); } var propertyValue = ProcessSingle(page, select, datatype, 0); dataItem.Add(propertyName, new JObject(propertyValue)); } } } var stoppingJobject = entityDefine.SelectToken("$.Stopping"); var stopping = stoppingJobject?.ToObject <Stopping>(); if (stopping != null) { var field = entityDefine.SelectToken($"$.Fields[?(@.Name == '{stopping.PropertyName}')]"); var datatype = field.SelectToken("$.DataType"); bool isEntity = VerifyIfEntity(datatype); if (isEntity) { throw new SpiderExceptoin("Can't compare with object."); } stopping.DataType = datatype.ToString().ToLower(); string value = dataItem.SelectToken($"$.{stopping.PropertyName}")?.ToString(); if (string.IsNullOrEmpty(value)) { page.MissTargetUrls = true; } else { if (stopping.NeedStop(value)) { page.MissTargetUrls = true; } } } return(dataItem); }