/// <summary>
/// Converts a selector description into the concrete selector it describes.
/// </summary>
/// <param name="selector">Selector description (type, expression, optional arguments); may be null.</param>
/// <returns>The matching selector, or null when <paramref name="selector"/> is null.</returns>
/// <exception cref="SpiderException">
/// Thrown when the regex argument is not an integer group number, or when the selector type is not handled here.
/// </exception>
public static ISelector ToSelector(this Selector selector)
{
    if (selector == null)
    {
        return null;
    }

    string expression = selector.Expression;
    switch (selector.Type)
    {
        case SelectorType.Css:
        {
            // NotNullExpression is defined elsewhere; presumably it rejects a missing expression.
            NotNullExpression(selector);
            return Selectors.Css(expression);
        }
        case SelectorType.Enviroment:
        {
            // This branch deliberately skips the NotNullExpression check, as in the original code.
            return Selectors.Enviroment(expression);
        }
        case SelectorType.JsonPath:
        {
            NotNullExpression(selector);
            return Selectors.JsonPath(expression);
        }
        case SelectorType.Regex:
        {
            NotNullExpression(selector);
            if (string.IsNullOrEmpty(selector.Arguments))
            {
                return Selectors.Regex(expression);
            }

            // When arguments are supplied they must be a single integer: the capture group to use.
            int group;
            if (int.TryParse(selector.Arguments, out group))
            {
                return Selectors.Regex(expression, group);
            }

            throw new SpiderException("Regex argument should be a number set to group: " + selector);
        }
        case SelectorType.XPath:
        {
            NotNullExpression(selector);
            return Selectors.XPath(expression);
        }
        default:
        {
            // Message typo fixed ("unsupoort" -> "unsupported").
            throw new SpiderException($"Selector {selector} unsupported");
        }
    }
}
/// <summary>
/// Builds one <c>AirLineModel</c> per element of the JSON array in the page, stores the list
/// as a result item, and queues a destination-port request for every element whose
/// <c>is_default</c> value is non-empty.
/// </summary>
/// <param name="page">Page whose <c>Selectable</c> is queried with JSONPath expressions.</param>
protected override void Handle(Page page)
{
    // Query the Selectable and build the data objects we want.
    var portNodes = page.Selectable.SelectList(Selectors.JsonPath("$.[*]")).Nodes();
    List<AirLineModel> results = new List<AirLineModel>();

    foreach (var portNode in portNodes)
    {
        var info = new AirLineModel();
        info.name = portNode.Select(Selectors.JsonPath("$.port_name_cn")).GetValue();
        info.code = portNode.Select(Selectors.JsonPath("$.port_code")).GetValue();

        // Evaluate the JSONPath once instead of once per comparison.
        var isDefault = portNode.Select(Selectors.JsonPath("$.is_default")).GetValue();
        bool followPort = !string.IsNullOrEmpty(isDefault);

        // type "1" marks entries with a non-empty is_default value; everything else is "2".
        info.type = followPort ? "1" : "2";
        info.bpid = "3";
        results.Add(info);

        if (followPort)
        {
            // NOTE(review): the token below is hard-coded in source; consider moving it to configuration.
            var url = "http://www.51eumex.com/port/search_dest_port.json?startPortCode=" + info.code + "&token=b4b147bc522828731f1a016bfa72c073-1504522840550-0-364629a1e95e9f9450ab945ae3adeeb0-35ebd44cfa19c0450152121f332cc4fc-0-44cbce77ea242ed3b5ba50d4a78f31a1-0";
            page.AddTargetRequest(url);
        }
    }

    // Store under a custom key in the page object so the Pipeline can pick it up.
    page.AddResultItem("spiderproject.EumexAirLine+AirLineModel", results);
}
/// <summary>
/// Builds one <c>AirLineModel</c> per element of the JSON array in the page, stores the list
/// as a result item, and queues a follow-up request for every element that has a non-empty
/// <c>pol</c> value.
/// </summary>
/// <param name="page">Page whose <c>Selectable</c> is queried with JSONPath expressions.</param>
protected override void Handle(Page page)
{
    // Query the Selectable and build the data objects we want.
    var entryNodes = page.Selectable.SelectList(Selectors.JsonPath("$.[*]")).Nodes();
    List<AirLineModel> results = new List<AirLineModel>();

    foreach (var entryNode in entryNodes)
    {
        var info = new AirLineModel();
        info.name = "";

        // Evaluate each JSONPath once; "$.gk" is only consulted when "$.pol" is missing or empty.
        var pol = entryNode.Select(Selectors.JsonPath("$.pol")).GetValue();
        if (!string.IsNullOrEmpty(pol))
        {
            info.code = pol;
            info.type = "1";
        }
        else
        {
            var gk = entryNode.Select(Selectors.JsonPath("$.gk")).GetValue();
            if (!string.IsNullOrEmpty(gk))
            {
                info.code = gk;
                info.type = "2";
            }
        }

        // Entries with neither value are still recorded, with code and type left unset (as before).
        info.bpid = "2";
        results.Add(info);

        if (info.type == "1")
        {
            page.AddTargetRequest($"https://ccliquote.lflogistics.net/ajaxHandler.asp?act=gk&pol={info.code}");
        }
    }

    // Store under a custom key in the page object so the Pipeline can pick it up.
    page.AddResultItem("spiderproject.CcliquoteAirLine+AirLineModel", results);
}
/// <summary>
/// Sets <c>SkipTargetRequests</c> on the page when the JSON value at
/// <c>$.mods.pager.status</c> is anything other than "show".
/// </summary>
/// <param name="page">Page to inspect; its <c>SkipTargetRequests</c> flag may be set to true.</param>
public void Handle(ref Page page)
{
    var statusSelector = Selectors.JsonPath("$.mods.pager.status");
    var pagerStatus = page.Selectable().Select(statusSelector).GetValue();

    if (pagerStatus == "show")
    {
        // Pager is shown: leave the flag untouched.
        return;
    }

    page.SkipTargetRequests = true;
}
/// <summary>
/// Sets <c>SkipTargetUrls</c> on the page when the JSON value at
/// <c>$.mods.pager.status</c> is anything other than "show".
/// </summary>
/// <param name="page">Page to inspect; its <c>SkipTargetUrls</c> flag may be set to true.</param>
/// <param name="downloader">Not used by this handler.</param>
/// <param name="spider">Not used by this handler.</param>
public override void Handle(ref Page page, IDownloader downloader, ISpider spider)
{
    var statusSelector = Selectors.JsonPath("$.mods.pager.status");
    var pagerStatus = page.Selectable.Select(statusSelector).GetValue();

    if (pagerStatus == "show")
    {
        // Pager is shown: leave the flag untouched.
        return;
    }

    page.SkipTargetUrls = true;
}
/// <summary>
/// Loads test.json, selects every <c>link</c> value with JSONPath and checks the
/// number of links and the first link.
/// </summary>
public void SelectLinks()
{
    var selectable = new JsonSelectable(File.ReadAllText("test.json"));

    var linkSelector = Selectors.JsonPath("$.[*].link");
    var links = selectable.SelectList(linkSelector)
        .Select(node => node.Value)
        .ToList();

    Assert.Equal(8, links.Count);
    Assert.Equal("http://viettelglobal.vn/", links[0]);
}
/// <summary>
/// Creates the concrete selector described by <paramref name="selector"/>.
/// </summary>
/// <param name="selector">Selector description; may be null.</param>
/// <returns>
/// The matching selector, or null when <paramref name="selector"/> is null or its expression is null or empty.
/// </returns>
/// <exception cref="SpiderException">
/// Thrown when the regex argument is not an integer group number, or when the selector type is not handled here.
/// </exception>
public static ISelector Parse(BaseSelector selector)
{
    string expression = selector?.Expression;
    if (string.IsNullOrEmpty(expression))
    {
        return null;
    }

    switch (selector.Type)
    {
        case SelectorType.XPath:
            return Selectors.XPath(expression);

        case SelectorType.Css:
            return Selectors.Css(expression);

        case SelectorType.JsonPath:
            return Selectors.JsonPath(expression);

        case SelectorType.Enviroment:
            return Selectors.Enviroment(expression);

        case SelectorType.Regex:
        {
            // Without an argument the plain regex selector is used.
            if (string.IsNullOrEmpty(selector.Argument))
            {
                return Selectors.Regex(expression);
            }

            // Otherwise the argument must be the integer capture group.
            int groupIndex;
            if (!int.TryParse(selector.Argument, out groupIndex))
            {
                throw new SpiderException("Regex argument should be a number set to group: " + selector);
            }

            return Selectors.Regex(expression, groupIndex);
        }

        default:
            throw new SpiderException("Not support selector: " + selector);
    }
}
/// <summary>
/// Converts a selector attribute into the concrete selector it describes.
/// </summary>
/// <param name="selector">Selector attribute (type, expression, optional arguments); may be null.</param>
/// <returns>The matching selector, or null when <paramref name="selector"/> is null.</returns>
/// <exception cref="ArgumentException">Thrown when the regex argument is not an integer group number.</exception>
/// <exception cref="NotSupportedException">Thrown when the selector type is not handled here.</exception>
public static ISelector ToSelector(this Attribute.Selector selector)
{
    if (selector == null)
    {
        return null;
    }

    string expression = selector.Expression;
    switch (selector.Type)
    {
        case SelectorType.XPath:
            // NotNullExpression is defined elsewhere; presumably it rejects a missing expression.
            NotNullExpression(selector);
            return Selectors.XPath(expression);

        case SelectorType.Css:
            NotNullExpression(selector);
            return Selectors.Css(expression);

        case SelectorType.JsonPath:
            NotNullExpression(selector);
            return Selectors.JsonPath(expression);

        case SelectorType.Regex:
        {
            NotNullExpression(selector);

            string rawArguments = selector.Arguments;
            if (string.IsNullOrEmpty(rawArguments))
            {
                return Selectors.Regex(expression);
            }

            // When arguments are supplied they must be a single integer: the capture group to use.
            if (!int.TryParse(rawArguments, out var groupIndex))
            {
                throw new ArgumentException($"Regex argument should be a number set to group: {selector}");
            }

            return Selectors.Regex(expression, groupIndex);
        }

        default:
            throw new NotSupportedException($"{selector} unsupported");
    }
}
/// <summary>
/// Converts a selector description into the concrete selector it describes.
/// </summary>
/// <param name="selector">
/// Selector description; may be null. For regex selectors, <c>Arguments</c> is either empty or
/// a comma-separated pair: a <see cref="RegexOptions"/> name followed by a replacement string.
/// </param>
/// <returns>The matching selector, or null when <paramref name="selector"/> is null.</returns>
/// <exception cref="ArgumentException">
/// Thrown when regex arguments are supplied but do not contain both parts, or when the first
/// part is not a valid <see cref="RegexOptions"/> name.
/// </exception>
/// <exception cref="NotSupportedException">Thrown when the selector type is not handled here.</exception>
public static ISelector ToSelector(this Selector selector)
{
    if (selector == null)
    {
        return null;
    }

    var expression = selector.Expression;
    switch (selector.Type)
    {
        case SelectorType.Css:
        {
            // NotNullExpression is defined elsewhere; presumably it rejects a missing expression.
            NotNullExpression(selector);
            return Selectors.Css(expression);
        }
        case SelectorType.JsonPath:
        {
            NotNullExpression(selector);
            return Selectors.JsonPath(expression);
        }
        case SelectorType.Regex:
        {
            NotNullExpression(selector);
            if (string.IsNullOrEmpty(selector.Arguments))
            {
                return Selectors.Regex(expression);
            }

            var arguments = selector.Arguments.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries);

            // Both parts are required; previously a single-part value failed with an
            // IndexOutOfRangeException that gave no hint about the expected format.
            if (arguments.Length < 2)
            {
                throw new ArgumentException($"Regex arguments should be '<RegexOptions>,<replacement>': {selector}");
            }

            var options = (RegexOptions)Enum.Parse(typeof(RegexOptions), arguments[0]);
            var replacement = arguments[1];
            return Selectors.Regex(expression, options, replacement);
        }
        case SelectorType.XPath:
        {
            NotNullExpression(selector);
            return Selectors.XPath(expression);
        }
        default:
        {
            throw new NotSupportedException($"{selector} unsupported");
        }
    }
}
/// <summary>
/// Creates the concrete selector described by <paramref name="selector"/>.
/// </summary>
/// <param name="selector">Selector description; may be null.</param>
/// <returns>
/// The matching selector, or null when <paramref name="selector"/> is null or its expression is null or empty.
/// </returns>
/// <exception cref="SpiderException">Thrown when the selector type is not handled here.</exception>
public static ISelector Parse(Selector selector)
{
    string expression = selector?.Expression;
    if (string.IsNullOrEmpty(expression))
    {
        return null;
    }

    switch (selector.Type)
    {
        case SelectorType.XPath:
            return Selectors.XPath(expression);

        case SelectorType.Css:
            return Selectors.Css(expression);

        case SelectorType.Regex:
            return Selectors.Regex(expression);

        case SelectorType.JsonPath:
            return Selectors.JsonPath(expression);

        case SelectorType.Enviroment:
            return Selectors.Enviroment(expression);

        default:
            throw new SpiderException("Not support selector: " + selector);
    }
}
/// <summary>
/// Constructor. Creates the model for <c>T</c>, registers one follow-request querier per
/// expression of each follow selector, and adds one required validator per distinct URL pattern.
/// </summary>
public DataParser()
{
    Model = new Model<T>();

    var followSelectors = Model.FollowRequestSelectors;
    if (followSelectors == null)
    {
        // No follow selectors means no queriers and no patterns to register.
        return;
    }

    var distinctPatterns = new HashSet<string>();
    foreach (var follow in followSelectors)
    {
        switch (follow.SelectorType)
        {
            case SelectorType.Environment:
            {
                // Environment selectors are not turned into queriers; only a warning is logged.
                Logger.LogWarning("SelectorType of follow selector is not supported");
                break;
            }
            case SelectorType.XPath:
            {
                foreach (var xpath in follow.Expressions)
                {
                    AddFollowRequestQuerier(Selectors.XPath(xpath));
                }

                break;
            }
            case SelectorType.Css:
            {
                foreach (var css in follow.Expressions)
                {
                    AddFollowRequestQuerier(Selectors.Css(css));
                }

                break;
            }
            case SelectorType.JsonPath:
            {
                foreach (var jsonPath in follow.Expressions)
                {
                    AddFollowRequestQuerier(Selectors.JsonPath(jsonPath));
                }

                break;
            }
            case SelectorType.Regex:
            {
                foreach (var regex in follow.Expressions)
                {
                    AddFollowRequestQuerier(Selectors.Regex(regex));
                }

                break;
            }
        }

        // Patterns are collected in a set so each distinct pattern yields a single validator.
        foreach (var urlPattern in follow.Patterns)
        {
            distinctPatterns.Add(urlPattern);
        }
    }

    foreach (var urlPattern in distinctPatterns)
    {
        AddRequiredValidator(req => Regex.IsMatch(req.RequestUri.ToString(), urlPattern));
    }
}