예제 #1
0
        /// <summary>
        /// Converts a declarative selector description into the concrete selector that performs the query.
        /// </summary>
        /// <param name="selector">Selector description; may be null.</param>
        /// <returns>
        /// The selector built for <c>selector.Type</c>, or null when <paramref name="selector"/> is null.
        /// </returns>
        /// <exception cref="SpiderException">
        /// Thrown when a Regex selector carries a non-integer argument, or when the selector type is not handled here.
        /// </exception>
        public static ISelector ToSelector(this Selector selector)
        {
            // A missing description simply yields no selector.
            if (selector == null)
            {
                return null;
            }

            string expression = selector.Expression;

            switch (selector.Type)
            {
                case SelectorType.Css:
                {
                    // NotNullExpression is defined outside this block; presumably it rejects a missing expression.
                    NotNullExpression(selector);
                    return Selectors.Css(expression);
                }

                case SelectorType.Enviroment:
                {
                    // This is the only branch that does not run the NotNullExpression check first.
                    return Selectors.Enviroment(expression);
                }

                case SelectorType.JsonPath:
                {
                    NotNullExpression(selector);
                    return Selectors.JsonPath(expression);
                }

                case SelectorType.Regex:
                {
                    NotNullExpression(selector);

                    // Without an argument the plain regex overload is used.
                    if (string.IsNullOrEmpty(selector.Arguments))
                    {
                        return Selectors.Regex(expression);
                    }

                    // Otherwise the argument must be an integer, passed on as the group.
                    int group;
                    if (int.TryParse(selector.Arguments, out group))
                    {
                        return Selectors.Regex(expression, group);
                    }

                    throw new SpiderException("Regex argument should be a number set to group: " + selector);
                }

                case SelectorType.XPath:
                {
                    NotNullExpression(selector);
                    return Selectors.XPath(expression);
                }

                default:
                {
                    // Message typo fixed ("unsupoort" -> "unsupported").
                    throw new SpiderException($"Selector {selector} unsupported");
                }
            }
        }
예제 #2
0
            /// <summary>
            /// Builds one <c>AirLineModel</c> per JSON array element of the page, queues a follow-up
            /// destination-port request for every element flagged as default, and stores the list on the page.
            /// </summary>
            /// <param name="page">Page whose selectable content is queried and which receives the results.</param>
            protected override void Handle(Page page)
            {
                // Query the page through Selectable and build the data objects we want.
                var portNodes = page.Selectable.SelectList(Selectors.JsonPath("$.[*]")).Nodes();
                List<AirLineModel> results = new List<AirLineModel>();

                foreach (var portNode in portNodes)
                {
                    var info = new AirLineModel();
                    info.name = portNode.Select(Selectors.JsonPath("$.port_name_cn")).GetValue();
                    info.code = portNode.Select(Selectors.JsonPath("$.port_code")).GetValue();

                    // Evaluate the JSONPath once instead of once per comparison.
                    var defaultFlag = portNode.Select(Selectors.JsonPath("$.is_default")).GetValue();
                    bool isDefaultPort = !string.IsNullOrEmpty(defaultFlag);

                    // Type "1" marks an element with a non-empty is_default value, "2" everything else.
                    info.type = isDefaultPort ? "1" : "2";
                    info.bpid = "3";
                    results.Add(info);

                    if (isDefaultPort)
                    {
                        // NOTE(review): the token is hard-coded in source; consider moving it to configuration.
                        var url = $"http://www.51eumex.com/port/search_dest_port.json?startPortCode={info.code}&token=b4b147bc522828731f1a016bfa72c073-1504522840550-0-364629a1e95e9f9450ab945ae3adeeb0-35ebd44cfa19c0450152121f332cc4fc-0-44cbce77ea242ed3b5ba50d4a78f31a1-0";
                        page.AddTargetRequest(url);
                    }
                }

                // Store the results on the page under a custom key so the pipeline can pick them up.
                page.AddResultItem("spiderproject.EumexAirLine+AirLineModel", results);
            }
            /// <summary>
            /// Builds one <c>AirLineModel</c> per JSON array element of the page, taking the code from
            /// <c>$.pol</c> (type "1") or, failing that, <c>$.gk</c> (type "2"); queues a follow-up request
            /// for every type "1" element and stores the list on the page.
            /// </summary>
            /// <param name="page">Page whose selectable content is queried and which receives the results.</param>
            protected override void Handle(Page page)
            {
                // Query the page through Selectable and build the data objects we want.
                var lineNodes = page.Selectable.SelectList(Selectors.JsonPath("$.[*]")).Nodes();
                List<AirLineModel> results = new List<AirLineModel>();

                foreach (var lineNode in lineNodes)
                {
                    var info = new AirLineModel();
                    info.name = "";

                    // Each JSONPath is evaluated once; $.gk is only consulted when $.pol is empty.
                    var pol = lineNode.Select(Selectors.JsonPath("$.pol")).GetValue();
                    if (!string.IsNullOrEmpty(pol))
                    {
                        info.code = pol;
                        info.type = "1";
                    }
                    else
                    {
                        var gk = lineNode.Select(Selectors.JsonPath("$.gk")).GetValue();
                        if (!string.IsNullOrEmpty(gk))
                        {
                            info.code = gk;
                            info.type = "2";
                        }
                    }

                    // Elements with neither value are still recorded, with code and type left unset.
                    info.bpid = "2";
                    results.Add(info);

                    if (info.type == "1")
                    {
                        page.AddTargetRequest($"https://ccliquote.lflogistics.net/ajaxHandler.asp?act=gk&pol={info.code}");
                    }
                }

                // Store the results on the page under a custom key so the pipeline can pick them up.
                page.AddResultItem("spiderproject.CcliquoteAirLine+AirLineModel", results);
            }
예제 #4
0
                /// <summary>
                /// Sets <c>SkipTargetRequests</c> on the page unless the JSON value at
                /// <c>$.mods.pager.status</c> equals "show".
                /// </summary>
                /// <param name="page">Page whose content is inspected and whose flag may be set.</param>
                public void Handle(ref Page page)
                {
                    var document = page.Selectable();
                    var statusNode = document.Select(Selectors.JsonPath("$.mods.pager.status"));
                    var pagerStatus = statusNode.GetValue();

                    // Pager is shown: leave the page untouched.
                    if (pagerStatus == "show")
                    {
                        return;
                    }

                    page.SkipTargetRequests = true;
                }
예제 #5
0
                /// <summary>
                /// Sets <c>SkipTargetUrls</c> on the page unless the JSON value at
                /// <c>$.mods.pager.status</c> equals "show".
                /// </summary>
                /// <param name="page">Page whose content is inspected and whose flag may be set.</param>
                /// <param name="downloader">Not used by this handler.</param>
                /// <param name="spider">Not used by this handler.</param>
                public override void Handle(ref Page page, IDownloader downloader, ISpider spider)
                {
                    var document = page.Selectable;
                    var statusNode = document.Select(Selectors.JsonPath("$.mods.pager.status"));
                    var pagerStatus = statusNode.GetValue();

                    // Pager is shown: leave the page untouched.
                    if (pagerStatus == "show")
                    {
                        return;
                    }

                    page.SkipTargetUrls = true;
                }
예제 #6
0
        public void SelectLinks()
        {
            // Load the JSON fixture from the working directory and wrap it for querying.
            var document = new JsonSelectable(File.ReadAllText("test.json"));

            // Collect the "link" value of every top-level array element.
            var linkNodes = document.SelectList(Selectors.JsonPath("$.[*].link"));
            var links = linkNodes.Select(node => node.Value).ToList();

            // The fixture is expected to hold eight links, the first being the one below.
            Assert.Equal(8, links.Count);
            Assert.Equal("http://viettelglobal.vn/", links[0]);
        }
예제 #7
0
        /// <summary>
        /// Builds the concrete selector described by <paramref name="selector"/>.
        /// </summary>
        /// <param name="selector">Selector description; may be null.</param>
        /// <returns>
        /// The selector for <c>selector.Type</c>, or null when the description is null or its expression is null/empty.
        /// </returns>
        /// <exception cref="SpiderException">
        /// Thrown when a Regex selector carries a non-integer argument, or when the selector type is not handled here.
        /// </exception>
        public static ISelector Parse(BaseSelector selector)
        {
            string expression = selector?.Expression;

            // Nothing to build without an expression.
            if (string.IsNullOrEmpty(expression))
            {
                return null;
            }

            switch (selector.Type)
            {
                case SelectorType.XPath:
                    return Selectors.XPath(expression);

                case SelectorType.Css:
                    return Selectors.Css(expression);

                case SelectorType.JsonPath:
                    return Selectors.JsonPath(expression);

                case SelectorType.Enviroment:
                    return Selectors.Enviroment(expression);

                case SelectorType.Regex:
                {
                    string groupArgument = selector.Argument;

                    // No argument: plain regex overload.
                    if (string.IsNullOrEmpty(groupArgument))
                    {
                        return Selectors.Regex(expression);
                    }

                    // An argument must be an integer, passed on as the group.
                    int group;
                    if (!int.TryParse(groupArgument, out group))
                    {
                        throw new SpiderException("Regex argument should be a number set to group: " + selector);
                    }

                    return Selectors.Regex(expression, group);
                }

                default:
                    throw new SpiderException("Not support selector: " + selector);
            }
        }
예제 #8
0
        /// <summary>
        /// Converts a selector attribute into the concrete selector that performs the query.
        /// </summary>
        /// <param name="selector">Selector attribute; may be null.</param>
        /// <returns>
        /// The selector built for <c>selector.Type</c>, or null when <paramref name="selector"/> is null.
        /// </returns>
        /// <exception cref="ArgumentException">Thrown when a Regex selector carries a non-integer argument.</exception>
        /// <exception cref="NotSupportedException">Thrown when the selector type is not handled here.</exception>
        public static ISelector ToSelector(this Attribute.Selector selector)
        {
            // A missing attribute simply yields no selector.
            if (selector == null)
            {
                return null;
            }

            string expression = selector.Expression;

            switch (selector.Type)
            {
                case SelectorType.XPath:
                    // NotNullExpression is defined outside this block; every supported branch calls it first.
                    NotNullExpression(selector);
                    return Selectors.XPath(expression);

                case SelectorType.Css:
                    NotNullExpression(selector);
                    return Selectors.Css(expression);

                case SelectorType.JsonPath:
                    NotNullExpression(selector);
                    return Selectors.JsonPath(expression);

                case SelectorType.Regex:
                    NotNullExpression(selector);

                    // No argument: plain regex overload.
                    if (string.IsNullOrEmpty(selector.Arguments))
                    {
                        return Selectors.Regex(expression);
                    }

                    // An argument must be an integer, passed on as the group.
                    if (!int.TryParse(selector.Arguments, out var groupIndex))
                    {
                        throw new ArgumentException($"Regex argument should be a number set to group: {selector}");
                    }

                    return Selectors.Regex(expression, groupIndex);

                default:
                    throw new NotSupportedException($"{selector} unsupported");
            }
        }
예제 #9
0
        /// <summary>
        /// Converts a declarative selector description into the concrete selector that performs the query.
        /// </summary>
        /// <param name="selector">
        /// Selector description; may be null. For Regex selectors, a non-empty <c>Arguments</c> value is read
        /// as "&lt;RegexOptions&gt;,&lt;replacement&gt;".
        /// </param>
        /// <returns>
        /// The selector built for <c>selector.Type</c>, or null when <paramref name="selector"/> is null.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// Thrown when the Regex arguments contain fewer than two comma-separated parts, or when the first
        /// part is not a valid <see cref="RegexOptions"/> value (raised by <see cref="Enum.Parse(Type, string)"/>).
        /// </exception>
        /// <exception cref="NotSupportedException">Thrown when the selector type is not handled here.</exception>
        public static ISelector ToSelector(this Selector selector)
        {
            // A missing description simply yields no selector.
            if (selector == null)
            {
                return null;
            }

            var expression = selector.Expression;

            switch (selector.Type)
            {
                case SelectorType.Css:
                {
                    // NotNullExpression is defined outside this block; every supported branch calls it first.
                    NotNullExpression(selector);
                    return Selectors.Css(expression);
                }

                case SelectorType.JsonPath:
                {
                    NotNullExpression(selector);
                    return Selectors.JsonPath(expression);
                }

                case SelectorType.Regex:
                {
                    NotNullExpression(selector);

                    // No arguments: plain regex overload.
                    if (string.IsNullOrEmpty(selector.Arguments))
                    {
                        return Selectors.Regex(expression);
                    }

                    var arguments = selector.Arguments.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries);

                    // Previously a value such as "IgnoreCase" or "," fell through to an
                    // IndexOutOfRangeException on the indexers below; report the real problem instead.
                    if (arguments.Length < 2)
                    {
                        throw new ArgumentException(
                            $"Regex arguments should be \"<RegexOptions>,<replacement>\": {selector}");
                    }

                    // NOTE(review): splitting on ',' means a replacement containing a comma is cut at the
                    // first comma, and only a single RegexOptions name can be given — confirm this is intended.
                    var options = (RegexOptions)Enum.Parse(typeof(RegexOptions), arguments[0]);
                    var replacement = arguments[1];
                    return Selectors.Regex(expression, options, replacement);
                }

                case SelectorType.XPath:
                {
                    NotNullExpression(selector);
                    return Selectors.XPath(expression);
                }

                default:
                {
                    throw new NotSupportedException($"{selector} unsupported");
                }
            }
        }
예제 #10
0
        /// <summary>
        /// Builds the concrete selector described by <paramref name="selector"/>.
        /// </summary>
        /// <param name="selector">Selector description; may be null.</param>
        /// <returns>
        /// The selector for <c>selector.Type</c>, or null when the description is null or its expression is null/empty.
        /// </returns>
        /// <exception cref="SpiderException">Thrown when the selector type is not handled here.</exception>
        public static ISelector Parse(Selector selector)
        {
            string expression = selector?.Expression;

            // Nothing to build without an expression.
            if (string.IsNullOrEmpty(expression))
            {
                return null;
            }

            switch (selector.Type)
            {
                case SelectorType.XPath:
                    return Selectors.XPath(expression);

                case SelectorType.Regex:
                    return Selectors.Regex(expression);

                case SelectorType.JsonPath:
                    return Selectors.JsonPath(expression);

                case SelectorType.Enviroment:
                    return Selectors.Enviroment(expression);

                case SelectorType.Css:
                    return Selectors.Css(expression);

                default:
                    throw new SpiderException("Not support selector: " + selector);
            }
        }
예제 #11
0
        /// <summary>
        /// Constructor: creates the model, registers one follow-request querier per follow-selector
        /// expression, and adds one required validator per distinct follow-selector pattern.
        /// </summary>
        public DataParser()
        {
            Model = new Model<T>();

            // Distinct patterns gathered across all follow selectors.
            var requiredPatterns = new HashSet<string>();

            var followSelectors = Model.FollowRequestSelectors;
            if (followSelectors != null)
            {
                foreach (var follow in followSelectors)
                {
                    // Turn every expression into a querier of the declared selector type.
                    switch (follow.SelectorType)
                    {
                        case SelectorType.XPath:
                            foreach (var xpath in follow.Expressions)
                            {
                                AddFollowRequestQuerier(Selectors.XPath(xpath));
                            }
                            break;

                        case SelectorType.Css:
                            foreach (var css in follow.Expressions)
                            {
                                AddFollowRequestQuerier(Selectors.Css(css));
                            }
                            break;

                        case SelectorType.Regex:
                            foreach (var regex in follow.Expressions)
                            {
                                AddFollowRequestQuerier(Selectors.Regex(regex));
                            }
                            break;

                        case SelectorType.JsonPath:
                            foreach (var jsonPath in follow.Expressions)
                            {
                                AddFollowRequestQuerier(Selectors.JsonPath(jsonPath));
                            }
                            break;

                        case SelectorType.Environment:
                            // Only a warning is logged; no querier is registered for this type.
                            Logger.LogWarning("SelectorType of follow selector is not supported");
                            break;
                    }

                    // Patterns are collected regardless of the selector type.
                    foreach (var followPattern in follow.Patterns)
                    {
                        requiredPatterns.Add(followPattern);
                    }
                }
            }

            // Each validator matches the request URI string against one collected pattern.
            foreach (var urlPattern in requiredPatterns)
            {
                AddRequiredValidator(req => Regex.IsMatch(req.RequestUri.ToString(), urlPattern));
            }
        }