コード例 #1
0
        /// <summary>
        /// Builds a sample <c>TaskModel</c>, serializes it with System.Text.Json and validates the
        /// resulting JSON against the JSON schema generated from the <c>TaskModel</c> type.
        /// </summary>
        /// <remarks>
        /// The method is synchronous on purpose: it never awaits anything, and an <c>async void</c>
        /// method would route exceptions past the caller instead of letting them propagate.
        /// </remarks>
        public void JSONSchemaValidation()
        {
            var     generator = new JSchemaGenerator();
            JSchema schema    = generator.Generate(typeof(TaskModel));

            // Rules applied to each product node: link (href attribute), name (name attribute), price (inner text).
            var linkParser = new PropertyParsingRuleModel {
                NodeSelector = new SelectorModel {
                    Type            = SelectorEnum.XPath,
                    MatchExpression = "(.//a[contains(@class,'ellipsis-text')])[1]"
                },
                Type       = OutputTypeEnum.Text,
                OutputFrom = OutputFromEnum.Attribute,
                OutputFromAttributeName = "href",
            };

            var nameParser = new PropertyParsingRuleModel {
                NodeSelector = new SelectorModel {
                    Type            = SelectorEnum.XPath,
                    MatchExpression = "(.//div[contains(@class,'pi-img-wrapper')])[1]/a[1]"
                },
                Type       = OutputTypeEnum.Text,
                OutputFrom = OutputFromEnum.Attribute,
                OutputFromAttributeName = "name",
            };

            var priceParser = new PropertyParsingRuleModel {
                NodeSelector = new SelectorModel {
                    Type            = SelectorEnum.XPath,
                    MatchExpression = "(.//div[contains(@class,'pi-price')])[1]"
                },
                Type       = OutputTypeEnum.Text,
                OutputFrom = OutputFromEnum.InnerText
            };

            // Array rule selecting every product item; each item is parsed with the three rules above.
            var productParser = new PropertyParsingRuleModel {
                Type         = OutputTypeEnum.Array,
                NodeSelector = new SelectorModel {
                    Type            = SelectorEnum.CSS,
                    MatchExpression = "div.product-list div.product-item"
                },
                PropertyParsingRules = new Dictionary <string, PropertyParsingRuleModel> {
                    { "Link", linkParser },
                    { "Name", nameParser },
                    { "Price", priceParser }
                }
            };

            var taskModel = new TaskModel {
                Uri              = "http://www.exdoll.com/productlist.ac",
                RequestMethod    = RequestMethodEnum.Get,
                TaskId           = 0,
                RequestParameter = new RequestParameterModel {
                    Headers = new Dictionary <string, string> {
                        { "cookie", "JSESSIONID=912BD825760319675E9DE1E1C1E2D701" }
                    },
                    Body = null
                },
                PropertyParsingRules = new Dictionary <string, PropertyParsingRuleModel> {
                    { "Products", productParser }
                },
            };

            // Serialized with the default serializer options, exactly as before
            // (the options object that used to be built here was never passed to Serialize).
            var     taskString = JsonSerializer.Serialize(taskModel);
            JObject taskJson   = JObject.Parse(taskString);

            // The generated schema is used directly; serializing it to text and re-parsing it added nothing.
            // NOTE(review): the validation result is computed but never asserted — add an assertion
            // using whichever test framework this class belongs to.
            var valid = taskJson.IsValid(schema);
        }
コード例 #2
0
        /// <summary>
        /// Applies a parsing rule to an HTML node and returns the extracted value.
        /// </summary>
        /// <param name="node">Node against which the rule's selector (XPath or CSS) is evaluated.</param>
        /// <param name="parser">Rule describing the selector, the output type and any nested rules.</param>
        /// <returns>
        /// For <c>Text</c> rules: the value read from the first matching node (attribute value, HTML,
        /// length or inner text, depending on <c>OutputFrom</c>), or <c>null</c> when no node matches.
        /// For <c>Array</c> rules: a list with one object per matching node when nested rules are present,
        /// otherwise a list of the matching nodes' inner text; the list is empty when nothing matches.
        /// <c>null</c> for any other output type.
        /// </returns>
        private async Task <object> Parse(HtmlNode node, PropertyParsingRuleModel parser)
        {
            bool   selectorIsXPath = parser.NodeSelector.Type == SelectorEnum.XPath;
            string selector        = parser.NodeSelector.MatchExpression;
            object tempResult      = null;

            switch (parser.Type)
            {
            case OutputTypeEnum.Text:
                var nodeInfo = selectorIsXPath ? node.SelectSingleNode(selector) : node.QuerySelector(selector);
                if (nodeInfo == null)
                {
                    // Selector matched nothing: report "no value" instead of dereferencing null.
                    break;
                }

                switch (parser.OutputFrom)
                {
                case OutputFromEnum.Attribute:
                    tempResult = nodeInfo.GetAttributeValue(parser.OutputFromAttributeName, string.Empty);
                    break;

                case OutputFromEnum.InnerHtml:
                    tempResult = nodeInfo.InnerHtml;
                    break;

                case OutputFromEnum.OuterHtml:
                    tempResult = nodeInfo.OuterHtml;
                    break;

                case OutputFromEnum.InnerLength:
                    tempResult = nodeInfo.InnerLength;
                    break;

                case OutputFromEnum.OuterLength:
                    tempResult = nodeInfo.OuterLength;
                    break;

                case OutputFromEnum.None:
                case OutputFromEnum.InnerText:
                default:
                    tempResult = nodeInfo.InnerText;
                    break;
                }
                break;

            case OutputTypeEnum.Array:
                // HtmlAgilityPack's SelectNodes returns null (not an empty collection) when nothing matches,
                // so every use of the match set below is null-guarded.
                var matchedNodes = selectorIsXPath ? node.SelectNodes(selector) : node.QuerySelectorAll(selector);

                if (parser.PropertyParsingRules?.Any() ?? false)
                {
                    // Nested rules present: every matched node becomes an object keyed by rule name.
                    IList <object> items = new List <object>();
                    if (matchedNodes != null)
                    {
                        foreach (var matchedNode in matchedNodes)
                        {
                            dynamic item       = new ExpandoObject();
                            var     itemFields = (IDictionary <string, object>)item;
                            foreach (var childRule in parser.PropertyParsingRules)
                            {
                                itemFields[childRule.Key] = await Parse(matchedNode, childRule.Value);
                            }
                            items.Add(item);
                        }
                    }
                    tempResult = items;
                }
                else
                {
                    // No nested rules: the result is the inner text of each matched node.
                    // Materialized so the caller does not hold a deferred query over the DOM.
                    tempResult = matchedNodes == null
                                 ? new List <string>()
                                 : matchedNodes.Select(c => c.InnerText).ToList();
                }
                break;
            }
            return(tempResult);
        }
コード例 #3
0
        /// <summary>
        /// End-to-end smoke run: builds a sample <c>TaskModel</c>, downloads the page through
        /// <c>DownloadData</c>, parses it with the task's first parsing rule and serializes the outcome.
        /// </summary>
        /// <returns>A task that completes when the download, parse and serialization have finished.</returns>
        /// <remarks>
        /// Returns <c>Task</c> rather than <c>void</c> so that callers and test runners can await the
        /// method and observe any exception it throws.
        /// </remarks>
        public async Task ParserTest()
        {
            /*
             * Design notes
             * Request engine: default, WebDriver
             * Content type: Text, JSON, HTML
             * Content charset: UTF8, ASCII - RestSharp already handles this, not needed
             *
             * Use Quartz.NET to start a job that periodically pulls tasks from the MQ in batches.
             * After pulling, mark the task as running; if it has not finished when the timeout
             * threshold is reached, reset that status.
             * When execution finishes, push the task's response onto the MQ and mark the task as completed.
             */

            // Rules applied to each product node: link (href attribute), name (name attribute), price (inner text).
            var linkParser = new PropertyParsingRuleModel {
                NodeSelector = new SelectorModel {
                    Type            = SelectorEnum.XPath,
                    MatchExpression = "(.//a[contains(@class,'ellipsis-text')])[1]"
                },
                Type       = OutputTypeEnum.Text,
                OutputFrom = OutputFromEnum.Attribute,
                OutputFromAttributeName = "href",
            };

            var nameParser = new PropertyParsingRuleModel {
                NodeSelector = new SelectorModel {
                    Type            = SelectorEnum.XPath,
                    MatchExpression = "(.//div[contains(@class,'pi-img-wrapper')])[1]/a[1]"
                },
                Type       = OutputTypeEnum.Text,
                OutputFrom = OutputFromEnum.Attribute,
                OutputFromAttributeName = "name",
            };

            var priceParser = new PropertyParsingRuleModel {
                NodeSelector = new SelectorModel {
                    Type            = SelectorEnum.XPath,
                    MatchExpression = "(.//div[contains(@class,'pi-price')])[1]"
                },
                Type       = OutputTypeEnum.Text,
                OutputFrom = OutputFromEnum.InnerText
            };

            // Array rule selecting every product item; each item is parsed with the three rules above.
            var productParser = new PropertyParsingRuleModel {
                Type         = OutputTypeEnum.Array,
                NodeSelector = new SelectorModel {
                    Type            = SelectorEnum.CSS,
                    MatchExpression = "div.product-list div.product-item"
                },
                PropertyParsingRules = new Dictionary <string, PropertyParsingRuleModel> {
                    { "Link", linkParser },
                    { "Name", nameParser },
                    { "Price", priceParser }
                }
            };

            // NOTE(review): the session cookie below is hard-coded; presumably it expires — confirm
            // whether it should come from configuration instead.
            var taskModel = new TaskModel {
                Uri              = "http://www.exdoll.com/productlist.ac",
                RequestMethod    = RequestMethodEnum.Get,
                TaskId           = 0,
                RequestParameter = new RequestParameterModel {
                    Headers = new Dictionary <string, string> {
                        { "cookie", "JSESSIONID=912BD825760319675E9DE1E1C1E2D701" }
                    },
                    Body = null
                },
                PropertyParsingRules = new Dictionary <string, PropertyParsingRuleModel> {
                    { "Products", productParser }
                },
            };

            var opts = new JsonSerializerOptions {
                IgnoreNullValues = true,
                WriteIndented    = true
            };

            // Serializing the task definition has no consumer here; it only checks that the model serializes.
            var taskJson = JsonSerializer.Serialize(taskModel, opts);

            var response = await DownloadData(taskModel);

            var document = new HtmlDocument();
            document.LoadHtml(response.Content);

            var result = await Parse(document.DocumentNode, taskModel.PropertyParsingRules.First().Value);

            // A response may repeat a header name (e.g. several Set-Cookie headers); a plain ToDictionary
            // throws on duplicate keys, so entries are grouped by name first. A name that occurs once keeps
            // exactly the value it had before; repeated names are joined with ", ".
            var responseHeaders = response.Headers
                                  .GroupBy(h => h.Name)
                                  .ToDictionary(
                                      g => g.Key,
                                      g => g.Count() == 1
                                           ? g.First().Value?.ToString()
                                           : string.Join(", ", g.Select(h => h.Value?.ToString())));

            // Cookies sharing a name are collapsed to the last one received.
            var responseCookies = response.Cookies
                                  .GroupBy(c => c.Name)
                                  .ToDictionary(g => g.Key, g => g.Last().Value);

            var resultT = new {
                TaskId          = taskModel.TaskId,
                Result          = result,
                ResponseHeaders = responseHeaders,
                ResponseCookies = responseCookies
            };

            // NOTE(review): nothing is asserted on the serialized result — add assertions once the
            // expected output is known.
            var resultJson = JsonSerializer.Serialize(resultT, opts);
        }