/// <summary>
/// Builds a sample <see cref="TaskModel"/> (a product-list scraping task), serializes it
/// with System.Text.Json using the default options, and validates the resulting JSON
/// against the schema generated from <see cref="TaskModel"/>.
/// </summary>
/// <remarks>
/// Runs synchronously: nothing in this method is awaited, so any exception propagates
/// directly to the caller.
/// </remarks>
public void JSONSchemaValidation()
{
    var schemaGenerator = new JSchemaGenerator();
    var generatedSchema = schemaGenerator.Generate(typeof(TaskModel));

    // Per-product rules; each XPath is evaluated relative to one product node.
    var linkParser = new PropertyParsingRuleModel
    {
        NodeSelector = new SelectorModel
        {
            Type = SelectorEnum.XPath,
            MatchExpression = "(.//a[contains(@class,'ellipsis-text')])[1]"
        },
        Type = OutputTypeEnum.Text,
        OutputFrom = OutputFromEnum.Attribute,
        OutputFromAttributeName = "href",
    };

    var nameParser = new PropertyParsingRuleModel
    {
        NodeSelector = new SelectorModel
        {
            Type = SelectorEnum.XPath,
            MatchExpression = "(.//div[contains(@class,'pi-img-wrapper')])[1]/a[1]"
        },
        Type = OutputTypeEnum.Text,
        OutputFrom = OutputFromEnum.Attribute,
        OutputFromAttributeName = "name",
    };

    var priceParser = new PropertyParsingRuleModel
    {
        NodeSelector = new SelectorModel
        {
            Type = SelectorEnum.XPath,
            MatchExpression = "(.//div[contains(@class,'pi-price')])[1]"
        },
        Type = OutputTypeEnum.Text,
        OutputFrom = OutputFromEnum.InnerText
    };

    // Array rule: one object per matched product, populated by the nested rules above.
    var productParser = new PropertyParsingRuleModel
    {
        Type = OutputTypeEnum.Array,
        NodeSelector = new SelectorModel
        {
            Type = SelectorEnum.CSS,
            MatchExpression = "div.product-list div.product-item"
        },
        PropertyParsingRules = new Dictionary<string, PropertyParsingRuleModel>
        {
            { "Link", linkParser },
            { "Name", nameParser },
            { "Price", priceParser }
        }
    };

    var taskModel = new TaskModel
    {
        Uri = "http://www.exdoll.com/productlist.ac",
        RequestMethod = RequestMethodEnum.Get,
        TaskId = 0,
        RequestParameter = new RequestParameterModel
        {
            Headers = new Dictionary<string, string>
            {
                { "cookie", "JSESSIONID=912BD825760319675E9DE1E1C1E2D701" }
            },
            Body = null
        },
        PropertyParsingRules = new Dictionary<string, PropertyParsingRuleModel>
        {
            { "Products", productParser }
        },
    };

    // Serialized with the default serializer options (no custom options are passed).
    var taskString = JsonSerializer.Serialize(taskModel);
    JObject taskJson = JObject.Parse(taskString);

    // The generated schema is round-tripped through its string form before use.
    JSchema schema = JSchema.Parse(generatedSchema.ToString());

    // NOTE(review): the validation outcome is computed but never asserted, so this
    // method cannot fail on an invalid document. Add an assertion with the project's
    // test framework once the expected outcome is confirmed.
    var valid = taskJson.IsValid(schema);
}
/// <summary>
/// Applies a parsing rule to an HTML node and returns the extracted value.
/// </summary>
/// <param name="node">Node against which the rule's selector is evaluated.</param>
/// <param name="parser">
/// Rule supplying the selector (XPath or CSS), the output type and, for arrays of
/// objects, the nested per-property rules.
/// </param>
/// <returns>
/// <para>Text rules: the value read from the first matching node (attribute, markup,
/// length or inner text), or <c>null</c> when no node matches the selector.</para>
/// <para>Array rules with nested rules: a <see cref="List{T}"/> of
/// <see cref="ExpandoObject"/>, one per matching node, keyed by nested rule name.</para>
/// <para>Array rules without nested rules: a list with the inner text of every
/// matching node.</para>
/// <para>Any other output type: <c>null</c>.</para>
/// </returns>
private async Task<object> Parse(HtmlNode node, PropertyParsingRuleModel parser)
{
    bool selectorIsXPath = parser.NodeSelector.Type == SelectorEnum.XPath;
    string selector = parser.NodeSelector.MatchExpression;

    switch (parser.Type)
    {
        case OutputTypeEnum.Text:
        {
            var nodeInfo = selectorIsXPath ? node.SelectSingleNode(selector) : node.QuerySelector(selector);

            // The selector may match nothing; report that as null instead of
            // dereferencing a missing node.
            if (nodeInfo == null)
            {
                return null;
            }

            return ReadOutput(nodeInfo, parser);
        }

        case OutputTypeEnum.Array:
        {
            var selected = selectorIsXPath ? node.SelectNodes(selector) : node.QuerySelectorAll(selector);

            // HtmlAgilityPack's SelectNodes yields null (not an empty collection) when
            // nothing matches in its default configuration; normalize to empty.
            IEnumerable<HtmlNode> nodes = selected;
            if (nodes == null)
            {
                nodes = Enumerable.Empty<HtmlNode>();
            }

            // No nested rules: the result is an array of strings. Materialized so the
            // caller does not receive a deferred query.
            if (!(parser.PropertyParsingRules?.Any() ?? false))
            {
                return nodes.Select(c => c.InnerText).ToList();
            }

            // Nested rules present: each matched node becomes one object.
            var items = new List<object>();
            foreach (var itemNode in nodes)
            {
                IDictionary<string, object> item = new ExpandoObject();
                foreach (var rule in parser.PropertyParsingRules)
                {
                    item[rule.Key] = await Parse(itemNode, rule.Value);
                }

                items.Add(item);
            }

            return items;
        }

        default:
            return null;
    }
}

/// <summary>
/// Reads from <paramref name="source"/> the piece of data selected by the rule's
/// <c>OutputFrom</c> setting; unrecognized settings fall back to the inner text.
/// </summary>
private static object ReadOutput(HtmlNode source, PropertyParsingRuleModel rule)
{
    switch (rule.OutputFrom)
    {
        case OutputFromEnum.Attribute:
            // A missing attribute yields an empty string.
            return source.GetAttributeValue(rule.OutputFromAttributeName, string.Empty);
        case OutputFromEnum.InnerHtml:
            return source.InnerHtml;
        case OutputFromEnum.OuterHtml:
            return source.OuterHtml;
        case OutputFromEnum.InnerLength:
            return source.InnerLength;
        case OutputFromEnum.OuterLength:
            return source.OuterLength;
        case OutputFromEnum.None:
        case OutputFromEnum.InnerText:
        default:
            return source.InnerText;
    }
}
/// <summary>
/// End-to-end scratch run: builds a product-list scraping task, downloads the page via
/// <c>DownloadData</c>, applies the task's first parsing rule to the document root and
/// serializes the parsed result together with the response headers and cookies.
/// </summary>
/// <remarks>
/// Design notes:
/// <list type="bullet">
/// <item>Request engine: default, WebDriver.</item>
/// <item>Content type: Text, JSON, HTML.</item>
/// <item>Content charset: UTF8, ASCII - RestSharp already covers this, not needed.</item>
/// <item>Use Quartz.NET to start a job that periodically pulls Tasks from the MQ in batches.</item>
/// <item>After pulling, mark the Task as running; if it has not finished when the
/// timeout threshold is reached, reset that status.</item>
/// <item>After completion, push the Task's response into the MQ and mark the Task as
/// completed.</item>
/// </list>
/// NOTE(review): this is <c>async void</c>, so the caller cannot await it or observe
/// its exceptions; consider returning <c>Task</c> if no caller depends on <c>void</c>.
/// </remarks>
public async void ParserTest()
{
    // --- per-product rules (XPath, relative to a single product node) ---
    var hrefSelector = new SelectorModel
    {
        Type = SelectorEnum.XPath,
        MatchExpression = "(.//a[contains(@class,'ellipsis-text')])[1]"
    };
    var linkRule = new PropertyParsingRuleModel
    {
        NodeSelector = hrefSelector,
        Type = OutputTypeEnum.Text,
        OutputFrom = OutputFromEnum.Attribute,
        OutputFromAttributeName = "href",
    };

    var nameSelector = new SelectorModel
    {
        Type = SelectorEnum.XPath,
        MatchExpression = "(.//div[contains(@class,'pi-img-wrapper')])[1]/a[1]"
    };
    var nameRule = new PropertyParsingRuleModel
    {
        NodeSelector = nameSelector,
        Type = OutputTypeEnum.Text,
        OutputFrom = OutputFromEnum.Attribute,
        OutputFromAttributeName = "name",
    };

    var priceSelector = new SelectorModel
    {
        Type = SelectorEnum.XPath,
        MatchExpression = "(.//div[contains(@class,'pi-price')])[1]"
    };
    var priceRule = new PropertyParsingRuleModel
    {
        NodeSelector = priceSelector,
        Type = OutputTypeEnum.Text,
        OutputFrom = OutputFromEnum.InnerText
    };

    // --- array rule: one object per product item found by the CSS selector ---
    var productFields = new Dictionary<string, PropertyParsingRuleModel>();
    productFields.Add("Link", linkRule);
    productFields.Add("Name", nameRule);
    productFields.Add("Price", priceRule);

    var productRule = new PropertyParsingRuleModel
    {
        Type = OutputTypeEnum.Array,
        NodeSelector = new SelectorModel
        {
            Type = SelectorEnum.CSS,
            MatchExpression = "div.product-list div.product-item"
        },
        PropertyParsingRules = productFields
    };

    // --- the task itself ---
    var requestHeaders = new Dictionary<string, string>();
    requestHeaders.Add("cookie", "JSESSIONID=912BD825760319675E9DE1E1C1E2D701");

    var topLevelRules = new Dictionary<string, PropertyParsingRuleModel>();
    topLevelRules.Add("Products", productRule);

    var taskModel = new TaskModel
    {
        Uri = "http://www.exdoll.com/productlist.ac",
        RequestMethod = RequestMethodEnum.Get,
        TaskId = 0,
        RequestParameter = new RequestParameterModel { Headers = requestHeaders, Body = null },
        PropertyParsingRules = topLevelRules,
    };

    // The enum converter is constructed but not registered on the options.
    var stringEnumConverter = new JsonStringEnumConverter();
    var opts = new JsonSerializerOptions
    {
        IgnoreNullValues = true,
        WriteIndented = true
    };

    // Serialized form of the task; not used further in this method.
    var taskJson = JsonSerializer.Serialize(taskModel, opts);

    // Fetch the page and load it into an HTML document.
    var response = await DownloadData(taskModel);
    var html = response.Content;

    var document = new HtmlDocument();
    document.LoadHtml(html);

    // Only the first top-level rule is applied, starting at the document root.
    var firstRule = taskModel.PropertyParsingRules.First().Value;
    var parsed = await Parse(document.DocumentNode, firstRule);

    var headersByName = response.Headers.ToDictionary(c => c.Name, c => c?.Value?.ToString());
    var cookiesByName = response.Cookies.ToDictionary(c => c.Name, c => c.Value);

    var resultT = new
    {
        TaskId = taskModel.TaskId,
        Result = parsed,
        ResponseHeaders = headersByName,
        ResponseCookies = cookiesByName
    };

    // Serialized output; not used further in this method.
    var resultJson = JsonSerializer.Serialize(resultT, opts);
}