/// <summary>
/// Determines whether this node sits under (or is itself) a node that returns a collection.
/// </summary>
/// <param name="deepth">
/// Receives the <c>Deepth</c> of the nearest ancestor for which <c>IsReturnCollection()</c> is true;
/// if no ancestor qualifies but this node does, receives this node's own <c>Deepth</c>;
/// otherwise -1.
/// </param>
/// <returns><c>true</c> when an ancestor or this node returns a collection; otherwise <c>false</c>.</returns>
protected bool IsCollectionDescendants(out int deepth)
{
    deepth = -1;

    // Ancestors take precedence: the closest collection ancestor wins.
    for (ExtractDataNode ancestor = ParentExtractNode; ancestor != null; ancestor = ancestor.ParentExtractNode)
    {
        if (ancestor.IsReturnCollection())
        {
            deepth = ancestor.Deepth;
            return true;
        }
    }

    // No collection ancestor: check whether this node itself is defined as a collection.
    if (!IsReturnCollection())
    {
        return false;
    }

    deepth = Deepth;
    return true;
}
/// <summary>
/// Returns the first direct child of <paramref name="node"/> whose runtime type is exactly
/// <see cref="ExtractPagerNode"/> (subclasses do not match), or <c>null</c> when there is none.
/// </summary>
/// <param name="node">Node whose <c>ChildNodes</c> are searched; only direct children are inspected.</param>
/// <returns>The first pager child, or <c>null</c>.</returns>
ExtractPagerNode getPagerNodeFromExtractDataNode(ExtractDataNode node)
{
    foreach (ExtractDataNode candidate in node.ChildNodes)
    {
        if (candidate.GetType() != typeof(ExtractPagerNode))
        {
            continue;
        }

        // The exact-type check above guarantees this cast succeeds.
        return (ExtractPagerNode)candidate;
    }

    return null;
}
/// <summary>
/// Reads the direct children of the rule document's root element and dispatches each one
/// according to the type reported by <c>ExtractNode.GetExtractType()</c>:
/// element rules are expanded with <c>ExtractDataNode.ExtractNodeAll</c> and appended to
/// <c>_extractNodes</c>, a URL pattern node becomes <c>EntryUrl</c>, and a scope-result node
/// becomes <c>DocumentResult</c>. Children of any other type are ignored.
/// </summary>
/// <param name="xmlDoc">The rule document; its <c>DocumentElement</c> is dereferenced directly.</param>
internal void parseRuleDocument(XmlDocument xmlDoc)
{
    foreach (XmlNode ruleNode in xmlDoc.DocumentElement.ChildNodes)
    {
        // Top-level rule nodes are wrapped at depth 1 to classify them.
        ExtractNode wrapper = new ExtractNode(ruleNode, 1);

        switch (wrapper.GetExtractType())
        {
            case NodeType.Element:
                _extractNodes.Add(ExtractDataNode.ExtractNodeAll(ruleNode, wrapper.Deepth, this));
                break;

            case NodeType.UrlPattern:
                EntryUrl = UrlPatternDatasource.FromXmlNode(ruleNode);
                break;

            case NodeType.ScopeResult:
                DocumentResult = new ScopeResult(ruleNode, wrapper.Deepth);
                break;
        }
    }
}
/// <summary>
/// Runs every extract method returned by <c>GetExtractMethods()</c> against
/// <paramref name="element"/> and stores each resulting key/value pair (value passed through
/// <c>OnOpNode</c>) in a scope.
/// </summary>
/// <remarks>
/// Values go into this node's own <c>Scope</c> when the node is a collection descendant
/// (see <c>IsCollectionDescendants</c>) or has no parent; otherwise they go into the parent's
/// <c>Scope</c>. After all methods have run, if the collection node is shallower than this node,
/// <c>Scope.PopUp(true)</c> is called on this node and on each ancestor whose <c>Deepth</c> is
/// greater than the collection depth.
/// </remarks>
/// <param name="element">The element handed to each extract method.</param>
protected virtual void ExtractDataByRuleMethods(Element element)
{
    List<ExtractMethod> methods = GetExtractMethods();
    if (methods == null || methods.Count == 0)
    {
        return;
    }

    int collectionDeepth;
    bool needPopUp = IsCollectionDescendants(out collectionDeepth);

    foreach (ExtractMethod method in methods)
    {
        var extracted = method.ExtractFrom(element);
        if (extracted == null)
        {
            continue;
        }

        foreach (var key in extracted.Keys)
        {
            // Decided per item, exactly as the values are written.
            bool writeToOwnScope = needPopUp || ParentExtractNode == null;
            if (writeToOwnScope)
            {
                Scope.Set(key, OnOpNode(extracted[key]));
            }
            else
            {
                ParentExtractNode.Scope.Set(key, OnOpNode(extracted[key]));
            }
        }
    }

    if (!needPopUp || collectionDeepth >= Deepth)
    {
        return;
    }

    // Pop scopes from this node upwards, stopping at the collection node's depth.
    for (ExtractDataNode cursor = this;
         cursor != null && cursor.Deepth > collectionDeepth;
         cursor = cursor.ParentExtractNode)
    {
        cursor.Scope.PopUp(true);
    }
}
/// <summary>
/// Recursively builds the <see cref="ExtractDataNode"/> tree for <paramref name="node"/> and
/// all of its XML children.
/// </summary>
/// <remarks>
/// A child whose <c>isPage</c> attribute converts to <c>true</c> is represented by a new
/// <see cref="ExtractPagerNode"/> that takes over the child nodes of the recursively built child;
/// every other child is attached as-is.
/// NOTE(review): the nodes moved onto the pager keep the <c>ParentExtractNode</c> assigned during
/// recursion (the discarded intermediate node), not the pager — confirm this is intended.
/// </remarks>
/// <param name="node">The XML definition node to convert.</param>
/// <param name="deepth">Depth assigned to the created node; children receive <c>deepth + 1</c>.</param>
/// <param name="taskDoc">Document assigned as <c>OwnerTaskDocument</c> of every created node.</param>
/// <returns>The node created for <paramref name="node"/>, with its children attached.</returns>
public static ExtractDataNode ExtractNodeAll(XmlNode node, int deepth, ExtractTaskDocument taskDoc)
{
    ExtractDataNode current = new ExtractDataNode(node, deepth);
    current.OwnerTaskDocument = taskDoc;

    XmlNodeList xmlChildren = node.ChildNodes;
    if (xmlChildren == null || xmlChildren.Count == 0)
    {
        return current;
    }

    foreach (XmlNode xmlChild in xmlChildren)
    {
        // The subtree is always built first, even when the child turns out to be a pager.
        ExtractDataNode builtChild = ExtractDataNode.ExtractNodeAll(xmlChild, deepth + 1, taskDoc);

        string pagerAttr = GetNodeNotNullAttrValue(xmlChild, "isPage");
        bool isPager = !string.IsNullOrEmpty(pagerAttr) && Convert.ToBoolean(pagerAttr);

        if (!isPager)
        {
            builtChild.OwnerTaskDocument = taskDoc;
            builtChild.ParentExtractNode = current;
            current.ChildNodes.Add(builtChild);
            continue;
        }

        // Pager node definition: replace the plain node with a pager that adopts its children.
        ExtractPagerNode pagerNode = new ExtractPagerNode(xmlChild, deepth + 1);
        pagerNode.OwnerTaskDocument = taskDoc;
        pagerNode.ParentExtractNode = current;
        if (builtChild.ChildNodes.Count > 0)
        {
            pagerNode.ChildNodes.AddRange(builtChild.ChildNodes);
        }
        current.childNodes.Add(pagerNode);
    }

    return current;
}
/// <summary>
/// Pager override: derives the paging parameter / page URL from the single child rule node.
/// Does nothing unless this node has exactly one child.
/// </summary>
/// <remarks>
/// Two independent steps are driven by the child's definition attributes
/// (<c>retAttr</c>, <c>when</c>, <c>format</c>, <c>op</c>, <c>opParams</c>, <c>paramName</c>):
/// <list type="number">
/// <item>
/// "when" matching — if the <c>when</c> attribute is set and a <c>when</c> child exists, every
/// element selected by the child's CSS query is inspected; when the rule is
/// <c>innerText</c>/<c>contains</c> and the element text contains the configured value, the
/// (optionally trimmed) <c>retAttr</c> value is stored in <c>Scope</c> under <c>paramName</c>.
/// For <c>PagerType.ByNext</c> the page URL list is cleared and the scan stops at the first match.
/// </item>
/// <item>
/// "params" formatting — if exactly one <c>params</c> child with children exists, the parameters
/// are bound once, refreshed from the task arguments or the work scope, and — when none is
/// empty — substituted into <c>format</c> in <c>Index</c> order; a non-empty result is stored in
/// <c>Scope</c> and appended to the page URL list. If any parameter is empty the list is cleared.
/// </item>
/// </list>
/// NOTE(review): an <c>arguments</c>-scoped parameter whose key is absent from the task arguments
/// keeps whatever <c>Value</c> it had from an earlier call — confirm this is intended.
/// </remarks>
/// <param name="element">Element against which the child's CSS query is evaluated.</param>
protected override void ExtractDataByRuleMethods(Element element)
{
    List<ExtractDataNode> cNodes = this.ChildNodes;
    if (cNodes.Count != 1)
    {
        return;
    }

    ExtractDataNode tempNode = cNodes[0];
    string retAttr = GetNodeNotNullAttrValue(tempNode.DefineNode, "retAttr");
    string whenDef = GetNodeNotNullAttrValue(tempNode.DefineNode, "when");
    string formatDef = GetNodeNotNullAttrValue(tempNode.DefineNode, "format");
    string opDef = GetNodeNotNullAttrValue(tempNode.DefineNode, "op");
    string opParamsDef = GetNodeNotNullAttrValue(tempNode.DefineNode, "opParams");
    string paramName = GetNodeNotNullAttrValue(tempNode.DefineNode, "paramName");

    List<ExtractDataNode> whenNodeList = tempNode.ChildNodes
        .Where(p => p.DefineNode.Name.Equals("when", StringComparison.InvariantCultureIgnoreCase))
        .ToList();

    if (!string.IsNullOrEmpty(whenDef) && whenNodeList.Count > 0)
    {
        XmlNode whenMatchNode = whenNodeList[0].ChildNodes
            .First(n => n.DefineNode.Name.Equals("attr", StringComparison.InvariantCultureIgnoreCase))
            .DefineNode;
        string whenAttr = GetNodeNotNullAttrValue(whenMatchNode, "name");
        string whenVal = GetNodeNotNullAttrValue(whenMatchNode, "value");

        // Loop-invariant decisions are made once instead of per element.
        char[] trimChars = (opDef == "trim" && !string.IsNullOrEmpty(opParamsDef))
            ? opParamsDef.ToCharArray()
            : null;
        bool matchByInnerText = whenAttr == "innerText" && whenDef == "contains";

        string cssQuery = tempNode.GetCssQuery();
        foreach (Element ele in element.Select(cssQuery))
        {
            string rawRetVal = ele.Attr(retAttr);
            if (trimChars != null)
            {
                rawRetVal = rawRetVal.Trim(trimChars);
            }

            if (!matchByInnerText || !ele.Text().Contains(whenVal))
            {
                continue;
            }

            Scope.Set(paramName, rawRetVal);
            if (PageListType == PagerType.ByNext)
            {
                // "Next" paging follows a single link: discard queued URLs and stop at the first match.
                pageUrlList.Clear();
                break;
            }
        }
    }

    List<ExtractDataNode> paramsNodeList = tempNode.ChildNodes
        .Where(p => p.DefineNode.Name.Equals("params", StringComparison.InvariantCultureIgnoreCase))
        .ToList();
    if (paramsNodeList.Count != 1)
    {
        return;
    }

    ExtractDataNode paramNode = paramsNodeList[0];
    if (paramNode.ChildNodes.Count == 0)
    {
        return;
    }

    #region Bind parameters
    if (extractParams.Count == 0)
    {
        // Parameters have not been bound yet: read their definitions once.
        extractParams.AddRange(paramNode.ChildNodes.Select(d => new ExtractParam
        {
            Name = GetNodeNotNullAttrValue(d.DefineNode, "name"),
            // Prefixing "0" makes a missing index attribute parse as 0.
            Index = Convert.ToInt32("0" + GetNodeNotNullAttrValue(d.DefineNode, "index")),
            Scope = GetNodeNotNullAttrValue(d.DefineNode, "scope") != "workScope"
                ? ParamScope.arguments
                : ParamScope.workScope
        }).ToList());
    }

    Dictionary<string, object> args = paramNode.OwnerTaskDocument.ExtractArguments;
    extractParams.ForEach(p =>
    {
        if (p.Scope == ParamScope.arguments)
        {
            object argValue;
            if (args.TryGetValue(p.Name, out argValue))
            {
                p.Value = argValue.ToString();
            }
        }
        else
        {
            p.Value = Scope.Get<string>(p.Name);
        }
    });
    #endregion

    if (extractParams.Any(p => string.IsNullOrEmpty(p.Value)))
    {
        // At least one parameter is unresolved: no page URL can be built.
        pageUrlList.Clear();
        return;
    }

    string formatOutput = string.Format(formatDef, extractParams.OrderBy(p => p.Index).Select(n => n.Value).ToArray());
    if (!string.IsNullOrEmpty(formatOutput))
    {
        Scope.Set(paramName, formatOutput);
        pageUrlList.Add(formatOutput);
    }
}