public CreateNavigator ( ) : |
||
return |
/// <summary>
/// Evaluates every XPath rule in <c>config.RemoveXPathRules</c> against
/// <paramref name="parentNode"/> and removes each node the rule selects.
/// </summary>
/// <param name="config">Section holding the removal rules; a null section or a null/empty rule list makes this a no-op.</param>
/// <param name="parentNode">Node the rules are evaluated against; null makes this a no-op.</param>
private void RemoveXPaths(ConfigSection config, HtmlAgilityPack.HtmlNode parentNode)
{
    if (parentNode == null || config == null || config.RemoveXPathRules == null || config.RemoveXPathRules.Count == 0)
    {
        return;
    }

    foreach (var removeXPathRule in config.RemoveXPathRules)
    {
        var navigator = parentNode.CreateNavigator();

        // Collect all matches before detaching any of them: the XPath iterator walks the
        // live tree, so removing the current node mid-iteration can cut the traversal
        // short and leave later matches in place.
        var matches = new System.Collections.Generic.List<HtmlAgilityPack.HtmlNode>();
        foreach (HtmlNodeNavigator node in navigator.Select(removeXPathRule))
        {
            matches.Add(node.CurrentNode);
        }

        foreach (var match in matches)
        {
            match.Remove();
        }
    }
}
/// <summary>
/// Reads language, Hungarian title, original title, link and date out of
/// <paramref name="node"/> via XPath and reports them as a <c>NewRowObject</c>
/// through <c>backgroundWorker.ReportProgress</c>.
/// </summary>
/// <remarks>
/// For every field the last match wins; a field without a match keeps its default
/// (empty string, or the current time for the date). The <c>nav</c>, <c>expr</c> and
/// <c>iterator</c> members are overwritten as a side effect.
/// </remarks>
/// <param name="node">Node whose <c>td</c> children are queried.</param>
private void FillRow(HtmlAgilityPack.HtmlNode node)
{
    nav = node.CreateNavigator();

    bool download = false;
    string language = "";
    string hungarianTitle = "";
    string originalTitle = "";
    string url = "";
    DateTime date = DateTime.Now;

    // Language
    expr = nav.Compile(@"./td/small");
    iterator = nav.Select(expr);
    while (iterator.MoveNext())
    {
        language = iterator.Current.Clone().Value;
    }

    // Hungarian title
    expr = nav.Compile(@"./td/div[@class='magyar']");
    iterator = nav.Select(expr);
    while (iterator.MoveNext())
    {
        hungarianTitle = iterator.Current.Clone().Value;
    }

    // Original title
    expr = nav.Compile(@"./td/div[@class='eredeti']");
    iterator = nav.Select(expr);
    while (iterator.MoveNext())
    {
        originalTitle = iterator.Current.Clone().Value;
    }

    // Link: the href attribute appended to the website prefix
    expr = nav.Compile(@"./td/a[@href]");
    iterator = nav.Select(expr);
    while (iterator.MoveNext())
    {
        url = website + iterator.Current.Clone().GetAttribute("href", "");
    }

    // Date: any cell whose trimmed text is exactly a yyyy-MM-dd date
    expr = nav.Compile(@"./td");
    iterator = nav.Select(expr);
    while (iterator.MoveNext())
    {
        string cellText = iterator.Current.Clone().Value.Trim();
        DateTime parsed;
        bool isDate = DateTime.TryParseExact(cellText, "yyyy-MM-dd", CultureInfo.InvariantCulture, DateTimeStyles.None, out parsed);
        if (isDate)
        {
            date = parsed.Date;
        }
    }

    backgroundWorker.ReportProgress(1, new NewRowObject()
    {
        datum = date,
        eredeti = originalTitle,
        letolt = download,
        link = url,
        magyar = hungarianTitle,
        nyelv = language
    });
}
/// <summary>
/// Builds the extraction result for one config section evaluated against
/// <paramref name="parentNode"/>.
/// </summary>
/// <remarks>
/// Unwanted tags are stripped from <paramref name="parentNode"/> first. With XPath rules
/// present, every node selected by every rule contributes at most one entry: a child
/// object when the section has children, otherwise the transformation result (if not
/// null) when it has transformations, otherwise the node's de-entitized, trimmed text.
/// Without XPath rules, the children are extracted directly from
/// <paramref name="parentNode"/>.
/// </remarks>
/// <param name="name">Not read by this method.</param>
/// <param name="config">Section supplying XPath rules, children, transformations and the ForceArray flag.</param>
/// <param name="parentNode">Node the XPath rules are evaluated against.</param>
/// <param name="logicalParents">Ancestor chain so far; a copy with <paramref name="parentNode"/> appended is handed to nested extraction.</param>
/// <returns>
/// The collected <c>JArray</c> when ForceArray is set or more than one entry was collected;
/// otherwise an empty <c>JObject</c> for no entries, or the single entry itself.
/// </returns>
private object Extract(string name, ConfigSection config, HtmlAgilityPack.HtmlNode parentNode, List<HtmlAgilityPack.HtmlNode> logicalParents)
{
    this.RemoveUnwantedTags(config, parentNode);

    var results = new JArray();

    bool hasXPathRules = config.XPathRules != null && config.XPathRules.Count > 0;
    if (!hasXPathRules)
    {
        // No rules to narrow the selection: extract the children straight from parentNode.
        var direct = new JObject();
        this.ExtractChildren(config: config, parentNode: parentNode, container: direct, logicalParents: logicalParents);
        results.Add(direct);
    }
    else
    {
        var navigator = parentNode.CreateNavigator();
        foreach (var xpath in config.XPathRules)
        {
            // TODO: Add try catch Exception
            var matches = navigator.Select(xpath);
            if (matches == null || matches.Count <= 0)
            {
                continue;
            }

            // Each rule gets its own copy of the ancestor chain, extended by parentNode.
            var extendedParents = logicalParents.GetRange(0, logicalParents.Count);
            extendedParents.Add(parentNode);

            foreach (HtmlNodeNavigator match in matches)
            {
                bool hasChildren = config.Children != null && config.Children.Count > 0;
                if (hasChildren)
                {
                    var child = new JObject();
                    this.ExtractChildren(config: config, parentNode: match.CurrentNode, container: child, logicalParents: extendedParents);
                    results.Add(child);
                    continue;
                }

                bool hasTransformations = config.Transformations != null && config.Transformations.Count > 0;
                if (hasTransformations)
                {
                    var transformed = this.RunTransformations(config.Transformations, match.CurrentNode, extendedParents);
                    if (transformed != null)
                    {
                        results.Add(transformed);
                    }

                    continue;
                }

                if (match.Value != null)
                {
                    results.Add(HtmlEntity.DeEntitize(match.Value).Trim());
                }
            }
        }
    }

    if (config.ForceArray)
    {
        return results;
    }

    if (results.Count == 0)
    {
        return new JObject();
    }

    if (results.Count == 1)
    {
        return results.First;
    }

    return results;
}