/// <summary>
/// Loads tree nodes: recursively mirrors the children of an HTML node as
/// children of a tree node.
/// </summary>
/// <remarks>
/// Non-text nodes are captioned "&lt;TAG&gt;". Text nodes are captioned
/// "&lt;TEXT&gt;:" followed by their decoded text and are skipped entirely when
/// that text is blank. Each created tree node and its HTML node reference each
/// other through their <c>Tag</c> properties.
/// </remarks>
/// <param name="pTreeNode">Tree node that receives the generated children.</param>
/// <param name="pHtmlNode">HTML node whose child nodes are walked.</param>
private void AddTreeNode(TreeNode pTreeNode, HtmlNode pHtmlNode)
{
    for (int index = 0; index < pHtmlNode.Nodes.Count; index++)
    {
        HtmlNode child = pHtmlNode.Nodes[index];
        bool isText = child.TagName == "TEXT";

        // Whitespace-only text nodes get no entry in the tree.
        if (isText && child.TextDecoded.Trim().Length == 0)
        {
            continue;
        }

        string caption = "<" + child.TagName + ">";
        if (isText)
        {
            caption = caption + ":" + child.TextDecoded;
        }

        TreeNode branch = new TreeNode(caption, child.ImageIndex, child.ImageIndex);
        branch.Tag = child;
        child.Tag = branch;
        pTreeNode.Nodes.Add(branch);

        if (child.Nodes.Count > 0)
        {
            AddTreeNode(branch, child);
        }
    }
}
/// <summary>
/// Writes the label, then the input element, then either the error message or
/// the hint.
/// </summary>
/// <remarks>
/// The label, input and hint elements are created on first use and kept in
/// fields; the error span is rebuilt on every call.
/// </remarks>
/// <param name="writer">Destination for the generated markup.</param>
/// <param name="state">Supplies label, name, error message and hint.</param>
/// <param name="attributes">Extra attributes copied onto the input element.</param>
public virtual void Render(System.IO.TextWriter writer, UI.UIControlState state, params HtmlAttribute[] attributes)
{
    if (_Label == null)
    {
        _Label = new HtmlSimple("label", state.Label, new HtmlAttribute("for", state.Name));
    }
    _Label.Write(writer);

    if (_Input == null)
    {
        _Input = CreateInput(state);
    }
    foreach (HtmlAttribute attribute in attributes)
    {
        _Input.Attributes[attribute.Name] = attribute.Value;
    }
    this._Input.Write(writer);

    // An error message takes precedence over the hint.
    if (!string.IsNullOrEmpty(state.ErrorMessage))
    {
        new HtmlSimple("span", state.ErrorMessage, new HtmlAttribute("class", "error")).Write(writer);
        return;
    }

    if (string.IsNullOrEmpty(state.Hint))
    {
        return;
    }

    if (_Hint == null)
    {
        _Hint = new HtmlSimple("span", state.Hint, new HtmlAttribute("class", "hint"));
    }
    _Hint.Write(writer);
}
/// <summary>
/// Converts every child of <paramref name="node"/> by passing it to
/// <c>ConvertTo</c> with the same writer.
/// </summary>
/// <param name="node">Node whose children are converted.</param>
/// <param name="outText">Writer that receives the text output.</param>
private void ConvertContentTo(HtmlNode node, TextWriter outText)
{
    foreach (HtmlNode child in node.ChildNodes)
    {
        ConvertTo(child, outText);
    }
}
/// <summary>
/// Creates an extraction exception. Every argument is optional; the
/// source/query details are forwarded to the base exception, while the node,
/// entity and extraction attributes are stored on this instance.
/// </summary>
public ExtractionException(
    HtmlNode node = null,
    Entity obj = null,
    ExtractionAttribute extraction = null,
    Exception innerException = null,
    string sourceData = null,
    string beginString = null,
    string endString = null,
    string nodeQuery = null,
    string attribute = null,
    string regex = null,
    string userQuery = null,
    Uri url = null,
    string message = null,
    ListExtractionAttribute listExtraction = null)
    : base(
        innerException: innerException,
        sourceData: sourceData,
        beginString: beginString,
        endString: endString,
        nodeQuery: nodeQuery,
        attribute: attribute,
        regex: regex,
        userQuery: userQuery,
        url: url,
        message: message)
{
    Node = node;
    Entity = obj;
    Extraction = extraction;
    ListExtraction = listExtraction;
}
/// <summary>
/// Builds root/person/name through the fluent API and checks the serialized
/// markup of the root.
/// </summary>
public void TestNormal1()
{
    const string expected = "<root><person><name>masuda</name></person></root>";

    var document = new HtmlNode("root")
        .Node("person")
        .Node("name", "masuda")
        .Root;

    Assert.AreEqual(expected, document.Html);
}
/// <summary>
/// Wraps <paramref name="htmlNode"/> and turns on KO comment mode when the
/// node implements <c>IKoComment</c>.
/// </summary>
/// <param name="htmlNode">Node this builder operates on.</param>
public NodeBuilder(HtmlNode htmlNode)
{
    _htmlNode = htmlNode;

    // Kind of yuck (a runtime type check), but no better way is known.
    bool isKoComment = htmlNode is IKoComment;
    if (isKoComment)
    {
        InKoCommentMode = true;
    }
}
/// <summary>
/// Creates a fresh input element for <paramref name="state"/>, copies the
/// given attributes onto it (later duplicates overwrite earlier ones) and
/// writes it.
/// </summary>
/// <param name="writer">Destination for the generated markup.</param>
/// <param name="state">State passed to <c>CreateInput</c>.</param>
/// <param name="attributes">Attributes assigned to the input by name.</param>
protected void RenderInput(System.IO.TextWriter writer, UI.UIControlState state, params HtmlAttribute[] attributes)
{
    _Input = CreateInput(state);

    foreach (HtmlAttribute attribute in attributes)
    {
        _Input.Attributes[attribute.Name] = attribute.Value;
    }

    _Input.Write(writer);
}
/// <summary>
/// Makes every <c>TextElement</c> leaf of <paramref name="node"/> bold.
/// </summary>
/// <returns>Always <c>null</c>; the leaves are modified in place.</returns>
public override DependencyObject GenerateSingle(HtmlNode node, IHtmlTextBlock textBlock)
{
    foreach (var leaf in node.GetLeaves(textBlock))
    {
        if (!(leaf is TextElement))
        {
            continue;
        }

        ((TextElement)leaf).FontWeight = FontWeights.Bold;
    }

    return null;
}
/// <summary>
/// Groups the leaves of <paramref name="node"/> into text boxes. Consecutive
/// inlines are collected and flushed through <c>CreateTextBox</c>; any
/// non-inline leaf is emitted as its own element.
/// </summary>
/// <remarks>
/// When <c>UseTextSplitting</c> is set, a <c>Run</c> containing line breaks is
/// cut into several text boxes (used to avoid the 2048px max control size).
/// </remarks>
/// <returns>The generated elements, or <c>null</c> when there are none.</returns>
public DependencyObject[] Generate(HtmlNode node, IHtmlTextBlock textBlock)
{
    var output = new List<DependencyObject>();
    var pendingInlines = new List<Inline>();
    var topMargin = true;

    foreach (var leaf in node.GetLeaves(textBlock))
    {
        var mustSplit = leaf is Run && UseTextSplitting && ((Run)leaf).Text.Contains("\n");
        if (mustSplit)
        {
            var lines = ((Run)leaf).Text.Split('\n');

            // Re-join neighbouring lines into chunks so that no tiny
            // fragments end up in a text box of their own.
            var chunks = new List<string>();
            var chunk = "";
            for (var n = 0; n < lines.Length; n++)
            {
                var line = lines[n];
                if (n == 0)
                {
                    chunk += line;
                }
                else if (chunk.Length + line.Length > 16)
                {
                    chunks.Add(chunk);
                    chunk = line;
                }
                else
                {
                    chunk += "\n" + line;
                }
            }
            chunks.Add(chunk);

            // One text box per chunk; the last chunk stays pending so that
            // following inlines can join it.
            var lastChunk = chunks.Count - 1;
            for (var n = 0; n <= lastChunk; n++)
            {
                pendingInlines.Add(new Run { Text = chunks[n] });
                if (n != lastChunk)
                {
                    CreateTextBox(output, pendingInlines, textBlock, n == 0 && topMargin, false);
                }
            }

            topMargin = output.Count == 0;
            continue;
        }

        if (leaf is Inline)
        {
            pendingInlines.Add((Inline)leaf);
            continue;
        }

        CreateTextBox(output, pendingInlines, textBlock, topMargin, true);
        output.Add(leaf);
        topMargin = true;
    }

    CreateTextBox(output, pendingInlines, textBlock, topMargin, true);

    return output.Count == 0 ? null : output.ToArray();
}
/// <summary>
/// Records the value of attribute <paramref name="name"/> of
/// <paramref name="node"/> in <c>_links</c>, if the attribute exists.
/// </summary>
/// <remarks>An <c>href</c> attribute is only recorded for <c>link</c> tags.</remarks>
private void ParseLink(HtmlNode node, string name)
{
    HtmlAttribute attribute = node.Attributes[name];

    bool wanted = attribute != null
        && (name != "href" || node.Name == "link");

    if (wanted)
    {
        _links.Add(attribute.Value);
    }
}
/// <summary>
/// Concatenates the inner text of the direct child text nodes of
/// <paramref name="element"/>; text nested inside child elements is not
/// included.
/// </summary>
protected string GetText(HtmlNode element)
{
    var text = new StringBuilder();

    foreach (var child in element.ChildNodes)
    {
        if (!(child is HtmlTextNode))
        {
            continue;
        }

        text.Append(child.InnerText);
    }

    return text.ToString();
}
/// <summary>
/// Gets the index of a given node in the list.
/// </summary>
/// <param name="node">Node to look up.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// <c>GetNodeIndex</c> reported -1 for the node.
/// </exception>
public int this[HtmlNode node]
{
    get
    {
        int position = GetNodeIndex(node);
        if (position != -1)
        {
            return position;
        }

        throw new ArgumentOutOfRangeException(
            "node",
            "Node \"" + node.CloneNode(false).OuterHtml + "\" was not found in the collection");
    }
}
/// <summary>
/// Writes the label element, creating it on first use with a <c>for</c>
/// attribute set to the control name and the <c>control-label</c> class.
/// </summary>
protected void RenderLabel(System.IO.TextWriter writer, UI.UIControlState state)
{
    if (_Label == null)
    {
        _Label = new HtmlSimple("label", state.Label, new HtmlAttribute("for", state.Name), new HtmlAttribute("class", "control-label"));
    }

    _Label.Write(writer);
}
/// <summary>
/// Makes every <c>TextElement</c> leaf of <paramref name="node"/> italic.
/// </summary>
/// <returns>Always <c>null</c>; the leaves are modified in place.</returns>
public override DependencyObject GenerateSingle(HtmlNode node, IHtmlTextBlock textBlock)
{
    foreach (var leaf in node.GetLeaves(textBlock))
    {
        if (!(leaf is TextElement))
        {
            continue;
        }

        var text = (TextElement)leaf;
        // The italic constant is exposed by a differently named type on WinRT.
#if WINRT
        text.FontStyle = FontStyle.Italic;
#else
        text.FontStyle = FontStyles.Italic;
#endif
    }

    return null;
}
/// <summary>
/// Writes the plain-text rendering of <paramref name="node"/> to
/// <paramref name="outText"/>.
/// </summary>
/// <remarks>
/// Comments are dropped, documents and elements recurse into their children,
/// and a <c>p</c> element emits a CRLF before its content. Text is written
/// de-entitized unless it sits inside <c>script</c>/<c>style</c>, is an
/// overlapped closing element, or is only whitespace.
/// </remarks>
public void ConvertTo(HtmlNode node, TextWriter outText)
{
    switch (node.NodeType)
    {
        case HtmlNodeType.Comment:
            // Comments never reach the output.
            return;

        case HtmlNodeType.Document:
            ConvertContentTo(node, outText);
            return;

        case HtmlNodeType.Text:
            // Script and style content must not be output.
            string container = node.ParentNode.Name;
            if ((container == "script") || (container == "style"))
            {
                return;
            }

            string text = ((HtmlTextNode) node).Text;

            // A special closing node that was parsed as text is not content.
            if (HtmlNode.IsOverlappedClosingElement(text))
            {
                return;
            }

            // Skip text that is nothing but whitespace.
            if (text.Trim().Length == 0)
            {
                return;
            }

            outText.Write(HtmlEntity.DeEntitize(text));
            return;

        case HtmlNodeType.Element:
            if (node.Name == "p")
            {
                // Treat paragraphs as CRLF.
                outText.Write("\r\n");
            }

            if (node.HasChildNodes)
            {
                ConvertContentTo(node, outText);
            }
            return;
    }
}
/// <summary>
/// Writes a <c>help-block</c> span holding the error message when there is
/// one, otherwise the hint, otherwise a single space.
/// </summary>
/// <remarks>The span is rebuilt and stored in <c>_Hint</c> on every call.</remarks>
protected void RenderHintOrError(System.IO.TextWriter writer, UI.UIControlState state)
{
    if (!string.IsNullOrEmpty(state.ErrorMessage))
    {
        _Hint = new HtmlSimple("span", state.ErrorMessage, new HtmlAttribute("class", "help-block"));
        _Hint.Write(writer);
        return;
    }

    string hintText;
    if (string.IsNullOrEmpty(state.Hint))
    {
        hintText = " ";
    }
    else
    {
        hintText = state.Hint;
    }

    _Hint = new HtmlSimple("span", hintText, new HtmlAttribute("class", "help-block"));
    _Hint.Write(writer);
}
/// <summary>
/// Builds a small html/head/body tree (including empty <c>link</c> and
/// <c>img</c> nodes and a valued <c>text</c> node) and prints it to the console.
/// </summary>
public void TestStructure()
{
    HtmlNode<string> html = new HtmlNode<string>("html");

    HtmlNode<string> head = html.Children.Add("head");
    head.Children.Add("link").IsEmpty = true;

    HtmlNode<string> body = html.Children.Add("body");

    HtmlNode<string> text = body.Children.Add("p").Children.Add("text");
    text.Value = "Hello World";
    text.Children.Add("span");

    body.Children.Add("img").IsEmpty = true;

    Console.WriteLine(html.ToString());
}
/// <summary>
/// Renders each <c>li</c> child of <paramref name="node"/> as a two-column
/// grid: a bullet in the auto-sized first column and the item content stacked
/// in the star-sized second column.
/// </summary>
/// <remarks>
/// The first grid gets the paragraph margin on top and the last grid gets it
/// at the bottom. When the list has a single item, that item gets both; the
/// earlier version overwrote the top margin with the bottom-only margin.
/// </remarks>
/// <param name="node">List node whose <c>li</c> children are rendered.</param>
/// <param name="textBlock">Supplies font, foreground and paragraph margin.</param>
/// <returns>One grid per list item; empty when there are no items.</returns>
public DependencyObject[] Generate(HtmlNode node, IHtmlTextBlock textBlock)
{
    var list = new List<Grid>();
    foreach (var child in node.Children.Where(c => c.Value == "li"))
    {
        var grid = new Grid();
        grid.ColumnDefinitions.Add(new ColumnDefinition { Width = GridLength.Auto });
        grid.ColumnDefinitions.Add(new ColumnDefinition { Width = new GridLength(1, GridUnitType.Star) });

        // Bullet column.
        var tb = new TextBlock();
        tb.Foreground = textBlock.Foreground;
        tb.FontSize = textBlock.FontSize;
        tb.FontFamily = textBlock.FontFamily;
        tb.Margin = new Thickness();
        tb.Text = "• ";
        grid.Children.Add(tb);
        Grid.SetColumn(tb, 0);

        // Content column.
        var panel = new StackPanel();
        child.ToHtmlBlock();
        foreach (var c in child.GetLeaves(textBlock).OfType<UIElement>())
        {
            var frameworkElement = c as FrameworkElement;
            if (frameworkElement != null)
                frameworkElement.HorizontalAlignment = HorizontalAlignment.Stretch;
            panel.Children.Add(c);
        }
        grid.Children.Add(panel);
        Grid.SetColumn(panel, 1);

        list.Add(grid);
    }

    if (list.Count > 0)
    {
        var first = list[0];
        var last = list[list.Count - 1];

        first.Margin = new Thickness(0, textBlock.ParagraphMargin, 0, 0);

        // Keep whatever top margin the last grid already has: for a
        // single-item list it is the same grid as `first`.
        last.Margin = new Thickness(0, last.Margin.Top, 0, textBlock.ParagraphMargin);
    }

    return list.OfType<DependencyObject>().ToArray();
}
/// <summary>
/// Creates an <c>Image</c> for an image node from its <c>src</c> attribute,
/// honouring optional <c>width</c>/<c>height</c> attributes.
/// </summary>
/// <remarks>
/// Images declared as 1x1 are skipped. The image is registered with the text
/// block's size-dependent controls and updated once the bitmap has opened.
/// Any exception results in <c>null</c>.
/// </remarks>
/// <returns>The image, or <c>null</c> when skipped or on failure.</returns>
public override DependencyObject GenerateSingle(HtmlNode node, IHtmlTextBlock textBlock)
{
    try
    {
        var source = node.Attributes["src"];

        var userHeight = 0;
        if (node.Attributes.ContainsKey("height"))
        {
            int.TryParse(node.Attributes["height"], out userHeight);
        }

        var userWidth = 0;
        if (node.Attributes.ContainsKey("width"))
        {
            int.TryParse(node.Attributes["width"], out userWidth);
        }

        if (userHeight == 1 && userWidth == 1)
        {
            return null;
        }

        var image = new Image();
        var bitmap = new BitmapImage(new Uri(source));
        var block = new ImageBlock
        {
            Image = image,
            UserHeight = userHeight,
            UserWidth = userWidth,
            Source = bitmap
        };

        bitmap.ImageOpened += delegate { block.Update(textBlock.ActualWidth); };

        image.HorizontalAlignment = HorizontalAlignment.Left;
        image.Source = bitmap;
        image.Margin = new Thickness(0, textBlock.ParagraphMargin, 0, textBlock.ParagraphMargin);

        if (userWidth > 0)
        {
            image.Width = userWidth;
        }
        if (userHeight > 0)
        {
            image.Height = userHeight;
        }

        textBlock.SizeDependentControls.Add(block);
        return image;
    }
    catch
    {
        // Best effort: an image that cannot be built is simply left out.
        return null;
    }
}
/// <summary>
/// Enumerates the ancestors of <paramref name="node"/>, nearest first, adding
/// synthetic nodes where the chain is missing expected HTML structure.
/// </summary>
/// <remarks>
/// A <c>TbodyNode</c> is yielded before a <c>table</c> ancestor whose child on
/// the path is a <c>tr</c>. After the real ancestors, the topmost node
/// decides what follows: nothing for <c>html</c>; a new <c>HtmlNode</c> for
/// <c>body</c>/<c>head</c>; a <c>HeadNode</c> plus the <c>HtmlNode</c> for
/// head-only elements; a <c>BodyNode</c> plus the <c>HtmlNode</c> otherwise.
/// </remarks>
public IEnumerable<Xml.IQueryableNode> Parents(Xml.IQueryableNode node)
{
    var child = node;
    for (var ancestor = node.Parent(); ancestor != null; ancestor = ancestor.Parent())
    {
        var rowDirectlyInTable =
            string.Equals(child.Name.LocalName, "tr", StringComparison.OrdinalIgnoreCase)
            && string.Equals(ancestor.Name.LocalName, "table", StringComparison.OrdinalIgnoreCase);
        if (rowDirectlyInTable)
        {
            yield return new TbodyNode(child);
        }

        yield return ancestor;
        child = ancestor;
    }

    // `child` is now the topmost real node of the chain.
    var root = new HtmlNode();
    var topName = child.Name.LocalName.ToLowerInvariant();
    if (topName == "html")
    {
        yield break;
    }

    switch (topName)
    {
        case "body":
        case "head":
            break;

        case "title":
        case "base":
        case "link":
        case "style":
        case "meta":
        case "script":
        case "noscript":
        case "command":
            yield return new HeadNode(child, null, root);
            break;

        default:
            yield return new BodyNode(child, null, root);
            break;
    }

    yield return root;
}
/// <summary>
/// Builds two <c>person</c> nodes with attributes and child nodes through the
/// fluent API and checks the serialized markup of the root.
/// </summary>
public void TestNormal4()
{
    const string expected =
        "<persons>" +
        "<person id=\"1\"><name>masuda tomoaki</name><age>44</age></person>" +
        "<person id=\"2\"><name>yamada taro</name><age>20</age></person>" +
        "</persons>";

    var document = new HtmlNode("persons")
        .Node("person").AddAttr("id", "1")
            .AddNode("name", "masuda tomoaki")
            .AddNode("age", "44")
            .Parent
        .Node("person").AddAttr("id", "2")
            .AddNode("name", "yamada taro")
            .AddNode("age", "20")
        .Root;

    Assert.AreEqual(expected, document.Html);
}
/// <summary>
/// Adds a node. A <c>head</c> or <c>body</c> node replaces the one currently
/// held: the old one is removed from the children and the new one is stored
/// and added. Any other node goes to the base implementation.
/// </summary>
/// <param name="node">Node to add.</param>
public override void Add(HtmlNode node)
{
    if (node is head)
    {
        this.Children.Remove(this.head);
        this.head = node as head;
        this.Children.Add(this.head);
        return;
    }

    if (node is body)
    {
        this.Children.Remove(this.body);
        this.body = node as body;
        this.Children.Add(this.body);
        return;
    }

    base.Add(node);
}
/// <summary>
/// Renders a link node as an underlined, tappable text block wrapped in an
/// <c>InlineUIContainer</c>.
/// </summary>
/// <remarks>
/// Children contribute their inline leaves, or their raw value when they have
/// no leaves. On any exception the link is suppressed and the node's plain
/// leaves are returned instead.
/// </remarks>
public DependencyObject[] Generate(HtmlNode node, IHtmlTextBlock textBlock)
{
    try
    {
        var target = node.Attributes["href"];

        var linkText = new TextBlock();
        linkText.Foreground = Foreground;

        var container = new InlineUIContainer();
        container.Child = linkText;

        var underline = new Underline();
        foreach (var child in node.Children)
        {
            var leaves = child.GetLeaves(textBlock).ToArray();
            if (leaves.Length == 0)
            {
                if (!string.IsNullOrEmpty(child.Value))
                {
                    underline.Inlines.Add(new Run { Text = child.Value });
                }
                continue;
            }

            foreach (var inline in leaves.OfType<Inline>())
            {
                underline.Inlines.Add(inline);
            }
        }
        linkText.Inlines.Add(underline);

        var open = CreateLinkAction(linkText, target, textBlock);
        linkText.Tapped += (sender, e) =>
        {
            // Only react to taps nobody else has handled yet.
            if (e.Handled)
            {
                return;
            }

            e.Handled = true;
            open();
        };

        return new DependencyObject[] { container };
    }
    catch
    {
        return node.GetLeaves(textBlock); // suppress link
    }
}
/// <summary>
/// Runs the configured selector against <paramref name="element"/> and
/// returns the first match.
/// </summary>
/// <returns>
/// <c>null</c> when nothing matches; the first matching node when no
/// attribute name is configured; otherwise that node's attribute value
/// (<c>null</c> if the attribute is absent).
/// </returns>
public override dynamic Select(HtmlNode element)
{
#if !NET_CORE
    IList<HtmlNode> matches = element.QuerySelectorAll(_selectorText);
#else
    IList<HtmlNode> matches = element.QuerySelectorAll(_selectorText).ToList();
#endif

    if (matches == null || matches.Count == 0)
    {
        return null;
    }

    if (string.IsNullOrEmpty(_attrName))
    {
        return matches[0];
    }

    return matches[0].Attributes[_attrName]?.Value;
}
/// <summary>
/// Creates a fresh input element for <paramref name="state"/>, merges the
/// given attributes into it and writes it.
/// </summary>
/// <remarks>
/// Attributes with a <c>null</c> value are ignored. When the input already
/// has an attribute of that name, the new value is appended after a space
/// instead of replacing it.
/// </remarks>
protected void RenderInput(System.IO.TextWriter writer, UI.UIControlState state, params HtmlAttribute[] attributes)
{
    _Input = CreateInput(state);

    foreach (HtmlAttribute attribute in attributes)
    {
        if (attribute.Value == null)
        {
            continue;
        }

        object existing = (object)_Input.Attributes[attribute.Name];
        if (existing == null)
        {
            _Input.Attributes[attribute.Name] = attribute.Value;
        }
        else
        {
            _Input.Attributes[attribute.Name] = existing + " " + attribute.Value;
        }
    }

    _Input.Write(writer);
}
/// <summary>
/// Groups the leaves of <paramref name="node"/>: consecutive inlines are
/// collected and flushed through <c>CreateTextBox</c>, while every non-inline
/// leaf is emitted as its own element.
/// </summary>
/// <returns>The generated elements, or <c>null</c> when there are none.</returns>
public DependencyObject[] Generate(HtmlNode node, IHtmlTextBlock textBlock)
{
    var output = new List<DependencyObject>();
    var pendingInlines = new List<Inline>();

    foreach (var leaf in node.GetLeaves(textBlock))
    {
        var inline = leaf as Inline;
        if (inline != null)
        {
            pendingInlines.Add(inline);
            continue;
        }

        // A non-inline leaf ends the current run of inlines.
        CreateTextBox(output, pendingInlines, textBlock);
        output.Add(leaf);
    }

    CreateTextBox(output, pendingInlines, textBlock);

    return output.Count == 0 ? null : output.ToArray();
}
/// <summary>
/// Renders a link node as an underlined <c>Hyperlink</c> whose command runs
/// the link action unless the application is currently navigating.
/// </summary>
/// <remarks>
/// Children contribute their inline leaves, or their raw value when they have
/// no leaves. On any exception the link is suppressed and the node's plain
/// leaves are returned instead.
/// </remarks>
public DependencyObject[] Generate(HtmlNode node, IHtmlTextBlock textBlock)
{
    try
    {
        var target = node.Attributes["href"];

        var hyperlink = new Hyperlink();
        hyperlink.MouseOverForeground = textBlock.Foreground;
        hyperlink.Foreground = textBlock.Foreground;
        hyperlink.TextDecorations = TextDecorations.Underline;

        foreach (var child in node.Children)
        {
            var leaves = child.GetLeaves(textBlock).ToArray();
            if (leaves.Length == 0)
            {
                if (!string.IsNullOrEmpty(child.Value))
                {
                    hyperlink.Inlines.Add(new Run { Text = child.Value });
                }
                continue;
            }

            foreach (var inline in leaves.OfType<Inline>())
            {
                hyperlink.Inlines.Add(inline);
            }
        }

        var openLink = CreateLinkAction(hyperlink, target, textBlock);

        // Wrap the action so it does nothing while a navigation is in progress.
        var guarded = openLink;
        guarded = delegate
        {
            if (!PhoneApplication.IsNavigating)
                openLink();
        };
        hyperlink.Command = new RelayCommand(guarded);

        return new DependencyObject[] { hyperlink };
    }
    catch
    {
        return node.GetLeaves(textBlock); // suppress link
    }
}
/// <summary>
/// Calls <c>AddNewTextBox</c> for <paramref name="node"/> and then appends a
/// paragraph for the node to <c>_currentRichTextBox</c>.
/// </summary>
/// <param name="node">Node passed to both helper calls.</param>
private void AppendRichtextBox(HtmlNode node)
{
    AddNewTextBox(node);

    // Must run after AddNewTextBox: the paragraph goes into the current box.
    AppendParagraph(node, _currentRichTextBox);
}
//reads in scraped html files that contain projected points for each player in the league
//writes out a neatly formatted txt file containing week, player name, playerId, projected points,
//page index and position
//
//input file names are expected to look like <prefix>_<week>_..._<page>.<ext>; the week is the text
//between the first two underscores and the page index is the text between the last underscore and
//the first period of the file name
private static void Main(string[] args)
{
    //number of weeks tracked by the completeness check
    const int weekCount = 13;

    using (
        StreamWriter errorLogger =
            new StreamWriter("C:\\Users\\SomeFolder\\Logs\\" + DateTime.Now.ToString("MM_dd_yy_hh_mm_ss") + ".txt", false))
    {
        try
        {
            //get list of all input data files and instantiate new HtmlDoc
            string[] files = Directory.GetFiles(dataFilePath);
            HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
            htmlDoc.OptionFixNestedTags = true;

            //results keeps first-seen order; seenLines gives O(1) duplicate detection
            List<string> results = new List<string>();
            HashSet<string> seenLines = new HashSet<string>();

            //process each file, order does not matter
            foreach (string fileName in files)
            {
                //week and page are inferred from the file name only, so that underscores or
                //periods in the directory part of the path cannot shift the offsets
                string baseName = Path.GetFileName(fileName);
                int firstUnderscore = baseName.IndexOf('_');
                int secondUnderscore = baseName.IndexOf('_', firstUnderscore + 1);
                string week = baseName.Substring(firstUnderscore + 1, secondUnderscore - firstUnderscore - 1);
                int lastUnderScore = baseName.LastIndexOf('_');
                int firstPeriod = baseName.IndexOf('.');
                string playerPage = baseName.Substring(lastUnderScore + 1, firstPeriod - lastUnderScore - 1);

                htmlDoc.Load(fileName);
                if (htmlDoc.DocumentNode == null)
                {
                    continue;
                }

                HtmlAgilityPack.HtmlNode bodyNode = htmlDoc.DocumentNode.SelectSingleNode("//body");
                if (bodyNode == null)
                {
                    continue;
                }

                foreach (HtmlNode playerTable in bodyNode.Descendants("tbody"))
                {
                    foreach (HtmlNode playerRow in playerTable.Descendants("tr"))
                    {
                        try
                        {
                            List<HtmlNode> rowCells = playerRow.Descendants("td").ToList();
                            HtmlNode playerNameNode = rowCells[1];
                            HtmlNode projectedPointsNode = rowCells[5];

                            HtmlNode playerIdSubNode =
                                playerNameNode.Descendants("a")
                                    .FirstOrDefault(a => a.Attributes.Contains("data-ys-playerid"));
                            string playerId = playerIdSubNode.Attributes["data-ys-playerid"].Value;

                            //GetAttributeValue tolerates elements without a class attribute; indexing
                            //Attributes["class"].Value would throw on the first such element and
                            //lose the whole row
                            HtmlNode playerPosSubNode =
                                playerNameNode.Descendants("span")
                                    .FirstOrDefault(s => s.GetAttributeValue("class", string.Empty) == "Fz-xxs");
                            string positionHtml = playerPosSubNode.InnerHtml;
                            //position is whatever follows the first dash (the whole text if there is none)
                            string playerPos = positionHtml.Substring(positionHtml.IndexOf('-') + 1);

                            HtmlNode playerNameSubNode =
                                playerNameNode.Descendants("a")
                                    .FirstOrDefault(
                                        a => a.GetAttributeValue("class", string.Empty) == "Nowrap name F-link");
                            string playerName = playerNameSubNode.InnerHtml;

                            string projectedPoints = projectedPointsNode.InnerText;

                            string line = string.Format("{3}\t{0}\t{1}\t{2}\t{4}\t{5}", playerName, playerId,
                                projectedPoints, week, playerPage, playerPos);
                            if (seenLines.Add(line))
                            {
                                results.Add(line);
                            }
                        }
                        catch (Exception e)
                        {
                            //rows that are not player rows (too few cells, missing links) end up here;
                            //log and carry on with the next row
                            errorLogger.WriteLine(e.Message + "\r\n" + e.StackTrace);
                        }
                    }
                }
            }

            //sometimes scraper does not download all pages successfully
            //calculate the number of players that have projected points from each week
            //to ensure that new data was retrived for all players
            string[] weekLabels = new string[weekCount];
            for (int w = 0; w < weekCount; w++)
            {
                weekLabels[w] = (w + 1).ToString();
            }
            int[] weekCounts = new int[weekCount];

            using (StreamWriter sw = new StreamWriter("C:\\Users\\SomeFolder\\Data\\" + DateTime.Now.ToString("MM_dd_yy") + ".txt", false))
            {
                //header now names all six columns that each data line carries
                sw.WriteLine("Week\tPlayerName\tPlayerId\tPoints\tPageIndex\tPosition");
                foreach (string line in results)
                {
                    string week = line.Split('\t')[0];

                    //weeks other than "1".."13" are written out but not counted
                    int weekIndex = Array.IndexOf(weekLabels, week);
                    if (weekIndex >= 0)
                    {
                        weekCounts[weekIndex]++;
                    }

                    sw.WriteLine(line);
                }
            }

            for (int w = 0; w < weekCount; w++)
            {
                Console.WriteLine("Week {0} player count: {1}", w + 1, weekCounts[w]);
            }

            //ignore past weeks that have no player data and 0 player rows
            List<int> uniqueCounts = weekCounts.Where(w => w != 0).Distinct().ToList();

            //send an alert email with information showing which week was missing data
            if (uniqueCounts.Count != 1)
            {
                string report = "Weekly players found:\r\n" +
                                string.Join("", weekCounts.Select((count, index) => (index + 1) + ": " + count + "\r\n"));
                Emailer.SendEmail("Warning: incomplete weekly scoring data detected", report);
            }
        }
        catch (Exception e)
        {
            errorLogger.WriteLine(e.Message + "\r\n" + e.StackTrace);
            Emailer.SendEmail("Warning Error occurred during HTML Parser", e.Message + "\r\n" + e.StackTrace);
        }
    }
}
/// <summary>
/// Flattens a (possibly multi-row) table header into one column name per
/// table column, resolving <c>colspan</c> and <c>rowspan</c>.
/// </summary>
/// <remarks>
/// The column count is taken from the first header row. A cell with a
/// <c>colspan</c> attribute yields names of the form "name (n)" for each
/// column it covers. A cell with <c>rowspan</c> r is copied down into the
/// following r - 1 rows. The names of the last header row are returned.
/// Compared with the earlier version: <c>rowspan</c> no longer occupies one
/// row too many, carried columns to the right of a row's last cell are filled
/// in, and a row span is applied to every column a cell covers.
/// </remarks>
/// <param name="head">Node containing the header rows.</param>
/// <returns>Column names, empty when the header has no rows.</returns>
private static IList<string> ColumnsHead(HtmlNode head)
{
#if NET20
    var rows = IEnumerableExtensionMethods.ToList(head.Descendants("tr"));
#else
    var rows = head.Descendants("tr").ToList();
#endif
    if (0 == rows.Count)
    {
        return new List<string>();
    }

    // The first row determines how many columns the table has.
    var matrix = new Matrix<string>();
    foreach (var cell in rows[0].Descendants("th"))
    {
        var colspan = cell.Attributes["colspan"];
        var width = null == colspan ? 1 : XmlConvert.ToInt32(colspan.Value);
        for (var i = 0; i < width; i++)
        {
            matrix.Width++;
        }
    }

    // carry[x] = number of upcoming rows in which column x is still covered
    // by a cell that spans down from an earlier row.
    var carry = new List<int>();
    for (var i = 0; i < matrix.Width; i++)
    {
        carry.Add(0);
    }

    var y = 0;
    foreach (var row in rows)
    {
        var x = 0;
        matrix.Height++;
        foreach (var cell in row.Descendants("th"))
        {
            // Skip (and copy down) columns covered from above.
            while (x < carry.Count && 0 != carry[x])
            {
                matrix[x, y] = matrix[x, y - 1];
                carry[x]--;
                x++;
            }

            var rowspan = cell.Attributes["rowspan"];
            var rowsBelow = null == rowspan ? 0 : XmlConvert.ToInt32(rowspan.Value) - 1;

            var colspan = cell.Attributes["colspan"];
            var width = null == colspan ? 1 : XmlConvert.ToInt32(colspan.Value);
            var format = null == colspan ? "{0}" : "{0} ({1})";
            var name = ColumnName(cell);

            for (var i = 0; i < width; i++)
            {
                if (rowsBelow > 0 && x < carry.Count)
                {
                    carry[x] = rowsBelow;
                }

                matrix[x++, y] = string.Format(CultureInfo.InvariantCulture, format, name, i + 1);
            }
        }

        // Columns right of the row's last cell that are covered from above.
        for (; x < carry.Count; x++)
        {
            if (0 == carry[x])
            {
                continue;
            }

            matrix[x, y] = matrix[x, y - 1];
            carry[x]--;
        }

        y++;
    }

#if NET20
    var list = new List<string>();
    foreach (var element in matrix.Row(matrix.Height - 1))
    {
        list.Add(element);
    }

    return list;
#else
    return matrix.Row(matrix.Height - 1).ToList();
#endif
}
/// <summary>
/// Gets a memory stream representing an image from an explicit favicon location,
/// i.e. from a <c>&lt;link rel="icon"&gt;</c> or <c>&lt;link rel="shortcut icon"&gt;</c>
/// element in the page head.
/// </summary>
/// <param name="fullURI">The URI.</param>
/// <param name="ms">The memory stream (output).</param>
/// <param name="message">Any error message is sent back through this string.</param>
/// <returns>
/// The sentinel URI <c>http://success</c> when <c>getFavicon</c> succeeded for the
/// declared icon; otherwise the last response URI seen while loading the page
/// (<c>null</c> if no page could be loaded).
/// </returns>
private Uri getFromFaviconExplicitLocation(Uri fullURI, ref MemoryStream ms, ref string message)
{
    HtmlWeb hw = new HtmlWeb();
    hw.UserAgent = "Mozilla/5.0 (Windows 6.1; rv:27.0) Gecko/20100101 Firefox/27.0";

    // Some sites need a CookieContainer so that a non-empty page is returned
    // (try "http://www.prettygreen.com/"). Subscribed once here rather than on
    // every redirect, which used to stack up duplicate handlers.
    hw.PreRequest += request =>
    {
        request.CookieContainer = new System.Net.CookieContainer();
        return true;
    };

    HtmlAgilityPack.HtmlDocument hdoc = null;
    Uri responseURI = null;

    try
    {
        int counter = 0; // Protection from cyclic redirect
        Uri nextUri = fullURI;
        do
        {
            // HtmlWeb.Load will follow 301 and 302 redirects to alternate URIs
            hdoc = hw.Load(nextUri.AbsoluteUri);
            responseURI = hw.ResponseUri;

            // Old school meta refreshes need to be parsed
            nextUri = getMetaRefreshLink(responseURI, hdoc);
            counter++;
        } while (nextUri != null && counter < 16); // Sixteen redirects would be more than enough.
    }
    catch (Exception)
    {
        // Best effort: report how far we got.
        return responseURI;
    }

    if (hdoc == null)
    {
        return responseURI;
    }

    string faviconLocation = "";

    // SelectNodes returns null (not an empty collection) when nothing matches.
    HtmlNodeCollection links = hdoc.DocumentNode.SelectNodes("/html/head/link");
    if (links != null)
    {
        foreach (HtmlNode node in links)
        {
            HtmlAttribute rel = node.Attributes["rel"];
            if (rel == null || rel.Value == null)
            {
                continue;
            }

            // Ordinal comparison: rel values are identifiers, not linguistic text.
            bool isIcon = string.Equals(rel.Value, "shortcut icon", StringComparison.OrdinalIgnoreCase)
                          || string.Equals(rel.Value, "icon", StringComparison.OrdinalIgnoreCase);
            if (!isIcon)
            {
                continue;
            }

            HtmlAttribute href = node.Attributes["href"];
            if (href == null)
            {
                // An icon link without a target; keep looking.
                continue;
            }

            faviconLocation = href.Value;
            break;
        }
    }

    if (String.IsNullOrEmpty(faviconLocation))
    {
        return responseURI;
    }

    return getFavicon(new Uri(responseURI, faviconLocation), ref ms, ref message)
        ? new Uri("http://success")
        : responseURI;
}
// Downloads the xicidaili proxy-list HTML pages (pages 1-4 of each of the four
// listings) and adds every proxy parsed from them to the pool.
// A page without matching rows, or a row without a usable address/port, is
// skipped instead of aborting the remaining pages.
public void Downloadxicidaili(object DATA)
{
    try
    {
        List<string> list = new List<string>()
        {
            "http://www.xicidaili.com/nt/",
            "http://www.xicidaili.com/nn/",
            "http://www.xicidaili.com/wn/",
            "http://www.xicidaili.com/wt/"
        };

        foreach (var utlitem in list)
        {
            for (int i = 1; i < 5; i++)
            {
                string url = utlitem + i.ToString();

                // The page is fetched without a proxy (null).
                string html = HttpHelper.DownloadHtml(url, null);
                if (string.IsNullOrEmpty(html))
                {
                    LogUtils.ErrorLog(new Exception("代理地址:" + url + " 访问失败"));
                    continue;
                }

                HtmlDocument doc = new HtmlDocument();
                doc.LoadHtml(html);

                // SelectNodes returns null (not an empty collection) when nothing matches.
                HtmlNodeCollection collection = doc.DocumentNode.SelectNodes("//tr[@class='odd']");
                if (collection == null)
                {
                    continue;
                }

                foreach (var item in collection)
                {
                    HtmlNode addressCell = item.SelectSingleNode("td[2]");
                    HtmlNode portCell = item.SelectSingleNode("td[3]");

                    int port;
                    if (addressCell == null || portCell == null || !int.TryParse(portCell.InnerHtml, out port))
                    {
                        // Malformed row: nothing usable to add.
                        continue;
                    }

                    var proxy = new IpProxy();
                    proxy.Address = addressCell.InnerHtml;
                    proxy.Port = port;

                    Task.Run(() =>
                    {
                        PoolManageService.Add(proxy);
                    });
                }
            }
        }
    }
    catch (Exception e)
    {
        LogUtils.ErrorLog(new Exception("下载西刺代理IP池出现故障", e));
    }
}
/// <summary>
/// Lazily yields the siblings that precede <paramref name="node"/>, nearest
/// first, by following <c>PreviousSibling</c> until it is <c>null</c>.
/// </summary>
private static IEnumerable<HtmlNode> NodesBeforeSelfImpl(HtmlNode node)
{
    for (HtmlNode sibling = node.PreviousSibling; sibling != null; sibling = sibling.PreviousSibling)
    {
        yield return sibling;
    }
}
/// <summary>
/// Builds the absolute traseo.pl URL for a row: takes row part 2, its third
/// <c>div</c>, and the <c>href</c> of the <c>a</c> element inside it.
/// </summary>
public string ReturnLink(HtmlNode line)
{
    var part = ReturnPartOfRow(line, 2);
    var thirdDiv = part.Elements("div").ToList()[2];
    var anchor = thirdDiv.Element("a");
    var element = anchor.Attributes["href"].Value;

    return $@"https://www.traseo.pl{element}";
}
/// <summary>
/// Sanitizes an HTML fragment and returns the resulting markup.
/// </summary>
/// <remarks>
/// Steps, in order: resolve a base URI from a <c>base</c> tag (http, https and
/// ftp only); rewrite the first <c>style</c> tag by passing each rule body
/// through <c>ParseStyles</c> and prefixing every class name with "x_"; run
/// <c>SanitizeNode</c> over the document and remove the nodes it collected.
/// Works through the static fields <c>_options</c>, <c>_styleClassesNames</c>
/// and <c>_imagesAreBlocked</c>.
/// </remarks>
/// <param name="html">Markup to sanitize.</param>
/// <param name="imagesAreBlocked">Receives the value of <c>_imagesAreBlocked</c> after sanitizing.</param>
/// <param name="options">Options to use; <c>Options.Default</c> when <c>null</c>.</param>
/// <returns>The sanitized markup, or an empty string for null/empty input.</returns>
public static string Sanitize(string html, out bool imagesAreBlocked, Options options = null)
{
    imagesAreBlocked = false;
    _options = options ?? Options.Default;

    if (string.IsNullOrEmpty(html))
    {
        return string.Empty;
    }

    var doc = new HtmlDocument();
    doc.LoadHtml(html);

    // ReSharper disable UnusedVariable
    var encoding = doc.Encoding;
    // ReSharper restore UnusedVariable

    // Base URI for relative references, when a usable <base href> exists.
    Uri baseHref = null;
    var baseTag = doc.DocumentNode.SelectSingleNode("//base");
    if (baseTag != null && baseTag.HasAttributes)
    {
        var href = baseTag.Attributes.FirstOrDefault(attr => attr.Name == "href");
        if (href != null)
        {
            try
            {
                var url = new Uri(href.Value);
                var scheme = url.Scheme;
                if (scheme == Uri.UriSchemeHttp ||
                    scheme == Uri.UriSchemeHttps ||
                    scheme == Uri.UriSchemeFtp)
                {
                    baseHref = url;
                }
            }
            catch (Exception)
            {
                // Skip
            }
        }
    }

    var styleTag = doc.DocumentNode.SelectSingleNode("//style");
    _styleClassesNames = new Dictionary<string, string>();

    if (styleTag != null)
    {
        var rules = StyleTagPattern.Matches(styleTag.OuterHtml);
        var rewritten = string.Empty;

        foreach (Match rule in rules)
        {
            var body = rule.Groups[2].Value;
            if (string.IsNullOrEmpty(body))
            {
                // Skip empty values
                continue;
            }

            // Remember every ".class" selector together with its "x_" name.
            var selectors = rule.Groups[1].Value
                .Split(new[] { " ", "," }, StringSplitOptions.RemoveEmptyEntries);
            foreach (var selector in selectors)
            {
                if (!selector.StartsWith("."))
                {
                    continue;
                }

                var className = selector.Remove(0, 1);
                if (!_styleClassesNames.ContainsKey(className))
                {
                    _styleClassesNames.Add(className, className.Insert(0, "x_"));
                }
            }

            var cleanStyle = ParseStyles(body, baseHref);

            if (string.IsNullOrEmpty(rewritten))
            {
                rewritten = styleTag.OuterHtml;
            }

            rewritten = rewritten.Replace(body, cleanStyle);
        }

        if (_styleClassesNames.Count > 0)
        {
            if (string.IsNullOrEmpty(rewritten))
            {
                rewritten = styleTag.OuterHtml;
            }

            // must change css classes
            foreach (var pair in _styleClassesNames.ToList())
            {
                var oldSelector = "." + pair.Key;
                if (rewritten.IndexOf(oldSelector, StringComparison.Ordinal) > -1)
                {
                    rewritten = rewritten.Replace(oldSelector, "." + pair.Value);
                }
            }
        }

        if (!string.IsNullOrEmpty(rewritten))
        {
            var newNode = HtmlNode.CreateNode(rewritten);
            styleTag.ParentNode.ReplaceChild(newNode.ParentNode, styleTag);
        }
    }

    var nodesToRemove = new List<HtmlNode>();
    _imagesAreBlocked = false;

    SanitizeNode(doc.DocumentNode, nodesToRemove, baseHref);

    foreach (var node in nodesToRemove)
    {
        node.Remove();
    }

    imagesAreBlocked = _imagesAreBlocked;
    return doc.DocumentNode.OuterHtml;
}
/// <summary>
/// Builds the content string for a page node. When the node contains a
/// <c>div</c> with class <c>card</c>, the result comes from
/// <c>NativeStringConnect</c> applied to that part; otherwise from
/// <c>ConnectString(RemoveString(node))</c>.
/// </summary>
/// <param name="node">htmlRoot</param>
/// <returns>The assembled content string.</returns>
private string GetContent(HtmlNode node)
{
    if (node.ContainsFormat("div", "class", "card"))
    {
        return NativeStringConnect(node, node.GetHtmlFormat("div", "class", "card"));
    }

    return ConnectString(RemoveString(node));
}
/// <summary>
/// Returns entry 2 of the row's info node, as produced by
/// <c>ReturnDataFromInfoNode</c>.
/// </summary>
public string ReturnDistance(HtmlNode line)
{
    const int distanceIndex = 2;

    return ReturnDataFromInfoNode(line, distanceIndex);
}
/// <summary>
/// Tells whether <paramref name="node"/> contains a <c>div</c> with class
/// <c>card</c>, as reported by <c>ContainsFormat</c>.
/// </summary>
private bool IfCanGetContent(HtmlNode node)
{
    return node.ContainsFormat("div", "class", "card");
}
/// <summary>
/// Returns what <c>GetHtmlFormat</c> yields for a <c>section</c> element whose
/// <c>class</c> is <paramref name="className"/>.
/// </summary>
/// <param name="node">Node to search.</param>
/// <param name="className">Class attribute value to look for.</param>
private string GetSectionByClass(HtmlNode node, string className)
{
    string section = node.GetHtmlFormat("section", "class", className);
    return section;
}
/// <summary>
/// Returns the direct <c>div</c> child of <paramref name="line"/> at the given
/// zero-based position.
/// </summary>
/// <param name="line">Row node.</param>
/// <param name="index">Zero-based position among the row's <c>div</c> children.</param>
public HtmlNode ReturnPartOfRow(HtmlNode line, int index)
{
    var parts = line.Elements("div").ToList();
    return parts[index];
}
/// <summary>
/// Appends the representation of <paramref name="node"/> chosen by its
/// lower-cased tag name, then hands each child node to <c>AppendChildren</c>.
/// </summary>
/// <remarks>Unknown tags are only reported through <c>Debug.WriteLine</c>.</remarks>
private void AppendFromHtml(HtmlNode node, Paragraph paragraph, Span span)
{
    string tag = node.Name.ToLower();
    switch (tag)
    {
        // Block-level elements: a span followed by a line break.
        case "p":
        case "h1":
        case "h2":
        case "h3":
        case "ul":
            AppendSpan(node, paragraph, span, node.Name);
            AppendLineBreak(node, paragraph, span, false);
            break;

        case "blockquote":
            AppendSpan(node, paragraph, span, node.Name);
            break;

        case "li":
            AppendRun(node, paragraph, span);
            AppendSpan(node, paragraph, span, node.Name);
            AppendLineBreak(node, paragraph, span, false);
            break;

        case "br":
            AppendLineBreak(node, paragraph, span, true);
            break;

        // Inline formatting.
        case "i":
            AppendItalic(node, paragraph, span);
            break;

        case "b":
        case "strong":
            AppendBold(node, paragraph, span);
            break;

        case "u":
            AppendUnderline(node, paragraph, span);
            break;

        case "#text":
        case "span":
            AppendRun(node, paragraph, span);
            break;

        case "a":
            AppendHyperlink(node, paragraph, span);
            break;

        case "image":
        case "img":
            AppendImage(node, paragraph);
            break;

        default:
            Debug.WriteLine(String.Format("Element {0} not implemented", node.Name));
            break;
    }

    foreach (var child in node.ChildNodes)
    {
        AppendChildren(child, paragraph, span);
    }
}
/// <summary>
/// Returns the trimmed <c>title</c> attribute of the <c>span</c> element in
/// row part 5.
/// </summary>
/// <param name="line">Row node.</param>
/// <returns>
/// The trimmed title, or <c>null</c> when the span or its <c>title</c>
/// attribute is missing. Previously a span without a title attribute caused a
/// <see cref="NullReferenceException"/>.
/// </returns>
public string ReturnDificulty(HtmlNode line)
{
    return ReturnPartOfRow(line, 5).Element("span")?.Attributes["title"]?.Value?.Trim();
}
/// <summary>
/// Parses a manga overview page into a <c>MangaObject</c>: name, type, reading
/// direction, description, alternate names, authors, artists, genres, release
/// date, cover locations and chapter list.
/// </summary>
/// <remarks>
/// Cover URLs are derived from the number in the page's cover image URL: one
/// location is produced for every number from 0 up to that number. A page
/// without genre tags or chapter rows yields empty lists. The name is
/// de-entitized exactly once.
/// </remarks>
/// <param name="content">HTML of the manga page.</param>
/// <returns>The populated manga object.</returns>
public MangaObject ParseMangaObject(String content)
{
    HtmlDocument MangaObjectDocument = new HtmlDocument();
    MangaObjectDocument.LoadHtml(content);

    String MangaCoverPrime = MangaObjectDocument.GetElementbyId("mangaimg").SelectSingleNode(".//img").Attributes["src"].Value;
    Regex MangaCoverRegex = new Regex(@"(\d+)\.jpg");
    Int32 MangaCoverInt = Int32.Parse(MangaCoverRegex.Match(MangaCoverPrime).Groups[1].Value);
    List<LocationObject> Covers = new List<LocationObject>();
    for (Int32 mcI = 0; mcI <= MangaCoverInt; ++mcI)
    {
        Covers.Add(new LocationObject()
        {
            Url = MangaCoverRegex.Replace(MangaCoverPrime, String.Format("{0}.jpg", mcI)),
            ExtensionName = ExtensionDescriptionAttribute.Name,
            ExtensionLanguage = ExtensionDescriptionAttribute.Language
        });
    }
    Covers.TrimExcess();

    HtmlNode MangaProperties = MangaObjectDocument.GetElementbyId("mangaproperties").SelectSingleNode(".//table"),
        ChapterListing = MangaObjectDocument.GetElementbyId("listing"),
        MangaDesciption = MangaObjectDocument.GetElementbyId("readmangasum").SelectSingleNode(".//p");

    String MangaName = HtmlEntity.DeEntitize(MangaProperties.SelectSingleNode(".//tr[1]/td[2]/h2").InnerText),
        ReadDirection = MangaProperties.SelectSingleNode(".//tr[7]/td[2]").InnerText,
        ReleaseYear = Regex.Match(MangaProperties.SelectSingleNode(".//tr[3]/td[2]").InnerText, @"\d+").Value,
        Release = String.Format("01/01/{0}", String.IsNullOrWhiteSpace(ReleaseYear) ? "0001" : ReleaseYear),
        Desciption = MangaDesciption != null ? MangaDesciption.InnerText : String.Empty;

    // Anything other than the two known directions stays Unknown / right-to-left.
    MangaObjectType MangaType = MangaObjectType.Unknown;
    FlowDirection PageFlowDirection = FlowDirection.RightToLeft;
    switch (ReadDirection.ToLowerInvariant())
    {
        case "right to left":
            MangaType = MangaObjectType.Manga;
            PageFlowDirection = FlowDirection.RightToLeft;
            break;

        case "left to right":
            MangaType = MangaObjectType.Manhwa;
            PageFlowDirection = FlowDirection.LeftToRight;
            break;
    }

    String[] ListSeparator = new String[] { ", " };
    String[] AlternateNames = MangaProperties.SelectSingleNode(".//tr[2]/td[2]").InnerText.Split(ListSeparator, StringSplitOptions.RemoveEmptyEntries),
        Authors = MangaProperties.SelectSingleNode(".//tr[5]/td[2]").InnerText.Split(ListSeparator, StringSplitOptions.RemoveEmptyEntries),
        Artists = MangaProperties.SelectSingleNode(".//tr[6]/td[2]").InnerText.Split(ListSeparator, StringSplitOptions.RemoveEmptyEntries);

    // SelectNodes returns null (not an empty collection) when nothing matches.
    HtmlNodeCollection GenreNodes = MangaProperties.SelectSingleNode(".//tr[8]/td[2]").SelectNodes(".//span[contains(@class,'genretags')]");
    String[] Genres = GenreNodes == null
        ? new String[0]
        : (from HtmlNode GenreNode in GenreNodes
           select HtmlEntity.DeEntitize(GenreNode.InnerText)).ToArray();

    HtmlNodeCollection ChapterRows = ChapterListing == null
        ? null
        : ChapterListing.SelectNodes(".//tr[not(contains(@class,'table_head'))]");
    ChapterObject[] Chapters = ChapterRows == null
        ? new ChapterObject[0]
        : (from HtmlNode ChapterNode in ChapterRows
           let ChapterLink = ChapterNode.SelectSingleNode(".//td[1]/a")
           let ChapterLinkText = ChapterLink.InnerText
           select new ChapterObject()
           {
               // The chapter title follows the link; its first three characters are dropped.
               Name = HtmlEntity.DeEntitize(ChapterNode.SelectSingleNode(".//td[1]").LastChild.InnerText.Substring(3).Trim()),
               // The chapter number is the last space-separated token of the link text.
               Chapter = UInt32.Parse(ChapterLinkText.Substring(ChapterLinkText.LastIndexOf(' ') + 1)),
               Locations =
               {
                   new LocationObject()
                   {
                       ExtensionName = ExtensionDescriptionAttribute.Name,
                       ExtensionLanguage = ExtensionDescriptionAttribute.Language,
                       Url = String.Format("{0}{1}", ExtensionDescriptionAttribute.RootUrl, ChapterLink.Attributes["href"].Value)
                   }
               },
               Released = DateTime.ParseExact(ChapterNode.SelectSingleNode(".//td[2]").InnerText, "MM/dd/yyyy", CultureInfo.InvariantCulture)
           }).ToArray();

    return new MangaObject()
    {
        // MangaName was already de-entitized above; decoding it again would
        // corrupt names that legitimately contain entity-like text.
        Name = MangaName,
        MangaType = MangaType,
        PageFlowDirection = PageFlowDirection,
        Description = HtmlEntity.DeEntitize(Desciption),
        AlternateNames = AlternateNames.ToList(),
        CoverLocations = Covers,
        Authors = (from Author in Authors select HtmlEntity.DeEntitize(Author)).ToList(),
        Artists = (from Artist in Artists select HtmlEntity.DeEntitize(Artist)).ToList(),
        Genres = Genres.ToList(),
        Released = DateTime.ParseExact(Release, "MM/dd/yyyy", CultureInfo.InvariantCulture),
        Chapters = Chapters.ToList()
    };
}
/// <summary>
/// Returns the trimmed text of the <c>a</c> element found in row part 5 of the given row.
/// </summary>
/// <param name="line">Row node handed to <c>ReturnPartOfRow</c>.</param>
/// <returns>
/// The trimmed anchor text, or <c>null</c> when the row part or its anchor is missing.
/// </returns>
public string ReturnLocation(HtmlNode line)
{
    // Resolve the cell once instead of twice, and tolerate a missing cell the same way
    // ReturnAuthor does (null-conditional) rather than throwing a NullReferenceException.
    var anchor = ReturnPartOfRow(line, 5)?.Element("a");
    return anchor?.InnerText.Trim();
}
/// <summary>
/// Reads the locally installed GPU driver version, then queries the NVIDIA website for the
/// latest driver version, release date, release notes, description and download link.
/// </summary>
/// <remarks>
/// Results are written to the static members <c>OfflineGPUVersion</c>, <c>OnlineGPUVersion</c>,
/// <c>releaseDate</c>, <c>pdfURL</c>, <c>releaseDesc</c> and <c>downloadURL</c>. Each stage runs in
/// its own try/catch: a failure is reported on the console and the affected version is set to
/// "000.00".
/// </remarks>
private static void GpuInfo()
{
    Console.Write("Retrieving GPU information . . . ");
    int error = 0;
    string processURL = null;
    string confirmURL = null;
    string gpuURL = null;
    string gpuName = null;

    // query local driver version
    try
    {
        while (string.IsNullOrEmpty(gpuName))
        {
            gpuName = SettingManager.ReadSetting("GPU Name");
            if (string.IsNullOrEmpty(gpuName))
            {
                SettingManager.SetupSetting("GPU Name");
            }
        }

        // TODO: this is not the optimal code
        // The searcher is disposable; release it deterministically.
        using (ManagementObjectSearcher objectSearcher = new ManagementObjectSearcher("SELECT * FROM Win32_VideoController"))
        {
            foreach (ManagementObject obj in objectSearcher.Get())
            {
                if (obj["Description"].ToString() == gpuName)
                {
                    OfflineGPUVersion = obj["DriverVersion"].ToString().Replace(".", string.Empty).Substring(5);
                    OfflineGPUVersion = OfflineGPUVersion.Substring(0, 3) + "." + OfflineGPUVersion.Substring(3); // add dot
                    break;
                }

                // otherwise: not the configured gpu, keep looking
            }
        }
    }
    catch (Exception ex)
    {
        error++;
        OfflineGPUVersion = "000.00";
        Console.Write("ERROR!");
        LogManager.Log(ex.ToString(), LogManager.Level.ERROR);
        Console.WriteLine();
        Console.WriteLine(ex.StackTrace);
    }

    // In order to proceed, we must input what GPU we have.
    // Looking at the supported products on NVIDIA website for desktop and mobile GeForce series,
    // we can see that they're sharing drivers with other GPU families, the only thing we have to do is tell the website
    // if we're running a mobile or desktop GPU.
    int psID = 0;
    int pfID = 0;

    // gpuName is still null when the settings lookup above threw; treat that as a desktop GPU
    // instead of crashing with a NullReferenceException outside of any try block.
    bool isMobile = gpuName != null && gpuName.Contains("M");

    // Get correct gpu drivers:
    // you do not have to choose the exact GPU,
    // looking at supported products, we see that the same driver package includes
    // drivers for the majority GPU family.
    if (isMobile)
    {
        // mobile | notebook
        psID = 99;  // GeForce 900M-series (M for Mobile)
        pfID = 758; // GTX 970M
    }
    else
    {
        // desktop
        psID = 98;  // GeForce 900-series
        pfID = 756; // GTX 970
    }

    // finish request
    try
    {
        gpuURL = "http://www.nvidia.com/Download/processDriver.aspx?psid=" + psID.ToString() + "&pfid=" + pfID.ToString() + "&rpf=1&osid=" + osID.ToString() + "&lid=" + langID.ToString() + "&ctk=0";

        // using-blocks guarantee the client, stream and reader are released even when the read fails.
        using (WebClient client = new WebClient())
        using (Stream stream = client.OpenRead(gpuURL))
        using (StreamReader reader = new StreamReader(stream))
        {
            processURL = reader.ReadToEnd();
        }
    }
    catch (Exception ex)
    {
        if (error == 0)
        {
            Console.Write("ERROR!");
            Console.WriteLine();
            error++;
        }

        Console.WriteLine(ex.StackTrace);
    }

    try
    {
        // HTMLAgilityPack
        // thanks to http://www.codeproject.com/Articles/691119/Html-Agility-Pack-Massive-information-extraction-f for a great article
        HtmlWeb htmlWeb = new HtmlWeb();
        HtmlAgilityPack.HtmlDocument htmlDocument = htmlWeb.Load(processURL);

        // get version
        HtmlNode tdVer = htmlDocument.DocumentNode.Descendants().SingleOrDefault(x => x.Id == "tdVersion");
        OnlineGPUVersion = tdVer.InnerHtml.Trim().Substring(0, 6);

        // get release date
        HtmlNode tdReleaseDate = htmlDocument.DocumentNode.Descendants().SingleOrDefault(x => x.Id == "tdReleaseDate");
        var dates = tdReleaseDate.InnerHtml.Trim();

        // not the best code, but does the job, might come back to cleanup in the future
        int status = 0;
        int year = 0;
        int month = 0;
        int day = 0;

        foreach (var substring in dates.Split('.'))
        {
            status++; // goes up starting from 1, being the year, followed by month then day.

            switch (status)
            {
                // year
                case 1:
                    year = Convert.ToInt32(substring);
                    break;

                // month
                case 2:
                    month = Convert.ToInt32(substring);
                    break;

                // day
                case 3:
                    day = Convert.ToInt32(substring);
                    break;

                default:
                    LogManager.Log("The status: '" + status + "' is not a recognized status!", LogManager.Level.ERROR);
                    break;
            }
        }

        releaseDate = new DateTime(year, month, day); // follows the ISO 8601 standard

        IEnumerable<HtmlNode> links = htmlDocument.DocumentNode.Descendants("a").Where(x => x.Attributes.Contains("href"));
        foreach (var link in links)
        {
            // get driver URL
            if (link.Attributes["href"].Value.Contains("/content/DriverDownload-March2009/"))
            {
                confirmURL = "http://www.nvidia.com" + link.Attributes["href"].Value.Trim();
            }

            // get release notes URL
            if (link.Attributes["href"].Value.Contains("release-notes.pdf"))
            {
                pdfURL = link.Attributes["href"].Value.Trim();
            }
        }

        if (pdfURL == null)
        {
            if (psID == 98) // if desktop
            {
                pdfURL = "http://us.download.nvidia.com/Windows/" + OnlineGPUVersion + "/" + OnlineGPUVersion + "-win10-win8-win7-desktop-release-notes.pdf";
            }
            else
            {
                pdfURL = "http://us.download.nvidia.com/Windows/" + OnlineGPUVersion + "/" + OnlineGPUVersion + "-win10-win8-win7-notebook-release-notes.pdf";
            }

            LogManager.Log("No release notes found, but a link to the notes has been crafted by following the template Nvidia uses.", LogManager.Level.INFO);
        }

        // get driver desc
        releaseDesc = htmlDocument.DocumentNode.SelectSingleNode("//div[@id='tab1_content']").InnerHtml.Trim();
        releaseDesc = HtmlToText.ConvertHtml(releaseDesc + ".", isMobile);

        // Remove not needed information
        if (psID == 98) // desktop
        {
            releaseDesc = releaseDesc.Substring(297, releaseDesc.Length - 297).Trim();
        }
        else // mobile
        {
            releaseDesc = releaseDesc.Substring(878, releaseDesc.Length - 878).Trim();
        }

        // get download link
        htmlDocument = htmlWeb.Load(confirmURL);
        links = htmlDocument.DocumentNode.Descendants("a").Where(x => x.Attributes.Contains("href"));
        foreach (var link in links)
        {
            if (link.Attributes["href"].Value.Contains("download.nvidia"))
            {
                downloadURL = link.Attributes["href"].Value.Trim();
                break; // don't need to keep search after we've found what we searched for
            }
        }
    }
    catch (Exception ex)
    {
        OnlineGPUVersion = "000.00";
        LogManager.Log(ex.Message, LogManager.Level.ERROR);
        if (error == 0)
        {
            Console.Write("ERROR!");
            Console.WriteLine();
            error++;
        }

        Console.WriteLine(ex.StackTrace);
    }

    if (error == 0)
    {
        Console.Write("OK!");
        Console.WriteLine();
    }

    if (debug == true)
    {
        Console.WriteLine("gpuURL: " + gpuURL);
        Console.WriteLine("processURL: " + processURL);
        Console.WriteLine("confirmURL: " + confirmURL);
        Console.WriteLine("downloadURL: " + downloadURL);
        Console.WriteLine("pdfURL: " + pdfURL);
        Console.WriteLine("releaseDate: " + releaseDate.ToShortDateString());
        Console.WriteLine("OfflineGPUVersion: " + OfflineGPUVersion);
        Console.WriteLine("OnlineGPUVersion: " + OnlineGPUVersion);
    }
}
/// <summary>
/// Returns the text of row part 4 with line feeds, tabs and carriage returns removed and the
/// result trimmed.
/// </summary>
/// <param name="line">Row node handed to <c>ReturnPartOfRow</c>.</param>
/// <returns>The cleaned text, or <c>null</c> when <c>ReturnPartOfRow</c> yields <c>null</c>.</returns>
public string ReturnAuthor(HtmlNode line)
{
    var authorCell = ReturnPartOfRow(line, 4);
    if (authorCell == null)
    {
        return null;
    }

    var rawText = authorCell.InnerText;
    return rawText
        .Replace("\n", string.Empty)
        .Replace("\t", string.Empty)
        .Replace("\r", string.Empty)
        .Trim();
}
/// <summary>
/// For every row of <paramref name="listSheet"/> not flagged "giveUpGrab", reads the saved page
/// for that row, extracts the satisfaction percentage and comment count from its "comment" div,
/// and appends one result row to <paramref name="resultEW"/>.
/// </summary>
/// <param name="listSheet">Sheet providing the rows and their page URLs.</param>
/// <param name="pageSourceDir">Directory used to resolve the locally saved page file.</param>
/// <param name="resultEW">Writer receiving one output row per processed input row.</param>
/// <remarks>
/// Any failure while reading or parsing a page is logged and then rethrown, which stops the loop.
/// </remarks>
private void GetList(IListSheet listSheet, string pageSourceDir, ExcelWriter resultEW)
{
    for (int i = 0; i < listSheet.RowCount; i++)
    {
        Dictionary<string, string> row = listSheet.GetRow(i);
        if (row["giveUpGrab"] == "是")
        {
            continue;
        }

        string pageUrl = listSheet.PageUrlList[i];
        string localFilePath = this.RunPage.GetFilePath(pageUrl, pageSourceDir);
        string productCode = row["productCode"];
        string productName = row["productName"];
        string productCurrentPrice = row["productCurrentPrice"];
        string productOldPrice = row["productOldPrice"];
        string category1Code = row["category1Code"];
        string category2Code = row["category2Code"];
        string category3Code = row["category3Code"];
        string category1Name = row["category1Name"];
        string category2Name = row["category2Name"];
        string category3Name = row["category3Name"];
        string standard = row["standard"];

        int totalCommentCount = 0;
        Nullable<decimal> hPer = null;

        try
        {
            string webPageHtml;

            // The reader was previously never closed; the using-block releases the file handle
            // even when reading throws.
            using (TextReader tr = new StreamReader(localFilePath))
            {
                webPageHtml = tr.ReadToEnd();
            }

            HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
            htmlDoc.LoadHtml(webPageHtml);

            HtmlNode commentNode = htmlDoc.DocumentNode.SelectSingleNode("//div[@class=\"comment\"]");

            HtmlNode pointNode = commentNode.SelectSingleNode("./div[@class=\"point\"]");
            if (pointNode != null)
            {
                // Drop the last character before parsing (presumably a trailing percent sign).
                string str = pointNode.InnerText.Trim().Replace(" ", "");
                hPer = decimal.Parse(str.Substring(0, str.Length - 1));
            }

            HtmlNode countNode = commentNode.SelectSingleNode("./div[@class=\"count\"]/font");
            if (countNode != null)
            {
                string str = countNode.InnerText.Trim().Replace(" ", "");
                totalCommentCount = int.Parse(str);
            }
        }
        catch (Exception ex)
        {
            this.RunPage.InvokeAppendLogText("读取出错. url = " + pageUrl + ". " + ex.Message, LogLevelType.Error, true);

            // "throw;" keeps the original stack trace; "throw ex;" would reset it to this line.
            throw;
        }

        Dictionary<string, object> f2vs = new Dictionary<string, object>();
        f2vs.Add("商品名称", productName);
        if (!CommonUtil.IsNullOrBlank(productCurrentPrice))
        {
            f2vs.Add("价格", decimal.Parse(productCurrentPrice));
        }

        f2vs.Add("一级分类", category1Name);
        f2vs.Add("二级分类", category2Name);
        f2vs.Add("三级分类", category3Name);
        f2vs.Add("规格", standard);
        f2vs.Add("评论数", totalCommentCount);
        if (hPer != null)
        {
            f2vs.Add("满意度", hPer);
        }

        f2vs.Add("url", pageUrl);
        if (!CommonUtil.IsNullOrBlank(productOldPrice))
        {
            f2vs.Add("原价", decimal.Parse(productOldPrice));
        }

        f2vs.Add("商品编码", productCode);
        f2vs.Add("一级分类编码", category1Code);
        f2vs.Add("二级分类编码", category2Code);
        f2vs.Add("三级分类编码", category3Code);
        resultEW.AddRow(f2vs);
    }
}
/// <summary>
/// Emits generated source for one element node: optional <c>foreach</c>/<c>if</c>/<c>else if</c>/
/// <c>else</c> wrappers taken from the element's <c>v-*</c> attributes, a <c>Write(...)</c> call for
/// the opening tag, the processed children, and a <c>Write(...)</c> call for the closing tag.
/// </summary>
/// <param name="sourceDocument">Builder receiving the generated source lines.</param>
/// <param name="node">Element to translate; <c>template</c> elements emit only their children.</param>
private void ProcessElementNode(StringBuilder sourceDocument, HtmlNode node)
{
    // TODO: there are whitespace inconsistencies with the use of multiple source documents,
    // attempt to adhere to whitespace from template
    bool isTemplateTag = node.Name == "template";
    var renderedAttributes = new StringBuilder();

    string forExpression = null;
    string ifExpression = null;
    string elseIfExpression = null;
    bool hasElse = false;
    bool wantsPadding = false;
    string padding = string.Empty;

    foreach (var attribute in node.Attributes)
    {
        switch (attribute.Name)
        {
            case "v-for":
                forExpression = attribute.Value;
                wantsPadding = true;
                break;

            case "v-if":
            case "v-show":
                ifExpression = attribute.Value;
                wantsPadding = true;
                break;

            case "v-else-if":
                elseIfExpression = attribute.Value;
                wantsPadding = true;
                break;

            case "v-else":
                hasElse = true;
                wantsPadding = true;
                break;

            default:
                if (attribute.Name.StartsWith(":") || attribute.Name.StartsWith("v-bind:"))
                {
                    // Bound attribute: the value is spliced in as an expression of the generated code.
                    var attributeName = attribute.Name.Substring(attribute.Name.IndexOf(":") + 1);
                    var attributeValue = $"\" + ({attribute.Value}) + \"";
                    renderedAttributes.Append($" {attributeName}=\\\"{attributeValue}\\\"");
                }
                else
                {
                    // Plain attribute: copied through as literal text.
                    renderedAttributes.Append($" {attribute.Name}=\\\"{attribute.Value}\\\"");
                }

                break;
        }
    }

    // TODO: error out if more than one of "if, elseif, else" attributes are on the same element
    if (wantsPadding)
    {
        var textBeforeNode = template.Substring(0, node.StreamPosition);
        var newlineIndex = textBeforeNode.LastIndexOf('\n');
        if (newlineIndex > -1)
        {
            padding = textBeforeNode.Substring(newlineIndex);
        }

        if (padding.Length > 0)
        {
            // TODO: padding logic needs to be reconsidered with existence of multiple string builders
            // remove padding to avoid duplication, 12 chars for Write(@"");\n - TODO: what if \r\n ?
            // (the removal itself is currently disabled)
        }
    }

    if (forExpression != null)
    {
        sourceDocument.AppendLine($"foreach({forExpression}){{");
    }

    if (ifExpression != null)
    {
        sourceDocument.AppendLine($"if({ifExpression}){{");
    }

    // TODO: need to ensure no white space is written before elseIf and else
    if (elseIfExpression != null)
    {
        sourceDocument.AppendLine($"else if({elseIfExpression}){{");
    }

    if (hasElse)
    {
        sourceDocument.AppendLine("else{");
    }

    // (writing the padding back out is currently disabled)

    var childNodes = node.ChildNodes;

    // An element written as "<x></x>" in the template must stay a tag pair, not become self-closing.
    bool writeAsTagPair = childNodes.Any()
        || template.IndexOf("><", node.StreamPosition) == template.IndexOf(">", node.StreamPosition);

    if (writeAsTagPair)
    {
        if (!isTemplateTag)
        {
            sourceDocument.AppendLine($"Write(\"<{node.Name}{renderedAttributes}>\");");
        }

        foreach (var childNode in childNodes)
        {
            ProcessNode(sourceDocument, childNode);
        }

        if (!isTemplateTag)
        {
            sourceDocument.AppendLine($"Write(\"</{node.Name}>\");");
        }
    }
    else if (!isTemplateTag)
    {
        sourceDocument.AppendLine($"Write(\"<{node.Name}{renderedAttributes}/>\");");
    }

    bool openedConditional = ifExpression != null || elseIfExpression != null || hasElse;
    if (openedConditional)
    {
        sourceDocument.AppendLine("}");
    }

    if (forExpression != null)
    {
        sourceDocument.AppendLine("}");
    }
}
/// <summary>
/// Converts one HTML node into XAML inlines and adds them to the parent span, or to
/// <paramref name="collection"/> when there is no parent span.
/// </summary>
/// <param name="parent">Span that receives the inlines; may be <c>null</c>.</param>
/// <param name="node">HTML node to convert.</param>
/// <param name="collection">Fallback inline collection used when <paramref name="parent"/> is <c>null</c>.</param>
/// <param name="root">Text block handed to <c>GetLinkCommand</c> for release links.</param>
/// <remarks>
/// Text nodes become runs; <c>b</c>/<c>strong</c>, <c>u</c> and <c>a</c> elements are converted
/// recursively. Every other element, and every other node type, is skipped together with its content.
/// </remarks>
private static void ProcessInline(Span parent, HtmlNode node, InlineCollection collection, TextBlock root)
{
    InlineCollection parentCollection = parent != null ? parent.Inlines : collection;

    switch (node.NodeType)
    {
        case HtmlNodeType.Text:
            parentCollection.Add(new Run { Text = HtmlEntity.DeEntitize(node.InnerText) });
            break;

        case HtmlNodeType.Element:
            switch (node.Name)
            {
                case "br":
                    // Deliberately no LineBreak (original note: inlines already break lines).
                    return;

                case "script":
                    // Script tags are not supported.
                    return;

                case "b":
                case "strong":
                    var bold = new Bold();
                    parentCollection.Add(bold);
                    foreach (var boldChild in node.ChildNodes)
                    {
                        ProcessInline(bold, boldChild, collection, root);
                    }

                    return;

                case "u":
                    var underline = new Underline();
                    parentCollection.Add(underline);
                    foreach (var underlineChild in node.ChildNodes)
                    {
                        ProcessInline(underline, underlineChild, collection, root);
                    }

                    return;

                case "a":
                {
                    var url = node.Attributes.FirstOrDefault(a => string.Equals(a.Name, "href", StringComparison.OrdinalIgnoreCase));
                    if (url == null)
                    {
                        return;
                    }

                    var urlValue = url.Value;
                    var hyperlink = new Hyperlink
                    {
                        Foreground = new SolidColorBrush(Color.FromArgb(255, 163, 39, 39))
                    };

                    bool isReleaseLink = !string.IsNullOrEmpty(urlValue)
                        && (urlValue.StartsWith("https://www.anilibria.tv/release/", StringComparison.Ordinal)
                            || urlValue.StartsWith("http://www.anilibria.tv/release/", StringComparison.Ordinal));

                    if (isReleaseLink)
                    {
                        // Release links are routed through the link command instead of navigating.
                        hyperlink.Click += (sender, args) =>
                        {
                            var linkCommand = GetLinkCommand(root);
                            if (linkCommand != null)
                            {
                                linkCommand.Execute(urlValue);
                            }
                        };
                    }
                    else
                    {
                        // "new Uri(value)" throws on an empty or relative href, which aborted the
                        // whole conversion. Only set a target when the href is an absolute URI;
                        // otherwise the link text is still rendered, just without a target.
                        Uri navigateUri;
                        if (Uri.TryCreate(urlValue, UriKind.Absolute, out navigateUri))
                        {
                            hyperlink.NavigateUri = navigateUri;
                        }
                    }

                    parentCollection.Add(hyperlink);
                    foreach (var linkChild in node.ChildNodes)
                    {
                        ProcessInline(hyperlink, linkChild, collection, root);
                    }

                    return;
                }

                default:
                    return;
            }

        default:
            return;
    }
}
/// <summary>
/// Processes every stylesheet <c>link</c> element of the document: loads the CSS text, rewrites
/// it through <c>ProcessUrls</c>, and then either writes it to a file under <c>OutputPath</c>
/// (when <c>CreateExternalFiles</c> is set) or replaces the link with an inline <c>style</c> element.
/// </summary>
/// <param name="doc">Document whose link elements are processed in place.</param>
/// <remarks>
/// A stylesheet that cannot be retrieved is skipped and its link is left untouched.
/// </remarks>
private void ProcessCss(HtmlDocument doc)
{
    var links = doc.DocumentNode.SelectNodes("//link");
    if (links == null)
    {
        return;
    }

    foreach (var link in links)
    {
        var url = link.Attributes["href"]?.Value;
        var rel = link.Attributes["rel"]?.Value;

        // HTML link types are ASCII case-insensitive, so rel="Stylesheet" must match as well.
        if (!string.Equals(rel, "stylesheet", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        if (url == null)
        {
            continue;
        }

        string cssText;
        try
        {
            if (url.StartsWith("http", StringComparison.Ordinal))
            {
                using (var http = new WebClient())
                {
                    cssText = http.DownloadString(url);
                }
            }
            else if (url.StartsWith("file:///", StringComparison.Ordinal))
            {
                // Strip the 8-character "file:///" prefix to get a local path.
                url = url.Substring(8);
                cssText = File.ReadAllText(WebUtility.UrlDecode(url));
            }
            else
            {
                // Relative path: resolve against the base URI, then load from web or disk.
                var uri = new Uri(BaseUri, url);
                url = uri.AbsoluteUri;
                if (url.StartsWith("http", StringComparison.Ordinal) && url.Contains("://"))
                {
                    using (var http = new WebClient())
                    {
                        cssText = http.DownloadString(url);
                    }
                }
                else
                {
                    cssText = File.ReadAllText(uri.LocalPath);
                }
            }
        }
        catch
        {
            // Best effort: an error occurred retrieving a file - continue with the next link.
            continue;
        }

        cssText = ProcessUrls(cssText, url);

        if (CreateExternalFiles)
        {
            var justFilename = Path.GetFileName(url);
            string justExt = Path.GetExtension(url);
            if (string.IsNullOrEmpty(justExt))
            {
                // No extension to keep: fall back to a generated .css name.
                justFilename = DataUtils.GenerateUniqueId(10) + ".css";
            }

            var fullPath = Path.Combine(OutputPath, justFilename);
            File.WriteAllText(fullPath, cssText);
            link.Attributes["href"].Value = justFilename;
        }
        else
        {
            var el = new HtmlNode(HtmlNodeType.Element, doc, ctr++);
            el.Name = "style";
            el.InnerHtml = Environment.NewLine + cssText + Environment.NewLine;

            link.ParentNode.InsertAfter(el, link);
            link.Remove();
        }
    }
}
/// <summary>
/// Parses the text returned by <c>GetInnerText</c> for the node as an integer.
/// </summary>
/// <param name="node">Node whose inner text holds the number.</param>
/// <returns>The parsed integer.</returns>
/// <remarks>
/// NOTE(review): the name suggests <c>GetInnerText</c> de-entitizes the text - confirm in that helper.
/// </remarks>
private static int deEntitizeAndParse(HtmlNode node)
{
    string innerText = node.GetInnerText();
    int parsed = int.Parse(innerText);
    return parsed;
}
/// <summary>
/// Writes a plain-text rendering of the node to <paramref name="outText"/>, using marker
/// strings for code blocks, lists, list items, headings and images.
/// </summary>
/// <param name="node">Node to render.</param>
/// <param name="outText">Writer receiving the text.</param>
/// <remarks>
/// Nothing is written once <c>hasContentEnd</c> is set. That flag is set here when, after an
/// <c>h1</c> has been seen, a <c>div</c> has a class containing one of the <c>DIV_CLASS_END</c> entries.
/// </remarks>
private void ConvertToText(HtmlNode node, TextWriter outText)
{
    if (hasContentEnd)
    {
        return;
    }

    switch (node.NodeType)
    {
        case HtmlNodeType.Comment:
            // comments are never output
            return;

        case HtmlNodeType.Document:
            ConvertContentTo(node, outText);
            return;

        case HtmlNodeType.Text:
        {
            // text inside script and style must not be output
            string parentName = node.ParentNode.Name;
            if (parentName == "script" || parentName == "style")
            {
                return;
            }

            string text = ((HtmlTextNode)node).Text;

            // a special closing node that was output as text is skipped
            if (HtmlNode.IsOverlappedClosingElement(text))
            {
                return;
            }

            // only write text that is more than a bunch of whitespace
            if (text.Trim().Length > 0)
            {
                outText.Write(HtmlEntity.DeEntitize(text));
            }

            return;
        }

        case HtmlNodeType.Element:
        {
            // Marker written after the children; at most one element kind sets it.
            string closingMarker = null;

            switch (node.Name)
            {
                case "pre":
                    outText.Write("\r\n^\r\n");
                    closingMarker = "\r\nⱽ\r\n";
                    break;

                case "ol":
                case "ul":
                    outText.Write("\r\n⌐\r\n");
                    closingMarker = "\r\n┘\r\n";
                    break;

                case "li":
                    outText.Write("\r\n● ");
                    break;

                case "div":
                    outText.Write("\r\n");
                    if (hasH1 && !hasContentEnd)
                    {
                        var css = node.getAttribute("class");
                        if (!string.IsNullOrEmpty(css) && DIV_CLASS_END.Any(x => css.IndexOf(x) != -1))
                        {
                            hasContentEnd = true;
                        }
                    }

                    break;

                case "p":
                    outText.Write("\r\n");
                    break;

                case "h2":
                case "h3":
                case "h4":
                case "h5":
                case "h6":
                    outText.Write("\r\n■ ");
                    closingMarker = "\r\n";
                    break;

                case "h1":
                    hasH1 = true;
                    outText.Write("\r\n{H1}\r\n");
                    break;

                case "img":
                    var src = node.getAttribute("src");
                    if (!string.IsNullOrEmpty(src))
                    {
                        outText.Write("\r\n{IMG-" + src + "-IMG}\r\n");
                    }

                    break;
            }

            if (node.HasChildNodes)
            {
                ConvertContentTo(node, outText);
            }

            if (closingMarker != null)
            {
                outText.Write(closingMarker);
            }

            return;
        }
    }
}
/// <summary>
/// Lazily yields each child of the node, each followed by the sequence returned by that
/// child's <c>Descendants()</c>.
/// </summary>
/// <param name="node">Node whose children are walked; asserted non-null in debug builds.</param>
/// <returns>A deferred sequence; nothing runs until it is enumerated.</returns>
private static IEnumerable<HtmlNode> DescendantsImpl(HtmlNode node)
{
    Debug.Assert(node != null);

    foreach (var directChild in node.ChildNodes)
    {
        yield return directChild;

        foreach (var nested in directChild.Descendants())
        {
            yield return nested;
        }
    }
}
/// <summary>
/// Gets the draw list from the main site.
/// </summary>
/// <param name="mainUrl">URL of the main-site listing page.</param>
/// <returns>
/// The parsed draws ordered by term, descending. When an error occurs it is logged and whatever
/// was parsed up to that point is returned (possibly an empty list).
/// </returns>
private List<OpenCode8DTModel> GetOpenListFromMainUrl(string mainUrl)
{
    var result = new List<OpenCode8DTModel>();
    try
    {
        var url = new Uri(mainUrl);
        var htmlResource = NetHelper.GetUrlResponse(mainUrl, Encoding.GetEncoding("gb2312"));
        if (htmlResource == null)
        {
            return result;
        }

        var doc = new HtmlDocument();
        doc.LoadHtml(htmlResource);
        var table = doc.DocumentNode.SelectSingleNode("//table");
        if (table == null)
        {
            return result;
        }

        var trs = table.ChildNodes.Where(node => node.Name == "tr").ToList();

        // The first two rows are table headers.
        for (var i = 2; i < trs.Count; i++)
        {
            var trstyle = trs[i].Attributes["style"];
            if (trstyle != null && trstyle.Value == "display:none")
            {
                continue;
            }

            var tds = trs[i].ChildNodes.Where(node => node.Name == "td").ToList();

            // tds[9] is read below, so at least 10 cells are required. The previous "< 8" guard
            // let short rows through, and the resulting exception aborted the whole scrape.
            if (tds.Count < 10)
            {
                continue;
            }

            var nodeA = tds[0].ChildNodes.Where(n => n.Name == "a").FirstOrDefault();
            if (nodeA == null)
            {
                continue;
            }

            // A link without href cannot yield a detail URL; skip the row like other malformed rows.
            var hrefAttribute = nodeA.Attributes["href"];
            if (hrefAttribute == null)
            {
                continue;
            }

            var model = new OpenCode8DTModel();
            model.Term = Convert.ToInt64(nodeA.InnerText.Trim());
            model.DetailUrl = new Uri(url, hrefAttribute.Value).AbsoluteUri;
            model.OpenTime = Convert.ToDateTime(tds[9].InnerText);

            if (tds[1].ChildNodes.Count == 0)
            {
                continue;
            }

            var opencodeNode = tds[1].ChildNodes
                .Where(n => string.Equals(n.Name, "i", StringComparison.OrdinalIgnoreCase))
                .ToList();

            // Eight numbers are read below; the previous "< 5" guard allowed an out-of-range access.
            if (opencodeNode.Count < 8)
            {
                continue;
            }

            model.OpenCode1 = Convert.ToInt32(opencodeNode[0].InnerText.Trim());
            model.OpenCode2 = Convert.ToInt32(opencodeNode[1].InnerText.Trim());
            model.OpenCode3 = Convert.ToInt32(opencodeNode[2].InnerText.Trim());
            model.OpenCode4 = Convert.ToInt32(opencodeNode[3].InnerText.Trim());
            model.OpenCode5 = Convert.ToInt32(opencodeNode[4].InnerText.Trim());
            model.OpenCode6 = Convert.ToInt32(opencodeNode[5].InnerText.Trim());
            model.OpenCode7 = Convert.ToInt32(opencodeNode[6].InnerText.Trim());
            model.OpenCode8 = Convert.ToInt32(opencodeNode[7].InnerText.Trim());

            var details = GetKaijiangDetails(tds);
            model.Spare = details;
            result.Add(model);
        }

        // Compare the scraped draws against what is stored for the current lottery.
        var checkDataHelper = new CheckDataHelper();
        var dbdata = services.GetListS<OpenCode8DTModel>(currentLottery)
            .ToDictionary(w => w.Term.ToString(), w => w.GetCodeStr());
        checkDataHelper.CheckData(dbdata, result.ToDictionary(w => w.Term.ToString(), w => w.GetCodeStr()), Config.Area, currentLottery);

        result = result.OrderByDescending(item => item.Term).ToList();
    }
    catch (Exception ex)
    {
        log.Error(GetType(), string.Format("【{0}】通过主站点抓取开奖列表时发生错误,错误信息【{1}】", Config.Area + currentLottery, ex.Message));
    }

    return result;
}
/// <summary>
/// Returns the info-node entry at index 3 for the given row.
/// </summary>
/// <param name="line">Row node handed to <c>ReturnDataFromInfoNode</c>.</param>
/// <returns>The text produced by <c>ReturnDataFromInfoNode</c> for index 3.</returns>
public string ReturnDuration(HtmlNode line)
{
    const int durationIndex = 3;
    return ReturnDataFromInfoNode(line, durationIndex);
}
/// <summary>
/// Calls <c>GetHtmlFormat</c> on the node for tag "div", attribute "class" and the given value.
/// </summary>
/// <param name="node">Node to query.</param>
/// <param name="className">Class attribute value passed to <c>GetHtmlFormat</c>.</param>
/// <returns>Whatever <c>GetHtmlFormat</c> returns for these arguments.</returns>
private string GetDivByClass(HtmlNode node, string className)
{
    const string tagName = "div";
    const string attributeName = "class";
    return node.GetHtmlFormat(tagName, attributeName, className);
}
/// <summary>
/// Returns the trimmed text of the <c>a</c> element inside the <c>h3</c> element of row part 0.
/// </summary>
/// <param name="line">Row node handed to <c>ReturnPartOfRow</c>.</param>
/// <returns>The trimmed anchor text.</returns>
/// <remarks>No element on the path is null-checked; a missing one causes a NullReferenceException.</remarks>
public string ReturnTitle(HtmlNode line)
{
    var titleCell = ReturnPartOfRow(line, 0);
    var heading = titleCell.Element("h3");
    var anchor = heading.Element("a");
    return anchor.InnerText.Trim();
}
/// <summary>
/// Returns the info-node entry at index 1 for the given row, parsed as a number.
/// </summary>
/// <param name="line">Row node handed to <c>ReturnDataFromInfoNode</c>.</param>
/// <returns>The parsed rating.</returns>
/// <exception cref="System.FormatException">The entry text is not a valid number.</exception>
public double ReturnRating(HtmlNode line)
{
    // The previous code replaced "." with "," and parsed with the current culture, which only
    // works on machines whose culture uses a decimal comma (under en-US "7,5" parses as 75).
    // Normalise to a decimal point and parse culture-independently; thousands separators are
    // not allowed, so a comma can never be misread.
    var rating = ReturnDataFromInfoNode(line, 1).Replace(",", ".");
    return double.Parse(
        rating,
        System.Globalization.NumberStyles.Float,
        System.Globalization.CultureInfo.InvariantCulture);
}
/// <summary>
/// Counts occurrences of <paramref name="find"/> in a content item: its name (when it is a
/// <c>ContentBase</c>), its long-text, HTML and short-text property values, and, recursively,
/// all of its inner contents.
/// </summary>
/// <param name="c">Content item to search.</param>
/// <param name="find">Text to look for.</param>
/// <param name="caseSensitive">Passed through to the per-string overload.</param>
/// <param name="searchHtmlSource">
/// When <c>true</c>, HTML properties are searched as raw markup; otherwise they are first
/// converted with <c>HtmlFilter.TextFilter</c>.
/// </param>
/// <returns>The total number of occurrences found.</returns>
int countOccurencesOfText(IContentCommon c, string find, bool caseSensitive, bool searchHtmlSource)
{
    int total = 0;

    var namedContent = c as ContentBase;
    if (namedContent != null)
    {
        total += countOccurencesOfText(namedContent.Name, find, caseSensitive);
    }

    foreach (object value in c.GetAllPropertyValues(PropertyBaseClass.LongTextProperty))
    {
        try
        {
            var longText = (LongStringPropertyValue)value;
            total += countOccurencesOfText(longText.Value, find, caseSensitive);
        }
        catch
        {
            // Any failure for this value (including a failed cast) is ignored and the value skipped.
        }
    }

    foreach (object value in c.GetAllPropertyValues(PropertyBaseClass.HTMLProperty))
    {
        var html = (HTMLPropertyValue)value;
        if (searchHtmlSource)
        {
            total += countOccurencesOfText(html.Value, find, caseSensitive);
        }
        else
        {
            string textOnly = new HtmlNode(html.Value, HtmlFilter.TextFilter).ToString();
            total += countOccurencesOfText(textOnly, find, caseSensitive);
        }
    }

    foreach (object value in c.GetAllPropertyValues(PropertyBaseClass.ShortTextProperty))
    {
        if (!(value is ShortStringPropertyValue))
        {
            continue;
        }

        var shortText = (ShortStringPropertyValue)value;
        total += countOccurencesOfText(shortText.Value, find, caseSensitive);
    }

    foreach (IInnerContentsPropertyValue container in c.GetAllPropertyValues(PropertyBaseClass.InnerContents))
    {
        foreach (IInnerContent inner in container.GetAllContents())
        {
            total += countOccurencesOfText(inner, find, caseSensitive, searchHtmlSource);
        }
    }

    return total;
}
/// <summary>
/// Returns the inner text of the <paramref name="index"/>-th <c>div</c> nested inside the first
/// <c>div</c> of row part 2.
/// </summary>
/// <param name="line">Row node handed to <c>ReturnPartOfRow</c>.</param>
/// <param name="index">Zero-based position among the nested <c>div</c> elements.</param>
/// <returns>The inner text of the selected element.</returns>
/// <remarks>Nothing is null- or range-checked; a missing element or a bad index throws.</remarks>
public string ReturnDataFromInfoNode(HtmlNode line, int index)
{
    var infoCell = ReturnPartOfRow(line, 2);
    var container = infoCell.Element("div");
    var entries = container.Elements("div").ToList();
    return entries[index].InnerText;
}