/// <summary>
/// Reads <c>htmlDocument</c> and fills <c>Name</c>, <c>Description</c> and
/// <c>Prices</c> from it.
/// </summary>
/// <remarks>
/// <para>
/// <c>Name</c> is taken from the first <c>&lt;title&gt;</c> child of
/// <c>&lt;head&gt;</c> and <c>Description</c> from the first
/// <c>&lt;meta name="Description" content="..."&gt;</c> child of
/// <c>&lt;head&gt;</c>. Each is set to an empty string when the element is
/// missing. Neither property is touched when the document has no
/// <c>&lt;html&gt;</c> element.
/// </para>
/// <para>
/// One <c>HistoricalPrice</c> is appended to <c>Prices</c> for every
/// attribute-less <c>&lt;tr&gt;</c> of the element whose id is
/// <c>historical_price</c> that carries at least six fields (date, open,
/// high, low, close, volume). When the document has no such element the
/// method returns without adding any price.
/// </para>
/// </remarks>
/// <exception cref="System.FormatException">
/// A six-field row contains a date or number that cannot be parsed.
/// </exception>
/// <exception cref="System.OverflowException">
/// A volume does not fit in an <see cref="int"/>.
/// </exception>
private void Parse()
{
    // Parse the <title /> of the web page and set it to Name
    // Parse the <meta name="Description" /> and set it to Description
    foreach (var n in htmlDocument.DocumentNode.ChildNodes)
    {
        if (n.Name != "html")
        {
            continue;
        }

        string title = "";
        string metaDescription = "";
        bool titleFound = false;
        bool descriptionFound = false;

        // Look the elements up by tag name instead of by child position, so
        // that whitespace text nodes or extra tags in <head> do not matter.
        foreach (var section in n.ChildNodes)
        {
            if (section.Name != "head")
            {
                continue;
            }

            foreach (var child in section.ChildNodes)
            {
                if (!titleFound && child.Name == "title")
                {
                    title = child.InnerText;
                    titleFound = true;
                }
                else if (!descriptionFound && child.Name == "meta")
                {
                    // The indexer yields null for a missing attribute.
                    var nameAttribute = child.Attributes["name"];
                    var contentAttribute = child.Attributes["content"];
                    if (nameAttribute != null
                        && contentAttribute != null
                        && string.Equals(nameAttribute.Value, "Description", StringComparison.OrdinalIgnoreCase))
                    {
                        metaDescription = contentAttribute.Value;
                        descriptionFound = true;
                    }
                }
            }

            // Only the first <head> is considered.
            break;
        }

        Name = title;
        Description = metaDescription;

        // Only the first <html> element is considered.
        break;
    }

    // For parsing the prices the first two tags we need to look for are.
    // <div id="prices" class="gf-table-wrapper sfe-break-bottom-16">
    // <table id="historical_price" class="gf-table">
    HtmlNode historicalPricesNode = htmlDocument.GetElementbyId("historical_price");
    if (historicalPricesNode == null)
    {
        // The page carries no price table, so there is nothing to add.
        return;
    }

    // The rows live under <tbody> when the document has one; otherwise they
    // are direct children of the table itself.
    HtmlNode nodePrices = historicalPricesNode;
    foreach (var tbodyChild in historicalPricesNode.ChildNodes)
    {
        if (tbodyChild.Name == "tbody")
        {
            nodePrices = tbodyChild;
            break;
        }
    }

    // The cell text uses '.' as decimal separator and ',' as thousands
    // separator, so it must not be parsed with the machine's current culture.
    System.Globalization.CultureInfo culture = System.Globalization.CultureInfo.InvariantCulture;
    char[] lineBreaks = new char[] { '\n', '\r' };

    foreach (var p in nodePrices.ChildNodes)
    {
        if (p.Name != "tr" || p.HasAttributes)
        {
            continue;
        }

        // Every cell sits on its own line of the row's text; blank lines
        // between cells are dropped.
        string[] items = p.InnerText.Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries);
        if (items.Length < 6)
        {
            // Not a complete date/open/high/low/close/volume row.
            continue;
        }

        HistoricalPrice historicalPrice = new HistoricalPrice()
        {
            Date = DateTime.Parse(items[0], culture),
            Open = double.Parse(items[1], culture),
            High = double.Parse(items[2], culture),
            Low = double.Parse(items[3], culture),
            Close = double.Parse(items[4], culture),
            Volume = int.Parse(items[5].Replace(",", ""), culture)
        };
        Prices.Add(historicalPrice);
    }
}
/// <summary>
/// Reads <c>htmlDocument</c> and fills <c>Name</c>, <c>Description</c> and
/// <c>Prices</c> from it.
/// </summary>
/// <remarks>
/// <para>
/// <c>Name</c> is taken from the first <c>&lt;title&gt;</c> child of
/// <c>&lt;head&gt;</c> and <c>Description</c> from the first
/// <c>&lt;meta name="Description" content="..."&gt;</c> child of
/// <c>&lt;head&gt;</c>. Each is set to an empty string when the element is
/// missing. Neither property is touched when the document has no
/// <c>&lt;html&gt;</c> element.
/// </para>
/// <para>
/// One <c>HistoricalPrice</c> is appended to <c>Prices</c> for every
/// attribute-less <c>&lt;tr&gt;</c> of the element whose id is
/// <c>historical_price</c> that carries at least six fields (date, open,
/// high, low, close, volume). When the document has no such element the
/// method returns without adding any price.
/// </para>
/// </remarks>
/// <exception cref="System.FormatException">
/// A six-field row contains a date or number that cannot be parsed.
/// </exception>
/// <exception cref="System.OverflowException">
/// A volume does not fit in an <see cref="int"/>.
/// </exception>
private void Parse()
{
    // Parse the <title /> of the web page and set it to Name
    // Parse the <meta name="Description" /> and set it to Description
    foreach (var n in htmlDocument.DocumentNode.ChildNodes)
    {
        if (n.Name != "html")
        {
            continue;
        }

        string title = "";
        string metaDescription = "";
        bool titleFound = false;
        bool descriptionFound = false;

        // Look the elements up by tag name instead of by child position, so
        // that whitespace text nodes or extra tags in <head> do not matter.
        foreach (var section in n.ChildNodes)
        {
            if (section.Name != "head")
            {
                continue;
            }

            foreach (var child in section.ChildNodes)
            {
                if (!titleFound && child.Name == "title")
                {
                    title = child.InnerText;
                    titleFound = true;
                }
                else if (!descriptionFound && child.Name == "meta")
                {
                    // The indexer yields null for a missing attribute.
                    var nameAttribute = child.Attributes["name"];
                    var contentAttribute = child.Attributes["content"];
                    if (nameAttribute != null
                        && contentAttribute != null
                        && string.Equals(nameAttribute.Value, "Description", StringComparison.OrdinalIgnoreCase))
                    {
                        metaDescription = contentAttribute.Value;
                        descriptionFound = true;
                    }
                }
            }

            // Only the first <head> is considered.
            break;
        }

        Name = title;
        Description = metaDescription;

        // Only the first <html> element is considered.
        break;
    }

    // For parsing the prices the first two tags we need to look for are.
    // <div id="prices" class="gf-table-wrapper sfe-break-bottom-16">
    // <table id="historical_price" class="gf-table">
    HtmlNode historicalPricesNode = htmlDocument.GetElementbyId("historical_price");
    if (historicalPricesNode == null)
    {
        // The page carries no price table, so there is nothing to add.
        return;
    }

    // The rows live under <tbody> when the document has one; otherwise they
    // are direct children of the table itself.
    HtmlNode nodePrices = historicalPricesNode;
    foreach (var tbodyChild in historicalPricesNode.ChildNodes)
    {
        if (tbodyChild.Name == "tbody")
        {
            nodePrices = tbodyChild;
            break;
        }
    }

    // The cell text uses '.' as decimal separator and ',' as thousands
    // separator, so it must not be parsed with the machine's current culture.
    System.Globalization.CultureInfo culture = System.Globalization.CultureInfo.InvariantCulture;
    char[] lineBreaks = new char[] { '\n', '\r' };

    foreach (var p in nodePrices.ChildNodes)
    {
        if (p.Name != "tr" || p.HasAttributes)
        {
            continue;
        }

        // Every cell sits on its own line of the row's text; blank lines
        // between cells are dropped.
        string[] items = p.InnerText.Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries);
        if (items.Length < 6)
        {
            // Not a complete date/open/high/low/close/volume row.
            continue;
        }

        HistoricalPrice historicalPrice = new HistoricalPrice()
        {
            Date = DateTime.Parse(items[0], culture),
            Open = double.Parse(items[1], culture),
            High = double.Parse(items[2], culture),
            Low = double.Parse(items[3], culture),
            Close = double.Parse(items[4], culture),
            Volume = int.Parse(items[5].Replace(",", ""), culture)
        };
        Prices.Add(historicalPrice);
    }
}