/// <summary>
/// Tests whether at least one node in <paramref name="childNodes"/> has inner text matching
/// <paramref name="textToFind"/>. Before comparing, line breaks (Environment.NewLine) are removed
/// and the text is passed through CleanSpace.
/// </summary>
/// <param name="childNodes">Nodes whose inner text is inspected.</param>
/// <param name="textToFind">Text to search for.</param>
/// <param name="partial">True for a substring match; false to require equality with the cleaned text.</param>
/// <returns>True when any node matches; otherwise false.</returns>
private bool IsMatch(HtmlNodeCollection childNodes, string textToFind, bool partial)
{
    return childNodes.Any(candidate =>
    {
        var cleaned = CleanSpace(candidate.InnerText.Replace(Environment.NewLine, ""));
        return partial
            ? cleaned.Contains(textToFind)
            : cleaned.Equals(textToFind);
    });
}
/// <summary>
/// Parses <paramref name="html"/>, hands the top-level nodes to HandleNextNode through a work queue
/// until the queue is empty, and returns the resulting markup.
/// </summary>
/// <param name="map">Passed through to HandleNextNode.</param>
/// <param name="itemPath">Passed through to HandleNextNode.</param>
/// <param name="html">Markup to clean; returned unchanged when null, empty or without top-level nodes.</param>
/// <param name="importRow">Passed through to HandleNextNode.</param>
/// <returns>The document's inner HTML after processing.</returns>
public string CleanHtml(IDataMap map, string itemPath, string html, Item importRow)
{
    if (String.IsNullOrEmpty(html))
    {
        return html;
    }

    var document = new HtmlDocument();
    document.LoadHtml(html);

    HtmlNodeCollection topLevel = document.DocumentNode.SelectNodes("./*|./text()");
    bool nothingToProcess = topLevel == null || !topLevel.Any();
    if (nothingToProcess)
    {
        return html;
    }

    // HandleNextNode is expected to consume from (and possibly add to) the queue.
    var pending = new Queue<HtmlNode>(topLevel);
    while (pending.Count > 0)
    {
        HandleNextNode(pending, map, itemPath, importRow);
    }

    return document.DocumentNode.InnerHtml;
}
/// <summary>
/// Merges a <paramref name="Tag"/> element into the previously visited node when both have the same
/// name, the same parent (compared by XPath) and the same serialized attributes. The merged
/// element's inner HTML is appended to the earlier one and the later element is removed.
/// </summary>
/// <param name="html">Markup to process.</param>
/// <param name="encode">Passed to GetHtmlDocument together with <paramref name="html"/>.</param>
/// <param name="Tag">Element name to consolidate; compared case-insensitively.</param>
/// <returns>
/// Null when <paramref name="html"/> is null or empty (existing contract), the input when it has no
/// top-level nodes, otherwise the processed markup.
/// </returns>
public string ConsolidateRepeatedTags(string html, string encode, string Tag = "span")
{
    if (string.IsNullOrEmpty(html))
    {
        return null;
    }

    var document = GetHtmlDocument(html, encode);
    HtmlNodeCollection tryGetNodes = document.DocumentNode.SelectNodes("./*|./text()");
    if (tryGetNodes == null || !tryGetNodes.Any())
    {
        return html;
    }

    var nodes = new Queue<HtmlNode>(tryGetNodes);
    HtmlNode lastnode = null;
    while (nodes.Count > 0)
    {
        var node = nodes.Dequeue();
        var parentNode = node.ParentNode;

        // Children are queued before any merge so that they are still visited.
        var childNodes = node.SelectNodes("./*|./text()");
        if (childNodes != null)
        {
            foreach (var child in childNodes)
            {
                nodes.Enqueue(child);
            }
        }

        // Ordinal, case-insensitive comparisons: culture-sensitive ToUpper() can mis-compare
        // tag names under some cultures (e.g. "div" vs "DIV" with Turkish casing rules).
        bool mergeWithPrevious =
            string.Equals(node.Name, Tag, StringComparison.OrdinalIgnoreCase)
            && lastnode != null
            && lastnode.Name == node.Name
            && lastnode.ParentNode != null
            && node.ParentNode != null
            && lastnode.ParentNode.XPath == node.ParentNode.XPath
            && string.Equals(SerializedAttributes(lastnode), SerializedAttributes(node), StringComparison.OrdinalIgnoreCase);

        if (mergeWithPrevious)
        {
            lastnode.InnerHtml += node.InnerHtml;
            parentNode.RemoveChild(node);
        }
        else
        {
            lastnode = node;
        }
    }

    return document.DocumentNode.InnerHtml;
}
/// <summary>
/// Checks whether a course exists. Makes a series of HTTP requests to obtain tokens, posts the
/// course form data, and inspects the returned page for error markers.
/// </summary>
/// <param name="course">Course to look up; converted to form data by the requests helper.</param>
/// <returns>
/// False when the response contains a div whose class contains 'errorText'; otherwise true.
/// </returns>
public async Task<bool> CheckCourseExists(CourseInfo course)
{
    string token;

    // First request: initial connection, yields the first token.
    using (HttpRequestMessage request = requestsHelper.CreateHttpRequestMessage(HttpMethod.Get, Constants.WebAdvisorInitialConnectionUrl))
    using (HttpResponseMessage response = await httpClient.SendAsync(request))
    {
        token = requestsHelper.GetTokenFromResponse(response);
    }

    // Second request: same URL with the token appended, yields the token used for the post URL.
    using (HttpRequestMessage request = requestsHelper.CreateHttpRequestMessage(HttpMethod.Get, Constants.WebAdvisorInitialConnectionUrl + token))
    using (HttpResponseMessage response = await httpClient.SendAsync(request))
    {
        token = requestsHelper.GetTokenFromResponse(response);
    }

    // Third request: post the course form and read the resulting page.
    string responseHtml;
    string postUrl = requestsHelper.CreatePostUrl(token);
    using (HttpRequestMessage request = requestsHelper.CreateHttpRequestMessage(HttpMethod.Post, postUrl))
    {
        request.Content = requestsHelper.CreateFormData(course);
        using (HttpResponseMessage response = await httpClient.SendAsync(request))
        {
            responseHtml = await response.Content.ReadAsStringAsync();
        }
    }

    HtmlDocument htmlDoc = new HtmlDocument();
    htmlDoc.LoadHtml(responseHtml);

    // The XPath starts with "//", so it searches the whole document regardless of the context node.
    // Falling back to the document node avoids a NullReferenceException when no element with
    // id "main" exists, without changing the result when it does.
    HtmlNode searchRoot = htmlDoc.GetElementbyId("main") ?? htmlDoc.DocumentNode;
    HtmlNodeCollection errorNodes = searchRoot.SelectNodes("//div[contains(@class, 'errorText')]");

    bool hasErrors = errorNodes != null && errorNodes.Any();
    return !hasErrors;
}
/// <summary>
/// Builds achievement DTOs from table-row nodes: rarity is the number of images in the first
/// cell, the name comes from the second cell, and an image in the second cell marks a secret.
/// </summary>
/// <param name="nodes">Row nodes to convert.</param>
/// <returns>
/// Null when <paramref name="nodes"/> is null or empty, or when any row's text equals
/// ErrorMessages.NoAchievements; otherwise the list of achievements.
/// </returns>
private List<AchievementDTO> GetHtmlAchievementList(HtmlNodeCollection nodes)
{
    if (nodes == null || !nodes.Any())
    {
        return null;
    }

    var achievements = new List<AchievementDTO>();
    foreach (var acvNode in nodes)
    {
        if (acvNode.InnerText == ErrorMessages.NoAchievements)
        {
            return null;
        }

        var achievement = new AchievementDTO();
        // SelectNodes returns null (not an empty collection) when nothing matches,
        // so a row without images has rarity 0 instead of throwing.
        achievement.Rarity = acvNode.SelectNodes("td[1]/img")?.Count ?? 0;
        achievement.Name = GetHtmlString(acvNode, "td[2]");
        achievement.IsSecret = acvNode.SelectNodes("td[2]/img") != null;
        achievements.Add(achievement);
    }

    return achievements;
}
/// <summary>
/// Strips a fixed set of HTML tags from <paramref name="html"/> while keeping their child
/// nodes, which are moved up in place of the removed element.
/// </summary>
/// <param name="html">Markup to clean; returned unchanged when null, empty or without top-level nodes.</param>
/// <returns>The document's inner HTML after the unwanted tags were removed.</returns>
public string CleanTitleHtml(string html)
{
    if (String.IsNullOrEmpty(html))
    {
        return html;
    }

    var document = new HtmlDocument();
    document.LoadHtml(html);
    HtmlNodeCollection tryGetNodes = document.DocumentNode.SelectNodes("./*|./text()");
    if (tryGetNodes == null || !tryGetNodes.Any())
    {
        return html;
    }

    // Built only when there is work to do; a set gives constant-time membership tests.
    var unwantedTags = new HashSet<string>(StringComparer.Ordinal)
    {
        "a", "b", "body", "blockquote", "br", "button", "center", "td", "tr", "em", "i", "embed",
        "form", "frame", "iframe", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "img", "legend", "li",
        "ul", "ol", "map", "script", "strong", "sup", "sub", "p", "thead", "tbody", "u", "span",
        "table", "div", "label", "font"
    };

    var nodes = new Queue<HtmlNode>(tryGetNodes);
    while (nodes.Count > 0)
    {
        var node = nodes.Dequeue();
        var nodeName = node.Name.ToLowerInvariant();
        var parentNode = node.ParentNode;

        var childNodes = node.SelectNodes("./*|./text()");
        if (childNodes != null)
        {
            foreach (var child in childNodes)
            {
                nodes.Enqueue(child);
            }
        }

        if (!unwantedTags.Contains(nodeName))
        {
            continue;
        }

        // This node is one to remove: re-attach its children in its place first.
        if (childNodes != null)
        {
            foreach (var child in childNodes)
            {
                parentNode.InsertBefore(child, node);
            }
        }

        parentNode.RemoveChild(node);
    }

    return document.DocumentNode.InnerHtml;
}
/// <summary>
/// Ensure <paramref name="imageNodes"/> have valid src attributes, making relative sources
/// absolute against <paramref name="targetUrl"/> so they render from a different domain.
/// </summary>
/// <param name="targetUrl">URL of the page the images originate from.</param>
/// <param name="imageNodes">Node collection to iterate; may be null.</param>
/// <returns>One <see cref="DisplayImage"/> per node that has a src attribute (possibly empty).</returns>
private static IEnumerable<DisplayImage> AssembleImageListForViewModel(Uri targetUrl, HtmlNodeCollection imageNodes)
{
    var imageList = new List<DisplayImage>();
    if (imageNodes == null)
    {
        return imageList;
    }

    foreach (var item in imageNodes)
    {
        // Custom client-side lazy-loaders may use data-attributes and define no src at all.
        var srcAttribute = item.Attributes["src"];
        if (srcAttribute == null)
        {
            continue;
        }

        var imgSrc = srcAttribute.Value;

        if (Uri.IsWellFormedUriString(imgSrc, UriKind.Relative))
        {
            // Resolve like a browser would. Plain "authority + src" concatenation yields broken
            // URLs for sources without a leading slash ("img/a.png") and for protocol-relative
            // sources ("//cdn.example/a.png"). The concatenation is kept only as a fallback.
            imgSrc = Uri.TryCreate(targetUrl, imgSrc, out Uri resolved)
                ? resolved.AbsoluteUri
                : targetUrl.GetLeftPart(UriPartial.Authority) + imgSrc;
        }

        imageList.Add(new DisplayImage
        {
            ImageUrl = imgSrc,
            AltText = item.Attributes["alt"]?.Value ?? string.Empty
        });
    }

    return imageList;
}
/// <summary>
/// Reads a player's number from the single child node of the first td cell in
/// <paramref name="playerRow"/>.
/// </summary>
/// <param name="playerRow">Row node containing the player's cells.</param>
/// <returns>
/// The parsed number, or null when the cell has no children, the text is blank or not an
/// integer, or any exception occurs (the exception is logged).
/// </returns>
private int? ExtractNumber(HtmlNode playerRow)
{
    try
    {
        HtmlNodeCollection firstCellChildren = playerRow.SelectNodes("td")[0].ChildNodes;
        if (firstCellChildren.Any())
        {
            // Single() throws when the cell holds more than one child; the catch below handles it.
            string rawNumber = firstCellChildren.Single().InnerText;
            if (!string.IsNullOrWhiteSpace(rawNumber) && int.TryParse(rawNumber, out int parsed))
            {
                return parsed;
            }
        }

        return null;
    }
    catch (Exception ex)
    {
        _logger.LogError(ex, "Failed to extract player's number from a player row.");
        return null;
    }
}
/// <summary>
/// Downloads a fixed Yahoo Finance earnings-calendar page and prints the symbol, company,
/// call time and EPS figures of every table row to the console, then waits for a key press.
/// </summary>
/// <param name="args">Unused.</param>
static void Main(string[] args)
{
    const string earningsTableXPath = "//table[contains(@class, 'data-table W(100%) Bdcl(c) Pos(r) BdB Bdc($c-fuji-grey-c)')]";

    string page;
    using (WebClient webClient = new WebClient())
    {
        page = webClient.DownloadString("https://finance.yahoo.com/calendar/earnings?day=2018-10-03");
    }

    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(page);

    // SelectNodes returns null when nothing matches, so every result is guarded before use.
    HtmlNodeCollection tables = doc.DocumentNode.SelectNodes(earningsTableXPath);
    if (tables != null)
    {
        foreach (HtmlNode table in tables)
        {
            HtmlNodeCollection tableBodies = table.SelectNodes("tbody");
            if (tableBodies == null)
            {
                continue;
            }

            foreach (HtmlNode tableBody in tableBodies)
            {
                HtmlNodeCollection tableRows = tableBody.SelectNodes("tr");
                Console.WriteLine("No. of Earnings: " + (tableRows?.Count ?? 0));
                Console.WriteLine();
                if (tableRows == null)
                {
                    continue;
                }

                foreach (HtmlNode tableRow in tableRows)
                {
                    HtmlNodeCollection cells = tableRow.SelectNodes("td");
                    // Cells 1..6 are read below, so at least 7 cells are required.
                    if (cells == null || cells.Count < 7)
                    {
                        continue;
                    }

                    Console.WriteLine("Symbol: " + cells[1].InnerText);
                    Console.WriteLine("Company: " + cells[2].InnerText);
                    Console.WriteLine("Earnings Call Time: " + cells[3].InnerText);
                    Console.WriteLine("EPS Estimate: " + cells[4].InnerText);
                    Console.WriteLine("Reported EPS: " + cells[5].InnerText);
                    Console.WriteLine("Surprise (%): " + cells[6].InnerText);
                    Console.WriteLine();
                }
            }
        }
    }

    Console.ReadKey();
}
/// <summary>
/// Converts nodes into <see cref="Earthquake"/> entities. Each node's inner text is split into
/// trimmed lines; six consecutive lines starting at the index returned by GetStartIndex supply
/// scale, time, latitude, longitude, depth and position.
/// </summary>
/// <param name="nodes">Nodes to parse; may be null.</param>
/// <returns>The parsed entities (empty when nothing could be parsed).</returns>
public IEnumerable<IDomainModel> ParseNodes(HtmlNodeCollection nodes)
{
    const int FieldCount = 6;

    List<Earthquake> lst = new List<Earthquake>();
    if (nodes == null)
    {
        return lst;
    }

    foreach (var node in nodes)
    {
        if (string.IsNullOrWhiteSpace(node.InnerText))
        {
            continue;
        }

        string[] content = node.InnerText.Split('\n').Select(o => o.Trim()).ToArray();
        int startIndex = this.GetStartIndex(content);

        // Skip nodes with no usable start index, and nodes too short to hold all six fields
        // (these previously raised IndexOutOfRangeException).
        if (startIndex < 2 || startIndex + FieldCount > content.Length)
        {
            continue;
        }

        Earthquake entity = new Earthquake();
        entity.Scale = content[startIndex];
        entity.CreateTime = DateTime.Parse(content[startIndex + 1]);
        entity.Latitude = content[startIndex + 2];
        entity.Logitude = content[startIndex + 3];
        entity.Depth = content[startIndex + 4];
        entity.Position = content[startIndex + 5];
        lst.Add(entity);
    }

    return lst;
}
/// <summary>
/// Adds the URL and title of every mp3 anchor in <paramref name="mp3AnchorSet"/> to
/// <paramref name="parserResult"/>. Does nothing when <paramref name="parserResult"/> is null;
/// logs a message when the anchor set is null or empty.
/// </summary>
/// <param name="mp3AnchorSet">Anchor nodes taken from the playlist wrapper; may be null.</param>
/// <param name="parserResult">Result object receiving URLs and log entries.</param>
private void GetUrlsFromPlayListWrapper(HtmlNodeCollection mp3AnchorSet, ref ParserResult parserResult)
{
    /*
     * Expected markup:
     * <div class="sm2-playlist-wrapper">
     *   <ul class="sm2-playlist-bd">
     *     <li>
     *       <div part="1" class="sm2-row sm2-wide" id="file-8490384">
     * ===>    <a href="https://vltava.rozhlas.cz/sites/default/files/audios/8823b0fd947daa76167e9014d6ed4014.mp3?uuid=5c17536947ad0">
     *           <div class="filename" title="Steinar Bragi: Planina">
     *             <div class="filename__text" title="Steinar Bragi: Planina">1. díl: Steinar Bragi: Planina</div>
     *           </div>
     *         </a>
     *         <div class="audio-info-wrap"> ... </div>
     *       </div>
     *     </li>
     */
    if (parserResult == null)
    {
        return;
    }

    // Must be "&&": with "||" a null set was dereferenced by Any() and threw instead of logging.
    if (mp3AnchorSet != null && mp3AnchorSet.Any())
    {
        foreach (var mp3A in mp3AnchorSet)
        {
            var url = mp3A.Attributes["href"]?.Value;

            // Look two levels down for the element with class "filename__text".
            var filenameTextNode = mp3A.ChildNodes
                .SelectMany(p => p.ChildNodes)
                .FirstOrDefault(p => p.Attributes.Any(a => a.Name == "class" && a.Value == "filename__text"));

            // Variant with several parts (e.g. a reading or a serial): title is in filename__text.
            var title = filenameTextNode?.InnerHtml?.Trim();
            if (string.IsNullOrEmpty(title))
            {
                // Variant with only a single part of a programme: use the anchor's own content.
                title = mp3A.InnerHtml;
            }

            parserResult.AddUrl(url, title);
        }
    }
    else
    {
        parserResult.AddLog("ParsePrehrat2018Html - mp3AnchorSet is null.");
    }
}
/// <summary>
/// Reads an HTML file and converts every "item" div under article/div.list into an
/// <see cref="OriginRow"/>, taking the summary from div.item-summary and the remaining fields
/// from the key/value span pairs in div.item-details.
/// </summary>
/// <param name="path">Path of the HTML file to read.</param>
/// <returns>The parsed rows (empty when no items are found).</returns>
public async Task<IEnumerable<OriginRow>> CrawlerMaster(string path)
{
    List<OriginRow> rows = new List<OriginRow>();
    string html = await File.ReadAllTextAsync(path);

    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(html);
    HtmlNode root = doc.DocumentNode;

    HtmlNodeCollection items = root.SelectNodes("//article/div[@class='list']/div[@class='item']");
    if (items == null)
    {
        return rows;
    }

    foreach (HtmlNode item in items)
    {
        OriginRow row = new OriginRow();

        // SelectSingleNode/SelectNodes return null when nothing matches; missing parts are
        // skipped instead of throwing NullReferenceException.
        HtmlNode summaryNode = item.SelectSingleNode("./div[@class='item-summary']");
        if (summaryNode != null)
        {
            row.Summary = WebUtility.HtmlDecode(summaryNode.InnerText).Trim();
        }

        HtmlNodeCollection details = item.SelectNodes("./div[@class='item-details']/p");
        if (details != null)
        {
            foreach (HtmlNode detail in details)
            {
                HtmlNode keyNode = detail.SelectSingleNode("./span[1]");
                HtmlNode valueNode = detail.SelectSingleNode("./span[2]");
                if (keyNode == null || valueNode == null)
                {
                    continue;
                }

                string key = keyNode.InnerText.Trim();
                string value = valueNode.InnerText.Trim();
                switch (key)
                {
                    case "Notice Type:":
                    case "Approval Number:":
                        // Recognised but not stored.
                        break;
                    case "Executing Agency:":
                        row.ExecutingAgency = value;
                        break;
                    case "Contractor Name:":
                        row.ContractorName = value;
                        break;
                    case "Address:":
                        row.ContractorAddress = value;
                        break;
                    case "Total Contract Amount (US$):":
                        row.TotalContractAmount = value;
                        break;
                    case "Contract Amount Financed by ADB (US$):":
                        row.FinancedByAdb = value;
                        break;
                }
            }
        }

        rows.Add(row);
    }

    return rows;
}
/// <summary>
/// Removes every style element from <paramref name="document"/>.
/// </summary>
/// <param name="document">Document to modify in place.</param>
/// <returns>True when the XPath query "//style" matched at least one node; otherwise false.</returns>
public static bool RemoveAll(HtmlDocument document)
{
    HtmlNodeCollection styleNodes = document.DocumentNode.SelectNodes("//style");

    // SelectNodes returns null when nothing matches; the foreach below would otherwise throw.
    if (styleNodes == null)
    {
        return false;
    }

    foreach (HtmlNode node in styleNodes)
    {
        node.Remove();
    }

    return styleNodes.Any();
}
/// <summary>
/// Removes every element whose name appears in <paramref name="unwantedTags"/> while keeping
/// its children, which are re-inserted in front of the removed element.
/// </summary>
/// <param name="html">Markup to process; returned unchanged when null, empty or without top-level nodes.</param>
/// <param name="encode">Passed to GetHtmlDocument together with <paramref name="html"/>.</param>
/// <param name="unwantedTags">Element names to strip (compared with ==, i.e. case-sensitively).</param>
/// <returns>The document's inner HTML after removal.</returns>
public string RemoveUnwantedHtmlTags(string html, string encode, List<string> unwantedTags)
{
    if (string.IsNullOrEmpty(html))
    {
        return html;
    }

    var document = GetHtmlDocument(html, encode);
    HtmlNodeCollection roots = document.DocumentNode.SelectNodes("./*|./text()");
    if (roots == null || !roots.Any())
    {
        return html;
    }

    // Breadth-first walk; the queue is known to be non-empty on entry.
    var pending = new Queue<HtmlNode>(roots);
    do
    {
        HtmlNode current = pending.Dequeue();
        HtmlNode owner = current.ParentNode;
        HtmlNodeCollection children = current.SelectNodes("./*|./text()");
        bool hasChildren = children != null;

        if (hasChildren)
        {
            foreach (HtmlNode child in children)
            {
                pending.Enqueue(child);
            }
        }

        bool isUnwanted = unwantedTags.Any(tag => tag == current.Name);
        if (!isUnwanted)
        {
            continue;
        }

        // Hoist the children into the parent before dropping the element itself.
        if (hasChildren)
        {
            foreach (HtmlNode child in children)
            {
                owner.InsertBefore(child, current);
            }
        }

        owner.RemoveChild(current);
    }
    while (pending.Count > 0);

    return document.DocumentNode.InnerHtml;
}
/// <summary>
/// Downloads the page at <paramref name="url"/>, converts each row of tbody#posts into an
/// <see cref="OriginRow"/> (including the detail fetched via CrawlerDetail) and appends it to
/// <paramref name="rows"/>.
/// </summary>
/// <param name="rows">List that receives the parsed rows.</param>
/// <param name="url">Page to crawl.</param>
/// <returns>True when the page contained at least one row node; false when the download
/// returned null or no rows were found.</returns>
public async Task<bool> CrawlerMaster(List<OriginRow> rows, string url)
{
    string html = await GetHtml(url);
    if (html == null)
    {
        return false;
    }

    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(html);
    HtmlNode root = doc.DocumentNode;

    HtmlNodeCollection items = root.SelectNodes("//tbody[@id='posts']/tr");
    if (items == null || !items.Any())
    {
        return false;
    }

    // Missing cells yield an empty string instead of a NullReferenceException.
    string CellText(HtmlNode rowNode, string xpath)
    {
        HtmlNode cell = rowNode.SelectSingleNode(xpath);
        return cell == null ? string.Empty : cell.InnerText.Trim();
    }

    foreach (HtmlNode item in items)
    {
        // The project link drives the detail crawl; a row without one cannot be processed.
        HtmlNode linkNode = item.SelectSingleNode("./td[4]/a");
        string rawHref = linkNode?.Attributes["href"]?.Value;
        if (rawHref == null)
        {
            continue;
        }

        OriginRow row = new OriginRow();
        row.IssueDate = CellText(item, "./td[1]");
        row.ClosingDate = CellText(item, "./td[2]");
        row.Location = CellText(item, "./td[3]");
        row.ProjectName = linkNode.InnerText.Trim();

        // Ordinal comparisons: these are URL prefixes, not linguistic text.
        string href = WebUtility.HtmlDecode(rawHref);
        if (href.StartsWith("//", StringComparison.Ordinal))
        {
            row.ProjectLink = $"https:{href}";
        }
        else if (href.StartsWith("/", StringComparison.Ordinal))
        {
            row.ProjectLink = $"https://www.ebrd.com{href}";
        }
        else if (href.StartsWith("http:", StringComparison.Ordinal) || href.StartsWith("https:", StringComparison.Ordinal))
        {
            row.ProjectLink = href;
        }
        else
        {
            row.ProjectLink = $"https://www.ebrd.com/{href}";
        }

        row.ProjectDetail = await CrawlerDetail(row.ProjectLink);
        row.Sector = CellText(item, "./td[5]");
        row.Contract = CellText(item, "./td[6]");
        row.Type = CellText(item, "./td[7]");
        rows.Add(row);
    }

    return true;
}
/// <summary>
/// Downloads the page at <paramref name="address"/> and reports whether one of its script
/// elements has a src equal (culture-invariant, ignoring case) to the beacon bundle address.
/// </summary>
/// <param name="address">Page address; a non-absolute value is prefixed with "http://".</param>
/// <returns>True when a matching script is found; false when the body is empty, the page has
/// no script elements, or none of them matches.</returns>
/// <exception cref="HttpResponseException">
/// Thrown with status 500 when the download raises a <see cref="WebException"/>.
/// </exception>
public bool Ping(Uri address)
{
    Uri target = address.IsAbsoluteUri
        ? address
        : new Uri(string.Format("http://{0}", address));

    string body;
    try
    {
        body = this.webClient.Download(target);
    }
    catch (WebException ex)
    {
        this.logger.Error(ex.Message, ex, this);
        var failure = new HttpResponseMessage(HttpStatusCode.InternalServerError)
        {
            ReasonPhrase = ex.Message
        };
        throw new HttpResponseException(failure);
    }

    if (string.IsNullOrEmpty(body))
    {
        this.logger.Warn(string.Format("[BeaconController - Ping] Response body for request {0} was empty", address), this);
        return false;
    }

    this.logger.Debug(string.Format("[Ping]: Page: {0}", address), null);

    HtmlDocument page = new HtmlDocument();
    page.LoadHtml(body);

    HtmlNodeCollection scripts = page.DocumentNode.SelectNodes("//script");
    if (scripts == null)
    {
        return false;
    }

    // The host name and bundle address are looked up for each script, as before.
    return scripts.Any<HtmlNode>(script =>
        script.GetAttributeValue("src", string.Empty)
              .Equals(this.GetBeaconBundleAddress(this.GetBeaconHostName()), StringComparison.InvariantCultureIgnoreCase));
}
/// <summary>
/// Loads a deck page from <paramref name="url"/> and builds a decklist from the rows of the
/// 'deck-view-deck-table' table that precede the first row containing "Cards Total".
/// </summary>
/// <param name="url">Address of the deck page.</param>
/// <param name="reporter">Receives status messages and progress updates.</param>
/// <returns>The decklist, one SearchLine per successfully parsed row.</returns>
/// <exception cref="InvalidOperationException">Thrown when the page contains no deck table rows.</exception>
private static async Task<string> ImportFromMtgGoldfish(string url, IReporter reporter)
{
    var web = new HtmlWeb();
    reporter.Report("Unraveling skeins...");
    HtmlDocument doc = await web.LoadFromWebAsync(url);
    var decklistBuilder = new StringBuilder();

    HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes("//table[@class='deck-view-deck-table']/tr");

    // Validate before touching the collection: SelectNodes returns null when nothing matches,
    // and enumerating it first would throw instead of producing the message below.
    if (nodes == null || !nodes.Any())
    {
        throw new InvalidOperationException("Could not find a valid deck at the URL. Make sure the link provided is pointing to the root of the deck.");
    }

    List<HtmlNode> deckNodes = nodes.TakeWhile(node => !node.OuterHtml.Contains("Cards Total")).ToList();

    reporter.StartProgress();
    for (var i = 0; i < deckNodes.Count; i++)
    {
        await Task.Delay(1);
        reporter.Progress(i, 0, deckNodes.Count);
        reporter.Report($"Bifurcating the furcate {i}/{deckNodes.Count}");

        HtmlNode node = deckNodes[i];
        try
        {
            HtmlNodeCollection qtyNodes = node.SelectNodes(".//td[@class='deck-col-qty']");
            HtmlNodeCollection nameNodes = node.SelectNodes(".//td[@class='deck-col-card']");

            // Rows without exactly one quantity cell and one name cell are not card rows.
            if (qtyNodes?.Count != 1 || nameNodes?.Count != 1)
            {
                continue;
            }

            int qty = int.Parse(qtyNodes[0].InnerText.Trim());
            string name = HttpUtility.HtmlDecode(nameNodes[0].InnerText.Trim());
            var line = new SearchLine(name, qty);
            decklistBuilder.AppendLine(line.ToString());
        }
        catch (Exception)
        {
            // Best effort: report the failing row and carry on with the rest.
            reporter.Report($"Failed to import node #{i} from {url}", true);
        }
    }

    reporter.StopProgress();
    return decklistBuilder.ToString();
}
/// <summary>
/// Visits every entry of docUrls (formatted "category*url"), collects the document links on the
/// page and hands each one whose link text contains a parseable meeting date on or after
/// dtStartFrom to ExtractADoc.
/// </summary>
public void DownloadCouncilPdfFiles()
{
    var docs = this.LoadDocumentsDoneSQL();
    var queries = this.LoadQueriesDoneSQL();
    HtmlWeb web = new HtmlWeb();
    Regex dateReg = new Regex("[A-Za-z]+[\\s]{0,1}[0-9]{1,2},[\\s]{0,1}[0-9]{4}");

    using (WebClient c = new WebClient())
    {
        foreach (string url in this.docUrls)
        {
            string[] urlParts = url.Split('*');
            var category = urlParts[0];
            var subUrl = urlParts[1];

            HtmlDocument doc = web.Load(subUrl);
            HtmlNodeCollection list = doc.DocumentNode.SelectNodes("//a[contains(@href,'/LinkClick.aspx')]");
            if (list == null || !list.Any())
            {
                list = doc.DocumentNode.SelectNodes("//a[contains(@href,'/Portals/')]");
            }

            // The fallback query can come back null as well; nothing to download for this page.
            if (list == null)
            {
                continue;
            }

            foreach (var r in list)
            {
                var dateStr = r.InnerText;

                // ">= 0" so that a link text that starts with "Canceled" is skipped too.
                if (dateStr.IndexOf("Canceled", StringComparison.OrdinalIgnoreCase) >= 0)
                {
                    continue;
                }

                string meetingDateText = dateReg.Match(dateStr).ToString();
                DateTime meetingDate;
                if (!DateTime.TryParse(meetingDateText, out meetingDate))
                {
                    Console.WriteLine(dateStr);
                    Console.WriteLine("date format incorrect...");
                    continue;
                }

                if (meetingDate < this.dtStartFrom)
                {
                    Console.WriteLine("Early...");
                    continue;
                }

                // NOTE(review): the previous code derived "docx" vs "pdf" from the href but never
                // used the result; "pdf" has always been what is passed. Confirm whether
                // ExtractADoc should receive the detected type instead.
                this.ExtractADoc(c, this.cityEntity.CityUrl + r.Attributes["href"].Value, category, "pdf", meetingDate, ref docs, ref queries);
            }
        }
    }

    Console.WriteLine("docs:" + docs.Count + "--- query:" + queries.Count);
}
/// <summary>
/// Replaces every anchor that has an href with a "[text](absolute link)" node, where the link
/// is produced by MakeAbsoluteLink from the href without its leading slashes.
/// </summary>
/// <param name="htmlDoc">Document to modify in place.</param>
protected void ReplaceHyperlinks(HtmlDocument htmlDoc)
{
    HtmlNodeCollection anchors = htmlDoc.DocumentNode.SelectNodes("//a");
    if (anchors == null)
    {
        return;
    }

    foreach (HtmlNode link in anchors)
    {
        // Anchors without an href (e.g. named anchors) have nothing to link to; leave them
        // untouched instead of failing on the missing attribute.
        var hrefAttribute = link.Attributes["href"];
        if (hrefAttribute == null)
        {
            continue;
        }

        string relativeLink = hrefAttribute.Value.TrimStart('/');
        string absoluteLink = MakeAbsoluteLink(relativeLink);
        HtmlNode newNode = HtmlNode.CreateNode($"[{link.InnerText}]({absoluteLink})");
        link.ParentNode.ReplaceChild(newNode, link);
    }
}
/// <summary>
/// Replace the elements found with the specified selector with the replacement element,
/// preserving the content, and optionally applying the specified class name.
/// </summary>
/// <param name="htmlNode">Node the selector is evaluated against; modified in place.</param>
/// <param name="elementSelector">XPath selecting the elements to replace.</param>
/// <param name="replacementTag">Name of the element that takes their place.</param>
/// <param name="className">Value for the class attribute, or null for no class attribute.</param>
private static void ReplaceHtmlTag(ref HtmlNode htmlNode, string elementSelector, string replacementTag, string className)
{
    HtmlNodeCollection tryGetNodes = htmlNode.SelectNodes(elementSelector);
    if (tryGetNodes == null || !tryGetNodes.Any())
    {
        return;
    }

    // Quote the value so that several space-separated class names stay one attribute;
    // unquoted, everything after the first space would be parsed as separate attributes.
    var classString = className != null ? $" class=\"{className}\"" : "";

    foreach (HtmlNode node in tryGetNodes)
    {
        var replacementNode = HtmlNode.CreateNode($"<{replacementTag}{classString}>{node.InnerHtml}</{replacementTag}>");
        node.ParentNode.ReplaceChild(replacementNode, node);
    }
}
/// <summary>
/// Collects the src of every img element in the given markup, each prefixed with
/// "https://rally1.rallydev.com".
/// </summary>
/// <param name="str">Markup to scan; loaded through its ToStream() extension.</param>
/// <returns>Null when the markup has no img elements; otherwise the prefixed sources of the
/// images that define a src attribute.</returns>
public List<string> GetAllImageSrcs(string str)
{
    HtmlDocument doc = new HtmlDocument();
    doc.Load(str.ToStream());

    HtmlNodeCollection imgs = doc.DocumentNode.SelectNodes("//img");
    if (imgs == null || !imgs.Any())
    {
        return null;
    }

    // Images without a src attribute are skipped rather than dereferenced.
    return imgs
        .Select(img => img.Attributes[@"src"])
        .Where(src => src != null)
        .Select(src => @"https://rally1.rallydev.com" + src.Value)
        .ToList();
}
/// <summary>
/// Strips the elements named in <paramref name="unwantedTags"/> from
/// <paramref name="document"/>, moving each removed element's children into its place.
/// </summary>
/// <param name="document">Document to modify in place.</param>
/// <param name="unwantedTags">Element names to strip (compared with ==, i.e. case-sensitively).</param>
/// <returns>The same <paramref name="document"/> instance.</returns>
public static HtmlDocument RemoveUnwantedTags(HtmlDocument document, List<string> unwantedTags)
{
    HtmlNodeCollection topLevel = document.DocumentNode.SelectNodes("./*|./text()");
    bool isEmpty = topLevel == null || !topLevel.Any();
    if (isEmpty)
    {
        return document;
    }

    for (var workQueue = new Queue<HtmlNode>(topLevel); workQueue.Count > 0;)
    {
        HtmlNode visited = workQueue.Dequeue();
        HtmlNode container = visited.ParentNode;
        HtmlNodeCollection descendants = visited.SelectNodes("./*|./text()");

        // Queue the children first so they are still visited after a removal.
        if (descendants != null)
        {
            foreach (HtmlNode descendant in descendants)
            {
                workQueue.Enqueue(descendant);
            }
        }

        if (unwantedTags.Any(name => name == visited.Name))
        {
            if (descendants != null)
            {
                foreach (HtmlNode descendant in descendants)
                {
                    container.InsertBefore(descendant, visited);
                }
            }

            container.RemoveChild(visited);
        }
    }

    return document;
}
/// <summary>
/// Converts row nodes into death records, reading the date from the first cell (td[1]) and
/// the message from the second (td[2]).
/// </summary>
/// <param name="nodes">Row nodes to convert.</param>
/// <returns>Null when <paramref name="nodes"/> is null or empty; otherwise one record per node.</returns>
private List<CharacterDeathDTO> GetHtmlCharacterDeaths(HtmlNodeCollection nodes)
{
    bool hasRows = nodes != null && nodes.Any();
    if (!hasRows)
    {
        return null;
    }

    return nodes
        .Select(row => new CharacterDeathDTO
        {
            Date = GetHtmlDateTime(row, "td[1]"),
            Message = GetHtmlString(row, "td[2]")
        })
        .ToList();
}
// adapted from source: https://stackoverflow.com/a/28298882/773798
/// <summary>
/// Remove the specified unwanted tags while preserving their inner content.
/// </summary>
/// <param name="htmlNode">Node whose children are processed; modified in place.</param>
/// <param name="unwantedTags">Element names to strip (compared with ==, i.e. case-sensitively).</param>
private static void RemoveUnwantedHtmlTags(ref HtmlNode htmlNode, List<string> unwantedTags)
{
    HtmlNodeCollection startNodes = htmlNode.SelectNodes("./*|./text()");
    if (startNodes == null)
    {
        return;
    }

    if (!startNodes.Any())
    {
        return;
    }

    var toVisit = new Queue<HtmlNode>(startNodes);
    while (toVisit.Count != 0)
    {
        HtmlNode element = toVisit.Dequeue();
        HtmlNode elementParent = element.ParentNode;
        HtmlNodeCollection inner = element.SelectNodes("./*|./text()");

        if (inner != null)
        {
            foreach (HtmlNode innerNode in inner)
            {
                toVisit.Enqueue(innerNode);
            }
        }

        bool strip = unwantedTags.Any(tag => tag == element.Name);
        if (strip && inner != null)
        {
            // Keep the content: move it in front of the element that is about to go.
            foreach (HtmlNode innerNode in inner)
            {
                elementParent.InsertBefore(innerNode, element);
            }
        }

        if (strip)
        {
            elementParent.RemoveChild(element);
        }
    }
}
/// <summary>
/// Converts a flat list of cell nodes into <see cref="AgriculturalProducts"/> entities. Cells
/// are consumed in groups of eight, skipping the first group; within a group, cells 0..6 hold
/// product name, low/average/high price, category, unit and time.
/// </summary>
/// <param name="nodes">Cell nodes to parse; may be null.</param>
/// <returns>The parsed entities (empty when there is nothing to parse).</returns>
public IEnumerable<IDomainModel> ParseNodes(HtmlNodeCollection nodes)
{
    const int RowLength = 8;      // cells per row; the first row is skipped
    const int LastUsedCell = 6;   // highest cell offset read from a row

    List<AgriculturalProducts> lst = new List<AgriculturalProducts>();
    if (nodes == null)
    {
        return lst;
    }

    // Index directly instead of Skip/Take per row, and stop before a trailing group that is
    // too short to hold all used cells (previously an IndexOutOfRangeException).
    for (int startIndex = RowLength; startIndex + LastUsedCell < nodes.Count; startIndex += RowLength)
    {
        AgriculturalProducts entity = new AgriculturalProducts();
        entity.LowPrice = nodes[startIndex + 1].InnerText;
        entity.AveragePrice = nodes[startIndex + 2].InnerText;
        entity.HighPrice = nodes[startIndex + 3].InnerText;
        entity.Category = nodes[startIndex + 4].InnerText;
        entity.Unit = nodes[startIndex + 5].InnerText;
        entity.CreateTime = DateTime.Parse(nodes[startIndex + 6].InnerText);
        entity.ProductName = nodes[startIndex].InnerText;
        lst.Add(entity);
    }

    return lst;
}
/// <summary>
/// Loads a deck page from <paramref name="url"/> and builds a decklist from the anchors under
/// ul.boardlist, reading each card's name and quantity from the anchor's data-name and
/// data-qty attributes.
/// </summary>
/// <param name="url">Address of the deck page.</param>
/// <param name="reporter">Receives status messages and progress updates.</param>
/// <returns>The decklist, one SearchLine per successfully parsed anchor.</returns>
/// <exception cref="InvalidOperationException">Thrown when the page contains no matching anchors.</exception>
private static async Task<string> ImportFromTappedOut(string url, IReporter reporter)
{
    var web = new HtmlWeb();
    reporter.Report("Unraveling skeins...");
    HtmlDocument doc = await web.LoadFromWebAsync(url);
    var decklistBuilder = new StringBuilder();

    HtmlNodeCollection cardLinks = doc.DocumentNode.SelectNodes("//ul[@class='boardlist']/li/a");
    bool found = cardLinks != null && cardLinks.Any();
    if (!found)
    {
        throw new InvalidOperationException("Could not find a valid deck at the URL. Make sure the link provided is pointing to the root of the deck.");
    }

    // Single() throws when the attribute is missing or duplicated; the caller's catch reports it.
    string RequiredAttribute(HtmlNode source, string attributeName)
    {
        return source.Attributes.Single(a => a.Name == attributeName).Value;
    }

    var i = 0;
    while (i < cardLinks.Count)
    {
        await Task.Delay(1);
        reporter.Progress(i, 0, cardLinks.Count);
        reporter.Report($"Bifurcating the furcate {i}/{cardLinks.Count}");

        try
        {
            HtmlNode cardLink = cardLinks[i];
            string name = HttpUtility.HtmlDecode(RequiredAttribute(cardLink, "data-name").Trim());
            int qty = int.Parse(RequiredAttribute(cardLink, "data-qty"));
            decklistBuilder.AppendLine(new SearchLine(name, qty).ToString());
        }
        catch (Exception)
        {
            reporter.Report($"Failed to import node #{i} from {url}", true);
        }

        i++;
    }

    return decklistBuilder.ToString();
}
/// <summary>
/// Returns the de-entitized, trimmed inner text of the first node matched by
/// <paramref name="xpath"/> on the first element of <paramref name="nodes"/> that yields a match.
/// </summary>
/// <param name="nodes">Nodes to evaluate the XPath against, in order.</param>
/// <param name="xpath">XPath evaluated relative to each node.</param>
/// <returns>The cleaned text, or an empty string when nothing matches or the text is empty.</returns>
private string GetHtmlString(HtmlNodeCollection nodes, string xpath)
{
    if (nodes == null || !nodes.Any())
    {
        return String.Empty;
    }

    // Evaluate the XPath once per node and keep the first non-null result
    // (SelectNodes returns null, never an empty collection, when nothing matches).
    HtmlNodeCollection matches = nodes
        .Select(candidate => candidate.SelectNodes(xpath))
        .FirstOrDefault(found => found != null);
    if (matches == null)
    {
        return String.Empty;
    }

    var value = matches.First().InnerText;
    if (String.IsNullOrEmpty(value))
    {
        return String.Empty;
    }

    // Replace non-breaking spaces (what &nbsp; de-entitizes to) with regular spaces. The
    // character is written as an escape so it cannot be silently turned into a plain space
    // by editors or whitespace normalisation.
    return HtmlEntity.DeEntitize(value).Replace("\u00A0", " ").Trim();
}
/// <summary>
/// Parses <paramref name="html"/>, hands the top-level nodes to HandleNextNode through a work
/// queue until it is empty, then runs the result through HtmlService.FixOrphanedText and logs
/// when that step changed the markup.
/// </summary>
/// <param name="map">Passed to HandleNextNode; its Logger receives the orphaned-text message.</param>
/// <param name="itemPath">Passed to HandleNextNode and to the logger.</param>
/// <param name="html">Markup to clean; returned unchanged when null, empty or without top-level nodes.</param>
/// <returns>The cleaned markup as returned by HtmlService.FixOrphanedText.</returns>
public string CleanHtml(IDataMap map, string itemPath, string html)
{
    if (String.IsNullOrEmpty(html))
    {
        return html;
    }

    var document = new HtmlDocument();
    document.LoadHtml(html);

    HtmlNodeCollection topLevel = document.DocumentNode.SelectNodes("./*|./text()");
    if (topLevel == null)
    {
        return html;
    }

    if (!topLevel.Any())
    {
        return html;
    }

    var pending = new Queue<HtmlNode>(topLevel);
    while (pending.Count > 0)
    {
        HandleNextNode(pending, map, itemPath);
    }

    bool wasModified = false;
    string result = HtmlService.FixOrphanedText(document.DocumentNode.InnerHtml, out wasModified);
    if (wasModified)
    {
        map.Logger.Log("Fixed Orphaned Text in Rich Text.", itemPath);
    }

    return result;
}
/// <summary>
/// Walks every day between the selected start and end dates, downloads the Yahoo Finance
/// earnings calendar for that day (in pages of 100 rows, via the offset query parameter) and
/// stores a TrnStockEarning for each symbol that is listed on NASDAQ, NYSE or AMEX in
/// MstSymbols and has no earning stored for that date yet. Progress is written to the
/// activity log text box and the progress bar.
/// </summary>
public void generateEarnings()
{
    const string EarningsTableXPath = "//table[contains(@class, 'data-table W(100%) Bdcl(c) Pos(r) BdB Bdc($c-fuji-grey-c)')]";
    const int PageSize = 100;

    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    magentadbDataContext dbProd = new magentadbDataContext();

    DateTime currentDate = Convert.ToDateTime(txtDateStart.Value.ToString("yyyy-MM-dd"));
    DateTime endDate = Convert.ToDateTime(txtDateEnd.Value.ToString("yyyy-MM-dd"));
    int dateDiff = endDate.Subtract(currentDate).Days;
    int offset = 0;

    progressBar1.Maximum = dateDiff + 1;
    progressBar1.Value = 0;

    using (WebClient webClient = new WebClient())
    {
        while (currentDate <= endDate)
        {
            string urlDateString = currentDate.ToString("yyyy-MM-dd");
            string pageHtml = webClient.DownloadString("https://finance.yahoo.com/calendar/earnings?day=" + urlDateString + "&offset=" + offset);
            doc.LoadHtml(pageHtml);

            HtmlNodeCollection tables = doc.DocumentNode.SelectNodes(EarningsTableXPath);
            if (tables == null)
            {
                txtLogActivity.Text += urlDateString + ": No data. \r\n";

                // Without this reset, a follow-up page (offset > 0) that has no table left the
                // offset unchanged, so the date never advanced and the loop never ended.
                offset = 0;
            }
            else
            {
                foreach (HtmlNode table in tables)
                {
                    HtmlNodeCollection tableBodies = table.SelectNodes("tbody");
                    if (tableBodies == null)
                    {
                        continue;
                    }

                    foreach (HtmlNode tableBody in tableBodies)
                    {
                        HtmlNodeCollection bodyRows = tableBody.SelectNodes("tr");
                        int rowCount = bodyRows?.Count ?? 0;
                        txtLogActivity.Text += urlDateString + ": " + rowCount + " symbols \r\n";

                        // A full page means there may be more rows: request the next page for
                        // the same day. Anything else ends paging for this day.
                        offset = rowCount == PageSize ? offset + PageSize : 0;

                        if (bodyRows == null)
                        {
                            continue;
                        }

                        foreach (HtmlNode tableRow in bodyRows)
                        {
                            HtmlNodeCollection cells = tableRow.SelectNodes("td");
                            if (cells == null || !cells.Any())
                            {
                                continue;
                            }

                            try
                            {
                                txtLogActivity.Text += cells[1].InnerText + "\r\n";
                                txtLogActivity.SelectionStart = txtLogActivity.Text.Length;
                                txtLogActivity.ScrollToCaret();

                                string earningSymbol = cells[1].InnerText.ToUpper();
                                string earningPosition = "Before Market Open";
                                if (cells[3].InnerText.Equals("After Market Close"))
                                {
                                    earningPosition = "After Market Close";
                                }

                                bool alreadyStored = dbProd.TrnStockEarnings.Any(e => e.Symbol == earningSymbol && e.EarningDate.Date == currentDate.Date);
                                if (!alreadyStored)
                                {
                                    // Single query: null means the symbol is unknown or not on
                                    // one of the supported exchanges.
                                    var knownSymbol = (from s in dbProd.MstSymbols
                                                       where s.Symbol == earningSymbol && (s.Exchange == "NASDAQ" || s.Exchange == "NYSE" || s.Exchange == "AMEX")
                                                       select new { Id = s.Id, }).FirstOrDefault();

                                    if (knownSymbol != null)
                                    {
                                        TrnStockEarning newTrnStockEarning = new TrnStockEarning();
                                        newTrnStockEarning.Symbol = earningSymbol;
                                        newTrnStockEarning.SymbolId = knownSymbol.Id;
                                        newTrnStockEarning.EarningDate = currentDate.Date;
                                        newTrnStockEarning.EarningTime = earningPosition;
                                        dbProd.TrnStockEarnings.InsertOnSubmit(newTrnStockEarning);
                                        dbProd.SubmitChanges();
                                    }
                                }
                            }
                            catch
                            {
                                // Best effort per row: note the failure and continue with the next row.
                                txtLogActivity.Text += "Error saving. \r\n";
                            }
                        }
                    }
                }
            }

            // Only move to the next day once paging for the current day is finished.
            if (offset == 0)
            {
                currentDate = currentDate.AddDays(1);
                progressBar1.Value += 1;
            }
        }
    }

    MessageBox.Show("Completed ", "", MessageBoxButtons.OK, MessageBoxIcon.Information);
}
/// <summary>
/// Finds the links in <paramref name="text"/> whose href starts with "http" and returns those
/// reported as threats by the safe-browsing lookup, together with results cached from earlier
/// lookups. New matches are cached for 24 hours.
/// </summary>
/// <param name="text">HTML to scan for links.</param>
/// <param name="apiKey">API key passed to the safe-browsing lookup; no check is made without one.</param>
/// <returns>The unsafe links found (empty when there is no API key, no text or no links).</returns>
public List<BrokenLinkModel> Check(string text, string apiKey)
{
    List<BrokenLinkModel> response = new List<BrokenLinkModel>();

    if (!apiKey.HasValue() || string.IsNullOrEmpty(text))
    {
        return response;
    }

    var doc = new HtmlDocument();
    doc.LoadHtml(text);

    HtmlNodeCollection links = doc.DocumentNode.SelectNodes(KnownStrings.HrefXPath);
    if (links == null || !links.Any())
    {
        return response;
    }

    // Each distinct URL is checked (and reported from cache) once, however often it is linked.
    List<string> hrefs = links
        .Select(l => l.GetAttributeValue("href", string.Empty))
        .Where(h => h.StartsWith("http", StringComparison.Ordinal))
        .Distinct()
        .ToList();

    // Check for cached responses - avoids a request when the page is being resaved.
    List<BrokenLinkModel> fromCache = new List<BrokenLinkModel>();
    List<string> uncached = new List<string>();
    foreach (string href in hrefs)
    {
        var cacheItem = Current.AppCaches.RuntimeCache.GetCacheItem<BrokenLinkModel>(KnownStrings.CacheKey + href);
        if (null == cacheItem)
        {
            uncached.Add(href);
        }
        else
        {
            fromCache.Add(cacheItem);
        }
    }

    // Only call the lookup when there is something left to look up.
    if (uncached.Count > 0)
    {
        SafeBrowsingResponseModel safeBrowsingResult = SafeBrowsingLookup(uncached.ToArray(), apiKey);

        if (safeBrowsingResult?.Matches != null && safeBrowsingResult.Matches.Any())
        {
            response.AddRange(safeBrowsingResult.Matches.Select(m => new BrokenLinkModel
            {
                Href = m.Threat.Url,
                Status = m.ThreatType,
                Unsafe = true,
                // The reported URL may not match any href verbatim; fall back to empty text
                // rather than throwing.
                Text = links.FirstOrDefault(l => l.GetAttributeValue("href", string.Empty) == m.Threat.Url)?.InnerText ?? string.Empty
            }));

            foreach (BrokenLinkModel item in response)
            {
                Current.AppCaches.RuntimeCache.InsertCacheItem(KnownStrings.CacheKey + item.Href, () => item, new TimeSpan(24, 0, 0), false);
            }
        }
    }

    // Add cached results.
    response.AddRange(fromCache);
    return response;
}