示例#1
0
 /// <summary>
 /// Tests whether any node in <paramref name="childNodes"/> has inner text that matches
 /// <paramref name="textToFind"/> once newlines are stripped and <c>CleanSpace</c> has been applied.
 /// </summary>
 /// <param name="childNodes">Nodes whose inner text is inspected.</param>
 /// <param name="textToFind">Text to look for.</param>
 /// <param name="partial">
 /// When <c>true</c> the cleaned text only has to contain <paramref name="textToFind"/>;
 /// otherwise it has to equal it.
 /// </param>
 /// <returns><c>true</c> if at least one node matches; otherwise <c>false</c>.</returns>
 private bool IsMatch(HtmlNodeCollection childNodes, string textToFind, bool partial)
 {
     // Pick the comparison once; the per-node normalization is the same for both modes.
     Func<string, bool> matches;

     if (partial)
     {
         matches = cleaned => cleaned.Contains(textToFind);
     }
     else
     {
         matches = cleaned => cleaned.Equals(textToFind);
     }

     return childNodes.Any(candidate =>
         matches(CleanSpace(candidate.InnerText.Replace(Environment.NewLine, ""))));
 }
示例#2
0
        /// <summary>
        /// Parses <paramref name="html"/>, feeds its top-level element and text nodes through
        /// <c>HandleNextNode</c> using a work queue, and returns the document's resulting inner HTML.
        /// </summary>
        /// <param name="map">Passed through unchanged to <c>HandleNextNode</c>.</param>
        /// <param name="itemPath">Passed through unchanged to <c>HandleNextNode</c>.</param>
        /// <param name="html">Markup to clean; null or empty input is returned as-is.</param>
        /// <param name="importRow">Passed through unchanged to <c>HandleNextNode</c>.</param>
        /// <returns>
        /// The processed markup, or the original <paramref name="html"/> when it is null/empty or has
        /// no top-level element or text nodes.
        /// </returns>
        public string CleanHtml(IDataMap map, string itemPath, string html, Item importRow)
        {
            if (String.IsNullOrEmpty(html))
            {
                return html;
            }

            var parsed = new HtmlDocument();
            parsed.LoadHtml(html);

            HtmlNodeCollection topLevel = parsed.DocumentNode.SelectNodes("./*|./text()");
            bool nothingToProcess = topLevel == null || topLevel.Count == 0;

            if (nothingToProcess)
            {
                return html;
            }

            var pending = new Queue<HtmlNode>(topLevel);

            // NOTE(review): the loop only ends once the queue is empty, so HandleNextNode is
            // presumably responsible for dequeuing (and possibly enqueuing children) - confirm.
            while (pending.Count > 0)
            {
                HandleNextNode(pending, map, itemPath, importRow);
            }

            return parsed.DocumentNode.InnerHtml;
        }
示例#3
0
        /// <summary>
        /// Walks the parsed document breadth-first and, whenever a <paramref name="Tag"/> element is
        /// dequeued directly after another element of the same name, with the same parent XPath and the
        /// same serialized attributes, appends its inner HTML to that previous element and removes it.
        /// </summary>
        /// <param name="html">Markup to process.</param>
        /// <param name="encode">Passed to <c>GetHtmlDocument</c> together with <paramref name="html"/>.</param>
        /// <param name="Tag">Tag name to consolidate; compared case-insensitively. Defaults to "span".</param>
        /// <returns>
        /// <c>null</c> when <paramref name="html"/> is null or empty; the original <paramref name="html"/>
        /// when the document has no top-level element or text nodes; otherwise the document's inner HTML
        /// after consolidation.
        /// </returns>
        public string ConsolidateRepeatedTags(string html, string encode, string Tag = "span")
        {
            if (string.IsNullOrEmpty(html))
            {
                // Existing contract: empty input yields null rather than the input itself.
                return null;
            }

            var document = GetHtmlDocument(html, encode);

            HtmlNodeCollection tryGetNodes = document.DocumentNode.SelectNodes("./*|./text()");

            if (tryGetNodes == null || !tryGetNodes.Any())
            {
                return html;
            }

            var nodes = new Queue <HtmlNode>(tryGetNodes);

            HtmlNode lastnode = null;

            while (nodes.Count > 0)
            {
                var node       = nodes.Dequeue();
                var parentNode = node.ParentNode;
                var childNodes = node.SelectNodes("./*|./text()");

                if (childNodes != null)
                {
                    foreach (var child in childNodes)
                    {
                        nodes.Enqueue(child);
                    }
                }

                // Ordinal, case-insensitive comparison: the previous ToUpper() == ToUpper() form depends on
                // the current culture (e.g. under tr-TR "i".ToUpper() is not "I"), so tag "I" would never
                // match an <i> element there.
                bool repeatsPreviousTag =
                    string.Equals(node.Name, Tag, System.StringComparison.OrdinalIgnoreCase) &&
                    lastnode != null && lastnode.Name == node.Name &&
                    lastnode.ParentNode != null && node.ParentNode != null &&
                    lastnode.ParentNode.XPath == node.ParentNode.XPath;

                bool sameAttributes = repeatsPreviousTag &&
                                      string.Equals(SerializedAttributes(lastnode), SerializedAttributes(node),
                                                    System.StringComparison.OrdinalIgnoreCase);

                if (sameAttributes)
                {
                    // Fold this element into the previous one; lastnode stays the merge target so a
                    // third identical sibling is merged into it as well.
                    lastnode.InnerHtml += node.InnerHtml;
                    parentNode.RemoveChild(node);
                }
                else
                {
                    lastnode = node;
                }
            }

            return document.DocumentNode.InnerHtml;
        }
        // Checks whether a course exists, returning true if it does.
        // Makes two GET requests to obtain authorization tokens, then POSTs the course form data and
        // inspects the returned HTML: the course is reported as missing when the page contains any div
        // whose class attribute contains 'errorText'.
        // Throws InvalidOperationException when the response page has no element with id "main".
        public async Task <bool> CheckCourseExists(CourseInfo course)
        {
            string token;

            // Each request/response pair is disposed as soon as its token or body has been read.
            using (HttpRequestMessage initialRequest = requestsHelper.CreateHttpRequestMessage(HttpMethod.Get, Constants.WebAdvisorInitialConnectionUrl))
            using (HttpResponseMessage initialResponse = await httpClient.SendAsync(initialRequest))
            {
                token = requestsHelper.GetTokenFromResponse(initialResponse);
            }

            using (HttpRequestMessage tokenRequest = requestsHelper.CreateHttpRequestMessage(HttpMethod.Get, Constants.WebAdvisorInitialConnectionUrl + token))
            using (HttpResponseMessage tokenResponse = await httpClient.SendAsync(tokenRequest))
            {
                token = requestsHelper.GetTokenFromResponse(tokenResponse);
            }

            string postUrl = requestsHelper.CreatePostUrl(token);
            string responseHtml;

            using (HttpRequestMessage searchRequest = requestsHelper.CreateHttpRequestMessage(HttpMethod.Post, postUrl))
            {
                searchRequest.Content = requestsHelper.CreateFormData(course);

                using (HttpResponseMessage searchResponse = await httpClient.SendAsync(searchRequest))
                {
                    responseHtml = await searchResponse.Content.ReadAsStringAsync();
                }
            }

            HtmlDocument htmlDoc = new HtmlDocument();

            htmlDoc.LoadHtml(responseHtml);

            HtmlNode mainContentNode = htmlDoc.GetElementbyId("main");

            if (mainContentNode == null)
            {
                // Previously this surfaced as a bare NullReferenceException on the next line.
                throw new System.InvalidOperationException("The course search response did not contain an element with id 'main'.");
            }

            // The XPath starts with "//", so it matches error divs anywhere in the document,
            // not only below the "main" element.
            HtmlNodeCollection errorNodes = mainContentNode.SelectNodes("//div[contains(@class, 'errorText')]");

            return errorNodes == null || !errorNodes.Any();
        }
        /// <summary>
        /// Converts achievement row nodes into <c>AchievementDTO</c> instances.
        /// </summary>
        /// <param name="nodes">Row nodes; each is queried for <c>td[1]/img</c> and <c>td[2]</c>.</param>
        /// <returns>
        /// The parsed achievements, or <c>null</c> when <paramref name="nodes"/> is null/empty or any
        /// row's text equals <c>ErrorMessages.NoAchievements</c>.
        /// </returns>
        private List <AchievementDTO> GetHtmlAchievementList(HtmlNodeCollection nodes)
        {
            if (nodes == null || !nodes.Any())
            {
                return null;
            }

            var achievements = new List <AchievementDTO>();

            foreach (var acvNode in nodes)
            {
                if (acvNode.InnerText == ErrorMessages.NoAchievements)
                {
                    return null;
                }

                var achievement = new AchievementDTO();

                // SelectNodes yields null (not an empty collection) when nothing matches, so a row
                // without images in the first cell must count as rarity 0 instead of throwing.
                achievement.Rarity   = acvNode.SelectNodes("td[1]/img")?.Count ?? 0;
                achievement.Name     = GetHtmlString(acvNode, "td[2]");
                achievement.IsSecret = acvNode.SelectNodes("td[2]/img") != null;

                achievements.Add(achievement);
            }

            return achievements;
        }
示例#6
0
        /// <summary>
        /// Removes a fixed set of formatting and structural tags from <paramref name="html"/> while
        /// keeping the content of each removed element in its place.
        /// </summary>
        /// <param name="html">Markup to clean; null or empty input is returned as-is.</param>
        /// <returns>
        /// The cleaned markup, or the original <paramref name="html"/> when it is null/empty or has no
        /// top-level element or text nodes.
        /// </returns>
        public string CleanTitleHtml(string html)
        {
            if (String.IsNullOrEmpty(html))
            {
                return html;
            }

            // Set lookup instead of scanning a list for every visited node.
            var unwantedTags = new HashSet <string>
            {
                "a", "b", "body", "blockquote", "br", "button", "center", "td", "tr", "em", "i",
                "embed", "form", "frame", "iframe", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "img", "legend", "li", "ul", "ol", "map",
                "script", "strong", "sup", "sub", "p", "thead", "tbody", "u", "span", "table", "div", "label", "font"
            };

            var document = new HtmlDocument();

            document.LoadHtml(html);

            HtmlNodeCollection tryGetNodes = document.DocumentNode.SelectNodes("./*|./text()");

            if (tryGetNodes == null || !tryGetNodes.Any())
            {
                return html;
            }

            var nodes = new Queue <HtmlNode>(tryGetNodes);

            while (nodes.Count > 0)
            {
                var node       = nodes.Dequeue();
                // Invariant lower-casing: tag names are identifiers, not linguistic text.
                var nodeName   = node.Name.ToLowerInvariant();
                var parentNode = node.ParentNode;
                var childNodes = node.SelectNodes("./*|./text()");

                // Children are always queued so nested unwanted tags are visited too.
                if (childNodes != null)
                {
                    foreach (var child in childNodes)
                    {
                        nodes.Enqueue(child);
                    }
                }

                if (!unwantedTags.Contains(nodeName))
                {
                    continue;
                }

                // Hoist the children into the parent, in front of the node, before dropping the node.
                if (childNodes != null)
                {
                    foreach (var child in childNodes)
                    {
                        parentNode.InsertBefore(child, node);
                    }
                }

                parentNode.RemoveChild(node);
            }

            return document.DocumentNode.InnerHtml;
        }
示例#7
0
        /// <summary>
        ///  Builds the display-image list from <paramref name="imageNodes"/>, skipping nodes without a
        ///  src attribute and turning relative src values into absolute URLs based on <paramref name="targetUrl"/>.
        /// </summary>
        /// <param name="targetUrl">URL the images originate from; relative src values are resolved against it</param>
        /// <param name="imageNodes">Node collection to iterate; may be null</param>
        /// <returns>One <c>DisplayImage</c> per node that has a src attribute; empty when there are none</returns>
        private static IEnumerable <DisplayImage> AssembleImageListForViewModel(Uri targetUrl, HtmlNodeCollection imageNodes)
        {
            var imageList = new List <DisplayImage>();

            if (null == imageNodes)
            {
                return imageList;
            }

            foreach (var item in imageNodes)
            {
                var srcAttribute = item.Attributes["src"];

                if (null == srcAttribute)   //this scenario can present if custom, client-side lazy-loaders use random data-attributes +/ have no src defined
                {
                    continue;               //no need to go on
                }
                var imgSrc = srcAttribute.Value;

                //ensure all image src's are fully-qualified to their respective domain,
                //otherwise they won't render because we're on a totally different domain
                if (Uri.IsWellFormedUriString(imgSrc, UriKind.Relative))
                {
                    // Resolve with Uri rather than string concatenation: "images/a.png" used to become
                    // "http://hostimages/a.png", and protocol-relative "//cdn/x.png" was mangled as well.
                    Uri absoluteSrc;
                    imgSrc = Uri.TryCreate(targetUrl, imgSrc, out absoluteSrc)
                        ? absoluteSrc.AbsoluteUri
                        : targetUrl.GetLeftPart(UriPartial.Authority) + imgSrc;
                }

                var displayImage = new DisplayImage {
                    ImageUrl = imgSrc, AltText = item.Attributes["alt"]?.Value ?? string.Empty
                };
                imageList.Add(displayImage);
            }

            return imageList;
        }
示例#8
0
        /// <summary>
        /// Reads the player's number from the first <c>td</c> cell of <paramref name="playerRow"/>.
        /// </summary>
        /// <param name="playerRow">Table row node describing a player.</param>
        /// <returns>
        /// The parsed number, or <c>null</c> when the cell has no child nodes, its text is blank or not
        /// an integer, or any exception occurs (which is logged as an error).
        /// </returns>
        private int? ExtractNumber(HtmlNode playerRow)
        {
            try
            {
                HtmlNodeCollection cellChildren = playerRow.SelectNodes("td")[0].ChildNodes;

                if (cellChildren.Count == 0)
                {
                    return null;
                }

                // Single() throws when the cell has more than one child; that is reported by the catch below.
                string rawText = cellChildren.Single().InnerText;

                if (!string.IsNullOrWhiteSpace(rawText) && int.TryParse(rawText, out int parsedNumber))
                {
                    return parsedNumber;
                }

                return null;
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "Failed to extract player's number from a player row.");
                return null;
            }
        }
示例#9
0
        /// <summary>
        /// Downloads the Yahoo Finance earnings calendar for 2018-10-03 and prints, for every table row
        /// that has at least seven cells, the symbol, company, call time, EPS estimate, reported EPS and
        /// surprise percentage. Waits for a key press before exiting.
        /// </summary>
        /// <param name="args">Unused.</param>
        static void Main(string[] args)
        {
            string page;

            using (WebClient webClient = new WebClient())
            {
                page = webClient.DownloadString("https://finance.yahoo.com/calendar/earnings?day=2018-10-03");
            }

            HtmlDocument doc = new HtmlDocument();

            doc.LoadHtml(page);

            // SelectNodes returns null rather than an empty collection when nothing matches,
            // so every result is checked before it is iterated.
            HtmlNodeCollection tables = doc.DocumentNode.SelectNodes("//table[contains(@class, 'data-table W(100%) Bdcl(c) Pos(r) BdB Bdc($c-fuji-grey-c)')]");

            if (tables != null)
            {
                foreach (HtmlNode table in tables)
                {
                    HtmlNodeCollection tableBodies = table.SelectNodes("tbody");

                    if (tableBodies == null)
                    {
                        continue;
                    }

                    foreach (HtmlNode tableBody in tableBodies)
                    {
                        HtmlNodeCollection tableRows = tableBody.SelectNodes("tr");

                        Console.WriteLine("No. of Earnings: " + (tableRows?.Count ?? 0));
                        Console.WriteLine();

                        if (tableRows == null)
                        {
                            continue;
                        }

                        foreach (HtmlNode tableRow in tableRows)
                        {
                            HtmlNodeCollection cells = tableRow.SelectNodes("td");

                            // Cells 1..6 are read below, so a row needs at least seven cells.
                            if (cells == null || cells.Count < 7)
                            {
                                continue;
                            }

                            Console.WriteLine("Symbol: " + cells[1].InnerText);
                            Console.WriteLine("Company: " + cells[2].InnerText);
                            Console.WriteLine("Earnings Call Time: " + cells[3].InnerText);
                            Console.WriteLine("EPS Estimate: " + cells[4].InnerText);
                            Console.WriteLine("Reported EPS: " + cells[5].InnerText);
                            Console.WriteLine("Surprise (%): " + cells[6].InnerText);
                            Console.WriteLine();
                        }
                    }
                }
            }

            Console.ReadKey();
        }
示例#10
0
        /// <summary>
        /// Parses earthquake records out of the inner text of each node.
        /// </summary>
        /// <remarks>
        /// A node's text is split on line feeds and trimmed; <c>GetStartIndex</c> locates the first of six
        /// consecutive fields (scale, time, latitude, longitude, depth, position). Nodes are skipped when
        /// their text is blank, the start index is below 2, fewer than six fields follow the start
        /// index, or the time field cannot be parsed as a date.
        /// </remarks>
        /// <param name="nodes">Nodes to parse; may be null.</param>
        /// <returns>The parsed earthquakes; empty when nothing could be parsed.</returns>
        public IEnumerable <IDomainModel> ParseNodes(HtmlNodeCollection nodes)
        {
            // Number of consecutive lines consumed starting at the start index.
            const int fieldCount = 6;

            List <Earthquake> lst = new List <Earthquake>();

            if (nodes == null)
            {
                return lst;
            }

            foreach (var node in nodes)
            {
                if (string.IsNullOrWhiteSpace(node.InnerText))
                {
                    continue;
                }

                string[] content    = node.InnerText.Split('\n').Select(o => o.Trim()).ToArray();
                int      startIndex = this.GetStartIndex(content);

                // Guard both ends: too-early start index (existing rule) and a truncated record,
                // which previously raised IndexOutOfRangeException.
                if (startIndex < 2 || startIndex + fieldCount > content.Length)
                {
                    continue;
                }

                DateTime createTime;
                if (!DateTime.TryParse(content[startIndex + 1], out createTime))
                {
                    // Malformed timestamp: skip this record instead of aborting the whole parse.
                    continue;
                }

                Earthquake entity = new Earthquake();
                entity.Scale      = content[startIndex];
                entity.CreateTime = createTime;
                entity.Latitude   = content[startIndex + 2];
                entity.Logitude   = content[startIndex + 3];
                entity.Depth      = content[startIndex + 4];
                entity.Position   = content[startIndex + 5];
                lst.Add(entity);
            }

            return lst;
        }
示例#11
0
        /// <summary>
        /// Adds the href and title of every anchor in <paramref name="mp3AnchorSet"/> to
        /// <paramref name="parserResult"/>. Does nothing when <paramref name="parserResult"/> is null;
        /// writes a log entry instead when the anchor set is null or empty.
        /// </summary>
        /// <param name="mp3AnchorSet">Anchors pointing at MP3 files; may be null.</param>
        /// <param name="parserResult">Result object that receives the URLs (or the log entry).</param>
        private void GetUrlsFromPlayListWrapper(HtmlNodeCollection mp3AnchorSet, ref ParserResult parserResult)
        {
            /*
             *  Markup this method expects (the marked anchor is what arrives in mp3AnchorSet):
             *
             *  <div class="sm2-playlist-wrapper">
             *    <ul class="sm2-playlist-bd">
             *      <li>
             *        <div part="1" class="sm2-row sm2-wide" id="file-8490384">
             *   ===>   <a href="https://vltava.rozhlas.cz/sites/default/files/audios/8823b0fd947daa76167e9014d6ed4014.mp3?uuid=5c17536947ad0">
             *            <div class="filename" title="Steinar Bragi: Planina">
             *              <div class="filename__text" title="Steinar Bragi: Planina">1. díl: Steinar Bragi: Planina</div>
             *            </div>
             *          </a>
             *          <div class="audio-info-wrap">
             *            <span class="playlist-audio-time-to-expire">
             *            <span class="caption__desktop-only">k poslechu </span>ještě 3 dny</span>
             *            <span class="playlist-audio-length">28:14</span>
             *          </div>
             *        </div>
             *      </li>
             */

            if (parserResult == null)
            {
                return;
            }

            // The original test used "||", which threw on a null set and made this branch unreachable.
            if (mp3AnchorSet == null || !mp3AnchorSet.Any())
            {
                parserResult.AddLog($"ParsePrehrat2018Html - mp3AnchorSet is null.");
                return;
            }

            foreach (var mp3A in mp3AnchorSet)
            {
                var url = mp3A.Attributes["href"]?.Value;

                // Look two levels down for the element whose class is exactly "filename__text".
                var filenameTextNode = mp3A.ChildNodes
                                       .SelectMany(p => p.ChildNodes)
                                       .FirstOrDefault(p => p.Attributes.Any(a => a.Name == "class" && a.Value == "filename__text"));

                // Variant 1 - e.g. a reading or serial with several episodes: the title sits in the filename__text element.
                var title = filenameTextNode?.InnerHtml?.Trim();
                if (string.IsNullOrEmpty(title))
                {
                    // Variant 2 - a single episode of a programme: fall back to the anchor's own content.
                    title = mp3A.InnerHtml;
                }

                parserResult.AddUrl(url, title);
            }
        }
示例#12
0
            /// <summary>
            /// Reads the HTML file at <paramref name="path"/> and converts every
            /// <c>article/div.list/div.item</c> element into an <c>OriginRow</c>.
            /// </summary>
            /// <remarks>
            /// Missing pieces of an item (summary, detail paragraphs, key/value spans) are skipped
            /// rather than raising a NullReferenceException; the row is still returned with whatever
            /// could be read.
            /// </remarks>
            /// <param name="path">Path of the HTML file to parse.</param>
            /// <returns>The parsed rows; empty when the file contains no matching items.</returns>
            public async Task <IEnumerable <OriginRow> > CrawlerMaster(string path)
            {
                List <OriginRow> rows = new List <OriginRow>();
                string           html = await File.ReadAllTextAsync(path);

                HtmlDocument doc = new HtmlDocument();

                doc.LoadHtml(html);
                HtmlNode           root  = doc.DocumentNode;
                HtmlNodeCollection items = root.SelectNodes("//article/div[@class='list']/div[@class='item']");

                if (items == null)
                {
                    return rows;
                }

                foreach (HtmlNode item in items)
                {
                    OriginRow row = new OriginRow();

                    HtmlNode summaryNode = item.SelectSingleNode("./div[@class='item-summary']");
                    if (summaryNode != null)
                    {
                        row.Summary = WebUtility.HtmlDecode(summaryNode.InnerText).Trim();
                    }

                    // SelectNodes returns null when an item has no detail paragraphs.
                    HtmlNodeCollection details = item.SelectNodes("./div[@class='item-details']/p");
                    if (details != null)
                    {
                        foreach (HtmlNode detail in details)
                        {
                            HtmlNode keyNode   = detail.SelectSingleNode("./span[1]");
                            HtmlNode valueNode = detail.SelectSingleNode("./span[2]");

                            if (keyNode == null || valueNode == null)
                            {
                                // Not a key/value paragraph.
                                continue;
                            }

                            string key   = keyNode.InnerText.Trim();
                            string value = valueNode.InnerText.Trim();
                            switch (key)
                            {
                            // Recognized labels whose values are not stored.
                            case "Notice Type:":
                            case "Approval Number:":
                                break;

                            case "Executing Agency:":
                                row.ExecutingAgency = value;
                                break;

                            case "Contractor Name:":
                                row.ContractorName = value;
                                break;

                            case "Address:":
                                row.ContractorAddress = value;
                                break;

                            case "Total Contract Amount (US$):":
                                row.TotalContractAmount = value;
                                break;

                            case "Contract Amount Financed by ADB (US$):":
                                row.FinancedByAdb = value;
                                break;
                            }
                        }
                    }

                    rows.Add(row);
                }

                return rows;
            }
示例#13
0
        /// <summary>
        /// Removes every <c>style</c> element from <paramref name="document"/>.
        /// </summary>
        /// <param name="document">Document to modify in place.</param>
        /// <returns><c>true</c> if at least one element was removed; otherwise <c>false</c>.</returns>
        public static bool RemoveAll(HtmlDocument document)
        {
            HtmlNodeCollection collection = document.DocumentNode.SelectNodes("//style");

            // SelectNodes returns null when the document has no style element; iterating it
            // used to throw a NullReferenceException.
            if (collection == null)
            {
                return false;
            }

            foreach (HtmlNode node in collection)
            {
                node.Remove();
            }

            return collection.Count > 0;
        }
示例#14
0
        /// <summary>
        /// Removes every element whose name appears in <paramref name="unwantedTags"/> from
        /// <paramref name="html"/>, moving the element's children into its parent in its place.
        /// </summary>
        /// <param name="html">Markup to process; null or empty input is returned as-is.</param>
        /// <param name="encode">Passed to <c>GetHtmlDocument</c> together with <paramref name="html"/>.</param>
        /// <param name="unwantedTags">Tag names to strip; compared to node names with exact equality.</param>
        /// <returns>
        /// The resulting markup, or the original <paramref name="html"/> when the document has no
        /// top-level element or text nodes.
        /// </returns>
        public string RemoveUnwantedHtmlTags(string html, string encode, List <string> unwantedTags)
        {
            if (string.IsNullOrEmpty(html))
            {
                return html;
            }

            var document = GetHtmlDocument(html, encode);
            HtmlNodeCollection roots = document.DocumentNode.SelectNodes("./*|./text()");

            if (roots == null || roots.Count == 0)
            {
                return html;
            }

            // Breadth-first walk over the whole tree.
            var pending = new Queue<HtmlNode>(roots);

            while (pending.Count != 0)
            {
                HtmlNode current = pending.Dequeue();
                HtmlNode owner = current.ParentNode;
                HtmlNodeCollection children = current.SelectNodes("./*|./text()");
                bool hasChildren = children != null;

                if (hasChildren)
                {
                    foreach (HtmlNode child in children)
                    {
                        pending.Enqueue(child);
                    }
                }

                bool isUnwanted = unwantedTags.Any(tag => tag == current.Name);
                if (!isUnwanted)
                {
                    continue;
                }

                // Keep the content: lift the children in front of the node, then drop the node.
                if (hasChildren)
                {
                    foreach (HtmlNode child in children)
                    {
                        owner.InsertBefore(child, current);
                    }
                }

                owner.RemoveChild(current);
            }

            return document.DocumentNode.InnerHtml;
        }
示例#15
0
            /// <summary>
            /// Downloads the listing page at <paramref name="url"/>, converts each row of the
            /// <c>tbody#posts</c> table into an <c>OriginRow</c> (including its detail page fetched
            /// through <c>CrawlerDetail</c>) and appends it to <paramref name="rows"/>.
            /// </summary>
            /// <remarks>
            /// Rows that do not have seven <c>td</c> cells, or whose fourth cell lacks an anchor with an
            /// href attribute, are skipped instead of raising a NullReferenceException.
            /// </remarks>
            /// <param name="rows">List that receives the parsed rows.</param>
            /// <param name="url">Address of the listing page.</param>
            /// <returns>
            /// <c>true</c> when the page contained at least one table row; <c>false</c> when the page
            /// could not be loaded or had no rows.
            /// </returns>
            public async Task <bool> CrawlerMaster(List <OriginRow> rows, string url)
            {
                string html = await GetHtml(url);

                if (html == null)
                {
                    return false;
                }

                HtmlDocument doc = new HtmlDocument();

                doc.LoadHtml(html);
                HtmlNode           root  = doc.DocumentNode;
                HtmlNodeCollection items = root.SelectNodes("//tbody[@id='posts']/tr");

                if (items == null || !items.Any())
                {
                    return false;
                }

                foreach (HtmlNode item in items)
                {
                    HtmlNodeCollection cells         = item.SelectNodes("./td");
                    HtmlNode           projectAnchor = item.SelectSingleNode("./td[4]/a");

                    if (cells == null || cells.Count < 7 || projectAnchor == null || projectAnchor.Attributes["href"] == null)
                    {
                        // Not a data row (e.g. header or malformed markup).
                        continue;
                    }

                    OriginRow row = new OriginRow();
                    row.IssueDate   = cells[0].InnerText.Trim();
                    row.ClosingDate = cells[1].InnerText.Trim();
                    row.Location    = cells[2].InnerText.Trim();
                    row.ProjectName = projectAnchor.InnerText.Trim();

                    string href = WebUtility.HtmlDecode(projectAnchor.Attributes["href"].Value);

                    // URL prefixes are not linguistic text, so compare ordinally rather than with the
                    // current culture.
                    if (href.StartsWith("//", System.StringComparison.Ordinal))
                    {
                        // Protocol-relative link.
                        row.ProjectLink = $"https:{href}";
                    }
                    else if (href.StartsWith("/", System.StringComparison.Ordinal))
                    {
                        // Site-root-relative link.
                        row.ProjectLink = $"https://www.ebrd.com{href}";
                    }
                    else if (href.StartsWith("http:", System.StringComparison.Ordinal) || href.StartsWith("https:", System.StringComparison.Ordinal))
                    {
                        row.ProjectLink = href;
                    }
                    else
                    {
                        row.ProjectLink = $"https://www.ebrd.com/{href}";
                    }
                    row.ProjectDetail = await CrawlerDetail(row.ProjectLink);

                    row.Sector   = cells[4].InnerText.Trim();
                    row.Contract = cells[5].InnerText.Trim();
                    row.Type     = cells[6].InnerText.Trim();
                    rows.Add(row);
                }

                return true;
            }
示例#16
0
        /// <summary>
        /// Downloads the page at <paramref name="address"/> and reports whether it contains a
        /// <c>script</c> element whose <c>src</c> equals the beacon bundle address (case-insensitive).
        /// </summary>
        /// <param name="address">Page to check; a non-absolute value is prefixed with "http://".</param>
        /// <returns>
        /// <c>true</c> when a matching script element is found; <c>false</c> when the response body is
        /// empty, the page has no script elements, or none of them matches.
        /// </returns>
        /// <exception cref="HttpResponseException">
        /// Thrown with status 500 when the download fails with a <see cref="WebException"/>.
        /// </exception>
        public bool Ping(Uri address)
        {
            Uri target = address.IsAbsoluteUri
                ? address
                : new Uri(string.Format("http://{0}", address));

            string body;

            try
            {
                body = this.webClient.Download(target);
            }
            catch (WebException downloadFailure)
            {
                this.logger.Error(downloadFailure.Message, downloadFailure, this);

                var failureResponse = new HttpResponseMessage(HttpStatusCode.InternalServerError)
                {
                    ReasonPhrase = downloadFailure.Message
                };
                throw new HttpResponseException(failureResponse);
            }

            if (string.IsNullOrEmpty(body))
            {
                this.logger.Warn(string.Format("[BeaconController - Ping] Response body for request {0} was empty", address), this);
                return false;
            }

            this.logger.Debug(string.Format("[Ping]: Page: {0}", address), null);

            HtmlDocument parsedPage = new HtmlDocument();
            parsedPage.LoadHtml(body);

            HtmlNodeCollection scripts = parsedPage.DocumentNode.SelectNodes("//script");

            if (scripts == null || !scripts.Any <HtmlNode>())
            {
                return false;
            }

            // The expected address is looked up per script, after reading that script's src.
            return ((IEnumerable <HtmlNode>)scripts).Any(script =>
                script.GetAttributeValue("src", string.Empty)
                      .Equals(this.GetBeaconBundleAddress(this.GetBeaconHostName()), StringComparison.InvariantCultureIgnoreCase));
        }
示例#17
0
        /// <summary>
        /// Loads a deck page from <paramref name="url"/> and converts the rows of its
        /// <c>deck-view-deck-table</c> table, up to the first row containing "Cards Total", into a
        /// deck list with one <c>SearchLine</c> per line.
        /// </summary>
        /// <param name="url">Address of the deck page.</param>
        /// <param name="reporter">Receives status messages and progress updates.</param>
        /// <returns>The deck list text; rows without exactly one quantity and one card cell are skipped.</returns>
        /// <exception cref="InvalidOperationException">Thrown when the page contains no deck table rows.</exception>
        private static async Task <string> ImportFromMtgGoldfish(string url, IReporter reporter)
        {
            var web = new HtmlWeb();

            reporter.Report("Unraveling skeins...");
            HtmlDocument doc = await web.LoadFromWebAsync(url);

            var decklistBuilder = new StringBuilder();

            HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes("//table[@class='deck-view-deck-table']/tr");

            // Validate before touching the collection: TakeWhile on a null result used to throw
            // ArgumentNullException, so this friendlier error was never reached.
            if (nodes == null || !nodes.Any())
            {
                throw new InvalidOperationException("Could not find a valid deck at the URL. Make sure the link provided is pointing to the root of the deck.");
            }

            List <HtmlNode> deckNodes = nodes.TakeWhile(node => !node.OuterHtml.Contains("Cards Total")).ToList();

            reporter.StartProgress();

            for (var i = 0; i < deckNodes.Count; i++)
            {
                await Task.Delay(1);

                reporter.Progress(i, 0, deckNodes.Count);
                reporter.Report($"Bifurcating the furcate {i}/{deckNodes.Count}");

                HtmlNode node = deckNodes[i];
                try
                {
                    HtmlNodeCollection qtyNodes  = node.SelectNodes(".//td[@class='deck-col-qty']");
                    HtmlNodeCollection nameNodes = node.SelectNodes(".//td[@class='deck-col-card']");

                    // Only rows with exactly one quantity cell and one card cell are card rows.
                    if (qtyNodes?.Count != 1 || nameNodes?.Count != 1)
                    {
                        continue;
                    }

                    int    qty  = int.Parse(qtyNodes[0].InnerText.Trim());
                    string name = HttpUtility.HtmlDecode(nameNodes[0].InnerText.Trim());
                    var    line = new SearchLine(name, qty);
                    decklistBuilder.AppendLine(line.ToString());
                }
                catch (Exception)
                {
                    // Best effort: report the failing row and carry on with the rest of the deck.
                    reporter.Report($"Failed to import node #{i} from {url}", true);
                }
            }

            reporter.StopProgress();
            return decklistBuilder.ToString();
        }
示例#18
0
        /// <summary>
        /// Visits every entry of <c>docUrls</c> (formatted as "category*pageUrl"), finds the document
        /// links on each page and hands every link whose text contains a meeting date on or after
        /// <c>dtStartFrom</c> to <c>ExtractADoc</c>.
        /// </summary>
        /// <remarks>
        /// Links are taken from anchors whose href contains "/LinkClick.aspx", falling back to
        /// "/Portals/". Links whose text mentions "Canceled" (any casing) or has no parseable date are
        /// skipped.
        /// </remarks>
        public void DownloadCouncilPdfFiles()
        {
            var docs    = this.LoadDocumentsDoneSQL();
            var queries = this.LoadQueriesDoneSQL();

            HtmlWeb web     = new HtmlWeb();
            Regex   dateReg = new Regex("[A-Za-z]+[\\s]{0,1}[0-9]{1,2},[\\s]{0,1}[0-9]{4}");

            using (WebClient c = new WebClient())
            {
                foreach (string url in this.docUrls)
                {
                    string[] urlParts = url.Split('*');
                    var      subUrl   = urlParts[1];
                    var      category = urlParts[0];

                    HtmlDocument       doc  = web.Load(subUrl);
                    HtmlNodeCollection list = doc.DocumentNode.SelectNodes("//a[contains(@href,'/LinkClick.aspx')]");
                    if (list == null || !list.Any())
                    {
                        list = doc.DocumentNode.SelectNodes("//a[contains(@href,'/Portals/')]");
                    }

                    // The fallback query can come back null as well; a page without links is skipped.
                    if (list == null)
                    {
                        continue;
                    }

                    foreach (var r in list)
                    {
                        var dateStr = r.InnerText;

                        // ">= 0" so that a link text starting with "Canceled" is skipped too
                        // (the previous "> 0" missed a match at position 0).
                        if (dateStr.IndexOf("Canceled", StringComparison.OrdinalIgnoreCase) >= 0)
                        {
                            continue;
                        }

                        string   meetingDateText = dateReg.Match(dateStr).ToString();
                        DateTime meetingDate;
                        if (!DateTime.TryParse(meetingDateText, out meetingDate))
                        {
                            Console.WriteLine(dateStr);
                            Console.WriteLine("date format incorrect...");
                            continue;
                        }
                        if (meetingDate < this.dtStartFrom)
                        {
                            Console.WriteLine("Early...");
                            continue;
                        }

                        // NOTE(review): the previous code computed a "docx" file type for hrefs containing
                        // "doc" but never used it - the literal "pdf" was always passed. That is kept
                        // here; confirm whether ExtractADoc should receive "docx" for those links.
                        this.ExtractADoc(c, this.cityEntity.CityUrl + r.Attributes["href"].Value, category, "pdf", meetingDate, ref docs, ref queries);
                    }
                }
            }

            Console.WriteLine("docs:" + docs.Count + "--- query:" + queries.Count);
        }
        /// <summary>
        /// Replaces every anchor that has an href attribute with a Markdown-style link
        /// <c>[text](absolute url)</c>, where the URL comes from <c>MakeAbsoluteLink</c>.
        /// </summary>
        /// <param name="htmlDoc">Document to modify in place.</param>
        protected void ReplaceHyperlinks(HtmlDocument htmlDoc)
        {
            HtmlNodeCollection anchors = htmlDoc.DocumentNode.SelectNodes("//a");

            if (anchors == null)
            {
                return;
            }

            foreach (HtmlNode link in anchors)
            {
                var hrefAttribute = link.Attributes["href"];

                // Anchors without href (e.g. named anchors) have no target to rewrite; reading
                // their href used to throw a NullReferenceException.
                if (hrefAttribute == null)
                {
                    continue;
                }

                string   relativeLink = hrefAttribute.Value.TrimStart('/');
                string   absoluteLink = MakeAbsoluteLink(relativeLink);
                HtmlNode newNode      = HtmlNode.CreateNode($"[{link.InnerText}]({absoluteLink})");
                link.ParentNode.ReplaceChild(newNode, link);
            }
        }
示例#20
0
        /// <summary>
        /// Replace the elements found with the specified selector with the replacement element,
        /// preserving the content, and optionally applying the specified class name.
        /// </summary>
        /// <param name="htmlNode">Node the selector is evaluated against; its subtree is modified in place.</param>
        /// <param name="elementSelector">XPath selecting the elements to replace.</param>
        /// <param name="replacementTag">Tag name of the replacement element.</param>
        /// <param name="className">
        /// Value for the replacement's class attribute, or <c>null</c> for no class attribute.
        /// The value is wrapped in double quotes unless it is already quoted.
        /// </param>
        private static void ReplaceHtmlTag(ref HtmlNode htmlNode, string elementSelector, string replacementTag, string className)
        {
            HtmlNodeCollection tryGetNodes = htmlNode.SelectNodes(elementSelector);

            if (tryGetNodes == null || !tryGetNodes.Any())
            {
                return;
            }

            // Built once: it does not depend on the node being replaced.
            var classString = "";

            if (className != null)
            {
                // An unquoted attribute value ends at the first space, so "a b" used to apply only
                // class "a". Values that callers already wrapped in quotes are passed through unchanged.
                bool alreadyQuoted = className.Length >= 2 &&
                                     (className[0] == '"' || className[0] == '\'') &&
                                     className[className.Length - 1] == className[0];

                classString = alreadyQuoted ? $" class={className}" : $" class=\"{className}\"";
            }

            foreach (HtmlNode node in tryGetNodes)
            {
                var replacementNode = HtmlNode.CreateNode($"<{replacementTag}{classString}>{node.InnerHtml}</{replacementTag}>");
                node.ParentNode.ReplaceChild(replacementNode, node);
            }
        }
示例#21
0
        /// <summary>
        /// Returns the src of every <c>img</c> element in <paramref name="str"/>, each prefixed with
        /// "https://rally1.rallydev.com".
        /// </summary>
        /// <param name="str">HTML markup to scan.</param>
        /// <returns>
        /// The prefixed src values of all images that have a src attribute, or <c>null</c> when the
        /// markup contains no <c>img</c> element at all.
        /// </returns>
        public List <string> GetAllImageSrcs(string str)
        {
            HtmlDocument doc = new HtmlDocument();

            doc.Load(str.ToStream());

            HtmlNodeCollection imgs = doc.DocumentNode.SelectNodes("//img");

            if (imgs == null || !imgs.Any())
            {
                return null;
            }

            // Images without a src attribute are skipped; dereferencing the missing attribute
            // used to throw a NullReferenceException.
            return imgs
                   .Where(img => img.Attributes[@"src"] != null)
                   .Select(img => @"https://rally1.rallydev.com" + img.Attributes[@"src"].Value)
                   .ToList();
        }
示例#22
0
        /// <summary>
        /// Strips every element whose tag name appears in <paramref name="unwantedTags"/> from the
        /// document while keeping that element's direct children in its place.
        /// </summary>
        /// <param name="document">Document to modify in place.</param>
        /// <param name="unwantedTags">Tag names compared against each node's <c>Name</c>.</param>
        /// <returns>The same <paramref name="document"/> instance.</returns>
        public static HtmlDocument RemoveUnwantedTags(HtmlDocument document, List <string> unwantedTags)
        {
            HtmlNodeCollection topLevel = document.DocumentNode.SelectNodes("./*|./text()");
            bool hasContent = topLevel != null && topLevel.Any();

            if (!hasContent)
            {
                return document;
            }

            // Breadth-first walk: children are queued before their parent is (possibly) removed.
            var pending = new Queue <HtmlNode>(topLevel);

            while (pending.Any())
            {
                HtmlNode           current  = pending.Dequeue();
                HtmlNode           parent   = current.ParentNode;
                HtmlNodeCollection children = current.SelectNodes("./*|./text()");
                bool               hasKids  = children != null;

                if (hasKids)
                {
                    foreach (HtmlNode child in children)
                    {
                        pending.Enqueue(child);
                    }
                }

                if (!unwantedTags.Any(tag => tag == current.Name))
                {
                    continue;
                }

                if (hasKids)
                {
                    // Hoist the children in front of the node that is about to go away.
                    foreach (HtmlNode child in children)
                    {
                        parent.InsertBefore(child, current);
                    }
                }

                parent.RemoveChild(current);
            }

            return document;
        }
        /// <summary>
        /// Builds one <c>CharacterDeathDTO</c> per node, taking the date from <c>td[1]</c> and the
        /// message from <c>td[2]</c>.
        /// </summary>
        /// <param name="nodes">Nodes to convert; may be <c>null</c>.</param>
        /// <returns>The converted entries in node order, or <c>null</c> when <paramref name="nodes"/> is null or empty.</returns>
        private List <CharacterDeathDTO> GetHtmlCharacterDeaths(HtmlNodeCollection nodes)
        {
            bool hasRows = nodes != null && nodes.Any();

            if (!hasRows)
            {
                return null;
            }

            return nodes
                   .Select(row => new CharacterDeathDTO
                   {
                       Date    = GetHtmlDateTime(row, "td[1]"),
                       Message = GetHtmlString(row, "td[2]")
                   })
                   .ToList();
        }
示例#24
0
        // adapted from source: https://stackoverflow.com/a/28298882/773798
        /// <summary>
        /// Remove the specified unwanted tags while preserving their inner content.
        /// </summary>
        /// <param name="htmlNode">Node whose descendants are examined and modified in place.</param>
        /// <param name="unwantedTags">Tag names compared against each node's <c>Name</c>.</param>
        private static void RemoveUnwantedHtmlTags(ref HtmlNode htmlNode, List <string> unwantedTags)
        {
            const string directChildren = "./*|./text()";

            HtmlNodeCollection rootChildren = htmlNode.SelectNodes(directChildren);

            if (rootChildren == null)
            {
                return;
            }

            if (!rootChildren.Any())
            {
                return;
            }

            var queue = new Queue <HtmlNode>(rootChildren);

            while (queue.Count != 0)
            {
                HtmlNode           candidate = queue.Dequeue();
                HtmlNode           owner     = candidate.ParentNode;
                HtmlNodeCollection content   = candidate.SelectNodes(directChildren);

                // Queue the content first so it is still visited after the candidate is removed.
                if (content != null)
                {
                    foreach (HtmlNode item in content)
                    {
                        queue.Enqueue(item);
                    }
                }

                bool isUnwanted = unwantedTags.Any(tag => tag == candidate.Name);

                if (isUnwanted)
                {
                    if (content != null)
                    {
                        foreach (HtmlNode item in content)
                        {
                            owner.InsertBefore(item, candidate);
                        }
                    }

                    owner.RemoveChild(candidate);
                }
            }
        }
        /// <summary>
        /// Converts a flat list of cell nodes into <c>AgriculturalProducts</c> entries.
        /// </summary>
        /// <remarks>
        /// Cells are consumed in groups of eight. The first group is skipped, and within each following
        /// group cells 0-6 are read as product name, low price, average price, high price, category,
        /// unit and creation time. A trailing group with fewer than seven cells is ignored.
        /// </remarks>
        /// <param name="nodes">Cell nodes to convert; may be <c>null</c>.</param>
        /// <returns>The parsed entries; empty when <paramref name="nodes"/> is null or holds no complete group.</returns>
        /// <exception cref="FormatException">A creation-time cell cannot be parsed by <c>DateTime.Parse</c>.</exception>
        public IEnumerable <IDomainModel> ParseNodes(HtmlNodeCollection nodes)
        {
            const int cellsPerRow  = 8; // distance between the first cells of consecutive groups
            const int cellsConsumed = 7; // indexes 0..6 are read from every group

            List <AgriculturalProducts> lst = new List <AgriculturalProducts>();

            if (nodes == null)
            {
                return(lst);
            }

            // Index straight into the collection instead of Skip/Take per group (which rescans the
            // collection from the start each time), and stop before a group that is too short to read.
            for (int rowStart = cellsPerRow; rowStart + cellsConsumed <= nodes.Count; rowStart += cellsPerRow)
            {
                AgriculturalProducts entity = new AgriculturalProducts();
                entity.LowPrice     = nodes[rowStart + 1].InnerText;
                entity.AveragePrice = nodes[rowStart + 2].InnerText;
                entity.HighPrice    = nodes[rowStart + 3].InnerText;
                entity.Category     = nodes[rowStart + 4].InnerText;
                entity.Unit         = nodes[rowStart + 5].InnerText;
                entity.CreateTime   = DateTime.Parse(nodes[rowStart + 6].InnerText);
                entity.ProductName  = nodes[rowStart].InnerText;
                lst.Add(entity);
            }

            return(lst);
        }
示例#26
0
        /// <summary>
        /// Loads a deck page from the given address and turns its board-list links into decklist text.
        /// </summary>
        /// <param name="url">Address of the deck page to load.</param>
        /// <param name="reporter">Receives status messages and per-link progress.</param>
        /// <returns>One line per successfully read link, as produced by <c>SearchLine.ToString()</c>.</returns>
        /// <exception cref="InvalidOperationException">The page contains no board-list links.</exception>
        private static async Task <string> ImportFromTappedOut(string url, IReporter reporter)
        {
            reporter.Report("Unraveling skeins...");
            HtmlDocument page = await new HtmlWeb().LoadFromWebAsync(url);

            // Name and quantity are read from the data-name / data-qty attributes of these anchors.
            HtmlNodeCollection cardLinks = page.DocumentNode.SelectNodes("//ul[@class='boardlist']/li/a");

            bool foundCards = cardLinks != null && cardLinks.Any();

            if (!foundCards)
            {
                throw new InvalidOperationException("Could not find a valid deck at the URL. Make sure the link provided is pointing to the root of the deck.");
            }

            var decklist = new StringBuilder();
            int total    = cardLinks.Count;

            for (int index = 0; index < total; index++)
            {
                await Task.Delay(1);

                reporter.Progress(index, 0, total);
                reporter.Report($"Bifurcating the furcate {index}/{total}");

                try
                {
                    var    attributes = cardLinks[index].Attributes;
                    string rawName    = attributes.Single(a => a.Name == "data-name").Value.Trim();
                    string cardName   = HttpUtility.HtmlDecode(rawName);
                    string rawQty     = attributes.Single(a => a.Name == "data-qty").Value;
                    int    quantity   = int.Parse(rawQty);

                    decklist.AppendLine(new SearchLine(cardName, quantity).ToString());
                }
                catch (Exception)
                {
                    // A single unreadable link is reported and skipped; the import carries on.
                    reporter.Report($"Failed to import node #{index} from {url}", true);
                }
            }

            return decklist.ToString();
        }
        /// <summary>
        /// Returns the entity-decoded, trimmed inner text of the first match of <paramref name="xpath"/>
        /// found under any of the given nodes.
        /// </summary>
        /// <param name="nodes">Nodes to search, in order; may be <c>null</c>.</param>
        /// <param name="xpath">XPath expression evaluated relative to each node.</param>
        /// <returns>
        /// The cleaned text, or <see cref="String.Empty"/> when <paramref name="nodes"/> is null or empty,
        /// nothing matches, or the matched text is empty.
        /// </returns>
        private string GetHtmlString(HtmlNodeCollection nodes, string xpath)
        {
            if (nodes == null || !nodes.Any())
            {
                return(String.Empty);
            }

            // Evaluate the XPath once per node and keep the first result that matched anything.
            HtmlNodeCollection matches = nodes.Select(n => n.SelectNodes(xpath))
                                         .FirstOrDefault(found => found != null);

            if (matches == null)
            {
                return(String.Empty);
            }

            var value = matches.First().InnerText;

            if (String.IsNullOrEmpty(value))
            {
                return(String.Empty);
            }

            // Decoding can leave no-break spaces (U+00A0, e.g. from &nbsp;) behind; turn them into plain
            // spaces. The escape keeps the otherwise invisible character explicit in the source.
            return(HtmlEntity.DeEntitize(value).Replace('\u00A0', ' ').Trim());
        }
示例#28
0
        /// <summary>
        /// Runs every top-level node of the markup through <c>HandleNextNode</c>, then passes the
        /// resulting markup to <c>HtmlService.FixOrphanedText</c>.
        /// </summary>
        /// <param name="map">Data map handed to <c>HandleNextNode</c>; its logger records an orphaned-text fix.</param>
        /// <param name="itemPath">Item path handed to <c>HandleNextNode</c> and used as the log context.</param>
        /// <param name="html">Markup to clean.</param>
        /// <returns>
        /// The cleaned markup, or <paramref name="html"/> unchanged when it is null, empty or has no
        /// top-level nodes.
        /// </returns>
        public string CleanHtml(IDataMap map, string itemPath, string html)
        {
            if (String.IsNullOrEmpty(html))
            {
                return html;
            }

            var document = new HtmlDocument();
            document.LoadHtml(html);

            HtmlNodeCollection topLevelNodes = document.DocumentNode.SelectNodes("./*|./text()");
            bool               hasNodes      = topLevelNodes != null && topLevelNodes.Any();

            if (!hasNodes)
            {
                return html;
            }

            // HandleNextNode receives the queue itself and is relied on to drain it.
            var pending = new Queue <HtmlNode>(topLevelNodes);

            while (pending.Count > 0)
            {
                HandleNextNode(pending, map, itemPath);
            }

            bool   orphansFixed;
            string result = HtmlService.FixOrphanedText(document.DocumentNode.InnerHtml, out orphansFixed);

            if (orphansFixed)
            {
                map.Logger.Log("Fixed Orphaned Text in Rich Text.", itemPath);
            }

            return result;
        }
示例#29
0
        /// <summary>
        /// Downloads the Yahoo Finance earnings calendar for each day from the selected start date to the
        /// selected end date and stores every earning that is not recorded yet and whose symbol is
        /// listed on NASDAQ, NYSE or AMEX.
        /// </summary>
        /// <remarks>
        /// A day whose page holds exactly 100 rows is requested again with the offset advanced by 100;
        /// any other page (including one without data) moves on to the next day. Activity is appended
        /// to <c>txtLogActivity</c> and <c>progressBar1</c> advances once per completed day.
        /// </remarks>
        public void generateEarnings()
        {
            const string earningsTableXPath = "//table[contains(@class, 'data-table W(100%) Bdcl(c) Pos(r) BdB Bdc($c-fuji-grey-c)')]";
            const int    pageSize           = 100;

            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();

            DateTime currentDate = Convert.ToDateTime(txtDateStart.Value.ToString("yyyy-MM-dd"));
            DateTime endDate     = Convert.ToDateTime(txtDateEnd.Value.ToString("yyyy-MM-dd"));

            int dateDiff = endDate.Subtract(currentDate).Days;
            int offset   = 0;

            progressBar1.Maximum = dateDiff + 1;
            progressBar1.Value   = 0;

            // Dispose the web client and the data context once the import is over.
            using (WebClient webClient = new WebClient())
            using (magentadbDataContext dbProd = new magentadbDataContext())
            {
                while (currentDate <= endDate)
                {
                    string urlDateString = currentDate.ToString("yyyy-MM-dd");
                    string html          = webClient.DownloadString("https://finance.yahoo.com/calendar/earnings?day=" + urlDateString + "&offset=" + offset);

                    doc.LoadHtml(html);

                    HtmlNodeCollection tables       = doc.DocumentNode.SelectNodes(earningsTableXPath);
                    bool               sawTableBody = false;

                    if (tables == null)
                    {
                        txtLogActivity.Text += urlDateString + ": No data. \r\n";
                    }
                    else
                    {
                        foreach (HtmlNode table in tables)
                        {
                            HtmlNodeCollection tableBodies = table.SelectNodes("tbody");

                            if (tableBodies == null)
                            {
                                continue;
                            }

                            foreach (HtmlNode tableBody in tableBodies)
                            {
                                sawTableBody = true;

                                // SelectNodes yields null (not an empty collection) when nothing matches.
                                HtmlNodeCollection tableRows = tableBody.SelectNodes("tr");
                                int                rowCount  = tableRows == null ? 0 : tableRows.Count;

                                txtLogActivity.Text += urlDateString + ": " + rowCount + " symbols \r\n";

                                // A full page means there may be more rows for the same day.
                                offset = rowCount == pageSize ? offset + pageSize : 0;

                                if (tableRows == null)
                                {
                                    continue;
                                }

                                foreach (HtmlNode tableRow in tableRows)
                                {
                                    HtmlNodeCollection cells = tableRow.SelectNodes("td");

                                    // Rows without td cells carry no earning.
                                    if (cells == null || !cells.Any())
                                    {
                                        continue;
                                    }

                                    try
                                    {
                                        txtLogActivity.Text          += cells[1].InnerText + "\r\n";
                                        txtLogActivity.SelectionStart = txtLogActivity.Text.Length;
                                        txtLogActivity.ScrollToCaret();

                                        string earningSymbol   = cells[1].InnerText.ToUpperInvariant();
                                        string earningPosition = cells[3].InnerText.Equals("After Market Close")
                                                                 ? "After Market Close"
                                                                 : "Before Market Open";

                                        bool alreadyStored = dbProd.TrnStockEarnings.Any(e => e.Symbol == earningSymbol && e.EarningDate.Date == currentDate.Date);

                                        if (alreadyStored)
                                        {
                                            continue;
                                        }

                                        var listedSymbol = (from s in dbProd.MstSymbols
                                                            where s.Symbol == earningSymbol &&
                                                            (s.Exchange == "NASDAQ" || s.Exchange == "NYSE" || s.Exchange == "AMEX")
                                                            select new
                                        {
                                            Id = s.Id,
                                        }).FirstOrDefault();

                                        if (listedSymbol != null)
                                        {
                                            TrnStockEarning newTrnStockEarning = new TrnStockEarning();

                                            newTrnStockEarning.Symbol      = earningSymbol;
                                            newTrnStockEarning.SymbolId    = listedSymbol.Id;
                                            newTrnStockEarning.EarningDate = currentDate.Date;
                                            newTrnStockEarning.EarningTime = earningPosition;

                                            dbProd.TrnStockEarnings.InsertOnSubmit(newTrnStockEarning);
                                            dbProd.SubmitChanges();
                                        }
                                    }
                                    catch (Exception ex)
                                    {
                                        // Best effort per row: record why it failed and keep going.
                                        txtLogActivity.Text += "Error saving: " + ex.Message + " \r\n";
                                    }
                                }
                            }
                        }
                    }

                    // Without any table body there is nothing left to page through. Resetting here keeps
                    // a non-zero offset from pinning the loop to the same day forever.
                    if (!sawTableBody)
                    {
                        offset = 0;
                    }

                    if (offset == 0)
                    {
                        currentDate         = currentDate.AddDays(1);
                        progressBar1.Value += 1;
                    }
                }
            }

            MessageBox.Show("Completed  ", "", MessageBoxButtons.OK, MessageBoxIcon.Information);
        }
示例#30
0
        /// <summary>
        /// Extracts the absolute (<c>http…</c>) links from the given markup and returns the ones
        /// reported by the safe-browsing lookup, together with any results already held in the
        /// runtime cache.
        /// </summary>
        /// <param name="text">HTML markup whose links are checked.</param>
        /// <param name="apiKey">Key passed to the safe-browsing lookup; without a value nothing is checked.</param>
        /// <returns>
        /// Newly reported links first, followed by cached results. Empty when there is no API key,
        /// no markup, or no link to check.
        /// </returns>
        public List <BrokenLinkModel> Check(string text, string apiKey)
        {
            List <BrokenLinkModel> response = new List <BrokenLinkModel>();

            if (!apiKey.HasValue() || string.IsNullOrEmpty(text))
            {
                return(response);
            }

            var doc = new HtmlDocument();

            doc.LoadHtml(text);

            HtmlNodeCollection links = doc.DocumentNode.SelectNodes(KnownStrings.HrefXPath);

            if (links == null || !links.Any())
            {
                return(response);
            }

            // Distinct: a link that occurs several times is looked up (and reported) only once.
            string[] hrefs = links.Select(l => l.GetAttributeValue("href", string.Empty))
                             .Where(l => l.StartsWith("http", StringComparison.Ordinal))
                             .Distinct()
                             .ToArray();

            // check for cached responses - avoids request when page is being resaved
            List <BrokenLinkModel> fromCache = new List <BrokenLinkModel>();
            List <string>          uncached  = new List <string>();

            foreach (string href in hrefs)
            {
                var cacheItem = Current.AppCaches.RuntimeCache.GetCacheItem <BrokenLinkModel>(KnownStrings.CacheKey + href);

                if (null == cacheItem)
                {
                    uncached.Add(href);
                }
                else
                {
                    fromCache.Add(cacheItem);
                }
            }

            // Only call the lookup when at least one link was not answered from the cache.
            if (uncached.Count > 0)
            {
                SafeBrowsingResponseModel safeBrowsingResult = SafeBrowsingLookup(uncached.ToArray(), apiKey);

                // Guard against a response that carries no match list at all.
                if (safeBrowsingResult != null && safeBrowsingResult.Matches != null)
                {
                    foreach (var match in safeBrowsingResult.Matches)
                    {
                        string   threatUrl = match.Threat.Url;
                        HtmlNode source    = links.FirstOrDefault(l => l.GetAttributeValue("href", string.Empty) == threatUrl);

                        BrokenLinkModel item = new BrokenLinkModel
                        {
                            Href   = threatUrl,
                            Status = match.ThreatType,
                            Unsafe = true,
                            // The reported URL may not match any href exactly; fall back to no text.
                            Text   = source == null ? string.Empty : source.InnerText
                        };

                        response.Add(item);
                        Current.AppCaches.RuntimeCache.InsertCacheItem(KnownStrings.CacheKey + item.Href, () => item, new TimeSpan(24, 0, 0), false);
                    }
                }
            }

            // add cached results
            response.AddRange(fromCache);

            return(response);
        }