/// <summary>
        /// Downloads the page at the given URL and parses it into an <see cref="HtmlDocument"/>.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <returns>The document produced by <c>HtmlWeb.LoadFromWebAsync</c> for <paramref name="url"/>.</returns>
        public async static ValueTask<HtmlDocument> LoadHtmlFromUrlAsync(string url)
        {
            // The loader is constructed right here and therefore can never be null; the former
            // null-conditional call only obscured that awaiting a null task would throw anyway.
            var web = new HtmlWeb();

            return await web.LoadFromWebAsync(url);
        }
        /// <summary>
        /// Scrapes the news index page and builds one <see cref="News"/> entry per headline link.
        /// </summary>
        /// <returns>
        /// The entries in page order; an empty list when the page contains no headline links.
        /// </returns>
        public async Task<List<News>> GetNewsItems()
        {
            var document = await newsPageWeb.LoadFromWebAsync("https://d2-megaten-l.sega.com/en/news/index.html");

            var newsItems = new List<News>();

            // SelectNodes yields null (not an empty collection) when nothing matches,
            // so guard before iterating.
            var urls = document.DocumentNode.SelectNodes("//*[@class='news-list-title']/a");
            if (urls == null)
            {
                return newsItems;
            }

            foreach (var anchor in urls)
            {
                var link = anchor.GetAttributeValue("href", "");

                var newsItem = new News();
                // Every double quote in the headline is doubled (" becomes "").
                newsItem.Title = anchor.SelectSingleNode("h3").InnerText.Replace("\"", "\"\"");
                newsItem.Url   = baseUrl + link;
                // The thumbnail is optional: Image stays null when the link has no div/img child.
                newsItem.Image = anchor.SelectSingleNode("div/img")?.GetAttributeValue("src", "");
                newsItems.Add(newsItem);
            }

            return newsItems;
        }
Beispiel #3
0
        /// <summary>
        /// Loads the page at <c>UrlParseHelper.Site9999d</c>, finds the coin selected by
        /// <paramref name="xpath"/> inside the catalog block and reads its sell and buy prices.
        /// </summary>
        /// <param name="xpath">XPath evaluated against the re-parsed catalog markup to locate the coin.</param>
        /// <param name="acronim">Value stored in <c>CoinsRate.Acronim</c>.</param>
        /// <returns>A <see cref="CoinsRate"/> stamped with the current local time.</returns>
        static async Task<CoinsRate> parseCoin(string xpath, string acronim)
        {
            const string catalogXPath = "//div[@class='catalog item-views table catalog_table_2' and @data-slice='Y']";

            var page    = await new HtmlWeb().LoadFromWebAsync(UrlParseHelper.Site9999d);
            var catalog = page.DocumentNode.SelectSingleNode(catalogXPath);

            // Re-parse the catalog on its own so the caller's XPath only sees catalog markup.
            var catalogDoc = new HtmlDocument();
            catalogDoc.LoadHtml("<div>" + catalog.InnerHtml + "</div>");

            var coinNode = catalogDoc.DocumentNode.SelectSingleNode(xpath);

            // Collapse all whitespace runs so the marker words below can be matched in flat text.
            var flatText = Regex.Replace(coinNode.InnerText, @"\s+", " ");

            var priceSection = GetBetweenTwoWords("ПРОДАЖА", "Цена за грамм", flatText);

            // The text before "ПОКУПКА" is the sell price, the text after it the buy price.
            string[] separators = { "ПОКУПКА" };
            var pricePair = priceSection
                            .Split(separators, StringSplitOptions.RemoveEmptyEntries)
                            .Select(part => part.Replace("₽", ""))
                            .ToArray();

            var coin = new CoinsRate
            {
                Date    = DateTime.Now,
                Site    = UrlParseHelper.Site9999d,
                Acronim = acronim
            };

            coin.Sell = pricePair[0].ParseToDoubleFormat();
            coin.Buy  = pricePair[1].ParseToDoubleFormat();

            return coin;
        }
Beispiel #4
0
        /// <summary>
        /// Runs a company search on or.justice.cz and returns a summary for every hit on the result page.
        /// </summary>
        /// <param name="criteria">
        /// Search filters; a blank <c>Name</c> or <c>FictIdNumber</c> is left out of the query string.
        /// </param>
        /// <returns>One summary per hit, or an empty array when the page has no result tables.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="criteria"/> is null.</exception>
        public async Task<SubjectSummary[]> SearchSubjects(SearchCriteria criteria)
        {
            if (criteria == null)
            {
                throw new ArgumentNullException(nameof(criteria));
            }

            var query = new StringBuilder();
            query.Append("https://or.justice.cz/ias/ui/rejstrik-$firma?p%3A%3Asubmit=x&.%2Frejstrik-%24firma=");

            if (!string.IsNullOrWhiteSpace(criteria.Name))
            {
                query.Append("&nazev=").Append(Uri.EscapeDataString(criteria.Name));
            }

            if (!string.IsNullOrWhiteSpace(criteria.FictIdNumber))
            {
                query.Append("&ico=").Append(Uri.EscapeDataString(criteria.FictIdNumber));
            }

            query.Append("&obec=&ulice=&forma=&oddil=&vlozka=&soud=&polozek=50&typHledani=STARTS_WITH&jenPlatne=VSECHNY");

            var loader = new HtmlWeb();
            var page   = await loader.LoadFromWebAsync(query.ToString());

            // Select the parent element of every result table; null means there were no hits.
            var hits = page.DocumentNode.SelectNodes("//table[@class='result-details']/..");
            if (hits == null)
            {
                return new SubjectSummary[0];
            }

            var summaries = new SubjectSummary[hits.Count];
            for (int index = 0; index < summaries.Length; index++)
            {
                var hit = hits[index];

                summaries[index] = new SubjectSummary
                {
                    // The id cell is stripped of all whitespace; the name cell is trimmed and entity-decoded.
                    FictIdNumber = Regex.Replace(hit.SelectSingleNode(".//tbody/tr[1]/td[2]").InnerText, @"\s+", string.Empty),
                    Name         = HtmlEntity.DeEntitize(hit.SelectSingleNode(".//tbody/tr[1]/td[1]").InnerText.Trim()),
                    SubjectId    = GetSubjectId(hit, ".//li[1]/a")
                };
            }

            return summaries;
        }
Beispiel #5
0
        /// <summary>
        /// Loads a member overlay page and extracts the age range and sex from its second
        /// <c>&lt;li&gt;</c> element under the "memberdescriptionReviewEnhancements" node.
        /// </summary>
        /// <param name="uid">Value placed in the <c>uid</c> query parameter.</param>
        /// <param name="memberId">Value placed in the <c>src</c> query parameter.</param>
        /// <returns>
        /// The "age" and "sex" groups matched by <c>ageSexRegEx</c>, or <c>(null, null)</c> when the
        /// description node, the second list item, or a regex match is missing.
        /// </returns>
        public static async Task<(string ageRange, string sex)> GetAgeRangeAndSex(string uid, string memberId)
        {
            string url = $"https://www.tripadvisor.com.au/MemberOverlay?Mode=owa&uid={uid}&c=&src={memberId}&fus=false&partner=false&LsoId=&metaReferer=ShowUserReviewsAttractions";

            var page = await Web.LoadFromWebAsync(url);

            var description = page.DocumentNode
                              .Descendants()
                              .FirstOrDefault(node => node.HasClass("memberdescriptionReviewEnhancements"));

            if (description == null)
            {
                return (null, null);
            }

            // Only the second <li> below the description node is inspected.
            var ageSexNode = description
                             .Descendants()
                             .Where(node => node.Name.Equals("li", System.StringComparison.OrdinalIgnoreCase))
                             .Skip(1)
                             .FirstOrDefault();

            if (ageSexNode == null)
            {
                return (null, null);
            }

            var match = ageSexRegEx.Match(ageSexNode.InnerText);
            if (!match.Success)
            {
                return (null, null);
            }

            return (match.Groups["age"].Value, match.Groups["sex"].Value);
        }
        /// <summary>
        /// Gets the trending repositories for a language within a time range by scraping
        /// the GitHub Trending page.
        /// </summary>
        /// <param name="range">Time window; TODAY, WEEKLY and MONTHLY are supported.</param>
        /// <param name="language">Language whose <c>ToString()</c> value becomes the URL path segment.</param>
        /// <returns>
        /// One tuple per repository link found, holding the second and third segment of the link's
        /// <c>href</c> split on '/' (for "/owner/repo" that is owner and repo).
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="range"/> is not one of the supported values. Previously this case
        /// tried to load an empty URL.
        /// </exception>
        public async static Task<List<Tuple<string, string>>> ExtractTrendingRepoNames(TimeRange range, Language language)
        {
            string since;

            if (range == TimeRange.TODAY)
            {
                since = "daily";
            }
            else if (range == TimeRange.WEEKLY)
            {
                since = "weekly";
            }
            else if (range == TimeRange.MONTHLY)
            {
                since = "monthly";
            }
            else
            {
                throw new ArgumentOutOfRangeException(nameof(range), range, "Unsupported time range.");
            }

            string url = "https://github.com/trending/" + language.ToString() + "?since=" + since;

            //
            // The following code loads the HTML page and looks for specific tags so that we get what we want.
            // It has to be revised whenever the GitHub Trending page changes its HTML structure.
            //

            var repoNames = new List<Tuple<string, string>>();

            HtmlWeb      web = new HtmlWeb();
            HtmlDocument doc = await web.LoadFromWebAsync(url);

            foreach (var heading in doc.DocumentNode.Descendants("h3"))
            {
                // Headings without a link, or with a link that is not at least "/a/b",
                // are skipped instead of aborting the whole scrape.
                var anchor = heading.Descendants("a").FirstOrDefault();
                if (anchor == null)
                {
                    continue;
                }

                var names = anchor.GetAttributeValue("href", string.Empty).Split('/');
                if (names.Length < 3)
                {
                    continue;
                }

                repoNames.Add(new Tuple<string, string>(names[1], names[2]));
            }

            return repoNames;
        }
Beispiel #7
0
        /// <summary>
        /// Loads an atelierstoria.com product page and builds a <see cref="Mini"/> from its meta data.
        /// </summary>
        /// <param name="url">
        /// Product address, in the form
        /// 'https://atelierstoria.com/collections/collection/mini?variant=variantId'.
        /// </param>
        /// <returns>The mini, with its creator and a source entry pointing at <paramref name="url"/>.</returns>
        public async Task<Mini> ParseFromUrl(Uri url)
        {
            var          loader = new HtmlWeb();
            HtmlDocument page   = await loader.LoadFromWebAsync(url, null, null);

            // atelierstoria.com only hosts minis made by atelierstoria, so the creator is fixed.
            var creator = new Creator { Name = "atelierstoria" };
            var source  = new AtelierStoriaSource(creator);
            creator.Sites.Add(source);

            // Minis with variants carry a single query parameter named "variant";
            // whatever follows the last '=' of the query is taken as the variant id.
            string variantId = url.Query.Split("=").Last();

            // Without a variant in the query, fall back to the page's default variant when available.
            // If no default variant exists either, variantId stays an empty string.
            if (variantId.Length == 0)
            {
                variantId = GetValueFromMeta(page, "product:defaultvariant", "");
            }

            var mini = new Mini
            {
                Creator   = creator,
                Name      = GetValueFromMeta(page, "product:title:", variantId),
                Thumbnail = GetValueFromMeta(page, "product:image:", variantId),
                Cost      = GetCost(page, variantId),
                Link      = url.ToString()
            };

            mini.Sources.Add(new MiniSourceSite(mini, source, url));

            return mini;
        }
        /// <summary>
        /// Waits five seconds, loads the profile page into the shared <c>HtmlDoc</c> and, when the
        /// account is valid, assembles an <see cref="OverwatchViewModel"/> from it.
        /// </summary>
        /// <param name="Uri">Address of the profile page to load.</param>
        /// <param name="btag">Battle tag copied into the result.</param>
        /// <returns>The populated view model, or null when <c>IsValidAccount()</c> reports false.</returns>
        public static async Task<OverwatchViewModel> GetOverwatchData(string Uri, string btag)
        {
            var loader = new HtmlWeb();

            // NOTE(review): the fixed delay presumably throttles requests - confirm with the caller.
            await Task.Delay(5000);

            // The Get* helpers below take no arguments, so they presumably read this shared document.
            HtmlDoc = await loader.LoadFromWebAsync(Uri);

            if (IsValidAccount())
            {
                var profile = new OverwatchViewModel();
                profile.BattleTag          = btag;
                profile.Level              = GetLevel();
                profile.RankPoint          = GetRankPoint();
                profile.PortraitLink       = GetPortraitLink();
                profile.Rank               = GetRank();
                profile.FavouriteHeroImage = GetHeroLink();
                profile.TimePlayed         = GetTimePlayed();
                profile.GamesWon           = GetGamesWon();
                return profile;
            }

            return null;
        }
        /// <summary>
        /// Loads one page of the homeless.co.il rent listing, retrying until the download succeeds.
        /// </summary>
        /// <param name="pageNumber">Page number appended to the listing URL.</param>
        /// <param name="state">Not used by this method.</param>
        /// <returns>The loaded document.</returns>
        /// <remarks>
        /// There is no retry limit: the method keeps trying once per second for as long as
        /// loading throws.
        /// </remarks>
        private async Task<HtmlDocument> GetPage_ScrappyAsync(int pageNumber, ScraperHomeLessStateModel state)
        {
            var web = new HtmlWeb();

            while (true)
            {
                try
                {
                    return await web.LoadFromWebAsync($"https://www.homeless.co.il/rent/{pageNumber}");
                }
                catch (Exception exception)
                {
                    _log($"Error-g1. Wait 1 sec. {exception.Message}");
                }

                // Back off asynchronously; Thread.Sleep would block the calling thread
                // for the whole wait inside an async method.
                await Task.Delay(1000);
            }
        }
Beispiel #10
0
        /// <summary>
        /// Loads the page at <paramref name="uri"/> and returns the inner text of the element
        /// whose id is "torrent_url".
        /// </summary>
        /// <param name="uri">Address of the page to load.</param>
        /// <returns>Inner text of the "torrent_url" element.</returns>
        /// <exception cref="HtmlWebException">
        /// The document, its root node, or the "torrent_url" element is missing.
        /// </exception>
        public async Task<string> GetMagnetLink(Uri uri)
        {
            var          loader = new HtmlWeb();
            HtmlDocument page   = await loader.LoadFromWebAsync(uri.AbsoluteUri);

            if (page == null)
            {
                throw new HtmlWebException($"Null html document loaded, url: {uri.AbsoluteUri}");
            }

            if (page.DocumentNode == null)
            {
                throw new HtmlWebException($"Null node in html document, url: {uri.AbsoluteUri}");
            }

            var linkNode = page.DocumentNode.SelectSingleNode("//*[@id='torrent_url']");
            if (linkNode != null)
            {
                return linkNode.InnerText;
            }

            throw new HtmlWebException($"magnetLinkNode is null, url: {uri.AbsoluteUri}");
        }
        /// <summary>
        /// Scrapes the lukoil.ge price page and returns every fuel listed with a positive price.
        /// </summary>
        /// <returns>
        /// One <see cref="Fuel"/> per row of the first table with class "pricetable" whose
        /// second cell parses to a price greater than zero.
        /// </returns>
        public override async Task<Fuel[]> GetActiveFuelsAsync()
        {
            const string priceListUrl = "http://www.lukoil.ge/?m=328";

            var page = await new HtmlWeb().LoadFromWebAsync(priceListUrl);

            var priceTable = page.DocumentNode
                             .Descendants("table")
                             .First(table => table.HasClass("pricetable"));

            // First pass: read (name, price) from the first two cells of each row and
            // keep only rows with a positive price.
            var priced = priceTable
                         .Descendants("tr")
                         .Select(row => row.Descendants("td").ToArray())
                         .Select(cells => (
                                     name: cells[0].InnerText.Trim(),
                                     price: decimal.Parse(cells[1].InnerText.Trim())))
                         .Where(entry => entry.price > 0)
                         .ToArray();

            // Second pass: map the surviving rows to Fuel objects.
            Fuel ToFuel((string name, decimal price) entry)
            {
                return new Fuel
                {
                    Key   = ConvertFuelNameToKey(entry.name),
                    Name  = entry.name,
                    Price = entry.price
                };
            }

            return priced.Select(ToFuel).ToArray();
        }
        /// <summary>
        /// Checks the item's Komplett product page and reports whether it can be added to the cart.
        /// </summary>
        /// <param name="item">
        /// Wanted item; only items with a Komplett URL whose name contains
        /// "Komplett a240 Epic Gaming PC" are checked.
        /// </param>
        /// <returns>
        /// One <see cref="InStockItem"/> per buy-button section containing the text
        /// "Legg i handlevogn"; empty when the item is not checked, the page has no
        /// buy-button section, or no section contains that text.
        /// </returns>
        public async Task<IEnumerable<InStockItem>> GetItemInStock(ItemsIWant item)
        {
            var list = new List<InStockItem>();

            if (string.IsNullOrEmpty(item.KomplettUrl))
            {
                return list;
            }

            if (!item.Name.Contains("Komplett a240 Epic Gaming PC"))
            {
                return list;
            }

            var webCrawler = new HtmlWeb()
            {
                AutoDetectEncoding = false,
                OverrideEncoding   = Encoding.UTF8
            };
            var doc = await webCrawler.LoadFromWebAsync(item.KomplettUrl, Encoding.UTF8, CancellationToken.None);

            var products = doc.DocumentNode.SelectNodes("//div[contains(concat(' ', @class, ' '), ' buy-button-section ')]");

            // SelectNodes returns null when nothing matches; without this guard the loop below
            // would throw whenever the page has no buy-button section.
            if (products == null)
            {
                return list;
            }

            foreach (var product in products)
            {
                var inStock = product.SelectSingleNode(".//div[contains(@class, 'buy-button')]");

                if (inStock != null && inStock.InnerText.Contains("Legg i handlevogn"))
                {
                    list.Add(new InStockItem
                    {
                        Url       = item.KomplettUrl,
                        Name      = item.Name,
                        Count     = 0,
                        Channel   = item.DiscordChannel,
                        Store     = "Komplett.no",
                        ChannelId = item.DiscordChannelId
                    });
                }
            }

            return list;
        }
Beispiel #13
0
        /// <summary>
        /// Loads a page and, when it exposes a "headbar" element, waits for the advertised timer
        /// and posts the element's code and token to <c>rewardUrl</c>.
        /// </summary>
        /// <param name="url">Address of the page to process.</param>
        /// <returns>
        /// False when the page shows the reCAPTCHA prompt; true otherwise (whether or not a
        /// headbar was found).
        /// </returns>
        private async Task<bool> ProcessPage(string url)
        {
            HtmlWeb web = new HtmlWeb();

            HtmlDocument html = await web.LoadFromWebAsync(url);

            HtmlNode cap = html.DocumentNode.SelectSingleNode("/html/body/div[1]/div[2]/div/div/h6");

            if (cap?.InnerText == "Please solve the reCAPTCHA to continue:")
            {
                return false;
            }

            HtmlNode headbar = html.DocumentNode.SelectSingleNode("//*[@id=\"headbar\"]");

            if (headbar == null)
            {
                // No headbar on the page: just pause for ten seconds.
                await Task.Delay(10000);
                return true;
            }

            string code  = headbar.GetDataAttribute("code").Value;
            string token = headbar.GetDataAttribute("token").Value;
            int    timer = int.Parse(headbar.GetDataAttribute("timer").Value);

            // The timer attribute is treated as a number of seconds to wait before posting.
            await Task.Delay(timer * 1000);

            var parameters = new Dictionary<string, string>
            {
                { "code", code },
                { "token", token }
            };

            // Dispose both the request content and the response; the response body is not
            // needed, and an undisposed response keeps its connection resources until GC.
            using (HttpContent content = new FormUrlEncodedContent(parameters))
            using (var response = await httpClient.PostAsync(rewardUrl, content))
            {
            }

            return true;
        }
Beispiel #14
0
        /// <summary>
        /// Downloads every page image of a manga chapter.
        /// </summary>
        /// <param name="url">
        /// Chapter address; a "/manga/" segment is rewritten to "/online/" before loading.
        /// </param>
        /// <returns>
        /// The images in listing order, named "1.jpg", "2.jpg", ...; an empty list when the page
        /// has no "content" element, no script mentioning "fullimg", or no matching image list.
        /// </returns>
        public async Task<IList<Image>> DownloadMangaAsync(string url)
        {
            string mangaUrl = url.Contains("/manga/")
                ? url.Replace("/manga/", "/online/")
                : url;

            var web     = new HtmlWeb();
            var htmlDoc = await web.LoadFromWebAsync(mangaUrl);

            var images = new List<Image>();

            // Both lookups can come back null (missing element / no matching script). Treat that
            // like a failed regex match, which already results in an empty list, instead of
            // dereferencing null.
            var content       = htmlDoc.GetElementbyId("content");
            var contentScript = content?.Descendants("script")
                                .FirstOrDefault(x => x.InnerHtml.Contains("fullimg"));

            if (contentScript == null)
            {
                return images;
            }

            // Captures the comma-separated, quoted URL list inside "fullimg":[ ... ].
            var imagesRegex = new Regex("\"fullimg\":\\[(?<images>[-a-zA-Z0-9/_\\.\",:]+)\\]");
            var match       = imagesRegex.Match(contentScript.InnerText);
            var imagesGroup = match.Groups["images"];

            var imageUrls = imagesGroup.Value
                            .Replace("\"", string.Empty)
                            .Split(',')
                            .Where(x => !string.IsNullOrEmpty(x));

            // NOTE(review): a new HttpClient per call is kept as in the original; sharing a single
            // instance would be preferable but needs a field outside this method.
            using (var httpClient = new HttpClient())
            {
                int i = 1;
                foreach (var imageUrl in imageUrls)
                {
                    var bytes = await httpClient.GetByteArrayAsync(imageUrl);

                    images.Add(new Image($"{i++}.jpg", bytes));
                }
            }

            return images;
        }
        /// <summary>
        /// Queries the wsprnet.org "olddb" HTML listing and parses its table rows into spots.
        /// </summary>
        /// <param name="band">Value of the <c>band</c> query parameter.</param>
        /// <param name="sort">Value of the <c>sort</c> query parameter.</param>
        /// <param name="numberOfSpots">Requested row limit; capped at 10000.</param>
        /// <param name="searchForCall">Value of the <c>findcall</c> query parameter (may be null).</param>
        /// <param name="showSpotsHeardBy">Value of the <c>findreporter</c> query parameter (may be null).</param>
        /// <param name="findUniqueCalls">Adds <c>unique=on</c> to the query when true.</param>
        /// <param name="findUniqueReporters">Adds <c>uniquereporters=on</c> to the query when true.</param>
        /// <returns>
        /// The parsed spots; an empty array when the expected result table is not present.
        /// </returns>
        public async Task<WsprnetSpot[]> GetSpots(WsprnetBand band, WsprnetSortOrder sort, int numberOfSpots = 10000, string searchForCall = null, string showSpotsHeardBy = null, bool findUniqueCalls = false, bool findUniqueReporters = false)
        {
            var url     = $"http://wsprnet.org/olddb?mode=html&band={band}&limit={Math.Min(10000, numberOfSpots)}&findcall={searchForCall}&findreporter={showSpotsHeardBy}{(findUniqueCalls ? "&unique=on" : "")}{(findUniqueReporters ? "&uniquereporters=on" : "")}&sort={sort}";
            var htmlWeb = new HtmlWeb();

            if (!string.IsNullOrWhiteSpace(userAgent))
            {
                htmlWeb.UserAgent = userAgent;
            }

            var doc = await htmlWeb.LoadFromWebAsync(url);

            // SelectNodes returns null when the table is missing; report "no spots" instead of
            // failing inside Skip with an ArgumentNullException.
            var tableRows = doc.DocumentNode.SelectNodes("/body[1]/table[3]/tr");
            if (tableRows == null)
            {
                return new WsprnetSpot[0];
            }

            // The page is machine-generated, so numbers are parsed with the invariant culture,
            // like the timestamp. With the current culture a value such as "14.097" would be
            // misread on systems that use ',' as the decimal separator.
            var invariant = CultureInfo.InvariantCulture;
            var spots     = new List<WsprnetSpot>();

            // The first two rows are skipped (NOTE(review): presumably header rows - confirm).
            foreach (var row in tableRows.Skip(2))
            {
                var cells = row.ChildNodes;

                var spot = new WsprnetSpot();
                spot.Timestamp        = DateTime.ParseExact(StripNbsp(cells[0].InnerText), "yyyy-MM-dd HH:mm", invariant, DateTimeStyles.AdjustToUniversal);
                spot.Call             = StripNbsp(cells[1].InnerText);
                // Column value scaled by 1,000,000 and truncated to a whole number.
                spot.Frequency        = (long)(double.Parse(StripNbsp(cells[2].InnerText), invariant) * 1000000);
                spot.Snr              = int.Parse(StripNbsp(cells[3].InnerText), invariant);
                spot.Drift            = int.Parse(StripNbsp(cells[4].InnerText), invariant);
                spot.Grid             = StripNbsp(cells[5].InnerText);
                spot.Power            = int.Parse(StripNbsp(cells[6].InnerText), invariant);
                // Child nodes 7 and 11 are not read.
                spot.ReporterCallsign = StripNbsp(cells[8].InnerText);
                spot.ReporterLocator  = StripNbsp(cells[9].InnerText);
                spot.Km               = int.Parse(StripNbsp(cells[10].InnerText), invariant);
                spot.Mode             = StripNbsp(cells[12].InnerText);

                spots.Add(spot);
            }

            return spots.ToArray();
        }
Beispiel #16
0
        /// <summary>
        /// Loads the pharmaconnect.ca appointment slot page and pairs every available day with
        /// the text of each of its appointment items.
        /// </summary>
        /// <remarks>
        /// The computed day/time pairs are neither stored nor returned by the code shown here.
        /// </remarks>
        public async Task Load()
        {
            // NOTE(review): earlier revisions read a saved snapshot instead of the live page
            // (via HttpClient.GetStringAsync + HtmlDocument.LoadHtml, or HtmlWeb.Load), from
            // https://gist.githubusercontent.com/andrew-from-toronto/69b87a099237f207c23767b4c1531558/raw/74bc8742c763cf41583bf96c9318be6dd1d69af5/output.html
            var loader = new HtmlWeb();
            var page   = await loader.LoadFromWebAsync("https://pharmaconnect.ca/Appointment/8ab18efb-b158-4ca1-8103-34792852814d/Slots?serviceType=ImmunizationCovid");

            var root = page.DocumentNode;

            // Each day item carries its identifier in the data-selected-id style attribute.
            var days = root
                       .QuerySelectorAll(SELECTOR_APPOINTMENT_AVAILABILITY_DAY_ITEM)
                       .Select(dayNode => dayNode.Attributes[ATTRIBUTE_DATA_SELECTED_ID].Value)
                       .ToArray();

            // For every day, look up its appointment items; the day string is parsed once per
            // appointment found.
            var dayAppointments = days
                                  .SelectMany(day => root
                                              .QuerySelectorAll(string.Format(SELECTOR_APPOINTMENT_AVAILABILITY_TIME_ITEM, day))
                                              .Select(appointment => new
                                              {
                                                  Day  = DateTime.Parse(day),
                                                  time = appointment.InnerText
                                              }))
                                  .ToArray();
        }
Beispiel #17
0
        /// <summary>
        /// Loads the rshb.ru coin page, finds the coin block around the node selected by
        /// <paramref name="xpath"/> and reads its buy and sell prices.
        /// </summary>
        /// <param name="xpath">XPath of a node located three levels below the coin's block.</param>
        /// <param name="acronim">Value stored in <c>CoinsRate.Acronim</c>.</param>
        /// <returns>A <see cref="CoinsRate"/> stamped with the current local time.</returns>
        static async Task<CoinsRate> getRshbRuCoin(string xpath, string acronim)
        {
            const string site = @"https://www.rshb.ru/natural/coins/";

            var page = await new HtmlWeb().LoadFromWebAsync(site);

            // The coin's block is the great-grandparent of the node the XPath selects.
            var anchor    = page.DocumentNode.SelectSingleNode(xpath);
            var coinBlock = anchor.ParentNode.ParentNode.ParentNode;

            // Re-parse the block on its own so the price selectors only see this coin.
            var coinDoc = new HtmlDocument();
            coinDoc.LoadHtml("<div>" + coinBlock.InnerHtml + "</div>");
            var coinRoot = coinDoc.DocumentNode;

            // The class value of the sell selector ends in two spaces; it must match exactly.
            var buyText = coinRoot
                          .SelectSingleNode("//span[@class='b-coins-items-item-cost-b']")
                          .InnerText
                          .Replace("Р", "");
            var sellText = coinRoot
                           .SelectSingleNode("//div[@class='b-coins-items-item-quotes-price  ']")
                           .InnerText
                           .Replace("Р", "");

            var coin = new CoinsRate
            {
                Acronim = acronim,
                Date    = DateTime.Now,
                Site    = site
            };

            coin.Buy  = buyText.ParseToDoubleFormat();
            coin.Sell = sellText.ParseToDoubleFormat();

            return coin;
        }
Beispiel #18
0
        /// <summary>
        /// Loads the page at <paramref name="url"/> and collects the targets of its links.
        /// </summary>
        /// <param name="url">Address of the page to extract links from.</param>
        /// <param name="log">Receives an error entry when loading or extraction throws.</param>
        /// <returns>
        /// A document for <paramref name="url"/> whose <c>ChildUrls</c> holds every <c>href</c>
        /// starting with "http". <c>ChildUrls</c> is left at its initial value when the page has
        /// no links or processing fails.
        /// </returns>
        public static async Task<ExtractedDocument> Run(
            [ActivityTrigger] string url,
            ILogger log)
        {
            var result = new ExtractedDocument(url);

            try
            {
                var doc = await Web.LoadFromWebAsync(url, Encoding.UTF8);

                var anchors = doc.DocumentNode.SelectNodes("//a[@href]");

                // SelectNodes returns null when the page has no links. That is a normal outcome,
                // not a failure, so do not let it surface as a logged NullReferenceException.
                if (anchors == null)
                {
                    return result;
                }

                result.ChildUrls = anchors
                                   .Select(a => a.GetAttributeValue("href", string.Empty))
                                   // Ordinal: a URL scheme prefix is not linguistic text.
                                   .Where(href => href.StartsWith("http", StringComparison.Ordinal))
                                   .ToList();
            }
            catch (Exception e)
            {
                log.LogError(e, $"Exception while processing {url}.");
            }

            return result;
        }
Beispiel #19
0
        /// <summary>
        /// Loads a listing page and converts every "tr.athing" row of the item table, together
        /// with the row that follows it, into an <see cref="HnItem"/>.
        /// </summary>
        /// <param name="uri">Address of the page to scrape.</param>
        /// <param name="token">Cancellation token handed to the page download.</param>
        /// <returns>The items in page order.</returns>
        private async Task<IList<HnItem>> ScrapePageAsync(string uri, CancellationToken token)
        {
            var loader = new HtmlWeb();
            var page   = await loader.LoadFromWebAsync(uri, token);

            var storyRows = page.DocumentNode.CssSelect("table.itemlist tr.athing");
            var items     = new List<HnItem>();

            foreach (var storyRow in storyRows)
            {
                // Title link and rank live in the story row itself.
                var titleLink = storyRow.CssSelect("td.title a").First();
                var target    = titleLink.GetAttributeValue("href");
                var rankNode  = storyRow.CssSelect("td.title span.rank").First();

                // Author, score and comment link live in the sibling that follows the story row.
                var detailRow  = storyRow.NextSibling;
                var detailCell = detailRow.CssSelect("td.subtext").First();
                var authorNode = detailCell.CssSelect("a.hnuser").FirstOrDefault();
                var scoreNode  = detailCell.CssSelect("span.score").FirstOrDefault();

                // At most one direct link may mention "comment"; more than one makes
                // SingleOrDefault throw.
                var commentsLink = detailRow
                                   .CssSelect("td.subtext > a")
                                   .ToList()
                                   .SingleOrDefault(link => link.InnerText.Contains("comment", StringComparison.InvariantCultureIgnoreCase));

                items.Add(HnItemBuilder.Build(
                              titleLink?.InnerText,
                              target,
                              authorNode?.InnerText,
                              ExtractPoints(scoreNode),
                              ExtractComments(commentsLink),
                              ExtractRank(rankNode)));
            }

            return items;
        }
Beispiel #20
0
        /// <summary>
        /// Downloads a page and returns the inner HTML of its document node, retrying with a
        /// linearly growing delay until the download succeeds.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <returns>Inner HTML of the loaded document.</returns>
        /// <remarks>
        /// There is no retry limit; after the n-th failure the method waits n * 500 ms.
        /// </remarks>
        public async Task<string> DownloadPageAsync(string url)
        {
            int retries = 0;

            while (true)
            {
                try
                {
                    var doc = await _client.LoadFromWebAsync(url).ConfigureAwait(false);

                    return doc.DocumentNode.InnerHtml;
                }
                catch (Exception ex)
                {
                    retries += 1;

                    // Report the failure right away instead of after the back-off.
                    _log.Log($"{ex.GetType().Name} occurred on retry {retries} loading {url}");
                }

                // Asynchronous back-off: Thread.Sleep would block the current thread for the
                // whole wait inside an async method.
                await Task.Delay(500 * retries).ConfigureAwait(false);
            }
        }
Beispiel #21
0
        /// <summary>
        /// Loads the configured website and returns the markup of the node picked by
        /// <c>GetContentNode</c>.
        /// </summary>
        /// <returns>
        /// The node's inner HTML when <c>_config.UseInnerHtml</c> is set, otherwise the result of
        /// <c>WriteTo()</c>; an empty string (after logging a warning) when no node is found.
        /// </returns>
        public async Task<string> ReadContent()
        {
            // The page is requested with a desktop Firefox user agent string.
            var reader = new HtmlWeb();
            reader.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:80.0) Gecko/20100101 Firefox/80.0";

            var dom = await reader.LoadFromWebAsync(_config.WebsiteAddress, Encoding.UTF8);

            var contentNode = GetContentNode(dom);
            if (contentNode != null)
            {
                if (_config.UseInnerHtml)
                {
                    return contentNode.InnerHtml;
                }

                return contentNode.WriteTo();
            }

            _logger.LogWarning(
                "No content found at {Address} with {XPath}",
                _config.WebsiteAddress,
                _config.XPathSelector);

            return String.Empty;
        }
Beispiel #22
0
        /// <summary>
        /// Loads an HTML document from the web, falling back to reading <paramref name="path"/>
        /// from disk when the web download fails or yields nothing.
        /// </summary>
        /// <param name="path">Web address or file path of the document; also stored in <c>Path</c>.</param>
        /// <returns>The loaded document, or null when both attempts fail.</returns>
        public async Task<HtmlDocument> Download(string path)
        {
            this.Path = path;

            // Attempt 1: treat the path as a web address.
            try
            {
                var remote = await web.LoadFromWebAsync(Path);
                if (remote != null)
                {
                    return remote;
                }
            }
            catch (Exception)
            {
                // Best effort: any failure here simply falls through to the disk attempt.
            }

            // Attempt 2: treat the path as a local file.
            try
            {
                var local = new HtmlDocument();
                local.Load(path);
                return local;
            }
            catch (Exception)
            {
                // Best effort: failures are swallowed and reported to the caller as null.
                return null;
            }
        }
        /// <summary>
        /// Loads a Gumroad product page and builds a <see cref="Mini"/> from its Open Graph and
        /// product meta tags.
        /// </summary>
        /// <param name="url">Address of the product page.</param>
        /// <returns>
        /// The mini, with a creator named after the first host label of the page's
        /// <c>og:url</c> and a source entry pointing at <paramref name="url"/>.
        /// </returns>
        public async Task<Mini> ParseFromUrl(Uri url)
        {
            HtmlWeb      web     = new HtmlWeb();
            HtmlDocument htmlDoc = await web.LoadFromWebAsync(url, null, null);

            HtmlNode nameNode  = htmlDoc.DocumentNode.SelectNodes("//meta[@property=\"og:title\"]").First();
            HtmlNode imageNode = htmlDoc.DocumentNode.SelectNodes("//meta[@property=\"og:image\"]").First();
            HtmlNode urlNode   = htmlDoc.DocumentNode.SelectNodes("//meta[@property=\"og:url\"]").First();
            HtmlNode costNode  = htmlDoc.DocumentNode.SelectNodes("//meta[@property=\"product:price:amount\"]").First();

            // Fall back to the requested address when og:url carries no content attribute.
            Uri link = new Uri(urlNode.GetAttributeValue("content", url.ToString()));

            // The creator is the first label of the host name (e.g. "name" in name.example.com).
            string creatorName = link.Host.Split('.').First().Split('/').Last();

            Creator creator = new Creator
            {
                Name = creatorName
            };
            GumroadSource source = new GumroadSource(creator, creatorName);

            creator.Sites.Add(source);

            Mini mini = new Mini()
            {
                Creator   = creator,
                Name      = System.Web.HttpUtility.HtmlDecode(nameNode.GetAttributeValue("content", null)),
                Thumbnail = imageNode.GetAttributeValue("content", null),
                Link      = "https://gumroad.com" + link.AbsolutePath
            };

            // The meta value is machine-readable ('.' as decimal separator), so parse it with the
            // invariant culture; with the current culture "12.50" would be misread on systems
            // that use ',' as the decimal separator.
            double price = Convert.ToDouble(
                costNode.GetAttributeValue("content", "0"),
                System.Globalization.CultureInfo.InvariantCulture);

            // Rounded to a whole number (Math.Round's default midpoint rule applies).
            mini.Cost = Convert.ToInt32(Math.Round(price));

            mini.Sources.Add(new MiniSourceSite(mini, source, url));

            return mini;
        }
Beispiel #24
0
        /// <summary>
        /// Loads a page, retrying up to <c>maxRetry</c> times with a one second pause between
        /// attempts. Every failure is appended to "logs.txt".
        /// </summary>
        /// <param name="url">Address of the page to load.</param>
        /// <returns>
        /// The loaded document. When <c>maxRetry</c> is zero or negative no attempt is made and
        /// an empty document is returned.
        /// </returns>
        /// <remarks>The exception of the last failed attempt is rethrown to the caller.</remarks>
        public async static Task<HtmlDocument> LoadSiteAsync(string url)
        {
            for (int attemptsLeft = maxRetry; attemptsLeft > 0; attemptsLeft--)
            {
                try
                {
                    HtmlWeb web = new HtmlWeb();
                    return await web.LoadFromWebAsync(url);
                }
                catch (Exception ex)
                {
                    using (StreamWriter logWriter = File.AppendText("logs.txt"))
                    {
                        string errorInfo = $"Error: [{ex.InnerException}] @ [{url}], [{ex.StackTrace}]";
                        logWriter.WriteLine(errorInfo);
                    }

                    // Last attempt: give up immediately instead of sleeping first.
                    if (attemptsLeft == 1)
                    {
                        throw;
                    }
                }

                // Pause asynchronously before the next attempt; Thread.Sleep would block the
                // calling thread inside an async method.
                await Task.Delay(1000);
            }

            // Only reached when maxRetry <= 0, i.e. nothing was attempted.
            return new HtmlDocument();
        }
Beispiel #25
0
        /// <summary>
        /// Looks up a show by its exact title on the "/shows/" index and fetches its download links.
        /// </summary>
        /// <param name="name">Title to match against the <c>title</c> attribute of the show links.</param>
        /// <returns>
        /// The result of <c>GetDls</c> for the first matching show, or null when the index has no
        /// show links or none of them carries the requested title.
        /// </returns>
        public async Task<IList<HorribleSubsLink>?> Download(string name = "")
        {
            const string url = baseUrl + "/shows/";
            var          web = new HtmlWeb();
            var          doc = await web.LoadFromWebAsync(url).ConfigureAwait(false);

            // SelectNodes returns null when nothing matches; treat that as "show not found"
            // rather than passing null into LINQ.
            var showLinks = doc.DocumentNode
                            .SelectNodes("//*[contains(@class, 'ind-show')]" +
                                         "/a");
            if (showLinks == null)
            {
                return null;
            }

            // Find the match in a single pass (the previous deferred query was enumerated twice,
            // once by Any() and once by First()). Links without a title attribute never match.
            var showLink = showLinks.FirstOrDefault(node => node.Attributes["title"]?.Value == name);
            if (showLink == null)
            {
                return null;
            }

            var showUrl = baseUrl + showLink.GetAttributeValue("href", string.Empty);

            return await GetDls(showUrl, name).ConfigureAwait(false);
        }
Beispiel #26
0
        public async Task ExecuteAsync()
        {
            var hotnewsUrls = new List <HotNewsJobItem <string> >
            {
                new HotNewsJobItem <string> {
                    Result = "https://www.cnblogs.com", Source = HotNewsEnum.cnblogs
                },
                new HotNewsJobItem <string> {
                    Result = "https://www.v2ex.com/?tab=hot", Source = HotNewsEnum.v2ex
                },
                new HotNewsJobItem <string> {
                    Result = "https://segmentfault.com/hottest", Source = HotNewsEnum.segmentfault
                },
                new HotNewsJobItem <string> {
                    Result = "https://web-api.juejin.im/query", Source = HotNewsEnum.juejin
                },
                new HotNewsJobItem <string> {
                    Result = "https://weixin.sogou.com", Source = HotNewsEnum.weixin
                },
                new HotNewsJobItem <string> {
                    Result = "https://www.douban.com/group/explore", Source = HotNewsEnum.douban
                },
                new HotNewsJobItem <string> {
                    Result = "https://www.ithome.com", Source = HotNewsEnum.ithome
                },
                new HotNewsJobItem <string> {
                    Result = "https://36kr.com/newsflashes", Source = HotNewsEnum.kr36
                },
                new HotNewsJobItem <string> {
                    Result = "http://tieba.baidu.com/hottopic/browse/topicList", Source = HotNewsEnum.tieba
                },
                new HotNewsJobItem <string> {
                    Result = "http://top.baidu.com/buzz?b=341", Source = HotNewsEnum.baidu
                },
                new HotNewsJobItem <string> {
                    Result = "https://s.weibo.com/top/summary/summary", Source = HotNewsEnum.weibo
                },
                new HotNewsJobItem <string> {
                    Result = "https://www.zhihu.com/api/v3/feed/topstory/hot-lists/total?limit=50&desktop=true", Source = HotNewsEnum.zhihu
                },
                new HotNewsJobItem <string> {
                    Result = "https://daily.zhihu.com", Source = HotNewsEnum.zhihudaily
                },
                new HotNewsJobItem <string> {
                    Result = "http://news.163.com/special/0001386F/rank_whole.html", Source = HotNewsEnum.news163
                },
                new HotNewsJobItem <string> {
                    Result = "https://github.com/trending", Source = HotNewsEnum.github
                },
                new HotNewsJobItem <string> {
                    Result = "https://www.iesdouyin.com/web/api/v2/hotsearch/billboard/word", Source = HotNewsEnum.douyin_hot
                },
                new HotNewsJobItem <string> {
                    Result = "https://www.iesdouyin.com/web/api/v2/hotsearch/billboard/aweme", Source = HotNewsEnum.douyin_video
                },
                new HotNewsJobItem <string> {
                    Result = "https://www.iesdouyin.com/web/api/v2/hotsearch/billboard/aweme/?type=positive", Source = HotNewsEnum.douyin_positive
                },
            };

            var web       = new HtmlWeb();
            var list_task = new List <Task <HotNewsJobItem <object> > >();

            hotnewsUrls.ForEach(item =>
            {
                var task = Task.Run(async() =>
                {
                    var obj = new object();

                    if (item.Source == HotNewsEnum.juejin)
                    {
                        using var client = _httpClient.CreateClient();
                        client.DefaultRequestHeaders.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.14 Safari/537.36 Edg/83.0.478.13");
                        client.DefaultRequestHeaders.Add("X-Agent", "Juejin/Web");
                        var data        = "{\"extensions\":{\"query\":{ \"id\":\"21207e9ddb1de777adeaca7a2fb38030\"}},\"operationName\":\"\",\"query\":\"\",\"variables\":{ \"first\":20,\"after\":\"\",\"order\":\"THREE_DAYS_HOTTEST\"}}";
                        var buffer      = data.SerializeUtf8();
                        var byteContent = new ByteArrayContent(buffer);
                        byteContent.Headers.ContentType = new MediaTypeHeaderValue("application/json");

                        var httpResponse = await client.PostAsync(item.Result, byteContent);
                        obj = await httpResponse.Content.ReadAsStringAsync();
                    }
                    else
                    {
                        // 针对GBK、GB2312编码网页,注册提供程序,否则获取到的数据乱码
                        Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
                        obj = await web.LoadFromWebAsync(item.Result, (item.Source == HotNewsEnum.baidu || item.Source == HotNewsEnum.news163) ? Encoding.GetEncoding("GB2312") : Encoding.UTF8);
                    }

                    return(new HotNewsJobItem <object>
                    {
                        Result = obj,
                        Source = item.Source
                    });
                });
                list_task.Add(task);
            });
            Task.WaitAll(list_task.ToArray());

            var hotNews = new List <HotNews>();

            foreach (var list in list_task)
            {
                var item     = await list;
                var sourceId = (int)item.Source;

                // 博客园
                if (item.Source == HotNewsEnum.cnblogs)
                {
                    var nodes = ((HtmlDocument)item.Result).DocumentNode.SelectNodes("//div[@class='post_item_body']/h3/a").ToList();
                    nodes.ForEach(x =>
                    {
                        hotNews.Add(new HotNews
                        {
                            Title      = x.InnerText,
                            Url        = x.GetAttributeValue("href", ""),
                            SourceId   = sourceId,
                            CreateTime = DateTime.Now
                        });
                    });
                }

                //// V2EX
                //if (item.Source == HotNewsEnum.v2ex)
                //{
                //    var nodes = ((HtmlDocument)item.Result).DocumentNode.SelectNodes("//span[@class='item_title']/a").ToList();
                //    nodes.ForEach(x =>
                //    {
                //        hotNews.Add(new HotNews
                //        {
                //            Title = x.InnerText,
                //            Url = $"https://www.v2ex.com{x.GetAttributeValue("href", "")}",
                //            SourceId = sourceId,
                //            CreateTime = DateTime.Now
                //        });
                //    });
                //}

                // SegmentFault
                if (item.Source == HotNewsEnum.segmentfault)
                {
                    var nodes = ((HtmlDocument)item.Result).DocumentNode.SelectNodes("//div[@class='news__item-info clearfix']/a").Where(x => x.InnerText.IsNotNullOrEmpty()).ToList();
                    nodes.ForEach(x =>
                    {
                        hotNews.Add(new HotNews
                        {
                            Title      = x.SelectSingleNode(".//h4").InnerText,
                            Url        = $"https://segmentfault.com{x.GetAttributeValue("href", "")}",
                            SourceId   = sourceId,
                            CreateTime = DateTime.Now
                        });
                    });
                }

                // 掘金
                if (item.Source == HotNewsEnum.juejin)
                {
                    var obj   = JObject.Parse((string)item.Result);
                    var nodes = obj["data"]["articleFeed"]["items"]["edges"];
                    foreach (var node in nodes)
                    {
                        hotNews.Add(new HotNews
                        {
                            Title      = node["node"]["title"].ToString(),
                            Url        = node["node"]["originalUrl"].ToString(),
                            SourceId   = sourceId,
                            CreateTime = DateTime.Now
                        });
                    }
                }

                // 微信热门
                if (item.Source == HotNewsEnum.weixin)
                {
                    var nodes = ((HtmlDocument)item.Result).DocumentNode.SelectNodes("//ul[@class='news-list']/li/div[@class='txt-box']/h3/a").ToList();
                    nodes.ForEach(x =>
                    {
                        hotNews.Add(new HotNews
                        {
                            Title      = x.InnerText,
                            Url        = x.GetAttributeValue("href", ""),
                            SourceId   = sourceId,
                            CreateTime = DateTime.Now
                        });
                    });
                }

                // 豆瓣精选
                if (item.Source == HotNewsEnum.douban)
                {
                    var nodes = ((HtmlDocument)item.Result).DocumentNode.SelectNodes("//div[@class='channel-item']/div[@class='bd']/h3/a").ToList();
                    nodes.ForEach(x =>
                    {
                        hotNews.Add(new HotNews
                        {
                            Title      = x.InnerText,
                            Url        = x.GetAttributeValue("href", ""),
                            SourceId   = sourceId,
                            CreateTime = DateTime.Now
                        });
                    });
                }

                // IT之家
                if (item.Source == HotNewsEnum.ithome)
                {
                    var nodes = ((HtmlDocument)item.Result).DocumentNode.SelectNodes("//div[@class='lst lst-2 hot-list']/div[1]/ul/li/a").ToList();
                    nodes.ForEach(x =>
                    {
                        hotNews.Add(new HotNews
                        {
                            Title      = x.InnerText,
                            Url        = x.GetAttributeValue("href", ""),
                            SourceId   = sourceId,
                            CreateTime = DateTime.Now
                        });
                    });
                }

                // 36氪
                if (item.Source == HotNewsEnum.kr36)
                {
                    var nodes = ((HtmlDocument)item.Result).DocumentNode.SelectNodes("//div[@class='hotlist-main']/div[@class='hotlist-item-toptwo']/a[2]|//div[@class='hotlist-main']/div[@class='hotlist-item-other clearfloat']/div[@class='hotlist-item-other-info']/a").ToList();
                    nodes.ForEach(x =>
                    {
                        hotNews.Add(new HotNews
                        {
                            Title      = x.InnerText,
                            Url        = $"https://36kr.com{x.GetAttributeValue("href", "")}",
                            SourceId   = sourceId,
                            CreateTime = DateTime.Now
                        });
                    });
                }

                // 百度贴吧
                if (item.Source == HotNewsEnum.tieba)
                {
                    var obj   = JObject.Parse(((HtmlDocument)item.Result).ParsedText);
                    var nodes = obj["data"]["bang_topic"]["topic_list"];
                    foreach (var node in nodes)
                    {
                        hotNews.Add(new HotNews
                        {
                            Title      = node["topic_name"].ToString(),
                            Url        = node["topic_url"].ToString().Replace("amp;", ""),
                            SourceId   = sourceId,
                            CreateTime = DateTime.Now
                        });
                    }
                }

                // 百度热搜
                if (item.Source == HotNewsEnum.baidu)
                {
                    var nodes = ((HtmlDocument)item.Result).DocumentNode.SelectNodes("//table[@class='list-table']//tr/td[@class='keyword']/a[@class='list-title']").ToList();
                    nodes.ForEach(x =>
                    {
                        hotNews.Add(new HotNews
                        {
                            Title      = x.InnerText,
                            Url        = x.GetAttributeValue("href", ""),
                            SourceId   = sourceId,
                            CreateTime = DateTime.Now
                        });
                    });
                }

                // 微博热搜
                if (item.Source == HotNewsEnum.weibo)
                {
                    var nodes = ((HtmlDocument)item.Result).DocumentNode.SelectNodes("//table/tbody/tr/td[2]/a").ToList();
                    nodes.ForEach(x =>
                    {
                        hotNews.Add(new HotNews
                        {
                            Title      = x.InnerText,
                            Url        = $"https://s.weibo.com{x.GetAttributeValue("href", "").Replace("#", "%23")}",
                            SourceId   = sourceId,
                            CreateTime = DateTime.Now
                        });
                    });
                }

                // 知乎热榜
                if (item.Source == HotNewsEnum.zhihu)
                {
                    var obj   = JObject.Parse(((HtmlDocument)item.Result).ParsedText);
                    var nodes = obj["data"];
                    foreach (var node in nodes)
                    {
                        hotNews.Add(new HotNews
                        {
                            Title      = node["target"]["title"].ToString(),
                            Url        = $"https://www.zhihu.com/question/{node["target"]["id"]}",
                            SourceId   = sourceId,
                            CreateTime = DateTime.Now
                        });
                    }
                }

                // 知乎日报
                if (item.Source == HotNewsEnum.zhihudaily)
                {
                    var nodes = ((HtmlDocument)item.Result).DocumentNode.SelectNodes("//div[@class='box']/a").ToList();
                    nodes.ForEach(x =>
                    {
                        hotNews.Add(new HotNews
                        {
                            Title      = x.InnerText,
                            Url        = $"https://daily.zhihu.com{x.GetAttributeValue("href", "")}",
                            SourceId   = sourceId,
                            CreateTime = DateTime.Now
                        });
                    });
                }

                // 网易新闻
                if (item.Source == HotNewsEnum.news163)
                {
                    var nodes = ((HtmlDocument)item.Result).DocumentNode.SelectNodes("//div[@class='area-half left']/div[@class='tabBox']/div[@class='tabContents active']/table//tr/td[1]/a").ToList();
                    nodes.ForEach(x =>
                    {
                        hotNews.Add(new HotNews
                        {
                            Title      = x.InnerText,
                            Url        = x.GetAttributeValue("href", ""),
                            SourceId   = sourceId,
                            CreateTime = DateTime.Now
                        });
                    });
                }

                // GitHub
                if (item.Source == HotNewsEnum.github)
                {
                    var nodes = ((HtmlDocument)item.Result).DocumentNode.SelectNodes("//article[@class='Box-row']/h1/a").ToList();
                    nodes.ForEach(x =>
                    {
                        hotNews.Add(new HotNews
                        {
                            Title      = x.InnerText.Trim().Replace("\n", "").Replace(" ", ""),
                            Url        = $"https://github.com{x.GetAttributeValue("href", "")}",
                            SourceId   = sourceId,
                            CreateTime = DateTime.Now
                        });
                    });
                }

                // 抖音热点
                if (item.Source == HotNewsEnum.douyin_hot)
                {
                    var obj   = JObject.Parse(((HtmlDocument)item.Result).ParsedText);
                    var nodes = obj["word_list"];
                    foreach (var node in nodes)
                    {
                        hotNews.Add(new HotNews
                        {
                            Title      = node["word"].ToString(),
                            Url        = $"#{node["hot_value"]}",
                            SourceId   = sourceId,
                            CreateTime = DateTime.Now
                        });
                    }
                }

                // 抖音视频 & 抖音正能量
                if (item.Source == HotNewsEnum.douyin_video || item.Source == HotNewsEnum.douyin_positive)
                {
                    var obj   = JObject.Parse(((HtmlDocument)item.Result).ParsedText);
                    var nodes = obj["aweme_list"];
                    foreach (var node in nodes)
                    {
                        hotNews.Add(new HotNews
                        {
                            Title      = node["aweme_info"]["desc"].ToString(),
                            Url        = node["aweme_info"]["share_url"].ToString(),
                            SourceId   = sourceId,
                            CreateTime = DateTime.Now
                        });
                    }
                }
            }

            if (hotNews.Any())
            {
                await _hotNewsRepository.DeleteAsync(x => true);

                await _hotNewsRepository.BulkInsertAsync(hotNews);
            }

            // 发送Email
            var message = new MimeMessage
            {
                Subject = "【定时任务】每日热点数据抓取任务推送",
                Body    = new BodyBuilder
                {
                    HtmlBody = $"本次抓取到{hotNews.Count()}条数据,时间:{DateTime.Now:yyyy-MM-dd HH:mm:ss}"
                }.ToMessageBody()
Beispiel #27
0
        /// <summary>
        /// Scrapes every page of the logged-games ladder for a hard-coded player and
        /// region and returns the collected games as JSON.
        /// </summary>
        /// <returns>
        /// A JSON result containing one <c>GameDetail</c> per complete ranking row, or a
        /// NotFound result when a page cannot be loaded or contains no ranking rows.
        /// </returns>
        public async Task <IActionResult> GetAllData()
        {
            // Every game occupies this many consecutive <td> cells in the ranking table.
            const int columnsPerGame = 11;

            var playerName      = "Fatso";
            var region          = "Northrend";
            var gameDetailsList = new List <GameDetail>();

            // Escape the query values so the URL stays valid if they ever contain
            // spaces or reserved characters; the page number is appended per request.
            var pageUrlPrefix = "http://classic.battle.net/war3/ladder/w3xp-player-logged-games.aspx?Gateway=" + Uri.EscapeDataString(region) + "&PlayerName=" + Uri.EscapeDataString(playerName) + "&SortField=Game_Date&SortDir=Asc&PageNo=";

            var web = new HtmlWeb();
            var doc = await web.LoadFromWebAsync(pageUrlPrefix + 1);

            if (doc == null)
            {
                return(NotFound("Could not get data from the page."));
            }

            // The last pagination link carries the highest page number. SelectNodes
            // returns null when nothing matches, so fall back to a single page when the
            // links are missing or their text is not a number.
            var lastPageLink  = doc.DocumentNode.SelectNodes("//td[@class='rankingFiller']//a")?.LastOrDefault();
            int maxPageNumber;
            if (lastPageLink == null || !int.TryParse(lastPageLink.InnerText.Trim(' '), out maxPageNumber) || maxPageNumber < 1)
            {
                maxPageNumber = 1;
            }

            for (int p = 1; p <= maxPageNumber; p++)
            {
                // Page 1 has already been downloaded above; only fetch the later pages.
                if (p > 1)
                {
                    doc = await web.LoadFromWebAsync(pageUrlPrefix + p);

                    if (doc == null)
                    {
                        return(NotFound("Could not get data from the page."));
                    }
                }

                var rankingCells = doc.DocumentNode.SelectNodes("//tr[@class='rankingRow']//td");
                if (rankingCells == null)
                {
                    return(NotFound("Could not get data from the page."));
                }

                var rankingRowList = rankingCells
                                     .Select(cell => cell.InnerText.Replace("\r", "").Replace("\n", "").Replace("\t", ""))
                                     .ToList();

                // Only consume complete rows so a trailing partial row cannot cause an
                // out-of-range index.
                for (int i = 0; i + columnsPerGame <= rankingRowList.Count; i += columnsPerGame)
                {
                    var gameDetail = new GameDetail()
                    {
                        Id         = p * 100 + (i / columnsPerGame),
                        Date       = rankingRowList[i + 1],
                        GameType   = rankingRowList[i + 2],
                        Map        = rankingRowList[i + 3],
                        Allies     = rankingRowList[i + 6],
                        Opponents  = rankingRowList[i + 8],
                        GameLength = rankingRowList[i + 9],
                        Result     = rankingRowList[i + 10],
                    };
                    gameDetailsList.Add(gameDetail);
                }
            }

            // All wanted games are collected here; persisting them to a database (or
            // showing them through a view model) is still to be decided.
            return(Json(gameDetailsList));
        }
Beispiel #28
0
        /// <summary>
        /// Timer tick handler: downloads the configured source page, extracts the
        /// song, artist and cover image from the "card horizontal" element and passes
        /// the result to <c>OnNowPlayingChanged</c>. When nothing could be parsed, or the
        /// download failed or was cancelled, <c>null</c> is passed instead.
        /// </summary>
        /// <param name="sender">The timer raising the event (unused).</param>
        /// <param name="args">Elapsed event data (unused).</param>
        private async void TimerCallback(object sender, ElapsedEventArgs args)
        {
            var timeoutCancellationTokenSource = new CancellationTokenSource(_configuration.PollingInterval * 5);
            CancellationTokenSource linkedCancellationTokenSource = null;

            NowPlayingInfo nowPlayingInfo = null;

            try
            {
                // Combine the per-request timeout with the shared cancellation source
                // when one exists. This happens inside the try block because an unhandled
                // exception in an async void method would escape to the caller's context.
                var token        = timeoutCancellationTokenSource.Token;
                var sharedSource = _cancellationTokenSource;
                if (sharedSource != null)
                {
                    linkedCancellationTokenSource = CancellationTokenSource.CreateLinkedTokenSource(
                        sharedSource.Token,
                        timeoutCancellationTokenSource.Token);
                    token = linkedCancellationTokenSource.Token;
                }

                var web = new HtmlWeb();
                var doc = await web.LoadFromWebAsync(
                    _configuration.SourceUrl,
                    Encoding.UTF8,
                    token);

                var infoDiv = doc.DocumentNode.FirstDescendantWithClass("div", "card horizontal");
                if (infoDiv != null)
                {
                    var imgUrl = infoDiv.Descendants("img").FirstOrDefault()?.GetAttributeValue("src", default(string));

                    var content = infoDiv.FirstDescendantWithClass("div", "card-content")?.InnerText;
                    if (content != null)
                    {
                        var lines = content
                                    .Split(new[] { "\n", "\r\n" }, StringSplitOptions.RemoveEmptyEntries)
                                    .Select(x => x.Trim())
                                    .Take(2)
                                    .ToList();

                        // The first line is read as the song and the second as the
                        // artist; skip the update when either one is missing.
                        if (lines.Count >= 2)
                        {
                            var song   = lines[0];
                            var artist = lines[1];
                            nowPlayingInfo = new NowPlayingInfo(artist, song, imgUrl);
                        }
                    }
                }
            }
            catch (OperationCanceledException)
            {
                // Ignore - these are expected during timeouts or while quitting.
                // TaskCanceledException derives from OperationCanceledException, so
                // both forms of cancellation are covered here.
            }
            catch (Exception ex)
            {
                _logger?.Log(ex);
            }
            finally
            {
                linkedCancellationTokenSource?.Dispose();
                timeoutCancellationTokenSource.Dispose();
            }
            OnNowPlayingChanged(nowPlayingInfo);
        }
Beispiel #29
0
        /// <summary>
        /// Downloads a game thread page and parses it into an <c>F95ZoneGame</c>:
        /// labels, name, developer, genres (tags), cover image, overview and preview
        /// image URLs.
        /// </summary>
        /// <param name="url">Address of the thread page; also stored as <c>F95Link</c>.</param>
        /// <param name="logger">Logger handed to the project's null/empty-check helpers.</param>
        /// <returns>
        /// The populated game, or <c>null</c> when the page could not be loaded or the
        /// body, header, title or content node is missing.
        /// </returns>
        public static async Task <F95ZoneGame> LoadGame(string url, ILogger logger)
        {
            var web      = new HtmlWeb();
            var document = await web.LoadFromWebAsync(url);

            if (document == null)
            {
                return(null);
            }

            var game = new F95ZoneGame
            {
                F95Link = url
            };

            var node     = document.DocumentNode;
            var bodyNode = node.SelectSingleNode("//div[@class='uix_contentWrapper']/div[@class='p-body-main  ']/div[@class='p-body-content']");

            if (bodyNode.IsNull(logger, "Body", url))
            {
                return(null);
            }

            var headerNode =
                bodyNode.SelectSingleNode(
                    "//div[@class='pageContent']/div[@class='uix_headerInner']");

            if (headerNode.IsNull(logger, "Header", url))
            {
                return(null);
            }

            var labels = headerNode.SelectNodes("div[@class='p-title ']/h1[@class='p-title-value']/a[@class='labelLink']");

            if (!labels.IsNullOrEmpty(logger, "Labels", url))
            {
                game.LabelList = labels.Select(x =>
                                               !x.TryGetInnerText("span", logger, "Label", url, out var label)
                        ? null
                        : label)
                                 .NotNull().ToList();
            }

            if (!headerNode.TryGetInnerText(
                    "div[@class='p-title ']/h1[@class='p-title-value']",
                    logger, "Title", url, out var id))
            {
                return(null);
            }

            if (game.LabelList == null)
            {
                game.Name = id;
            }
            else
            {
                // The title text still contains the label texts: remove each label
                // from the title and strip the surrounding brackets from the label.
                // The lambda updates 'id' as a side effect; ToList() forces it to run.
                game.LabelList = game.LabelList.Select(label =>
                {
                    if (id.Contains(label))
                    {
                        id = id.Replace(label, "");
                    }

                    if (label.StartsWith("["))
                    {
                        label = label.Substring(1, label.Length - 1);
                    }

                    if (label.EndsWith("]"))
                    {
                        label = label.Substring(0, label.Length - 1);
                    }

                    return(label);
                }).ToList();

                id = id.Trim();

                // The last "[...]" group in the remaining title is taken as the
                // developer, and everything before it as the game name.
                var lastStartingBracket = id.LastIndexOf('[');
                var lastClosingBracket  = id.LastIndexOf(']');

                if (lastStartingBracket != -1)
                {
                    // Only extract the developer when the closing bracket follows the
                    // opening one; otherwise the length would be negative.
                    if (lastClosingBracket > lastStartingBracket)
                    {
                        game.Developer = id.Substring(lastStartingBracket + 1, lastClosingBracket - lastStartingBracket - 1);
                    }

                    id = id.Substring(0, lastStartingBracket).Trim();
                }

                // A title without any '[' is used as the name unchanged.
                game.Name = id;
            }

            var tags = headerNode.SelectNodes(
                "div[@class='p-description']/ul/li[@class='groupedTags']/a[@class='tagItem']");

            if (!tags.IsNullOrEmpty(logger, "Tags", id))
            {
                game.Genres = tags.Select(x =>
                {
                    var innerText = x.DecodeInnerText();
                    return(innerText.IsEmpty(logger, "Tag", id)
                        ? null
                        : innerText);
                }).NotNull().ToList();
            }

            var contentNode = bodyNode.SelectSingleNode("//div[@class='message-inner']/div[@class='message-cell message-cell--main']/div[@class='message-main uix_messageContent js-quickEditTarget']/div/div/article[@class='message-body js-selectToQuote']/div[@class='bbWrapper']");

            if (contentNode.IsNull(logger, "Content", id))
            {
                return(null);
            }

            var topNode = contentNode.SelectSingleNode("div");

            if (!topNode.IsNull(logger, "Top", id))
            {
                var coverImageNode = topNode.SelectSingleNode("a");
                if (!coverImageNode.IsNull(logger, "Cover Image", id))
                {
                    var href = coverImageNode.GetValue("href");
                    if (!href.IsEmpty(logger, "Cover Image", id))
                    {
                        game.CoverImageURL = href;
                    }
                }

                // Drop the cover link so it does not end up in the overview HTML.
                // Guarded explicitly: there is nothing to remove when the link is absent.
                if (coverImageNode != null)
                {
                    topNode.RemoveChild(coverImageNode);
                }

                game.Overview = HttpUtility.HtmlDecode(topNode.InnerHtml);
            }

            var previewImages = contentNode.SelectNodes("//img[@class='bbImage ']");

            if (!previewImages.IsNullOrEmpty(logger, "Preview Images", id))
            {
                // The preview URL is read from the "href" of each image's parent node.
                game.PreviewImageURLs = previewImages.Select(x =>
                {
                    var a    = x.ParentNode;
                    var href = a.GetValue("href");
                    return(href.IsEmpty(logger, "Preview Image href", id)
                        ? null
                        : href);
                }).NotNull().ToList();
            }

            return(game);
        }
Beispiel #30
0
        /// <summary>
        /// Downloads the page at <c>subtitleUrl</c>, reads every list item under the
        /// "new-link" element and fills <c>Subtitles</c> with one <c>DownloadModel</c> per
        /// item (display name plus status, and the item's link). Errors are reported
        /// through <c>ShowError</c>.
        /// </summary>
        private async void GetSubtitle()
        {
            progress.IsActive   = true;
            listView.Visibility = Visibility.Collapsed;
            CloseError();

            if (!GeneralHelper.IsNetworkAvailable())
            {
                // NOTE(review): as in the original, the progress indicator stays active
                // and the list stays collapsed on this path - confirm ShowError resets them.
                ShowError(Constants.InternetIsNotAvailable, Constants.InternetIsNotAvailableTitle);
                return;
            }

            try
            {
                var web = new HtmlWeb();
                var doc = await web.LoadFromWebAsync(subtitleUrl);

                var items = doc.DocumentNode.SelectNodes(@"//div[@id='new-link']/ul/li");
                if (items == null)
                {
                    ShowError(Constants.NotFoundOrExist);
                    return;
                }

                Subtitles?.Clear();
                foreach (var node in items)
                {
                    var displayName = node.SelectSingleNode(".//div[@class='new-link-1']")?.InnerText;
                    var status      = node.SelectSingleNode(".//div[@class='new-link-2']")?.InnerText;

                    // Skip only the malformed entry, so one bad item does not abort
                    // the rest of the list.
                    if (displayName == null || status == null)
                    {
                        continue;
                    }

                    var link = node.SelectSingleNode(".//a")?.Attributes["href"]?.Value;

                    status      = status.Replace("&nbsp;", "");
                    displayName = displayName.Trim() + " - " + status.Trim();

                    var item = new DownloadModel
                    {
                        DisplayName  = displayName,
                        DownloadLink = link
                    };
                    Subtitles?.Add(item);
                }
            }
            catch (Exception ex) when (ex is ArgumentOutOfRangeException || ex is ArgumentNullException || ex is NullReferenceException)
            {
                // Best effort: parsing problems leave the list as far as it got. They
                // must not escape, because this method is async void.
            }
            catch (Exception ex) when (ex is WebException || ex is HttpRequestException)
            {
                if (!string.IsNullOrEmpty(ex.Message))
                {
                    ShowError(ex.Message);
                }
            }
            finally
            {
                // Runs on every exit from the try block, including the early return.
                progress.IsActive   = false;
                listView.Visibility = Visibility.Visible;
            }
        }
Beispiel #31
0
 /// <summary>
 /// Loads a page over HTTP with <c>HtmlWeb.LoadFromWebAsync</c> and checks that a
 /// document is returned. Requires network access.
 /// </summary>
 /// <returns>A task the test runner can await, so failures are observed.</returns>
 public async Task TestHtmlWebBasicCall()
 {
     var html = new HtmlWeb();
     var doc = await html.LoadFromWebAsync("http://www.google.com");
     Assert.IsNotNull(doc);
 }