/// <summary>
/// Builds a <see cref="DataTable"/> from the table rows found in an HTML fragment.
/// </summary>
/// <remarks>
/// The first <c>tr</c> under any <c>table</c> supplies the column names (its <c>th</c> cells,
/// trimmed and upper-cased). Every following <c>tr</c> becomes one data row made of its trimmed
/// <c>td</c> cell texts.
/// </remarks>
/// <param name="infoHTML">HTML markup containing the table to convert.</param>
/// <returns>
/// A table named <c>ReporteTablaHtml</c>. It has no columns and no rows when
/// <paramref name="infoHTML"/> is null/blank or contains no table rows.
/// </returns>
public DataTable ObtenerDatosTablaHTML(string infoHTML)
{
    DataTable table = new DataTable("ReporteTablaHtml");
    if (string.IsNullOrWhiteSpace(infoHTML))
    {
        return table;
    }

    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(infoHTML);

    // SelectNodes yields null (not an empty collection) when nothing matches.
    HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes("//table//tr");
    if (nodes == null || nodes.Count == 0)
    {
        return table;
    }

    IEnumerable<string> headers = nodes[0]
        .Elements("th")
        .Select(th => th.InnerText.Trim());
    foreach (var header in headers)
    {
        table.Columns.Add(header.ToUpper());
    }

    var rows = nodes
        .Skip(1)
        .Select(tr => tr.Elements("td").Select(td => td.InnerText.Trim()).ToArray());
    foreach (var row in rows)
    {
        table.Rows.Add(row);
    }

    return table;
}
/******************************************************************/
/// <summary>
/// Appends the data rows of a parsed HTML table to <c>table</c>.
/// </summary>
/// <remarks>
/// The first <c>this.skip</c> rows are skipped. Rows whose first cell contains "Итого" or
/// "Общий итог", and rows made of a single empty cell, are ignored. Any other single-cell row
/// is treated as a section sub-header: its text (with "Площадка:" removed) is prepended as an
/// extra first value to every following row. Empty cells under <c>Double</c> columns become "0"
/// (spaces are also stripped from those cells); empty cells under <c>DateTime</c> columns become
/// "01.01.1970".
/// </remarks>
/// <param name="n">Table row nodes; <c>null</c> is treated as "no rows".</param>
public void addrows_from_nodes(HtmlNodeCollection n)
{
    if (n == null)
    {
        return;
    }

    // If the table is split into sections, the section name (e.g. the venue name in the trades
    // table or the repo type in the repo table) goes into an extra column at the start of the
    // row. Column indexes are therefore shifted relative to the cell array when cell values are
    // normalised for the column types.
    int shift = 0;

    // Values prepended to each row once a section sub-header has been seen.
    String[] add_row = { };

    // All rows except the headers.
    var rows = n.Skip(this.skip).Select(tr => tr.Elements("td").Select(td => td.InnerText.Trim()).ToArray());

    foreach (string[] row in rows)
    {
        // A row without any <td> cell carries no data; row[0] would throw on it.
        if (row.Length == 0)
        {
            continue;
        }

        // Totals and empty rows are ignored.
        bool isTotal = row[0].Contains("Итого") || row[0].Contains("Общий итог");
        bool isBlank = row.Length == 1 && row[0] == "";
        if (isTotal || isBlank)
        {
            continue;
        }

        // Presumably a single-cell row is a sub-header; turn its value into the extra column.
        if (row.Length == 1)
        {
            add_row = new string[] { row[0].Replace("Площадка:", "").Trim() };
            shift = 1;
            continue;
        }

        // Prepare cell text for conversion to double / datetime.
        for (int i = 0; i < table.Columns.Count; i++)
        {
            int cell = i - shift;
            if (cell < 0)
            {
                // The prepended section column has no source cell in this row.
                continue;
            }

            Type columnType = table.Columns[i].DataType;
            if (columnType == typeof(double))
            {
                // A number is expected: replace "empty" with 0 ...
                if (row[cell] == "")
                {
                    row[cell] = "0";
                }

                // ... and drop spaces used as digit-group separators.
                row[cell] = row[cell].Replace(" ", "");
            }
            else if (columnType == typeof(DateTime))
            {
                // A date is expected: replace "empty" with 1970-01-01.
                if (row[cell] == "")
                {
                    row[cell] = "01.01.1970";
                }
            }
        }

        // Prepend the section column when one is active.
        if (shift != 0)
        {
            table.Rows.Add(add_row.Concat(row).ToArray());
        }
        else
        {
            table.Rows.Add(row);
        }
    }
}
/// <summary>
/// Loads the page at <c>Banks.MyFinURL</c> and parses one <c>BankCurrencies</c> entry per bank node.
/// </summary>
/// <remarks>
/// The nodes selected by <c>Banks.Path</c> are used, minus the first five. For each one, the
/// trimmed text of every <c>td</c> under its grandparent is joined, each followed by
/// <c>Banks.Separator</c>, and handed to <c>BankCurrencies.Parse</c>.
/// </remarks>
/// <returns>
/// The entries parsed so far. Any exception raised while loading or parsing is written to the
/// console using <c>Banks.ErrorMessage</c> and is not rethrown.
/// </returns>
public async Task<IEnumerable<BankCurrencies>> Parse()
{
    var parsed = new List<BankCurrencies>();
    var address = Banks.MyFinURL;
    var client = new HtmlWeb();

    try
    {
        HtmlDocument page = await client.LoadFromWebAsync(address);
        HtmlNodeCollection matches = page.DocumentNode.SelectNodes(Banks.Path);

        foreach (HtmlNode match in matches.Skip(5))
        {
            var cells = match.ParentNode.ParentNode.Descendants("td");

            // Every cell text is followed by the separator, including the last one.
            string joined = string.Concat(cells.Select(cell => cell.InnerText.Trim() + Banks.Separator));

            parsed.Add(BankCurrencies.Parse(joined));
        }
    }
    catch (Exception e)
    {
        Console.WriteLine(Banks.ErrorMessage, e.Message);
    }

    return parsed;
}
/// <summary>
/// Extracts official-code / ISIN pairs from table rows.
/// </summary>
/// <remarks>
/// The first matched row is skipped. A row whose first <c>td</c> has a <c>colspan</c> attribute
/// is a group heading: its cleaned text becomes the <c>FutureOptionType</c> of the rows that
/// follow. For every other row, the official code is the text of the first cell up to the first
/// space (the whole text when there is no space) and the ISIN is the text of the second cell.
/// Rows with no <c>td</c>, or with fewer than two cells, are skipped.
/// </remarks>
/// <param name="rootNode">Node used to run the row query.</param>
/// <returns>The parsed entries; empty when no rows are found.</returns>
private List<TWISINOffclCode> GetISINList(HtmlNode rootNode)
{
    var offclCodeISINList = new List<TWISINOffclCode>();

    // Note: an XPath starting with "//" searches the whole document, not only rootNode's subtree.
    // SelectNodes yields null when nothing matches.
    HtmlNodeCollection collection = rootNode.SelectNodes("//tr");
    if (collection == null)
    {
        return offclCodeISINList;
    }

    string kindOfOption = "";
    foreach (var node in collection.Skip(1))
    {
        HtmlNodeCollection cells = node.SelectNodes("./td");
        if (cells == null)
        {
            // Row without data cells (e.g. only <th>): nothing to read.
            continue;
        }

        if (cells[0].Attributes["colspan"] != null)
        {
            kindOfOption = MiscUtil.GetCleanTextFromHtml(node.InnerText);
            continue;
        }

        if (cells.Count < 2)
        {
            continue;
        }

        string codeText = cells[0].InnerText;
        int index = codeText.IndexOf(" ");
        if (index < 0)
        {
            // NOTE(review): this fallback searches for what appears to be the same literal as
            // above; it may originally have targeted a different whitespace character - confirm.
            index = codeText.IndexOf(" ");
        }

        var ISINOffclCode = new TWISINOffclCode();

        // Without a separator the whole cell text is the code (Substring(0, -1) would throw).
        ISINOffclCode.OffclCode = index < 0 ? codeText : codeText.Substring(0, index);
        ISINOffclCode.ISIN = cells[1].InnerText;
        ISINOffclCode.FutureOptionType = kindOfOption;
        offclCodeISINList.Add(ISINOffclCode);
    }

    return offclCodeISINList;
}
/// <summary>
/// Loads the page at <c>s_wftdaUrl</c> and hands its ranking table rows to <c>ProcessWftdaRows</c>.
/// </summary>
/// <returns>The result of <c>ProcessWftdaRows</c> for every matched row except the first.</returns>
private List<WftdaRankingData> GetWftdaRankingsData()
{
    var client = new HtmlWeb();
    var page = client.Load(s_wftdaUrl);

    // First "markdown" div inside the page-content div.
    var markdownDiv = page.DocumentNode.SelectSingleNode("//div[@id='pageContent']/div[@class='markdown'][1]");

    // The heading must parse as a date (Convert.ToDateTime throws otherwise); the value itself
    // is not used afterwards.
    // NOTE(review): "//h2[1]" and "//table[1]/tbody/tr" start with "//", so they are evaluated
    // against the whole document rather than only inside markdownDiv - confirm this is intended.
    string headingText = markdownDiv.SelectSingleNode("//h2[1]").InnerText;
    DateTime rankingsDate = Convert.ToDateTime(headingText);

    var tableRows = markdownDiv.SelectNodes("//table[1]/tbody/tr");
    var dataRows = tableRows.Skip(1);
    return ProcessWftdaRows(dataRows);
}
/// <summary>
/// Searches the Sukebei magnet source for the given movie number and converts each result row
/// into a <c>MovieMagnet</c>.
/// </summary>
/// <remarks>
/// The first <c>tr</c> of the page is skipped. Rows that do not have the expected number of
/// child nodes, or that contain no magnet link, are skipped instead of aborting the search.
/// A magnet is flagged HD when its name contains "高清" or "hd" (any casing) or its size exceeds
/// 2048; it is flagged as subtitled when it is HD and its name contains "字幕" or "中文".
/// This method blocks on the asynchronous download (<c>.Result</c>).
/// </remarks>
/// <param name="movie">Movie whose <c>Number</c> is searched and whose ids are copied to the results.</param>
/// <returns>The magnets found; empty when the page could not be loaded or has no result rows.</returns>
public List<MovieMagnet> SearchMagnetFromSukebei(Movie movie)
{
    List<MovieMagnet> lstMovieMagnets = new List<MovieMagnet>();

    string searchUrl = string.Format(_movieMagnetService.LoadMagSourceUrl(MagnetSource.Sukebei), movie.Number);
    HtmlDocument htmlDocument = _htmlService.GetHtmlDocumentAsync(searchUrl, maxRetry: 1).Result;
    if (htmlDocument == null)
    {
        return lstMovieMagnets;
    }

    HtmlNodeCollection nodes = htmlDocument.DocumentNode.SelectNodes("//tr");
    if (nodes == null || nodes.Count <= 1)
    {
        return lstMovieMagnets;
    }

    const string magnetAnchor = "<a href=\"magnet:?xt";
    const int hrefPrefixLength = 9; // length of: <a href="

    foreach (var node in nodes.Skip(1))
    {
        // Child nodes 3, 5, 7 and 9 are read below.
        if (node.ChildNodes.Count < 10)
        {
            continue;
        }

        // Locate the magnet link first: a row without one cannot produce a usable entry.
        string magHref = node.ChildNodes[5].OuterHtml;
        int anchorStart = magHref.IndexOf(magnetAnchor, StringComparison.Ordinal);
        if (anchorStart < 0)
        {
            continue;
        }

        string url = magHref.Substring(anchorStart + hrefPrefixLength);
        int urlEnd = url.IndexOf("\"", StringComparison.Ordinal);
        if (urlEnd < 0)
        {
            continue;
        }

        MovieMagnet movieMagnet = new MovieMagnet()
        {
            IdMovie = movie.IdMovie,
            MovieNumber = movie.Number,
            IdMagSource = MagnetSource.Sukebei
        };

        movieMagnet.MagName = node.ChildNodes[3].InnerText.Trim();
        movieMagnet.Size = node.ChildNodes[7].InnerText.Trim().GetByteSize();

        string dateText = node.ChildNodes[9].InnerText;
        DateTime dtMag;
        if (!string.IsNullOrEmpty(dateText) && DateTime.TryParse(dateText.Trim(), out dtMag))
        {
            movieMagnet.DtMagnet = dtMag;
        }

        movieMagnet.MagnetUrl = url.Substring(0, urlEnd);
        movieMagnet.GenerateHash();

        if (movieMagnet.MagName.Contains("高清")
            || movieMagnet.MagName.IndexOf("hd", StringComparison.OrdinalIgnoreCase) >= 0
            || movieMagnet.Size > 2048)
        {
            movieMagnet.IsHD = true;
        }

        if ((movieMagnet.MagName.Contains("字幕") || movieMagnet.MagName.Contains("中文")) && movieMagnet.IsHD)
        {
            movieMagnet.HasSub = true;
        }

        lstMovieMagnets.Add(movieMagnet);
    }

    return lstMovieMagnets;
}
/// <summary>
/// Adds one row to <paramref name="table"/> for every node after the first, using the trimmed
/// text of the <c>td</c> cells whose <c>class</c> attribute contains "guide".
/// </summary>
/// <param name="nodes">Table row nodes; the first one is skipped. <c>null</c> adds nothing.</param>
/// <param name="table">Table that receives the rows.</param>
private static void PopulateRows(HtmlNodeCollection nodes, DataTable table)
{
    if (nodes == null)
    {
        return;
    }

    var rows = nodes
        .Skip(1)
        .Select(tr => tr
            .Elements("td")
            // GetAttributeValue falls back to "" for cells without a class attribute, where
            // Attributes["class"].Value would throw a NullReferenceException.
            .Where(td => td.GetAttributeValue("class", string.Empty).Contains("guide"))
            .Select(td => td.InnerText.Trim())
            .ToArray());

    foreach (var row in rows)
    {
        table.Rows.Add(row);
    }
}
/// <summary>
/// Parses the <c>li</c> items under <paramref name="node"/> (all but the first) into price entries.
/// </summary>
/// <remarks>
/// Each item text is split on " - ":
/// three or more parts give size, price and stock;
/// two parts give size and either a price (stock is then "In stock") or, when the second part
/// is not a number, a stock text with the price read from the <c>div</c> with class "nowPrice";
/// one part gives only the size and leaves the price at its default.
/// </remarks>
/// <param name="node">Node whose descendant list items are parsed.</param>
/// <returns>The parsed entries; an empty array when there are no list items.</returns>
private static PriceInfo[] ParseNextPriceInfos(HtmlNode node)
{
    IList<PriceInfo> priceInfos = new List<PriceInfo>();

    // SelectNodes yields null (not an empty collection) when nothing matches.
    HtmlNodeCollection nodes = node.SelectNodes(@".//li");
    if (nodes == null)
    {
        return priceInfos.ToArray();
    }

    foreach (HtmlNode oneNode in nodes.Skip(1))
    {
        // Split always returns at least one element, so ss[0] is safe.
        string[] ss = oneNode.InnerText.Split(new[] { @" - " }, StringSplitOptions.None);

        PriceInfo priceInfo = new PriceInfo();
        priceInfo.Size = ss[0];

        if (ss.Length >= 3)
        {
            priceInfo.Price = Convert.ToDecimal(ss[1]);
            priceInfo.Stock = ss[2];
        }
        else if (ss.Length == 2)
        {
            decimal price;
            if (Decimal.TryParse(ss[1], out price))
            {
                priceInfo.Price = price;
                priceInfo.Stock = @"In stock";
            }
            else
            {
                // The second part is a stock text; take the price from another html node.
                HtmlNode priceNode = node.SelectSingleNode(@"//div[@class=""nowPrice""]");
                string priceString = priceNode.InnerText.Trim();

                // NOTE(review): only digits are kept, so a decimal separator in the price text
                // is dropped as well (e.g. "12.99" is read as 1299) - confirm this is intended.
                priceInfo.Price = Convert.ToDecimal(new string(priceString.Where(x => Char.IsDigit(x)).ToArray()));
                priceInfo.Stock = ss[1];
            }
        }

        priceInfo.PriceCN = GBP2RMB(priceInfo.Price);
        priceInfos.Add(priceInfo);
    }

    return priceInfos.ToArray();
}
/// <summary>
/// Converts a flat list of cell nodes into <c>AgriculturalProducts</c> entities.
/// </summary>
/// <remarks>
/// Cells are consumed in groups of eight, starting after the first eight nodes (presumably the
/// header cells - confirm against the caller). Within a group, cells 0-6 are product name, low
/// price, average price, high price, category, unit and creation time. A trailing group with
/// fewer than seven cells is ignored.
/// </remarks>
/// <param name="nodes">Cell nodes in document order; <c>null</c> yields an empty result.</param>
/// <returns>The parsed entities.</returns>
public IEnumerable<IDomainModel> ParseNodes(HtmlNodeCollection nodes)
{
    const int cellsPerRecord = 8;
    const int cellsRead = 7; // indexes 0..6 are read from each group

    List<AgriculturalProducts> lst = new List<AgriculturalProducts>();
    if (nodes == null)
    {
        return lst;
    }

    // Index the collection directly instead of Skip/Take per record; stop before a group that
    // is too short to hold every cell that is read.
    for (int start = cellsPerRecord; start + cellsRead <= nodes.Count; start += cellsPerRecord)
    {
        AgriculturalProducts entity = new AgriculturalProducts();
        entity.ProductName = nodes[start].InnerText;
        entity.LowPrice = nodes[start + 1].InnerText;
        entity.AveragePrice = nodes[start + 2].InnerText;
        entity.HighPrice = nodes[start + 3].InnerText;
        entity.Category = nodes[start + 4].InnerText;
        entity.Unit = nodes[start + 5].InnerText;
        entity.CreateTime = DateTime.Parse(nodes[start + 6].InnerText);
        lst.Add(entity);
    }

    return lst;
}
/// <summary>
/// Downloads the line page for the given bus line and converts its table rows into stations.
/// </summary>
/// <param name="BusGuid">Line identifier appended to <c>LineUrl</c> as the <c>LineGuid</c> query value.</param>
/// <returns>
/// One <c>StationInLine</c> per table row after the first, leaving out rows for which the
/// <c>StationInLine</c> constructor throws <see cref="ArgumentException"/>. An empty list is
/// returned when a <see cref="WebException"/> occurs.
/// </returns>
public static IEnumerable<StationInLine> QueryBusInfo(string BusGuid)
{
    try
    {
        string page = DownloadString(LineUrl + "&LineGuid=" + BusGuid);
        HtmlNodeCollection tableRows = GetTable(page);

        var stations = new List<StationInLine>();
        foreach (HtmlNode tableRow in tableRows.Skip(1))
        {
            try
            {
                stations.Add(new StationInLine(tableRow));
            }
            catch (ArgumentException)
            {
                // Row rejected by the StationInLine constructor: leave it out.
            }
        }

        return stations;
    }
    catch (WebException)
    {
        return new List<StationInLine>();
    }
}
/// <summary>
/// Parses a manga page into a <c>MangaObject</c> (name, description, alternate names,
/// authors, genres, cover and chapter list).
/// </summary>
/// <remarks>
/// The series details are read from the markup between the "Intro Series" comment markers and
/// the chapters from the markup between the "Main Content" markers. The first chapter row is
/// skipped and the chapter list is reversed before it is returned.
/// Release texts are interpreted as "today...", "yesterday...", "N day(s)/hour(s)/minute(s)/
/// second(s) ago" (relative to the current time) or an "MM/dd/yyyy" date.
/// </remarks>
/// <param name="content">Full HTML of the manga page.</param>
/// <returns>The populated manga object.</returns>
public MangaObject ParseMangaObject(String content)
{
    // Cut the two relevant sections out of the page using the site's HTML comment markers.
    Int32 MangaInformationContentStart = content.IndexOf("<!-- Intro Series -->"),
        MangaInformationContentEnd = content.IndexOf("<!-- **END: Intro Series -->", MangaInformationContentStart);
    String MangaInformationContent = content.Substring(MangaInformationContentStart, MangaInformationContentEnd - MangaInformationContentStart);

    Int32 MangaChaptersContentStart = content.IndexOf("<!-- Main Content -->"),
        MangaChaptersContentEnd = content.IndexOf("<!-- **END: Main Content -->", MangaChaptersContentStart);
    String MangaChaptersContent = content.Substring(MangaChaptersContentStart, MangaChaptersContentEnd - MangaChaptersContentStart);

    HtmlDocument MangaObjectDocument = new HtmlDocument();
    MangaObjectDocument.LoadHtml(MangaInformationContent);

    HtmlNode MangaObjectNode = MangaObjectDocument.DocumentNode.SelectSingleNode(".//div/div");

    String MangaName = String.Empty,
        Description = String.Empty;
    List<String> AlternateNames = new List<String>(),
        AuthorsArtists = new List<String>(),
        Genres = new List<String>();

    // Each "row" div holds one detail: a bold label followed by text, links, or the <h1> title.
    foreach (HtmlNode DetailNode in MangaObjectNode.SelectNodes(".//div[2]/div[contains(@class,'row')]"))
    {
        HtmlNode DetailTypeNode = DetailNode.SelectSingleNode(".//div[1]/b[1] | .//div[1]/strong[1]"),
            DetailTextNode = (DetailTypeNode != null) ? DetailTypeNode.NextSibling : null,
            DetailDescriptionNode = (DetailTextNode != null) ? DetailTextNode.NextSibling : null,
            MangaNameNode = DetailNode.SelectSingleNode(".//div[1]/h1");
        HtmlNodeCollection DetailLinkNodes = DetailNode.SelectNodes(".//div[1]/a");

        // A row without a label is taken to be the title row.
        String DetailType = (DetailTypeNode != null) ? DetailTypeNode.InnerText.Trim().TrimEnd(':') : "MangaName",
            DetailValue = String.Empty;
        String[] DetailValues = { };

        if (DetailLinkNodes != null)
        {
            DetailValues = (from HtmlNode LinkNode in DetailLinkNodes select HtmlEntity.DeEntitize(LinkNode.InnerText.Trim())).ToArray();
        }
        else if (MangaNameNode != null)
        {
            DetailValue = HtmlEntity.DeEntitize(MangaNameNode.InnerText.Trim());
        }
        else if (DetailDescriptionNode != null)
        {
            DetailValue = HtmlEntity.DeEntitize(DetailDescriptionNode.InnerText.Trim());
        }
        else if (DetailTextNode != null)
        {
            DetailValue = HtmlEntity.DeEntitize(DetailTextNode.InnerText.Trim());
        }

        switch (DetailType)
        {
            default:
                break;

            case "MangaName":
                MangaName = DetailValue;
                break;

            case "Alternate Names":
                AlternateNames = (from String AltName in DetailValue.Split(',') select HtmlEntity.DeEntitize(AltName.Trim())).ToList();
                break;

            case "Author":
                AuthorsArtists = DetailValues.ToList();
                break;

            case "Genre":
                Genres = DetailValues.ToList();
                break;

            case "Description":
                Description = DetailValue;
                break;
        }
    }

    // Must be read before the document is reloaded with the chapter markup below.
    String Cover = ExtensionDescriptionAttribute.RootUrl + MangaObjectNode.SelectSingleNode(".//div[1]/img/@src").Attributes["src"].Value;

    List<ChapterObject> Chapters = new List<ChapterObject>();
    MangaObjectDocument.LoadHtml(MangaChaptersContent);
    HtmlNodeCollection RawChapterList = MangaObjectDocument.DocumentNode.SelectNodes(".//div[contains(@class,'row')]");

    foreach (HtmlNode RawChapterNode in RawChapterList.Skip(1))
    {
        HtmlNode ChapterNumberNode = RawChapterNode.SelectSingleNode(".//div[1]/a"),
            ReleaseDate = RawChapterNode.SelectSingleNode(".//div[2]/time");

        // "12" or "12.5": the optional fraction is the sub-chapter.
        String ChapterNumber = Regex.Match(ChapterNumberNode.InnerText, @"\d+(\.\d+)?").Value;
        String[] ChapterSub = ChapterNumber.Trim().Split('.');

        DateTime Released = DateTime.Now;

        // Trim so surrounding whitespace in the <time> element does not defeat the
        // StartsWith/EndsWith checks or the exact date format below.
        String ReleasedTxt = ReleaseDate.InnerText.Trim().ToLower();
        if (ReleasedTxt.StartsWith("today"))
        {
            Released = DateTime.Today;
        }
        else if (ReleasedTxt.StartsWith("yesterday"))
        {
            Released = DateTime.Today.AddDays(-1);
        }
        else if (ReleasedTxt.EndsWith("ago"))
        {
            Int32 tDelta = 0;
            Int32.TryParse(ReleasedTxt.Split(' ')[0], out tDelta);

            // Match the singular stem so that both "1 hour ago" and "2 hours ago" are handled.
            if (ReleasedTxt.Contains("day"))
            {
                Released = Released.AddDays(0 - tDelta);
            }
            if (ReleasedTxt.Contains("hour"))
            {
                Released = Released.AddHours(0 - tDelta);
            }
            if (ReleasedTxt.Contains("minute"))
            {
                Released = Released.AddMinutes(0 - tDelta);
            }
            if (ReleasedTxt.Contains("second"))
            {
                Released = Released.AddSeconds(0 - tDelta);
            }
        }
        else
        {
            Released = DateTime.ParseExact(ReleasedTxt, "MM/dd/yyyy", CultureInfo.InvariantCulture);
        }

        ChapterObject Chapter = new ChapterObject()
        {
            Name = HtmlEntity.DeEntitize(RawChapterNode.SelectSingleNode(".//div[1]/gray").InnerText),
            Chapter = UInt32.Parse(ChapterSub[0]),
            Locations =
            {
                new LocationObject()
                {
                    ExtensionName = ExtensionDescriptionAttribute.Name,
                    ExtensionLanguage = ExtensionDescriptionAttribute.Language,
                    Url = ExtensionDescriptionAttribute.RootUrl + ChapterNumberNode.Attributes["href"].Value
                },
            },
            Released = Released
        };
        if (ChapterSub.Length == 2)
        {
            Chapter.SubChapter = UInt32.Parse(ChapterSub[1]);
        }

        Chapters.Add(Chapter);
    }

    Chapters.Reverse();

    MangaObject MangaObj = new MangaObject()
    {
        Name = HtmlEntity.DeEntitize(MangaName),
        Description = HtmlEntity.DeEntitize(Description),
        AlternateNames = AlternateNames.ToList(),
        CoverLocations =
        {
            new LocationObject()
            {
                Url = Cover,
                ExtensionName = ExtensionDescriptionAttribute.Name,
                ExtensionLanguage = ExtensionDescriptionAttribute.Language
            }
        },
        Authors = AuthorsArtists.ToList(),
        Artists = AuthorsArtists.ToList(),
        Genres = Genres.ToList(),
        Released = Chapters.First().Released,
        Chapters = Chapters
    };

    // The site uses the literal "none" as a placeholder for missing values.
    MangaObj.AlternateNames.RemoveAll(an => an.ToLower().Equals("none"));
    MangaObj.Genres.RemoveAll(g => g.ToLower().Equals("none"));

    return MangaObj;
}
/// <summary>
/// Searches sukebei.nyaa.si for <paramref name="id"/> and converts each result row into a
/// <c>SeedMagnetSearchModel</c>.
/// </summary>
/// <remarks>
/// The first <c>tr</c> of the page is skipped. Rows that do not have the expected number of
/// child nodes, or that contain no magnet link, are skipped. The row date is taken from the
/// <c>data-timestamp</c> attribute (Unix seconds) and converted to local time; when it is
/// missing or not a number the date is the Unix epoch in local time.
/// Failures are best-effort: any exception is traced and the rows collected so far are returned.
/// </remarks>
/// <param name="id">Search text appended to the query string.</param>
/// <param name="cc">Optional cookies passed to <c>HtmlManager.GetHtmlWebClient</c>.</param>
/// <returns>
/// Results with a non-negative size, ordered by <c>CompleteCount</c> and then <c>Size</c>, both
/// descending. <c>CompleteCount</c> is not assigned by this method.
/// </returns>
public static List<SeedMagnetSearchModel> SearchSukebei(string id, CookieContainer cc = null)
{
    const string magnetAnchor = "<a href=\"magnet:?xt";
    const int hrefPrefixLength = 9; // length of: <a href="

    List<SeedMagnetSearchModel> ret = new List<SeedMagnetSearchModel>();

    try
    {
        var serachContent = "https://sukebei.nyaa.si?f=0&c=0_0&q=" + id;
        var htmlRet = HtmlManager.GetHtmlWebClient("https://sukebei.nyaa.si", serachContent, cc);

        if (htmlRet.Success)
        {
            HtmlDocument htmlDocument = new HtmlDocument();
            htmlDocument.LoadHtml(htmlRet.Content);

            // SelectNodes yields null (not an empty collection) when nothing matches.
            HtmlNodeCollection nodes = htmlDocument.DocumentNode.SelectNodes("//tr");
            if (nodes != null)
            {
                DateTime epochUtc = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc);

                foreach (var node in nodes.Skip(1))
                {
                    // Child nodes 3, 5, 7 and 9 are read below.
                    if (node.ChildNodes.Count < 10)
                    {
                        continue;
                    }

                    // Skip rows without a magnet link instead of failing the whole search.
                    var a = node.ChildNodes[5].OuterHtml;
                    int anchorStart = a.IndexOf(magnetAnchor, StringComparison.Ordinal);
                    if (anchorStart < 0)
                    {
                        continue;
                    }

                    var url = a.Substring(anchorStart + hrefPrefixLength);
                    int urlEnd = url.IndexOf("\"", StringComparison.Ordinal);
                    if (urlEnd < 0)
                    {
                        continue;
                    }

                    url = url.Substring(0, urlEnd);

                    var text = FileUtility.ReplaceInvalidChar(node.ChildNodes[3].InnerText.Trim());
                    var size = node.ChildNodes[7].InnerText.Trim();

                    // Read the attribute directly rather than string-replacing the cell's outer
                    // HTML, which only works for an empty cell with one exact class value.
                    var date = node.ChildNodes[9].GetAttributeValue("data-timestamp", string.Empty);
                    int seconds = 0;
                    int.TryParse(date, out seconds);

                    // Convert the instant itself to local time, so the UTC offset in force at
                    // that instant is used rather than the offset of 1970-01-01.
                    DateTime dt = epochUtc.AddSeconds(seconds).ToLocalTime();

                    SeedMagnetSearchModel temp = new SeedMagnetSearchModel
                    {
                        Title = text,
                        Size = FileUtility.GetFileSizeFromString(size),
                        Date = dt,
                        Url = "",
                        MagUrl = url,
                        Source = SearchSeedSiteEnum.Sukebei
                    };
                    ret.Add(temp);
                }
            }
        }
    }
    catch (Exception ee)
    {
        // Best effort: keep whatever was collected, but leave a trace of the failure.
        System.Diagnostics.Trace.TraceError("SearchSukebei failed for '{0}': {1}", id, ee);
    }

    return ret
        .Where(x => x.Size >= 0)
        .OrderByDescending(x => x.CompleteCount)
        .ThenByDescending(x => x.Size)
        .ToList();
}
/// <summary>
/// Downloads the paged symbol list from stooq.pl and returns every symbol found, followed by
/// a fixed set of manually added symbols.
/// </summary>
/// <remarks>
/// Pages are requested one after another until the "X z Y" counter on the page shows the same
/// number twice (X == Y). On each page the first two matching cells are skipped.
/// </remarks>
/// <returns>The collected symbols (full name, short name).</returns>
/// <exception cref="Exception">
/// Thrown with the message "assert" when a page adds no symbols, when the page counter cannot
/// be found, or when the final counter does not equal the number of symbols collected.
/// </exception>
public static List<Data.SymbolInfo> GetSymbolsFromWeb()
{
    List<Data.SymbolInfo> symbols = new List<Data.SymbolInfo>();
    HtmlWeb web = new HtmlWeb();

    // Loop-invariant: build the counter pattern once instead of once per page.
    Regex reNumOfItems = new Regex(@".*?(\d+) z (\d+).*");

    int page = 1;
    int added = 0;

    Console.WriteLine("GetSymbolsFromWeb()");

    while (true)
    {
        string data = "http://stooq.pl/t/?i=513&v=1&l=" + page.ToString();
        HtmlDocument doc = web.Load(data);

        // XPath of symbol name
        // *[@id="f10"]
        // SelectNodes yields null when nothing matches; the "no progress" check below then
        // reports the problem instead of a NullReferenceException.
        HtmlNodeCollection symbolNodes = doc.DocumentNode.SelectNodes("//*/td[@id=\"f10\"]");
        if (symbolNodes != null)
        {
            foreach (HtmlNode node in symbolNodes.Skip(2))
            {
                string fullName = node.InnerText;
                string shortName = node.ParentNode.FirstChild.FirstChild.InnerText;
                symbols.Add(new Data.SymbolInfo(fullName, shortName));
            }
        }

        // Every page must contribute at least one symbol, otherwise the loop would never end.
        if (symbols.Count <= added)
        {
            throw new Exception("assert");
        }
        added = symbols.Count;

        // check if this is a last page
        HtmlNodeCollection counterNodes = doc.DocumentNode.SelectNodes("//*[@id=\"f13\"]/text()[1]");
        if (counterNodes == null || counterNodes.Count == 0)
        {
            throw new Exception("assert");
        }

        string numOfItemsStr = counterNodes[0].InnerText;
        Match m = reNumOfItems.Match(numOfItemsStr);

        // When the pattern does not match, both groups are empty and therefore equal; the count
        // check below then fails, so an unreadable counter also ends in the assertion.
        if (m.Groups[1].ToString() == m.Groups[2].ToString())
        {
            if (symbols.Count.ToString() != m.Groups[2].ToString())
            {
                throw new Exception("assert");
            }
            break;
        }

        page += 1;
    }

    // manually added
    symbols.Add(new Data.SymbolInfo("_US_INTEL", "INTC.US"));
    symbols.Add(new Data.SymbolInfo("_WIG", "WIG"));
    symbols.Add(new Data.SymbolInfo("_WIG20", "WIG20"));
    symbols.Add(new Data.SymbolInfo("_FX_USDPLN", "USDPLN"));
    symbols.Add(new Data.SymbolInfo("_FX_EURPLN", "EURPLN"));
    symbols.Add(new Data.SymbolInfo("_FX_CHFPLN", "CHFPLN"));
    symbols.Add(new Data.SymbolInfo("_FX_GBPPLN", "GBPPLN"));

    return symbols;
}