Ejemplo n.º 1
0
        /// <summary>
        /// Builds a <see cref="DataTable"/> from the rows of an HTML table.
        /// </summary>
        /// <remarks>
        /// The first <c>tr</c> matched by <c>//table//tr</c> supplies the column names (its <c>th</c>
        /// cells, trimmed and upper-cased). Every following <c>tr</c> becomes one data row built from
        /// its trimmed <c>td</c> cells.
        /// </remarks>
        /// <param name="infoHTML">HTML markup that contains the table to convert.</param>
        /// <returns>
        /// A table named "ReporteTablaHtml". It has no columns and no rows when the markup
        /// contains no table rows at all.
        /// </returns>
        public DataTable ObtenerDatosTablaHTML(string infoHTML)
        {
            DataTable    table = new DataTable("ReporteTablaHtml");
            HtmlDocument doc   = new HtmlDocument();

            doc.LoadHtml(infoHTML);

            // SelectNodes yields null (not an empty collection) when nothing matches,
            // so markup without any table row must be handled before indexing.
            HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes("//table//tr");
            if (nodes == null || nodes.Count == 0)
            {
                return(table);
            }

            // Header row: one column per <th>.
            foreach (HtmlNode th in nodes[0].Elements("th"))
            {
                table.Columns.Add(th.InnerText.Trim().ToUpper());
            }

            // Remaining rows: one DataRow per <tr>, one value per <td>.
            foreach (HtmlNode tr in nodes.Skip(1))
            {
                string[] cells = tr.Elements("td")
                                 .Select(td => td.InnerText.Trim())
                                 .ToArray();
                table.Rows.Add(cells);
            }
            return(table);
        }
Ejemplo n.º 2
0
        /******************************************************************/
        /// <summary>
        /// Appends the data rows found in <paramref name="n"/> to the <c>table</c> member.
        /// </summary>
        /// <remarks>
        /// The first <c>this.skip</c> nodes are skipped. Total rows and blank rows are ignored.
        /// A row consisting of a single cell is treated as a section sub-heading: its text
        /// becomes an extra leading value that is prepended to every following data row.
        /// Empty cells in Double columns become "0" (and spaces are stripped); empty cells in
        /// DateTime columns become "01.01.1970".
        /// </remarks>
        /// <param name="n">Table row nodes (<c>tr</c>) to read <c>td</c> cells from.</param>
        public void addrows_from_nodes(HtmlNodeCollection n)
        {
            // When the table is split into sections, the section name is placed in an extra
            // leading column, so column index i maps to row cell (i - shift).
            int shift = 0;

            // Values prepended to each data row once a section sub-heading has been seen.
            string[] add_row = { };

            // All rows except the skipped leading ones.
            var rows = n.Skip(this.skip).Select(tr => tr.Elements("td").Select(td => td.InnerText.Trim()).ToArray());

            foreach (string[] row in rows)
            {
                // A <tr> without any <td> (e.g. a header row made of <th>) carries no data;
                // indexing row[0] on it would throw.
                if (row.Length == 0)
                {
                    continue;
                }

                // Ignore total rows and blank rows.
                bool isTotal = row[0].Contains("Итого") || row[0].Contains("Общий итог");
                bool isBlank = row.Length == 1 && row[0] == "";
                if (isTotal || isBlank)
                {
                    continue;
                }

                // A single-cell row is presumably a sub-heading: turn it into the extra column value.
                if (row.Length == 1)
                {
                    add_row = new string[] { row[0].Replace("Площадка:", "").Trim() };
                    shift   = 1;
                    continue;
                }

                // Normalize cell text so it converts to the column's double / DateTime type.
                for (int i = 0; i < table.Columns.Count; i++)
                {
                    int cell = i - shift;

                    // The prepended section column (cell < 0) and columns the row has no
                    // cell for are not part of this row's own values.
                    if (cell < 0 || cell >= row.Length)
                    {
                        continue;
                    }

                    Type columnType = table.Columns[i].DataType;
                    if (columnType == typeof(double))
                    {
                        // A number is expected: replace empty with 0, drop group-separating spaces.
                        if (row[cell] == "")
                        {
                            row[cell] = "0";
                        }
                        row[cell] = row[cell].Replace(" ", "");
                    }
                    if (columnType == typeof(DateTime))
                    {
                        // A date is expected: replace empty with 1970-01-01.
                        if (row[cell] == "")
                        {
                            row[cell] = "01.01.1970";
                        }
                    }
                }

                // Prepend the section column when one is active.
                if (shift != 0)
                {
                    table.Rows.Add(add_row.Concat(row).ToArray());
                }
                else
                {
                    table.Rows.Add(row);
                }
            }
        }
        /// <summary>
        /// Downloads the page at <c>Banks.MyFinURL</c> and parses one <c>BankCurrencies</c>
        /// entry for every node matched by <c>Banks.Path</c>, ignoring the first five matches.
        /// </summary>
        /// <returns>
        /// The entries parsed so far. Any exception raised while loading or parsing is written
        /// to the console using <c>Banks.ErrorMessage</c> and ends the parsing early.
        /// </returns>
        public async Task <IEnumerable <BankCurrencies> > Parse()
        {
            var parsedBanks = new List <BankCurrencies>();
            var address     = Banks.MyFinURL;
            var client      = new HtmlWeb();

            try
            {
                HtmlDocument page = await client.LoadFromWebAsync(address);

                foreach (HtmlNode bankNode in page.DocumentNode.SelectNodes(Banks.Path).Skip(5))
                {
                    // Every <td> under the node's grandparent contributes its trimmed text
                    // followed by the separator (so the result ends with a separator).
                    var    cells    = bankNode.ParentNode.ParentNode.Descendants("td");
                    string rawEntry = string.Concat(cells.Select(cell => cell.InnerText.Trim() + Banks.Separator));

                    parsedBanks.Add(BankCurrencies.Parse(rawEntry));
                }
            }
            catch (Exception failure)
            {
                Console.WriteLine(Banks.ErrorMessage, failure.Message);
            }
            return(parsedBanks);
        }
        /// <summary>
        /// Reads official-code / ISIN pairs from the table rows found via <paramref name="rootNode"/>.
        /// </summary>
        /// <remarks>
        /// The first row is skipped. A row whose first <c>td</c> has a <c>colspan</c> attribute is a
        /// section heading: its cleaned text becomes the <c>FutureOptionType</c> of the entries
        /// that follow. For other rows the official code is the first cell's text up to its first
        /// whitespace character, and the ISIN is the second cell's text.
        /// </remarks>
        /// <param name="rootNode">Node the <c>//tr</c> query is issued from.</param>
        /// <returns>The parsed entries; empty when no rows are found.</returns>
        private List <TWISINOffclCode> GetISINList(HtmlNode rootNode)
        {
            var offclCodeISINList         = new List <TWISINOffclCode>();
            HtmlNodeCollection collection = rootNode.SelectNodes("//tr");

            // SelectNodes yields null when nothing matches.
            if (collection == null)
            {
                return(offclCodeISINList);
            }

            string kindOfOption = "";

            foreach (var node in collection.Skip(1))
            {
                HtmlNodeCollection cells = node.SelectNodes("./td");

                // Rows without any <td> (e.g. header rows made of <th>) carry no data.
                if (cells == null)
                {
                    continue;
                }

                if (cells[0].Attributes["colspan"] != null)
                {
                    kindOfOption = MiscUtil.GetCleanTextFromHtml(node.InnerText);
                    continue;
                }

                // A data row needs both the code cell and the ISIN cell.
                if (cells.Count < 2)
                {
                    continue;
                }

                string codeText = cells[0].InnerText;
                int    index    = codeText.IndexOf(" ");
                if (index < 0)
                {
                    // NOTE(review): the original fallback repeated the identical lookup; presumably
                    // it was meant to target a different space character (e.g. a non-breaking
                    // space). Any whitespace character is accepted here - confirm against the page.
                    index = IndexOfFirstWhiteSpace(codeText);
                }

                var ISINOffclCode = new TWISINOffclCode();

                // Without any separator the whole cell text is the code (Substring(0, -1) would throw).
                ISINOffclCode.OffclCode        = index < 0 ? codeText : codeText.Substring(0, index);
                ISINOffclCode.ISIN             = cells[1].InnerText;
                ISINOffclCode.FutureOptionType = kindOfOption;
                offclCodeISINList.Add(ISINOffclCode);
            }
            return(offclCodeISINList);
        }

        /// <summary>Returns the index of the first whitespace character in <paramref name="text"/>, or -1.</summary>
        private static int IndexOfFirstWhiteSpace(string text)
        {
            for (int i = 0; i < text.Length; i++)
            {
                if (char.IsWhiteSpace(text[i]))
                {
                    return(i);
                }
            }
            return(-1);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Loads the page at <c>s_wftdaUrl</c> and passes its ranking table rows, minus the
        /// first one, to <c>ProcessWftdaRows</c>.
        /// </summary>
        /// <returns>Whatever <c>ProcessWftdaRows</c> produces for those rows.</returns>
        private List <WftdaRankingData> GetWftdaRankingsData()
        {
            HtmlDocument page = new HtmlWeb().Load(s_wftdaUrl);

            // First "markdown" div inside the page content.
            HtmlNode rankingsDiv = page.DocumentNode.SelectSingleNode("//div[@id='pageContent']/div[@class='markdown'][1]");

            // The converted date is not used afterwards; the conversion still runs, so a
            // heading that is not a date raises here.
            string headingText = rankingsDiv.SelectSingleNode("//h2[1]").InnerText;
            Convert.ToDateTime(headingText);

            // NOTE(review): both XPath expressions issued on rankingsDiv start with "//", which
            // in XPath addresses the whole document rather than the div's subtree - confirm
            // whether ".//" was intended.
            HtmlNodeCollection tableRows = rankingsDiv.SelectNodes("//table[1]/tbody/tr");

            return(ProcessWftdaRows(tableRows.Skip(1)));
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Searches the Sukebei magnet source for <paramref name="movie"/> and converts each
        /// result row into a <c>MovieMagnet</c>.
        /// </summary>
        /// <remarks>
        /// The first <c>tr</c> of the result page is skipped. Rows that do not have the expected
        /// child nodes, or whose link cell holds no magnet anchor, are skipped instead of
        /// producing a broken entry or aborting the search.
        /// </remarks>
        /// <param name="movie">Movie whose <c>Number</c> is inserted into the source's search URL.</param>
        /// <returns>The magnets found; empty when the page could not be loaded or has no result rows.</returns>
        public List <MovieMagnet> SearchMagnetFromSukebei(Movie movie)
        {
            // Length of the literal  <a href="  that precedes the magnet URL inside the anchor.
            const int    hrefPrefixLength = 9;
            const string magnetAnchor     = "<a href=\"magnet:?xt";

            List <MovieMagnet> lstMovieMagnets = new List <MovieMagnet>();
            string             searchUrl       = string.Format(_movieMagnetService.LoadMagSourceUrl(MagnetSource.Sukebei), movie.Number);

            // Blocks on the asynchronous fetch: this method's synchronous signature is kept for callers.
            HtmlDocument htmlDocument = _htmlService.GetHtmlDocumentAsync(searchUrl, maxRetry: 1).Result;
            if (htmlDocument == null)
            {
                return(lstMovieMagnets);
            }

            HtmlNodeCollection nodes = htmlDocument.DocumentNode.SelectNodes("//tr");
            if (nodes == null || nodes.Count <= 1)
            {
                return(lstMovieMagnets);
            }

            foreach (var node in nodes.Skip(1))
            {
                // Child nodes 3, 5, 7 and 9 are read below; shorter rows are not result rows.
                if (node.ChildNodes.Count < 10)
                {
                    continue;
                }

                // Locate the magnet URL first: a row without one cannot yield a usable entry.
                string magHref     = node.ChildNodes[5].OuterHtml;
                int    anchorStart = magHref.IndexOf(magnetAnchor, StringComparison.Ordinal);
                if (anchorStart < 0)
                {
                    continue;
                }
                string url      = magHref.Substring(anchorStart + hrefPrefixLength);
                int    urlQuote = url.IndexOf("\"", StringComparison.Ordinal);
                if (urlQuote < 0)
                {
                    continue;
                }

                MovieMagnet movieMagnet = new MovieMagnet()
                {
                    IdMovie = movie.IdMovie, MovieNumber = movie.Number, IdMagSource = MagnetSource.Sukebei
                };

                movieMagnet.MagName = node.ChildNodes[3].InnerText.Trim();
                movieMagnet.Size    = node.ChildNodes[7].InnerText.Trim().GetByteSize();

                DateTime dtMag;
                string   dateText = node.ChildNodes[9].InnerText;
                if (!string.IsNullOrEmpty(dateText) && DateTime.TryParse(dateText.Trim(), out dtMag))
                {
                    movieMagnet.DtMagnet = dtMag;
                }

                movieMagnet.MagnetUrl = url.Substring(0, urlQuote);
                movieMagnet.GenerateHash();

                // High definition: flagged by name ("高清" / "hd", any casing) or by size.
                bool nameSaysHd = movieMagnet.MagName.Contains("高清")
                                  || movieMagnet.MagName.IndexOf("hd", StringComparison.OrdinalIgnoreCase) >= 0;
                if (nameSaysHd || movieMagnet.Size > 2048)
                {
                    movieMagnet.IsHD = true;
                }

                // Subtitles are only flagged on entries already marked as HD.
                if ((movieMagnet.MagName.Contains("字幕") || movieMagnet.MagName.Contains("中文")) && movieMagnet.IsHD)
                {
                    movieMagnet.HasSub = true;
                }

                lstMovieMagnets.Add(movieMagnet);
            }

            return(lstMovieMagnets);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Adds one row to <paramref name="table"/> for every node after the first, built from
        /// the trimmed text of its <c>td</c> cells whose <c>class</c> attribute contains "guide".
        /// </summary>
        /// <param name="nodes">Table row nodes; the first one is skipped.</param>
        /// <param name="table">Table that receives the rows.</param>
        private static void PopulateRows(HtmlNodeCollection nodes, DataTable table)
        {
            foreach (HtmlNode tr in nodes.Skip(1))
            {
                string[] row = tr.Elements("td")
                               // A <td> without a class attribute is simply not a "guide" cell;
                               // dereferencing the missing attribute would throw.
                               .Where(td => td.Attributes["class"] != null &&
                                            td.Attributes["class"].Value.Contains("guide"))
                               .Select(td => td.InnerText.Trim())
                               .ToArray();

                table.Rows.Add(row);
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Parses the <c>li</c> descendants of <paramref name="node"/> (skipping the first) into
        /// price entries.
        /// </summary>
        /// <remarks>
        /// Each item's text is split on " - ":
        /// three or more parts give size, price and stock;
        /// two parts give size plus either a price (stock is then "In stock") or a stock text,
        /// in which case the price is taken from the document's <c>nowPrice</c> div;
        /// one part gives the size only.
        /// </remarks>
        /// <param name="node">Node whose <c>li</c> descendants are parsed.</param>
        /// <returns>The parsed entries; empty when there are no list items.</returns>
        private static PriceInfo[] ParseNextPriceInfos(HtmlNode node)
        {
            IList <PriceInfo>  priceInfos = new List <PriceInfo>();
            HtmlNodeCollection nodes      = node.SelectNodes(@".//li");

            // SelectNodes yields null when nothing matches.
            if (nodes == null)
            {
                return(priceInfos.ToArray());
            }

            foreach (HtmlNode oneNode in nodes.Skip(1))
            {
                // Split always returns at least one element, so ss[0] is safe.
                string[]  ss        = oneNode.InnerText.Split(new[] { @" - " }, StringSplitOptions.None);
                PriceInfo priceInfo = new PriceInfo();
                priceInfo.Size = ss[0];

                if (ss.Length >= 3)
                {
                    priceInfo.Price = Convert.ToDecimal(ss[1]);
                    priceInfo.Stock = ss[2];
                }
                else if (ss.Length == 2)
                {
                    decimal price;
                    if (Decimal.TryParse(ss[1], out price))
                    {
                        priceInfo.Price = price;
                        priceInfo.Stock = @"In stock";
                    }
                    else
                    {
                        // The second part is a stock text; get the price from another html node.
                        priceInfo.Stock = ss[1];

                        HtmlNode priceNode = node.SelectSingleNode(@"//div[@class=""nowPrice""]");
                        if (priceNode != null)
                        {
                            // NOTE(review): keeping only digits also drops a decimal separator
                            // ("12.50" becomes 1250) - confirm this matches the page's format.
                            string  digits = new string(priceNode.InnerText.Trim().Where(x => Char.IsDigit(x)).ToArray());
                            decimal fallbackPrice;
                            if (Decimal.TryParse(digits, out fallbackPrice))
                            {
                                priceInfo.Price = fallbackPrice;
                            }
                        }
                    }
                }

                priceInfo.PriceCN = GBP2RMB(priceInfo.Price);
                priceInfos.Add(priceInfo);
            }

            return(priceInfos.ToArray());
        }
        /// <summary>
        /// Converts a flat list of cell nodes into <c>AgriculturalProducts</c> entries.
        /// </summary>
        /// <remarks>
        /// The nodes are consumed in groups of eight, and the first group is skipped. Within a
        /// group, cells 0-6 are product name, low price, average price, high price, category,
        /// unit and creation time; the eighth cell is not read. A trailing group that does not
        /// hold all seven cells that are read is ignored.
        /// </remarks>
        /// <param name="nodes">Cell nodes in document order; may be null.</param>
        /// <returns>The parsed entries; empty when <paramref name="nodes"/> is null or too short.</returns>
        public IEnumerable <IDomainModel> ParseNodes(HtmlNodeCollection nodes)
        {
            const int groupSize = 8;   // nodes per table row
            const int cellsRead = 7;   // indices 0..6 of each group are used

            List <AgriculturalProducts> lst = new List <AgriculturalProducts>();

            if (nodes == null)
            {
                return(lst);
            }

            // Index the collection directly; stop before a group that is too short to read,
            // which would otherwise throw IndexOutOfRangeException.
            for (int start = groupSize; start + cellsRead <= nodes.Count; start += groupSize)
            {
                AgriculturalProducts entity = new AgriculturalProducts();
                entity.LowPrice     = nodes[start + 1].InnerText;
                entity.AveragePrice = nodes[start + 2].InnerText;
                entity.HighPrice    = nodes[start + 3].InnerText;
                entity.Category     = nodes[start + 4].InnerText;
                entity.Unit         = nodes[start + 5].InnerText;
                entity.CreateTime   = DateTime.Parse(nodes[start + 6].InnerText);
                entity.ProductName  = nodes[start].InnerText;
                lst.Add(entity);
            }
            return(lst);
        }
Ejemplo n.º 10
0
 /// <summary>
 /// Downloads the line page for <paramref name="BusGuid"/> and builds one
 /// <c>StationInLine</c> per table row after the first.
 /// </summary>
 /// <param name="BusGuid">Value appended to <c>LineUrl</c> as the <c>LineGuid</c> query parameter.</param>
 /// <returns>
 /// The stations that could be constructed; an empty list when a <c>WebException</c> occurs.
 /// </returns>
 public static IEnumerable <StationInLine> QueryBusInfo(string BusGuid)
 {
     try {
         var page = DownloadString(LineUrl + "&LineGuid=" + BusGuid);
         HtmlNodeCollection tableRows = GetTable(page);
         var stations = new List <StationInLine>();
         foreach (HtmlNode stationRow in tableRows.Skip(1))
         {
             try {
                 stations.Add(new StationInLine(stationRow));
             }
             catch (ArgumentException) {
                 // Rows rejected by the StationInLine constructor are left out.
             }
         }
         return(stations);
     }
     catch (WebException) {
         return(new List <StationInLine>());
     }
 }
Ejemplo n.º 11
0
        /// <summary>
        /// Parses a manga page into a <c>MangaObject</c>: name, description, alternate names,
        /// authors/artists, genres, cover location and the chapter list.
        /// </summary>
        /// <remarks>
        /// The page is cut into two sections delimited by HTML comments ("Intro Series" and
        /// "Main Content"); the first is read for the manga details, the second for chapters.
        /// </remarks>
        /// <param name="content">Full HTML of the manga page.</param>
        /// <returns>The populated manga object, with chapters in reverse of page order.</returns>
        public MangaObject ParseMangaObject(String content)
        {
            // Slice out the details section between its start and end marker comments.
            // NOTE(review): a missing marker makes IndexOf return -1 and the following calls throw - confirm callers guarantee the markers.
            Int32 MangaInformationContentStart = content.IndexOf("<!-- Intro Series -->"),
                  MangaInformationContentEnd   = content.IndexOf("<!-- **END: Intro Series -->", MangaInformationContentStart);
            String MangaInformationContent     = content.Substring(MangaInformationContentStart, MangaInformationContentEnd - MangaInformationContentStart);

            // Slice out the chapter-list section the same way.
            Int32 MangaChaptersContentStart = content.IndexOf("<!-- Main Content -->"),
                  MangaChaptersContentEnd   = content.IndexOf("<!-- **END: Main Content -->", MangaChaptersContentStart);
            String MangaChaptersContent     = content.Substring(MangaChaptersContentStart, MangaChaptersContentEnd - MangaChaptersContentStart);

            HtmlDocument MangaObjectDocument = new HtmlDocument();

            MangaObjectDocument.LoadHtml(MangaInformationContent);

            HtmlNode MangaObjectNode = MangaObjectDocument.DocumentNode.SelectSingleNode(".//div/div");

            String MangaName             = String.Empty,
                   Description           = String.Empty;
            List <String> AlternateNames = new List <String>(),
                          AuthorsArtists = new List <String>(),
                          Genres         = new List <String>();

            // Each "row" div in the second child div holds one detail (label plus value).
            foreach (HtmlNode DetailNode in MangaObjectNode.SelectNodes(".//div[2]/div[contains(@class,'row')]"))
            {
                // Label node (<b>/<strong>), the two siblings after it, an optional <h1> title and optional links.
                HtmlNode DetailTypeNode             = DetailNode.SelectSingleNode(".//div[1]/b[1] | .//div[1]/strong[1]"),
                              DetailTextNode        = (DetailTypeNode != null) ? DetailTypeNode.NextSibling : null,
                              DetailDescriptionNode = (DetailTextNode != null) ? DetailTextNode.NextSibling : null,
                              MangaNameNode         = DetailNode.SelectSingleNode(".//div[1]/h1");
                HtmlNodeCollection DetailLinkNodes  = DetailNode.SelectNodes(".//div[1]/a");
                // A row without a label node is treated as the manga name row.
                String             DetailType       = (DetailTypeNode != null) ? DetailTypeNode.InnerText.Trim().TrimEnd(':') : "MangaName",
                                   DetailValue = String.Empty;
                String[] DetailValues          = { };
                // Value source priority: links, then <h1>, then the second sibling, then the first sibling.
                if (DetailLinkNodes != null)
                {
                    DetailValues = (from HtmlNode LinkNode in DetailLinkNodes select HtmlEntity.DeEntitize(LinkNode.InnerText.Trim())).ToArray();
                }
                else if (MangaNameNode != null)
                {
                    DetailValue = HtmlEntity.DeEntitize(MangaNameNode.InnerText.Trim());
                }
                else if (DetailDescriptionNode != null)
                {
                    DetailValue = HtmlEntity.DeEntitize(DetailDescriptionNode.InnerText.Trim());
                }
                else if (DetailTextNode != null)
                {
                    DetailValue = HtmlEntity.DeEntitize(DetailTextNode.InnerText.Trim());
                }

                // Store the value under the matching field; unrecognized labels are ignored.
                switch (DetailType)
                {
                default: break;

                case "MangaName": MangaName = DetailValue; break;

                case "Alternate Names": AlternateNames = (from String AltName in DetailValue.Split(',') select HtmlEntity.DeEntitize(AltName.Trim())).ToList(); break;

                case "Author": AuthorsArtists = DetailValues.ToList(); break;

                case "Genre": Genres = DetailValues.ToList(); break;

                case "Description": Description = DetailValue; break;
                }
            }


            // Cover URL: extension root URL plus the src of the image in the first child div.
            String Cover = ExtensionDescriptionAttribute.RootUrl + MangaObjectNode.SelectSingleNode(".//div[1]/img/@src").Attributes["src"].Value;

            List <ChapterObject> Chapters = new List <ChapterObject>();

            // The same document instance is reloaded with the chapter section.
            MangaObjectDocument.LoadHtml(MangaChaptersContent);
            HtmlNodeCollection RawChapterList = MangaObjectDocument.DocumentNode.SelectNodes(".//div[contains(@class,'row')]");

            // The first row is skipped (presumably a header row - verify against the page).
            foreach (HtmlNode RawChapterNode in RawChapterList.Skip(1))
            {
                HtmlNode ChapterNumberNode = RawChapterNode.SelectSingleNode(".//div[1]/a"),
                              ReleaseDate  = RawChapterNode.SelectSingleNode(".//div[2]/time");
                // First number in the link text, optionally with a ".N" sub-chapter part.
                String   ChapterNumber     = Regex.Match(ChapterNumberNode.InnerText, @"\d+(\.\d+)?").Value;
                String[] ChapterSub        = ChapterNumber.Trim().Split('.');


                // Release time: "today", "yesterday", "<n> hours/minutes/seconds ago", or an MM/dd/yyyy date.
                DateTime Released    = DateTime.Now;
                String   ReleasedTxt = ReleaseDate.InnerText.ToLower();
                if (ReleasedTxt.StartsWith("today"))
                {
                    Released = DateTime.Today;
                }
                else if (ReleasedTxt.StartsWith("yesterday"))
                {
                    Released = DateTime.Today.AddDays(-1);
                }
                else if (ReleasedTxt.EndsWith("ago"))
                {
                    // Only the plural words "hours", "minutes" and "seconds" are recognized;
                    // any other "... ago" text leaves Released at the current time.
                    Int32 tDelta = 0;
                    Int32.TryParse(ReleasedTxt.Split(' ')[0], out tDelta);
                    if (ReleasedTxt.Contains("hours"))
                    {
                        Released = Released.AddHours(0 - tDelta);
                    }
                    if (ReleasedTxt.Contains("minutes"))
                    {
                        Released = Released.AddMinutes(0 - tDelta);
                    }
                    if (ReleasedTxt.Contains("seconds"))
                    {
                        Released = Released.AddSeconds(0 - tDelta);
                    }
                }
                else
                {
                    Released = DateTime.ParseExact(ReleasedTxt, "MM/dd/yyyy", CultureInfo.InvariantCulture);
                }

                ChapterObject Chapter = new ChapterObject()
                {
                    Name      = HtmlEntity.DeEntitize(RawChapterNode.SelectSingleNode(".//div[1]/gray").InnerText),
                    Chapter   = UInt32.Parse(ChapterSub[0]),
                    Locations =
                    {
                        new LocationObject()
                        {
                            ExtensionName     = ExtensionDescriptionAttribute.Name,
                            ExtensionLanguage = ExtensionDescriptionAttribute.Language,
                            Url = ExtensionDescriptionAttribute.RootUrl + ChapterNumberNode.Attributes["href"].Value
                        },
                    },
                    Released = Released
                };
                // A "major.minor" chapter number carries a sub-chapter.
                if (ChapterSub.Length == 2)
                {
                    Chapter.SubChapter = UInt32.Parse(ChapterSub[1]);
                }
                Chapters.Add(Chapter);
            }
            // Chapters are stored in the reverse of the order they appear on the page.
            Chapters.Reverse();
            MangaObject MangaObj = new MangaObject()
            {
                Name           = HtmlEntity.DeEntitize(MangaName),
                Description    = HtmlEntity.DeEntitize(Description),
                AlternateNames = AlternateNames.ToList(),
                CoverLocations = { new LocationObject()
                                   {
                                       Url               = Cover,
                                       ExtensionName     = ExtensionDescriptionAttribute.Name,
                                       ExtensionLanguage = ExtensionDescriptionAttribute.Language
                                   } },
                // The same "Author" values are used for both authors and artists.
                Authors  = AuthorsArtists.ToList(),
                Artists  = AuthorsArtists.ToList(),
                Genres   = Genres.ToList(),
                // Release of the first chapter after reversal; First() throws when there are no chapters.
                Released = Chapters.First().Released,
                Chapters = Chapters
            };

            // Drop the "none" placeholder entries (any casing).
            MangaObj.AlternateNames.RemoveAll(an => an.ToLower().Equals("none"));
            MangaObj.Genres.RemoveAll(g => g.ToLower().Equals("none"));
            return(MangaObj);
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Searches sukebei.nyaa.si for <paramref name="id"/> and converts each result row into
        /// a <c>SeedMagnetSearchModel</c>.
        /// </summary>
        /// <remarks>
        /// The first <c>tr</c> of the result page is skipped. Rows that lack the expected child
        /// nodes or a magnet anchor are skipped individually. The search is best-effort: any
        /// exception ends it and the rows collected so far are returned.
        /// </remarks>
        /// <param name="id">Search term appended to the query string.</param>
        /// <param name="cc">Optional cookies passed on to <c>HtmlManager.GetHtmlWebClient</c>.</param>
        /// <returns>
        /// Results with a non-negative size, ordered by <c>CompleteCount</c> and then size, both descending.
        /// </returns>
        public static List <SeedMagnetSearchModel> SearchSukebei(string id, CookieContainer cc = null)
        {
            // Length of the literal  <a href="  that precedes the magnet URL inside the anchor.
            const int    hrefPrefixLength = 9;
            const string magnetAnchor     = "<a href=\"magnet:?xt";

            List <SeedMagnetSearchModel> ret = new List <SeedMagnetSearchModel>();

            try
            {
                var serachContent = "https://sukebei.nyaa.si?f=0&c=0_0&q=" + id;
                var htmlRet       = HtmlManager.GetHtmlWebClient("https://sukebei.nyaa.si", serachContent, cc);

                if (htmlRet.Success)
                {
                    HtmlDocument htmlDocument = new HtmlDocument();
                    htmlDocument.LoadHtml(htmlRet.Content);

                    // SelectNodes yields null when the page has no table rows.
                    HtmlNodeCollection nodes = htmlDocument.DocumentNode.SelectNodes("//tr");

                    if (nodes != null)
                    {
                        foreach (var node in nodes.Skip(1))
                        {
                            // Child nodes 3, 5, 7 and 9 are read below; shorter rows are not result rows.
                            if (node.ChildNodes.Count < 10)
                            {
                                continue;
                            }

                            // Extract the magnet URL; rows without one are skipped.
                            var a           = node.ChildNodes[5].OuterHtml;
                            int anchorStart = a.IndexOf(magnetAnchor, StringComparison.Ordinal);
                            if (anchorStart < 0)
                            {
                                continue;
                            }
                            var url      = a.Substring(anchorStart + hrefPrefixLength);
                            int urlQuote = url.IndexOf("\"", StringComparison.Ordinal);
                            if (urlQuote < 0)
                            {
                                continue;
                            }
                            url = url.Substring(0, urlQuote);

                            var text = FileUtility.ReplaceInvalidChar(node.ChildNodes[3].InnerText.Trim());
                            var size = node.ChildNodes[7].InnerText.Trim();

                            // Unix timestamp from the cell's data-timestamp attribute; stays 0 when
                            // the attribute is absent or not a number.
                            int seconds   = 0;
                            var timestamp = node.ChildNodes[9].Attributes["data-timestamp"];
                            if (timestamp != null)
                            {
                                int.TryParse(timestamp.Value, out seconds);
                            }

                            // Add to the UTC epoch first, then convert, so the local offset
                            // (including daylight saving) is the one valid at that instant.
                            DateTime dt = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc).AddSeconds(seconds).ToLocalTime();

                            SeedMagnetSearchModel temp = new SeedMagnetSearchModel
                            {
                                Title  = text,
                                Size   = FileUtility.GetFileSizeFromString(size),
                                Date   = dt,
                                Url    = "",
                                MagUrl = url,
                                Source = SearchSeedSiteEnum.Sukebei
                            };

                            ret.Add(temp);
                        }
                    }
                }
            }
            catch (Exception ee)
            {
                // Best-effort search: keep what was collected, but leave a trace of the failure.
                System.Diagnostics.Debug.WriteLine("SearchSukebei failed: " + ee);
            }

            return(ret.Where(x => x.Size >= 0).OrderByDescending(x => x.CompleteCount).ThenByDescending(x => x.Size).ToList());
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Collects symbol names from the paged stooq.pl listing and appends a fixed set of
        /// manually chosen symbols.
        /// </summary>
        /// <remarks>
        /// Pages are loaded one after another until the page's "<c>n z m</c>" counter shows
        /// equal numbers. An <see cref="Exception"/> with the message "assert" is thrown when a
        /// page adds no symbols, or when the final count differs from the counter's total.
        /// </remarks>
        /// <returns>All symbols read from the site followed by the manually added ones.</returns>
        public static List <Data.SymbolInfo> GetSymbolsFromWeb()
        {
            var symbols       = new List <Data.SymbolInfo>();
            var web           = new HtmlWeb();
            int previousCount = 0;

            Console.WriteLine("GetSymbolsFromWeb()");

            // Matches the "<shown> z <total>" item counter.
            var counterPattern = new Regex(@".*?(\d+) z (\d+).*");

            for (int page = 1; ; page++)
            {
                HtmlDocument doc = web.Load("http://stooq.pl/t/?i=513&v=1&l=" + page.ToString());

                // Symbol name cells carry id "f10"; the first two matches are skipped.
                foreach (HtmlNode nameCell in doc.DocumentNode.SelectNodes("//*/td[@id=\"f10\"]").Skip(2))
                {
                    string fullName  = nameCell.InnerText;
                    string shortName = nameCell.ParentNode.FirstChild.FirstChild.InnerText;
                    symbols.Add(new Data.SymbolInfo(fullName, shortName));
                }

                // Every page must contribute at least one symbol.
                if (symbols.Count <= previousCount)
                {
                    throw new Exception("assert");
                }
                previousCount = symbols.Count;

                // Last page reached when both counter numbers are equal.
                string counterText = doc.DocumentNode.SelectNodes("//*[@id=\"f13\"]/text()[1]")[0].InnerText;
                Match  counter     = counterPattern.Match(counterText);
                string shown       = counter.Groups[1].ToString();
                string total       = counter.Groups[2].ToString();

                if (shown == total)
                {
                    if (symbols.Count.ToString() != total)
                    {
                        throw new Exception("assert");
                    }
                    break;
                }
            }

            // manually added
            symbols.Add(new Data.SymbolInfo("_US_INTEL", "INTC.US"));

            symbols.Add(new Data.SymbolInfo("_WIG", "WIG"));
            symbols.Add(new Data.SymbolInfo("_WIG20", "WIG20"));

            symbols.Add(new Data.SymbolInfo("_FX_USDPLN", "USDPLN"));
            symbols.Add(new Data.SymbolInfo("_FX_EURPLN", "EURPLN"));
            symbols.Add(new Data.SymbolInfo("_FX_CHFPLN", "CHFPLN"));
            symbols.Add(new Data.SymbolInfo("_FX_GBPPLN", "GBPPLN"));

            return(symbols);
        }