コード例 #1
1
ファイル: HtmlHelper.cs プロジェクト: Klaudit/inbox2_desktop
        /// <summary>
        /// Loads the page at <paramref name="url"/> and returns it as an XML document.
        /// </summary>
        /// <param name="web">The <c>HtmlWeb</c> instance used to fetch the page.</param>
        /// <param name="url">The URL to load; also passed to the link absolutizer as the base URL.</param>
        /// <param name="format">
        /// When equal to <c>"html"</c>, the page is first converted through
        /// <c>HtmlWeb.LoadHtmlAsXml</c> and the result is re-parsed; any other value
        /// loads the page directly with <c>HtmlWeb.Load</c>.
        /// </param>
        /// <param name="absolutizeLinks">
        /// if set to <c>true</c>, <c>AttributeReferenceAbsolutizer.ExecuteDefaultAbsolutization</c>
        /// is run on the document node before it is converted to XML.
        /// </param>
        /// <returns>An <c>XmlDocument</c> loaded from the outer HTML of the parsed document node.</returns>
        public static XmlDocument LoadHtmlAsXml(HtmlWeb web, string url, string format,
            bool absolutizeLinks)
        {
            HtmlDocument doc;

            if (format == "html")
            {
                string content;

                // The stream and writer are only needed on this path, and both are
                // released deterministically once the text has been read back.
                using (MemoryStream m = new MemoryStream())
                using (XmlTextWriter xtw = new XmlTextWriter(m, null))
                {
                    web.LoadHtmlAsXml(url, xtw);
                    xtw.Flush();

                    // Rewind the memory stream and read everything that was written.
                    // The reader is not disposed separately: the enclosing using
                    // statements own the stream and must be the ones to close it.
                    m.Position = 0;
                    content = new StreamReader(m).ReadToEnd();
                }

                doc = new HtmlDocument();
                doc.OptionOutputAsXml = true;
                doc.LoadHtml(content);
            }
            else
            {
                doc = web.Load(url);
                doc.OptionOutputAsXml = true;
            }

            if (absolutizeLinks)
            {
                AttributeReferenceAbsolutizer.ExecuteDefaultAbsolutization
                    (doc.DocumentNode, url);
            }

            // Create, fill, and return the xml document
            XmlDocument xdoc = new XmlDocument();
            xdoc.LoadXml(doc.DocumentNode.OuterHtml);

            return xdoc;
        }
コード例 #2
0
ファイル: GetDocLinks.cs プロジェクト: ArsenShnurkov/beagle-1
		// Downloads a page, saves a local copy, then prints the urls that
		// DocumentWithLinks exposes through its Links and References collections.
		static void Main(string[] args)
		{
			HtmlWeb web = new HtmlWeb();
			string pageUrl = @"http://www.microsoft.com";
			HtmlDocument page = web.Load(pageUrl);
			page.Save("mshome.htm");

			DocumentWithLinks linkInfo = new DocumentWithLinks(page);

			// First section: entries of the Links collection.
			Console.WriteLine("Linked urls:");
			int linkIndex = 0;
			while (linkIndex < linkInfo.Links.Count)
			{
				Console.WriteLine(linkInfo.Links[linkIndex]);
				linkIndex++;
			}

			// Second section: entries of the References collection.
			Console.WriteLine("Referenced urls:");
			int referenceIndex = 0;
			while (referenceIndex < linkInfo.References.Count)
			{
				Console.WriteLine(linkInfo.References[referenceIndex]);
				referenceIndex++;
			}
		}
コード例 #3
0
ファイル: Program.cs プロジェクト: BBBocian/Csharp
        // Loads a local HTML file, selects the price <span> nodes by XPath and prints
        // the first four of them labelled ON / PB95 / PB98 / LPG.
        static void Main(string[] args)
        {
            /*
            string webAdd = "http://moto.money.pl/ceny-paliw/wroclaw,pb95.html";
            string exprON = ".+gaz_box.+>([0-9].[0-9]+)</span>.+[0-9]";
            GetDataFromWeb ON = new GetDataFromWeb(webAdd, exprON);

            List<string> resultsON = ON.getResults();

            foreach(string x in resultsON)
                Console.WriteLine(x);
            */

            HtmlWeb webGet = new HtmlWeb();
            HtmlDocument document = webGet.Load(@"C:\Users\Bocian\Documents\Visual Studio 2013\Projects\Xpath first proj\bookshop.html");
            HtmlNodeCollection priceNodes = document.DocumentNode.SelectNodes("/html/body/div/div/div/div/div/ul/li/span");

            // SelectNodes yields null (not an empty collection) when nothing matches,
            // and the code below needs four entries; bail out instead of crashing.
            int found = priceNodes == null ? 0 : priceNodes.Count;
            if (found < 4)
            {
                Console.WriteLine("Expected at least 4 price nodes but found {0}.", found);
                Console.ReadLine();
                return;
            }

            // The differing amounts of padding keep the values in one column.
            Console.WriteLine("{0}    {1}","ON",priceNodes[0].InnerHtml);
            Console.WriteLine("{0}  {1}", "PB95", priceNodes[1].InnerHtml);
            Console.WriteLine("{0}  {1}", "PB98", priceNodes[2].InnerHtml);
            Console.WriteLine("{0}   {1}", "LPG", priceNodes[3].InnerHtml);

            Console.ReadLine();
        }
コード例 #4
0
        // Scrapes one manga page: title and cover from the 'manga-cover' image,
        // edition/state/resume from the first three <h4> nodes, tags from the first
        // <ul>, and one ChapterScrapModel per 'chapter' anchor (pages via GetPages).
        // Returns the populated model with chapters in reversed page order.
        static MangaScrapModel ScrapingManga(string url)
        {
            MangaScrapModel manga        = new MangaScrapModel();
            HtmlWeb         htmlWeb      = new HtmlWeb();
            HtmlDocument    htmlDocument = htmlWeb.Load(url);

            // Manga Details
            Console.WriteLine("Load html document");
            var coverAttributes = htmlDocument.DocumentNode.SelectSingleNode("//img[@class='manga-cover']").Attributes;

            foreach (var attribute in coverAttributes)
            {
                if (attribute.Name == "alt")
                {
                    manga.Title = attribute.Value.Replace("manga", "").Trim();
                }
                if (attribute.Name == "src")
                {
                    manga.CoverUrl = attribute.Value;
                }
            }

            var htmlNode = htmlDocument.DocumentNode.SelectSingleNode("//div[@class='manga-details-extended']");

            // NOTE(review): these XPaths start with "//", so they are evaluated against
            // the whole document rather than only htmlNode; the indexes below were
            // presumably chosen with that in mind, so the expressions are left as-is.
            var detailNode = htmlNode.SelectNodes("//h4");

            manga.DateEdition = detailNode[0].InnerHtml;
            manga.State       = detailNode[1].InnerHtml;
            manga.Resume      = detailNode[2].InnerHtml;
            var texts = htmlNode.SelectNodes("//ul").First().InnerText.Split('\n');

            manga.Tags = new List<string>();
            foreach (var line in texts)
            {
                // Trim before testing for emptiness so whitespace-only lines do not
                // end up as an empty tag.
                var tag = line.Trim();
                if (tag.Length > 0 && !manga.Tags.Contains(tag))
                {
                    manga.Tags.Add(tag);
                }
            }
            Console.WriteLine("Manga details extracted with success");

            manga.Chapters = new List<ChapterScrapModel>();
            var chapterLinks = htmlDocument.DocumentNode.SelectNodes("//a[@class='chapter']");

            // SelectNodes returns null when no chapter anchor exists; a manga without
            // chapters is returned with an empty chapter list instead of crashing.
            if (chapterLinks == null)
            {
                return(manga);
            }

            // Chapters are numbered downwards from the total as the anchors are visited.
            int chNb = chapterLinks.Count;

            foreach (var link in chapterLinks)
            {
                var message = "chapter" + (chapterLinks.Count - chNb + 1) + " extracting...";
                Console.Write(message);
                ChapterScrapModel chapter = new ChapterScrapModel
                {
                    Number = chNb
                };

                // Drop the last two path segments of the href and append "0/full".
                string urlch   = "";
                var    urlPart = link.Attributes["href"].Value.Split('/');
                for (int i = 0; i < urlPart.Length - 2; i++)
                {
                    urlch += urlPart[i] + '/';
                }
                urlch        += "0/full";
                chapter.Url   = urlch;
                chapter.Pages = GetPages(urlch);
                chapter.Title = link.InnerHtml;
                manga.Chapters.Add(chapter);
                chNb--;

                // Erase the progress message from the console line.
                for (int j = 0; j < message.Length; j++)
                {
                    Console.Write("\b \b");
                }
            }
            manga.Chapters.Reverse();
            return(manga);
        }
コード例 #5
0
        // Scrapes one game page into a Game: metadata from #table4, player count
        // from #table19, disc serial numbers from #table7, and the cover image from
        // #table2 (downloaded into the "covers" folder, named after the first serial).
        private Game GetGame(string url)
        {
            HtmlWeb web = new HtmlWeb();

            // The page itself is fetched with "https" replaced by "http".
            var html = web.Load(url.Replace("https", "http"));
            var dom  = html.DocumentNode;

            var metaTable     = dom.QuerySelector("#table4");
            var featuresTable = dom.QuerySelector("#table19");
            var discsTable    = dom.QuerySelector("#table7");

            var game = new Game()
            {
                Title       = GetContent(metaTable.QuerySelector("tr:nth-child(1) td:nth-child(2)")),
                CommonTitle = GetContent(metaTable.QuerySelector("tr:nth-child(2) td:nth-child(2)")),
                Region      = GetContent(metaTable.QuerySelector("tr:nth-child(4) td:nth-child(2)")),
                Genre       = GetContent(metaTable.QuerySelector("tr:nth-child(5) td:nth-child(2)")),
                Developer   = GetContent(metaTable.QuerySelector("tr:nth-child(6) td:nth-child(2)")).TrimEnd('.'),
                Publisher   = GetContent(metaTable.QuerySelector("tr:nth-child(7) td:nth-child(2)")).TrimEnd('.'),
                Players     = GetPlayerCount(GetContent(featuresTable.QuerySelector("tr:nth-child(1) td:nth-child(2)"))),
                Discs       = new List<Disc>(),
                Covers      = new List<Cover>()
            };

            // Serial numbers sit in columns 2..7 of the second row; empty cells are skipped.
            var serialNumbers = new List<string>();

            for (int i = 2; i <= 7; i++)
            {
                var cell = GetContent(discsTable.QuerySelector($"tr:nth-child(2) td:nth-child({i})"));

                if (cell != "")
                {
                    serialNumbers.Add(cell);
                }
            }

            foreach (var serialNumber in serialNumbers)
            {
                game.Discs.Add(new Disc()
                {
                    SerialNumber = serialNumber,
                    Game         = game,
                });
            }

            var coverNode = dom.QuerySelector("#table2 tr:nth-child(2) td:nth-child(1) img");
            var coverSrc  = coverNode?.GetAttributeValue("src", "") ?? "";

            // A cover can only be stored when there is an image to fetch and a serial
            // number to name the file after; otherwise the game is returned without one.
            if (coverSrc != "" && serialNumbers.Count > 0)
            {
                var cover = new Cover()
                {
                    File = serialNumbers[0] + Path.GetExtension(coverSrc),
                    Game = game
                };

                game.Covers.Add(cover);

                using (WebClient wc = new WebClient())
                {
                    // Resolve the image against the page URL with Uri rather than
                    // string surgery; FileInfo is a file-system type and is not
                    // meant for taking URLs apart.
                    wc.DownloadFile(
                        new Uri(new Uri(url), coverSrc),
                        Path.Combine("covers", cover.File)
                        );
                }
            }

            var dateString = GetContent(metaTable.QuerySelector("tr:nth-child(8) td:nth-child(2)"));

            // DateReleased is only set when the cell parses as a date.
            if (DateTime.TryParse(dateString, out var dateReleased))
            {
                game.DateReleased = dateReleased;
            }

            Console.WriteLine($"Grabbed info for [{game.Title}]");

            return(game);
        }
コード例 #6
0
        // Walks every "Dealer.Com" dealership returned by VehicleDBManager, follows
        // the rel='next' pagination of each dealer's listing pages, hands the
        // 'item notshared' list entries to ProcessVehicleListOnPage, and finally marks
        // every vehicle still left in the previously-unsold list as sold.
        public static void ProcessAllDealers()
        {
            HtmlWeb webGet = new HtmlWeb();

            List<DEALERSHIP> dealers = VehicleDBManager.GetDealerships("Dealer.Com");

            if (dealers == null)
            {
                return;
            }

            foreach (DEALERSHIP dealer in dealers)
            {
                string DealerName = dealer.DEALER_NAME;
                string MainUrl    = dealer.DEALER_URL;
                Console.WriteLine($"************\r\n*** Processing Dealer: {DealerName}\r\n************");
                HtmlDocument mainpage       = webGet.Load(MainUrl);
                bool         KeepProcessing = (mainpage != null);
                int          PageCount      = 0;

                BaseURI = new Uri(MainUrl).GetLeftPart(UriPartial.Authority);
                Console.WriteLine($"Base URL for dealership: {BaseURI}");
                List<VEHICLE> PreviouslyFoundAtDealer = VehicleDBManager.GetAllUnsoldVehiclesForDealer(DealerName);

                while (KeepProcessing)
                {
                    Console.WriteLine($"\r\n\r\bRetrieving page #{++PageCount} of vehicles for dealer {DealerName} in the {dealer.MARKET_AREA_NAME} Market Area\r\n");

                    // Note: if the items are "shared", then they are listings from affiliated dealerships.
                    HtmlNodeCollection VehicleListItems = mainpage.DocumentNode.SelectNodes("//li[substring(@class, 1, 14)='item notshared']");
                    ProcessVehicleListOnPage(VehicleListItems, DealerName, ref PreviouslyFoundAtDealer, dealer.MARKET_AREA_NAME);

                    HtmlNode NextLink = mainpage.DocumentNode.SelectSingleNode("//a[@class='ddc-btn ddc-btn-link ddc-btn-xsmall'][@rel='next']");
                    if (NextLink != null)
                    {
                        // The next page's href is appended to the dealer's main URL.
                        string NextURL = MainUrl + NextLink.Attributes["href"].Value.ToString();
                        mainpage       = webGet.Load(NextURL);
                        KeepProcessing = (mainpage != null);
                    }
                    else
                    {
                        KeepProcessing = false;
                    }
                }

                // The loop above only exits once KeepProcessing is false, so only the
                // null check on the list is needed here.
                if (PreviouslyFoundAtDealer != null)
                {
                    // All the vehicles currently at this dealer have been screen-scraped.
                    // Now go through the complete list of "STILL_FOR_SALE" vehicles from the previous run
                    // and update any that have disappeared from their website as being sold or auctioned.

                    Console.WriteLine($"Found {PreviouslyFoundAtDealer.Count} vehicle(s) removed from dealer inventory since last run.");
                    foreach (VEHICLE SoldOrAuctionedVehicle in PreviouslyFoundAtDealer)
                    {
                        SoldOrAuctionedVehicle.STILL_FOR_SALE = "NO";
                        List<VehiclePriceHistory> PriceHistory = RetrieveHistory(SoldOrAuctionedVehicle.VEHICLE_HISTORY);

                        // NOTE(review): assumes the history is non-empty and has exactly one
                        // entry at the latest date; Min/Max/SingleOrDefault fail otherwise - confirm.
                        var MinDate           = (from pricehistories in PriceHistory select pricehistories.Date_Recorded).Min();
                        var MaxDate           = (from pricehistories in PriceHistory select pricehistories.Date_Recorded).Max();
                        var DaysOnMarket      = MaxDate - MinDate;
                        var FinalPriceHistory = (from pricehistories in PriceHistory
                                                 where pricehistories.Date_Recorded == MaxDate
                                                 select pricehistories).SingleOrDefault <VehiclePriceHistory>();
                        FinalPriceHistory.WasFinalPrice        = "YES";
                        SoldOrAuctionedVehicle.VEHICLE_HISTORY = SaveHistory(PriceHistory);
                        Console.WriteLine($@"Updating VIN {SoldOrAuctionedVehicle.VIN} : {SoldOrAuctionedVehicle.YEAR} {SoldOrAuctionedVehicle.MAKE} {SoldOrAuctionedVehicle.MODEL} (Stock number {SoldOrAuctionedVehicle.STOCK_NUMBER}) as sold.  Final price was {FinalPriceHistory.Price}, Mileage was {SoldOrAuctionedVehicle.MILEAGE}, Days on market was {DaysOnMarket.Days}");
                        VehicleDBManager.UpdateVehicleRecord(SoldOrAuctionedVehicle);
                    }
                }
            }
        }
コード例 #7
0
        // Loads the configured page, switches to the first iframe whose src points at
        // Google Sheets (if any), saves that page's HTML to disk, reads the
        // cases/deaths/recovered cells, and sends a scrape message plus an e-mail
        // when the values differ from the cached previous scrape.
        private void scrapeAndSend()
        {
            // NOTE(review): browser is never used below; the construction is kept in case
            // the Browser constructor has side effects - confirm and remove if not.
            var browser = new Browser();
            var url     = _scrapeConfig.CoronaVirusScrape.ScrapeUrl;
            var ss      = "https://docs.google.com/spreadsheets";

            var web = new HtmlWeb();
            var doc = web.Load(url);

            // SelectNodes returns null when the page contains no iframe with a src;
            // in that case the configured URL itself is scraped.
            HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes("//iframe[@src]");

            if (nodes != null)
            {
                foreach (var node in nodes)
                {
                    HtmlAttribute attr = node.Attributes["src"];

                    // Ordinal, case-insensitive prefix test: URL matching must not
                    // depend on the current culture.
                    if (attr.Value.StartsWith(ss, StringComparison.OrdinalIgnoreCase))
                    {
                        url = attr.Value;
                        break;
                    }
                }
            }

            var fpath = @"C:\devApps\data\coronavirus\html";

            web = new HtmlWeb();
            doc = web.Load(url);

            // Keep a timestamped copy of the raw page.
            var fname = string.Format("{0}-bno.html", DateTime.Now.ToString("yyyy.MM.dd.hh.mm.tt"));

            File.WriteAllText(Path.Combine(fpath, fname), doc.Text);

            //*[@id="0"]/div/table
            var cases     = doc.DocumentNode.SelectSingleNode("//*[@id='0']/div/table/tbody/tr[5]/td[1]").InnerHtml;
            var deaths    = doc.DocumentNode.SelectSingleNode("//*[@id='0']/div/table/tbody/tr[5]/td[2]").InnerHtml;
            var recovered = doc.DocumentNode.SelectSingleNode("//*[@id='0']/div/table/tbody/tr[5]/td[3]").InnerHtml;
            var disp      = string.Format("Cases: {0}  Deaths: {1}  Recovered: {2}", cases, deaths, recovered);

            if (this._scrapeCache.PreviousCVscrapeValue != disp)
            {
                dumpToConsole(doc, true);

                // The result was never used by the original code; only the call is kept.
                parseAndDump(doc);

                // send scrape notice
                _sender.SendMessage(new CoronaVirusScrapeMessage());

                // send email notice
                this._scrapeCache.PreviousCVscrapeValue = disp;
                body = string.Format("<h2>{0}:<br>{1}</h2>", DateTime.Now, disp);

                List<string> recipients = new List<string> {
                    { "*****@*****.**" },
                    { "*****@*****.**" },
                    { "*****@*****.**" }
                };

                Gmail.Send(subject, body, recipients, true);
            }
            else
            {
                dumpToConsole(doc, false);
            }
        }
コード例 #8
0
ファイル: Program.cs プロジェクト: tibuchivn/relaximage
        // Collects the href of every titled <a> that is a direct child of a
        // 'photoThum' span on the page and saves them through BOLService, unless
        // links for this counter are already stored for the "xiuren.org" domain.
        public static void TestImageOnePage(string strPage, string strCounter, string category, string strTitle)
        {
            try
            {
                var bolService = new BOLService.BOLService();
                if (bolService.CheckExistLinkByDomain(strCounter, "xiuren.org"))
                {
                    Console.WriteLine("Exist :" + strCounter);
                    return;
                }

                var lst = new List<BOLService.ImgLink>();
                var web = new HtmlWeb();
                var doc = web.Load(strPage);
                //TODO: Check valid
                var divContainer = doc.DocumentNode.SelectSingleNode("//div[@id = 'main']");
                if (divContainer == null)
                {
                    return;
                }

                // SelectNodes returns null when there are no thumbnails; treat that as
                // "nothing to save" instead of falling into the catch-all below.
                HtmlNodeCollection nodeImagesCollection = divContainer.SelectNodes("//span[@class = 'photoThum']");
                if (nodeImagesCollection == null)
                {
                    return;
                }

                foreach (HtmlNode childNode in nodeImagesCollection)
                {
                    // Iterating ChildNodes of a childless span simply does nothing, so
                    // no separate HasChildNodes test is needed.
                    foreach (var node in childNode.ChildNodes)
                    {
                        if (!node.Name.Equals("a") || node.Attributes["title"] == null)
                        {
                            continue;
                        }

                        try
                        {
                            string strLink = node.Attributes["href"].Value;
                            lst.Add(new BOLService.ImgLink()
                            {
                                Category   = category,
                                Counter    = strCounter,
                                CreateDate = DateTime.Now,
                                Domain     = "xiuren.org",
                                GroupName  = strTitle,
                                linkimg    = strLink
                            });
                        }
                        catch (Exception ex)
                        {
                            //TODO: show error;
                            Console.WriteLine(ex.ToString());
                        }
                    }
                }

                if (lst.Count > 0)
                {
                    bolService.SaveImgDepVD(lst);
                    Console.WriteLine(strPage);
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.ToString());
            }
        }
コード例 #9
0
 // Fetches a YouTube watch page through HtmlWeb; the parsed document is not used further.
 public void test()
 {
     HtmlAgilityPack.HtmlWeb client = new HtmlWeb();

     string watchUrl = "https://www.youtube.com/watch?v=8x8zPoYRRbQ&list=PL9417AD8BBBA981BE";
     HtmlAgilityPack.HtmlDocument page = client.Load(watchUrl);
 }
コード例 #10
0
        /// <summary>
        /// Searches for books.
        /// </summary>
        /// <param name="q">Search keyword; inserted into the request URL as given.</param>
        /// <param name="pn">Page number; sent as <c>page</c>, with <c>pn - 1</c> sent as <c>p</c>.</param>
        /// <returns>One <c>BookInfoDto</c> per result row; an empty list when the page has no result list.</returns>
        public List<BookInfoDto> GetBooks(string q, int pn)
        {
            HtmlDocument doc = new HtmlDocument();
            string       url = string.Empty;

            // Up to three sources are tried in turn; treat it as retry-on-error.
            // NOTE(review): q is not URL-encoded here - confirm whether callers encode it.
            try
            {
                try
                {
                    //no.1
                    url = $"https://www.xxbiquge.com/search.php?keyword={q}&page={pn}&p={pn - 1}";
                    HtmlWeb webClient = new HtmlWeb();
                    doc = webClient.Load(url);
                }
                catch
                {
                    try
                    {
                        //no.2
                        url = $"http://zhannei.baidu.com/cse/search?s=8823758711381329060&q={q}&page={pn}&p={pn - 1}";
                        Thread.Sleep(1000 * 1);
                        var html = Utils.HttpHelper.Get(url);
                        doc.LoadHtml(html);
                    }
                    catch
                    {
                        //no.3
                        url = $"http://zhannei.baidu.com/cse/search?s=3654077655350271938&q={q}&page={pn}&p={pn - 1}";
                        Thread.Sleep(1000 * 2);
                        HtmlWeb webClient = new HtmlWeb();
                        doc = webClient.Load(url);
                    }
                }
            }
            catch (Exception ex)
            {
                throw new UserFriendlyException($"抓取网站请求失败,{ex.Message}。请退出后重试");
            }

            List<BookInfoDto> list = new List<BookInfoDto>();
            var books = doc.DocumentNode.SelectNodes("//div[@class='result-list']/div");

            if (books != null)
            {
                // These XPaths start with "//", so they match across the whole document
                // no matter which node they are run from. Run each one once here instead
                // of re-running all of them for every result row.
                var root       = doc.DocumentNode;
                var titleLinks = root.SelectNodes("//a[@class='result-game-item-title-link']");
                var infoTags   = root.SelectNodes("//p[@class='result-game-item-info-tag']");
                var coverImgs  = root.SelectNodes("//img[@class='result-game-item-pic-link-img']");
                var intros     = root.SelectNodes("//p[@class='result-game-item-desc']");

                for (int i = 0; i < books.Count; i++)
                {
                    // Each result row owns four consecutive info-tag paragraphs:
                    // author, classification, last update time, last chapter.
                    int tagBase         = i * 4;
                    var lastChapterLink = infoTags[tagBase + 3].SelectSingleNode("a");

                    list.Add(new BookInfoDto()
                    {
                        BookName                = titleLinks[i].Attributes["title"].Value,
                        BookLink                = titleLinks[i].Attributes["href"].Value.Trim(),
                        Author                  = infoTags[tagBase + 0].SelectNodes("span")[1].InnerText.Replace("\r\n", string.Empty).Trim(),
                        CoverImage              = coverImgs[i].Attributes["src"].Value,
                        BookClassify            = infoTags[tagBase + 1].SelectNodes("span")[1].InnerText.Replace("\r\n", string.Empty).Trim(),
                        Last_Update_Time        = infoTags[tagBase + 2].SelectNodes("span")[1].InnerText.Replace("\r\n", string.Empty).Trim(),
                        BookIntro               = intros[i].InnerText,
                        Last_Update_ChapterName = lastChapterLink.InnerText.Trim(),
                        Last_Update_ChapterLink = lastChapterLink.Attributes["href"].Value.Trim()
                    });
                }
            }
            return(list);
        }
コード例 #11
0
        // Downloads every URL in link to "<Main.TempSaveFileString>\<type>N).html",
        // then loads each saved file back and appends the rows parsed by
        // StringToListEVHF to EVHFList. Progress and errors are recorded in
        // Main.information; download errors are also shown in a message box.
        public void RZLoadFromTaxes(ref List <EVHF> EVHFList, ref string[] link, ref ProgressBar progressBar)
        {
            var htmlWeb = new HtmlWeb
            {
                OverrideEncoding = Encoding.UTF8
            };

            // Pass 1: download each link and save it as a numbered HTML file.
            for (int i = 0; i < link.Length; i++)
            {
                if (progressBar != null)
                {
                    progressBar.Value += 1;
                }
                try
                {
                    // 3072 is the numeric value used here for TLS 1.2.
                    // NOTE(review): Ssl3 is obsolete and insecure - confirm whether it
                    // is still required for this server and drop it if not.
                    ServicePointManager.Expect100Continue = true;
                    ServicePointManager.SecurityProtocol  = SecurityProtocolType.Tls
                                                            | SecurityProtocolType.Ssl3
                                                            | (SecurityProtocolType)3072;

                    string result;
                    using (WebClient wc = new WebClient { Encoding = Encoding.UTF8 })
                    {
                        result = wc.DownloadString(link[i]);
                    }

                    // File name prefix depends on the document type selected in Main.
                    string type = Main.DocType == 0 ? "EVHF(" : "E-Qaime(";
                    System.IO.File.WriteAllText(Main.TempSaveFileString + $@"\{type}{i+1}).html", result);
                    Main.information.Add($"Fayl {i+1} yarandı");
                }
                catch (Exception e)
                {
                    Main.information.Add("---Əlagə yaranmadı!---");
                    Main.information.Add(e.Message);
                    MessageBox.Show(e.Message);
                }
            }

            // Pass 2: read the saved files back and parse them.
            for (int m = 0; m < link.Length; m++)
            {
                HtmlAgilityPack.HtmlDocument htmlDoc;
                try
                {
                    string type = Main.DocType == 0 ? "EVHF(" : "E-Qaime(";
                    htmlDoc = htmlWeb.Load(Main.TempSaveFileString + $@"\{type}{m+1}).html");
                }
                catch (Exception e)
                {
                    Main.information.Add("---Fayl oxunmuyor---");
                    Main.information.Add(e.Message);

                    // Skip this file. Falling through would parse the document from
                    // the previous iteration again and report it as added.
                    continue;
                }

                EVHFList.AddRange(StringToListEVHF(htmlDoc.ParsedText));
                Main.information.Add($"File {m+1} added");
            }
        }
コード例 #12
0
ファイル: Program.cs プロジェクト: jilnesta/scraping-demo
        // Interactive demo loop: reads a URL from the console, loads it with HtmlWeb
        // and runs each of the sample extraction helpers on the document. An empty
        // line ends the program.
        static void Main()
        {
            Console.WriteLine("Hit Enter with no url to quit");

            while (true)
            {
                Console.Write("Enter a URL, e.g. www.irishtimes.com:  ");
                var url = Console.ReadLine();

                if (String.IsNullOrEmpty(url))
                {
                    return;
                }

                // Only a scheme at the START of the input counts, in any letter case.
                // The previous unanchored, case-sensitive regex also matched a scheme
                // inside a query string and missed "HTTP://".
                bool hasScheme = url.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                                 || url.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
                if (!hasScheme)
                {
                    url = $"http://{url}";
                }

                try
                {
                    var web = new HtmlWeb();

                    // Optional: This is not absolutely necessary, but it's a good idea. See comments for the OnPreRequest handler
                    web.PreRequest = new HtmlWeb.PreRequestHandler(OnPreRequest);

                    // Optional: monitor some useful data we get back in the Post Response:
                    web.PostResponse = new HtmlWeb.PostResponseHandler(OnPostResponse);


                    // The result of the web.Load will fill the HtmlDocument object
                    var document = web.Load(url);


                    // And now you can parse whatever you want...
                    // Everything you need will be in the DocumentNode.  See individual methods for usage

                    // Get all of the anchors...
                    Console.WriteLine("Getting all the anchors...");
                    FindAllTheAnchors(web, document);

                    Console.WriteLine("h1 tags");
                    FindHTags(document, "h1");

                    Console.WriteLine("h2 tags");
                    FindHTags(document, "h2");

                    Console.WriteLine("Get some data from meta tags");
                    GetSomeDataFromMetaTags(document);

                    Console.WriteLine("Get all elements with a particular class name.  Enter a class name that you know exists: ");
                    var classToFind = Console.ReadLine();
                    FindElementsWithClassName(document, classToFind);

                    Console.WriteLine("Print between each script tag");
                    PrintContentOfScriptTags(document);

                    Console.WriteLine("Get the script tags with a particular src attribute");
                    GetScriptTagsWithMatchingSrcAttribute(document);

                    // This one demonstrates how to mix node type selectors and attributes (change this to match for whatever site you are testing)
                    Console.WriteLine("Get every anchor tag with class that contains the letters: a");
                    FindAnchorsWithClassText(document);
                }
                catch (Exception ex)
                {
                    // Keep the loop alive: report on the console and put the details in the debug log.
                    Console.WriteLine($"Something went wrong while scraping {url}! See Debug log...");
                    Debug.WriteLine(ex);
                }
            }
        }
コード例 #13
0
ファイル: Program.cs プロジェクト: tibuchivn/relaximage
        /// <summary>
        /// Loads <paramref name="strPage"/>, collects the <c>src</c> of every <c>img</c> found under
        /// <c>ul#myGallery &gt; li</c> inside the first <c>div.img</c>, and saves the resulting links
        /// through <c>BOLService.SaveImgDepVD</c>. Does nothing when a link for
        /// <paramref name="strCounter"/> is already stored for the "ugirls.com" domain.
        /// </summary>
        /// <param name="strPage">URL of the gallery page to scrape.</param>
        /// <param name="strCounter">Identifier stored as <c>Counter</c> on every saved link and used for the existence check.</param>
        /// <param name="category">Value stored as <c>Category</c> on every saved link.</param>
        /// <param name="strTitle">Value stored as <c>GroupName</c> on every saved link.</param>
        public static void TestImageOnePage(string strPage, string strCounter, string category, string strTitle)
        {
            try
            {
                var bolService = new BOLService.BOLService();
                if (bolService.CheckExistLinkByDomain(strCounter, "ugirls.com"))
                {
                    Console.WriteLine("Exist :" + strCounter);
                    return;
                }

                var lst = new List<BOLService.ImgLink>();
                var web = new HtmlWeb();
                var doc = web.Load(strPage);
                //TODO: Check valid
                var divContainer = doc.DocumentNode.SelectSingleNode("//div[@class = 'main auto']");
                if (divContainer == null)
                {
                    return;
                }

                // NOTE(review): the leading "//" makes this XPath search the whole document, not just
                // divContainer; left unchanged so the set of matched nodes stays the same.
                HtmlNodeCollection nodeImagesCollection = divContainer.SelectNodes("//div[@class = 'img']");

                // SelectNodes yields null (not an empty collection) when nothing matches.
                if (nodeImagesCollection == null || nodeImagesCollection.Count == 0)
                {
                    return;
                }

                foreach (HtmlNode node in nodeImagesCollection[0].ChildNodes)
                {
                    var idAttribute = node.Attributes["id"];
                    var isGallery = node.Name.Equals("ul") && idAttribute != null &&
                                    idAttribute.Value.Equals("myGallery");
                    if (!isGallery)
                    {
                        continue;
                    }

                    foreach (HtmlNode childNode in node.ChildNodes)
                    {
                        if (!childNode.HasChildNodes || !childNode.Name.Equals("li"))
                        {
                            continue;
                        }

                        foreach (var nodeThumb in childNode.ChildNodes)
                        {
                            if (!nodeThumb.Name.Equals("img") || nodeThumb.Attributes["src"] == null)
                            {
                                continue;
                            }

                            try
                            {
                                // Strip the thumbnail suffix from the image URL.
                                string strLink = nodeThumb.Attributes["src"].Value;
                                strLink = strLink.Replace("_magazine_web_m", "");
                                var item = new BOLService.ImgLink()
                                {
                                    Category   = category,
                                    Counter    = strCounter,
                                    CreateDate = DateTime.Now,
                                    Domain     = "ugirls.com",
                                    GroupName  = strTitle,
                                    linkimg    = strLink
                                };
                                lst.Add(item);
                            }
                            catch (Exception ex)
                            {
                                //TODO: show error;
                                Console.WriteLine(ex.ToString());
                            }
                        }
                    }

                    if (lst.Count > 0)
                    {
                        bolService.SaveImgDepVD(lst);
                        Console.WriteLine(strPage);
                    }

                    // Only the first gallery list is processed.
                    break;
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.ToString());
            }
        }
コード例 #14
0
        /// <summary>
        /// Loads the "daily-box-office-chart" page under <c>Url</c> and turns every table row that
        /// has at least three <c>td</c> cells into a <c>Movie</c>: the name comes from the third cell,
        /// earnings from the fifth and theater count from the eighth (when those cells exist).
        /// Also resets <c>ContainsEstimates</c>, sets <c>WeekendEnding</c> and records <c>UrlSource</c>.
        /// </summary>
        /// <returns>The movies read from the page; empty when no table rows were found.</returns>
        public override List<IMovie> Mine()
        {
            // Zero-based positions of the cells that are read from each row.
            const int nameColumn    = 2;
            const int earningsColumn = 4;
            const int theaterColumn  = 7;

            var movies   = new List<IMovie>();
            var chartUrl = $"{Url}/daily-box-office-chart";
            var browser  = new HtmlWeb();

            ContainsEstimates = false;
            WeekendEnding     = MovieDateUtil.GameSunday();

            var page = browser.Load(chartUrl);

            UrlSource = chartUrl;

            var rows = page.DocumentNode?.SelectNodes("//body//table//tr");

            if (rows == null)
            {
                return movies;
            }

            foreach (var row in rows)
            {
                var cells = row.SelectNodes("td");

                // Rows without td cells, or too short to carry a name, produce no movie.
                if (cells == null || cells.Count <= nameColumn)
                {
                    continue;
                }

                var movie = new Movie
                {
                    Name = RemovePunctuation(MapName(HttpUtility.HtmlDecode(cells[nameColumn].InnerText)))
                };

                if (WeekendEnding.HasValue)
                {
                    movie.WeekendEnding = WeekendEnding.Value;
                }

                if (cells.Count > earningsColumn)
                {
                    movie.Earnings = ParseEarnings(cells[earningsColumn].InnerText);
                }

                if (cells.Count > theaterColumn)
                {
                    decimal theaters;

                    // A dash in the cell is substituted with a zero before parsing.
                    if (decimal.TryParse(cells[theaterColumn].InnerText?.Replace("-", "0"), out theaters))
                    {
                        movie.TheaterCount = (int)theaters;
                    }
                }

                movies.Add(movie);
            }

            return movies;
        }
コード例 #15
0
        /// <summary>
        /// Loads the news index under <c>Url</c>, follows the Nth link whose href contains
        /// "/article/", and parses that article's list items
        /// (<c>ul/li/span.a-list-item</c>) into movies with estimated earnings.
        /// Sets <c>UrlSource</c> to the article URL, and sets <c>Error</c> to <c>NO_DATA</c> when the
        /// article contains no list items or to <c>FOUR_DAY</c> when a larger duplicate replaced an entry.
        /// </summary>
        /// <param name="articleNumber">
        /// 1-based position of the article link on the news page. Values below 1, or beyond the number
        /// of links found, select nothing and yield an empty list.
        /// </param>
        /// <returns>The forecast movies; empty when no article or no parsable entries were found.</returns>
        private List<IMovie> MineForecast(int articleNumber = 1)
        {
            const string articleLinkXPath = "//body//a[contains(@href, '/article/')]";

            var    result = new List<IMovie>();
            string url    = Url + "news/";
            var    web    = new HtmlWeb();

            var doc = web.Load(url);                    // Load main page.

            HtmlNode node = null;

            if (articleNumber == 1)
            {
                node = doc.DocumentNode.SelectSingleNode(articleLinkXPath);
            }
            else if (articleNumber > 1)
            {
                // Guarding the lower bound keeps a zero/negative articleNumber from indexing nodes[-1].
                var nodes = doc.DocumentNode.SelectNodes(articleLinkXPath);

                if (nodes != null && articleNumber <= nodes.Count)
                {
                    node = nodes[articleNumber - 1];
                }
            }

            if (node == null)
            {
                return result;
            }

            var href = node.GetAttributeValue("href", null);

            if (href == null)
            {
                return result;
            }

            DateTime? articleDate = null;

            // Now retrieve the article page.

            UrlSource = $"{Url}/{href}";

            doc = web.Load(UrlSource);

            // The article date is read from the second child of the byline div.

            node = doc.DocumentNode.SelectSingleNode("//body//div[@class='mojo-news-byline']");

            if (node != null && node.ChildNodes.Count > 1)
            {
                string   articleText = HttpUtility.HtmlDecode(node.ChildNodes[1].InnerText).Trim();
                var      tokens      = articleText.Split(new char[] { '-' });
                DateTime parsedDateTime;

                // Only the text before the first '-' is parsed; the time-zone abbreviations are stripped first.
                if (tokens.Length > 0 && DateTime.TryParse(tokens[0].Replace("PDT", string.Empty).Replace("PST", string.Empty), out parsedDateTime))
                {
                    articleDate = parsedDateTime.Date;
                }
            }

            // The movies are just in a <ul> tag (unsorted list)

            var movieNodes = doc.DocumentNode?.SelectNodes("//body//ul/li/span[@class='a-list-item']");

            if (movieNodes == null)
            {
                Error = NO_DATA;
                return result;
            }

            foreach (var movieNode in movieNodes)
            {
                var nodeText = movieNode.InnerText;
                int index    = nodeText.IndexOf(DELIMITER);

                // Entries without the delimiter (or starting with it) carry no "name + value" pair.
                if (index <= 0)
                {
                    continue;
                }

                var movieName = nodeText.Substring(0, index);

                // Might switch this to RegEx...
                // Everything from the delimiter onward is the value part; an "M" marks millions.
                var  valueText          = nodeText.Substring(index);
                bool valueInMillions    = valueText.Contains("M");
                var  estimatedBoxOffice = valueText.Replace(DELIMITER, string.Empty).Replace("M", string.Empty);

                var parenIndex = movieName.IndexOf("(");

                if (parenIndex > 0)
                {
                    // Trim out the THEATERS (for now).
                    movieName = movieName.Substring(0, parenIndex - 1).Trim();
                }

                parenIndex = estimatedBoxOffice.IndexOf("(");

                if (parenIndex > 0)
                {
                    // Trim out the multi-day value.
                    estimatedBoxOffice = estimatedBoxOffice.Substring(0, parenIndex - 1).Trim();
                }

                decimal estBoxOffice;

                if (string.IsNullOrEmpty(movieName) || !decimal.TryParse(estimatedBoxOffice, out estBoxOffice))
                {
                    continue;
                }

                var name  = MapName(RemovePunctuation(HttpUtility.HtmlDecode(movieName)));
                var movie = new Movie
                {
                    MovieName = name,
                    Earnings  = estBoxOffice * (valueInMillions ? 1000000 : 1)
                };

                if (articleDate.HasValue)
                {
                    movie.WeekendEnding = MovieDateUtil.NextSunday(articleDate);
                }

                if (!result.Contains(movie))
                {
                    result.Add(movie);
                }
                else if (GameDays > 3)
                {
                    // It's OK to override the BO value if the game days is MORE than the default.

                    // Need to use "fuzzy" logic here because the names may have dates as suffixes and those should match.
                    var found = result.Find(item => item.Equals(movie));

                    if (found != null && found.EarningsBase < movie.EarningsBase)
                    {
                        // Replace the movie if a larger value was found. (4 day weekend versus 3 day)

                        result.Remove(found);
                        result.Add(movie);

                        Error = FOUR_DAY;
                    }
                }
            }

            return result;
        }
コード例 #16
0
ファイル: Mail.cs プロジェクト: sitskomickhail/insta-checker
        /// <summary>
        /// Polls the mailbox behind <c>ic</c> (up to 10 iterations per pass) for a message dated at or
        /// after <paramref name="dt"/>, writes its body to a temporary HTML file, reads the <c>href</c>
        /// of the first anchor in it and returns that link when it contains
        /// "https://instagram.com/accounts/confirm_email/".
        /// When a pass finds nothing the method calls itself once more (tracked through
        /// <c>tryAgainResult</c>) before giving up.
        /// </summary>
        /// <param name="dt">Earliest message date that is accepted.</param>
        /// <returns>The confirmation link, or <c>null</c> when no matching mail was found.</returns>
        /// <remarks>
        /// NOTE(review): while the mailbox returns no messages the method recurses without a bound
        /// (5 s pause per call); left as-is because callers may rely on it blocking.
        /// NOTE(review): every log line interpolates <c>UserPass</c>; confirm that value is safe to log.
        /// </remarks>
        public string GetMailPath(DateTime dt)
        {
            lock (LogIO.locker) logging.Invoke(LogIO.mainLog, new Log()
                {
                    UserName   = null,
                    Date       = DateTime.Now,
                    LogMessage = $"Entering into method {UserPass}",
                    Method     = "Mail.GetMailPath"
                });

            // 1-based offset from the end of the fetched window used for the date check.
            int length = 1;

            for (int i = 0; i < 10; i++)
            {
                MailMessage[] mm = ic.GetMessages(ic.GetMessageCount() - 1, ic.GetMessageCount());
                if (mm.Length == 0)
                {
                    lock (LogIO.locker) logging.Invoke(LogIO.mainLog, new Log()
                        {
                            Date = DateTime.Now, LogMessage = $"Waiting for message {UserPass}", Method = "Mail.GetMailPath"
                        });
                    lock (LogIO.locker) logging.Invoke("MailLog.log", new Log()
                        {
                            Date = DateTime.Now, LogMessage = $"Waiting for message {UserPass}", Method = "Mail.GetMailPath"
                        });
                    Thread.Sleep(5000);
                    return GetMailPath(dt);
                }

                // Once more messages were rejected than the window holds, the offset would index
                // before the start of the array; stop this pass and fall through to the retry logic.
                if (length > mm.Length)
                {
                    break;
                }

                if (mm[mm.Length - length].Date >= dt)
                {
                    // NOTE(review): the date check uses the `length` offset but the body is always
                    // fetched for the LAST message; kept as in the original - confirm intent.
                    MailMessage message = ic.GetMessage(mm[mm.Length - 1].Uid);
                    Thread.Sleep(1000);
                    lock (LogIO.locker) logging.Invoke(LogIO.mainLog, new Log()
                        {
                            Date = DateTime.Now, LogMessage = $"Getting messages {UserPass}", Method = "Mail.GetMailPath"
                        });
                    lock (LogIO.locker) logging.Invoke("MailLog.log", new Log()
                        {
                            Date = DateTime.Now, LogMessage = $"Getting messages {UserPass}", Method = "Mail.GetMailPath"
                        });

                    // Creates/overwrites the file and always releases the handle, even on failure.
                    string path = $"message{r}.html";
                    File.WriteAllText(path, message.Body);
                    lock (LogIO.locker) logging.Invoke(LogIO.mainLog, new Log()
                        {
                            UserName = null, Date = DateTime.Now, LogMessage = $"File Created {UserPass}", Method = "Mail.GetMailPath"
                        });
                    lock (LogIO.locker) logging.Invoke("MailLog.log", new Log()
                        {
                            UserName = null, Date = DateTime.Now, LogMessage = $"File Created {UserPass}", Method = "Mail.GetMailPath"
                        });

                    Thread.Sleep(5000);
                    HtmlWeb web = new HtmlWeb();

                    HtmlDocument doc = web.Load(Environment.CurrentDirectory + @"\" + path);
                    lock (LogIO.locker) logging.Invoke("MailLog.log", new Log()
                        {
                            UserName = null, Date = DateTime.Now, LogMessage = $"Getting info from file {UserPass}", Method = "Mail.GetMailPath"
                        });

                    // SelectNodes yields null when the mail contains no anchors; treat that (and a
                    // missing href) as "no link" so the temp file below is still cleaned up.
                    var    nodes  = doc.DocumentNode.SelectNodes("//a");
                    string result = nodes == null ? null : nodes[0].GetAttributeValue("href", null);
                    Thread.Sleep(1000);
                    lock (LogIO.locker) logging.Invoke(LogIO.mainLog, new Log()
                        {
                            UserName = null, Date = DateTime.Now, LogMessage = $"\"href\" finded = {result} ---- {UserPass}", Method = "Mail.GetMailPath"
                        });
                    lock (LogIO.locker) logging.Invoke("MailLog.log", new Log()
                        {
                            UserName = null, Date = DateTime.Now, LogMessage = $"\"href\" finded = {result} ---- {UserPass}", Method = "Mail.GetMailPath"
                        });

                    File.Delete(path);
                    lock (LogIO.locker) logging.Invoke(LogIO.mainLog, new Log()
                        {
                            UserName = null, Date = DateTime.Now, LogMessage = $"FileDeleted {UserPass}", Method = "Mail.GetMailPath"
                        });
                    lock (LogIO.locker) logging.Invoke("MailLog.log", new Log()
                        {
                            UserName = null, Date = DateTime.Now, LogMessage = $"FileDeleted {UserPass}", Method = "Mail.GetMailPath"
                        });
                    Thread.Sleep(1000);

                    if (result != null && result.Contains("https://instagram.com/accounts/confirm_email/"))
                    {
                        lock (LogIO.locker) logging.Invoke(LogIO.mainLog, new Log()
                            {
                                UserName = null, Date = DateTime.Now, LogMessage = $"Returning result {UserPass}", Method = "Mail.GetMailPath"
                            });
                        lock (LogIO.locker) logging.Invoke("MailLog.log", new Log()
                            {
                                UserName = null, Date = DateTime.Now, LogMessage = $"Returning result {UserPass}", Method = "Mail.GetMailPath"
                            });

                        return result;
                    }
                    else
                    {
                        length++;
                        lock (LogIO.locker) logging.Invoke(LogIO.mainLog, new Log()
                            {
                                UserName = null, Date = DateTime.Now, LogMessage = $"{length - 1} message not correct {UserPass}", Method = "Mail.GetMailPath"
                            });
                        lock (LogIO.locker) logging.Invoke("MailLog.log", new Log()
                            {
                                UserName = null, Date = DateTime.Now, LogMessage = $"{length - 1} message not correct", Method = "Mail.GetMailPath"
                            });
                        continue;
                    }
                }
                else
                {
                    Thread.Sleep(2000);
                }
            }

            if (tryAgainResult == false)
            {
                lock (LogIO.locker) logging.Invoke(LogIO.mainLog, new Log()
                    {
                        UserName = null, Date = DateTime.Now, LogMessage = $"Try to  find again {UserPass}", Method = "Mail.GetMailPath"
                    });
                lock (LogIO.locker) logging.Invoke("MailLog.log", new Log()
                    {
                        UserName = null, Date = DateTime.Now, LogMessage = $"Try to  find again {UserPass}", Method = "Mail.GetMailPath"
                    });

                // Allow exactly one additional pass.
                tryAgainResult = true;
                return GetMailPath(dt);
            }
            else
            {
                // Reset the flag so the next top-level call gets its retry again.
                tryAgainResult = false;
                lock (LogIO.locker) logging.Invoke(LogIO.mainLog, new Log()
                    {
                        UserName = null, Date = DateTime.Now, LogMessage = $"Mail not found", Method = "Mail.GetMailPath"
                    });
                lock (LogIO.locker) logging.Invoke("MailLog.log", new Log()
                    {
                        UserName = null, Date = DateTime.Now, LogMessage = $"Mail not found", Method = "Mail.GetMailPath"
                    });
                return null;
            }
        }
コード例 #17
0
        /// <summary>
        /// Loads <paramref name="url"/> and converts every <c>div.row</c> entry on the page into a
        /// <c>NovelDataModel</c> (title, latest chapter, absolute link, absolute image link, rating "0").
        /// Any exception is written to the console and whatever was collected so far is returned.
        /// </summary>
        /// <param name="url">Address of the listing or search-result page.</param>
        /// <param name="isSearch">
        /// Kept for caller compatibility; both modes are parsed with the same selectors, so the value
        /// does not influence the result.
        /// </param>
        /// <returns>The parsed entries; empty when the page has no matching rows or loading failed.</returns>
        public static List<NovelDataModel> GetNovelFullData(string url, bool isSearch)
        {
            const string siteRoot = "https://novelfull.com";

            var boxNovelData = new List<NovelDataModel>();

            try
            {
                HtmlWeb htmlWeb = new HtmlWeb();
                HtmlAgilityPack.HtmlDocument doc = htmlWeb.Load(url);

                // SelectNodes yields null (not an empty collection) when nothing matches.
                var rows = doc.DocumentNode.SelectNodes("//div/div/div/div[@class='row']");
                if (rows == null)
                {
                    return boxNovelData;
                }

                foreach (HtmlNode item in rows)
                {
                    var title = HttpUtility.HtmlDecode(
                        item.SelectSingleNode(".//div/h3[@class='truyen-title']/a")
                        ?.InnerText
                        );

                    var latestchapter = HttpUtility.HtmlDecode(
                        item.SelectSingleNode(".//div/div/a/span[@class='chapter-text']")
                        ?.InnerText
                        )?.Trim();

                    var link = HttpUtility.HtmlDecode(
                        item.SelectSingleNode(".//div/h3[@class='truyen-title']/a[@href]")
                        ?.GetAttributeValue("href", string.Empty)
                        );

                    // Links on the page are site-relative; prefix only when one was found.
                    if (!string.IsNullOrEmpty(link))
                    {
                        link = siteRoot + link;
                    }

                    var imagelink = HttpUtility.HtmlDecode(
                        item.SelectSingleNode(".//img")
                        ?.Attributes["src"]
                        ?.Value
                        );

                    if (!string.IsNullOrEmpty(imagelink))
                    {
                        imagelink = siteRoot + imagelink;
                    }

                    // No rating is read from the page; a constant "0" is stored.
                    var rating = "0";

                    boxNovelData.Add(new NovelDataModel(title, latestchapter, link, imagelink, rating));
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }

            return boxNovelData;
        }
コード例 #18
0
        /// <summary>
        /// Downloads a chapter page and extracts the book name, chapter name, chapter content,
        /// word count and the previous / book / next navigation links.
        /// </summary>
        /// <param name="chapterLink">Absolute URL of the chapter page.</param>
        /// <returns>The parsed chapter; <c>NextChapterLink</c> is empty when it only points back to the chapter directory.</returns>
        /// <exception cref="UserFriendlyException">
        /// The page could not be downloaded (after one retry) or does not contain the expected elements.
        /// </exception>
        public BookContentDto GetBookContent(string chapterLink)
        {
            HtmlWeb      webClient = new HtmlWeb();
            HtmlDocument doc;

            webClient.OverrideEncoding = Encoding.UTF8;

            // The request is attempted twice: a failed first attempt is retried once after a 2 s pause.
            try
            {
                try
                {
                    doc = webClient.Load(chapterLink);
                }
                catch
                {
                    Thread.Sleep(2000);
                    doc = webClient.Load(chapterLink);
                }
            }
            catch (Exception ex)
            {
                throw new UserFriendlyException($"抓取网站请求失败,{ex.Message}。请退出后重试");
            }

            var root            = doc.DocumentNode;
            var nodes           = root.SelectNodes("//div[@class='bookname']/div[@class='bottem1']/a[@href]");
            var contentNode     = root.SelectSingleNode("//div[@id='content']");
            var bookNameNode    = root.SelectSingleNode("//div[@class='footer_cont']/p/a");
            var chapterNameNode = root.SelectSingleNode("//div[@class='bookname']/h1");

            // Three navigation anchors (previous, book, next) and all three text nodes are read below;
            // report a parse failure up front instead of failing with a null/index error later.
            if (nodes == null || nodes.Count < 3 ||
                contentNode == null || bookNameNode == null || chapterNameNode == null)
            {
                throw new UserFriendlyException("解析网页异常,请重试");
            }

            // The navigation hrefs are prefixed with scheme + host of the chapter URL.
            Uri uri     = new Uri(chapterLink);
            var _domain = $"{uri.Scheme}://{uri.Host}";

            var bookContent = new BookContentDto()
            {
                BookName        = bookNameNode.InnerText.Trim(),
                BookLink        = _domain + nodes[1].Attributes["href"].Value.Trim(),
                ChapterName     = chapterNameNode.InnerText.Trim(),
                ChapterLink     = chapterLink,
                Content         = ClearSensitiveCharacter(contentNode.InnerHtml).TrimEnd(),
                NextChapterLink = _domain + nodes[2].Attributes["href"].Value.Trim(),
                PrevChapterLink = _domain + nodes[0].Attributes["href"].Value.Trim()
            };

            bookContent.Number_Of_Words = ClearSensitiveCharacter(contentNode.InnerText).TrimEnd().Length;

            // Some sites make the last chapter's "next chapter" link point back to the chapter
            // directory; in that case there is no next chapter, so clear the link.
            var directoryLink = chapterLink.Substring(0, chapterLink.LastIndexOf("/")) + "/";

            if (directoryLink == bookContent.NextChapterLink)
            {
                bookContent.NextChapterLink = "";
            }

            return bookContent;
        }
コード例 #19
0
        /// <summary>
        /// Loads the search page at <c>geetMalaUrl.CurrentFormattedUrl</c>, then for every album link
        /// with non-blank text loads the album page and each of its song pages, builds a
        /// <c>SongInfo</c> per song (artist, composer, writer, title, optional lyrics) and writes
        /// the album metadata when at least one new song was collected.
        /// </summary>
        /// <param name="geetMalaUrl">Provides the search URL to open.</param>
        /// <param name="showMessages">When true, progress messages are written to the console.</param>
        private static void DoSearch(GeetMalaUrl geetMalaUrl, bool showMessages)
        {
            Console.Clear();
            if (showMessages)
            {
                Console.WriteLine(Resource.openingUrl, geetMalaUrl.CurrentFormattedUrl);
            }

            var web  = new HtmlWeb();
            var page =
                web.Load(geetMalaUrl.CurrentFormattedUrl)
                .DocumentNode.QuerySelectorAll(GeetMalaUrl.AlbumSearchQuery)
                .Where(x => x.InnerText.Trim().Length > 0);

            foreach (var album in page)
            {
                var albuminfo = new AlbumInfo
                {
                    AlbumName = album.InnerText.RemoveInvalidPathChars(),
                    SongInfo  = new List<SongInfo>()
                };

                if (showMessages)
                {
                    Console.WriteLine(Resource.openingAlbumNameParam, albuminfo.AlbumName);
                }

                // hrefs of the song pages listed on the album page
                var albumInfoData = web.Load(GeetMalaUrl.CoreUrl + album.GetAttributeValue("href", ""))
                                    .DocumentNode.QuerySelectorAll(GeetMalaUrl.SongSearchQuery).Select(x => x.GetAttributeValue("href", ""));

                foreach (var xalbumInfo in albumInfoData)
                {
                    var songInfoComplexData = web.Load(GeetMalaUrl.CoreUrl + xalbumInfo)
                                              .DocumentNode;
                    var songInfoData = songInfoComplexData.QuerySelectorAll(GeetMalaUrl.SongInfoSearchQuery).ToList();

                    // The first three matches are read as artist, composer and writer, in that order.
#pragma warning disable 618
                    var songInfo = new SongInfo(DirPath, albuminfo.AlbumName)
#pragma warning restore 618
                    {
                        Artist = new Artist {
                            Value = songInfoData[0].InnerText
                        },
                        Composer = new Composer {
                            Value = songInfoData[1].InnerText
                        },
                        Writer = new Writer {
                            Value = songInfoData[2].InnerText
                        },
                        SongName = songInfoComplexData.QuerySelector(GeetMalaUrl.SongTitleSearchQuery)
                                   .InnerText.RemoveInvalidPathChars()
                    };

                    // Lyrics are optional; the assignment is terminated INSIDE the block
                    // (the statement previously lacked its semicolon, which does not compile).
                    var lyrics = songInfoComplexData.QuerySelector(GeetMalaUrl.LyricsSearchQuery);
                    if (lyrics != null)
                    {
                        songInfo.Lyrics = new Lyrics
                        {
                            Value = lyrics.InnerText
                        };
                    }

                    // Skip songs already present in the song's directory.
                    if (LoadedFiles(songInfo.SongDirPath).Contains(songInfo.SongName))
                    {
                        continue;
                    }
                    albuminfo.SongInfo.Add(songInfo);
                }
                if (albuminfo.SongInfo.Count > 0)
                {
                    WriteMetadata(albuminfo);
                }
            }
            if (showMessages)
            {
                Console.WriteLine(Resource.ClosingUrl, geetMalaUrl.CurrentFormattedUrl);
            }
        }
    }
コード例 #20
0
        /// <summary>
        /// Loads a book's introduction page and extracts the book information and chapter list.
        /// </summary>
        /// <param name="bookLink">Absolute URL of the book's introduction page.</param>
        /// <returns>The book details together with every chapter link found on the page.</returns>
        /// <exception cref="UserFriendlyException">
        /// Thrown when the page cannot be downloaded after one retry, or when the page does not
        /// contain the four info paragraphs this method reads.
        /// </exception>
        public BookChapterDto GetBookChapters(string bookLink)
        {
            HtmlWeb      webClient;
            HtmlDocument doc;

            // The request is issued twice at most: a failed first attempt is retried once after a pause.
            try
            {
                try
                {
                    webClient = new HtmlWeb();
                    webClient.OverrideEncoding = Encoding.UTF8;
                    doc = webClient.Load(bookLink);
                }
                catch
                {
                    Thread.Sleep(2000);
                    webClient = new HtmlWeb();
                    webClient.OverrideEncoding = Encoding.UTF8;
                    doc = webClient.Load(bookLink);
                }
            }
            catch (Exception ex)
            {
                throw new UserFriendlyException($"抓取网站请求失败,{ex.Message}。请退出后重试");
            }

            // Links on the page are site-relative, so they are prefixed with scheme and host.
            Uri uri     = new Uri(bookLink);
            var _domain = $"{uri.Scheme}://{uri.Host}";
            var nodes   = doc.DocumentNode.SelectNodes("//div[@id='info']/p");

            // nodes[0]..nodes[3] are read below, so fewer than four paragraphs is a parse failure
            // (SelectNodes returns null when nothing matches).
            if (nodes == null || nodes.Count < 4)
            {
                throw new UserFriendlyException("解析网页异常,请重试");
            }

            // Chapter index
            List <BookChapterDto.ChapterlistModel> chapterList = new List <BookChapterDto.ChapterlistModel>();
            var chapters = doc.DocumentNode.SelectNodes("//div[@id='list']/dl/dd/a");

            // A page without chapter links yields an empty list instead of a NullReferenceException.
            if (chapters != null)
            {
                foreach (var item in chapters)
                {
                    chapterList.Add(new BookChapterDto.ChapterlistModel()
                    {
                        ChapterName = item.InnerText,
                        ChapterLink = _domain + item.Attributes["href"].Value.Trim()
                    });
                }
            }

            // Book information: each info paragraph has its leading "label:" part removed.
            var bookChapter = new BookChapterDto()
            {
                BookName                = doc.DocumentNode.SelectSingleNode("//div[@id='info']/h1").InnerText.Trim(),
                Author                  = nodes[0].InnerText.Replace(nodes[0].InnerText.Split(':')[0] + ":", string.Empty).Trim(),
                Status                  = nodes[1].InnerText.Replace(nodes[1].InnerText.Split(':')[0] + ":", string.Empty).Replace(",加入书架,直达底部", string.Empty),
                Last_Update_Time        = nodes[2].InnerText.Replace(nodes[2].InnerText.Split(':')[0] + ":", string.Empty),
                Last_Update_ChapterName = nodes[3].InnerText.Replace(nodes[3].InnerText.Split(':')[0] + ":", string.Empty).Trim(),
                Last_Update_ChapterLink = _domain + nodes[3].ChildNodes["a"].Attributes["href"].Value.Trim(),
                Intro       = doc.DocumentNode.SelectSingleNode("//div[@id='intro']").InnerText.Replace("&nbsp;", "").Trim(),
                Chapterlist = chapterList
            };

            return(bookChapter);
        }
コード例 #21
0
        /// <summary>
        /// Parses the weather pages and writes the results to the database. A timer starts a
        /// scrape immediately and then every five minutes; the process runs until a line is
        /// read from the console.
        /// </summary>
        static void Main()
        {
            //IRepository repository;
            //IKernel ninjectkernel = new StandardKernel();
            //ninjectkernel.Bind<IRepository>().To<WeatherForecastFullRepository>();
            //repository = ninjectkernel.Get<IRepository>();

            WeatherForecastFullRepository repository = new WeatherForecastFullRepository();

            // Maps the Russian genitive month names found in the page's date headers to month numbers.
            Dictionary <string, int> dayMonthPairs = new Dictionary <string, int>()
            {
                { "января", 1 },
                { "февраля", 2 },
                { "марта", 3 },
                { "апреля", 4 },
                { "мая", 5 },
                { "июня", 6 },
                { "июля", 7 },
                { "августа", 8 },
                { "сентября", 9 },
                { "октября", 10 },
                { "ноября", 11 },
                { "декабря", 12 }
            };

            var workerTimer = new Timer(x => {
                try
                {
                    ////Regions
                    //GetRegions();
                    //repository.Save();

                    ////Cities
                    //GetСities();
                    //repository.Save();

                    //Weather
                    GetWeatherForecast();
                    repository.Save();
                }
                catch (Exception ex)
                {
                    Console.WriteLine(ex);
                }
            }, null, dueTime: TimeSpan.Zero, period: TimeSpan.FromMinutes(5));

            Console.ReadLine();

            // The timer is otherwise unreferenced after construction; keep it reachable so it
            // cannot be garbage-collected (and silently stop firing) while waiting for input.
            GC.KeepAlive(workerTimer);

            // Loads every link of the region list page into the repository as a Region.
            void GetRegions()
            {
                var html      = @"https://yandex.ru/pogoda/region/225?via=reg";
                var web       = new HtmlWeb();
                var htmlDoc   = web.Load(html);
                var testNodes = htmlDoc.DocumentNode.SelectNodes("//div[@class='place-list']//a[@href]");

                foreach (var node in testNodes)
                {
                    repository.Load(new Region()
                    {
                        RegionName = node.InnerText, RegionLink = node.Attributes["href"].Value
                    });
                }
            }

            // For every stored region, loads the links of its place list as City entries.
            void GetСities()
            {
                var regionsCollection = repository.GetRegions();

                foreach (var region in regionsCollection)
                {
                    var html    = @"https://yandex.ru" + region.RegionLink;
                    var web     = new HtmlWeb();
                    var htmlDoc = web.Load(html);
                    var nodes   = htmlDoc.DocumentNode.SelectNodes("//div[@class='place-list']//a[@href]");
                    foreach (var node in nodes)
                    {
                        repository.Load(new City()
                        {
                            RegionId = region.Id, CityName = node.InnerText, CityLink = node.Attributes["href"].Value
                        });
                    }
                    Console.WriteLine(region.Id);
                }
            }

            // For every stored city, follows the second header link to the climate calendar,
            // walks the month pages and stores one WeatherForecast per calendar day.
            void GetWeatherForecast()
            {
                Console.WriteLine("Hello");
                var citiesCollection = repository.GetCities();

                try
                {
                    foreach (var city in citiesCollection)
                    {
                        var html       = @"https://yandex.ru" + city.CityLink;
                        var web        = new HtmlWeb();
                        var htmlDoc    = web.Load(html);
                        var node       = htmlDoc.DocumentNode.SelectSingleNode("(//div[@class='forecast-briefly__header']//a)[2]");
                        var newhtml    = @"https://yandex.ru" + node.Attributes["href"].Value;
                        var newhtmlDoc = web.Load(newhtml);
                        // One link per month; the first link is skipped and the next twelve are loaded.
                        var nodes      = newhtmlDoc.DocumentNode.SelectNodes("//div[@class='climate-calendar-container__calendar']//span/a");
                        foreach (var nod in nodes.Skip(1).Take(12))
                        {
                            var htmlMonth    = @"https://yandex.ru" + nod.Attributes["href"].Value;
                            var htmlMonthDoc = web.Load(htmlMonth);
                            var nodeMonths   = htmlMonthDoc.DocumentNode.SelectNodes("//div[@class='climate-calendar-day__day']/../..//div[@class='climate-calendar-day__detailed-container-center']");
                            foreach (var nodeDay in nodeMonths)
                            {
                                // Index 2 - daytime temperature, index 1 - night temperature
                                var tempWeatherArray = nodeDay.SelectNodes("*//div").Where(x => x.Name == "div").ToArray();

                                // Index 1 - pressure, index 3 - humidity, index 5 - wind speed
                                var paramWeatherArray = nodeDay.SelectNodes("*//td").Where(x => x.Name == "td").ToArray();

                                // Weather icon link. The XPath must start with "." to stay inside this
                                // day's node; a bare "//img" is evaluated from the document root and
                                // would return the first image of the page for every day.
                                var weatherIcon = @"https:" + nodeDay.SelectSingleNode(".//img").Attributes["src"].Value;

                                // Date header, split into day number and month name
                                var      dayMonth      = nodeDay.SelectSingleNode("h6").InnerText;// TODO: verify whether "h6" or "*h6" is the right path
                                string[] splitDayMonth = dayMonth.Split(new char[] { ' ', ',' }, StringSplitOptions.RemoveEmptyEntries);

                                // Built from its components so the result does not depend on the
                                // current culture's day/month order.
                                DateTime weatherdate = new DateTime(DateTime.Now.Year, dayMonthPairs[splitDayMonth[1]], int.Parse(splitDayMonth[0]));

                                // NOTE(review): index 5 is described above as wind speed but is stored
                                // in WindDirection — confirm which one is intended.
                                repository.Load(new WeatherForecast()
                                {
                                    CityId          = city.Id,
                                    WeatherDate     = weatherdate,
                                    TempDay         = tempWeatherArray[2].InnerText,
                                    TempNight       = tempWeatherArray[1].InnerText,
                                    Pressure        = paramWeatherArray[1].InnerText,
                                    AirHumidity     = paramWeatherArray[3].InnerText,
                                    WindDirection   = paramWeatherArray[5].InnerText,
                                    WeatherIconLink = weatherIcon
                                });
                                // Added to the repository
                            }
                            Console.WriteLine($"    Месяц: {nod.InnerText} города: {city.CityName}");
                        }
                        Console.WriteLine($"Город: {city.CityName} --- City-Id = {city.Id} / RegionId = {city.RegionId} --- {citiesCollection.Count()}");
                        // Save after each city (test)
                        repository.Save();
                    }
                }
                catch (Exception ex)
                {
                    Console.WriteLine(ex);
                }
            }
        }
コード例 #22
0
ファイル: Program.cs プロジェクト: tibuchivn/relaximage
        /// <summary>
        /// Walks listing pages 1 to 5 of xiuren.org and, for every column div inside a
        /// "loop" div of the main container, passes the first anchor's link and title to
        /// <c>TestImageOnePage</c>. Stops entirely when a page has no main container.
        /// </summary>
        public static void XuiRenSexyGirl()
        {
            try
            {
                // Neither of these two objects is used below; they are kept as in the original.
                var            bolService = new BOLService.BOLService();
                List <ImgLink> lst        = new List <ImgLink>();
                var            web        = new HtmlWeb();
                int            firstPage  = 1;
                int            lastPage   = 5;// TODO: current max value 27

                for (int pageNumber = firstPage; pageNumber <= lastPage; pageNumber++)
                {
                    string pageUrl  = string.Format("http://www.xiuren.org/page-{0}.html", pageNumber);
                    var    document = web.Load(pageUrl);
                    //TODO: Check valid
                    var mainDiv = document.DocumentNode.SelectSingleNode("//div[@id = 'main']");
                    if (mainDiv == null)
                    {
                        return;
                    }

                    foreach (HtmlNode section in mainDiv.ChildNodes)
                    {
                        // Only <div class="loop"> children are of interest.
                        if (!section.Name.Equals("div"))
                        {
                            continue;
                        }
                        if (section.Attributes["class"] == null ||
                            !section.Attributes["class"].Value.Equals("loop"))
                        {
                            continue;
                        }

                        //TODO: have 4 column
                        foreach (var column in section.ChildNodes)
                        {
                            if (!column.Name.Equals("div") || !column.HasChildNodes)
                            {
                                continue;
                            }

                            try
                            {
                                foreach (HtmlNode candidate in column.ChildNodes)
                                {
                                    if (!candidate.Name.Equals("a"))
                                    {
                                        continue;
                                    }

                                    // Only the first anchor of the column is processed.
                                    string title    = candidate.Attributes["title"].Value;
                                    string pageLink = candidate.Attributes["href"].Value;
                                    TestImageOnePage(pageLink, pageLink, "Sexy Girl", title);
                                    break;
                                }
                            }
                            catch (Exception ex)
                            {
                                //TODO: show error;
                                Console.WriteLine(ex.ToString());
                            }
                        }
                    }
                    Console.WriteLine("Finish page ^_^: " + pageUrl);
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.ToString());
            }
        }
コード例 #23
0
ファイル: MainWindow.xaml.cs プロジェクト: KindTis/Airi
        /// <summary>
        /// For every video that has the placeholder cover ("noimage") or an empty actor list,
        /// looks the title's "word-number" id up on the search site and refreshes the actor
        /// list and/or the cover image, updating the bound list boxes through the dispatcher.
        /// </summary>
        private void _UpdateMetaData()
        {
            var idPattern = new Regex(@"([\w]+)-([\d]+)",
                                      RegexOptions.Compiled | RegexOptions.IgnoreCase);

            foreach (var video in mAiri.Videos)
            {
                Dispatcher.Invoke((Action)(() =>
                {
                    this.Title = "Airi [" + video.strTitle + " 갱신 중...]";
                }));

                string coverName     = System.IO.Path.GetFileNameWithoutExtension(video.strImagePath);
                bool   coverMissing  = coverName == "noimage";
                bool   actorsMissing = video.actors.Count == 0;

                // Nothing to refresh for this video.
                if (!coverMissing && !actorsMissing)
                {
                    continue;
                }

                // Titles without an id-like "word-number" part cannot be searched.
                MatchCollection idMatches = idPattern.Matches(video.strTitle);
                if (idMatches.Count == 0)
                {
                    continue;
                }

                string videoId    = idMatches.First().Value;
                var    searchUrl  = @"http://www.b49t.com/en/vl_searchbyid.php?keyword=" + videoId;
                var    searchDoc  = mWeb.Load(searchUrl);
                var    resultNode = _GetProperNode(searchDoc.DocumentNode);
                if (resultNode == null)
                {
                    continue;
                }

                if (actorsMissing)
                {
                    _UpdateActorList(resultNode, video.actors);
                    foreach (var actor in video.actors)
                    {
                        if (!mActorListAll.Contains(actor))
                        {
                            mActorListAll.Add(actor);
                        }
                    }
                    // The entry at index 0 is left in place; only the rest is sorted.
                    mActorListAll.Sort(1, mActorListAll.Count - 1, null);

                    Dispatcher.Invoke((Action)(() =>
                    {
                        lbActorList.Items.Refresh();
                    }));
                }

                if (coverMissing)
                {
                    bool downloaded = _DownloadCoverImg(resultNode, video.strTitle);
                    if (downloaded)
                    {
                        video.strImagePath = System.IO.Path.GetFullPath(@"thumb/" + video.strTitle + @".jpg");
                    }
                    Dispatcher.Invoke((Action)(() =>
                    {
                        lbThumbnailList.Items.Refresh();
                    }));
                }
            }
        }
コード例 #24
0
        /// <summary>
        /// Downloads a page with HtmlAgilityPack and extracts two things: the article links
        /// to crawl next and the words of the page's title and paragraphs.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <param name="parsedData">
        /// Receives the title and paragraph text, stripped of everything except letters, digits
        /// and whitespace, split on single spaces. Empty when the page has no paragraphs or an
        /// error occurs.
        /// </param>
        /// <param name="sites">
        /// Receives up to <c>MAXSEEDS</c> distinct absolute en.wikipedia.org article URLs built
        /// from the page's "/wiki/" links, excluding non-article namespaces.
        /// </param>
        public static void GetWebDataAgility(string url, out List <string> parsedData, out List <string> sites)
        {
            const string wikiRoot = "https://en.wikipedia.org";

            // Links containing any of these markers are not followed.
            string[] blockedMarkers =
            {
                "File:", "Template:", "Wikipedia:", "Wikipedia_", "Help:", "Portal:", "Talk:",
                "Category:", "Special:", "Media:", "MediaWiki:", "User:"
            };

            string data = "";

            parsedData = new List <string>();
            sites      = new List <string>();
            try
            {
                // Get data dump
                var    webGet = new HtmlWeb();
                var    doc    = webGet.Load(url);
                string title  = "";

                // Title
                var node = doc.DocumentNode.SelectSingleNode("//title");
                if (node != null)
                {
                    title = node.InnerText;
                }

                // Websites. SelectNodes returns null when nothing matches, so a page without
                // links must not abort the paragraph extraction below.
                var links = doc.DocumentNode.SelectNodes("//a[@href]");
                if (links != null)
                {
                    foreach (HtmlNode link in links)
                    {
                        string href = link.Attributes["href"].Value;

                        // Block specific sites and duplications
                        if (href != null && href.StartsWith(@"/wiki/", StringComparison.Ordinal) &&
                            !sites.Contains(wikiRoot + href) &&
                            !blockedMarkers.Any(marker => href.Contains(marker)))
                        {
                            sites.Add(wikiRoot + href);
                        }

                        // Only take x number of sites
                        if (sites.Count >= MAXSEEDS)
                        {
                            break;
                        }
                    }
                }

                // Paragraphs
                var nodes = doc.DocumentNode.SelectNodes("//p");
                // Invalid data
                if (nodes == null)
                {
                    Console.WriteLine("Site invalid, no data retrieved.");
                    return;
                }
                data = title;
                foreach (var n in nodes)
                {
                    data += " " + n.InnerText;
                }

                // remove random characters
                data = new string(data
                                  .Where(x => char.IsWhiteSpace(x) || char.IsLetterOrDigit(x))
                                  .ToArray());
                // split into List
                parsedData = data.Split(' ').ToList();
            }
            catch (Exception e) { Console.WriteLine("Error in GetWebDataAgility(): " + e); }
        }
コード例 #25
0
ファイル: Helper.cs プロジェクト: Anil1111/C-Sharp-Advanced-2
        // ---------->   Need to correct bugs   <---------

        //public static List<ActiveJobs> SearchActiveJobForCompany(HtmlDocument doc)
        //{
        //    string pathForNames = "//div[@class=\"job-inner job-item-title\"]";
        //    string pathForData = "//div[@class='job-inner job-list-deadline']";
        //    string pathForLoacation = "//div[@class='job-inner job-location']";

        //    HtmlNodeCollection jobItemTitle = doc.DocumentNode.SelectNodes(pathForNames);
        //    HtmlNodeCollection jobԼistDeadline = doc.DocumentNode.SelectNodes(pathForData);
        //    HtmlNodeCollection jobLocation = doc.DocumentNode.SelectNodes(pathForLoacation);

        //    List<ActiveJobs> allActiveJobs = new List<ActiveJobs>();

        //if(jobItemTitle != null && )
        //{
        //    for (int i = 0; i < jobItemTitle.Count; i++)
        //    {
        //        var location = jobLocation[i].InnerText.Replace(" ", "").Replace("\n", "");
        //        var names = (jobItemTitle[i].InnerText.Replace(" ", "").Split('\n')
        //        .Select(item => item.Replace("\r", ""))).ToArray();

        //        var data = jobԼistDeadline[i].InnerText.Replace(" ", "").Split('\n')
        //                    .Select(item => item.Replace("\r", ""))
        //                    .Where(item => !string.IsNullOrEmpty(item)).ToArray();
        //        allActiveJobs.Add(new ActiveJobs { JobName = names[1], CompanyName = names[2], Data = string.Join(" ", data), Location = location });
        //    }
        //}

        //    return allActiveJobs;
        //}

        /// <summary>
        /// Scrolls the staff.am company listing to its end, collects every company page URL
        /// from the listing and scrapes each company page into a <c>Company</c>.
        /// Progress is reported through the <c>Status</c> and <c>flag</c> members of the model.
        /// </summary>
        /// <param name="data">A <c>DataModel</c> whose <c>Url</c> is the listing page to scrape.</param>
        /// <returns>One <c>Company</c> per company page that was scraped; empty when the listing has no company entries.</returns>
        /// <exception cref="ArgumentException">Thrown when <paramref name="data"/> is not a <c>DataModel</c>.</exception>
        public static List <Company> ScrapForStaffAM(object data)
        {
            DataModel dataTemp = data as DataModel;
            if (dataTemp == null)
            {
                throw new ArgumentException("Expected a non-null DataModel instance.", nameof(data));
            }

            string  urll = (string)dataTemp.Url;
            HtmlWeb web  = new HtmlWeb();

            dataTemp.Status = "Scrolling .....";
            string content = ScrollToEndAndGetSource(urll);

            dataTemp.flag   = false;
            dataTemp.Status = "Gathering Info of Companies....";
            Thread.Sleep(350);
            dataTemp.flag = true;

            Console.WriteLine();

            var doc = new HtmlDocument();

            doc.LoadHtml(content);

            string className = "//div[@class=\"company-action company_inner_right\"]";

            HtmlNodeCollection nodes        = doc.DocumentNode.SelectNodes(className);
            List <Company>     allCompanies = new List <Company>(); // Will hold every company with its info

            // SelectNodes returns null when nothing matches: no company entries, nothing to scrape.
            if (nodes == null)
            {
                return(allCompanies);
            }

            List <string> compURLList = new List <string>(); // Company page URLs

            foreach (HtmlNode node in nodes)
            {
                try
                {
                    // The link is taken as the first double-quoted value in the node's inner HTML.
                    string href    = node.InnerHtml;
                    var    splited = href.Split('"')[1];
                    compURLList.Add(@"https://staff.am" + splited);
                }
                catch (Exception)
                {
                    // Best effort: an entry without a quoted link is skipped.
                }
            }

            int count                = nodes.Count;
            int currentCompanyNumber = 0;

            // Case-insensitive, culture-independent substring test.
            bool Mentions(string text, string keyword)
            {
                return text.IndexOf(keyword, StringComparison.OrdinalIgnoreCase) >= 0;
            }

            try
            {
                foreach (var compURL in compURLList)
                {
                    currentCompanyNumber++;
                    dataTemp.Status = $"Remaining {(currentCompanyNumber*100)/count} % ....";
                    HtmlDocument       htmlDoc           = web.Load(compURL);
                    string             companyProperties = "//p[@class=\"professional-skills-description\"]";
                    HtmlNodeCollection htmlNodes         = htmlDoc.DocumentNode.SelectNodes(companyProperties); // All the property values in a collection
                    Company            company           = new Company();
                    ////  company.Jobs = SearchActiveJobForCompany(htmlDoc);

                    // A company page without property paragraphs simply leaves those fields unset.
                    if (htmlNodes != null)
                    {
                        foreach (var node in htmlNodes)
                        {
                            // Each paragraph is assigned to the first field whose keyword it mentions.
                            string inner = node.InnerText;
                            if (Mentions(inner, "industry"))
                            {
                                company.Industry = inner;
                            }
                            else if (Mentions(inner, "type"))
                            {
                                company.Type = inner;
                            }
                            else if (Mentions(inner, "number of employees"))
                            {
                                // Set only when the whole paragraph text parses as an integer.
                                if (int.TryParse(inner, out int x))
                                {
                                    company.NumbOfEmployees = x;
                                }
                            }
                            else if (Mentions(inner, "data of foundation"))
                            {
                                company.DataOfFoundation = inner;
                            }
                            else if (Mentions(inner, "website"))
                            {
                                company.WebSite = inner;
                            }
                            else if (Mentions(inner, "address"))
                            {
                                company.Adress = inner;
                            }
                        }
                    }

                    string             companyProp        = "//div[@class='col-lg-8 col-md-8 about-text']";
                    HtmlNodeCollection htmlNodesAboutComp = htmlDoc.DocumentNode.SelectNodes(companyProp);
                    if (htmlNodesAboutComp != null && htmlNodesAboutComp.Count > 0)
                    {
                        string text = htmlNodesAboutComp[0].InnerText.Replace("\n", "");
                        company.AboutCompany = text; // Text about the company
                    }

                    string             companyName    = "//h1[@class=\"text-left\"]";
                    HtmlNodeCollection htmlNodeOfName = htmlDoc.DocumentNode.SelectNodes(companyName);
                    // The name collection itself must be null-checked, not the "about" collection.
                    if (htmlNodeOfName != null && htmlNodeOfName.Count > 0)
                    {
                        company.Name = htmlNodeOfName[0].InnerText; // Company name
                    }

                    allCompanies.Add(company);
                }
            }
            catch (Exception e)
            {
                Program.WriteExceptionInFile(e.Message);
                // Rethrow without resetting the stack trace.
                throw;
            }
            return(allCompanies);
        }
コード例 #26
0
        /// <summary>
        /// Form load handler: hides both group boxes, shows the welcome text when
        /// <c>Data.Login_1</c> is false, and fills the status strip with the location,
        /// weather icon and weather summary scraped from tianqi.com. If anything in the
        /// weather step fails, a message box is shown, the error is logged and the status
        /// strip is hidden.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void Main_Load(object sender, EventArgs e)
        {
            groupBox1.Hide();
            groupBox2.Hide();

            if (!Data.Login_1)
            {
                label1.Text = "欢迎回来!";
            }
            dataGridView1.AutoSizeRowsMode = DataGridViewAutoSizeRowsMode.AllCellsExceptHeaders;

            #region Load weather
            try
            {
                HtmlWeb htmlWeb = new HtmlWeb();
                HtmlAgilityPack.HtmlDocument doc = htmlWeb.Load("http://i.tianqi.com/index.php?c=code&id=34&icon=1&num=2");

                // Show the location: text of the first anchor on the page
                HtmlNodeCollection htmlNodes = doc.DocumentNode.SelectNodes("//a");
                toolStripStatusLabel2.Text = htmlNodes[0].InnerText.Trim();

                // Weather icon; its src is protocol-relative, so "http:" is prepended
                htmlNodes = doc.DocumentNode.SelectNodes("//*[@id='mobile280']/div/a[2]/span[1]/img");
                string url = "http:" + htmlNodes[0].Attributes["src"].Value;

                System.Net.WebRequest webreq = System.Net.WebRequest.Create(url);
                // The response is disposed as well so the connection is released.
                using (System.Net.WebResponse webres = webreq.GetResponse())
                using (System.IO.Stream stream = webres.GetResponseStream())
                using (Image downloaded = Image.FromStream(stream))
                {
                    // An image created by Image.FromStream needs its stream kept open for the
                    // image's lifetime; the label gets an independent copy so the stream can close.
                    toolStripStatusLabel3.Image = new Bitmap(downloaded);
                }

                // Weather summary: concatenated text of every span of the widget page
                htmlNodes = doc.DocumentNode.SelectNodes("//span");
                String tianqi = "|温度:";
                foreach (var iteam in htmlNodes)
                {
                    tianqi += iteam.InnerText.Trim();
                }

                // The remaining parts come from the first p_2, p_3 and p_1 paragraphs of the main site
                doc       = htmlWeb.Load("http://www.tianqi.com/");
                htmlNodes = doc.DocumentNode.SelectNodes("//p[@class='p_2']");
                tianqi   += "|" + htmlNodes[0].InnerText;

                htmlNodes = doc.DocumentNode.SelectNodes("//p[@class='p_3']");
                tianqi   += "|" + htmlNodes[0].InnerText;

                // The p_1 text is placed in front of everything collected so far
                htmlNodes = doc.DocumentNode.SelectNodes("//p[@class='p_1']");
                tianqi    = htmlNodes[0].InnerText + tianqi;


                toolStripStatusLabel4.Text = tianqi + "|";
            }
            catch (Exception e1)
            {
                // Weather is optional: report the failure, log it and hide the status strip.
                MessageBox.Show("天气加载失败!", "提示");
                Data.WriteLog("天气加载失败!" + e1.Message.ToString(), 1);
                statusStrip1.Hide();
            }

            #endregion
        }
コード例 #27
0
        //public static readonly int TotalPages = 141;

        static void Main(string[] args)
        {
            HtmlWeb         web          = new HtmlWeb();
            WebClient       client       = new WebClient();
            List <string>   gamePageURLs = new List <string>();
            List <HtmlNode> extrasLinks  = new List <HtmlNode>();
            int             gameCount    = 0;

            bool skipPage = false;

            //iterate through each page of games
            for (int page = 1; page > 0; page++)
            {
                skipPage = false;

                Debug.WriteLine("Scraping Listing Page " + (page));
                HtmlNode rawPageNode = web.Load(Site + page).DocumentNode;

                //replacementdocs returns "File Not Found" when you ask for a bad page
                foreach (HtmlNode node in rawPageNode.Descendants(1))
                {
                    if (node.HasClass("bodymain"))
                    {
                        if (node.InnerHtml.Contains("File Not Found"))
                        {
                            skipPage = true;
                            break;
                        }
                    }
                }

                if (skipPage)
                {
                    continue;
                }

                //CONTINUE FROM HERE, Load page, correct URL, download into Platform folder

                gamePageURLs.AddRange(BuildGameNodeList(rawPageNode.Descendants(1)));

                gameCount = 1;
                foreach (string URL in gamePageURLs)
                {
                    Debug.WriteLine("Scraping Page #" + (page) + " Game #" + gameCount); gameCount++;
                    HtmlNode gamePage = web.Load(RootDownloadURL + URL).DocumentNode;

                    //find extras and add to list
                    foreach (HtmlNode gameNode in gamePage.Descendants(1))
                    {
                        if (gameNode.Name == "a")
                        {
                            if (gameNode.OuterHtml.Contains("ExtraID"))
                            {
                                extrasLinks.Add(gameNode);
                            }
                        }
                    }
                    if (Wait)
                    {
                        System.Threading.Thread.Sleep(500);
                    }

                    //for each extra, convert to link and metadata and download
                    foreach (HtmlNode extrasNode in extrasLinks)
                    {
                        string downloadUrl = RootDownloadURL + extrasNode.OuterHtml.Split('"')[1];

                        //build filename
                        string gameTitle = extrasNode.OwnerDocument.DocumentNode.ChildNodes["html"].ChildNodes["head"].ChildNodes["title"].InnerHtml.Replace(" @  Reloaded.org", "").Trim();
                        string extraType = WebUtility.HtmlDecode(extrasNode.InnerText).Replace('|', '-').Trim();
                        //string extension = Path.GetExtension(downloadUrl);
                        string filename = Utility.GetSafeFilename(gameTitle + "_" + extraType);

                        //build savepath
                        string subfolder = Utility.GetSafeFilename(gameTitle) + "\\";
                        string savePath  = SavePathRoot + subfolder;

                        //Have to determine extension after the fact
                        //if (extension == "")
                        //    continue;

                        if (!Directory.Exists(savePath))
                        {
                            Directory.CreateDirectory(savePath);
                        }

                        HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(downloadUrl);
                        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                        {
                            var contentName = response.Headers["Content-Disposition"].Split(new string[] { "=" }, StringSplitOptions.None)[1];
                            if (contentName[contentName.Length - 1] == ';')
                            {
                                contentName = contentName.TrimEnd(';');
                            }
                            string extension = Path.GetExtension(contentName);
                            if (!File.Exists(savePath + filename + extension))
                            {
                                Debug.WriteLine("Downloading " + filename + extension);
                                var responseStream = response.GetResponseStream();
                                using (var fileStream = File.Create(Path.Combine(savePath, filename + extension)))
                                {
                                    responseStream.CopyTo(fileStream);
                                }
                            }
                            else
                            {
                                Debug.WriteLine("Skipping " + filename + extension);
                            }
                        }

                        //if (!File.Exists(savePath + filename))
                        //{
                        //    Debug.WriteLine("Downloading " + filename);
                        //    try
                        //    {
                        //        client.DownloadFile(downloadUrl, savePath + filename);
                        //    }
                        //    catch (WebException ex)
                        //    {
                        //        if(ex.Message.Contains("The operation has timed out"))
                        //        {
                        //            Debug.WriteLine(filename + " timed out");
                        //            continue;
                        //        }
                        //    }

                        //    string extension = "";
                        //    string mimeType = Utility.GetMimeFromFile(savePath + filename);
                        //    switch (mimeType)
                        //    {
                        //        case "application/x-zip-compressed":
                        //            extension = "zip";
                        //            break;
                        //        case "image/pjpeg":
                        //            extension = "jpeg";
                        //            break;
                        //        case "text/richtext":
                        //            extension = "rtf";
                        //            break;
                        //        case "text/plain":
                        //            extension = "txt";
                        //            break;
                        //        case "application/pdf":
                        //            extension = "pdf";
                        //            break;
                        //        case "image/x-png":
                        //            extension = "png";
                        //            break;
                        //        default:
                        //            break;
                        //    }
                        //    if (Wait) System.Threading.Thread.Sleep(3000);
                        //}
                        //else
                        //    Debug.WriteLine("Skipping " + filename);
                    }

                    extrasLinks.Clear();
                }
                gamePageURLs.Clear();
                if (Wait)
                {
                    System.Threading.Thread.Sleep(500);
                }
            }
        }
コード例 #28
0
        /// <summary>
        /// Command-line entry point. Downloads the training program page at the URL given as the
        /// first argument, converts every matching table row that is not a rest day ("Vila") into
        /// a calendar event and writes the serialized calendar to an .ics file on the desktop.
        /// </summary>
        /// <param name="args">Command-line arguments; <c>args[0]</c> is the training program URL.</param>
        static void Main(string[] args)
        {
            // Without a URL there is nothing to do: tell the user and wait for a key press.
            if (args.Length < 1)
            {
                Console.WriteLine("Du måste ange en url där ett träningsprogram kan hämtas");
                Console.ReadKey();
                return;
            }

            var url = args[0];
            Console.WriteLine("Försöker läsa träningsprogram från {0}", url);

            // Fetch the page and pull out the pieces that describe the program as a whole.
            var loader = new HtmlWeb();
            var doc    = loader.Load(url);
            var root   = doc.DocumentNode;

            var headerNode   = root.SelectNodes("//*[@id=\"content\"]/div[2]/div/div/h2")[0];
            var header       = headerNode.InnerText.Replace(':', '.');
            var raceDateNode = root.SelectSingleNode("//*[@id=\"MainContent_m_adaptionDiv\"]/b/text()[2]");
            var raceDateText = raceDateNode.InnerText.Replace("-", "").Replace("&nbsp;", "").Trim();
            var raceDate     = ParseDateWithYear(raceDateText);

            // Expecting no programs to start more than 6 months before competition
            var minimumDate = raceDate.AddMonths(-6);
            var competition = ParseCompetitionName(doc, header);
            var tableNode   = root.SelectNodes("//*[@id=\"content\"]/div[2]/div/div/table")[0];
            var rows        = tableNode.Descendants("tr");
            var calendar    = new Calendar();

            Console.WriteLine("Exporterar kalender från träningsprogrammet...");
            calendar.AddTimeZone(new VTimeZone("Europe/Copenhagen"));

            foreach (var row in rows)
            {
                // Only rows of the week repeater are sessions; rows containing "MidPanorama" are skipped.
                if (!row.Id.Contains("weekRepeater") || row.InnerHtml.Contains("MidPanorama"))
                {
                    continue;
                }

                var cells   = row.Descendants("td").ToList();
                var subject = ParseSubject(cells);

                // Rows whose subject starts with "Vila" produce no event.
                if (subject.StartsWith("Vila"))
                {
                    continue;
                }

                var date     = ParseDateForRow(cells, minimumDate);
                var duration = ParseDuration(cells);

                // Every session is placed at 12:00 on its day.
                var start   = new DateTime(date.Year, date.Month, date.Day, 12, 0, 0);
                var end     = start.Add(duration);
                var link    = url + "#" + row.Id;
                var details = ParseDetails(cells, link) + $"\n\nMål: {competition}";

                var trainingEvent = new CalendarEvent()
                {
                    DtStart     = new CalDateTime(start),
                    DtEnd       = new CalDateTime(end),
                    Summary     = subject,
                    Description = details,
                    Class       = "PUBLIC",
                    IsAllDay    = false,
                    Categories  = new List<string>() { "Träning" }
                };
                calendar.Events.Add(trainingEvent);
            }

            // Serialize and store the calendar next to the user's other desktop files.
            var serializer         = new CalendarSerializer(new SerializationContext());
            var serializedCalendar = serializer.SerializeToString(calendar);
            var directory          = Environment.GetFolderPath(Environment.SpecialFolder.Desktop) + "\\";
            var filename           = $"{competition} - {header}.ics";
            var path               = Path.Combine(directory, filename);

            // Disposing the writer flushes and closes the file.
            using (var streamWriter = new StreamWriter(path))
            {
                streamWriter.Write(serializedCalendar);
            }

            Console.WriteLine("Klart! Träningsprogrammet sparades till {0}", path);
        }
コード例 #29
0
 /// <summary>
 /// Loads the document addressed by the original string of the page's URI.
 /// </summary>
 /// <param name="webPage">Page whose <c>Uri.OriginalString</c> is passed to the loader.</param>
 /// <returns>The result of <c>HtmlWeb.Load</c>, cast with <c>as</c> to <c>HtmlDocument</c>.</returns>
 public HtmlDocument Convert(IWebPage webPage)
 {
     HtmlWeb loader = new HtmlWeb();
     string address = webPage.Uri.OriginalString;

     return loader.Load(address) as HtmlDocument;
 }
コード例 #30
0
        /// <summary>
        /// Handles the "/dicio &lt;palavra&gt;" command: looks the word up on dicio.com.br and replies
        /// to the originating message with the meaning and definition scraped from the page.
        /// </summary>
        /// <param name="bot">Bot used to send the reply.</param>
        /// <param name="update">Incoming update; its message supplies the chat id and the reply-to id.</param>
        /// <param name="commandPieces">Command tokens; element 1 is used as the word to look up.</param>
        public void Execute(Bot bot, Types.Update update, string[] commandPieces)
        {
            if (commandPieces.Length < 2)
            {
                bot.SendMessage(
                    chatId: update.Message.Chat.Id,
                    text: "O comando foi utilizado de maneira incorreta.\nSiga o seguinte formato na próxima vez: \"/dicio <palavra>\".",
                    disableWebPagePreview: false,
                    replyToMessageId: update.Message.MessageId);
                return;
            }

            string palavraBusca = commandPieces[1];

            HtmlWeb web = new HtmlWeb
            {
                OverrideEncoding = Encoding.GetEncoding("iso-8859-1")
            };

            // NOTE(review): the search term goes into the query string unescaped. Which encoding the
            // site expects for escaped characters is not visible here, so this is left unchanged.
            HtmlDocument doc = web.Load(String.Format("http://www.dicio.com.br/pesquisa.php?q={0}", palavraBusca));

            bool paginaDefinicaoPalavra = doc.DocumentNode.SelectNodes("//div[@id='content']//h1[@itemprop='name']") != null;

            if (!paginaDefinicaoPalavra)
            {
                bool paginaMultiplasPalavras = doc.DocumentNode.SelectNodes("//*[@id='enchant']/a") == null && doc.DocumentNode.SelectNodes("//*[@id='content']/div[1]/p/text()[1]") == null;

                // Follow the first search result, if there is one with a usable href.
                string pathPaginaPrimeiraPalavra = null;
                if (paginaMultiplasPalavras)
                {
                    HtmlNode primeiroResultado = doc.DocumentNode.SelectSingleNode("//div[@id='content']//ul[@id='resultados']/li/a");
                    if (primeiroResultado != null && primeiroResultado.Attributes["href"] != null)
                    {
                        pathPaginaPrimeiraPalavra = primeiroResultado.Attributes["href"].Value;
                    }
                }

                if (pathPaginaPrimeiraPalavra == null)
                {
                    SendWordNotFound(bot, update, palavraBusca);
                    return;
                }

                doc = web.Load(String.Format("http://www.dicio.com.br{0}", pathPaginaPrimeiraPalavra));
            }

            // The XPath queries return null when the page does not have the expected layout;
            // answer "not found" instead of failing with a NullReferenceException.
            HtmlNode palavraNode     = doc.DocumentNode.SelectSingleNode("//*[@id='content']/div[1]/h1");
            HtmlNode significadoNode = doc.DocumentNode.SelectSingleNode("//*[@id='significado']");
            HtmlNode definicaoNode   = doc.DocumentNode.SelectSingleNode("//*[@id='content']/div[1]/p[2]");

            if (palavraNode == null || significadoNode == null || definicaoNode == null)
            {
                SendWordNotFound(bot, update, palavraBusca);
                return;
            }

            // TODO: find a less hacky way to clean up this HTML.
            string palavra     = palavraNode.InnerText;
            string significado = significadoNode.InnerHtml.Replace("<br>", "\n");
            string definicao   = definicaoNode.InnerHtml
                                 .Replace("<br>", "\n")
                                 .Replace("<b>", "")
                                 .Replace("</b>", "")
                                 .Replace("<span class=\"sep\">", "")
                                 .Replace("</span>", "")
                                 .Replace("</a>", "");

            definicao = StripAnchorOpenTags(definicao);

            bot.SendMessage(
                chatId: update.Message.Chat.Id,
                text: String.Format("Significado de {0}:\n{1}\n\nDefinição de {2}:\n{3}", palavra, significado, palavra, definicao),
                disableWebPagePreview: false,
                replyToMessageId: update.Message.MessageId);
        }

        /// <summary>
        /// Replies to the originating message that no meaning could be found for the word.
        /// </summary>
        private static void SendWordNotFound(Bot bot, Types.Update update, string palavraBusca)
        {
            bot.SendMessage(
                chatId: update.Message.Chat.Id,
                text: String.Format("Não foi possível encontrar o significado de \"{0}\".", palavraBusca),
                replyToMessageId: update.Message.MessageId);
        }

        /// <summary>
        /// Removes every opening anchor tag of the form <c>&lt;a href="…/"&gt;</c> from the text.
        /// </summary>
        /// <remarks>
        /// The terminator is searched for after the start of the tag. Searching from the beginning
        /// of the string could find a terminator that precedes the tag and produce a negative
        /// substring length.
        /// </remarks>
        private static string StripAnchorOpenTags(string html)
        {
            const string tagStart = "<a href=\"";
            const string tagEnd   = "/\">";

            while (true)
            {
                int startIndex = html.IndexOf(tagStart, StringComparison.Ordinal);
                if (startIndex == -1)
                {
                    return html;
                }

                int endIndex = html.IndexOf(tagEnd, startIndex + tagStart.Length, StringComparison.Ordinal);
                if (endIndex == -1)
                {
                    return html;
                }

                // The tag is never empty, so every pass shortens the text and the loop terminates.
                string anchorTag = html.Substring(startIndex, (endIndex + tagEnd.Length) - startIndex);
                html = html.Replace(anchorTag, "");
            }
        }
コード例 #31
0
        /// <inheritdoc />
        /// <remarks>
        /// Loads the page at <c>Url</c>, reads the season from the last nine characters of the
        /// first <c>header/h2</c> text and turns every row of the first <c>tbody</c> into a
        /// <c>Game</c>. Rows without cells, rows whose label starts with "Test" and rows whose
        /// fixture text cannot be split into two teams are skipped.
        /// </remarks>
        public Season ConstructSeason()
        {
            // parse website
            var web = new HtmlWeb();
            var doc = web.Load(Url);

            // determine season
            var tableHeader  = doc.DocumentNode.SelectSingleNode("//header/h2").InnerText;
            var seasonString = tableHeader.Substring(tableHeader.Length - 9, 9); // matches to '2018/2019'
            var season       = new Season(seasonString);

            // get and iterate over table of games
            var tbodyNode = doc.DocumentNode.SelectSingleNode("//tbody");

            foreach (var tbodyChildNode in tbodyNode.ChildNodes)
            {
                // ensure a non-empty table
                var trowChildNodes = tbodyChildNode.ChildNodes.Where(x => x.OriginalName == "td").ToList();
                if (trowChildNodes.Count == 0)
                {
                    continue;
                }

                // extract basic game facts: first cell 'dd.MM.yyyy-...', second cell 'HH:mm...'
                var date   = trowChildNodes[0].InnerText.Split('-')[0].Split('.');
                var day    = Convert.ToInt32(date[0]);
                var month  = Convert.ToInt32(date[1]);
                var year   = Convert.ToInt32(date[2]);
                var hour   = Convert.ToInt32(trowChildNodes[1].InnerText.Substring(0, 2));
                var minute = Convert.ToInt32(trowChildNodes[1].InnerText.Substring(3, 2));

                // determine game label and fixture text
                var teamsString = "";
                var gameString  = "";
                var teamFinder  = trowChildNodes[2].ChildNodes.Where(x => x.OriginalName == "span" || x.OriginalName == "a").ToList();
                if (teamFinder.Count == 2) // sometimes the root element directly contains two span-objects...
                {
                    gameString  = teamFinder[0].InnerText;
                    teamsString = teamFinder[1].InnerText;
                }
                else if (teamFinder.Count == 1) // and other times one a-object with two nested span-objects
                {
                    gameString  = teamFinder[0].ChildNodes[1].InnerText;
                    teamsString = teamFinder[0].ChildNodes[3].InnerText;
                }

                // ignore 'Testspiele'
                if (gameString.Split('-')[0].StartsWith("Test"))
                {
                    continue;
                }

                // Split 'Team A - Team B' at the spaced dash so that hyphenated club names stay
                // intact; fall back to the first bare '-' when no spaced dash is present.
                const string teamSeparator = " - ";
                var separatorIndex  = teamsString.IndexOf(teamSeparator, StringComparison.Ordinal);
                var separatorLength = teamSeparator.Length;
                if (separatorIndex < 0)
                {
                    separatorIndex  = teamsString.IndexOf('-');
                    separatorLength = 1;
                }

                // No separator at all (e.g. an unexpected cell layout): nothing usable in this row.
                if (separatorIndex < 0)
                {
                    continue;
                }

                var homeTeam = teamsString.Substring(0, separatorIndex).Trim();
                var awayTeam = teamsString.Substring(separatorIndex + separatorLength).Trim();

                // find out location of the game (home or away)
                string location;
                string opponent;
                if (homeTeam.Contains("Dynamo Dresden"))
                {
                    location = "Dresden";
                    opponent = awayTeam;
                }
                else
                {
                    // away game: the home team is both the opponent and the name used as location
                    location = homeTeam;
                    opponent = homeTeam;
                }

                // each game gets its unique identifier
                var identifier = $"{season.StartYear}/{season.EndYear} - {gameString}";

                season.Games.Add(new Game(new DateTime(year, month, day, hour, minute, 0), location, opponent, identifier));
            }

            return(season);
        }
コード例 #32
0
        /// <summary>
        /// Looks a song up through the lyrics.wikia.com API and, when the API reports a lyrics
        /// page, scrapes the lyric text from that page.
        /// </summary>
        /// <param name="artist">Artist name; <c>null</c> is treated as an empty string.</param>
        /// <param name="title">Song title; <c>null</c> is treated as an empty string.</param>
        /// <returns>
        /// The lyrics with line breaks as "\r\n" and all HTML tags removed, or an empty string when
        /// the song is unknown, the page does not have the expected layout, or the page only shows
        /// the "not licensed" notice.
        /// </returns>
        public static string GetFromLyricWiki(string artist, string title)
        {
            artist = HttpUtility.UrlEncode((artist + ""), ISOEncoding).Replace(".", "%2E");
            title  = HttpUtility.UrlEncode((title + ""), ISOEncoding).Replace(".", "%2E");

            // Ask the API whether the song is known. The response and reader are disposed as soon
            // as the XML has been read so the connection is not left open.
            XmlDocument doc1    = new XmlDocument();
            WebRequest  request = HttpWebRequest.Create(string.Format("http://lyrics.wikia.com/api.php?func=getSong&artist={0}&song={1}&fmt=xml", artist, title));

            using (WebResponse response = request.GetResponse())
            using (StreamReader reader = new StreamReader(response.GetResponseStream()))
            {
                doc1.LoadXml(reader.ReadToEnd());
            }

            // Check whether they have the lyrics.
            XmlNode lyricsNode = doc1.SelectSingleNode("//lyrics");
            if (lyricsNode == null || lyricsNode.InnerText == "Not found")
            {
                return(string.Empty);
            }

            // If they do, request the URL where the complete lyrics are shown.
            XmlNode urlNode = doc1.SelectSingleNode("//url");
            if (urlNode == null)
            {
                return(string.Empty);
            }

            HtmlWeb      web  = new HtmlWeb();
            HtmlDocument doc  = web.Load(urlNode.InnerText);
            HtmlNode     node = doc.DocumentNode.SelectSingleNode("//div[@class='lyricbox']");

            if (node == null)
            {
                return(string.Empty);
            }

            // SelectNodes returns null rather than an empty collection when nothing matches.
            HtmlNodeCollection comments = node.SelectNodes("//comment()");
            if (comments != null)
            {
                foreach (HtmlNode comment in comments)
                {
                    comment.ParentNode.RemoveChild(comment);
                }
            }

            // Remove unneeded elements: the first child and the last four children of the box.
            // A box with fewer children does not have the expected layout.
            if (node.ChildNodes.Count < 5)
            {
                return(string.Empty);
            }

            node.FirstChild.Remove();
            for (int i = 0; i < 4; i++)
            {
                node.LastChild.Remove();
            }

            // Replace the <br> tags with conventional line breaks.
            string rep = Regex.Replace(WebUtility.HtmlDecode(node.InnerHtml), "\\s*<br ?\\/?>\\s*", "\r\n");
            // Remove any remaining HTML tag.
            rep = Regex.Replace(rep, "<[^>]+>", "");

            // If the site does not have the full lyrics, return nothing so other sources can be tried.
            if (Regex.IsMatch(rep, "Unfortunately, we are not licensed to display the full lyrics for this song at the moment"))
            {
                return(string.Empty);
            }

            return(rep);
        }
コード例 #33
0
        /// <summary>
        /// Scrapes a King County route page and builds one <c>BusRoute</c> per travel direction.
        /// </summary>
        /// <param name="webLoader">Loader used to download the route page.</param>
        /// <param name="uri">Address of the route page.</param>
        /// <returns>
        /// Two routes (A-to-B first, then B-to-A), or an empty array when the address points to a
        /// PDF, the schedule list is missing, or the route number is not an integer.
        /// </returns>
        internal static BusRoute[] GetKingCountyRoute(HtmlWeb webLoader, Uri uri)
        {
            // PDFs are never parsed, so decide before downloading anything.
            if (uri.AbsoluteUri.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase))
            {
                return(new BusRoute[0]);
            }

            HtmlDocument routeDoc  = webLoader.Load(uri);
            HtmlNode     routeNode = routeDoc.DocumentNode;

            // The page markup is read once and reused for every lookup below.
            string pageHtml = routeNode.OuterHtml;

            // Route metadata is taken from JavaScript variable assignments embedded in the page.
            string routeNumber = HttpUtility.HtmlDecode(StripHTML(GetParameter(pageHtml, "var routeNumber=\'", "\'")));
            string routeName   = HttpUtility.HtmlDecode(StripHTML(GetParameter(pageHtml, "var routeName=\'", "\'")))?.TrimStart('0');
            string aDestShort  = HttpUtility.HtmlDecode(StripHTML(GetParameter(pageHtml, "var aDestShort=\'", "\'")))?.Replace("To ", "").Trim();
            string bDestShort  = HttpUtility.HtmlDecode(StripHTML(GetParameter(pageHtml, "var bDestShort=\'", "\'")))?.Replace("To ", "").Trim();

            // Pages without the schedule list yield no routes.
            HtmlNode daysNode = routeNode.SelectSingleNode("//*[@id=\"schedule_wrapper\"]/ul[1]");
            if (daysNode == null)
            {
                return(new BusRoute[0]);
            }

            // Five bits set when the page hides the Saturday and Sunday buttons, all seven otherwise.
            bool onlyWeekday     = pageHtml.Contains("/* Hiding Saturday and Sunday buttons */");
            byte daysOfOperation = (byte)(onlyWeekday ? 0b1111100 : 0b1111111);

            if (!int.TryParse(routeNumber, out int routeId))
            {
                return(new BusRoute[0]);
            }

            BusRoute toRoute   = BuildRoute(aDestShort, bDestShort, true);
            BusRoute fromRoute = BuildRoute(bDestShort, aDestShort, false);

            return(new BusRoute[]
            {
                toRoute,
                fromRoute
            });

            // Builds the route for one direction and attaches the stops parsed from its table CSV.
            BusRoute BuildRoute(string fromName, string toName, bool direction)
            {
                BusRoute route = new BusRoute();

                route.RouteName       = routeName;
                route.RouteNumber     = routeId;
                route.FromName        = fromName;
                route.ToName          = toName;
                route.DaysOfOperation = daysOfOperation;

                string csv = GetRouteTableCSV(routeNumber, direction);
                route.Stops = GetBusStops(route, csv);

                return route;
            }
        }
コード例 #34
0
ファイル: Html2Rss.cs プロジェクト: dbose/raagahacker
        /// <summary>
        /// Builds an RSS 0.91 document from the article links of a page and saves it as "rss.xml".
        /// Does nothing when the page contains no matching links.
        /// </summary>
        /// <param name="hw">Loader used to fetch the page.</param>
        /// <param name="url">Address of the page; also written as the channel link.</param>
        static void ManualWay(HtmlWeb hw, string url)
        {
            // Fetch the page.
            HtmlDocument page = hw.Load(url);

            // Articles are the anchors that carry both an href and target="_new".
            HtmlNodeCollection articleLinks = page.DocumentNode.SelectNodes("//a[@href and @target='_new']");
            if (articleLinks == null)
            {
                return;
            }

            // Start from an empty feed declared with the page's encoding.
            XmlDocument feed = new XmlDocument();
            feed.LoadXml("<?xml version=\"1.0\" encoding=\"" + page.Encoding.BodyName + "\"?><rss version=\"0.91\"/>");

            // The rss element follows the XML declaration; the channel goes inside it.
            XmlElement channel = feed.CreateElement("channel");
            feed.DocumentElement.AppendChild(channel);

            XmlElement channelTitle = feed.CreateElement("title");
            channelTitle.InnerText = "ASP.Net articles scrap RSS feed";
            channel.AppendChild(channelTitle);

            XmlElement channelLink = feed.CreateElement("link");
            channelLink.InnerText = url;
            channel.AppendChild(channelLink);

            // One item per article link.
            foreach (HtmlNode anchor in articleLinks)
            {
                string articleUrl   = anchor.Attributes["href"].Value;
                string articleTitle = anchor.InnerText;

                // The description is the text of a sibling div, when there is one.
                HtmlNode summaryNode = anchor.SelectSingleNode("../div/text()");
                string   summary     = summaryNode != null ? summaryNode.InnerText : null;

                XmlElement item = feed.CreateElement("item");
                channel.AppendChild(item);

                XmlElement itemTitle = feed.CreateElement("title");
                itemTitle.InnerText = articleTitle;
                item.AppendChild(itemTitle);

                XmlElement itemLink = feed.CreateElement("link");
                itemLink.InnerText = articleUrl;
                item.AppendChild(itemLink);

                // The description is not always present.
                if (!string.IsNullOrEmpty(summary))
                {
                    XmlElement itemDescription = feed.CreateElement("description");
                    itemDescription.InnerText = summary;
                    item.AppendChild(itemDescription);
                }
            }

            feed.Save("rss.xml");
        }
コード例 #35
0
ファイル: Form1.cs プロジェクト: elifozlem/whemovies
        /// <summary>
        /// Visits the page of every stored link, scrapes topic, trailer, IMDB id and embedded
        /// videos from it, and stores the resulting movie and its parts when they are not in the
        /// database yet.
        /// </summary>
        /// <remarks>
        /// A part is identified by its link text and a movie by its id; rows that already exist
        /// are not inserted again. Pages without any video get a single part with an empty link.
        /// </remarks>
        public void LoadData()
        {
            var web = new HtmlWeb();

            // Presumably executed so that the rows are tracked and show up in db.Links.Local.
            db.Links.ToList();

            for (int i = 0; i < db.Links.Local.Count; i++)
            {
                var link = db.Links.Local[i];
                var doc  = web.Load(link.LINK1);

                // Each query returns null (not an empty collection) when nothing matches.
                var trailers = doc.DocumentNode.SelectNodes("//div[@class='KoD8 mrtrailer']/a");
                var imdb     = doc.DocumentNode.SelectNodes("//div[@class='KoD8 imdb']/em/a");
                var video    = doc.DocumentNode.SelectNodes("//div[@class='icerik']/div[@class='postTabs_divs']/p[iframe]");
                var topic    = doc.DocumentNode.SelectNodes("//p");

                // The movie id comes from the link and is set for every movie, not only for
                // those that have an IMDB link.
                var mv = new Movie();
                mv.NAME    = link.NAME;
                mv.MOVIEID = link.LINKID;

                // The last non-empty paragraph becomes the topic.
                if (topic != null)
                {
                    foreach (var item in topic)
                    {
                        var tp = item.InnerText;
                        if (tp != "")
                        {
                            mv.TOPIC = tp;
                        }
                    }
                }

                // The last trailer anchor wins; no trailer yields an empty string.
                mv.TRAILER = "";
                if (trailers != null)
                {
                    foreach (var item in trailers)
                    {
                        mv.TRAILER = item.Attributes["href"].Value;
                    }
                }

                // The last IMDB anchor wins; no anchor yields an empty string.
                mv.IMDB = "";
                if (imdb != null)
                {
                    foreach (var item in imdb)
                    {
                        var imdbUrl = item.Attributes["href"].Value;
                        // NOTE(review): 26 presumably skips a fixed URL prefix such as
                        // "http://www.imdb.com/title/" - confirm against real pages.
                        mv.IMDB = imdbUrl.Substring(26);
                    }
                }

                // One part per embedded video, or a single part with an empty link when the
                // page has none.
                var videoLinks = video != null
                    ? video.Select(item => item.InnerHtml).ToList()
                    : new List<string> { "" };

                foreach (var videoLink in videoLinks)
                {
                    if (!db.Parts.Any(m => m.Link == videoLink))
                    {
                        // A fresh entity per video: re-adding one shared instance would only
                        // overwrite the link of the part that is already tracked.
                        var pt = new Part();
                        pt.MOVIEID = mv.MOVIEID;
                        pt.Link    = videoLink;

                        db.Parts.Add(pt);
                        db.SaveChanges();
                    }
                }

                var movieId = mv.MOVIEID;
                if (!db.Movies.Any(m => m.MOVIEID == movieId))
                {
                    db.Movies.Add(mv);
                    db.SaveChanges();
                }
            }
        }