A utility class for fetching an HTML document over HTTP.
        /// <summary>
        /// Downloads the BBC football results fragment and walks every table row that carries
        /// an <c>id</c> attribute, extracting the match id, both scores, both team names and
        /// the match date.
        /// </summary>
        /// <remarks>
        /// The extracted values are held in locals only and are discarded at the end of each
        /// iteration; nothing is stored on the instance.
        /// </remarks>
        public datascraper()
        {
            string url = @"http://www.bbc.co.uk/sport/football/results/partial/competition-118996114";
            HtmlWeb htmlWeb = new HtmlWeb();
            HtmlDocument doc = htmlWeb.Load(url);

            // SelectNodes returns null (not an empty collection) when nothing matches.
            HtmlNodeCollection mtchrslts = doc.DocumentNode.SelectNodes("//tr[@id]");
            if (mtchrslts == null)
            {
                return;
            }

            foreach (HtmlNode matchresult in mtchrslts)
            {
                // An XPath starting with "//" is evaluated from the document root even when it is
                // called on a child node, so it would return the first match of the whole page on
                // every iteration. ".//" keeps the lookup inside the current row.
                HtmlNode scoreNode = matchresult.SelectSingleNode(".//abbr[@title='Score']");
                HtmlNode homeNode = matchresult.SelectSingleNode(".//p[(@class='team-home teams')]");
                HtmlNode awayNode = matchresult.SelectSingleNode(".//p[(@class='team-away teams')]");
                HtmlNode dateNode = matchresult.SelectSingleNode(".//td[(@class='match-date')]");
                if (scoreNode == null || homeNode == null || awayNode == null || dateNode == null)
                {
                    // Not a complete match row; skip it.
                    continue;
                }

                string[] teamscores = scoreNode.InnerText.Split('-');
                if (teamscores.Length < 2)
                {
                    // Score text is not in "home-away" form.
                    continue;
                }

                // The row itself is the tr[@id] element, so its id is read directly.
                string idnum = matchresult.GetAttributeValue("id", "").Replace("match-row-", "");
                string teamscoreh = teamscores[0];
                string teamscorea = teamscores[1];
                string hteam = homeNode.InnerText;
                string ateam = awayNode.InnerText;
                string date = dateNode.InnerText;
            }
        }
Example #2
0
 /// <summary>
 /// Click handler: loads the coincost.net currency page, fills the price and name
 /// collections from it and appends the same values to <c>listBox1</c>.
 /// </summary>
 private void button1_Click(object sender, EventArgs e)
 {
     // Before changing the XPath expressions, inspect the page in a browser (Ctrl+F) to find the nodes needed.
     HtmlAgilityPack.HtmlWeb      web = new HtmlAgilityPack.HtmlWeb();
     HtmlAgilityPack.HtmlDocument doc = web.Load("https://coincost.net/es/currencies");

     // SelectNodes returns null when nothing matches, so guard before iterating.
     var priceNodes = doc.DocumentNode.SelectNodes("//td [@class = 'price'] //p");
     if (priceNodes != null)
     {
         foreach (var item in priceNodes)
         {
             preciosMalo.Add(item.InnerText);
         }
     }

     // Only every second raw entry is used, with its first four characters dropped
     // (presumably a currency prefix - confirm against the page).
     for (int n = 0; n < preciosMalo.Count; n = n + 2)
     {
         string usd    = preciosMalo[n];
         // Keep one output per input so prices stay aligned with names even for short text.
         string numero = usd.Length >= 4 ? usd.Substring(4) : string.Empty;
         precios.Add(numero);
         listBox1.Items.Add(numero);
     }
     listBox1.Items.Add("-------------------------------------");

     var nameNodes = doc.DocumentNode.SelectNodes("//td [@class = 'title'] //span");
     if (nameNodes != null)
     {
         foreach (var item in nameNodes)
         {
             nombresMalo.Add(item.InnerText);
         }
     }

     // Names are taken from the odd positions of the raw list.
     for (int d = 1; d < nombresMalo.Count; d = d + 2)
     {
         nombres.Add(nombresMalo[d]);
         listBox1.Items.Add(nombresMalo[d]);
     }
     // To find the last value use "//td[@class = 'right tar']"
     // To find the symbol use "//b"
 }
Example #3
0
 /// <summary>
 /// Downloads and parses the page at <paramref name="url"/>, sending a Firefox 31
 /// user-agent string with the request.
 /// </summary>
 /// <param name="url">Address of the page to load.</param>
 /// <returns>The parsed document.</returns>
 public HtmlDocument ReadLink(string url)
 {
     var browser = new HtmlAgilityPack.HtmlWeb
     {
         UserAgent = "Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0"
     };

     return browser.Load(url);
 }
Example #4
0
        /// <summary>
        /// Loads the "full" view of a comic chapter, collects the <c>src</c> of every
        /// <c>img.chapter_img</c> element, prints them, and hands them to
        /// <c>downloadFunctions.Download</c>.
        /// </summary>
        /// <param name="url">Chapter address; "/full" is appended when it is not already the suffix.</param>
        /// <param name="path">Passed through unchanged to <c>downloadFunctions.Download</c>.</param>
        public static void comicDown(string url, string path)
        {
            // Example input: https://www.comicextra.com/invincible-iron-man-2015/chapter-14

            // EndsWith replaces Substring(Length - 5), which threw for URLs shorter than five characters.
            const string fullSuffix = "/full";
            if (!url.EndsWith(fullSuffix, System.StringComparison.Ordinal))
            {
                url = url + fullSuffix;
            }

            HtmlAgilityPack.HtmlWeb      hw  = new HtmlAgilityPack.HtmlWeb();
            HtmlAgilityPack.HtmlDocument doc = hw.Load(url);
            List <string> linky = new List <string>();

            // SelectNodes returns null when the page has no chapter images.
            var images = doc.DocumentNode.SelectNodes("//img[@class='chapter_img']");
            if (images == null)
            {
                return;
            }

            foreach (HtmlNode link in images)
            {
                linky.Add(link.GetAttributeValue("src", ""));
            }
            foreach (var item in linky)
            {
                Console.WriteLine(item);
            }

            // The referer used to be hard-coded to one specific chapter; derive it from the page
            // actually being downloaded (identical for the example URL above).
            string refer = "referer: " + url;

            // The folder name is the last 15 characters of the URL, or the whole URL when shorter.
            const int folderNameLength = 15;
            string FolderName = url.Length > folderNameLength
                ? url.Substring(url.Length - folderNameLength)
                : url;

            downloadFunctions.Download(linky, path, refer, FolderName);
        }
Example #5
0
        /// <summary>
        /// Queries the vndb.org listing page for <paramref name="searchString"/> and converts
        /// each result row into a <c>VN</c>.
        /// </summary>
        /// <param name="searchString">Text appended to the <c>sq</c> query parameter.</param>
        /// <returns>
        /// The entries parsed so far. The list is empty when nothing matched, and may be partial
        /// when loading or parsing failed part-way.
        /// </returns>
        public async Task <List <VN> > GetVNSearch(string searchString)
        {
            List <VN> result = new List <VN>();
            // NOTE(review): searchString is concatenated as-is; confirm whether callers URL-encode it.
            string    url    = "https://vndb.org/v/all?sq=" + searchString;

            try
            {
                var web = new HtmlAgilityPack.HtmlWeb();
                web.CaptureRedirect = true;

                HtmlDocument doc = await web.LoadFromWebAsync(url);

                // SelectNodes returns null when no row matches.
                var searchList = doc.DocumentNode.SelectNodes("//table[@class='stripe']/tr");
                if (searchList == null)
                {
                    return(result);
                }

                foreach (var search in searchList)
                {
                    var linkNode  = search.SelectSingleNode("./td[@class='tc1']/a");
                    var titleCell = search.SelectSingleNode("./td[@class='tc1']");
                    var dateCell  = search.SelectSingleNode("./td[@class='tc4']");
                    if (linkNode == null || titleCell == null || dateCell == null)
                    {
                        // Rows without the expected cells (e.g. a header row) are skipped
                        // instead of aborting the whole search with a NullReferenceException.
                        continue;
                    }

                    string id      = linkNode.GetAttributeValue("href", "");
                    string japName = linkNode.GetAttributeValue("title", "");
                    string engName = titleCell.InnerText;
                    string date    = dateCell.InnerText;
                    result.Add(new VN(ExtractId(id), new Name(engName, japName), date));
                }
            }
            catch (Exception e)
            {
                // Still best-effort (the caller gets whatever was parsed), but no longer silent.
                System.Diagnostics.Debug.WriteLine(e);
            }
            return(result);
        }
Example #6
0
        /// <summary>
        /// Console entry point: reads a search term, loads the search page built from
        /// <c>baseUrl</c>, copies book titles, author names and ratings into their lists,
        /// then calls <c>showOutput</c>.
        /// </summary>
        static void Main(string[] args)
        {
            Console.WriteLine("Silahkan masukkan jenis buku...");
            // Console.ReadLine returns null when standard input is closed.
            string searchKey = Console.ReadLine() ?? string.Empty;

            Console.WriteLine("Sedang mencari informasi buku...");
            baseUrl = baseUrl.Replace("query=", "query=" + searchKey).Replace(" ", "+");

            HtmlAgilityPack.HtmlWeb      web = new HtmlAgilityPack.HtmlWeb();
            HtmlAgilityPack.HtmlDocument doc = web.Load(baseUrl);

            // SelectNodes returns null when nothing matches; calling ToList() on that result threw.
            var bookName = doc.DocumentNode.SelectNodes("//a[@class='bookTitle']//span[@itemprop='name']");
            var penulis  = doc.DocumentNode.SelectNodes("//a[@class='authorName']//span[@itemprop='name']");
            var rating   = doc.DocumentNode.SelectNodes("//span[@class='minirating']");

            if (bookName != null)
            {
                foreach (var book in bookName)
                {
                    bookList.Add(book.InnerText);
                }
            }
            if (penulis != null)
            {
                foreach (var writter in penulis)
                {
                    penulisList.Add(writter.InnerText);
                }
            }
            if (rating != null)
            {
                foreach (var rate in rating)
                {
                    ratingList.Add(rate.InnerText);
                }
            }

            showOutput();
            Console.ReadLine();
        }
Example #7
0
        /// <summary>
        /// Creates the service, assigning a new <c>HtmlWeb</c> to <c>_web</c> and a new
        /// <c>UrlService</c> to <c>_urlervice</c>.
        /// </summary>
        public WebScraperService()
        {
            _web = new HtmlWeb();

            // _htmlDoc = new HtmlAgilityPack.HtmlDocument();
            _urlervice = new UrlService();
        }
Example #8
0
        /// <summary>
        /// Loads a GitHub Jobs result page and appends one <c>JobListing</c> to
        /// <paramref name="jobListings"/> for every title cell that has exactly five child nodes.
        /// </summary>
        /// <param name="url">Address of the result page to load.</param>
        /// <param name="jobListings">List that receives the parsed listings.</param>
        private void AddGithubJobs(string url, List<JobListing> jobListings)
        {
            HtmlWeb page = new HtmlWeb();
            var document = page.Load(url);

            string baseURL = "https://jobs.github.com";

            try
            {
                // A missing table or an empty result set is an expected outcome, not an error:
                // both lookups return null, so check instead of relying on a swallowed exception.
                HtmlNode table = document.DocumentNode.SelectSingleNode("//table[@class='positionlist']");
                if (table == null)
                {
                    return;
                }

                HtmlNodeCollection rows = table.SelectNodes(".//td[@class='title']");
                if (rows == null)
                {
                    return;
                }

                foreach (HtmlNode row in rows)
                {
                    // Only cells with exactly five child nodes are read; the indices below
                    // depend on that layout.
                    if (row.ChildNodes.Count != 5)
                    {
                        continue;
                    }

                    jobListings.Add(new JobListing()
                    {
                        SearchEngine = SearchEngines.GitHub,
                        Title = row.ChildNodes[1].InnerText,
                        Company = row.ChildNodes[3].ChildNodes[1].InnerText,
                        URL = baseURL + row.ChildNodes[1].ChildNodes[0].Attributes[0].Value
                    });
                }
            }
            catch (System.Exception ex)
            {
                // Parsing stays best-effort (an unexpected row layout must not crash the caller),
                // but the failure is now traceable instead of vanishing.
                System.Diagnostics.Debug.WriteLine(ex);
            }
        }
Example #9
0
        /// <summary>
        /// Loads the ranked-song page with HtmlAgilityPack and parses it into a list of beatmaps.
        /// </summary>
        /// <param name="xpModel">
        /// Configuration supplying the page address (<c>WebRankListUrl</c>) and the XPath
        /// expressions used to locate the entries and each of their fields.
        /// </param>
        /// <returns>The parsed beatmaps; empty when the category XPath matches nothing.</returns>
        private List<BeatMap> GetBeatMapsListByHAP(ConfigModel xpModel)
        {
            List<BeatMap> beatMaps = new List<BeatMap>();
            HtmlDocument hDoc = new HtmlWeb().Load(xpModel.WebRankListUrl);  // Download and parse the page.

            // Locate the repeated entries; SelectNodes returns null when nothing matches.
            HtmlNodeCollection categoryNodeList = hDoc.DocumentNode.SelectNodes(xpModel.CategoryListXPath);
            if (categoryNodeList == null)
            {
                return beatMaps;
            }

            foreach (HtmlNode categoryNode in categoryNodeList)
            {
                // Re-parse the entry as a standalone fragment, presumably so the configured
                // XPath expressions only see this entry - TODO confirm against the config values.
                HtmlNode temp = HtmlNode.CreateNode(categoryNode.OuterHtml);

                BeatMap beatMap = new BeatMap();
                beatMap.Id = temp.SelectSingleNode(xpModel.IdXPath).GetAttributeValue("Id", 0);
                beatMap.Title = temp.SelectSingleNode(xpModel.TitleXPath).InnerText;
                beatMap.Artist = temp.SelectSingleNode(xpModel.ArtistXPath).InnerText;
                beatMap.Mapper = temp.SelectSingleNode(xpModel.MapperXPath).InnerText;
                beatMap.Styles = temp.SelectSingleNode(xpModel.StylesXPath).InnerText;
                beatMap.Language = temp.SelectSingleNode(xpModel.LanguageXPath).InnerText;

                // The Sb node is optional: evaluate its XPath once and fall back to "NoSb".
                HtmlNode sbNode = temp.SelectSingleNode(xpModel.SbXPath);
                beatMap.Sb = sbNode != null ? sbNode.GetAttributeValue("class", "NoSb") : "NoSb";

                beatMaps.Add(beatMap);
            }

            return beatMaps;
        }
Example #10
0
 /// <summary>
 /// Loads a Google search page for <paramref name="phrase"/> and reads the first number
 /// found in the <c>resultStats</c> element.
 /// </summary>
 /// <param name="phrase">Search text appended to the query string as-is.</param>
 /// <returns>
 /// The parsed number; 0 when no whole number could be parsed; -1 when the document reports
 /// parse errors or the <c>resultStats</c> element is missing.
 /// </returns>
 long NumOfHits(string phrase)
 {
     HtmlAgilityPack.HtmlWeb      web     = new HtmlAgilityPack.HtmlWeb();
     HtmlAgilityPack.HtmlDocument htmlDoc = web.Load("https://www.google.com/search?q=" + phrase);
     if (htmlDoc.ParseErrors != null && htmlDoc.ParseErrors.Count() > 0)
     {
         // Handle any parse errors as required.
         System.Console.WriteLine("error");
         debug.Print("error\n");
         return(-1);
     }
     if (htmlDoc.DocumentNode == null)
     {
         return(-1);
     }

     // SelectSingleNode returns null when the element is absent.
     HtmlAgilityPack.HtmlNode node = htmlDoc.DocumentNode.SelectSingleNode("//div[@id='resultStats']");
     if (node == null)
     {
         return(-1);
     }

     Regex  re     = new Regex(@"[1-9](?:\d{0,2})(?:,\d{3})*(?:\.\d*[1-9])?|0?\.\d*[1-9]|0");
     String result = re.Match(node.InnerHtml).Value;

     // Parse every match, not only those containing a comma: previously a count below 1,000
     // (no thousands separator) was reported as 0. Anything that is not a plain whole number
     // after removing the separators still yields 0.
     long hits;
     if (!long.TryParse(result.Replace(",", ""),
                        System.Globalization.NumberStyles.None,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out hits))
     {
         hits = 0;
     }
     return(hits);
 }
Example #11
0
        /// <summary>
        /// Loads <paramref name="url"/> and returns every <c>tr</c> element whose class is
        /// <c>athing</c>.
        /// </summary>
        /// <param name="url">Address of the page to load.</param>
        /// <returns>The matching rows; an empty list when the page contains none.</returns>
        static List <HtmlNode> ExtractPostsFromUrl(String url)
        {
            HtmlAgilityPack.HtmlWeb      web = new HtmlAgilityPack.HtmlWeb();
            HtmlAgilityPack.HtmlDocument doc = web.Load(url);

            // SelectNodes returns null (not an empty collection) when nothing matches,
            // which used to make Cast<>() throw.
            var posts = doc.DocumentNode.SelectNodes("//tr[@class='athing']");
            if (posts == null)
            {
                return(new List <HtmlNode>());
            }

            return(posts.Cast <HtmlNode>().ToList());
        }
Example #12
0
        /// <summary>
        /// Loads <paramref name="url"/>, passes the document to <c>getInfo</c>, shows the text
        /// of the <c>detailcontent</c> element in <c>tblContent</c> and then calls
        /// <c>checkPageState</c>. The loading indicator is visible for the duration; any failure
        /// shows an error dialog.
        /// </summary>
        /// <param name="url">Address of the page to load.</param>
        async void getData(String url)
        {
            fragmentLoad.Visibility = Visibility.Visible;
            var loader = new HtmlAgilityPack.HtmlWeb();
            try
            {
                htmlDoc = await loader.LoadFromWebAsync(url);
                getInfo(htmlDoc);

                HtmlNode contentNode = htmlDoc.DocumentNode.SelectSingleNode(@"//div[@id='detailcontent']");

                // Strip tabs, turn <br> into line breaks and unescape quotes.
                String text = new StringBuilder(contentNode.InnerHtml.Replace("\t", ""))
                              .Replace("<br>", "\n")
                              .Replace("&quot;", "\"")
                              .ToString();

                tblContent.Text = text + "\n\n\n\n\n";
                checkPageState(htmlDoc);
            }
            catch (Exception)
            {
                // The dialog is started but not awaited.
                new MessageDialog("Lỗi hệ thống , vui lòng thử lại sau").ShowAsync();
            }
            finally
            {
                fragmentLoad.Visibility = Visibility.Collapsed;
            }
        }
        /// <summary>
        /// Loads <paramref name="url"/>, adds one <c>Novel</c> to <c>listNovels</c> per
        /// <c>li</c> of the <c>homeListstory</c> list, binds the list to the home page view and
        /// reads the current page number. The loading indicator is visible for the duration;
        /// any failure shows an error dialog.
        /// </summary>
        /// <param name="url">Address of the page to load.</param>
        /// <param name="mode">When <c>Mode.Refresh</c>, the existing list is cleared first.</param>
        async void getData(String url, Mode mode)
        {
            fragmentLoad.Visibility = Visibility.Visible;
            if (mode == Mode.Refresh)
            {
                listNovels.Clear();
            }

            var loader = new HtmlAgilityPack.HtmlWeb();
            try
            {
                htmlDoc = await loader.LoadFromWebAsync(url);

                HtmlNode storyList = htmlDoc.DocumentNode.SelectSingleNode(@"//ul[@class='homeListstory']");
                foreach (HtmlNode entry in storyList.SelectNodes("li"))
                {
                    // The title link carries both the display name and the target address.
                    HtmlNode titleLink = entry.SelectSingleNode("h3").SelectSingleNode("a");
                    HtmlNode cover     = entry.SelectSingleNode("a").SelectSingleNode("img");

                    listNovels.Add(new Novel(
                        titleLink.GetAttributeValue("title", null),
                        cover.GetAttributeValue("src", null),
                        titleLink.GetAttributeValue("href", null)));
                }

                fragmentGridView.lvHomePage.ItemsSource = listNovels;

                HtmlNode pageMarker = htmlDoc.DocumentNode.SelectSingleNode(@"//a[@title='current-page']");
                currentPage = Convert.ToInt16(pageMarker.InnerText);
            }
            catch (Exception)
            {
                // The dialog is started but not awaited.
                new MessageDialog("Lỗi hệ thống , vui lòng thử lại sau").ShowAsync();
            }
            finally
            {
                fragmentLoad.Visibility = Visibility.Collapsed;
            }
        }
Example #14
0
        /// <summary>
        /// Loads <paramref name="mainUrl"/> and, for every link with an <c>onclick</c> handler
        /// inside the table that contains the <c>embedcode</c> cell, decodes the
        /// <c>unescape('...')</c> payload and extracts the <c>src</c> of the iframe it holds.
        /// </summary>
        /// <param name="mainUrl">Address of the page listing the video sources.</param>
        /// <returns>
        /// Link text mapped to iframe address. Empty when the table or its links are missing;
        /// links whose handler does not have the expected shape are skipped.
        /// </returns>
        private IDictionary <string, string> getVideoUrls(string mainUrl)
        {
            var videoSources = new Dictionary <string, string>();
            var web          = new HAP.HtmlWeb();
            var doc          = web.Load(mainUrl);

            var embedCell = doc.DocumentNode.SelectSingleNode("//td[@id='embedcode']");
            if (embedCell == null || embedCell.ParentNode == null || embedCell.ParentNode.ParentNode == null)
            {
                return(videoSources);
            }
            var videoSelectorTable = embedCell.ParentNode.ParentNode;

            // ".//a" stays inside the table; "//a" is evaluated from the document root and
            // returned every link of the page.
            var links = videoSelectorTable.SelectNodes(".//a");
            if (links == null)
            {
                return(videoSources);
            }

            const string pivot = "unescape('";
            foreach (var node in links)
            {
                if (!node.Attributes.Contains("onclick"))
                {
                    continue;
                }

                var jscriptCode = node.Attributes["onclick"].Value;

                // Skip handlers that do not contain the pivot or its closing quote; IndexOf
                // returns -1 in those cases and Substring would throw or cut the wrong text.
                var pivotIdx = jscriptCode.IndexOf(pivot, System.StringComparison.Ordinal);
                if (pivotIdx < 0)
                {
                    continue;
                }
                var srtIdx = pivotIdx + pivot.Length;
                var endIdx = jscriptCode.IndexOf("'", srtIdx, System.StringComparison.Ordinal);
                if (endIdx < 0)
                {
                    continue;
                }

                // The decoded payload is itself HTML; parse it to reach the iframe.
                var redirectUrl = WebUtility.UrlDecode(jscriptCode.Substring(srtIdx, endIdx - srtIdx));
                var scriptDoc   = new HAP.HtmlDocument();
                scriptDoc.LoadHtml(redirectUrl);

                var iframe = scriptDoc.DocumentNode.SelectSingleNode("//iframe");
                if (iframe == null || !iframe.Attributes.Contains("src"))
                {
                    continue;
                }

                // Two links may share the same text; keep the first instead of letting Add throw.
                if (!videoSources.ContainsKey(node.InnerText))
                {
                    videoSources.Add(node.InnerText, iframe.Attributes["src"].Value);
                }
            }

            return(videoSources);
        }
Example #15
0
        /// <summary>
        /// Loads the page at <c>source.Domain + source.Path</c> and collects the <c>href</c> of
        /// every element matched by <c>source.LinkSelector</c>, prefixing values that start with
        /// "/" with the domain.
        /// </summary>
        /// <param name="source">Supplies the domain, path and link selector.</param>
        /// <returns>The non-empty link addresses, in document order.</returns>
        public List <String> ReviewUrl(Source source)
        {
            var loader    = new HtmlAgilityPack.HtmlWeb();
            var root      = loader.Load(source.Domain + source.Path).DocumentNode;
            var collected = new List <string>();

            foreach (var anchor in root.QuerySelectorAll(source.LinkSelector))
            {
                try
                {
                    var href = anchor.GetAttributeValue("href", "");
                    Debug.WriteLine(href);

                    if (string.IsNullOrEmpty(href))
                    {
                        continue;
                    }

                    // Site-relative links are made absolute with the source domain.
                    collected.Add(href.StartsWith("/")
                        ? source.Domain.TrimEnd('/') + href
                        : href);
                }
                catch (Exception ex)
                {
                    Debug.WriteLine(ex);
                }
            }

            return(collected);
        }
        /// <summary>
        /// Click handler: loads the page whose address is in <c>TextBox1</c> and writes the
        /// header and description of every <c>article-single</c> element to the response as a
        /// one-cell table.
        /// </summary>
        protected void Button1_Click(object sender, EventArgs e)
        {
            try
            {
                // NOTE(review): the server fetches whatever address the user typed; consider
                // restricting the allowed hosts.
                HtmlAgilityPack.HtmlWeb      web = new HtmlAgilityPack.HtmlWeb();
                HtmlAgilityPack.HtmlDocument doc = web.Load(TextBox1.Text);

                // SelectNodes returns null when the page has no matching element.
                var Articles = doc.DocumentNode.SelectNodes("//*[@class ='article-single']");
                if (Articles == null)
                {
                    return;
                }

                foreach (var article in Articles)
                {
                    var headerNode      = article.SelectSingleNode(".//li[@class='article-header']");
                    var descriptionNode = article.SelectSingleNode(".//li[@class='article-copy']");
                    if (headerNode == null || descriptionNode == null)
                    {
                        // Incomplete article; skip it rather than abort the remaining ones.
                        continue;
                    }

                    var header      = HttpUtility.HtmlDecode(headerNode.InnerText);
                    var description = HttpUtility.HtmlDecode(descriptionNode.InnerText);

                    // The text comes from a remote page, so it is HTML-encoded before being
                    // written, and the table markup is now well-formed (table > tr > td).
                    Response.Write("<Table>");
                    Response.Write("<tr>");
                    Response.Write("<td>");
                    Response.Write("Name - " + HttpUtility.HtmlEncode(header));
                    Response.Write("<br />");
                    Response.Write(" Description - " + HttpUtility.HtmlEncode(description));
                    Response.Write("</td>");
                    Response.Write("</tr>");
                    Response.Write("</Table>");
                }
            }
            catch (Exception ex) { Response.Write(HttpUtility.HtmlEncode(ex.Message)); }
        }
        //Public Methods
        /// <summary>
        /// Loads <paramref name="url"/>, prints the <c>href</c> of every link on the page and
        /// adds each value that is an absolute URI to <c>_results</c>.
        /// </summary>
        /// <param name="url">Address of the page to scan.</param>
        public void Scrape(string url)
        {
            try
            {
                HtmlWeb hw = new HtmlWeb();
                HtmlDocument doc = hw.Load(url);

                // SelectNodes returns null when the page has no links.
                HtmlNodeCollection links = doc.DocumentNode.SelectNodes("//a[@href]");
                if (links == null)
                {
                    return;
                }

                foreach (HtmlNode link in links)
                {
                    string href = link.Attributes["href"].Value;
                    Console.WriteLine(href);

                    // Relative or malformed values are skipped, exactly as before, but via
                    // TryCreate instead of throwing and swallowing an exception per link.
                    Uri parsed;
                    if (Uri.TryCreate(href, UriKind.Absolute, out parsed))
                    {
                        this._results.Add(parsed);
                    }
                }
            }
            catch (Exception ex)
            {
                // Loading stays best-effort, but the failure is traceable instead of vanishing.
                System.Diagnostics.Debug.WriteLine(ex);
            }
        }
Example #18
0
        /// <summary>
        /// When <c>updateCoworkerWarningBoolean</c> returns true, loads the employee page, adds
        /// every seventh first-column cell text to <c>names</c> and saves them with
        /// <c>setCoworkerstoFile</c>. On a <c>WebException</c> it calls
        /// <c>getCoworkersFromFile</c> instead. <c>initDDL</c> runs afterwards in both cases.
        /// </summary>
        public void getCoworkerNames()
        {
            if (!updateCoworkerWarningBoolean())
            {
                return;
            }

            try
            {
                var loader = new HtmlAgilityPack.HtmlWeb();
                var page   = loader.Load("http://10.45.10.149/brdkServices/EmployeeDB/");

                var cellTexts = page.DocumentNode
                                .SelectNodes("//*[@id=\"bootstrap-override\"]/div[1]/div/table/tbody//tr/td[1]")
                                .ToList()
                                .Select(cell => cell.InnerText)
                                .ToList();

                // Take positions 0, 7, 14, ... of the collected cells.
                for (int index = 0; index < cellTexts.Count; index += 7)
                {
                    names.Add(cellTexts[index]);
                }
                setCoworkerstoFile();
            }
            catch (System.Net.WebException)
            {
                getCoworkersFromFile();
            }

            initDDL();
        }
 /// <summary>
 /// Loads the page at <c>BASE + URL</c> (HTML-decoded) and selects the links in the fourth
 /// column of the table with id <c>myTable</c>.
 /// </summary>
 /// <param name="URL">Address fragment appended to <c>BASE</c>.</param>
 /// <returns>The result of the XPath selection.</returns>
 static HtmlNodeCollection GetSuburb(string URL)
 {
     const string linkXPath = "//table[@id='myTable']/tbody/tr/td[4]/a";

     string decodedAddress = System.Net.WebUtility.HtmlDecode(BASE + URL);
     HtmlDocument page = new HtmlWeb().Load(decodedAddress);

     return page.DocumentNode.SelectNodes(linkXPath);
 }
Example #20
0
        /// <summary>
        /// Sets <c>name</c>, unless it is already set, from the loaded page: the text of the
        /// first <c>h1</c>, otherwise the text of <c>title</c>, otherwise a name derived from
        /// the URL (scheme and "www" prefix removed, cut at the first dot).
        /// </summary>
        public void setName()
        {
            if (name != null)
            {
                return;
            }

            var          web = new HtmlAgilityPack.HtmlWeb();
            HtmlDocument doc = web.Load(this._urlLink);

            // HasChildNodes also counts text nodes, so the former "!HasChildNodes" test was only
            // true for empty elements and a heading with text was never used. An element now
            // qualifies when it holds text only (no nested markup) and that text is non-blank.
            string newname = GetTextOnlyContent(doc.DocumentNode.SelectSingleNode("//h1"))
                             ?? GetTextOnlyContent(doc.DocumentNode.SelectSingleNode("//title"));

            if (newname == null)
            {
                string n = Regex.Replace(urlLink, @"^((https:[/]*|http:[/]*)(www)*|(www.))[.]*", "");
                newname = Regex.Replace(n, @"[.].*$", "");
            }

            name = newname;
        }

        /// <summary>
        /// Returns the trimmed content of <paramref name="node"/> when all of its children are
        /// text nodes and the content is non-empty; otherwise null.
        /// </summary>
        private static string GetTextOnlyContent(HtmlAgilityPack.HtmlNode node)
        {
            if (node == null)
            {
                return null;
            }

            foreach (var child in node.ChildNodes)
            {
                if (child.NodeType != HtmlAgilityPack.HtmlNodeType.Text)
                {
                    return null;
                }
            }

            string text = node.InnerHtml.Trim();
            return text.Length == 0 ? null : text;
        }
Example #21
0
        /// <summary>
        /// Loads a fixed Yandex search page as UTF-8 and prints the inner HTML of every
        /// <c>div</c>, or "found nothing" when the page has none.
        /// </summary>
        public static void GetText2()
        {
            const string searchUrl = "https://yandex.by/search/?numdoc=10&p=0&rdrnd=601861&text=kinogo.co%20Один%20дома%201990%20&lr=157";

            var loader = new HtmlWeb
            {
                AutoDetectEncoding = false,
                OverrideEncoding = Encoding.UTF8 // alternative: GetEncoding("windows-1251")
            };

            HtmlNodeCollection divs = loader.Load(searchUrl).DocumentNode.SelectNodes("//div");

            // TODO: still to be finished.
            if (divs == null)
            {
                Console.WriteLine("found nothing");
                return;
            }

            foreach (HtmlNode div in divs)
            {
                Console.WriteLine(div.InnerHtml);
            }
        }
Example #22
0
        /// <summary>
        /// Returns the lines of <paramref name="Filename"/> when the file exists. Otherwise
        /// builds the data by loading one Wikipedia index page per item of <c>GetAlphabet</c>,
        /// collecting the text of the nodes selected by <c>GetAddress</c>, writes the result to
        /// the file and returns it.
        /// </summary>
        /// <param name="Filename">Cache file to read from, or to create.</param>
        /// <returns>The cached or freshly collected lines.</returns>
        private static string[] PrepareTestData(string Filename)
        {
            if (File.Exists(Filename))
            {
                return(File.ReadAllLines(Filename));
            }

            Console.WriteLine("Preparing test data - reading...");
            List <String> rs = new List <string>();

            // One loader is enough for all pages.
            HtmlAgilityPack.HtmlWeb w = new HtmlAgilityPack.HtmlWeb();
            foreach (var i in GetAlphabet())
            {
                Console.Write(i);
                string       addr = String.Format("http://en.wikipedia.org/wiki/Index_of_Windows_games_({0})", i);
                HtmlDocument d    = w.Load(addr);

                // SelectNodes returns null when a page has no matching entries; skip that page
                // instead of failing the whole preparation.
                var titles = d.DocumentNode.SelectNodes(GetAddress(i));
                if (titles == null)
                {
                    continue;
                }
                rs.AddRange(titles.Select(t => t.InnerText));
            }

            File.WriteAllLines(Filename, rs);
            Console.WriteLine("Done!");

            return(rs.ToArray());
        }
Example #23
0
        /// <summary>
        /// Loads <paramref name="url"/>, records it in <c>VisitedPages</c> and enqueues every
        /// link that resolves to the same host as <c>baseUrl</c> and is neither queued nor
        /// visited yet.
        /// </summary>
        /// <param name="url">Address of the page to scan.</param>
        public static void getHrefs(string url)
        {
            try
            {
                HtmlAgilityPack.HtmlWeb htmlWeb = new HtmlWeb();
                HtmlAgilityPack.HtmlDocument doc = htmlWeb.Load(url);

                // Mark the page as crawled.
                VisitedPages.Add(url);

                // SelectNodes returns null when the page has no links.
                var anchors = doc.DocumentNode.SelectNodes("//a[@href]");
                if (anchors == null)
                {
                    return;
                }

                foreach (HtmlNode link in anchors)
                {
                    // Resolve the href against the base address. A malformed value is skipped;
                    // previously it threw and the rest of the page's links were lost.
                    Uri l;
                    if (!Uri.TryCreate(baseUrl, link.Attributes["href"].Value, out l))
                    {
                        continue;
                    }

                    string target = l.ToString();
                    if (!LinkQueue.Contains(target) && !VisitedPages.Contains(target) && l.Host == baseUrl.Host)
                    {
                        LinkQueue.Enqueue(target);
                    }
                }
            }
            catch (Exception ex)
            {
                // Crawling stays best-effort, but the failure is traceable instead of vanishing.
                System.Diagnostics.Debug.WriteLine(ex);
            }
        }
Example #24
0
        /// <summary>
        /// Returns the title of the remote page: the Open Graph title when present, otherwise
        /// the text of the first HTML <c>title</c> element. Both are HTML-decoded.
        /// </summary>
        /// <param name="remoteUri">Address of the page.</param>
        /// <returns>The title, or null when none is found or any step fails.</returns>
        public static string GetTitleFromUri(string @remoteUri)
        {
            try
            {
                // try using Open Graph to get target page title
                var graph = OpenGraph.ParseUrl(@remoteUri, "Voat.co OpenGraph Parser");
                if (!string.IsNullOrEmpty(graph.Title))
                {
                    return HttpUtility.HtmlDecode(graph.Title);
                }

                // Open Graph parsing failed, try getting HTML TITLE tag instead
                HtmlWeb htmlWeb = new HtmlWeb();
                HtmlDocument htmlDocument = htmlWeb.Load(@remoteUri);

                if (htmlDocument != null)
                {
                    // FirstOrDefault, not SingleOrDefault: a page may contain more than one
                    // <title> element (for example inside inline SVG), which made
                    // SingleOrDefault throw and the method return null.
                    var titleNode = htmlDocument.DocumentNode.Descendants("title").FirstOrDefault();
                    if (titleNode != null)
                    {
                        // Decoded for consistency with the Open Graph branch above.
                        return HttpUtility.HtmlDecode(titleNode.InnerText);
                    }
                }

                return null;
            }
            catch (Exception)
            {
                // Best-effort lookup: any failure means "no title".
                return null;
            }
        }
Example #25
0
 /// <summary>
 /// Loads one page of zhaopin.com search results for the current parameters and adds a
 /// <c>JobInfo</c> to <c>_jobList</c> for every result table after the first one.
 /// Any failure is passed to <c>LogSave.ErrLogSave</c>.
 /// </summary>
 public void GetJobListFromWeb()
 {
     try
     {
         var htmlWeb = new HtmlWeb { OverrideEncoding = Encoding.GetEncoding("UTF-8") };
         HtmlDocument htmlDoc =
             htmlWeb.Load(string.Format("http://sou.zhaopin.com/jobs/searchresult.ashx?jl={0}&kw={1}&p={2}",
                 DataClass.GetDic_zhilian(_pars.Addr), _pars.Key, _pars.Page));

         // The collection is used directly: AsParallel().ToList() does not guarantee the
         // original order, yet the loop below relies on the first table being at index 0.
         var nodeList =
             htmlDoc.DocumentNode.SelectNodes("//*[@id='newlist_list_content_table']/table[@class='newlist']");
         if (nodeList == null)
         {
             // Routed to the catch below so that a changed page layout is still logged.
             throw new System.InvalidOperationException("Job list table not found in the response.");
         }

         // Index 0 is skipped (presumably the header table - confirm against the page).
         for (int i = 1; i < nodeList.Count; i++)
         {
             var node = nodeList[i];
             var titleLink = node.SelectSingleNode(".//tr/td[@class='zwmc']/div/a");

             var job = new JobInfo();
             job.TitleName = titleLink.InnerText;
             job.InfoUrl = titleLink.Attributes["href"].Value;
             job.Company = node.SelectSingleNode(".//tr/td[@class='gsmc']/a").InnerText;
             job.Salary = node.SelectSingleNode(".//tr/td[@class='zwyx']").InnerText;
             job.City = node.SelectSingleNode(".//tr/td[@class='gzdd']").InnerText;
             job.Date = node.SelectSingleNode(".//tr/td[@class='gxsj']/span").InnerText;
             job.Source = "智联招聘";
             job.Method = "月薪";
             _jobList.Add(job);
         }
     }
     catch (Exception ex)
     {
         LogSave.ErrLogSave("错误【解析】", ex);
     }
 }
        /// <summary>
        /// Loaded handler: starts an asynchronous download of the cnblogs.com home page and
        /// routes its completion to <c>htmlDocComplete</c>.
        /// </summary>
        private void PanoramaItem_Loaded(object sender, RoutedEventArgs e)
        {
            var loader = new HtmlAgilityPack.HtmlWeb();

            // Subscribe before starting the load.
            loader.LoadCompleted += htmlDocComplete;
            loader.LoadAsync("http://www.cnblogs.com/");
        }
Example #27
0
        /// <summary>
        /// Loads <paramref name="webAddress"/> and joins the <c>InnerText</c> of every
        /// descendant node of the document with single spaces.
        /// </summary>
        /// <param name="webAddress">Address of the page to load.</param>
        /// <returns>The joined text.</returns>
        public static string getContent(string webAddress)
        {
            HtmlDocument page = new HtmlAgilityPack.HtmlWeb().Load(webAddress);

            // NOTE(review): an element's InnerText already contains the text of its descendants,
            // so nested text likely appears more than once in the result - confirm this is intended.
            var nodeTexts = from node in page.DocumentNode.Descendants()
                            select node.InnerText;

            return(string.Join(" ", nodeTexts));
        }
Example #28
0
        /// <summary>
        /// Retrieves the list of Houzz project IDs for a user by reading the sidebar links of
        /// the user's projects page.
        /// </summary>
        /// <param name="username">Houzz username.</param>
        /// <returns>
        /// The project IDs; null when the page has no sidebar links. Links without an
        /// <c>href</c>, with fewer than five "/"-separated segments, or whose fifth segment is
        /// not a number are skipped.
        /// </returns>
        public IEnumerable<int> GetProjectIds(string username)
        {
            var url = string.Format("http://www.houzz.com/projects/users/{0}", username);
            HtmlDocument htmlDoc = new HtmlWeb().Load(url);

            var nodes = htmlDoc.DocumentNode.SelectNodes("//div[@class='sidebar-body']//a");
            if (nodes == null || !nodes.Any())
            {
                return null;
            }

            var projects = new List<int>();

            foreach (var anchor in nodes.Skip(1)) // Skip "All Projects"
            {
                string link = anchor.GetAttributeValue("href", null);
                if (link == null)
                {
                    continue;
                }

                var splicedUrl = link.Split('/');

                // Index 4 needs at least five segments; the former "Length >= 4" check let a
                // four-segment link through and then threw IndexOutOfRangeException.
                int projectId;
                if (splicedUrl.Length >= 5 &&
                    int.TryParse(splicedUrl[4],
                                 System.Globalization.NumberStyles.Integer,
                                 System.Globalization.CultureInfo.InvariantCulture,
                                 out projectId))
                {
                    projects.Add(projectId);
                }
            }

            return projects;
        }
Example #29
0
        /// <summary>
        /// Click handler: loads the chapter list from the address in <c>txtURL</c>, then for
        /// each chapter follows the "next page" links and downloads every page image to the
        /// local folder as <c>chapter.page.jpg</c>.
        /// </summary>
        private void btnDownloadImages_Click(object sender, EventArgs e)
        {
            // "Invalid URL" is now shown only when loading actually fails or the page has no
            // chapter list; it used to pop up unconditionally on every click.
            HtmlAgilityPack.HtmlDocument chapterDoc;
            try
            {
                chapterDoc = new HtmlWeb().Load(txtURL.Text);
            }
            catch (Exception)
            {
                MessageBox.Show("Invalid URL");
                return;
            }

            var chapterLinks = chapterDoc.DocumentNode.SelectNodes("//div[@class = 'detail_list']/ul/li/span/a");
            if (chapterLinks == null)
            {
                MessageBox.Show("Invalid URL");
                return;
            }

            const string lastPageMarker = "javascript:void(0);";

            // WebClient is disposable; release it when the download loop ends.
            using (WebClient client = new WebClient())
            {
                foreach (HtmlNode link in chapterLinks)
                {
                    string chapter  = link.InnerText.Replace("\r\n", "").Replace(" ", "").Replace(":", "");
                    string nextPage = link.Attributes["href"].Value;

                    // Page numbering starts at 1 for every chapter; previously only the first
                    // chapter started at 1 and later ones at 0.
                    int page = 1;

                    while (nextPage != lastPageMarker)
                    {
                        HtmlAgilityPack.HtmlDocument pageDoc = new HtmlWeb().Load(nextPage);

                        HtmlNode pageNode = pageDoc.DocumentNode.SelectSingleNode("//img[@id='image']");
                        string imageUrl = pageNode == null ? null : pageNode.GetAttributeValue("src", null);
                        if (string.IsNullOrEmpty(imageUrl))
                        {
                            // No image on this page; stop following this chapter.
                            break;
                        }

                        string localFilename = @"C:\Users\Anh\Desktop\New folder\Coding\Image2\" + chapter + "." + page + ".jpg";
                        client.DownloadFile(imageUrl, localFilename);

                        // A missing "next" link ends the chapter just like the explicit marker.
                        HtmlNode nextNode = pageDoc.DocumentNode.SelectSingleNode("//a[@class = 'next_page']");
                        if (nextNode == null)
                        {
                            break;
                        }
                        nextPage = nextNode.GetAttributeValue("href", lastPageMarker);
                        page++;
                    }
                }
            }
        }
        /// <summary>
        /// Rebuilds <c>mCarTypeList</c> from the page at <c>WebConstants.BASE_URL + pageUrl</c>:
        /// one <c>CarType</c> per node matched by <c>WebConstants.TYPE_NODE</c>, each with its
        /// name and image address, and an image download started on a new thread. Afterwards
        /// calls <c>setPrice</c> with the page URL converted from the photo to the price form.
        /// </summary>
        /// <param name="pageUrl">Page address relative to <c>WebConstants.BASE_URL</c>.</param>
        private void addPageType(String pageUrl)
        {
            mCarTypeList.Clear();

            HtmlNodeCollection typeNodes = new HtmlWeb()
                .Load(WebConstants.BASE_URL + pageUrl)
                .DocumentNode
                .SelectNodes(WebConstants.TYPE_NODE);

            if (typeNodes != null)
            {
                foreach (HtmlNode sourceNode in typeNodes)
                {
                    // Each piece is re-parsed as its own fragment before being queried.
                    HtmlNode fragment = HtmlNode.CreateNode(sourceNode.OuterHtml);
                    CarType entry = new CarType(mCarFactory);

                    HtmlNode nameFragment = HtmlNode.CreateNode(
                        fragment.SelectSingleNode(WebConstants.TYPE_NAME).OuterHtml);
                    entry.Name = nameFragment.SelectSingleNode(WebConstants.LINK_HREF).InnerText;

                    HtmlNode imageFragment = HtmlNode.CreateNode(
                        fragment.SelectSingleNode(WebConstants.TYPE_IMAGE).OuterHtml);
                    entry.ImageUrl = imageFragment
                        .SelectSingleNode(WebConstants.IMAGE_SRC)
                        .Attributes[WebConstants.SRC]
                        .Value;

                    // The image download runs on its own thread.
                    Thread worker = new Thread(new TypeImageDownloadTask(entry).Download);
                    worker.Start();

                    mCarTypeList.Add(entry);
                }
            }

            setPrice(pageUrl.Replace(WebConstants.PHOTO, WebConstants.PRICE));
        }
        /// <summary>
        /// Loads <paramref name="url"/>, passes the document to <c>getInfo</c> and
        /// <c>getSummary</c>, adds one <c>Chapter</c> per table row that has a third cell,
        /// reads the current page number, binds the list to <c>lvChapter</c> and calls
        /// <c>checkNextPage</c>.
        /// </summary>
        /// <param name="url">Address of the page to load.</param>
        /// <param name="mode">When <c>Mode.Refresh</c>, the existing chapter list is cleared first.</param>
        void getData(String url, Mode mode)
        {
            if (mode == Mode.Refresh)
            {
                chapter.Clear();
            }

            htmlDoc = new HtmlAgilityPack.HtmlWeb().Load(url);
            getInfo(htmlDoc);
            getSummary(htmlDoc);

            HtmlNode table = htmlDoc.DocumentNode.SelectSingleNode(@"//table[@class='table table-striped']");
            foreach (HtmlNode row in table.SelectNodes("tr"))
            {
                HtmlNode numberCell = row.SelectSingleNode("td[2]");
                HtmlNode titleCell  = row.SelectSingleNode("td[3]");
                if (titleCell == null)
                {
                    // Rows without a third cell carry no chapter.
                    continue;
                }

                HtmlNode titleLink   = titleCell.SelectSingleNode("a");
                String   displayName = numberCell.SelectSingleNode("strong").InnerText + " : " + titleLink.InnerText;
                chapter.Add(new Chapter(displayName, titleLink.GetAttributeValue("href", null)));
            }

            HtmlNode pageMarker = htmlDoc.DocumentNode.SelectSingleNode(@"//a[@title='current-page']");
            currentPage           = Convert.ToInt16(pageMarker.InnerText);
            lvChapter.ItemsSource = chapter;
            checkNextPage(htmlDoc);
        }
Example #32
0
        /// <summary>
        /// Downloads and parses the timetable page for a study year, optionally narrowed to a
        /// half-year group.
        /// </summary>
        /// <param name="year">Study year; its enum name forms the first part of the page name.</param>
        /// <param name="halfYear">
        /// Half-year; its enum name is appended to the page name. <c>HalfYear.None</c> appends nothing.
        /// </param>
        /// <returns>
        /// The parsed timetable, or <c>null</c> when a <see cref="WebException"/> or
        /// <see cref="NotSupportedException"/> is raised (the exception is logged).
        /// </returns>
        public List<TimetableItem> GetTimetableForYear(StudyYear year, HalfYear halfYear = HalfYear.None)
        {
            string tempYear = Enum.GetName(typeof(StudyYear), year);
            string tempHalfYear = Enum.GetName(typeof(HalfYear), halfYear);
            if (tempHalfYear == "None") tempHalfYear = String.Empty;

            try
            {
                HtmlWeb hw = new HtmlWeb();

                // Format with the normalised suffix. Formatting the raw enum value put the
                // literal text "None" into the URL whenever no half-year was requested.
                HtmlDocument doc = hw.Load(String.Format("http://thor.info.uaic.ro/~orar/participanti/orar_{0}{1}.html", tempYear, tempHalfYear));
                doc.DocumentNode.InnerHtml = doc.DocumentNode.InnerHtml.Replace("\r\n", "");

                return ParseTable(doc, TimetableType.Year);
            }
            catch (WebException ex)
            {
                Logger.ExceptionLogger.Log(ex);
                return null;
            }
            catch (NotSupportedException ex)
            {
                Logger.ExceptionLogger.Log(ex);
                return null;
            }
        }
Example #33
0
 /// <summary>
 /// Lazily enumerates the mangas on the mangahere "latest" page, from the last listed entry
 /// to the first, each with its listed chapters attached.
 /// </summary>
 /// <param name="source">Source used to create the manga and its detail/chapter sources.</param>
 /// <returns>One <c>MangaData</c> per list entry; nothing when the list is not found.</returns>
 public static IEnumerable<MangaData> getNews(Source source)
 {
     var web = new HtmlAgilityPack.HtmlWeb();
     web.AutoDetectEncoding = true;
     var htmlMainDoc = web.Load(@"http://www.mangahere.com/latest/");
     var itemsManga = htmlMainDoc.DocumentNode.SelectNodes(@"/html/body/section[@class='page_main']/div[@class='latest_released']/div[@class='manga_updates']/dl");

     // SelectNodes returns null when the page layout does not match.
     if (itemsManga == null)
     {
         yield break;
     }

     // Walk down to index 0 inclusive; the previous "i > +0" bound dropped the first entry.
     for (int i = itemsManga.Count - 1; i >= 0; i--)
     {
         var itemManga = itemsManga[i];
         MangaData manga = new MangaData(source, true);
         var titleLink = itemManga.SelectSingleNode(@"dt").SelectSingleNode("a");
         var mangaName = titleLink.InnerHtml;
         var mangaDetailLink = titleLink.GetAttributeValue("href", "");
         manga.DetailMangaSource = source.CreateDetailMangaSource(manga, mangaDetailLink);
         manga.Name = mangaName;

         // An entry without any <dd> yields a manga with no chapters instead of crashing.
         var chapterItems = itemManga.SelectNodes("dd");
         if (chapterItems != null)
         {
             foreach (var itemChapter in chapterItems)
             {
                 var chapterAnchor = itemChapter.SelectSingleNode("a");
                 ChapterData chapter = new ChapterData();
                 chapter.Name = chapterAnchor.InnerText;
                 chapter.ChapterSource = source.CreateChapterSource(chapterAnchor.GetAttributeValue("href", ""));
                 manga.ChaptersData.Add(chapter);
             }
         }
         yield return manga;
     }
 }
Example #34
0
        /// <summary>
        /// Collects the image URL of every page of a chapter. The first page is loaded
        /// synchronously; the remaining pages listed in its page selector are loaded through
        /// <c>Observable.Start</c> and awaited in order.
        /// </summary>
        /// <param name="source">Not used by this method.</param>
        /// <param name="link">URL of the chapter's first page.</param>
        /// <returns>A <c>ChapterData</c> whose <c>Images</c> holds one entry per page.</returns>
        public static ChapterData getChapters(Source source, string link)
        {
            ChapterData chapter = new ChapterData();
            var web = new HtmlAgilityPack.HtmlWeb();
            web.AutoDetectEncoding = true;
            var htmlpage1 = web.Load(link);
            var pages = new List<IObservable<HtmlDocument>>();
            pages.Add(Observable.Return(htmlpage1));
            var linksToPages = htmlpage1.DocumentNode.SelectNodes(@"/html/body/section[@class='readpage_top']/div[@class='go_page clearfix']/span[@class='right']/select[@class='wid60']/option");

            // SelectNodes returns null when the selector is missing: treat as a one-page chapter.
            if (linksToPages != null)
            {
                // Index 0 is the page that has already been loaded above.
                for (int i = 1; i < linksToPages.Count; i++)
                {
                    var linkToPage = linksToPages[i].GetAttributeValue("value", "");
                    pages.Add(Observable.Start<HtmlDocument>(
                        () =>
                        {
                            // Each worker gets its own HtmlWeb and returns its own document.
                            // Previously the shared 'web' instance was used and the captured
                            // 'htmlpage1' was overwritten from several workers at once.
                            var pageWeb = new HtmlAgilityPack.HtmlWeb();
                            pageWeb.AutoDetectEncoding = true;
                            return pageWeb.Load(linkToPage);
                        }
                    ));
                }
            }

            foreach (IObservable<HtmlDocument> item in pages)
            {
                HtmlDocument pagehtml = item.Wait();
                chapter.Images.Add(pagehtml.DocumentNode.SelectSingleNode(@"/html/body/section[@id='viewer']/a/img[@id='image']/@src").GetAttributeValue("src", ""));
            }
            return chapter;
        }
Example #35
0
        /// <summary>Downloads the page at <paramref name="url"/> and returns it as a parsed document.</summary>
        /// <param name="url">Address of the page to fetch.</param>
        /// <returns>The document produced by <c>HtmlWeb.Load</c>.</returns>
        public static HtmlDocument Crawl(string url)
        {
            return new HtmlWeb().Load(url);
        }
Example #36
0
        /// <summary>
        /// Reads the chapter URLs from the option values of the page's single
        /// <c>&lt;select class="selectBox"&gt;</c> element.
        /// </summary>
        /// <param name="url">Address of the page holding the chapter selector.</param>
        /// <returns>
        /// The option values ("NO_URL" for an option without a value), with the first "#"
        /// entry removed; <c>null</c> when the page has no matching select element.
        /// </returns>
        /// <exception cref="InvalidOperationException">More than one matching select element exists.</exception>
        public static List<string> GetChapterUrls(string url)
        {
            HtmlAgilityPack.HtmlWeb htmlWeb = new HtmlWeb();
            HtmlAgilityPack.HtmlDocument htdoc = htmlWeb.Load(url);

            // Materialise once so the query is not re-run for the count and again for Single().
            // GetAttributeValue tolerates <select> elements that have no class attribute, where
            // Attributes["class"].Value would throw.
            List<HtmlAgilityPack.HtmlNode> selectList = htdoc.DocumentNode.Descendants("select")
                                        .Where(x => x.GetAttributeValue("class", string.Empty) == "selectBox")
                                        .ToList();

            if (selectList.Count == 0) return null;

            var selectElement = selectList.Single();

            List<string> ret = new List<string>();
            foreach (var cNode in selectElement.ChildNodes)
            {
                if (cNode.Name == "option")
                {
                    ret.Add(cNode.GetAttributeValue("value", "NO_URL"));
                }
            }

            // Clean-up: drop the placeholder entry.
            ret.Remove("#");

            return ret;
        }
Example #37
0
        /// <summary>
        /// Appends one "name - e-mail" line per author that has an envelope link on the page
        /// to <c>C:\Springer\Springer Emails.txt</c>.
        /// </summary>
        /// <param name="url">Address of the article page to scan.</param>
        /// <returns>
        /// An empty list. NOTE(review): nothing is ever added to the returned list — confirm
        /// whether callers expect the extracted names here.
        /// </returns>
        public static List<string> getNameOfEmail(string url)
        {
            List<string> a = new List<string>();
            HtmlWeb website = new HtmlWeb();
            HtmlAgilityPack.HtmlDocument doc = website.Load(url);

            // SelectNodes returns null when no author entries exist.
            HtmlNodeCollection authors = doc.DocumentNode.SelectNodes(".//li[@itemprop='author']");

            // CreateDirectory is a no-op when the directory already exists.
            Directory.CreateDirectory(@"C:\Springer\");

            using (StreamWriter outputFile = new StreamWriter(@"C:\Springer\Springer Emails.txt", true))
            {
                if (authors != null)
                {
                    foreach (HtmlNode author in authors)
                    {
                        HtmlNode Name = author.SelectSingleNode(".//a[@class='person']");
                        HtmlNode EMail = author.SelectSingleNode(".//a[@class='envelope']");

                        if (EMail != null)
                        {
                            // An author may have an envelope link but no 'person' link, and the
                            // envelope may lack a title; neither should abort the whole export.
                            string personName = Name != null ? Name.InnerText : string.Empty;
                            outputFile.WriteLine(personName + " - " + EMail.GetAttributeValue("title", string.Empty));
                        }
                    }
                }
            }

            return a;
        }
		/// <summary>
		/// Sets up the main layout, fills the status text view with the tag-stripped contents of
		/// every table cell on the route page, and wires the button to open <c>Page2</c>.
		/// </summary>
		/// <param name="savedInstanceState">State bundle forwarded to the base implementation.</param>
		protected override void OnCreate (Bundle savedInstanceState)
		{
			base.OnCreate (savedInstanceState);

			SetContentView (Resource.Layout.Main);

			TextView textView = FindViewById<TextView> (Resource.Id.TEXT_STATUS_ID);

			HtmlWeb web = new HtmlWeb();
			HtmlDocument doc = web.Load("https://www.ltd.org/system-map/route_79x/");

			// Build the text once instead of reading and re-assigning textView.Text per cell.
			System.Text.StringBuilder cells = new System.Text.StringBuilder(textView.Text);

			// SelectNodes returns null when the page contains no <td> at all.
			HtmlNodeCollection tags = doc.DocumentNode.SelectNodes("//td");
			if (tags != null)
			{
				foreach (HtmlNode item in tags)
				{
					cells.Append(item.InnerHtml).Append("\n");
				}
			}

			// Strip any markup that was nested inside the cells.
			textView.Text = Regex.Replace(cells.ToString(), @"<[^>]*>", String.Empty);

			Button button = FindViewById<Button> (Resource.Id.myButton);

			button.Click += delegate {
				StartActivity(typeof(Page2));
			};
		}
Example #39
0
        /// <summary>
        /// Loads the page at <paramref name="url"/> asynchronously, appends every novel in the
        /// home story list to <c>listNovels</c>, and updates the grid and paging state. The
        /// loading indicator is shown for the duration of the call.
        /// </summary>
        /// <param name="url">Address of the page to load.</param>
        /// <param name="mode">When <c>Mode.Refresh</c>, the novel list is cleared first.</param>
        async void getData(String url, Mode mode)
        {
            fragmentLoad.Visibility = Visibility.Visible;
            try
            {
                if (mode == Mode.Refresh)
                {
                    listNovels.Clear();
                }
                HtmlAgilityPack.HtmlWeb htmlWeb = new HtmlAgilityPack.HtmlWeb();
                htmlDoc = await htmlWeb.LoadFromWebAsync(url);

                // Both lookups return null when nothing matches; the list container must be
                // checked before its items are queried.
                HtmlNode           listNode = htmlDoc.DocumentNode.SelectSingleNode(@"//ul[@class='homeListstory']");
                HtmlNodeCollection items    = listNode != null ? listNode.SelectNodes("li") : null;

                if (items != null)
                {
                    foreach (var node in items)
                    {
                        HtmlNode titleLink = node.SelectSingleNode("h3").SelectSingleNode("a");
                        String   name      = titleLink.GetAttributeValue("title", null);
                        String   imgUrl    = node.SelectSingleNode("a").SelectSingleNode("img").GetAttributeValue("src", null);
                        String   mainUrl   = titleLink.GetAttributeValue("href", null);
                        listNovels.Add(new Novel(name, imgUrl, mainUrl));
                    }

                    if (realUrl == null || realUrl.Equals(""))
                    {
                        realUrl = htmlDoc.DocumentNode.SelectSingleNode(@"//link[@rel='canonical']").GetAttributeValue("href", null);
                    }

                    fragmentGridView.lvHomePage.ItemsSource = listNovels;
                    currentPage = Convert.ToInt16(htmlDoc.DocumentNode.SelectSingleNode(@"//a[@title='current-page']").InnerText);
                }
            }
            finally
            {
                // Always hide the indicator; it used to stay visible when the page had no
                // story list or when parsing failed.
                fragmentLoad.Visibility = Visibility.Collapsed;
            }
        }
Example #40
0
        /// <summary>
        /// Looks up a licence plate on nummerplade.net and reads make, model, variant, chassis
        /// number, model year and plate from the result page's identified table cells.
        /// </summary>
        /// <param name="nummerplade">Licence plate to search for.</param>
        /// <returns>The populated <c>Bilinformation</c>.</returns>
        /// <exception cref="IngenBilinformationException">
        /// The page could not be loaded or one of the expected cells is missing; the original
        /// exception is attached as the inner exception.
        /// </exception>
        public static Bilinformation HentBilinformation(string nummerplade)
        {
            try
            {
                Bilinformation bilinformation = new Bilinformation();

                // Escape the plate so characters such as '&', '#' or spaces cannot alter the query.
                string html = "http://www.nummerplade.net/soeg/?regnr=" + Uri.EscapeDataString(nummerplade);
                HtmlWeb web = new HtmlWeb();
                HtmlDocument page = web.Load(html);

                // A missing cell raises NullReferenceException here, which the catch below
                // converts into IngenBilinformationException.
                Func<string, string> cellText =
                    id => page.DocumentNode.SelectSingleNode("//td[@id='" + id + "']").InnerText;

                bilinformation.Maerke = cellText("maerke");
                bilinformation.Model = cellText("model");
                bilinformation.Variant = cellText("variant");
                bilinformation.Stelnummer = cellText("stelnr");
                bilinformation.Aargang = cellText("model_aar");
                bilinformation.Nummerplade = cellText("regnr");

                return bilinformation;
            }
            catch (Exception ex)
            {
                throw new IngenBilinformationException("Der blev ikke fundet nogen bilinformation på nummerpladen.", ex);
            }
        }
Example #41
0
        /// <summary>
        /// Loads a public LinkedIn profile, prints its name, summary and skills, then prints
        /// each known profile section, and finally waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            var profile = new HtmlWeb().Load("https://ua.linkedin.com/in/kirillmiroshnichenko");
            var root = profile.DocumentNode;

            Print(root.SelectNodes("//span[@class='full-name']"));
            Print(root.SelectNodes("//p[@class='description']"));
            Print(root.SelectNodes("//span[@class='skill-pill']"));
            Console.WriteLine("-------------");

            // Section names handed to Info one at a time, in this order.
            string[] sections =
            {
                "experience", "courses", "projects", "certifications", "languages", "education", "interests",
                "patents", "publications", "honors", "test-scores", "organizations", "volunteering"
            };

            for (int index = 0; index < sections.Length; index++)
            {
                Info(profile, sections[index]);
            }

            Console.ReadLine();
        }
        // Scrapes the full ("ipp=All") listing page of one city and, for every "Phone" link,
        // prints the phone, name and address and appends them as a CSV line to 'file'.
        private void ExtractCity(string state, string city)
        {
            string cityUrl = RootUrl + @"/" + state + @"/" + city + @"?" + @"page=1&ipp=All";
            HtmlDocument listing = new HtmlWeb().Load(cityUrl);

            // All anchors that have an href and some visible text (evaluated lazily).
            var anchors = listing.DocumentNode.Descendants()
                                 .Where(node => node.Name == "a" &&
                                                node.Attributes["href"] != null &&
                                                node.InnerText.Trim().Length > 0);

            foreach (var anchor in anchors)
            {
                if (anchor.InnerText != "Phone")
                {
                    continue;
                }

                // The phone number is the text of the node following the link's parent.
                string phone = anchor.ParentNode.NextSibling.InnerText;
                Console.WriteLine();
                Console.WriteLine("phone: " + phone);

                // Four levels up, the second child holds the name and address on separate lines.
                string[] cardLines = anchor.ParentNode.ParentNode.ParentNode.ParentNode.FirstChild.NextSibling.InnerText.Split('\n');
                string name = cardLines[1].Trim();
                string address = cardLines[2].Trim();
                Console.WriteLine("name: " + name);
                Console.WriteLine("address: " + address);

                file.WriteLine(name + "," + address + "," + city.Replace("-plumbers", "") + "," + phone.Replace(" ", ""));
            }
        }
Example #43
0
        /// <summary>
        /// Loads one page of the TOK FM podcast listing (ISO-8859-2 encoded), round-trips it
        /// through the parser's XML output to normalise the markup, and extracts every
        /// <c>a.tokfm_play</c> link.
        /// </summary>
        /// <param name="pageNumber">Listing page number passed in the <c>str</c> query parameter.</param>
        /// <returns>The podcasts found on the page; empty when there are none.</returns>
        public IEnumerable<Podcast> GetLatestPodcasts(int pageNumber)
        {
            var hw = new HtmlWeb();
            hw.OverrideEncoding = Encoding.GetEncoding("ISO-8859-2");
            var doc = hw.Load("http://www.tok.fm/TOKFM/0,94037.html?str=" + pageNumber.ToString(CultureInfo.InvariantCulture));
            doc.OptionOutputAsXml = true;
            doc.OptionCheckSyntax = true;
            doc.OptionFixNestedTags = true;

            // Save as XML and parse the result again so the queries run on the fixed-up markup.
            var sb = new StringBuilder();
            using (var stringWriter = new StringWriter(sb))
            {
                doc.Save(stringWriter);
            }
            using (var stringReader = new StringReader(sb.ToString()))
            {
                doc.Load(stringReader);
            }

            var result = new List<Podcast>();

            // SelectNodes returns null when the page has no play links.
            var playLinks = doc.DocumentNode.SelectNodes("//a[@class='tokfm_play']");
            if (playLinks == null)
            {
                return result;
            }

            foreach (HtmlNode link in playLinks)
            {
                var imgNode = link.SelectSingleNode("img");
                var imageURL = String.Empty;
                if (imgNode != null)
                    imageURL = imgNode.Attributes["src"].Value;
                result.Add(new Podcast { Href = link.Attributes["href"].Value, Title = link.Attributes["title"].Value, ImageURL = imageURL });
            }

            return result;
        }
Example #44
0
        /// <summary>
        /// Extracts the BibTeX entry from a CiteSeer "viewdoc" page.
        /// </summary>
        /// <param name="url">
        /// Page address. Only URLs containing "viewdoc"
        /// (e.g. http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.31.3487) are fetched;
        /// others (e.g. http://citeseer.ist.psu.edu/showciting?cid=2131272) are not.
        /// </param>
        /// <returns>
        /// The BibTeX text with a line break after every comma and <c>&amp;nbsp;</c> replaced by
        /// a space; an empty string when the URL is not a viewdoc page or no entry is found.
        /// </returns>
        public string getBibTex(string url)
        {
            if (!url.Contains("viewdoc"))
            {
                return "";
            }

            HtmlDocument doc = new HtmlWeb().Load(url);
            if (doc == null)
            {
                // Report and stop instead of dereferencing the missing document below.
                Console.WriteLine("Load Error!");
                return "";
            }
            Console.WriteLine("Document Loaded!");

            string res = "";
            try
            {
                HtmlNode n = doc.DocumentNode.SelectSingleNode("//*[@id=\"bibtex\"]/p");
                if (n != null)
                {
                    res = n.InnerText.Replace(",", ",\n").Replace("&nbsp;", " ");
                }
            }
            catch (Exception e)
            {
                // Extraction stays best-effort, but the failure is no longer silent.
                Console.WriteLine("BibTeX extraction failed: " + e.Message);
            }
            return res;
        }
 /// <summary>
 /// Walks the urfu.ru schedule pages: takes the eighth institute link, then loads the
 /// schedule tables of every group of that institute and builds a <c>Schedule</c> from each.
 /// </summary>
 static void Main()
 {
     string mainUrl       = "http://urfu.ru/";
     string toSchedule    = "student/schedule/schedule/list/institute/";
     string getInstitutes = "student/schedule/schedule/list/group/institute";
     string getGroups     = "student/schedule/schedule/list/lesson/institute";
     var    webGet        = new HtmlAgilityPack.HtmlWeb();
     var    doc           = webGet.Load(mainUrl + toSchedule);

     // "//a" also matches anchors without an href (named anchors, script-driven links);
     // GetAttributeValue avoids the NullReferenceException Attributes["href"].Value raised
     // on those. Paths are compared ordinally.
     var    listInstituts = doc.DocumentNode.SelectNodes("//a").Where(item => item.GetAttributeValue("href", string.Empty).StartsWith(getInstitutes, StringComparison.Ordinal));
     //foreach (var institut in listInstituts)
     var institut = listInstituts.Skip(7).First();
     {
         Console.WriteLine(institut.InnerHtml);
         doc = webGet.Load(mainUrl + institut.GetAttributeValue("href", string.Empty));
         var listGroups = doc.DocumentNode.SelectNodes("//a").Where(item => item.GetAttributeValue("href", string.Empty).StartsWith(getGroups, StringComparison.Ordinal));
         foreach (var group in listGroups)
         {
             Console.WriteLine(group.InnerHtml);

             // 'doc' is reassigned here; listGroups enumerates the collection returned by
             // the earlier SelectNodes call, so the enumeration itself is unaffected.
             doc = webGet.Load(mainUrl + group.GetAttributeValue("href", string.Empty));
             var schedule = doc.DocumentNode.SelectNodes("//table");//.Where(item => item.Attributes["class"])
             new Schedule(schedule);
         }
     }
     //doc = webGet.Load("http://urfu.ru/student/schedule/schedule/list/lesson/institute/6/sch_group/419/week/odd/semi_semester/2/");
     //var schedule = doc.DocumentNode.SelectNodes("//table").Where(item => item.Attributes["class"] != null);
     //new Schedule(schedule);
 }
Example #46
0
        /// <summary>
        /// Fetches song lyrics from letras.mus.br for the given artist and title.
        /// </summary>
        /// <param name="artist">Artist name; <c>null</c> is treated as empty.</param>
        /// <param name="title">Song title; <c>null</c> is treated as empty.</param>
        /// <returns>
        /// The HTML-decoded lyrics with <c>&lt;br&gt;</c> turned into line breaks, or an empty
        /// string when no lyrics are found.
        /// </returns>
        public static string GetFromTerra(string artist, string title)
        {
            string rep = string.Empty;

            artist = (artist + "").ToLowerInvariant();
            title = (title + "").ToLowerInvariant();

            // Fetch the lyrics page.
            HtmlWeb web = new HtmlWeb();
            HtmlDocument doc = web.Load(string.Format("http://letras.mus.br/winamp.php?t={0}-{1}", HttpUtility.UrlEncode(artist, ISOEncoding), HttpUtility.UrlEncode(title, ISOEncoding)));
            HtmlNode node = doc.DocumentNode.SelectSingleNode("//div[@id='letra']/p");

            if (node == null)
            {
                // Not found: retry once with '&' spelled as 'e'. The retry cannot recurse
                // further because the replaced strings no longer contain '&'.
                if (artist.Contains("&") || title.Contains("&"))
                {
                    return GetFromTerra(artist.Replace('&', 'e'), title.Replace('&', 'e'));
                }

                // Still nothing: report "no lyrics" instead of dereferencing the null node.
                return rep;
            }

            node.InnerHtml = node.InnerHtml.Replace("<br>", "\r\n");

            rep = WebUtility.HtmlDecode(node.InnerText);

            return rep;
        }
Example #47
0
        /// <summary>
        /// Scrapes the mobile Facebook page of ordbogen.com for its number of likes.
        /// </summary>
        /// <returns>
        /// The number parsed from the first matching div (0 when it does not parse as an
        /// integer), -1 when no div contains the likes phrase, or -3 when any exception occurs.
        /// </returns>
        public int GetFaceBookLikes()
        {
            const string marker = "omBeskedDelMere";
            const string terminator = " ";

            try
            {
                HtmlDocument page = new HtmlWeb().Load("https://m.facebook.com/ordbogen");
                if (page == null)
                {
                    return -1;
                }

                foreach (var candidate in page.DocumentNode.SelectNodes("//div"))
                {
                    string text = candidate.InnerText;
                    if (!text.Contains("personer synes godt om dette"))
                    {
                        continue;
                    }

                    // The count sits between the marker and the next space.
                    int from = text.IndexOf(marker, 0) + marker.Length;
                    int to = text.IndexOf(terminator, from);

                    int likes;
                    int.TryParse(text.Substring(from, to - from), out likes);
                    return likes;
                }

                return -1;
            }
            catch (Exception)
            {
                return -3;
            }
        }
Example #48
0
 /// <summary>Reads the cover image URL from the series info block of the series page.</summary>
 /// <param name="serie">Series whose <c>URL</c> is loaded.</param>
 /// <returns>The image's <c>src</c> value, or an empty string when no cover image is present.</returns>
 protected override string _GetSerieMiniatureUrl(Serie serie)
 {
     var web = new HtmlWeb();
     var doc = web.Load(serie.URL);
     var img = doc.DocumentNode.SelectSingleNode("//div[@id='series_info']/div[@class='cover']/img");

     // SelectSingleNode returns null when the page has no cover; fall back to the same
     // default that a missing src attribute already yields.
     return img != null ? img.GetAttributeValue("src", "") : "";
 }
Example #49
0
        // Queries gametracker for Dota 2 servers running the given map and appends the text of
        // every table cell found between the "Server Map" header cell and the next "Rank" header
        // cell to 'list'. Returns the same list instance.
        public static List<string> GetServersFromMap(List<string> list, string map)
        {
            HtmlWeb htmlWeb = new HtmlWeb();

            // Escape the map name so characters such as '&' or spaces cannot alter the query.
            HtmlAgilityPack.HtmlDocument document = htmlWeb.Load("http://www.gametracker.com/search/dota2/?search_by=map&query=" + System.Uri.EscapeDataString(map.Trim()) + "&searchipp=50");

            // SelectNodes returns null when nothing matches; tables without direct <tr>
            // children and rows without <td> are skipped instead of throwing.
            HtmlNodeCollection tables = document.DocumentNode.SelectNodes("//table");
            if (tables == null)
            {
                return list;
            }

            bool started = false;
            bool stopped = true;
            foreach (HtmlNode table in tables)
            {
                HtmlNodeCollection rows = table.SelectNodes("tr");
                if (rows == null)
                {
                    continue;
                }

                foreach (HtmlNode row in rows)
                {
                    HtmlNodeCollection cells = row.SelectNodes("td");
                    if (cells == null)
                    {
                        continue;
                    }

                    foreach (HtmlNode cell in cells)
                    {
                        string text = cell.InnerText;

                        // Each "Rank" header toggles the section state and rearms the start marker.
                        if (text.Contains("Rank&darr"))
                        {
                            stopped = !stopped;
                            started = false;
                        }
                        if (started && !stopped)
                        {
                            list.Add(text.Trim());
                        }

                        // Collection begins with the cell after the "Server Map" header.
                        if (text.Contains("Server Map&nbsp;"))
                        {
                            started = true;
                        }
                    }
                }
            }
            return list;
        }
        /// <summary>
        /// Processes one URL from the crawl queue: if it has not been visited yet, indexes the
        /// page title word by word into the table and enqueues the page's eligible links.
        /// The counters are updated and the queue message is deleted in every case.
        /// </summary>
        /// <param name="urlMessage">Queue message whose text is the URL to crawl.</param>
        public void crawlingPhase(CloudQueueMessage urlMessage)
        {
            totalUrls++;
            String url = urlMessage.AsString;
            if (!alreadyVisitedUrls.Contains(url))
            {
                alreadyVisitedUrls.Add(url);
                try
                {
                    HtmlWeb hw = new HtmlWeb();
                    HtmlAgilityPack.HtmlDocument doc = hw.Load(url);

                    // The publication date is optional; it stays empty when the meta tag is absent.
                    String date = "";
                    HtmlNode pubDate = doc.DocumentNode.SelectSingleNode("//head/meta[@property='og:pubdate']");
                    if (pubDate != null)
                    {
                        String stringDate = pubDate.GetAttributeValue("content", "default").Substring(0, 10);
                        date = DateTime.ParseExact(stringDate, "yyyy-MM-dd", CultureInfo.InvariantCulture).ToString("yyyy/MM/dd");
                    }

                    // One table entry per meaningful word of the title.
                    String fullTitle = doc.DocumentNode.SelectSingleNode("//head/title").InnerText;
                    String[] titles = fullTitle.Split(' ');
                    foreach (string partTitle in titles)
                    {
                        if (!partTitle.Equals(" ") && !partTitle.Equals("-") && !partTitle.Equals("CNN.com") && !partTitle.Equals(""))
                        {
                            CrawlerEntry entry = new CrawlerEntry(url, fullTitle, date, partTitle);
                            TableOperation insertOperation = TableOperation.Insert(entry);
                            table.Execute(insertOperation);
                            tableSize++;
                        }
                    }

                    // Links already enqueued from THIS page. The set must live outside the
                    // loop: when it was re-created per anchor it was always empty, so a link
                    // repeated on the page was enqueued every time it appeared.
                    HashSet<String> links = new HashSet<String>();

                    // SelectNodes returns null when the page has no anchors with an href.
                    HtmlNodeCollection anchors = doc.DocumentNode.SelectNodes("//a[@href]");
                    if (anchors != null)
                    {
                        foreach (HtmlNode link in anchors)
                        {
                            string href = link.GetAttributeValue("href", string.Empty);
                            String[] hrefSplit = href.Split('/');
                            String html = hrefSplit[hrefSplit.Length - 1];

                            // Enqueue only if the href is not disallowed, not already crawled,
                            // not a duplicate on this page, a valid html page, and on cnn or
                            // bleacherreport.
                            if (!disallowedUrls.Any(s => href.Contains(s)) && !alreadyVisitedUrls.Any(s => s.Equals(href)) && !links.Contains(href) && rgx.IsMatch(html) && (href.Contains("cnn.com") || href.Contains("bleacherreport.com")))
                            {
                                urlQueue.AddMessage(new CloudQueueMessage(href));
                                links.Add(href);
                            }
                        }
                    }
                }
                catch
                {
                    // Best-effort crawl: a page that fails to load or parse is skipped so the
                    // message is still removed from the queue below.
                }
            }

            updateTotalUrls();
            // Update the list of the last 10 URLs crawled.
            updateLastUrl(urlMessage.AsString);

            urlQueue.DeleteMessage(urlMessage);
        }
Example #51
0
        /// <summary>
        /// Runs a Qwant image search and appends a thumbnail/full-image pair to
        /// <c>searchlist</c> for every <c>img</c> element of the result page, then publishes
        /// the list. Any exception is written to the console and ends the run.
        /// </summary>
        /// <param name="gametitle">Not used by this method.</param>
        /// <param name="imagetype">Not used by this method.</param>
        /// <param name="searchstring">Search text; spaces are replaced by "%20".</param>
        public void SearchLinks(string gametitle, string imagetype, string searchstring)
        {
            searchstring = searchstring.Replace(" ", "%20");
            var url = "https://www.qwant.com/?q=" + searchstring + "&t=images";

            try
            {
                HtmlAgilityPack.HtmlDocument results = new HtmlAgilityPack.HtmlWeb().Load(url);

                foreach (HtmlNode img in results.DocumentNode.SelectNodes("//img"))
                {
                    string src = img.GetAttributeValue("src", string.Empty);

                    // Full image: the text after the first '=', with "%3A" and "%2F" decoded
                    // and the last two characters cut off.
                    string fullImage = src.Split('=')[1].Replace("%3A", ":").Replace("%2F", "/");
                    fullImage = fullImage.Remove(fullImage.Length - 2);

                    // Thumbnail: the src itself with a scheme in front, cut at the last "&q=".
                    string thumbnail = "http:" + src;
                    thumbnail = thumbnail.Substring(0, thumbnail.LastIndexOf("&q="));

                    var hit = new SearchResults
                    {
                        Thumbnail = thumbnail,
                        Image     = fullImage
                    };
                    searchlist.Add(hit);
                }

                SearchList = searchlist;
                ObservableList();
            }
            catch (Exception e)
            {
                Console.WriteLine("Error: " + e);
            }
        }
        /// <summary>
        /// Loads a ROM's info page and extracts its download link, id, name, size, region,
        /// console, image, download count and votes into a <c>Models.rominfo</c>.
        /// </summary>
        /// <param name="link">URL of the ROM's info page.</param>
        /// <returns>
        /// The populated info, or a fresh empty <c>Models.rominfo</c> when the page text
        /// contains "404 Page Not Found".
        /// </returns>
        public async Task <Models.rominfo> getrominfo(string link)
        {
            var doc2 = new HtmlAgilityPack.HtmlWeb();
            // Fetch the ROM's info page.

            var htmlDoc2 = await doc2.LoadFromWebAsync(link);


            // Per the original author's note: a missing page still comes back as a successful
            // response but has nothing in its tables, which would crash the parsing below,
            // so the page text is checked for the 404 marker first.
            if (!htmlDoc2.Text.Contains("404 Page Not Found"))
            {
                // Take the second div of the page.


                var nodelo = htmlDoc2.DocumentNode.SelectNodes("//div")[1];
                // NOTE(review): both XPath queries issued on 'nodelo' start with "//", which is
                // normally resolved from the document root rather than from 'nodelo' — confirm
                // that a whole-document search is intended.
                var klowa  = nodelo.SelectNodes("//*[contains(@class,'table table-striped rom-info')]").Where(aaxx => aaxx.Name == "table").First().ChildNodes.Where(aaxx => aaxx.Name == "tbody").First();
                // The table body's inner text is passed through desencriptar and split on the
                // "^^^???**//" delimiter into the individual info fields.
                // (presumably desencriptar inserts that delimiter — verify against its definition)
                var            listaelementos = desencriptar(klowa.InnerText).Split(new[] { "^^^???**//" }, StringSplitOptions.None);
                Models.rominfo info           = new Models.rominfo();
                // Look up the "rom-link" element by id and rewrite its href: every "&amp;" and
                // "&" is removed, then a single "&" is put back in front of token=, id= and
                // name= so the value can be split on those markers.
                info.linkdescarga = htmlDoc2.GetElementbyId("rom-link").Attributes["href"].Value.Replace("&amp;", "").Replace("&", "").Replace("token=", "&token=").Replace("id=", "&id=").Replace("name=", "&name=");
                // The ROM id is the text between "&id=" and "&token=" in the rewritten link,
                // i.e. the link is expected to look like ...&id=<id>&token=<token>...
                info.id = info.linkdescarga.Split(new[] { "&id=" }, StringSplitOptions.None)[1].Split(new[] { "&token=" }, StringSplitOptions.None)[0].Replace("&", "");
                // Copy the split fields into the model instance, by position.
                info.nombre  = listaelementos[0];
                info.size    = listaelementos[1];
                info.region  = listaelementos[2];
                info.consola = listaelementos[3];
                // Find the product image element and take its src attribute.
                var imagen = nodelo.SelectNodes("//*[contains(@class,'product__img')]").Where(aaxx => aaxx.Name == "img");


                info.imagen = imagen.First().Attributes["src"].Value;
                // Download count and votes may be absent, hence the try/catch around them.
                try
                {
                    info.descargas = listaelementos[4];

                    // The votes text is read from a fixed position in the child-node tree.
                    info.votos = nodelo.ChildNodes[2].ChildNodes[0].ChildNodes[1].ChildNodes[1].ChildNodes[1].ChildNodes[0].ChildNodes[0].InnerText.Replace("Out of", " De ");
                }
                catch
                {
                    // Not found: fall back to default values.
                    info.descargas = "0";
                    info.votos     = "0 de 5";
                }



                return(info);
            }
            else
            {
                return(new Models.rominfo());
            }
        }
        /// <summary>Loads the page at <paramref name="url"/> and selects its item-detail links.</summary>
        /// <param name="url">Address of the page to load.</param>
        /// <returns>The result of selecting every <c>a</c> element whose class is exactly "itemDetail".</returns>
        public object getData(string url)
        {
            return new HtmlAgilityPack.HtmlWeb()
                .Load(url)
                .DocumentNode
                .SelectNodes("//a[@class='itemDetail']");
        }
Example #54
0
        /// <summary>
        /// Loads the page at <paramref name="url"/> through an <c>HtmlWebWrapper</c> around a
        /// fresh <c>HtmlWeb</c> instance.
        /// </summary>
        /// <param name="url">Address of the page to load.</param>
        /// <returns>The document returned by the wrapper's <c>Load</c>.</returns>
        public IPXHtmlDocument CreateHtmlDocument(string url)
        {
            return new HtmlWebWrapper(new HtmlAgilityPack.HtmlWeb()).Load(url);
        }
Example #55
0
        /// <summary>
        /// Collects price data from the Naver shopping page at <c>naverlink</c>: first the page
        /// itself, then every further page referenced by the pagination links. Web, URI-format
        /// and null-reference failures are reported on the console and end the run.
        /// </summary>
        public void storedata()
        {
            try
            {
                // Collect the data of the first page.
                web       = new HtmlAgilityPack.HtmlWeb();
                document  = web.Load(naverlink);
                document3 = web.Load(naverlink);
                collectdata(document);

                // Collect the data of the remaining pages.
                // nvMid is the value of the first query parameter (between the first '=' and
                // the first '&'); query is everything after "query=".
                int    tmp2   = naverlink.IndexOf("=") + 1;
                int    tmp3   = naverlink.IndexOf("&");
                int    tmp4   = naverlink.IndexOf("query=") + 6;
                int    tmp5   = naverlink.Length;
                String nvMid  = naverlink.Substring(tmp2, tmp3 - tmp2);
                String query  = naverlink.Substring(tmp4, tmp5 - tmp4);
                var    VARIABLES = document.DocumentNode.SelectSingleNode(".//div[@class='co_paginate']").Descendants().Where(x => x.Name == "a");

                foreach (var VARIABLE in VARIABLES)
                {
                    // The page number is the first argument of the link's onclick call.
                    String page   = VARIABLE.GetAttributeValue("onclick", "");
                    int    index3 = page.IndexOf("(") + 1;
                    int    index4 = page.IndexOf(",");
                    page = page.Substring(index3, index4 - index3);

                    String url = "http://shopping.naver.com/detail/section_price_compare.nhn?nvMid=" + nvMid +
                                 "&pkey=0&pkey2=0&mallSeq=all&fee=all&page=" + page + "&frm=NVSHATC&query=" + query;
                    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
                    request.Method  = "GET";
                    request.Referer = "http://shopping.naver.com/detail/detail.nhn?nv_mid=9535864708&cat_id=50000151&frm=NVSHATC&query=%EC%82%BC%EC%84%B1%EC%A0%84%EC%9E%90+%EB%85%B8%ED%8A%B8%EB%B6%819+metal+NT900X3L-K58S";

                    // Dispose the response and reader once the body is read; they were
                    // previously left open, holding one connection per page.
                    String html;
                    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                    using (StreamReader reader = new StreamReader(response.GetResponseStream()))
                    {
                        html = reader.ReadToEnd();
                    }

                    document3.LoadHtml(html);
                    collectdata(document3);
                }
            }
            catch (WebException)
            {
                Console.WriteLine("네이버url 변수 WebException");
            }
            catch (HtmlWebException)
            {
                Console.WriteLine("네이버url 변수 WebException");
            }
            catch (UriFormatException)
            {
                Console.WriteLine("네이버url 변수 WebException");
            }
            catch (NullReferenceException)
            {
                // Raised e.g. when the pagination block is missing from the page.
                Console.WriteLine("네이버url 변수 NullReferenceException");
            }
        }
Example #56
0
        /// <summary>
        /// Downloads the page at <paramref name="uri"/> and builds a <c>Product</c> from the
        /// address and the name extracted from the page.
        /// </summary>
        /// <param name="uri">Address of the product page.</param>
        /// <returns>A new <c>Product</c> constructed from <paramref name="uri"/> and the extracted name.</returns>
        public Product Scrape(Uri uri)
        {
            HtmlDocument page = new HtmlAgilityPack.HtmlWeb().Load(uri);

            // NOTE(review): the extracted price is not used by the returned Product —
            // confirm whether it should be passed to the constructor.
            ExtractPrice(page);

            var name = ExtractName(page);

            return new Product(uri, name);
        }
Example #57
0
        /// <summary>
        /// Downloads the fundamentus.com.br details page, reads the cells of table "test1" in
        /// groups of three (papel, nomeComercial, razaoSocial) into <c>Acao</c> objects and echoes
        /// every cell to the console. After a line is read from the console, the output file is
        /// created if it does not exist yet.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            const string url = "https://www.fundamentus.com.br/detalhes.php";

            List <Acao> listaAcao = new List <Acao>();

            HtmlAgilityPack.HtmlWeb web = new HtmlAgilityPack.HtmlWeb();
            web.CacheOnly  = false;
            web.CachePath  = null;
            web.UsingCache = false;
            HtmlAgilityPack.HtmlDocument doc = web.Load(url);

            var cells = doc.DocumentNode.SelectNodes("//table[@id='test1']/tbody/tr/td");

            // SelectNodes returns null rather than an empty collection when nothing matches,
            // so guard before iterating instead of failing with a NullReferenceException.
            if (cells != null)
            {
                Acao current = null;
                int  column  = 0;   // position of the cell inside the current group of three

                foreach (HtmlNode cell in cells)
                {
                    switch (column)
                    {
                        case 0:
                            // First cell of a group starts a new entry.
                            current       = new Acao();
                            current.papel = cell.InnerText;
                            column        = 1;
                            break;

                        case 1:
                            current.nomeComercial = cell.InnerText;
                            column                = 2;
                            break;

                        case 2:
                            // Third cell completes the entry.
                            current.razaoSocial = cell.InnerText;
                            listaAcao.Add(current);
                            column = 0;
                            break;
                    }

                    Console.WriteLine(cell.InnerText);
                }
            }

            Console.ReadLine();

            string path = @"C:\Users\Yuri\Desktop\Stockbook\SQL\v2\listaAcoes.txt";

            if (!File.Exists(path))
            {
                // Create a file to write to. Both writes below are commented out, so the
                // file is currently created empty.
                using (StreamWriter sw = File.CreateText(path))
                {
                    foreach (var item in listaAcao)
                    {
                        //sw.WriteLine("INSERT INTO tb_empresa (nome_comercial, razao_social) VALUES('" + item.nomeComercial + "', '" + item.razaoSocial + "')");
                        //sw.WriteLine("INSERT INTO tb_acao (ticker, id_empresa) VALUES('" + item.papel + "', (SELECT id_empresa FROM tb_empresa WHERE razao_social = '" + item.razaoSocial + "'))");
                    }
                }
            }
        }
Example #58
0
        /// <summary>
        /// Loads the faceitstats.com page of a player and returns the text of the node addressed
        /// by a fixed XPath, which this method treats as the player's ELO.
        /// </summary>
        /// <param name="playerName">
        /// Player name as displayed (not already percent-encoded); it is percent-encoded before
        /// being placed in the URL path.
        /// </param>
        /// <returns>The <c>InnerText</c> of the first node matched by the XPath.</returns>
        /// <remarks>
        /// <c>SelectNodes</c> returns null when nothing matches, so a changed page layout or an
        /// unknown player surfaces as a <c>NullReferenceException</c>, as before.
        /// </remarks>
        public static string ObtainFaceitElo(string playerName)
        {
            // Escape the name so characters such as spaces, '/', '?' or '#' cannot alter the URL.
            // A null name still yields an empty path segment, as the plain interpolation did.
            string       safeName = System.Uri.EscapeDataString(playerName ?? string.Empty);
            string       url      = $"https://faceitstats.com/player/{safeName}";
            var          web      = new HtmlAgilityPack.HtmlWeb();
            HtmlDocument doc      = web.Load(url);

            string elo = doc.DocumentNode.SelectNodes("//*[@id=\"app\"]/main/div/div[1]/div[2]/div[1]/div/div[1]/h5")[0].InnerText;

            return(elo);
        }
Example #59
0
 /// <summary>
 /// Loads the page at <c>Url</c> with an MSIE 7 user-agent string and returns the document
 /// with its XML-output, auto-close and default-stream-encoding options set.
 /// </summary>
 /// <returns>The loaded document.</returns>
 public HtmlDocument GetDocument()
 {
     var browser = new HtmlAgilityPack.HtmlWeb
     {
         UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)"
     };

     var page = browser.Load(Url);

     // NOTE(review): these options are assigned after the page has been loaded —
     // confirm they are meant to affect later use of the document rather than this load.
     page.OptionOutputAsXml           = true;
     page.OptionAutoCloseOnEnd        = true;
     page.OptionDefaultStreamEncoding = System.Text.Encoding.UTF8;

     return page;
 }
Example #60
0
        /// <summary>
        /// Loads the faceitstats.com page of a player and returns the text of the table cell
        /// addressed by a fixed XPath (first row, third column), which this method treats as the
        /// score of the last match.
        /// </summary>
        /// <param name="playerName">
        /// Player name as displayed (not already percent-encoded); it is percent-encoded before
        /// being placed in the URL path.
        /// </param>
        /// <returns>The <c>InnerText</c> of the first node matched by the XPath.</returns>
        /// <remarks>
        /// <c>SelectNodes</c> returns null when nothing matches, so a changed page layout or an
        /// unknown player surfaces as a <c>NullReferenceException</c>, as before.
        /// </remarks>
        public static string ObtainLastMatchScore(string playerName)
        {
            // Escape the name so characters such as spaces, '/', '?' or '#' cannot alter the URL.
            // A null name still yields an empty path segment, as the plain interpolation did.
            string       safeName = System.Uri.EscapeDataString(playerName ?? string.Empty);
            string       url      = $"https://faceitstats.com/player/{safeName}";
            var          web      = new HtmlAgilityPack.HtmlWeb();
            HtmlDocument doc      = web.Load(url);

            string score = doc.DocumentNode.SelectNodes("//*[@id=\"app\"]/main/div/div[7]/div/table/tbody/tr[1]/td[3]")[0].InnerText;

            return(score);
        }