/// <summary>
/// Downloads the BBC football results fragment and walks every table row that has an
/// id attribute, extracting the match id, score, team names and date of each row.
/// </summary>
/// <remarks>
/// The extracted values are only held in locals; nothing is stored or returned yet.
/// </remarks>
public datascraper()
{
    string url = @"http://www.bbc.co.uk/sport/football/results/partial/competition-118996114";

    HtmlWeb htmlWeb = new HtmlWeb();
    HtmlDocument doc = htmlWeb.Load(url);

    // SelectNodes returns null (not an empty collection) when nothing matches.
    HtmlNodeCollection mtchrslts = doc.DocumentNode.SelectNodes("//tr[@id]");
    if (mtchrslts == null)
    {
        return;
    }

    foreach (HtmlNode matchresult in mtchrslts)
    {
        // The row itself is the tr[@id] node, so its id is read directly.
        string idnum = matchresult.GetAttributeValue("id", "").Replace("match-row-", "");

        // ".//" keeps each query relative to the current row; a leading "//" would
        // search the whole document and return the first match for every row.
        HtmlNode scoreNode = matchresult.SelectSingleNode(".//abbr[@title='Score']");
        HtmlNode homeNode = matchresult.SelectSingleNode(".//p[(@class='team-home teams')]");
        HtmlNode awayNode = matchresult.SelectSingleNode(".//p[(@class='team-away teams')]");
        HtmlNode dateNode = matchresult.SelectSingleNode(".//td[(@class='match-date')]");
        if (scoreNode == null || homeNode == null || awayNode == null || dateNode == null)
        {
            // Row does not have the expected cells; skip it.
            continue;
        }

        string[] teamscores = scoreNode.InnerText.Split('-');
        if (teamscores.Length < 2)
        {
            continue;
        }

        string teamscoreh = teamscores[0];
        string teamscorea = teamscores[1];
        string hteam = homeNode.InnerText;
        string ateam = awayNode.InnerText;
        string date = dateNode.InnerText;
    }
}
/// <summary>
/// Scrapes https://coincost.net/es/currencies for price and name cells and lists the
/// selected values in listBox1, prices first, then a separator line, then names.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    // Before changing the XPath expressions, inspect the page in a browser and use
    // Ctrl+F to find the elements that the node selection needs.
    HtmlAgilityPack.HtmlWeb web = new HtmlAgilityPack.HtmlWeb();
    HtmlAgilityPack.HtmlDocument doc = web.Load("https://coincost.net/es/currencies");

    // SelectNodes returns null when nothing matches, so guard before iterating.
    var priceNodes = doc.DocumentNode.SelectNodes("//td [@class = 'price'] //p");
    if (priceNodes != null)
    {
        foreach (var item in priceNodes)
        {
            preciosMalo.Add(item.InnerText);
        }
    }

    // Only every second raw entry (starting at index 0) is used; its first four
    // characters are dropped before it is stored and displayed.
    for (int n = 0; n < preciosMalo.Count; n = n + 2)
    {
        string usd = preciosMalo[n];
        string numero = usd.Substring(4);
        precios.Add(numero);
        listBox1.Items.Add(numero);
    }

    listBox1.Items.Add("-------------------------------------");

    var nameNodes = doc.DocumentNode.SelectNodes("//td [@class = 'title'] //span");
    if (nameNodes != null)
    {
        foreach (var item in nameNodes)
        {
            nombresMalo.Add(item.InnerText);
        }
    }

    // Only every second raw entry (starting at index 1) is used.
    for (int d = 1; d < nombresMalo.Count; d = d + 2)
    {
        nombres.Add(nombresMalo[d]);
        listBox1.Items.Add(nombresMalo[d]);
    }

    // To find the last value:  "//td[@class = 'right tar']"
    // To find the symbol:      "//b"
}
/// <summary>
/// Fetches and parses the page at <paramref name="url"/> using a fixed Firefox
/// user-agent string.
/// </summary>
/// <param name="url">Address of the page to load.</param>
/// <returns>The parsed document.</returns>
public HtmlDocument ReadLink(string url)
{
    var client = new HtmlAgilityPack.HtmlWeb
    {
        UserAgent = "Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0"
    };

    return client.Load(url);
}
/// <summary>
/// Collects the image URLs of a comic chapter page and hands them to
/// downloadFunctions.Download.
/// </summary>
/// <param name="url">
/// Chapter URL, e.g. https://www.comicextra.com/invincible-iron-man-2015/chapter-14.
/// "/full" is appended when it is not already the suffix.
/// </param>
/// <param name="path">Destination path passed through to the downloader.</param>
public static void comicDown(string url, string path)
{
    // EndsWith is safe for URLs shorter than the suffix, unlike Substring(Length - 5).
    if (!url.EndsWith("/full", StringComparison.Ordinal))
    {
        url = url + "/full";
    }

    HtmlAgilityPack.HtmlWeb hw = new HtmlAgilityPack.HtmlWeb();
    HtmlAgilityPack.HtmlDocument doc = hw.Load(url);

    // SelectNodes returns null when no image matches.
    var imageNodes = doc.DocumentNode.SelectNodes("//img[@class='chapter_img']");
    if (imageNodes == null)
    {
        return;
    }

    List<string> linky = new List<string>();
    foreach (HtmlNode link in imageNodes)
    {
        linky.Add(link.GetAttributeValue("src", ""));
    }

    foreach (var item in linky)
    {
        Console.WriteLine(item);
    }

    // The referer is the page that was actually loaded rather than one fixed chapter.
    string refer = "referer: " + url;

    // Folder name is the last 15 characters of the URL (the whole URL when shorter).
    string FolderName = url.Length > 15 ? url.Substring(url.Length - 15) : url;

    downloadFunctions.Download(linky, path, refer, FolderName);
}
/// <summary>
/// Searches vndb.org for <paramref name="searchString"/> and converts each row of the
/// result table into a VN.
/// </summary>
/// <param name="searchString">Text appended to the "sq" query parameter as-is.</param>
/// <returns>
/// The entries that could be parsed. Best effort: on any failure the entries gathered
/// so far (possibly none) are returned.
/// </returns>
public async Task<List<VN>> GetVNSearch(string searchString)
{
    List<VN> result = new List<VN>();
    string url = "https://vndb.org/v/all?sq=" + searchString;

    try
    {
        var web = new HtmlAgilityPack.HtmlWeb();
        web.CaptureRedirect = true;
        HtmlDocument doc = await web.LoadFromWebAsync(url);

        // SelectNodes returns null when the table is absent.
        var searchList = doc.DocumentNode.SelectNodes("//table[@class='stripe']/tr");
        if (searchList == null)
        {
            return result;
        }

        foreach (var search in searchList)
        {
            var titleCell = search.SelectSingleNode("./td[@class='tc1']");
            var titleLink = search.SelectSingleNode("./td[@class='tc1']/a");
            var dateCell = search.SelectSingleNode("./td[@class='tc4']");
            if (titleCell == null || titleLink == null || dateCell == null)
            {
                // Skip rows without the expected cells instead of aborting the rest.
                continue;
            }

            string id = titleLink.GetAttributeValue("href", "");
            string japName = titleLink.GetAttributeValue("title", "");
            string engName = titleCell.InnerText;
            string date = dateCell.InnerText;

            result.Add(new VN(ExtractId(id), new Name(engName, japName), date));
        }
    }
    catch (Exception e)
    {
        // Still best effort, but leave a trace instead of swallowing silently.
        System.Diagnostics.Debug.WriteLine(e);
    }

    return result;
}
/// <summary>
/// Prompts for a search term, loads the search page built from baseUrl and fills
/// bookList, penulisList and ratingList before calling showOutput.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    Console.WriteLine("Silahkan masukkan jenis buku...");
    string searchKey = Console.ReadLine();
    Console.WriteLine("Sedang mencari informasi buku...");

    baseUrl = baseUrl.Replace("query=", "query=" + searchKey).Replace(" ", "+");

    HtmlAgilityPack.HtmlWeb web = new HtmlAgilityPack.HtmlWeb();
    HtmlAgilityPack.HtmlDocument doc = web.Load(baseUrl);

    // SelectNodes returns null when nothing matches; calling ToList() on that result
    // would throw, so each collection is checked before it is iterated.
    var bookName = doc.DocumentNode.SelectNodes("//a[@class='bookTitle']//span[@itemprop='name']");
    if (bookName != null)
    {
        foreach (var book in bookName)
        {
            bookList.Add(book.InnerText);
        }
    }

    var penulis = doc.DocumentNode.SelectNodes("//a[@class='authorName']//span[@itemprop='name']");
    if (penulis != null)
    {
        foreach (var writter in penulis)
        {
            penulisList.Add(writter.InnerText);
        }
    }

    var rating = doc.DocumentNode.SelectNodes("//span[@class='minirating']");
    if (rating != null)
    {
        foreach (var rate in rating)
        {
            ratingList.Add(rate.InnerText);
        }
    }

    showOutput();
    Console.ReadLine();
}
/// <summary>
/// Creates the service with a fresh UrlService and HtmlWeb instance.
/// </summary>
public WebScraperService()
{
    _urlervice = new UrlService();
    _web = new HtmlWeb();
}
/// <summary>
/// Loads a jobs page and appends one JobListing per title cell of the
/// "positionlist" table to <paramref name="jobListings"/>.
/// </summary>
/// <param name="url">Address of the listing page.</param>
/// <param name="jobListings">List that receives the parsed listings.</param>
private void AddGithubJobs(string url, List<JobListing> jobListings)
{
    HtmlWeb page = new HtmlWeb();
    var document = page.Load(url);
    string baseURL = "https://jobs.github.com";

    // Both lookups return null when the page has no such table or cells.
    HtmlNode table = document.DocumentNode.SelectSingleNode("//table[@class='positionlist']");
    if (table == null)
    {
        return;
    }

    HtmlNodeCollection rows = table.SelectNodes(".//td[@class='title']");
    if (rows == null)
    {
        return;
    }

    foreach (HtmlNode row in rows)
    {
        // Only cells with exactly five child nodes are read.
        if (row.ChildNodes.Count != 5)
        {
            continue;
        }

        try
        {
            jobListings.Add(new JobListing()
            {
                SearchEngine = SearchEngines.GitHub,
                Title = row.ChildNodes[1].InnerText,
                Company = row.ChildNodes[3].ChildNodes[1].InnerText,
                URL = baseURL + row.ChildNodes[1].ChildNodes[0].Attributes[0].Value
            });
        }
        catch (System.Exception ex)
        {
            // A malformed cell is skipped (and traced) without dropping later rows.
            System.Diagnostics.Debug.WriteLine(ex);
        }
    }
}
/// <summary>
/// Parses the page at xpModel.WebRankListUrl with HtmlAgilityPack and builds one
/// BeatMap per node matched by xpModel.CategoryListXPath.
/// </summary>
/// <param name="xpModel">Configuration holding the page URL and the XPath of every field.</param>
/// <returns>The parsed BeatMap list; empty when the category XPath matches nothing.</returns>
private List<BeatMap> GetBeatMapsListByHAP(ConfigModel xpModel)
{
    List<BeatMap> beatMaps = new List<BeatMap>();

    // Load the page and locate the nodes to iterate over.
    HtmlDocument hDoc = new HtmlWeb().Load(xpModel.WebRankListUrl);
    HtmlNode rootNode = hDoc.DocumentNode;
    HtmlNodeCollection categoryNodeList = rootNode.SelectNodes(xpModel.CategoryListXPath);

    // SelectNodes returns null when nothing matches.
    if (categoryNodeList == null)
    {
        return beatMaps;
    }

    foreach (HtmlNode categoryNode in categoryNodeList)
    {
        // Re-parse the entry as a standalone node; the field XPaths are then
        // evaluated against that copy.
        HtmlNode temp = HtmlNode.CreateNode(categoryNode.OuterHtml);

        BeatMap beatMap = new BeatMap();
        beatMap.Id = temp.SelectSingleNode(xpModel.IdXPath).GetAttributeValue("Id", 0000);
        beatMap.Title = temp.SelectSingleNode(xpModel.TitleXPath).InnerText;
        beatMap.Artist = temp.SelectSingleNode(xpModel.ArtistXPath).InnerText;
        beatMap.Mapper = temp.SelectSingleNode(xpModel.MapperXPath).InnerText;
        beatMap.Styles = temp.SelectSingleNode(xpModel.StylesXPath).InnerText;
        beatMap.Language = temp.SelectSingleNode(xpModel.LanguageXPath).InnerText;

        // The Sb node is optional; "NoSb" is used when it or its class is missing.
        HtmlNode sbNode = temp.SelectSingleNode(xpModel.SbXPath);
        beatMap.Sb = sbNode != null ? sbNode.GetAttributeValue("class", "NoSb") : "NoSb";

        beatMaps.Add(beatMap);
    }

    return beatMaps;
}
/// <summary>
/// Loads a Google search page for <paramref name="phrase"/> and extracts the first
/// number found inside the element with id "resultStats".
/// </summary>
/// <param name="phrase">Search phrase appended to the query string as-is.</param>
/// <returns>
/// The parsed number, 0 when no whole number could be parsed, or -1 when the page had
/// parse errors or no "resultStats" element.
/// </returns>
long NumOfHits(string phrase)
{
    HtmlAgilityPack.HtmlWeb web = new HtmlAgilityPack.HtmlWeb();
    HtmlAgilityPack.HtmlDocument htmlDoc = web.Load("https://www.google.com/search?q=" + phrase);

    if (htmlDoc.ParseErrors != null && htmlDoc.ParseErrors.Any())
    {
        System.Console.WriteLine("error");
        debug.Print("error\n");
        return(-1);
    }

    if (htmlDoc.DocumentNode == null)
    {
        return(-1);
    }

    // SelectSingleNode returns null when the stats element is absent.
    HtmlAgilityPack.HtmlNode node = htmlDoc.DocumentNode.SelectSingleNode("//div[@id='resultStats']");
    if (node == null)
    {
        return(-1);
    }

    Regex re = new Regex(@"[1-9](?:\d{0,2})(?:,\d{3})*(?:\.\d*[1-9])?|0?\.\d*[1-9]|0");
    String result = re.Match(node.InnerHtml).Value;

    // Strip thousands separators and parse. Values without a comma (e.g. "532") are
    // parsed too instead of being reported as 0.
    long hits;
    if (!long.TryParse(result.Replace(",", ""), out hits))
    {
        hits = 0;
    }

    return(hits);
}
/// <summary>
/// Loads <paramref name="url"/> and returns every tr element with class "athing".
/// </summary>
/// <param name="url">Address of the page to load.</param>
/// <returns>The matching rows; an empty list when there are none.</returns>
static List<HtmlNode> ExtractPostsFromUrl(String url)
{
    HtmlAgilityPack.HtmlWeb web = new HtmlAgilityPack.HtmlWeb();
    HtmlAgilityPack.HtmlDocument doc = web.Load(url);

    // SelectNodes returns null (not an empty collection) when nothing matches.
    var rows = doc.DocumentNode.SelectNodes("//tr[@class='athing']");
    return rows == null ? new List<HtmlNode>() : new List<HtmlNode>(rows);
}
/// <summary>
/// Loads the page at <paramref name="url"/>, passes it to getInfo, and shows the
/// content of the "detailcontent" div in tblContent as plain text.
/// </summary>
/// <param name="url">Address of the page to load.</param>
/// <remarks>
/// Any exception is reported through a MessageDialog; the loading indicator is hidden
/// in all cases.
/// </remarks>
async void getData(String url)
{
    fragmentLoad.Visibility = Visibility.Visible;
    HtmlAgilityPack.HtmlWeb htmlWeb = new HtmlAgilityPack.HtmlWeb();
    try
    {
        htmlDoc = await htmlWeb.LoadFromWebAsync(url);
        getInfo(htmlDoc);

        HtmlNode _nod = htmlDoc.DocumentNode.SelectSingleNode(@"//div[@id='detailcontent']");
        String html = _nod.InnerHtml.Replace("\t", "");

        StringBuilder sb = new StringBuilder(html);
        sb.Replace("<br>", "\n");
        // Turn the HTML quote entity back into a literal quote character.
        sb.Replace("&quot;", "\"");

        tblContent.Text = sb.ToString() + "\n\n\n\n\n";
        checkPageState(htmlDoc);
    }
    catch (Exception)
    {
        MessageDialog md = new MessageDialog("Lỗi hệ thống , vui lòng thử lại sau");
        md.ShowAsync();
    }
    finally
    {
        fragmentLoad.Visibility = Visibility.Collapsed;
    }
}
/// <summary>
/// Loads a listing page and adds one Novel per li of the "homeListstory" list to
/// listNovels, then binds the list and reads the current page number.
/// </summary>
/// <param name="url">Address of the listing page.</param>
/// <param name="mode">Mode.Refresh clears listNovels before loading.</param>
/// <remarks>
/// Any exception is reported through a MessageDialog; the loading indicator is hidden
/// in all cases.
/// </remarks>
async void getData(String url, Mode mode)
{
    fragmentLoad.Visibility = Visibility.Visible;

    if (mode == Mode.Refresh)
    {
        listNovels.Clear();
    }

    var loader = new HtmlAgilityPack.HtmlWeb();
    try
    {
        htmlDoc = await loader.LoadFromWebAsync(url);

        HtmlNode storyList = htmlDoc.DocumentNode.SelectSingleNode(@"//ul[@class='homeListstory']");
        foreach (HtmlNode entry in storyList.SelectNodes("li"))
        {
            // The h3 link carries both the title and the target address.
            HtmlNode titleLink = entry.SelectSingleNode("h3").SelectSingleNode("a");
            String name = titleLink.GetAttributeValue("title", null);
            String imgUrl = entry.SelectSingleNode("a").SelectSingleNode("img").GetAttributeValue("src", null);
            String mainUrl = titleLink.GetAttributeValue("href", null);

            listNovels.Add(new Novel(name, imgUrl, mainUrl));
        }

        fragmentGridView.lvHomePage.ItemsSource = listNovels;

        HtmlNode pageMarker = htmlDoc.DocumentNode.SelectSingleNode(@"//a[@title='current-page']");
        currentPage = Convert.ToInt16(pageMarker.InnerText);
    }
    catch (Exception)
    {
        var dialog = new MessageDialog("Lỗi hệ thống , vui lòng thử lại sau");
        dialog.ShowAsync();
    }
    finally
    {
        fragmentLoad.Visibility = Visibility.Collapsed;
    }
}
/// <summary>
/// Extracts video page URLs from the anchors of the table that contains the
/// "embedcode" cell. Each anchor's onclick script holds a URL-encoded HTML snippet
/// inside unescape('...'); the src of the iframe in that snippet is the video URL.
/// </summary>
/// <param name="mainUrl">Address of the page listing the video sources.</param>
/// <returns>Map from anchor text to video page URL; empty when nothing is found.</returns>
private IDictionary<string, string> getVideoUrls(string mainUrl)
{
    var videoSources = new Dictionary<string, string>();
    var web = new HAP.HtmlWeb();
    var doc = web.Load(mainUrl);

    var embedCell = doc.DocumentNode.SelectSingleNode("//td[@id='embedcode']");
    if (embedCell == null || embedCell.ParentNode == null || embedCell.ParentNode.ParentNode == null)
    {
        return(videoSources);
    }

    var videoSelectorTable = embedCell.ParentNode.ParentNode;

    // ".//a" limits the search to the table; "//a" would scan the whole document.
    var anchors = videoSelectorTable.SelectNodes(".//a");
    if (anchors == null)
    {
        return(videoSources);
    }

    const string pivot = "unescape('";
    foreach (var node in anchors)
    {
        if (!node.Attributes.Contains("onclick"))
        {
            continue;
        }

        var jscriptCode = node.Attributes["onclick"].Value;

        // Skip scripts that do not contain a complete unescape('...') call.
        var pivotIdx = jscriptCode.IndexOf(pivot, System.StringComparison.Ordinal);
        if (pivotIdx < 0)
        {
            continue;
        }

        var srtIdx = pivotIdx + pivot.Length;
        if (srtIdx >= jscriptCode.Length)
        {
            continue;
        }

        var endIdx = jscriptCode.IndexOf("'", srtIdx + 1, System.StringComparison.Ordinal);
        if (endIdx < 0)
        {
            continue;
        }

        var redirectUrlEncoded = jscriptCode.Substring(srtIdx, endIdx - srtIdx);
        var redirectUrl = WebUtility.UrlDecode(redirectUrlEncoded);

        // The decoded value is an HTML fragment, so it is parsed rather than fetched.
        var scriptDoc = new HAP.HtmlDocument();
        scriptDoc.LoadHtml(redirectUrl);

        var iframe = scriptDoc.DocumentNode.SelectSingleNode("//iframe");
        if (iframe == null)
        {
            continue;
        }

        string videoPageUrl = iframe.GetAttributeValue("src", null);
        if (videoPageUrl == null)
        {
            continue;
        }

        videoSources.Add(node.InnerText, videoPageUrl);
    }

    return(videoSources);
}
/// <summary>
/// Loads source.Domain + source.Path and collects the href of every element matched
/// by source.LinkSelector. Hrefs starting with "/" are prefixed with the domain.
/// </summary>
/// <param name="source">Supplies the domain, path and link selector.</param>
/// <returns>The collected URLs; empty hrefs are left out.</returns>
public List<String> ReviewUrl(Source source)
{
    var loader = new HtmlAgilityPack.HtmlWeb();
    var root = loader.Load(source.Domain + source.Path).DocumentNode;

    var collected = new List<string>();
    foreach (var anchor in root.QuerySelectorAll(source.LinkSelector))
    {
        try
        {
            var href = anchor.GetAttributeValue("href", "");
            Debug.WriteLine(href);

            if (string.IsNullOrEmpty(href))
            {
                continue;
            }

            // Site-relative links are made absolute against the source domain.
            collected.Add(href.StartsWith("/") ? source.Domain.TrimEnd('/') + href : href);
        }
        catch (Exception failure)
        {
            Debug.WriteLine(failure);
        }
    }

    return collected;
}
/// <summary>
/// Loads the page whose address is in TextBox1 and writes the header and description
/// of every "article-single" element to the response as a small HTML table.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void Button1_Click(object sender, EventArgs e)
{
    try
    {
        // NOTE(review): TextBox1.Text is a user-supplied URL fetched by the server;
        // consider restricting which hosts may be requested.
        HtmlAgilityPack.HtmlWeb web = new HtmlAgilityPack.HtmlWeb();
        HtmlAgilityPack.HtmlDocument doc = web.Load(TextBox1.Text);

        // SelectNodes returns null when nothing matches.
        var Articles = doc.DocumentNode.SelectNodes("//*[@class ='article-single']");
        if (Articles == null)
        {
            return;
        }

        foreach (var article in Articles)
        {
            var headerNode = article.SelectSingleNode(".//li[@class='article-header']");
            var copyNode = article.SelectSingleNode(".//li[@class='article-copy']");
            if (headerNode == null || copyNode == null)
            {
                continue;
            }

            var header = HttpUtility.HtmlDecode(headerNode.InnerText);
            var description = HttpUtility.HtmlDecode(copyNode.InnerText);

            // The text comes from a remote page, so it is encoded again before being
            // written into this page's markup.
            Response.Write("<table><tr><td>");
            Response.Write("Name - " + HttpUtility.HtmlEncode(header));
            Response.Write("<br />");
            Response.Write(" Description - " + HttpUtility.HtmlEncode(description));
            Response.Write("</td></tr></table>");
        }
    }
    catch (Exception ex)
    {
        Response.Write(HttpUtility.HtmlEncode(ex.Message));
    }
}
//Public Methods

/// <summary>
/// Loads <paramref name="url"/>, prints the href of every anchor and stores each href
/// that is an absolute URI in _results.
/// </summary>
/// <param name="url">Address of the page to scan.</param>
/// <remarks>Best effort: a failure to load or parse the page is reported, not thrown.</remarks>
public void Scrape(string url)
{
    try
    {
        HtmlWeb hw = new HtmlWeb();
        HtmlDocument doc = hw.Load(url);

        // SelectNodes returns null when the page has no links.
        HtmlNodeCollection links = doc.DocumentNode.SelectNodes("//a[@href]");
        if (links == null)
        {
            return;
        }

        foreach (HtmlNode link in links)
        {
            HtmlAttribute att = link.Attributes["href"];
            if (att == null)
            {
                continue;
            }

            Console.WriteLine(att.Value);

            // Relative or malformed hrefs are skipped without throwing.
            Uri absolute;
            if (Uri.TryCreate(att.Value, UriKind.Absolute, out absolute))
            {
                this._results.Add(absolute);
            }
        }
    }
    catch (Exception ex)
    {
        // Do not crash the caller, but do not hide the failure either.
        Console.Error.WriteLine(ex);
    }
}
/// <summary>
/// When updateCoworkerWarningBoolean() is true, reads the first cell of the table rows
/// on the employee page, adds every seventh value to names and saves them to file.
/// Falls back to the names stored on file when the page cannot be fetched or has no
/// matching cells. Calls initDDL() afterwards.
/// </summary>
public void getCoworkerNames()
{
    if (!updateCoworkerWarningBoolean())
    {
        return;
    }

    try
    {
        var web = new HtmlAgilityPack.HtmlWeb();
        var doc = web.Load("http://10.45.10.149/brdkServices/EmployeeDB/");

        // SelectNodes returns null when nothing matches; that case is handled like a
        // failed download instead of throwing.
        var nodes = doc.DocumentNode
                    .SelectNodes("//*[@id=\"bootstrap-override\"]/div[1]/div/table/tbody//tr/td[1]");
        if (nodes == null)
        {
            getCoworkersFromFile();
        }
        else
        {
            // Only every seventh cell (indices 0, 7, 14, ...) is taken.
            for (int i = 0; i < nodes.Count; i += 7)
            {
                names.Add(nodes[i].InnerText);
            }

            setCoworkerstoFile();
        }
    }
    catch (System.Net.WebException)
    {
        getCoworkersFromFile();
    }

    initDDL();
}
/// <summary>
/// Loads BASE + <paramref name="URL"/> (after HTML-decoding the combined address) and
/// selects the links in the fourth column of the table with id "myTable".
/// </summary>
/// <param name="URL">Path appended to BASE.</param>
/// <returns>The result of SelectNodes for those links.</returns>
static HtmlNodeCollection GetSuburb(string URL)
{
    string address = System.Net.WebUtility.HtmlDecode(BASE + URL);
    HtmlDocument page = new HtmlWeb().Load(address);

    const string linkQuery = "//table[@id='myTable']/tbody/tr/td[4]/a";
    return page.DocumentNode.SelectNodes(linkQuery);
}
/// <summary>
/// Sets name (when it is still null) from the page at _urlLink: the h1 text if the h1
/// holds only text, otherwise the title text under the same condition, otherwise a
/// name derived from urlLink by stripping the scheme, "www" prefix and everything
/// from the first dot on.
/// </summary>
public void setName()
{
    if (name != null)
    {
        return;
    }

    var web = new HtmlAgilityPack.HtmlWeb();
    HtmlDocument doc = web.Load(this._urlLink);
    var h1 = doc.DocumentNode.SelectSingleNode("//h1");
    var title = doc.DocumentNode.SelectSingleNode("//title");

    string newname;
    if (HasOnlyText(h1))
    {
        newname = h1.InnerHtml.Trim();
    }
    else if (HasOnlyText(title))
    {
        newname = title.InnerHtml.Trim();
    }
    else
    {
        string n = Regex.Replace(urlLink, @"^((https:[/]*|http:[/]*)(www)*|(www.))[.]*", "");
        newname = Regex.Replace(n, @"[.].*$", "");
    }

    name = newname;
}

// True when the node exists, contains non-blank text and has no element children.
// (Checking !HasChildNodes would reject every heading with text, because the text
// itself is a child node.)
private static bool HasOnlyText(HtmlAgilityPack.HtmlNode node)
{
    if (node == null || string.IsNullOrWhiteSpace(node.InnerText))
    {
        return false;
    }

    foreach (HtmlAgilityPack.HtmlNode child in node.ChildNodes)
    {
        if (child.NodeType == HtmlAgilityPack.HtmlNodeType.Element)
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Loads a fixed Yandex search URL as UTF-8 and prints the inner HTML of every div,
/// or "found nothing" when the page has no div.
/// </summary>
public static void GetText2()
{
    const string address = "https://yandex.by/search/?numdoc=10&p=0&rdrnd=601861&text=kinogo.co%20Один%20дома%201990%20&lr=157";

    // Encoding is forced to UTF-8 (windows-1251 was the alternative considered).
    var loader = new HtmlWeb();
    loader.AutoDetectEncoding = false;
    loader.OverrideEncoding = Encoding.UTF8;

    HtmlDocument page = loader.Load(address);

    // TODO: still needs finishing.
    HtmlNodeCollection divs = page.DocumentNode.SelectNodes("//div");
    if (divs == null)
    {
        Console.WriteLine("found nothing");
        return;
    }

    foreach (HtmlNode div in divs)
    {
        Console.WriteLine(div.InnerHtml);
    }
}
/// <summary>
/// Returns the lines of <paramref name="Filename"/> when the file exists. Otherwise
/// downloads one Wikipedia index page per item of GetAlphabet(), collects the inner
/// text of the nodes selected by GetAddress(item), writes them to the file and
/// returns them.
/// </summary>
/// <param name="Filename">Path of the cache file.</param>
/// <returns>The cached or freshly downloaded lines.</returns>
private static string[] PrepareTestData(string Filename)
{
    if (File.Exists(Filename))
    {
        return(File.ReadAllLines(Filename));
    }

    Console.WriteLine("Preparing test data - reading...");
    List<String> rs = new List<string>();

    // One loader is enough for all pages.
    HtmlAgilityPack.HtmlWeb w = new HtmlAgilityPack.HtmlWeb();

    foreach (var i in GetAlphabet())
    {
        Console.Write(i);
        string addr = String.Format("http://en.wikipedia.org/wiki/Index_of_Windows_games_({0})", i);
        HtmlDocument d = w.Load(addr);

        // SelectNodes returns null when a page has no matching entries.
        var entries = d.DocumentNode.SelectNodes(GetAddress(i));
        if (entries == null)
        {
            continue;
        }

        rs.AddRange(entries.Select(t => t.InnerText));
    }

    File.WriteAllLines(Filename, rs);
    Console.WriteLine("Done!");
    return(rs.ToArray());
}
/// <summary>
/// Loads <paramref name="url"/>, records it in VisitedPages and enqueues every link
/// on the page that has the same host as baseUrl and is not already queued or visited.
/// </summary>
/// <param name="url">Address of the page to crawl.</param>
/// <remarks>Best effort: any failure ends the method without throwing.</remarks>
public static void getHrefs(string url)
{
    try
    {
        HtmlAgilityPack.HtmlWeb htmlWeb = new HtmlWeb();
        HtmlAgilityPack.HtmlDocument doc = htmlWeb.Load(url);

        // The page counts as crawled once it has been loaded.
        VisitedPages.Add(url);

        // SelectNodes returns null when the page has no links.
        var anchors = doc.DocumentNode.SelectNodes("//a[@href]");
        if (anchors == null)
        {
            return;
        }

        foreach (HtmlNode link in anchors)
        {
            // Resolve the href against the base URL. A malformed href is skipped so
            // that it does not abort the remaining links of the page.
            Uri l;
            if (!Uri.TryCreate(baseUrl, link.Attributes["href"].Value, out l))
            {
                continue;
            }

            string target = l.ToString();

            // Queue only unseen pages that belong to the site the user entered.
            if (!LinkQueue.Contains(target) &&
                !VisitedPages.Contains(target) &&
                l.Host == baseUrl.Host)
            {
                LinkQueue.Enqueue(target);
            }
        }
    }
    catch
    {
        // Return if anything goes wrong.
        return;
    }
}
/// <summary>
/// Returns the title of a remote page: the HTML-decoded Open Graph title when there is
/// one, otherwise the inner text of the first HTML title element.
/// </summary>
/// <param name="remoteUri">Address of the remote page.</param>
/// <returns>The title, or null when none could be determined.</returns>
public static string GetTitleFromUri(string @remoteUri)
{
    try
    {
        // Try Open Graph first.
        var graph = OpenGraph.ParseUrl(@remoteUri, "Voat.co OpenGraph Parser");
        if (graph != null && !string.IsNullOrEmpty(graph.Title))
        {
            return HttpUtility.HtmlDecode(graph.Title);
        }
    }
    catch (Exception)
    {
        // Open Graph parsing failed; fall through to the HTML title below.
    }

    try
    {
        HtmlWeb htmlWeb = new HtmlWeb();
        HtmlDocument htmlDocument = htmlWeb.Load(@remoteUri);
        if (htmlDocument != null)
        {
            // FirstOrDefault: a page with more than one title element must not throw.
            var titleNode = htmlDocument.DocumentNode.Descendants("title").FirstOrDefault();
            if (titleNode != null)
            {
                return titleNode.InnerText;
            }
        }
    }
    catch (Exception)
    {
        // The page could not be loaded; report "no title".
    }

    return null;
}
/// <summary>
/// Loads a zhaopin.com search result page built from _pars and appends one JobInfo
/// per result table (all but the first) to _jobList.
/// </summary>
/// <remarks>Any exception is passed to LogSave.ErrLogSave.</remarks>
public void GetJobListFromWeb()
{
    try
    {
        var htmlWeb = new HtmlWeb
        {
            OverrideEncoding = Encoding.GetEncoding("UTF-8")
        };
        HtmlDocument htmlDoc = htmlWeb.Load(string.Format("http://sou.zhaopin.com/jobs/searchresult.ashx?jl={0}&kw={1}&p={2}", DataClass.GetDic_zhilian(_pars.Addr), _pars.Key, _pars.Page));

        // The collection is kept in document order (no AsParallel) so that index 0 is
        // reliably the first table on the page. SelectNodes returns null when nothing
        // matches.
        var nodeList = htmlDoc.DocumentNode.SelectNodes("//*[@id='newlist_list_content_table']/table[@class='newlist']");
        if (nodeList == null)
        {
            return;
        }

        // Start at 1: the first table is skipped.
        for (int i = 1; i < nodeList.Count; i++)
        {
            var node = nodeList[i];
            var titleLink = node.SelectSingleNode(".//tr/td[@class='zwmc']/div/a");

            var job = new JobInfo();
            job.TitleName = titleLink.InnerText;
            job.InfoUrl = titleLink.Attributes["href"].Value;
            job.Company = node.SelectSingleNode(".//tr/td[@class='gsmc']/a").InnerText;
            job.Salary = node.SelectSingleNode(".//tr/td[@class='zwyx']").InnerText;
            job.City = node.SelectSingleNode(".//tr/td[@class='gzdd']").InnerText;
            job.Date = node.SelectSingleNode(".//tr/td[@class='gxsj']/span").InnerText;
            job.Source = "智联招聘";
            job.Method = "月薪";
            _jobList.Add(job);
        }
    }
    catch (Exception ex)
    {
        LogSave.ErrLogSave("错误【解析】", ex);
    }
}
/// <summary>
/// Starts an asynchronous load of http://www.cnblogs.com/ and routes the completion
/// event to htmlDocComplete.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void PanoramaItem_Loaded(object sender, RoutedEventArgs e)
{
    var loader = new HtmlAgilityPack.HtmlWeb();

    // Subscribe before starting so the completion cannot be missed.
    loader.LoadCompleted += htmlDocComplete;
    loader.LoadAsync("http://www.cnblogs.com/");
}
/// <summary>
/// Loads <paramref name="webAddress"/> and joins the InnerText of every descendant
/// node of the document with single spaces.
/// </summary>
/// <param name="webAddress">Address of the page to load.</param>
/// <returns>The joined text.</returns>
/// <remarks>
/// NOTE(review): InnerText is taken from every descendant, containers included, so
/// text nested inside several elements appears to be emitted once per enclosing
/// node. Confirm that this repetition is intended.
/// </remarks>
public static string getContent(string webAddress)
{
    HtmlDocument page = new HtmlAgilityPack.HtmlWeb().Load(webAddress);

    var pieces = from node in page.DocumentNode.Descendants()
                 select node.InnerText;

    return string.Join(" ", pieces);
}
/// <summary>
/// Retrieves the list of Houzz project IDs for a user.
/// </summary>
/// <param name="username">Houzz username.</param>
/// <returns>
/// The project IDs found in the sidebar links, or null when the sidebar has no links.
/// </returns>
public IEnumerable<int> GetProjectIds(string username)
{
    var url = string.Format("http://www.houzz.com/projects/users/{0}", username);
    HtmlDocument htmlDoc = new HtmlWeb().Load(url);

    var nodes = htmlDoc.DocumentNode.SelectNodes("//div[@class='sidebar-body']//a");
    if (nodes == null || !nodes.Any())
    {
        return null;
    }

    var projects = new List<int>();

    // Skip(1): the first link is "All Projects".
    foreach (var node in nodes.Skip(1))
    {
        // Default to "" so that an anchor without href cannot cause a null reference.
        var splicedUrl = node.GetAttributeValue("href", "").Split('/');

        // Index 4 is read, so at least five segments are required.
        if (splicedUrl.Length < 5)
        {
            continue;
        }

        // Segments that are not a number are skipped instead of throwing.
        int projectId;
        if (int.TryParse(splicedUrl[4], out projectId))
        {
            projects.Add(projectId);
        }
    }

    return projects;
}
/// <summary>
/// Downloads every page image of every chapter linked from the page in txtURL.
/// Each chapter is followed through its "next_page" links until the link target is
/// "javascript:void(0);". Files are named "&lt;chapter&gt;.&lt;page&gt;.jpg".
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnDownloadImages_Click(object sender, EventArgs e)
{
    // "Invalid URL" is shown only when loading actually fails or when the page has
    // no chapter list.
    HtmlAgilityPack.HtmlDocument chapterDoc;
    try
    {
        chapterDoc = new HtmlWeb().Load(txtURL.Text);
    }
    catch (Exception)
    {
        MessageBox.Show("Invalid URL");
        return;
    }

    var chapterLinks = chapterDoc.DocumentNode.SelectNodes("//div[@class = 'detail_list']/ul/li/span/a");
    if (chapterLinks == null)
    {
        MessageBox.Show("Invalid URL");
        return;
    }

    using (WebClient client = new WebClient())
    {
        foreach (HtmlNode link in chapterLinks)
        {
            // Chapter label with line breaks, spaces and colons removed.
            string chapter = link.InnerText.Replace("\r\n", "").Replace(" ", "").Replace(":", "");

            // Page numbering restarts at 1 for every chapter.
            int page = 1;
            string nextPage = link.Attributes["href"].Value;

            while (nextPage != "javascript:void(0);")
            {
                HtmlAgilityPack.HtmlDocument pageDoc = new HtmlWeb().Load(nextPage);

                HtmlNode pageNode = pageDoc.DocumentNode.SelectSingleNode("//img[@id='image']");
                HtmlNode nextNode = pageDoc.DocumentNode.SelectSingleNode("//a[@class = 'next_page']");
                if (pageNode == null || nextNode == null)
                {
                    // Unexpected page layout; stop following this chapter.
                    break;
                }

                string localFilename = @"C:\Users\Anh\Desktop\New folder\Coding\Image2\" + chapter + "." + page + ".jpg";
                client.DownloadFile(pageNode.Attributes["src"].Value, localFilename);

                nextPage = nextNode.Attributes["Href"].Value;
                page++;
            }
        }
    }
}
/// <summary>
/// Rebuilds mCarTypeList from the page at WebConstants.BASE_URL + pageUrl: one CarType
/// (name and image URL) per node matched by WebConstants.TYPE_NODE, each with its
/// image download started on a new thread. Afterwards calls setPrice with the price
/// variant of the page URL.
/// </summary>
/// <param name="pageUrl">Page path relative to WebConstants.BASE_URL.</param>
private void addPageType(String pageUrl)
{
    mCarTypeList.Clear();

    HtmlDocument page = new HtmlWeb().Load(WebConstants.BASE_URL + pageUrl);
    HtmlNodeCollection matches = page.DocumentNode.SelectNodes(WebConstants.TYPE_NODE);

    if (matches != null)
    {
        for (int index = 0; index < matches.Count; index++)
        {
            // Each fragment is re-parsed as a standalone node before it is queried.
            HtmlNode entry = HtmlNode.CreateNode(matches[index].OuterHtml);
            CarType type = new CarType(mCarFactory);

            HtmlNode nameFragment = HtmlNode.CreateNode(entry.SelectSingleNode(WebConstants.TYPE_NAME).OuterHtml);
            type.Name = nameFragment.SelectSingleNode(WebConstants.LINK_HREF).InnerText;

            HtmlNode imageFragment = HtmlNode.CreateNode(entry.SelectSingleNode(WebConstants.TYPE_IMAGE).OuterHtml);
            HtmlNode image = imageFragment.SelectSingleNode(WebConstants.IMAGE_SRC);
            type.ImageUrl = image.Attributes[WebConstants.SRC].Value;

            // The image is fetched in the background on its own thread.
            TypeImageDownloadTask download = new TypeImageDownloadTask(type);
            new Thread(download.Download).Start();

            mCarTypeList.Add(type);
        }
    }

    setPrice(pageUrl.Replace(WebConstants.PHOTO, WebConstants.PRICE));
}
/// <summary>
/// Loads a chapter listing page, passes it to getInfo and getSummary, and adds one
/// Chapter per row of the "table table-striped" table that has a third cell. Then
/// reads the current page number, binds the chapter list and calls checkNextPage.
/// </summary>
/// <param name="url">Address of the listing page.</param>
/// <param name="mode">Mode.Refresh clears the chapter list before loading.</param>
void getData(String url, Mode mode)
{
    if (mode == Mode.Refresh)
    {
        chapter.Clear();
    }

    htmlDoc = new HtmlAgilityPack.HtmlWeb().Load(url);
    getInfo(htmlDoc);
    getSummary(htmlDoc);

    HtmlNode table = htmlDoc.DocumentNode.SelectSingleNode(@"//table[@class='table table-striped']");
    foreach (HtmlNode row in table.SelectNodes("tr"))
    {
        HtmlNode numberCell = row.SelectSingleNode("td[2]");
        HtmlNode linkCell = row.SelectSingleNode("td[3]");

        // Rows without a third cell are ignored.
        if (linkCell == null)
        {
            continue;
        }

        String label = numberCell.SelectSingleNode("strong").InnerText;
        HtmlNode anchor = linkCell.SelectSingleNode("a");
        String displayName = label + " : " + anchor.InnerText;
        String chapterUrl = anchor.GetAttributeValue("href", null);

        chapter.Add(new Chapter(displayName, chapterUrl));
    }

    HtmlNode pageMarker = htmlDoc.DocumentNode.SelectSingleNode(@"//a[@title='current-page']");
    currentPage = Convert.ToInt16(pageMarker.InnerText);

    lvChapter.ItemsSource = chapter;
    checkNextPage(htmlDoc);
}
/// <summary>
/// Downloads and parses the timetable page of a study year, optionally of one half
/// of that year.
/// </summary>
/// <param name="year">Study year; its enum name is part of the page file name.</param>
/// <param name="halfYear">
/// Half-year; its enum name is appended to the file name. HalfYear.None appends nothing.
/// </param>
/// <returns>
/// The parsed timetable, or null when a WebException or NotSupportedException was
/// thrown (the exception is logged).
/// </returns>
public List<TimetableItem> GetTimetableForYear(StudyYear year, HalfYear halfYear = HalfYear.None)
{
    List<TimetableItem> timetable;

    string tempYear = Enum.GetName(typeof(StudyYear), year);
    string tempHalfYear = Enum.GetName(typeof(HalfYear), halfYear);
    if (tempHalfYear == "None")
    {
        tempHalfYear = String.Empty;
    }

    try
    {
        HtmlWeb hw = new HtmlWeb();

        // tempHalfYear (not the raw enum) is used so that HalfYear.None does not put
        // the text "None" into the URL.
        HtmlDocument doc = hw.Load(String.Format("http://thor.info.uaic.ro/~orar/participanti/orar_{0}{1}.html", tempYear, tempHalfYear));

        doc.DocumentNode.InnerHtml = doc.DocumentNode.InnerHtml.Replace("\r\n", "");
        timetable = ParseTable(doc, TimetableType.Year);
    }
    catch (WebException ex)
    {
        Logger.ExceptionLogger.Log(ex);
        timetable = null;
    }
    catch (NotSupportedException ex)
    {
        Logger.ExceptionLogger.Log(ex);
        timetable = null;
    }

    return timetable;
}
/// <summary>
/// Reads the "latest" page of mangahere.com and lazily yields one MangaData per
/// update entry, last entry first, each with its listed chapters.
/// </summary>
/// <param name="source">Source used to create the manga and chapter sources.</param>
/// <returns>The manga entries; empty when the update list is not found.</returns>
public static IEnumerable<MangaData> getNews(Source source)
{
    var web = new HtmlAgilityPack.HtmlWeb();
    web.AutoDetectEncoding = true;
    var htmlMainDoc = web.Load(@"http://www.mangahere.com/latest/");

    // SelectNodes returns null when nothing matches.
    var itemsManga = htmlMainDoc.DocumentNode.SelectNodes(@"/html/body/section[@class='page_main']/div[@class='latest_released']/div[@class='manga_updates']/dl");
    if (itemsManga == null)
    {
        yield break;
    }

    // Walk from the last entry down to and including index 0.
    for (int i = itemsManga.Count - 1; i >= 0; i--)
    {
        var itemManga = itemsManga[i];

        var mangaLink = itemManga.SelectSingleNode(@"dt/a");
        if (mangaLink == null)
        {
            // Entry without a title link; nothing usable to report.
            continue;
        }

        MangaData manga = new MangaData(source, true);
        manga.DetailMangaSource = source.CreateDetailMangaSource(manga, mangaLink.GetAttributeValue("href", ""));
        manga.Name = mangaLink.InnerHtml;

        var chapterNodes = itemManga.SelectNodes("dd");
        if (chapterNodes != null)
        {
            foreach (var itemChapter in chapterNodes)
            {
                var chapterLink = itemChapter.SelectSingleNode("a");
                if (chapterLink == null)
                {
                    continue;
                }

                ChapterData chapter = new ChapterData();
                chapter.Name = chapterLink.InnerText;
                chapter.ChapterSource = source.CreateChapterSource(chapterLink.GetAttributeValue("href", ""));
                manga.ChaptersData.Add(chapter);
            }
        }

        yield return manga;
    }
}
/// <summary>
/// Loads the first page of a chapter, starts loading the remaining pages listed in
/// the page selector in the background, and collects the image URL of every page in
/// page order.
/// </summary>
/// <param name="source">Not used by this method.</param>
/// <param name="link">Address of the first page of the chapter.</param>
/// <returns>A ChapterData whose Images holds one URL per page.</returns>
public static ChapterData getChapters(Source source, string link)
{
    ChapterData chapter = new ChapterData();

    var web = new HtmlAgilityPack.HtmlWeb();
    web.AutoDetectEncoding = true;
    var htmlpage1 = web.Load(link);

    var pages = new List<IObservable<HtmlDocument>>();
    pages.Add(Observable.Return(htmlpage1));

    // SelectNodes returns null when the page selector is missing.
    var linksToPages = htmlpage1.DocumentNode.SelectNodes(@"/html/body/section[@class='readpage_top']/div[@class='go_page clearfix']/span[@class='right']/select[@class='wid60']/option");
    if (linksToPages != null)
    {
        // Index 0 is the page that is already loaded.
        for (int i = 1; i < linksToPages.Count; i++)
        {
            var linkToPage = linksToPages[i].GetAttributeValue("value", "");
            pages.Add(Observable.Start<HtmlDocument>(() =>
            {
                // Each background load uses its own HtmlWeb and returns its own
                // document; no state is shared between the concurrent loads.
                var pageWeb = new HtmlAgilityPack.HtmlWeb();
                pageWeb.AutoDetectEncoding = true;
                return pageWeb.Load(linkToPage);
            }));
        }
    }

    // Waiting in list order keeps the images in page order.
    foreach (IObservable<HtmlDocument> item in pages)
    {
        HtmlDocument pagehtml = item.Wait();
        chapter.Images.Add(pagehtml.DocumentNode.SelectSingleNode(@"/html/body/section[@id='viewer']/a/img[@id='image']/@src").GetAttributeValue("src", ""));
    }

    return chapter;
}
/// <summary>
/// Downloads and parses the page at <paramref name="url"/>.
/// </summary>
/// <param name="url">Address of the page to load.</param>
/// <returns>The parsed document.</returns>
public static HtmlDocument Crawl(string url)
{
    return new HtmlWeb().Load(url);
}
/// <summary>
/// Loads <paramref name="url"/> and returns the value of every option directly inside
/// the first select element with class "selectBox".
/// </summary>
/// <param name="url">Address of the page to load.</param>
/// <returns>
/// The option values without "#" placeholders ("NO_URL" for an option without a
/// value), or null when the page has no such select element.
/// </returns>
public static List<string> GetChapterUrls(string url)
{
    HtmlAgilityPack.HtmlWeb htmlWeb = new HtmlWeb();
    HtmlAgilityPack.HtmlDocument htdoc = htmlWeb.Load(url);

    // GetAttributeValue tolerates select elements that have no class attribute.
    HtmlAgilityPack.HtmlNode selectElement = htdoc.DocumentNode
        .Descendants("select")
        .FirstOrDefault(x => x.GetAttributeValue("class", "") == "selectBox");
    if (selectElement == null)
    {
        return null;
    }

    List<string> ret = new List<string>();
    foreach (var cNode in selectElement.ChildNodes)
    {
        if (cNode.Name == "option")
        {
            ret.Add(cNode.GetAttributeValue("value", "NO_URL"));
        }
    }

    // Clean-up: "#" entries are placeholders, not chapter addresses.
    ret.RemoveAll(value => value == "#");
    return ret;
}
/// <summary>
/// Loads <paramref name="url"/>, finds every author entry that has an e-mail link and
/// appends "name - e-mail" lines to C:\Springer\Springer Emails.txt.
/// </summary>
/// <param name="url">Address of the article page.</param>
/// <returns>The lines that were written; empty when no author has an e-mail link.</returns>
public static List<string> getNameOfEmail(string url)
{
    List<string> a = new List<string>();

    HtmlWeb website = new HtmlWeb();
    HtmlAgilityPack.HtmlDocument doc = website.Load(url);

    // SelectNodes returns null when the page lists no authors.
    HtmlNodeCollection authors = doc.DocumentNode.SelectNodes(".//li[@itemprop='author']");

    // CreateDirectory does nothing when the directory already exists.
    Directory.CreateDirectory(@"C:\Springer\");

    using (StreamWriter outputFile = new StreamWriter(@"C:\Springer\Springer Emails.txt", true))
    {
        if (authors != null)
        {
            foreach (HtmlNode author in authors)
            {
                HtmlNode Name = author.SelectSingleNode(".//a[@class='person']");
                HtmlNode EMail = author.SelectSingleNode(".//a[@class='envelope']");
                if (Name == null || EMail == null)
                {
                    continue;
                }

                string line = Name.InnerText + " - " + EMail.GetAttributeValue("title", "");
                outputFile.WriteLine(line);

                // Also hand the line back to the caller; the result list used to stay
                // empty.
                a.Add(line);
            }
        }
    }

    return a;
}
/// <summary>
/// Sets up the main layout, fills the status text view with the tag-stripped content
/// of every td on the route page, and wires the button to open Page2.
/// </summary>
/// <param name="savedInstanceState">State passed through to the base implementation.</param>
protected override void OnCreate (Bundle savedInstanceState)
{
    base.OnCreate (savedInstanceState);
    SetContentView (Resource.Layout.Main);

    TextView textView = FindViewById<TextView> (Resource.Id.TEXT_STATUS_ID);

    // NOTE(review): this download runs synchronously inside OnCreate; consider moving
    // it off the UI thread.
    HtmlWeb web = new HtmlWeb();
    HtmlDocument doc = web.Load("https://www.ltd.org/system-map/route_79x/");

    // Build the text once instead of re-assigning the view's Text for every cell.
    var content = new System.Text.StringBuilder(textView.Text);

    // SelectNodes returns null when the page has no td.
    HtmlNodeCollection tags = doc.DocumentNode.SelectNodes("//td");
    if (tags != null)
    {
        foreach (HtmlNode item in tags)
        {
            content.Append(item.InnerHtml).Append("\n");
        }
    }

    // Strip any markup left inside the cells.
    textView.Text = Regex.Replace(content.ToString(), @"<[^>]*>", String.Empty);

    Button button = FindViewById<Button> (Resource.Id.myButton);
    button.Click += delegate {
        StartActivity(typeof(Page2));
    };
}
/// <summary>
/// Loads a listing page and adds one Novel per li of the "homeListstory" list to
/// listNovels, then records the canonical URL (if not yet known), binds the list and
/// reads the current page number.
/// </summary>
/// <param name="url">Address of the listing page.</param>
/// <param name="mode">Mode.Refresh clears listNovels before loading.</param>
/// <remarks>
/// The loading indicator is hidden on every exit path, including when the list is
/// missing or an exception is thrown.
/// </remarks>
async void getData(String url, Mode mode)
{
    fragmentLoad.Visibility = Visibility.Visible;
    try
    {
        if (mode == Mode.Refresh)
        {
            listNovels.Clear();
        }

        HtmlAgilityPack.HtmlWeb htmlWeb = new HtmlAgilityPack.HtmlWeb();
        htmlDoc = await htmlWeb.LoadFromWebAsync(url);

        HtmlNode _nod = htmlDoc.DocumentNode.SelectSingleNode(@"//ul[@class='homeListstory']");
        if (_nod == null)
        {
            return;
        }

        HtmlNodeCollection _mainNode = _nod.SelectNodes("li");
        if (_mainNode == null)
        {
            return;
        }

        foreach (var node in _mainNode)
        {
            String name = node.SelectSingleNode("h3").SelectSingleNode("a").GetAttributeValue("title", null);
            String imgUrl = node.SelectSingleNode("a").SelectSingleNode("img").GetAttributeValue("src", null);
            String mainUrl = node.SelectSingleNode("h3").SelectSingleNode("a").GetAttributeValue("href", null);
            listNovels.Add(new Novel(name, imgUrl, mainUrl));
        }

        // Remember the canonical address the first time it is seen.
        if (realUrl == null || realUrl.Equals(""))
        {
            realUrl = htmlDoc.DocumentNode.SelectSingleNode(@"//link[@rel='canonical']").GetAttributeValue("href", null);
        }

        fragmentGridView.lvHomePage.ItemsSource = listNovels;
        currentPage = Convert.ToInt16(htmlDoc.DocumentNode.SelectSingleNode(@"//a[@title='current-page']").InnerText);
    }
    finally
    {
        fragmentLoad.Visibility = Visibility.Collapsed;
    }
}
/// <summary>
/// Looks up a licence plate on nummerplade.net and copies make, model, variant,
/// chassis number, model year and plate from the result page into a Bilinformation.
/// </summary>
/// <param name="nummerplade">Licence plate appended to the query string.</param>
/// <returns>The filled Bilinformation.</returns>
/// <exception cref="IngenBilinformationException">
/// Wraps any exception thrown while loading or reading the page.
/// </exception>
public static Bilinformation HentBilinformation(string nummerplade)
{
    try
    {
        var result = new Bilinformation();
        HtmlDocument page = new HtmlWeb().Load("http://www.nummerplade.net/soeg/?regnr=" + nummerplade);

        var root = page.DocumentNode;
        if (root == null)
        {
            return result;
        }

        // Reads the text of the table cell that has the given id.
        Func<string, string> cellText =
            id => root.SelectSingleNode("//td[@id='" + id + "']").InnerText;

        result.Maerke = cellText("maerke");
        result.Model = cellText("model");
        result.Variant = cellText("variant");
        result.Stelnummer = cellText("stelnr");
        result.Aargang = cellText("model_aar");
        result.Nummerplade = cellText("regnr");

        return result;
    }
    catch (Exception ex)
    {
        throw new IngenBilinformationException("Der blev ikke fundet nogen bilinformation på nummerpladen.", ex);
    }
}
/// <summary>
/// Loads one LinkedIn profile page, prints the name, summary and skills nodes, then
/// a separator, then calls Info for each known profile section. Waits for Enter
/// before exiting.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    var profile = new HtmlWeb().Load("https://ua.linkedin.com/in/kirillmiroshnichenko");

    // Headline parts, printed in this order: full name, summary, skills.
    string[] headlineQueries =
    {
        "//span[@class='full-name']",
        "//p[@class='description']",
        "//span[@class='skill-pill']"
    };
    for (int q = 0; q < headlineQueries.Length; q++)
    {
        var matches = profile.DocumentNode.SelectNodes(headlineQueries[q]);
        Print(matches);
    }

    Console.WriteLine("-------------");

    string[] sections =
    {
        "experience", "courses", "projects", "certifications",
        "languages", "education", "interests",
        "patents", "publications", "honors", "test-scores", "organizations", "volunteering"
    };
    for (int s = 0; s < sections.Length; s++)
    {
        Info(profile, sections[s]);
    }

    Console.ReadLine();
}
/// <summary>
/// Finds all plumber entries of one city: loads the city page with all items on one
/// page and, for every link whose text is "Phone", prints phone, name and address and
/// writes them to file as a comma-separated line.
/// </summary>
/// <param name="state">State segment of the URL.</param>
/// <param name="city">City segment of the URL; "-plumbers" is removed in the output.</param>
private void ExtractCity(string state, string city)
{
    string cityUrl = RootUrl + @"/" + state + @"/" + city + @"?" + @"page=1&ipp=All";
    HtmlDocument page = new HtmlWeb().Load(cityUrl);

    // All anchors that have an href and non-blank text.
    var anchors = page.DocumentNode.Descendants()
        .Where(n => n.Name == "a"
                 && n.Attributes["href"] != null
                 && n.InnerText.Trim().Length > 0);

    foreach (var anchor in anchors)
    {
        if (anchor.InnerText != "Phone")
        {
            continue;
        }

        string phone = anchor.ParentNode.NextSibling.InnerText;
        Console.WriteLine();
        Console.WriteLine("phone: " + phone);

        // Name and address are the second and third text lines of the node found by
        // walking four parents up and taking the sibling after the first child.
        var card = anchor.ParentNode.ParentNode.ParentNode.ParentNode.FirstChild.NextSibling;
        string[] cardLines = card.InnerText.Split('\n');
        string name = cardLines[1].Trim();
        string address = cardLines[2].Trim();

        Console.WriteLine("name: " + name);
        Console.WriteLine("address: " + address);

        file.WriteLine(name + "," + address + "," + city.Replace("-plumbers", "") + "," + phone.Replace(" ", ""));
    }
}
/// <summary>
/// Loads one page of the tok.fm podcast listing (ISO-8859-2), normalises it by saving
/// it as XML and re-loading it, and returns one Podcast per "tokfm_play" link.
/// </summary>
/// <param name="pageNumber">Page number put into the "str" query parameter.</param>
/// <returns>The podcasts found; empty when the page has no matching link.</returns>
public IEnumerable<Podcast> GetLatestPodcasts(int pageNumber)
{
    var hw = new HtmlWeb();
    hw.OverrideEncoding = Encoding.GetEncoding("ISO-8859-2");
    var doc = hw.Load("http://www.tok.fm/TOKFM/0,94037.html?str=" + pageNumber.ToString(CultureInfo.InvariantCulture));

    doc.OptionOutputAsXml = true;
    doc.OptionCheckSyntax = true;
    doc.OptionFixNestedTags = true;

    // Round-trip through the XML output so that the options above take effect on the
    // tree that is queried below.
    var sb = new StringBuilder();
    using (var stringWriter = new StringWriter(sb))
    {
        doc.Save(stringWriter);
    }
    using (var stringReader = new StringReader(sb.ToString()))
    {
        doc.Load(stringReader);
    }

    var result = new List<Podcast>();

    // SelectNodes returns null when nothing matches.
    var links = doc.DocumentNode.SelectNodes("//a[@class='tokfm_play']");
    if (links == null)
    {
        return result;
    }

    foreach (HtmlNode link in links)
    {
        // Missing attributes yield empty strings instead of a null reference.
        var imgNode = link.SelectSingleNode("img");
        var imageURL = imgNode != null ? imgNode.GetAttributeValue("src", String.Empty) : String.Empty;

        result.Add(new Podcast
        {
            Href = link.GetAttributeValue("href", String.Empty),
            Title = link.GetAttributeValue("title", String.Empty),
            ImageURL = imageURL
        });
    }

    return result;
}
/// <summary>
/// Fetches a CiteSeer "viewdoc" page and returns the text of its BibTeX
/// paragraph with a line break inserted after every comma.
/// </summary>
/// <param name="url">
/// Page address. Only URLs containing "viewdoc" are fetched
/// (e.g. http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.31.3487).
/// </param>
/// <returns>
/// The reformatted BibTeX text, or an empty string when the URL is of another
/// kind (e.g. http://citeseer.ist.psu.edu/showciting?cid=2131272), the
/// paragraph is missing, or reading it fails.
/// </returns>
public string getBibTex(string url)
{
    if (!url.Contains("viewdoc"))
    {
        return "";
    }

    HtmlDocument doc = new HtmlWeb().Load(url);
    Console.WriteLine(doc != null ? "Document Loaded!" : "Load Error!");

    string bibtex = "";
    try
    {
        HtmlNode entry = doc.DocumentNode.SelectSingleNode("//*[@id=\"bibtex\"]/p");
        if (entry != null)
        {
            bibtex = entry.InnerText.Replace(",", ",\n").Replace(" ", " ");
        }
    }
    catch (Exception)
    {
        // Best effort: any failure while reading the entry leaves the result empty.
    }

    return bibtex;
}
/// <summary>
/// Console entry point: walks the urfu.ru schedule pages. It loads the
/// institute list, picks the eighth institute link, loads that institute's
/// group list and, for every group, loads the group page and builds a
/// <c>Schedule</c> from its tables.
/// </summary>
static void Main()
{
    string mainUrl = "http://urfu.ru/";
    string toSchedule = "student/schedule/schedule/list/institute/";
    string getInstitutes = "student/schedule/schedule/list/group/institute";
    string getGroups = "student/schedule/schedule/list/lesson/institute";

    var webGet = new HtmlAgilityPack.HtmlWeb();
    var doc = webGet.Load(mainUrl + toSchedule);

    // "//a[@href]" skips anchors without an href; indexing Attributes["href"]
    // on those yields null and used to crash the filter. SelectNodes itself
    // returns null when nothing matches.
    var instituteAnchors = doc.DocumentNode.SelectNodes("//a[@href]");
    if (instituteAnchors == null)
    {
        Console.WriteLine("No links found on the institute list page.");
        return;
    }

    // Only the eighth matching institute (index 7) is processed; iterating
    // over all institutes is intentionally not done here.
    var institut = instituteAnchors
        .Where(item => item.GetAttributeValue("href", string.Empty).StartsWith(getInstitutes, StringComparison.Ordinal))
        .Skip(7)
        .FirstOrDefault();
    if (institut == null)
    {
        Console.WriteLine("Fewer than eight institute links were found.");
        return;
    }

    Console.WriteLine(institut.InnerHtml);
    doc = webGet.Load(mainUrl + institut.GetAttributeValue("href", string.Empty));

    var groupAnchors = doc.DocumentNode.SelectNodes("//a[@href]");
    if (groupAnchors == null)
    {
        return;
    }

    // Materialize the group links before the loop: doc is reassigned inside it.
    var listGroups = groupAnchors
        .Where(item => item.GetAttributeValue("href", string.Empty).StartsWith(getGroups, StringComparison.Ordinal))
        .ToList();

    foreach (var group in listGroups)
    {
        Console.WriteLine(group.InnerHtml);
        doc = webGet.Load(mainUrl + group.GetAttributeValue("href", string.Empty));

        // May be null when the group page has no tables; passed through as before.
        var schedule = doc.DocumentNode.SelectNodes("//table");
        new Schedule(schedule);
    }
}
/// <summary>
/// Looks up song lyrics on letras.mus.br for the given artist and title.
/// </summary>
/// <param name="artist">Artist name; null is treated as empty. Lower-cased before the lookup.</param>
/// <param name="title">Song title; null is treated as empty. Lower-cased before the lookup.</param>
/// <returns>
/// The HTML-decoded lyric text with <c>&lt;br&gt;</c> tags turned into CR/LF,
/// or an empty string when no lyric is found.
/// </returns>
public static string GetFromTerra(string artist, string title)
{
    artist = (artist + "").ToLowerInvariant();
    title = (title + "").ToLowerInvariant();

    // Fetch the lyrics page.
    HtmlWeb web = new HtmlWeb();
    HtmlDocument doc = web.Load(string.Format("http://letras.mus.br/winamp.php?t={0}-{1}", HttpUtility.UrlEncode(artist, ISOEncoding), HttpUtility.UrlEncode(title, ISOEncoding)));
    HtmlNode node = doc.DocumentNode.SelectSingleNode("//div[@id='letra']/p");

    if (node == null)
    {
        // Retry once with '&' spelled as 'e'; after the replacement no '&'
        // remains, so the recursion cannot repeat.
        if (artist.Contains("&") || title.Contains("&"))
        {
            return GetFromTerra(artist.Replace('&', 'e'), title.Replace('&', 'e'));
        }

        // Nothing found and nothing left to try: previously this fell through
        // and dereferenced the null node.
        return string.Empty;
    }

    // Lyric found: convert line-break tags and decode entities.
    node.InnerHtml = node.InnerHtml.Replace("<br>", "\r\n");
    return WebUtility.HtmlDecode(node.InnerText);
}
/// <summary>
/// Scrapes the mobile Facebook page of ordbogen and extracts the like count
/// from the first div whose text contains "personer synes godt om dette".
/// </summary>
/// <returns>
/// The parsed number (0 when the extracted text is not an integer);
/// -1 when the document is null or no div contains the phrase;
/// -3 when any exception occurs.
/// </returns>
public int GetFaceBookLikes()
{
    const string searchStart = "omBeskedDelMere";
    const string searchEnd = " ";

    try
    {
        HtmlDocument doc = new HtmlWeb().Load("https://m.facebook.com/ordbogen");
        if (doc == null)
        {
            return -1;
        }

        foreach (var div in doc.DocumentNode.SelectNodes("//div"))
        {
            string text = div.InnerText;
            if (!text.Contains("personer synes godt om dette"))
            {
                continue;
            }

            // The number sits between the start marker and the next end marker.
            int start = text.IndexOf(searchStart, 0) + searchStart.Length;
            int end = text.IndexOf(searchEnd, start);

            int numOfLikes;
            int.TryParse(text.Substring(start, end - start), out numOfLikes);
            return numOfLikes;
        }

        return -1;
    }
    catch (Exception)
    {
        return -3;
    }
}
/// <summary>
/// Loads the series page and returns the <c>src</c> of its cover image.
/// </summary>
/// <param name="serie">Series whose <c>URL</c> is downloaded.</param>
/// <returns>
/// The cover image address, or an empty string when the page has no cover
/// image element or the element has no <c>src</c> attribute.
/// </returns>
protected override string _GetSerieMiniatureUrl(Serie serie)
{
    var web = new HtmlWeb();
    var doc = web.Load(serie.URL);
    var img = doc.DocumentNode.SelectSingleNode("//div[@id='series_info']/div[@class='cover']/img");

    // SelectSingleNode returns null when nothing matches; use the same empty
    // default as a missing attribute instead of throwing.
    if (img == null)
    {
        return "";
    }

    return img.GetAttributeValue("src", "");
}
/// <summary>
/// Queries gametracker.com for Dota 2 servers running the given map and
/// appends the matching table cell texts to <paramref name="list"/>.
/// </summary>
/// <param name="list">List that receives the trimmed cell texts; also the return value.</param>
/// <param name="map">Map name used as the search query (trimmed before use).</param>
/// <returns>The same <paramref name="list"/> instance, with any found cells appended.</returns>
public static List<string> GetServersFromMap(List<string> list, string map)
{
    HtmlWeb htmlWeb = new HtmlWeb();

    // Creates an HtmlDocument object from a URL.
    HtmlAgilityPack.HtmlDocument document = htmlWeb.Load("http://www.gametracker.com/search/dota2/?search_by=map&query=" + map.Trim() + "&searchipp=50");

    // SelectNodes returns null when nothing matches, at every level.
    var tables = document.DocumentNode.SelectNodes("//table");
    if (tables == null)
    {
        return list;
    }

    // Cells are collected only while "started" is set and "stopped" is clear:
    // a cell containing "Server Map " starts collection, and a cell containing
    // "Rank&darr" toggles "stopped" and clears "started".
    bool started = false;
    bool stopped = true;

    foreach (HtmlNode table in tables)
    {
        var rows = table.SelectNodes("tr");
        if (rows == null)
        {
            continue; // table without direct <tr> children
        }

        foreach (HtmlNode row in rows)
        {
            var cells = row.SelectNodes("td");
            if (cells == null)
            {
                continue; // e.g. a header row made of <th> only
            }

            foreach (HtmlNode cell in cells)
            {
                string cellText = cell.InnerText;

                if (cellText.Contains("Rank&darr"))
                {
                    stopped = !stopped;
                    started = false;
                }
                if (started && !stopped)
                {
                    list.Add(cellText.Trim());
                }
                if (cellText.Contains("Server Map "))
                {
                    started = true;
                }
            }
        }
    }

    return list;
}
/// <summary>
/// Processes one queued URL: if it has not been visited yet, downloads the
/// page, stores one table entry per word of its title, and enqueues the
/// page's eligible links. The message is always removed from the queue.
/// </summary>
/// <param name="urlMessage">Queue message whose text is the URL to crawl.</param>
public void crawlingPhase(CloudQueueMessage urlMessage)
{
    totalUrls++;
    String url = urlMessage.AsString;

    if (!alreadyVisitedUrls.Contains(url))
    {
        alreadyVisitedUrls.Add(url);
        try
        {
            HtmlWeb hw = new HtmlWeb();
            HtmlAgilityPack.HtmlDocument doc = hw.Load(url);

            // Publication date, reformatted from yyyy-MM-dd to yyyy/MM/dd.
            String date = "";
            HtmlNode pubDate = doc.DocumentNode.SelectSingleNode("//head/meta[@property='og:pubdate']");
            if (pubDate != null)
            {
                String stringDate = pubDate.GetAttributeValue("content", "default").Substring(0, 10);
                // Invariant culture keeps '/' literal regardless of the host's regional settings.
                date = DateTime.ParseExact(stringDate, "yyyy-MM-dd", CultureInfo.InvariantCulture).ToString("yyyy/MM/dd", CultureInfo.InvariantCulture);
            }

            // One table row per meaningful word of the title.
            String fullTitle = doc.DocumentNode.SelectSingleNode("//head/title").InnerText;
            String[] titles = fullTitle.Split(' ');
            foreach (string partTitle in titles)
            {
                if (!partTitle.Equals(" ") && !partTitle.Equals("-") && !partTitle.Equals("CNN.com") && !partTitle.Equals(""))
                {
                    CrawlerEntry entry = new CrawlerEntry(url, fullTitle, date, partTitle);
                    TableOperation insertOperation = TableOperation.Insert(entry);
                    table.Execute(insertOperation);
                    tableSize++;
                }
            }

            // Links already queued from THIS page. It must live outside the
            // loop: created per link it was always empty, so the duplicate
            // check never filtered anything.
            HashSet<String> links = new HashSet<String>();

            // SelectNodes returns null when the page has no anchors with href.
            var anchors = doc.DocumentNode.SelectNodes("//a[@href]");
            if (anchors != null)
            {
                foreach (HtmlNode link in anchors)
                {
                    string href = link.GetAttributeValue("href", string.Empty);
                    String[] hrefSplit = href.Split('/');
                    String html = hrefSplit[hrefSplit.Length - 1];

                    // Queue the href only if it is not disallowed, not already
                    // crawled, not a duplicate on this page, looks like a valid
                    // html page, and is on cnn or bleacherreport.
                    if (!disallowedUrls.Any(s => href.Contains(s))
                        && !alreadyVisitedUrls.Any(s => s.Equals(href))
                        && !links.Contains(href)
                        && rgx.IsMatch(html)
                        && (href.Contains("cnn.com") || href.Contains("bleacherreport.com")))
                    {
                        urlQueue.AddMessage(new CloudQueueMessage(href));
                        links.Add(href);
                    }
                }
            }
        }
        catch (Exception ex)
        {
            // Crawling stays best-effort (one bad page must not stop the
            // crawler), but the failure is no longer silently discarded.
            System.Diagnostics.Trace.TraceWarning("crawlingPhase: failed to process {0}: {1}", url, ex.Message);
        }
    }

    updateTotalUrls();
    // Update the last 10 urls crawled.
    updateLastUrl(urlMessage.AsString);
    urlQueue.DeleteMessage(urlMessage);
}
/// <summary>
/// Runs an image search on qwant.com and adds one <c>SearchResults</c> entry
/// (thumbnail plus full image address) per usable <c>img</c> element to
/// <c>searchlist</c>, then publishes the list via <c>SearchList</c> and
/// <c>ObservableList()</c>.
/// </summary>
/// <param name="gametitle">Not used by this method.</param>
/// <param name="imagetype">Not used by this method.</param>
/// <param name="searchstring">Search text; it is URL-escaped before being placed in the query.</param>
public void SearchLinks(string gametitle, string imagetype, string searchstring)
{
    // EscapeDataString encodes spaces as %20 (as before) and also escapes
    // characters such as '&' and '#' that would otherwise corrupt the query.
    searchstring = Uri.EscapeDataString(searchstring);
    var url = "https://www.qwant.com/?q=" + searchstring + "&t=images";

    try
    {
        HtmlAgilityPack.HtmlWeb hw = new HtmlAgilityPack.HtmlWeb();
        HtmlAgilityPack.HtmlDocument doc = hw.Load(url);

        // SelectNodes returns null when the page has no <img> at all.
        var images = doc.DocumentNode.SelectNodes("//img");
        if (images != null)
        {
            foreach (HtmlNode link in images)
            {
                string imgValue = link.GetAttributeValue("src", string.Empty);

                // The full image address is the escaped value after the first
                // '='. Images whose src does not have the expected shape are
                // skipped; previously the first such image threw and aborted
                // the whole result set.
                string[] imgLink = imgValue.Split('=');
                if (imgLink.Length < 2)
                {
                    continue;
                }

                string imglink = imgLink[1].Replace("%3A", ":").Replace("%2F", "/");
                if (imglink.Length < 2)
                {
                    continue;
                }
                imglink = imglink.Remove(imglink.Length - 2); // drop the two trailing characters

                // Thumbnail: the src itself, with a scheme, cut before "&q=".
                imgValue = "http:" + imgValue;
                int cut = imgValue.LastIndexOf("&q=", StringComparison.Ordinal);
                if (cut < 0)
                {
                    continue;
                }
                imgValue = imgValue.Substring(0, cut);

                searchlist.Add(new SearchResults { Thumbnail = imgValue, Image = imglink });
            }
        }

        SearchList = searchlist;
        ObservableList();
    }
    catch (Exception e)
    {
        Console.WriteLine("Error: " + e);
    }
}
/// <summary>
/// Downloads a ROM info page and fills a <c>Models.rominfo</c> from it:
/// download link, id, name, size, region, console, image, downloads and votes.
/// </summary>
/// <param name="link">Address of the ROM info page.</param>
/// <returns>
/// The populated info object, or a fresh empty <c>Models.rominfo</c> when the
/// page text contains "404 Page Not Found".
/// </returns>
public async Task <Models.rominfo> getrominfo(string link)
{
    // Fetch the ROM's info page.
    var client = new HtmlAgilityPack.HtmlWeb();
    var page = await client.LoadFromWebAsync(link);

    // A "not found" page still arrives as a successful response but has no
    // tables, which would make the lookups below fail, so bail out early.
    if (page.Text.Contains("404 Page Not Found"))
    {
        return(new Models.rominfo());
    }

    // Work from the second div of the page.
    var container = page.DocumentNode.SelectNodes("//div")[1];

    // The rom-info table's tbody text carries the ROM details; after
    // desencriptar it is split on a fixed separator.
    var infoTable = container.SelectNodes("//*[contains(@class,'table table-striped rom-info')]")
        .Where(node => node.Name == "table")
        .First();
    var tableBody = infoTable.ChildNodes
        .Where(node => node.Name == "tbody")
        .First();
    var fields = desencriptar(tableBody.InnerText).Split(new[] { "^^^???**//" }, StringSplitOptions.None);

    Models.rominfo info = new Models.rominfo();

    // Take the href of the element with id "rom-link" and re-insert '&'
    // before each known parameter so the link can be split below.
    string rawHref = page.GetElementbyId("rom-link").Attributes["href"].Value;
    info.linkdescarga = rawHref
        .Replace("&", "")
        .Replace("&", "")
        .Replace("token=", "&token=")
        .Replace("id=", "&id=")
        .Replace("name=", "&name=");

    // The id is the text between "&id=" and "&token=".
    string afterId = info.linkdescarga.Split(new[] { "&id=" }, StringSplitOptions.None)[1];
    info.id = afterId.Split(new[] { "&token=" }, StringSplitOptions.None)[0].Replace("&", "");

    // Copy the decoded fields into the model.
    info.nombre = fields[0];
    info.size = fields[1];
    info.region = fields[2];
    info.consola = fields[3];

    // Product image: first img element among the product__img matches.
    var images = container.SelectNodes("//*[contains(@class,'product__img')]")
        .Where(node => node.Name == "img");
    info.imagen = images.First().Attributes["src"].Value;

    // Downloads and votes are optional; fall back to defaults when absent.
    try
    {
        info.descargas = fields[4];
        info.votos = container.ChildNodes[2].ChildNodes[0].ChildNodes[1].ChildNodes[1].ChildNodes[1].ChildNodes[0].ChildNodes[0].InnerText.Replace("Out of", " De ");
    }
    catch
    {
        info.descargas = "0";
        info.votos = "0 de 5";
    }

    return(info);
}
/// <summary>
/// Loads the page at <paramref name="url"/> and selects every
/// <c>a</c> element whose class attribute is exactly "itemDetail".
/// </summary>
/// <param name="url">Address of the page to load.</param>
/// <returns>The result of the node selection, typed as <c>object</c>.</returns>
public object getData(string url)
{
    return new HtmlAgilityPack.HtmlWeb()
        .Load(url)
        .DocumentNode
        .SelectNodes("//a[@class='itemDetail']");
}
/// <summary>
/// Wraps a new <c>HtmlAgilityPack.HtmlWeb</c> in an <c>HtmlWebWrapper</c>
/// and returns what the wrapper loads for <paramref name="url"/>.
/// </summary>
/// <param name="url">Address passed to the wrapper's <c>Load</c>.</param>
/// <returns>The document returned by the wrapper.</returns>
public IPXHtmlDocument CreateHtmlDocument(string url)
{
    return new HtmlWebWrapper(new HtmlAgilityPack.HtmlWeb()).Load(url);
}
/// <summary>
/// Collects data from the Naver shopping page at <c>naverlink</c> and from
/// every further page referenced by the page's <c>co_paginate</c> links,
/// passing each loaded document to <c>collectdata</c>.
/// </summary>
public void storedata()
{
    try
    {
        // Collect the data of the first page.
        web = new HtmlAgilityPack.HtmlWeb();
        document = web.Load(naverlink);
        document3 = web.Load(naverlink);
        collectdata(document);

        // Collect the data of the remaining pages.
        // nvMid: text between the first '=' and the first '&' of the link.
        int tmp2 = naverlink.IndexOf("=") + 1;
        int tmp3 = naverlink.IndexOf("&");
        String nvMid = naverlink.Substring(tmp2, tmp3 - tmp2);

        // query: everything after "query=".
        int tmp4 = naverlink.IndexOf("query=") + 6;
        int tmp5 = naverlink.Length;
        String query = naverlink.Substring(tmp4, tmp5 - tmp4);

        var VARIABLES = document.DocumentNode.SelectSingleNode(".//div[@class='co_paginate']").Descendants().Where(x => x.Name == "a");
        foreach (var VARIABLE in VARIABLES)
        {
            // The page number is the first argument of the onclick call:
            // the text between '(' and ','.
            String page = VARIABLE.GetAttributeValue("onclick", "");
            int index3 = page.IndexOf("(") + 1;
            int index4 = page.IndexOf(",");
            page = page.Substring(index3, index4 - index3);

            String url = "http://shopping.naver.com/detail/section_price_compare.nhn?nvMid=" + nvMid + "&pkey=0&pkey2=0&mallSeq=all&fee=all&page=" + page + "&frm=NVSHATC&query=" + query;

            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.Method = "GET";
            request.Referer = "http://shopping.naver.com/detail/detail.nhn?nv_mid=9535864708&cat_id=50000151&frm=NVSHATC&query=%EC%82%BC%EC%84%B1%EC%A0%84%EC%9E%90+%EB%85%B8%ED%8A%B8%EB%B6%819+metal+NT900X3L-K58S";

            // Dispose the response and reader for every page; they were
            // previously left open, leaking one connection per iteration.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (StreamReader reader = new StreamReader(response.GetResponseStream()))
            {
                document3.LoadHtml(reader.ReadToEnd());
            }

            collectdata(document3);
        }
    }
    catch (WebException)
    {
        Console.WriteLine("네이버url 변수 WebException");
    }
    catch (HtmlWebException)
    {
        Console.WriteLine("네이버url 변수 WebException");
    }
    catch (UriFormatException)
    {
        Console.WriteLine("네이버url 변수 WebException");
    }
    catch (NullReferenceException)
    {
        Console.WriteLine("네이버url 변수 NullReferenceException");
    }
}
/// <summary>
/// Loads the page at <paramref name="uri"/> and builds a <c>Product</c>
/// from the address and the extracted name.
/// </summary>
/// <param name="uri">Address of the product page.</param>
/// <returns>A product created from <paramref name="uri"/> and the result of <c>ExtractName</c>.</returns>
public Product Scrape(Uri uri)
{
    HtmlDocument doc = new HtmlAgilityPack.HtmlWeb().Load(uri);

    // The price is extracted but its value is not passed on to the Product;
    // the call is kept because the extraction itself still runs.
    ExtractPrice(doc);

    var name = ExtractName(doc);
    return(new Product(uri, name));
}
/// <summary>
/// Console entry point: loads the fundamentus.com.br listing, reads the cells
/// of table "test1" three at a time into <c>Acao</c> objects (papel,
/// nomeComercial, razaoSocial), echoes every cell to the console and, after
/// the user presses Enter, creates the output file if it does not exist yet.
/// </summary>
/// <param name="args">Command-line arguments (not read).</param>
static void Main(string[] args)
{
    string url = "https://www.fundamentus.com.br/detalhes.php";
    List <Acao> listaAcao = new List <Acao>();
    Acao a = new Acao();

    // Caching is switched off on the web client.
    HtmlAgilityPack.HtmlWeb web = new HtmlAgilityPack.HtmlWeb
    {
        CacheOnly = false,
        CachePath = null,
        UsingCache = false
    };
    HtmlAgilityPack.HtmlDocument doc = web.Load(url);

    // Cells arrive in row order; "column" cycles 0 -> 1 -> 2 -> 0.
    int column = 0;
    foreach (HtmlNode cell in doc.DocumentNode.SelectNodes("//table[@id='test1']/tbody/tr/td"))
    {
        switch (column)
        {
            case 0:
                a = new Acao();
                a.papel = cell.InnerText;
                column = 1;
                break;
            case 1:
                a.nomeComercial = cell.InnerText;
                column = 2;
                break;
            case 2:
                a.razaoSocial = cell.InnerText;
                listaAcao.Add(a);
                column = 0;
                break;
        }
        Console.WriteLine(cell.InnerText);
    }

    Console.ReadLine();

    string path = @"C:\Users\Yuri\Desktop\Stockbook\SQL\v2\listaAcoes.txt";
    if (File.Exists(path))
    {
        return;
    }

    // Create a file to write to. The statements that would emit SQL are
    // commented out, so the file is created empty.
    using (StreamWriter sw = File.CreateText(path))
    {
        foreach (var item in listaAcao)
        {
            //sw.WriteLine("INSERT INTO tb_empresa (nome_comercial, razao_social) VALUES('" + item.nomeComercial + "', '" + item.razaoSocial + "')");
            //sw.WriteLine("INSERT INTO tb_acao (ticker, id_empresa) VALUES('" + item.papel + "', (SELECT id_empresa FROM tb_empresa WHERE razao_social = '" + item.razaoSocial + "'))");
        }
    }
}
/// <summary>
/// Loads the faceitstats.com page of a player and returns the text of the
/// first node matched by the fixed ELO XPath.
/// </summary>
/// <param name="playerName">Player name appended to the profile URL.</param>
/// <returns>The inner text of the matched heading node.</returns>
public static string ObtainFaceitElo(string playerName)
{
    const string eloXPath = "//*[@id=\"app\"]/main/div/div[1]/div[2]/div[1]/div/div[1]/h5";

    HtmlDocument page = new HtmlAgilityPack.HtmlWeb().Load($"https://faceitstats.com/player/{playerName}");
    var matches = page.DocumentNode.SelectNodes(eloXPath);
    return(matches[0].InnerText);
}
/// <summary>
/// Loads the page at <c>Url</c> with an Internet Explorer 7 user agent and
/// configures the resulting document for XML output, auto-closing on end,
/// and a UTF-8 default stream encoding.
/// </summary>
/// <returns>The loaded and configured document.</returns>
public HtmlDocument GetDocument()
{
    var client = new HtmlAgilityPack.HtmlWeb
    {
        UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)"
    };

    HtmlAgilityPack.HtmlDocument page = client.Load(Url);
    page.OptionOutputAsXml = true;
    page.OptionAutoCloseOnEnd = true;
    page.OptionDefaultStreamEncoding = System.Text.Encoding.UTF8;
    return(page);
}
/// <summary>
/// Loads the faceitstats.com page of a player and returns the text of the
/// third cell in the first row of the table matched by the fixed XPath.
/// </summary>
/// <param name="playerName">Player name appended to the profile URL.</param>
/// <returns>The inner text of the matched table cell.</returns>
public static string ObtainLastMatchScore(string playerName)
{
    const string scoreXPath = "//*[@id=\"app\"]/main/div/div[7]/div/table/tbody/tr[1]/td[3]";

    HtmlDocument page = new HtmlAgilityPack.HtmlWeb().Load($"https://faceitstats.com/player/{playerName}");
    var matches = page.DocumentNode.SelectNodes(scoreXPath);
    return(matches[0].InnerText);
}