// GET: Alarabiya
/// <summary>
/// Scrapes the remote news listing page and exposes the collected items to the view
/// through <c>ViewBag.newsList</c>.
/// </summary>
/// <remarks>
/// The first entry of the list is always the top story (its fields stay unset when the
/// page has no matching element); the remaining entries come from the main article block.
/// Missing attributes are stored as empty strings instead of raising a
/// <c>NullReferenceException</c>, and list items that carry no link are skipped.
/// </remarks>
/// <returns>The default view for this action.</returns>
public ActionResult Index()
{
    ArrayList newsList = new ArrayList();
    CQ dom = CQ.CreateFromUrl("http://www.alhurra.com/p/349.html");

    // Top story: first anchor inside the first ".img-overlay" element.
    newsData topNews = new newsData();
    CQ tNews = dom.Find(".img-overlay").Eq(0).Find("a").Eq(0);
    if (tNews.Length != 0)
    {
        // Attr() yields null when the attribute is absent, so never dereference it directly.
        topNews.alt_url = tNews.Attr("href") ?? "";
        topNews.text = tNews.Attr("title") ?? "";
    }

    CQ cImg = tNews.Find("img").Eq(0);
    if (cImg.Length != 0)
    {
        topNews.url = cImg.Attr("data-src") ?? "";
    }

    newsList.Add(topNews);

    // Remaining stories: one per <li> of the main article block.
    CQ mainArticle = dom.Find("#wrowblock-145_12").Eq(0).Find("li");
    for (int i = 0; i < mainArticle.Length; i++)
    {
        CQ article = mainArticle.Eq(i).Find("a").Eq(0);
        if (article.Length == 0)
        {
            // A list item without a link has nothing to show or navigate to.
            continue;
        }

        newsData nData = new newsData();
        nData.text = article.Attr("title") ?? "";
        nData.alt_url = article.Attr("href") ?? "";
        nData.url = "";

        CQ img = article.Find("img").Eq(0);
        if (img.Length != 0)
        {
            nData.url = img.Attr("data-src") ?? "";
        }

        newsList.Add(nData);
    }

    ViewBag.newsList = newsList;
    return View();
}
/// <summary>
/// Downloads the article at <paramref name="url"/>, strips embedded media, galleries and
/// related-content widgets from its body, and passes the title and the cleaned HTML to the
/// view through <c>ViewBag.title</c> and <c>ViewBag.text</c>.
/// </summary>
/// <param name="url">Absolute http or https address of the article page to fetch.</param>
/// <returns>The default view for this action.</returns>
/// <exception cref="System.ArgumentException">
/// <paramref name="url"/> is missing, not absolute, or not an http/https address.
/// </exception>
public ActionResult detail(string url)
{
    // The address comes from the request, and WebClient also resolves file:// and ftp://
    // URIs, so anything that is not plain http(s) is rejected before downloading.
    System.Uri target;
    bool isWebUrl = System.Uri.TryCreate(url, System.UriKind.Absolute, out target)
        && (target.Scheme == System.Uri.UriSchemeHttp || target.Scheme == System.Uri.UriSchemeHttps);
    if (!isWebUrl)
    {
        throw new System.ArgumentException("Only absolute http/https URLs are supported.", "url");
    }

    string pageContent;
    using (WebClient webClient = new WebClient())
    {
        webClient.Headers.Add("user-agent", "Only a test!");
        pageContent = webClient.DownloadString(url);
    }

    CQ dom = pageContent;
    CQ article_body = dom.Find("div.pg-right-rail-tall").Eq(0).Find("article").Eq(0).Find("div.l-container").Eq(0);
    ViewBag.title = article_body.Children("h1").Text();

    article_body = article_body.Children("div.pg-rail-tall__wrapper").Eq(0).Children("div.pg-side-of-rail").Eq(0);

    // Only the first match of these two selectors is dropped.
    article_body.Find("div.media__video--thumbnail-wrapper").Eq(0).Remove();
    article_body.Find(".el__leafmedia.el__leafmedia--storyhighlights").Eq(0).Remove();

    // Every match of the following selectors is dropped.
    article_body.Find(".el__embedded.el__embedded--standard").Remove();
    article_body.Find(".el__gallery--expandable.js__gallery--expandable.js__leafmedia--gallery").Remove();
    article_body.Find(".cn.cn-list-hierarchical-small-horizontal.cn--idx-0.cn-").Remove();
    article_body.Find("input.sharebar-video-embed-field").Remove();
    article_body.Find(".zn__containers").Remove();

    // Copy the "data-src-large" value into "src" for the responsive images.
    CQ images = article_body.Find("img.media__image.media__image--responsive");
    for (int i = 0; i < images.Length; i++)
    {
        CQ image = images.Eq(i);
        string largeSource = image.Attr("data-src-large");
        if (!string.IsNullOrEmpty(largeSource))
        {
            // Leave the existing src alone when no large variant is advertised.
            image.AttrSet(new { src = largeSource });
        }
    }

    article_body.Find(".cn__column.carousel__content__item").Remove();

    ViewBag.text = article_body.RenderSelection();
    return View();
}
/// <summary>
/// Searches the remote music site for <c>param.Keyword</c> and returns one
/// <see cref="MusicSummary"/> per result row (<c>li.bb-dotimg</c>) of the requested page.
/// </summary>
/// <param name="param">
/// Paging and keyword input: <c>Keyword</c> is the search text (treated as empty when null),
/// <c>Current</c> and <c>Size</c> are used to compute the <c>start</c> and <c>size</c>
/// query parameters.
/// </param>
/// <returns>
/// The parsed results. Rows whose <c>data-songdata</c> attribute contains no usable numeric
/// id are omitted instead of failing the whole request.
/// </returns>
public List<MusicSummary> Search([FromUri] PageParams param)
{
    List<MusicSummary> list = new List<MusicSummary>();

    // The keyword is free text: escape it so '&', '#', spaces or non-ASCII characters
    // cannot break or inject query-string parameters.
    string keyword = Uri.EscapeDataString(param.Keyword ?? "");
    string url = $"http://music.baidu.com/search/song?s=1&key={keyword}&jump=0&start={(param.Current - 1) * param.Size}&size={param.Size}&third_type=0";

    CQ csquery = Http.GetHttpValue(url);
    CQ items = csquery.Find("li.bb-dotimg");
    for (int i = 0; i < items.Length; i++)
    {
        CQ item = items.Eq(i);
        CQ csTitle = item.Find("span.song-title a");

        // The song id is the first run of digits inside the "data-songdata" attribute.
        string ids = Regex.Match(csTitle.Attr("data-songdata") ?? "", "\\d+").Value;
        int songId;
        if (!int.TryParse(ids, out songId))
        {
            // Missing attribute, no digits, or a value outside the int range.
            continue;
        }

        list.Add(new MusicSummary
        {
            ArtistName = item.Find("span.author_list").Text().Trim(),
            AlbumName = item.Find("span.album-title").Text().Trim(),
            SongName = csTitle.Attr("title"),
            SongId = songId
        });
    }

    return list;
}
/// <summary>
/// Opens <paramref name="url"/> in a browser session, walks the paged listing, parses every
/// <c>table.lista2t tr.lista2</c> row into a <see cref="RarbgMetadata"/> and merges it into
/// <paramref name="allSummaries"/>, keyed by the row's IMDB id.
/// </summary>
/// <param name="url">Address of the first listing page.</param>
/// <param name="jsonPath">Path handed to <c>SaveJson</c> for periodic and final saves.</param>
/// <param name="allSummaries">
/// Shared result map. For each IMDB id, an existing entry with the same title
/// (case-insensitive) is replaced by the newly parsed one.
/// </param>
/// <param name="partitionIndex">
/// Index used to pick the Chrome profile directory and to prefix the trace output.
/// </param>
/// <param name="continue">
/// Called with the 1-based page index before each page is processed; returning
/// <c>false</c> stops the crawl. Defaults to always continuing.
/// </param>
/// <remarks>
/// Exceptions are traced and swallowed, and the results gathered so far are always saved
/// in the <c>finally</c> block. Rows with no title text or no link are skipped and traced
/// so that one malformed row does not end the whole partition.
/// </remarks>
private static async Task DownloadMetadataAsync(string url, string jsonPath, ConcurrentDictionary<string, RarbgMetadata[]> allSummaries, int partitionIndex, Func<int, bool>? @continue = null)
{
    @continue ??= _ => true;
    try
    {
        using IWebDriver webDriver = WebDriverHelper.Start(@$"D:\Temp\Chrome Profile {partitionIndex}");
        webDriver.Url = url;
        new WebDriverWait(webDriver, WebDriverHelper.DefaultWait).Until(e => e.FindElement(By.Id("pager_links")));

        // NOTE(review): the page is navigated to a second time before the pager is captured;
        // presumably the first load only warms up the session - confirm before simplifying.
        webDriver.Url = url;
        IWebElement pager = new WebDriverWait(webDriver, TimeSpan.FromSeconds(100)).Until(e => e.FindElement(By.Id("pager_links")));

        int pageIndex = 1;
        do
        {
            if (!@continue(pageIndex))
            {
                break;
            }

            await Task.Delay(WebDriverHelper.DefaultDomWait);
            Trace.WriteLine($"{partitionIndex}:{pageIndex} Start {webDriver.Url}");

            CQ page = webDriver.PageSource;

            // The address does not change while one page is parsed, so resolve it once per page.
            string baseUrl = new Uri(webDriver.Url).GetLeftPart(UriPartial.Authority);

            foreach (var row in page.Find("table.lista2t tr.lista2"))
            {
                CQ cells = row.Cq().Children();
                string[] texts = cells
                    .Eq(1)
                    .Text()
                    .Trim()
                    .Split(" ", StringSplitOptions.RemoveEmptyEntries)
                    .Where(text => !string.IsNullOrWhiteSpace(text))
                    .ToArray();
                CQ links = cells.Eq(1).Find("a");
                if (texts.Length == 0 || links.Length == 0)
                {
                    Trace.WriteLine($"{partitionIndex}:{pageIndex} Skipped a row without title or link.");
                    continue;
                }

                string title = texts[0].Trim();
                string link = $"{baseUrl}{links[0].GetAttribute("href")}";
                string imdbId = links.Length > 1
                    ? links[1].GetAttribute("href").Replace("/torrents.php?imdb=", string.Empty).Trim()
                    : string.Empty;

                // The optional second text fragment has the form "<genres> IMDB: <rating>/10".
                string[] genres = Array.Empty<string>();
                string imdbRating = string.Empty;
                if (texts.Length > 1)
                {
                    string[] descriptions = texts[1].Trim().Split(" IMDB: ");
                    if (descriptions.Length > 0)
                    {
                        genres = descriptions[0].Split(", ").Select(genre => genre.Trim()).ToArray();
                    }

                    if (descriptions.Length > 1)
                    {
                        imdbRating = descriptions[1].Replace("/10", string.Empty).Trim();
                    }
                }

                // The image address is what remains of the "onmouseover" handler once the
                // surrounding overlib(...) markup is stripped.
                string image = links[0]
                    .GetAttribute("onmouseover")
                    ?.Replace(@"return overlib('<img src=\'", string.Empty)
                    .Replace(@"\' border=0>')", string.Empty)
                    ?? string.Empty;

                // -1 marks a seed/leech cell that is not a valid integer.
                int seed = int.TryParse(cells.Eq(4).Text().Trim(), out int seedValue) ? seedValue : -1;
                int leech = int.TryParse(cells.Eq(5).Text().Trim(), out int leechValue) ? leechValue : -1;

                RarbgMetadata summary = new RarbgMetadata(
                    link,
                    title,
                    imdbId,
                    imdbRating,
                    genres,
                    image,
                    cells.Eq(2).Text().Trim(),
                    cells.Eq(3).Text().Trim(),
                    seed,
                    leech,
                    cells.Eq(7).Text().Trim());

                lock (AddItemLock)
                {
                    // Replace an existing entry with the same title, otherwise append.
                    allSummaries[summary.ImdbId] = allSummaries.TryGetValue(summary.ImdbId, out var existingSummaries)
                        ? existingSummaries
                            .Where(existing => !string.Equals(existing.Title, summary.Title, StringComparison.OrdinalIgnoreCase))
                            .Append(summary)
                            .ToArray()
                        : new[] { summary };
                }
            }

            if (pageIndex % SaveFrequency == 0)
            {
                SaveJson(jsonPath, allSummaries);
            }

            // Log the index of the page just finished, then advance.
            Trace.WriteLine($"{partitionIndex}:{pageIndex} End {webDriver.Url}");
            pageIndex++;
        }
        while (webDriver.HasNextPage(ref pager));

        webDriver.Close();
        webDriver.Quit();
    }
    catch (Exception exception)
    {
        Trace.WriteLine(exception);
    }
    finally
    {
        SaveJson(jsonPath, allSummaries);
    }
}