/// <summary>
/// Collects the downloads from every page of a show's listing. Parsing starts at
/// <c>showData.Url</c> and continues with the "next page" URL reported by
/// <c>ParseSite</c> until no further page is reported.
/// </summary>
/// <param name="showData">Show to parse; its <c>Url</c> and <c>Name</c> must not be null or blank.</param>
/// <param name="firstcover">Receives the first non-empty cover URL reported for any page, or an empty string if none was found.</param>
/// <param name="uploadCache">Optional cache handed through to <c>ParseSite</c>; may be null.</param>
/// <returns>The downloads of all visited pages, in page order.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="showData"/> is null, or its <c>Url</c> or <c>Name</c> is null or whitespace.
/// </exception>
public static List<DownloadData> ParseSjOrgSite(ShowData showData, out string firstcover, UploadCache uploadCache = null)
{
    if (String.IsNullOrWhiteSpace(showData?.Url) || String.IsNullOrWhiteSpace(showData.Name))
    {
        throw new ArgumentNullException(nameof(showData));
    }

    firstcover = "";
    var episodes = new List<DownloadData>();

    // The pagination is driven by remote content, so remember which pages were
    // already fetched: a "next" link that points back to a visited page would
    // otherwise keep this loop running forever.
    var visitedPages = new HashSet<string>(StringComparer.Ordinal);

    string nextpage = showData.Url;
    do
    {
        visitedPages.Add(nextpage);

        string cover;
        List<DownloadData> pageEpisodes = ParseSite(showData, nextpage, out nextpage, out cover, uploadCache);

        // Keep the first cover that is actually present; never overwrite it with null/empty.
        if (String.IsNullOrEmpty(firstcover) && !String.IsNullOrEmpty(cover))
        {
            firstcover = cover;
        }

        episodes.AddRange(pageEpisodes);
    } while (!String.IsNullOrEmpty(nextpage) && !visitedPages.Contains(nextpage));

    return episodes;
}
/// <summary>
/// Pattern applied to the HTML of an upload header: group 1 is the text inside a
/// strong element (the field name), group 2 is the text following it up to the
/// next '|' or the end of the input (the field value).
/// </summary>
private static readonly Regex SiteUploadHeaderFieldRegex =
    new Regex("<strong>\\s*(.+?)\\s*</strong>\\s*(.+?)\\s*(?:\\||$)");

/// <summary>
/// Downloads a single listing page and extracts the downloads it contains.
/// </summary>
/// <param name="showData">Show the page belongs to; attached to every parsed season.</param>
/// <param name="url">URL of the page to download.</param>
/// <param name="nextpageurl">Receives the href of the "next" navigation link, or an empty string if the page has none.</param>
/// <param name="firstcover">Receives the cover URL of the first post on the page that has one, or an empty string.</param>
/// <param name="uploadCache">Optional cache used to unify upload data; may be null.</param>
/// <returns>The downloads found on the page; empty if the page has no content element or no posts.</returns>
/// <exception cref="TimeoutException">Every request attempt timed out.</exception>
private static List<DownloadData> ParseSite(ShowData showData, string url, out string nextpageurl, out string firstcover, UploadCache uploadCache)
{
    nextpageurl = "";
    firstcover = "";

    HtmlDocument doc = LoadSitePage(url);
    var list = new List<DownloadData>();

    HtmlNode content = doc.GetElementbyId("content");
    if (content == null)
    {
        // Without the content container there is nothing to parse on this page.
        Console.WriteLine("SjInfo Parser: No content element");
        return list;
    }

    HtmlNode nextLink = content.SelectSingleNode("//div[@class='navigation']//a[@href][@class='next']");
    if (nextLink != null)
    {
        nextpageurl = nextLink.GetAttributeValue("href", "");
    }

    var posts = content.SelectNodes("div[@class='post']");
    if (posts == null)
    {
        return list;
    }

    foreach (var post in posts)
    {
        // ---- Season title ----
        var title = post.SelectSingleNode("h2/a[@href]");
        if (title == null)
        {
            Console.WriteLine("SjInfo Parser: No Title");
            continue;
        }

        var seasonData = new SeasonData();
        seasonData.Show = showData;
        seasonData.Url = title.GetAttributeValue("href", null);
        seasonData.Title = WebUtility.HtmlDecode(title.InnerText);

        var postContent = post.SelectSingleNode("div[@class='post-content']");
        if (postContent == null)
        {
            Console.WriteLine("SjInfo Parser: No Post content");
            continue;
        }

        // ---- Season cover ----
        var cover = postContent.SelectSingleNode(".//p/img[@src]");
        if (cover != null)
        {
            seasonData.CoverUrl = cover.GetAttributeValue("src", null);
            if (String.IsNullOrEmpty(firstcover))
            {
                firstcover = seasonData.CoverUrl;
            }
        }

        // ---- Season description ----
        var desc = postContent.SelectSingleNode(".//p[count(node())=1][not(@class='post-info-co')]/text()");
        if (desc != null)
        {
            seasonData.Description = WebUtility.HtmlDecode(desc.InnerText);
        }

        // ---- Upload headers and their download links ----
        var ps = postContent.SelectNodes(".//node()[self::p|self::div][count(strong)>=2]");
        if (ps == null)
        {
            Console.WriteLine("SjInfo Parser: no uploads/headers");
            continue;
        }

        // Link blocks belong to the most recent header block preceding them.
        UploadData uploadData = null;
        foreach (var p in ps)
        {
            // A block without any <a target=...> child is treated as an upload header.
            if (p.SelectSingleNode("self::node()[not(./a[@target])]") != null)
            {
                uploadData = ParseSiteUploadHeader(p, seasonData);
            }
            else if (uploadData != null)
            {
                DownloadData dd = ParseSiteDownload(p, uploadData, uploadCache);
                if (dd != null)
                {
                    list.Add(dd);
                }
            }
            else
            {
                Console.WriteLine("SjInfo Parser: UploadData was null");
            }
        }
    }

    return list;
}

/// <summary>
/// Requests <paramref name="url"/> and loads the response body as a UTF-8 HTML document.
/// A request that times out is retried, for at most eight attempts in total.
/// </summary>
/// <exception cref="TimeoutException">Every attempt timed out.</exception>
private static HtmlDocument LoadSitePage(string url)
{
    const int maxAttempts = 8;

    WebResponse resp = null;
    for (int attempt = 0; attempt < maxAttempts && resp == null; attempt++)
    {
        try
        {
            HttpWebRequest req = WebRequest.CreateHttp(url);
            req.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
            req.KeepAlive = false;
            resp = req.GetResponse();
        }
        catch (WebException ex) when (ex.Status == WebExceptionStatus.Timeout)
        {
            // Timed out: try again. Any other WebException propagates to the caller.
        }
    }

    if (resp == null)
    {
        throw new TimeoutException();
    }

    // Dispose the response and its stream even when loading the document throws.
    using (resp)
    using (var stream = resp.GetResponseStream())
    {
        var doc = new HtmlDocument();
        doc.OptionDefaultStreamEncoding = Encoding.UTF8;
        doc.Load(stream);
        return doc;
    }
}

/// <summary>
/// Builds the upload data described by an upload header block by reading its
/// "field name / value" pairs (runtime, size, uploader, format, language).
/// Field names are matched by German and English keywords, ignoring case.
/// </summary>
private static UploadData ParseSiteUploadHeader(HtmlNode header, SeasonData seasonData)
{
    var uploadData = new UploadData();
    uploadData.Season = seasonData;

    string html = WebUtility.HtmlDecode(header.InnerHtml);
    foreach (Match match in SiteUploadHeaderFieldRegex.Matches(html))
    {
        // Invariant lower-casing: the keywords below are fixed ASCII/German
        // tokens and must match regardless of the current thread culture.
        string key = match.Groups[1].Value.ToLowerInvariant();
        string value = match.Groups[2].Value;

        if (key.Contains("dauer") || key.Contains("runtime") || key.Contains("duration"))
        {
            uploadData.Runtime = value;
        }
        else if (key.Contains("grösse") || key.Contains("größe") || key.Contains("size"))
        {
            uploadData.Size = value;
        }
        else if (key.Contains("uploader"))
        {
            uploadData.Uploader = value;
        }
        else if (key.Contains("format"))
        {
            uploadData.Format = value;
        }
        else if (key.Contains("sprache") || key.Contains("language"))
        {
            string language = value.ToLowerInvariant();
            if (language.Contains("deutsch") || language.Contains("german"))
            {
                uploadData.Language |= UploadLanguage.German;
            }
            if (language.Contains("englisch") || language.Contains("english"))
            {
                uploadData.Language |= UploadLanguage.English;
            }
            if (language.Contains("subbed"))
            {
                uploadData.Subbed = true;
            }
        }
    }

    return uploadData;
}

/// <summary>
/// Builds a download entry from a link block: the text of the first strong
/// element is the title, and every link that is followed by text becomes one
/// entry keyed by that text.
/// </summary>
/// <returns>The download, or null if the block has no usable title or no links.</returns>
private static DownloadData ParseSiteDownload(HtmlNode block, UploadData uploadData, UploadCache uploadCache)
{
    var ulTitle = block.SelectSingleNode("strong[position()=1][count(node())=1]/text()");
    if (ulTitle == null)
    {
        Console.WriteLine("SjInfo Parser: No title for link? " + block.InnerHtml);
        return null;
    }
    string titleStr = WebUtility.HtmlDecode(ulTitle.InnerText).Trim();

    var links = block.SelectNodes("a[@href][following-sibling::text()]");
    if (links == null)
    {
        return null;
    }

    var downloads = new Dictionary<string, string>();
    foreach (var link in links)
    {
        string href = link.GetAttributeValue("href", null);

        // The label is the text right after the link, without a leading '|' separator.
        string label = WebUtility.HtmlDecode(link.NextSibling.InnerText.Trim());
        if (label.StartsWith("|", StringComparison.Ordinal))
        {
            label = label.Substring(1).Trim();
        }

        // Disambiguate repeated labels as "label(1)", "label(2)", ...
        string key = label;
        int suffix = 1;
        while (downloads.ContainsKey(key))
        {
            key = label + "(" + suffix++ + ")";
        }
        downloads.Add(key, href);
    }

    // A resolution mentioned in the title overrides the header's format; first match wins.
    foreach (string format in new[] { "720p", "1080p", "720i", "1080i" })
    {
        if (titleStr.Contains(format))
        {
            uploadData.Format = format;
            break;
        }
    }

    var dd = new DownloadData();
    dd.Upload = uploadCache == null ? uploadData : uploadCache.GetUniqueUploadData(uploadData);
    dd.Title = titleStr;
    if (titleStr.IndexOf("subbed", StringComparison.OrdinalIgnoreCase) >= 0)
    {
        dd.Upload.Subbed = true;
    }
    foreach (var download in downloads)
    {
        dd.Links.Add(download.Key, download.Value);
    }

    return dd;
}