예제 #1
        /// <summary>
        /// Requests <paramref name="url"/> and scans the response line by line with regular expressions,
        /// tracking whether the current line lies inside the "content" div, a "post" div and a
        /// "post-content" div, in order to build season, upload and download records.
        /// NOTE(review): this excerpt contains no braces and several statements (the else/break/return
        /// lines, and whatever adds entries to <c>list</c>) are not visible, so the nesting described in
        /// the comments below is inferred from indentation only - confirm against the full source.
        /// </summary>
        /// <param name="showData">Assigned to every SeasonData created here; its Name is used in warning messages.</param>
        /// <param name="url">Address to request; overwritten with the response URI once the response is available.</param>
        /// <param name="nextpageurl">Initialised to ""; filled in when the pagination line also references page number current+1.</param>
        /// <param name="firstcover">Initialised to ""; receives the first cover image URL that is found.</param>
        /// <param name="uploadCache">May be null; when non-null each UploadData is passed through GetUniqueUploadData.</param>
        /// <returns>Presumably <c>list</c> - the return statement is not visible in this excerpt.</returns>
        private static List <DownloadData> ParseSite(ShowData showData, string url, out string nextpageurl, out string firstcover, UploadCache uploadCache)
            nextpageurl = "";
            firstcover  = "";

            HttpWebRequest req = HttpWebRequest.CreateHttp(url);

            req.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
            // NOTE(review): GetResponse() is invoked again on the next line instead of reusing resp.
            // Neither the response nor the reader is disposed anywhere in the visible code.
            var            resp       = req.GetResponse();
            BufferedStream buffStream = new BufferedStream(req.GetResponse().GetResponseStream());

            // From here on url is the URI the response actually came from; it is used below to build nextpageurl.
            url = resp.ResponseUri.ToString();
            StreamReader reader = new StreamReader(buffStream);

            // Parser state: which of the nested divs (content > post > post-content) the current line is in.
            bool inContent           = false;
            bool inPost              = false;
            bool inPostContent       = false;
            List <DownloadData> list = new List <DownloadData>();
            Match      m;
            // Season and upload header currently being filled; both are reset when a post-content div closes.
            SeasonData seasonData = null;
            UploadData uploadData = null;

            while (!reader.EndOfStream)
                String line = reader.ReadLine();
                if (line.Contains("line-height") && line.Contains("&nbsp;")) //detect: <div style="line-height:5px;height:5px;background-color:lightgrey;">&nbsp;</div>
                    continue;                                                //I can only hope this will never occur in any other case..
                if (inContent)
                    if (inPost)
                        if (inPostContent)
                            // Only a warning is printed here; seasonData is still dereferenced further down.
                            if (seasonData == null)
                                Console.WriteLine("Warning: Invalid Html received while parsing " + showData.Name + ". Trying to continue");
                            // Download entry: a <p> that opens with a <strong> title and ends in <br/>.
                            // The link lines that follow are consumed by the inner loop below.
                            if ((m = new Regex("<p>\\s*<strong>\\s*(.+?)\\s*</strong>\\s*[^/]*\\s*<br\\s?/>").Match(line)).Success)
                                //Debug.Assert(uploadData != null);
                                if (uploadData == null)
                                    Console.WriteLine("Warning: Invalid Html received while parsing " + showData.Name + ". Trying to continue");
                                string title = WebUtility.HtmlDecode(m.Groups[1].Value);

                                // Maps the text captured after each anchor (group 2) to the anchor's href (group 1).
                                var   downloads = new Dictionary <string, string>();
                                Regex r         = new Regex("<a\\s+href\\s*=\"([^\"]+)\".+\\s+(.+?)\\s*<");
                                while (true)
                                    // NOTE(review): ReadLine() returns null at end of stream; no null check is visible here.
                                    line = reader.ReadLine();
                                    Match m2 = r.Match(line);
                                    if (m2.Success)
                                        String keyOrg = WebUtility.HtmlDecode(m2.Groups[2].Value);
                                        String key    = keyOrg;
                                        // Make the key unique by appending "(n)", continuing from an existing numeric suffix.
                                        while (downloads.ContainsKey(key))
                                            Match mx  = new Regex("\\((\\d+)\\)$").Match(key);
                                            int   num = 1;
                                            if (mx.Success)
                                                num = int.Parse(mx.Groups[1].Value);

                                            key = keyOrg + "(" + ++num + ")";
                                        String val = m2.Groups[1].Value;
                                        // Only links starting with "http://" are stored.
                                        // NOTE(review): the warning below is presumably the else branch - the else line is not visible.
                                        if (val != null && !String.IsNullOrWhiteSpace(val) && val.Trim().StartsWith("http://"))
                                            downloads.Add(key, val);
                                            Console.WriteLine("Warning: Invalid Download received while parsing " + showData.Name + ". Ignoring link");
                                            //ignoring invalid download
                                    // NOTE(review): the statement guarded by this check is missing from the excerpt;
                                    // presumably it leaves the while(true) loop at the closing </p>.
                                    if (line.Contains("</p>"))

                                // A resolution marker in the entry title replaces the Format taken from the upload header.
                                if (title.Contains("720p"))
                                    uploadData.Format = "720p";
                                else if (title.Contains("1080p"))
                                    uploadData.Format = "1080p";
                                else if (title.Contains("720i"))
                                    uploadData.Format = "720i";
                                else if (title.Contains("1080i"))
                                    uploadData.Format = "1080i";

                                DownloadData dd = new DownloadData();
                                dd.Upload = uploadCache == null ? uploadData : uploadCache.GetUniqueUploadData(uploadData);
                                dd.Title  = title;

                                if (title.ToLower().Contains("subbed"))
                                    dd.Upload.Subbed = true;

                                foreach (var download in downloads)
                                    dd.Links.Add(download.Key, download.Value);

                            // Upload header: a <p>/<div class="info"> containing one of the keywords
                            // Dauer/Größe/Sprache/Format/Uploader, either on one line or with the opening
                            // <p style="..."> on its own line and the content on the next (second alternative).
                            // NOTE(review): the five " * " lines after the condition appear to be the body of a
                            // block comment whose opening and closing delimiter lines are missing from this excerpt.
                            else if ((m = new Regex("(?:(?:<p(?:\\s+style\\s*\\=\\s*\\\"[^\\\"]+\\\"\\s*)?>)|(?:<div\\s+class\\s*=\\s*\"info\">))\\s*(.*?(?:Dauer|Größe|Sprache|Format|Uploader).*?)\\s*(?:(?:</p>)|(?:</div>))").Match(line)).Success ||
                                     ((m = new Regex("<p\\s+style\\s*\\=\\s*\\\"[^\\\"]+\\\"\\s*>").Match(line)).Success && ((line = reader.ReadLine()) != "") && (m = new Regex("\\s*(.*?(?:Dauer|Größe|Sprache|Format|Uploader).*?)\\s*</p>").Match(line)).Success))
                                 * Nice case:
                                 * <p><strong>Dauer:</strong> 20:00 | <strong>Größe:</strong> 175 MB | <strong>Sprache:</strong> Englisch &amp; deutsche Untertitel | <strong>Format:</strong> XviD | <strong>HQ-Cover:</strong> <a href="http://justpic.info/?s=cover&amp;id=&amp;name=&amp;keyword=VGhlIEJpZyBCYW5nIFRoZW9yeQ,,">Download</a> | <strong>Uploader:</strong> block06</p>
                                 * Bad case: (note newline!!)
                                 * <p style="background-color: #f0f0f0;">
                                 *  <strong>Dauer:</strong> 20:00 | <strong>Größe:</strong> 175 MB | <strong>Sprache:</strong> Englisch | <strong>Format:</strong> XviD | <strong>HQ-Cover:</strong> <a href="http://justpic.info/?s=cover&#038;id=&#038;name=&#038;keyword=VGhlIEJpZyBCYW5nIFRoZW9yeQ,,">Download</a> | <strong>Uploader:</strong> block06</p>

                                // Start a new upload header; download entries that follow refer to it.
                                uploadData        = new UploadData();
                                uploadData.Season = seasonData;

                                // Split the header into "<strong>key</strong> value" pairs separated by '|' or end of text.
                                String          c  = WebUtility.HtmlDecode(m.Groups[1].Value);
                                MatchCollection mc = new Regex("<strong>\\s*(.+?)\\s*</strong>\\s*(.+?)\\s*(?:\\||$)").Matches(c);
                                foreach (Match match in mc)
                                    String key   = match.Groups[1].Value.ToLower();
                                    String value = match.Groups[2].Value;
                                    // Keys are matched by German or English keyword.
                                    if (key.Contains("dauer") || key.Contains("runtime") || key.Contains("duration"))
                                        uploadData.Runtime = value;
                                    else if (key.Contains("grösse") || key.Contains("größe") || key.Contains("size"))
                                        uploadData.Size = value;
                                    else if (key.Contains("uploader"))
                                        uploadData.Uploader = value;
                                    else if (key.Contains("format"))
                                        uploadData.Format = value;
                                    else if (key.Contains("sprache") || key.Contains("language"))
                                        // Language is a flag set: one value may add both German and English.
                                        value = value.ToLower();
                                        if (value.Contains("deutsch") || value.Contains("german"))
                                            uploadData.Language |= UploadLanguage.German;
                                        if (value.Contains("englisch") || value.Contains("english"))
                                            uploadData.Language |= UploadLanguage.English;
                                        if (value.Contains("subbed"))
                                            uploadData.Subbed = true;
                            // A <p> containing only text becomes the season description, but only while it is still "".
                            else if ((m = new Regex("<p>\\s*([^<]+)\\s*</p>").Match(line)).Success)
                                if (seasonData.Description == "")
                                    seasonData.Description = WebUtility.HtmlDecode(m.Groups[1].Value);
                            // A <p> wrapping a single <img> supplies the season cover; the first one seen is also reported via firstcover.
                            else if ((m = new Regex("<p>\\s*<img\\s+src\\s*=\"([^\"]+)\".*?/>\\s*</p>").Match(line)).Success)
                                seasonData.CoverUrl = m.Groups[1].Value;
                                if (firstcover == "")
                                    firstcover = seasonData.CoverUrl;
                            // Any line containing </div> ends the post content and clears the per-post state.
                            else if (new Regex("</div>").Match(line).Success)
                                inPostContent = false;
                                seasonData    = null;
                                uploadData    = null;
                        // Inside a post but outside its content: an <h2><a href> heading starts a new season.
                        else if ((m = new Regex("<h2>\\s*<a\\s+href\\s*=\"([^\"]+)\".*?>(.+?)</a>\\s*</h2>").Match(line)).Success)
                            seasonData      = new SeasonData();
                            seasonData.Show = showData;

                            seasonData.Url   = m.Groups[1].Value;
                            seasonData.Title = WebUtility.HtmlDecode(m.Groups[2].Value);
                        else if (new Regex("<div\\s+class\\s*=\\s*\"post-content\"\\s*>").Match(line).Success)
                            inPostContent = true;
                        else if (new Regex("</div>").Match(line).Success)
                            inPost = false;
                    else if (new Regex("<div\\s+class\\s*=\\s*\"post\"\\s*>").Match(line).Success)
                        inPost = true;
                        // Pagination: the line carrying the current-page span is also searched for title='<current+1>'.
                        // NOTE(review): an else line is presumably missing before this check and before the
                        // second nextpageurl assignment below - confirm against the full source.
                        if ((m = new Regex("<span\\s+class\\s*=\\s*'page\\s+current'>\\s*(\\d+)\\s*</span>").Match(line)).Success)
                            int currentPage = int.Parse(m.Groups[1].Value);
                            int nextPage    = currentPage + 1;
                            if (new Regex("title\\s*='" + nextPage + "'").Match(line).Success)
                                // If url already ends in /page/N, swap the number; the remaining lines append "page/N+1/" to url.
                                if (new Regex("/page/" + currentPage + "/?$").Match(url).Success)
                                    nextpageurl = url.Replace("page/" + currentPage, "page/" + nextPage);
                                    nextpageurl = url;
                                    if (!nextpageurl.EndsWith("/"))
                                        nextpageurl += "/";
                                    nextpageurl += "page/" + nextPage + "/";
                        if (new Regex("</div>").Match(line).Success)
                            inContent = false;
                // Nothing is parsed until the <div id="content"> line has been seen.
                else if (new Regex("<div\\s+id\\s*=\\s*\"content\"\\s*>").Match(line).Success)
                    inContent = true;
예제 #2
        /// <summary>
        /// Requests <paramref name="url"/> (with several attempts) and extracts season, upload and download
        /// records from the element with id "content" using XPath queries on an HtmlDocument.
        /// NOTE(review): this excerpt contains no braces and several statements are not visible (the try
        /// line, the statements under some null checks, the call that loads the response into <c>doc</c>,
        /// whatever adds entries to <c>list</c>, and the return), so the nesting described below is inferred
        /// from indentation only. HtmlDocument/HtmlNode look like HtmlAgilityPack types - confirm.
        /// </summary>
        /// <param name="showData">Assigned to every SeasonData created here.</param>
        /// <param name="url">Address to request.</param>
        /// <param name="nextpageurl">Initialised to ""; set to the href of the "next" navigation link when one is found.</param>
        /// <param name="firstcover">Initialised to ""; receives the first cover image URL that is found.</param>
        /// <param name="uploadCache">May be null; when non-null each UploadData is passed through GetUniqueUploadData.</param>
        /// <returns>Presumably <c>list</c> - the return statement is not visible in this excerpt.</returns>
        /// <exception cref="TimeoutException">Thrown when no response was obtained after the request loop.</exception>
        private static List <DownloadData> ParseSite(ShowData showData, string url, out string nextpageurl, out string firstcover, UploadCache uploadCache)
            nextpageurl = "";
            firstcover  = "";

            WebResponse resp = null;

            // Up to eight attempts (i = 0..7).
            // NOTE(review): the try line, the loop exit after a successful request and the statement
            // under the status check in the catch are not visible; presumably non-timeout errors are
            // rethrown and timeouts are retried - confirm.
            for (int i = 0; i <= 7; i++)
                    HttpWebRequest req = WebRequest.CreateHttp(url);
                    req.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
                    req.KeepAlive = false;
                    resp          = req.GetResponse();
                catch (WebException ex)
                    if (ex.Status != WebExceptionStatus.Timeout)
            // No attempt produced a response.
            if (resp == null)
                throw new TimeoutException();

            HtmlDocument doc = new HtmlDocument();

            // NOTE(review): no call that loads resp into doc is visible in this excerpt.
            doc.OptionDefaultStreamEncoding = Encoding.UTF8;
            List <DownloadData> list = new List <DownloadData>();

            HtmlNode content = doc.GetElementbyId("content");

            // NOTE(review): the XPath starts with "//", which in XPath selects from the document root
            // rather than relative to content - confirm this is intended.
            HtmlNode nextLink = content.SelectSingleNode("//div[@class='navigation']//a[@href][@class='next']");

            if (nextLink != null)
                nextpageurl = nextLink.GetAttributeValue("href", null);

            // Posts are the div elements with class 'post' directly below content.
            var posts = content.SelectNodes("div[@class='post']");

            // NOTE(review): the statement guarded by this null check is not visible (presumably an early return).
            if (posts == null)
            foreach (var post in posts)
                //--------------Season Title-----------------------------------
                var title = post.SelectSingleNode("h2/a[@href]");
                // NOTE(review): only the log call is visible; title is dereferenced right after, so a
                // skip/continue line is presumably missing here.
                if (title == null)
                    Console.WriteLine("SjInfo Parser: No Title");
                var seasonData = new SeasonData();
                seasonData.Show  = showData;
                seasonData.Url   = title.GetAttributeValue("href", null);
                seasonData.Title = WebUtility.HtmlDecode(title.InnerText);

                var postContent = post.SelectSingleNode("div[@class='post-content']");
                // NOTE(review): as above, only the log call is visible although postContent is used below.
                if (postContent == null)
                    Console.WriteLine("SjInfo Parser: No Post content");

                //----------------Season Cover------------------------------------------------
                var cover = postContent.SelectSingleNode(".//p/img[@src]");
                if (cover != null)
                    seasonData.CoverUrl = cover.GetAttributeValue("src", null);
                    // The first cover found across all posts is reported to the caller.
                    if (String.IsNullOrEmpty(firstcover))
                        firstcover = seasonData.CoverUrl;

                //----------------Season description-----------------------------------------
                // Text of the first <p> that has exactly one child node and is not class 'post-info-co'.
                var desc = postContent.SelectSingleNode(".//p[count(node())=1][not(@class='post-info-co')]/text()");
                if (desc != null)
                    seasonData.Description = WebUtility.HtmlDecode(desc.InnerText);

                // Most recent upload header; link paragraphs that follow are attached to it.
                UploadData uploadData = null;

                // Candidate nodes: every <p> or <div> with at least two <strong> children.
                var ps = postContent.SelectNodes(".//node()[self::p|self::div][count(strong)>=2]");
                // NOTE(review): only the log call is visible; ps is enumerated right after.
                if (ps == null)
                    Console.WriteLine("SjInfo Parser: no uploads/headers");

                foreach (var p in ps)
                    //--------------- Upload Header ------------------------------
                    // A node without a direct <a target=...> child is treated as an upload header.
                    if (p.SelectSingleNode("self::node()[not(./a[@target])]") != null)
                        uploadData        = new UploadData();
                        uploadData.Season = seasonData;

                        // Split the header into "<strong>key</strong> value" pairs separated by '|' or end of text.
                        String          c  = WebUtility.HtmlDecode(p.InnerHtml);
                        MatchCollection mc = new Regex("<strong>\\s*(.+?)\\s*</strong>\\s*(.+?)\\s*(?:\\||$)").Matches(c);
                        foreach (Match match in mc)
                            String key   = match.Groups[1].Value.ToLower();
                            String value = match.Groups[2].Value;
                            // Keys are matched by German or English keyword.
                            if (key.Contains("dauer") || key.Contains("runtime") || key.Contains("duration"))
                                uploadData.Runtime = value;
                            else if (key.Contains("grösse") || key.Contains("größe") || key.Contains("size"))
                                uploadData.Size = value;
                            else if (key.Contains("uploader"))
                                uploadData.Uploader = value;
                            else if (key.Contains("format"))
                                uploadData.Format = value;
                            else if (key.Contains("sprache") || key.Contains("language"))
                                // Language is a flag set: one value may add both German and English.
                                value = value.ToLower();
                                if (value.Contains("deutsch") || value.Contains("german"))
                                    uploadData.Language |= UploadLanguage.German;
                                if (value.Contains("englisch") || value.Contains("english"))
                                    uploadData.Language |= UploadLanguage.English;
                                if (value.Contains("subbed"))
                                    uploadData.Subbed = true;
                    else if (uploadData != null)
                        // ------------------ Links -------------------------
                        // The entry title is the text of the first <strong> child when that child holds a single node.
                        var ulTitle = p.SelectSingleNode("strong[position()=1][count(node())=1]/text()");
                        // NOTE(review): only the log call is visible; ulTitle is dereferenced on the next statement.
                        if (ulTitle == null)
                            Console.WriteLine("SjInfo Parser: No title for link? " + p.InnerHtml);
                        string titleStr = WebUtility.HtmlDecode(ulTitle.InnerText).Trim();

                        // Anchors with an href that are followed by a text sibling; that text is used as the link's key.
                        var links = p.SelectNodes("a[@href][following-sibling::text()]");
                        // NOTE(review): the statement guarded by this null check is not visible.
                        if (links == null)
                        var downloads = new Dictionary <string, string>();
                        foreach (var link in links)
                            string ur     = link.GetAttributeValue("href", null);
                            string keyOrg = WebUtility.HtmlDecode(link.NextSibling.InnerText.Trim());
                            // Drop a leading '|' from the text after the anchor.
                            if (keyOrg.StartsWith("|"))
                                keyOrg = keyOrg.Substring(1).Trim();

                            // Make the key unique by appending "(1)", "(2)", ... until it is unused.
                            String key = keyOrg;
                            int    i   = 1;
                            while (downloads.ContainsKey(key))
                                key = keyOrg + "(" + i++ + ")";
                            downloads.Add(key, ur);

                        // A resolution marker in the entry title replaces the Format taken from the upload header.
                        if (titleStr.Contains("720p"))
                            uploadData.Format = "720p";
                        else if (titleStr.Contains("1080p"))
                            uploadData.Format = "1080p";
                        else if (titleStr.Contains("720i"))
                            uploadData.Format = "720i";
                        else if (titleStr.Contains("1080i"))
                            uploadData.Format = "1080i";

                        DownloadData dd = new DownloadData();
                        dd.Upload = uploadCache == null ? uploadData : uploadCache.GetUniqueUploadData(uploadData);
                        dd.Title  = titleStr;

                        if (titleStr.ToLower().Contains("subbed"))
                            dd.Upload.Subbed = true;

                        foreach (var download in downloads)
                            dd.Links.Add(download.Key, download.Value);

                        // NOTE(review): presumably the final else branch (link paragraph seen before any
                        // upload header); the else line is not visible in this excerpt.
                        Console.WriteLine("SjInfo Parser: UploadData was null");