Esempio n. 1
0
            /// <summary>
            /// Evaluates an XPath expression against an HTML document and returns the outer HTML of every
            /// element that is a direct child of a matched node.
            /// </summary>
            /// <param name="html">HTML markup to parse. Ignored when <paramref name="url"/> is non-empty.</param>
            /// <param name="xmlPath">XPath expression selecting the container nodes, e.g. "//body/div/div/div/div/h2".</param>
            /// <param name="url">When non-empty, the markup is downloaded from this address instead of using <paramref name="html"/>.</param>
            /// <returns>
            /// One entry per child element, in document order. The list is empty when there is no markup,
            /// no XPath, nothing matches, or the XPath cannot be evaluated.
            /// </returns>
            public List <string> Parse_HTML_XML(string html = "", string xmlPath = "//body/div/div/div/div/h2", string url = "")
            {
                List <string> lines = new List <string>();

                string HTML = html;

                if (!string.IsNullOrEmpty(url))
                {
                    // The downloader is only needed on the URL path, so it is created on demand.
                    _AHK ahk = new _AHK();
                    HTML = ahk.Download_HTML(url);
                }

                // Nothing to parse or nothing to look for: LoadHtml rejects null, and an empty
                // XPath can never match, so answer with the empty list right away.
                if (string.IsNullOrEmpty(HTML) || string.IsNullOrEmpty(xmlPath))
                {
                    return(lines);
                }

                var htmlDoc = new HtmlAgilityPack.HtmlDocument();
                htmlDoc.LoadHtml(HTML);

                try
                {
                    // SelectNodes yields null (not an empty collection) when nothing matches.
                    var htmlNodes = htmlDoc.DocumentNode.SelectNodes(xmlPath);
                    if (htmlNodes != null)
                    {
                        foreach (HtmlNode node in htmlNodes)
                        {
                            foreach (HtmlNode child in node.ChildNodes)
                            {
                                // Text and comment nodes are skipped; only element children are reported.
                                if (child.NodeType != HtmlNodeType.Element)
                                {
                                    continue;
                                }

                                lines.Add(child.OuterHtml);
                                Console.WriteLine(child.OuterHtml); // existing behaviour: echo each fragment to stdout
                            }
                        }
                    }
                }
                catch (Exception ex)
                {
                    // Parsing stays best-effort (callers get whatever was collected so far),
                    // but the failure is reported instead of vanishing silently.
                    Console.Error.WriteLine("Parse_HTML_XML: could not evaluate '" + xmlPath + "': " + ex.Message);
                }

                return(lines);
            }
Esempio n. 2
0
            /// <summary>
            /// Downloads a listing page, builds one <c>TCP</c> record per fragment returned for the listing
            /// XPath, optionally downloads each record's images, and passes every record to
            /// <c>TCP_UpdateInsert</c>.
            /// </summary>
            /// <param name="URL">Address of the listing page to download.</param>
            /// <param name="imageRoot">
            /// Root folder for downloaded images; each record gets a sub-folder named after its video id.
            /// Pass an empty string to skip all image downloads.
            /// </param>
            /// <returns>The records built from the page, in page order.</returns>
            public List <TCP> Parse_TCPPage(string URL, string imageRoot = @"H:\SiteParse\TheClassic")
            {
                List <TCP> Films = new List <TCP>();

                string        xpath       = "/html/body/div[3]/div/div[2]/div[4]/ul";
                string        html        = ahk.Download_HTML(URL);
                List <string> resultLines = xml.Parse_HTML_XML(html, xpath); // one fragment per child element of the list

                foreach (string line in resultLines)
                {
                    TCP Film = new TCP();

                    List <string> ImageLinks = new List <string>();

                    // Break after every `">` so each opening tag ends its own line and the
                    // text that follows it (e.g. the title) starts the next one.
                    string        seg        = line.Replace("\">", "\">\n");
                    List <string> newLines   = lst.Text_To_List(seg);
                    bool          TitleFound = false;

                    foreach (string newline in newLines)
                    {
                        // FILM URL
                        if (newline.Contains("thumb-video-link\" href=\"/videos"))
                        {
                            string videoURL = newline.Replace("<a class=\"thumb-video-link\" href=\"", "");
                            videoURL     = videoURL.Replace("/\">", "");
                            Film.FilmURL = "https://theclassicporn.com" + videoURL;
                        }

                        if (newline.Contains("covers.jpg"))
                        {
                            // VIDEO ID: the path segment immediately before the one holding "covers.jpg".
                            List <string> pathParts = ahk.StringSplit_List(newline, "/");
                            foreach (string item in pathParts)
                            {
                                if (item.Contains("covers.jpg"))
                                {
                                    break;
                                }
                                Film.VideoID = item;
                            }

                            // VIDEO COVER URL: the first quoted value that mentions "covers.jpg".
                            List <string> quotedParts = ahk.StringSplit_List(newline, "\"");
                            foreach (string item in quotedParts)
                            {
                                if (item.Contains("covers.jpg"))
                                {
                                    Film.CoverURL = item;
                                    break;
                                }
                            }
                        }

                        // VIDEO TITLE: the text sits on the line after the title anchor tag.
                        if (newline.Contains("class=\"link-blue link-no-border\""))
                        {
                            TitleFound = true;
                            continue;
                        }
                        if (TitleFound)
                        {
                            Film.FilmName = newline.Replace("</a>", "");
                            TitleFound    = false;
                        }

                        // SCREENSHOTS: every other .jpg on the line that is not the cover.
                        if (newline.Contains(".jpg") && !newline.Contains("covers.jpg"))
                        {
                            string ImageLine = newline.Replace("\" alt=\"\">", "");
                            ImageLine = ImageLine.Replace("<img class=\"screen-thumb\" src=\"", "");
                            ImageLinks.Add(ImageLine);
                        }
                    }

                    // Newline-separated copy of the image list. Every entry still contains ".jpg",
                    // so none is empty and a plain join matches the previous hand-rolled concatenation.
                    string links = string.Join("\n", ImageLinks.ToArray());

                    if (imageRoot != "")
                    {
                        // Images are grouped on disk by video id.
                        string saveDir = imageRoot + "\\" + Film.VideoID;
                        ahk.FileCreateDir(saveDir);

                        foreach (string image in ImageLinks)
                        {
                            string fileName = ahk.StringSplit(image, "/", 0, true);
                            ahk.Download_File(image, saveDir + "\\" + fileName);
                        }

                        // The cover is only known when a "covers.jpg" line was seen.
                        if (Film.CoverURL != null)
                        {
                            string filename = ahk.StringSplit(Film.CoverURL, "/", 0, true);
                            ahk.Download_File(Film.CoverURL, saveDir + "\\" + filename);
                        }
                    }

                    Film.ImageLinks = links;
                    Film.ImgLinks   = ImageLinks;
                    Films.Add(Film);

                    TCP_UpdateInsert(Film);
                }

                return(Films);
            }
Esempio n. 3
0
            /// <summary>
            /// Walks the movie-category index pages from <paramref name="StartPage"/> through
            /// <paramref name="LastPage"/> and hands the title, URL and IMDb id found for each post to
            /// <c>ReleaseBBMovies_UpdateInsert_IndexEntry</c>.
            /// </summary>
            /// <param name="StartPage">First index page to fetch. It is always fetched, even when it is greater than <paramref name="LastPage"/>.</param>
            /// <param name="LastPage">Last index page to fetch (inclusive).</param>
            public void Download_ReleaseBB_MovieIndex(int StartPage = 1, int LastPage = 1366)
            {
                _Parse.XML xml = new _Parse.XML();
                _Lists     lst = new _Lists();

                _Sites.Imdb imdb = new Imdb();

                const string indexRoot   = "http://rlsbb.ru/category/movies/page/";
                const string contentPath = "//*[@id=\"contentArea\"]";

                int currentPage = StartPage;

                do
                {
                    string pageHtml = ahk.Download_HTML(indexRoot + currentPage + "/");

                    // One fragment per child element of the content area.
                    List <string> fragments = xml.Parse_HTML_XML(pageHtml, contentPath);

                    int position = 0;
                    foreach (string fragment in fragments)
                    {
                        ReleaseBBMovies entry = new ReleaseBBMovies();

                        position++;

                        // Fragments 1, 2, 13 and 14 (1-based) are not posts and are skipped.
                        bool notAPost = position == 1 || position == 2 || position == 13 || position == 14;
                        if (notAPost)
                        {
                            continue;
                        }

                        string imdbId = "";
                        List <string> imdbLinks = imdb.Regex_IMDbLinks(fragment);
                        if (imdbLinks.Count > 0)
                        {
                            imdbId = imdb.IMDb_ID_FromURL(imdbLinks[0]);
                        }

                        string title   = "";
                        string postUrl = "";

                        foreach (string row in lst.Text_To_List(fragment, true, true, false))
                        {
                            if (!row.Contains("postTitle"))
                            {
                                continue;
                            }

                            // Strip the heading prefix; what remains starts with the link target,
                            // followed by the closing quote and the anchor text.
                            string remainder = row.Replace("<h2 class=\"postTitle\"><span></span><a href=\"", "");

                            string afterTag = ahk.StringSplit(remainder, ">", 1);
                            title   = ahk.StringSplit(afterTag, "<", 0);
                            postUrl = ahk.StringSplit(remainder, "\"", 0);
                        }

                        entry.IMDbID    = imdbId;
                        entry.PostTitle = title;
                        entry.PostURL   = postUrl;

                        ReleaseBBMovies_UpdateInsert_IndexEntry(entry);
                    }

                    currentPage++;
                }while (currentPage <= LastPage);
            }
Esempio n. 4
0
            //_Parse.XML xml = new _Parse.XML();
            //_AHK ahk = new _AHK();
            //_Database.SQL sql = new _Database.SQL();
            //_Lists lst = new _Lists();
            //_Parse prs = new _Parse();
            ////_Images img = new _Images();
            //_TelerikLib.RadProgress pro = new _TelerikLib.RadProgress();
            //_TelerikLib tel = new _TelerikLib();
            ////_Apps.Chrome cr = new _Apps.Chrome();
            //sharpAHK_Dev._Threads thr = new sharpAHK_Dev._Threads();
            //_TelerikLib.RadTree tree = new _TelerikLib.RadTree();
            //IAFD iafd = new IAFD();
            //_Web.ADBSites.PBBForum pbb = new _Web.ADBSites.PBBForum();
            //_Web.ADBSites.PRNWForum prnw = new _Web.ADBSites.PRNWForum();

            #endregion


            /// <summary>
            /// Walks the site's index pages from <paramref name="startPage"/> through <paramref name="LastPage"/>.
            /// For every post heading found it collects and batch-checks the RapidGator links, downloads the
            /// post's JPG images, and passes the resulting record to <c>PrnChill_UpdateInsert</c>.
            /// </summary>
            /// <param name="startPage">First index page to fetch. It is always fetched, even when it is greater than <paramref name="LastPage"/>.</param>
            /// <param name="LastPage">Last index page to fetch (inclusive).</param>
            /// <param name="SkipExisting">When true, posts for which <c>AlreadyParsed</c> returns true are skipped.</param>
            /// <param name="Bar">Optional progress bar showing per-page post progress; may be null.</param>
            /// <param name="Bar2">Optional progress bar handed to the RapidGator batch check; may be null.</param>
            /// <param name="NewThread">When true, the work is started on a background thread and this call returns immediately.</param>
            public void Download_Site(int startPage = 1, int LastPage = 230, bool SkipExisting = true, RadProgressBar Bar = null, RadProgressBar Bar2 = null, bool NewThread = true)
            {
                if (NewThread)
                {
                    // Re-enter this method on a background thread with NewThread = false.
                    // Nothing else is needed on the spawning call, so no helpers are created here.
                    Thread newThread = new Thread(() => Download_Site(startPage, LastPage, SkipExisting, Bar, Bar2, false));
                    newThread.IsBackground = true;
                    newThread.Start();
                    return;
                }

                // Scale of the per-page progress bar (the original comments call this "# of posts/page").
                const int PostsPerPage = 10;

                _AHK   ahk = new _AHK();
                _Lists lst = new _Lists();

                _Sites.RapidGator       rg  = new _Sites.RapidGator();
                _TelerikLib.RadProgress pro = new _TelerikLib.RadProgress();

                int pageNum = startPage;
                do
                {
                    int           postNum = 0;
                    string        html    = ahk.Download_HTML("http://pornchil.com/page/" + pageNum + "/");
                    List <string> lines   = lst.Text_To_List(html, true, true, false);

                    if (Bar != null)
                    {
                        pro.SetupProgressBar(Bar, PostsPerPage);
                    }

                    foreach (string line in lines)
                    {
                        // Only post headings carry a post link.
                        if (!line.Contains("<h1 class=\"entry-title\"><a href="))
                        {
                            continue;
                        }

                        PrnChill chill = new PrnChill();

                        // After stripping the heading prefix the line starts with the post URL,
                        // followed by the closing quote and the anchor text.
                        string Line = line.Replace("<h1 class=\"entry-title\"><a href=\"", "");
                        chill.PostURL  = ahk.StringSplit(Line, "\"", 0);
                        chill.PostName = ahk.StringSplit(Line, ">", 1);
                        chill.PostName = ahk.StringSplit(chill.PostName, "<", 0);

                        // Skip posts that already have an entry.
                        if (SkipExisting && AlreadyParsed(chill.PostURL))
                        {
                            continue;
                        }

                        if (Bar != null)
                        {
                            postNum++;
                            pro.UpdateProgress(Bar, postNum + "/" + PostsPerPage);
                        }

                        string        postHTML = ahk.Download_HTML(chill.PostURL);
                        List <string> links    = rg.Regex_RGLinks(postHTML);

                        if (links.Count > 0)
                        {
                            if (Bar != null)
                            {
                                pro.ProgressText(Bar, postNum + "/" + PostsPerPage + " | Verifying Links");
                            }

                            List <_Sites.RapidGator.RGInfo> checkedLinks = rg.RapidGator_BatchCheck(links, true, Bar2);

                            // One "url|size" entry per checked link, newline separated.
                            List <string> entries = new List <string>();
                            foreach (_Sites.RapidGator.RGInfo link in checkedLinks)
                            {
                                entries.Add(link.FileURL + "|" + link.FileSize);
                            }

                            chill.Links     = string.Join("\n", entries.ToArray());
                            chill.LinkCount = entries.Count;

                            chill.LinkCheckDate = DateTime.Now.ToString();
                        }

                        chill.InCollection = "false";

                        List <string> images = rg.JpgImageLinks(postHTML);

                        if (Bar != null)
                        {
                            pro.ProgressText(Bar, postNum + "/" + PostsPerPage + " | Downloading Images (" + images.Count + ")");
                        }

                        if (images.Count > 0)
                        {
                            string saveDir = ahk.AppDir() + "\\PrnChillPosts";
                            ahk.FileCreateDir(saveDir);

                            // The post title comes straight from page markup, so characters that are not
                            // legal in a file name (':', '?', '"', ...) are replaced before it becomes a folder name.
                            string folderName = string.Join("_", (chill.PostName ?? "").Split(System.IO.Path.GetInvalidFileNameChars()));

                            saveDir = saveDir + "\\" + folderName;
                            ahk.FileCreateDir(saveDir);

                            chill.ImageDir = saveDir;

                            // Images are saved as 1.jpg, 2.jpg, ... in the order they were found.
                            int imgNum = 1;
                            foreach (string image in images)
                            {
                                ahk.Download_File(image, saveDir + "\\" + imgNum + ".jpg", true);
                                imgNum++;
                            }
                        }

                        PrnChill_UpdateInsert(chill);
                    }

                    pageNum++;
                } while (pageNum <= LastPage);
            }