コード例 #1
0
        /// <summary>
        /// Thread-pool callback that parses one queued video and then marks the work item as finished.
        /// </summary>
        /// <param name="obj">
        /// State object supplied when the work item was queued; it is cast to <c>VideoWrapper</c>.
        /// A value of any other type (or null) is skipped.
        /// </param>
        /// <remarks>
        /// Every exception is swallowed, as before. The queue bookkeeping now runs in a
        /// <c>finally</c> block, so <c>QueueLength</c> is decremented and <c>Event</c> is signalled
        /// even when parsing fails.
        /// </remarks>
        public static void Consume(Object obj)
        {
            try
            {
                try
                {
                    VideoWrapper video = obj as VideoWrapper;

                    // A failed cast yields null; there is nothing to parse in that case.
                    if (video != null)
                    {
                        ChannelVideo.parseVideo(video, multithreadingChannelName);
                    }
                }
                finally
                {
                    // Always release the queue slot; otherwise code waiting for the queue to drain
                    // would wait forever after a single failed item.
                    // NOTE(review): QueueLength is modified from several threads without
                    // synchronization - confirm whether an atomic update is needed.
                    QueueLength--;
                    Console.WriteLine("consuming" + QueueLength);
                    if (WaitForComplete && QueueLength == 0)
                    {
                        Event.Set();
                    }
                }
            }
            catch (Exception)
            {
                // Deliberate best effort: a failure on one video must not take down the worker thread.
            }
        }
コード例 #2
0
ファイル: ChannelComment.cs プロジェクト: QiseD/Crawler
 /// <summary>
 /// Queues one video for processing by <c>Consume</c> on the thread pool and counts it in
 /// <c>QueueLength</c>.
 /// </summary>
 /// <param name="videoDictionary">The video to hand to the consumer callback.</param>
 public static void Produce(VideoWrapper videoDictionary)
 {
     // Count the item BEFORE it is queued. If the increment came afterwards, a fast consumer could
     // decrement first, skip past zero and miss its "QueueLength == 0" completion check.
     QueueLength++;
     try
     {
         ThreadPool.QueueUserWorkItem(
             new WaitCallback(Consume), videoDictionary);
     }
     catch
     {
         // The item never reached the pool, so it must not stay counted.
         QueueLength--;
         throw;
     }
     Console.WriteLine("producing" + QueueLength);
 }
コード例 #3
0
 /// <summary>
 /// Queues one video for processing by <c>Consume</c> on the thread pool and counts it in
 /// <c>QueueLength</c>.
 /// </summary>
 /// <param name="videoDictionary">The video to hand to the consumer callback.</param>
 public static void Produce(VideoWrapper videoDictionary)
 {
     // Count the item BEFORE it is queued. If the increment came afterwards, a fast consumer could
     // decrement first, skip past zero and miss its "QueueLength == 0" completion check.
     QueueLength++;
     try
     {
         ThreadPool.QueueUserWorkItem(
             new WaitCallback(Consume), videoDictionary);
     }
     catch
     {
         // The item never reached the pool, so it must not stay counted.
         QueueLength--;
         throw;
     }
     Console.WriteLine("producing" + QueueLength);
 }
コード例 #4
0
ファイル: ChannelComment.cs プロジェクト: QiseD/Crawler
        /// <summary>
        /// Queues every video of the dictionary for comment crawling and blocks until the queue is empty.
        /// </summary>
        /// <param name="videoDictionary">Videos to crawl; only the values are used.</param>
        /// <param name="pChannelName">Channel name, stored in the static <c>channelName</c> field.</param>
        /// <returns>Always <c>true</c>.</returns>
        public static bool CrawlComments(Dictionary <string, VideoWrapper> videoDictionary, string pChannelName)
        {
            channelName = pChannelName;

            // Upper bound for the number of work items that may be pending at the same time.
            string configuredLimit = ConfigurationManager.AppSettings["totalThreadsAtOneTime"].ToString();
            int    maxPending      = Int32.Parse(configuredLimit);

            foreach (KeyValuePair <string, VideoWrapper> entry in videoDictionary)
            {
                try
                {
                    Produce(entry.Value);

                    // Throttle: poll every two seconds until the queue drops below the limit.
                    while (true)
                    {
                        if (QueueLength < maxPending)
                        {
                            break;
                        }
                        Thread.Sleep(2000);
                    }
                }
                catch (Exception)
                {
                    // A failure for one video is ignored; the loop moves on to the next entry.
                }
            }

            // Everything is queued; poll once per second until the consumers have finished.
            for (; QueueLength > 0;)
            {
                Thread.Sleep(1000);
            }

            return true;
        }
コード例 #5
0
ファイル: ChannelComment.cs プロジェクト: QiseD/Crawler
        /// <summary>
        /// Thread-pool callback that crawls the comments of one queued video and then marks the
        /// work item as finished.
        /// </summary>
        /// <param name="obj">
        /// State object supplied when the work item was queued; it is cast to <c>VideoWrapper</c>.
        /// A value of any other type (or null) is skipped.
        /// </param>
        /// <remarks>
        /// Every exception is swallowed, as before. The queue bookkeeping now runs in a
        /// <c>finally</c> block, so <c>QueueLength</c> is decremented and <c>Event</c> is signalled
        /// even when the crawl fails.
        /// </remarks>
        public static void Consume(Object obj)
        {
            try
            {
                try
                {
                    VideoWrapper video = obj as VideoWrapper;

                    // A failed cast yields null; passing it on would only make the download fail.
                    if (video != null)
                    {
                        Dictionary <int, string> htmlFiles = new Dictionary <int, string>();

                        // Crawling starts at the first comment page.
                        DownloadHtmls(channelName, video, htmlFiles, 1);
                    }

                    commentCount = 0;
                }
                finally
                {
                    // Always release the queue slot; otherwise code waiting for the queue to drain
                    // would wait forever after a single failed item.
                    // NOTE(review): QueueLength and commentCount are modified from several threads
                    // without synchronization - confirm whether that is acceptable.
                    QueueLength--;
                    Console.WriteLine("consuming" + QueueLength);
                    if (WaitForComplete && QueueLength == 0)
                    {
                        Event.Set();
                    }
                }
            }
            catch (Exception)
            {
                // Deliberate best effort: a failure on one video must not take down the worker thread.
            }
        }
コード例 #6
0
ファイル: ChannelComment.cs プロジェクト: QiseD/Crawler
        /// <summary>
        /// Parses previously downloaded comment pages of one video, appends every comment that is
        /// not yet in <c>GlobalConstants.commentDictionary</c> to the video's comment text file,
        /// and finally passes the page files to <c>Common.RemoveTempFiles</c>.
        /// </summary>
        /// <param name="pVideoWrapper">Video whose key and name are used for the URL and the output file name.</param>
        /// <param name="pChannelName">Channel directory; pages are read from and comments written to its "Comments" sub directory.</param>
        /// <param name="pHtmlFiles">File names of the downloaded pages (values) keyed by an integer.</param>
        /// <remarks>
        /// A page that cannot be read or parsed is skipped. When <c>parseAllComments</c> is "false",
        /// processing stops as soon as <c>commentCount</c> reaches <c>totalCommentsParse</c>.
        /// </remarks>
        public static void GetAllComments(VideoWrapper pVideoWrapper, string pChannelName, Dictionary <int, string> pHtmlFiles)
        {
            string        videoUrl          = "https://www.youtube.com/watch?v=" + pVideoWrapper.getVideoKey();
            bool          videoUrlFlag      = false;
            List <string> tempFiles         = new List <string>();
            string        commentsDirectory = pChannelName + "/" + "Comments";

            foreach (KeyValuePair <int, string> pair in pHtmlFiles)
            {
                try
                {
                    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();

                    // Read the whole page and release the file handle right away, even when
                    // parsing fails later; an open handle could block the temp-file cleanup below.
                    using (Stream stream = File.OpenRead(pChannelName + "/Comments/" + pair.Value))
                    using (StreamReader reader = new StreamReader(stream))
                    {
                        doc.LoadHtml(reader.ReadToEnd());
                    }

                    HtmlNodeCollection totalCollection = doc.DocumentNode.SelectNodes("//ul[@id='all-comments']//li[@class='comment']//div[@class='content']");
                    if (totalCollection == null)
                    {
                        // No comment blocks on this page.
                        continue;
                    }

                    bool breakLoop = false;

                    foreach (HtmlNode node in totalCollection)
                    {
                        string   user        = string.Empty;
                        string   displayName = string.Empty;
                        string   date        = string.Empty;
                        string   comment     = string.Empty;
                        HtmlNode nodeData    = node.ParentNode;

                        // The ids are read by attribute position on the parent node.
                        string dataId   = nodeData.Attributes[2].Value.Trim();
                        string authorId = nodeData.Attributes[1].Value.Trim();
                        int    divCount = 0;

                        foreach (HtmlNode child in node.ChildNodes)
                        {
                            if (child.Name.Equals("p"))
                            {
                                // First <span> carries the author, the next one the date.
                                bool userFlag = false;
                                foreach (HtmlNode n in child.ChildNodes)
                                {
                                    if (n.Name.Equals("span") && !userFlag)
                                    {
                                        foreach (HtmlNode nNode in n.ChildNodes)
                                        {
                                            if (nNode.Name.Equals("a"))
                                            {
                                                // Second path segment of the link is taken as the user name.
                                                user = nNode.Attributes["href"].Value.Split(new Char[] { '/' }, StringSplitOptions.RemoveEmptyEntries)[1];
                                                break;
                                            }
                                        }
                                        displayName = n.InnerText.Trim();
                                        userFlag    = true;
                                    }
                                    else if (n.Name.Equals("span"))
                                    {
                                        date = n.InnerText.Trim();
                                        break;
                                    }
                                }
                            }
                            else if (child.Name.Equals("div"))
                            {
                                if (divCount == 0)
                                {
                                    // Only the first <div> is taken as the comment text.
                                    comment = child.InnerText.Trim();
                                    divCount++;
                                }
                            }
                        }

                        if (!displayName.Equals("") && !comment.Equals("") && !dataId.Equals("") && !authorId.Equals("") && !user.Equals("") && !GlobalConstants.commentDictionary.ContainsKey(dataId))
                        {
                            VideoCommentWrapper commentWrapper = new VideoCommentWrapper();

                            commentWrapper.authorId    = authorId;
                            commentWrapper.commentId   = dataId;
                            commentWrapper.commentText = comment;
                            commentWrapper.time        = date;
                            commentWrapper.displayName = displayName;
                            commentWrapper.userName    = user;

                            GlobalConstants.commentDictionary.Add(dataId, commentWrapper);

                            string commentFileName = Common.CleanFileName(pVideoWrapper.getVideoName() + "-" + fileComment) + ".txt";
                            string commentFilePath = commentsDirectory + "/" + commentFileName;
                            if (!Directory.Exists(commentsDirectory))
                            {
                                Directory.CreateDirectory(commentsDirectory);
                            }
                            commentCount++;
                            if (!videoUrlFlag)
                            {
                                // The video URL header is written once per call.
                                File.AppendAllText(commentFilePath, "Video Url : " + videoUrl + Environment.NewLine + "\r\n");
                                videoUrlFlag = true;
                            }

                            // One append per comment keeps the three lines of a record together.
                            File.AppendAllText(
                                commentFilePath,
                                "User name : " + displayName + Environment.NewLine +
                                "Comment Date : " + date + Environment.NewLine +
                                "Comment : " + comment + Environment.NewLine);
                        }

                        if (parseAllComments.Equals("false", StringComparison.CurrentCultureIgnoreCase))
                        {
                            if (totalCommentsParse <= commentCount)
                            {
                                // Configured comment limit reached: stop this page and all following ones.
                                breakLoop = true;
                                break;
                            }
                        }
                    }

                    if (breakLoop)
                    {
                        break;
                    }
                }
                catch (Exception)
                {
                    // Best effort: a page that cannot be read or parsed is skipped.
                    continue;
                }
            }

            foreach (KeyValuePair <int, string> file in pHtmlFiles)
            {
                tempFiles.Add("/Comments/" + file.Value);
            }

            Common.RemoveTempFiles(tempFiles, pChannelName);
        }
コード例 #7
0
ファイル: ChannelComment.cs プロジェクト: QiseD/Crawler
        /// <summary>
        /// Loads the "all comments" pages of a video, starting at <paramref name="pPageNo"/>, and
        /// appends every new comment to the video's comment text file.
        /// </summary>
        /// <param name="pChannelName">Channel directory; comments are written to its "Comments" sub directory.</param>
        /// <param name="pVideo">Video whose key and name are used for the URL and the output file name.</param>
        /// <param name="pHtmlFiles">Not used by the current implementation; kept so existing callers compile.</param>
        /// <param name="pPageNo">Number of the first page to load.</param>
        /// <remarks>
        /// Further pages are only loaded when <c>parseAllComments</c> is "true"; paging stops at the
        /// first page without comments. Paging and retrying used to be recursive, and a failing page
        /// was retried without limit, which ends in a stack overflow for a failure that never goes
        /// away. Both are now a loop, and a page is given up after a fixed number of failed attempts.
        /// </remarks>
        public static void DownloadHtmls(string pChannelName, VideoWrapper pVideo, Dictionary <int, string> pHtmlFiles, int pPageNo)
        {
            // Upper bound of consecutive failures for one page before the crawl of this video stops.
            const int maxAttemptsPerPage = 5;
            int       failedAttempts     = 0;

            while (true)
            {
                try
                {
                    if (!WriteCommentsOfPage(pChannelName, pVideo, pPageNo))
                    {
                        // Page without comments: end of the comment list.
                        return;
                    }

                    if (!parseAllComments.Equals("true", StringComparison.CurrentCultureIgnoreCase))
                    {
                        // Only the first page is wanted.
                        return;
                    }

                    pPageNo++;
                    failedAttempts = 0;
                }
                catch (Exception)
                {
                    // Retry the same page; comments stored before the failure are skipped on the
                    // retry because their ids are already in the comment dictionary.
                    failedAttempts++;
                    if (failedAttempts >= maxAttemptsPerPage)
                    {
                        return;
                    }
                }
            }
        }

        /// <summary>
        /// Loads one comment page and appends its new comments to the video's comment text file.
        /// </summary>
        /// <returns><c>false</c> when the page contains no comments, otherwise <c>true</c>.</returns>
        private static bool WriteCommentsOfPage(string pChannelName, VideoWrapper pVideo, int pPageNo)
        {
            string url = ConfigurationManager.AppSettings["VideoAllCommentsUrl"].ToString() + pVideo.getVideoKey() + "&page=" + pPageNo;

            HtmlWeb      hwObject = new HtmlWeb();
            HtmlDocument doc      = hwObject.Load(url);

            HtmlNodeCollection commentItems = doc.DocumentNode.SelectNodes("//ul[@id='all-comments']//li[@class='comment']");
            if (commentItems == null || commentItems.Count <= 0)
            {
                return false;
            }

            HtmlNodeCollection totalCollection = doc.DocumentNode.SelectNodes("//ul[@id='all-comments']//li[@class='comment']//div[@class='content']");
            if (totalCollection == null)
            {
                // Comment items without a content block cannot be parsed.
                return false;
            }

            string videoUrl          = "https://www.youtube.com/watch?v=" + pVideo.getVideoKey();
            string commentsDirectory = pChannelName + "/" + "Comments";

            // NOTE(review): the flag is per page, so the URL header can be written once per page -
            // confirm whether once per video was intended.
            bool videoUrlFlag = false;

            foreach (HtmlNode node in totalCollection)
            {
                string   user        = string.Empty;
                string   displayName = string.Empty;
                string   date        = string.Empty;
                string   comment     = string.Empty;
                HtmlNode nodeData    = node.ParentNode;

                // The ids are read by attribute position on the parent node.
                string dataId   = nodeData.Attributes[2].Value.Trim();
                string authorId = nodeData.Attributes[1].Value.Trim();
                int    divCount = 0;

                foreach (HtmlNode child in node.ChildNodes)
                {
                    if (child.Name.Equals("p"))
                    {
                        // First <span> carries the author, the next one the date.
                        bool userFlag = false;
                        foreach (HtmlNode n in child.ChildNodes)
                        {
                            if (n.Name.Equals("span") && !userFlag)
                            {
                                foreach (HtmlNode nNode in n.ChildNodes)
                                {
                                    if (nNode.Name.Equals("a"))
                                    {
                                        // Second path segment of the link is taken as the user name.
                                        user = nNode.Attributes["href"].Value.Split(new Char[] { '/' }, StringSplitOptions.RemoveEmptyEntries)[1];
                                        break;
                                    }
                                }
                                displayName = n.InnerText.Trim();
                                userFlag    = true;
                            }
                            else if (n.Name.Equals("span"))
                            {
                                date = n.InnerText.Trim();
                                break;
                            }
                        }
                    }
                    else if (child.Name.Equals("div"))
                    {
                        if (divCount == 0)
                        {
                            // Only the first <div> is taken as the comment text.
                            comment = child.InnerText.Trim();
                            divCount++;
                        }
                    }
                }

                // NOTE(review): the shared comment dictionary is used from thread-pool callbacks
                // without locking - confirm that it is safe for concurrent use.
                if (!displayName.Equals("") && !comment.Equals("") && !dataId.Equals("") && !authorId.Equals("") && !user.Equals("") && !GlobalConstants.commentDictionary.ContainsKey(dataId))
                {
                    VideoCommentWrapper commentWrapper = new VideoCommentWrapper();

                    commentWrapper.authorId    = authorId;
                    commentWrapper.commentId   = dataId;
                    commentWrapper.commentText = comment;
                    commentWrapper.time        = date;
                    commentWrapper.displayName = displayName;
                    commentWrapper.userName    = user;

                    GlobalConstants.commentDictionary.Add(dataId, commentWrapper);

                    string commentFileName = Common.CleanFileName(pVideo.getVideoName() + "-" + fileComment) + ".txt";
                    string commentFilePath = commentsDirectory + "/" + commentFileName;
                    if (!Directory.Exists(commentsDirectory))
                    {
                        Directory.CreateDirectory(commentsDirectory);
                    }
                    commentCount++;
                    if (!videoUrlFlag)
                    {
                        File.AppendAllText(commentFilePath, "Video Url : " + videoUrl + Environment.NewLine + "\r\n");
                        videoUrlFlag = true;
                    }

                    // One append per comment keeps the three lines of a record together.
                    File.AppendAllText(
                        commentFilePath,
                        "User name : " + displayName + Environment.NewLine +
                        "Comment Date : " + date + Environment.NewLine +
                        "Comment : " + comment + Environment.NewLine);
                }

                if (parseAllComments.Equals("false", StringComparison.CurrentCultureIgnoreCase))
                {
                    if (totalCommentsParse <= commentCount)
                    {
                        // Configured comment limit reached.
                        break;
                    }
                }
            }

            return true;
        }
コード例 #8
0
ファイル: ChannelVideo.cs プロジェクト: gcnonato/Crawler
        /// <summary>
        /// Downloads the metadata feed of one video and appends the extracted fields to
        /// "&lt;channel&gt;/Videos/channel_video_&lt;video file name&gt;".
        /// </summary>
        /// <param name="pVideoWrapper">Video whose key, name and channel URL are used.</param>
        /// <param name="pChannelName">Channel name; its cleaned form is the output directory.</param>
        /// <remarks>
        /// A failed attempt is retried after ten seconds. The retry used to be an unbounded
        /// recursive call; it is now a loop that gives up after a fixed number of attempts, so a
        /// failure that never goes away cannot block the worker forever or overflow the stack.
        /// </remarks>
        public static void parseVideo(VideoWrapper pVideoWrapper, string pChannelName)
        {
            // Total number of attempts before the video is given up.
            const int maxAttempts = 5;

            string fileVideo        = ConfigurationManager.AppSettings["channelVideo"].ToString();
            string videoChannelName = Common.CleanFileName(pChannelName);

            for (int attempt = 1; attempt <= maxAttempts; attempt++)
            {
                try
                {
                    FetchAndWriteVideoInfo(pVideoWrapper, videoChannelName, fileVideo);
                    return;
                }
                catch (Exception)
                {
                    if (attempt < maxAttempts)
                    {
                        // Back off before the next attempt.
                        Thread.Sleep(10000);
                    }
                }
            }
        }

        /// <summary>
        /// Performs one download-and-write attempt for <c>parseVideo</c>; any failure is thrown to the caller.
        /// </summary>
        private static void FetchAndWriteVideoInfo(VideoWrapper pVideoWrapper, string videoChannelName, string fileVideo)
        {
            string channelFileNameXML = Common.CleanFileName(pVideoWrapper.getVideoName()) + "-" + ConfigurationManager.AppSettings["channelsFileNameXML"].ToString();
            string videoUrl = string.Format("https://gdata.youtube.com/feeds/api/videos/{0}?v=2", pVideoWrapper.getVideoKey());

            // Dispose response, stream and reader: an unclosed response keeps its connection
            // occupied, and repeated calls could run out of connections to the host.
            string xmlData;
            WebRequest nameRequest = WebRequest.Create(videoUrl);
            using (HttpWebResponse nameResponse = (HttpWebResponse)nameRequest.GetResponse())
            using (Stream nameStream = nameResponse.GetResponseStream())
            using (StreamReader nameReader = new StreamReader(nameStream))
            {
                xmlData = nameReader.ReadToEnd();
            }

            XmlDocument doc = new XmlDocument();
            try
            {
                File.WriteAllText(channelFileNameXML, xmlData);
                doc.Load(channelFileNameXML);
            }
            finally
            {
                // The temporary XML file is removed on failure as well.
                File.Delete(channelFileNameXML);
            }

            XmlNamespaceManager namespaceManager = new XmlNamespaceManager(doc.NameTable);
            namespaceManager.AddNamespace("Atom", "http://www.w3.org/2005/Atom");
            namespaceManager.AddNamespace("yt", "http://gdata.youtube.com/schemas/2007");
            namespaceManager.AddNamespace("media", "http://search.yahoo.com/mrss/");

            // Only the first link element is inspected; its href is used when rel is "alternate".
            string  url      = string.Empty;
            XmlNode linkNode = doc.SelectSingleNode("//Atom:entry/Atom:link", namespaceManager);
            if (linkNode != null && linkNode.Attributes["rel"] != null && linkNode.Attributes["href"] != null
                && linkNode.Attributes["rel"].Value.Equals("alternate", StringComparison.CurrentCultureIgnoreCase))
            {
                // Keep only the part before the first '&'.
                string[] urlArr = linkNode.Attributes["href"].Value.Split(new Char[] { '&' }, StringSplitOptions.RemoveEmptyEntries);
                url = urlArr.Length > 0 ? urlArr[0] : string.Empty;
            }

            string videoName      = ReadNodeText(doc, "//Atom:entry/Atom:title", namespaceManager);
            string date           = ReadNodeText(doc, "//Atom:entry/Atom:published", namespaceManager);
            string iDislike       = ReadNodeAttribute(doc, "//Atom:entry/yt:rating", "numDislikes", namespaceManager);
            string iLike          = ReadNodeAttribute(doc, "//Atom:entry/yt:rating", "numLikes", namespaceManager);
            string description    = ReadNodeText(doc, " //Atom:entry/media:group/media:description", namespaceManager);
            string videoViewCount = ReadNodeAttribute(doc, "//Atom:entry/yt:statistics", "viewCount", namespaceManager);

            // A missing tag list is written as an empty value instead of failing after the
            // other lines have already been appended.
            List<string> videoTags = preapreParamsTags(doc.SelectNodes("//Atom:entry/Atom:category", namespaceManager));
            string tags = videoTags != null ? string.Join(",", videoTags.ToArray()) : string.Empty;

            string videoNameFile   = Common.CleanFileName(videoName + "-" + fileVideo);
            string videosDirectory = videoChannelName + "/" + "Videos";
            if (!Directory.Exists(videosDirectory))
            {
                Directory.CreateDirectory(videosDirectory);
            }

            // All lines go out in a single append so a failure cannot leave a partial record.
            File.AppendAllText(
                videosDirectory + "/" + "channel_video_" + videoNameFile,
                "Video Channel : " + videoChannelName + Environment.NewLine +
                "Channel Url : " + pVideoWrapper.getChannelUrl() + Environment.NewLine +
                "Video Name : " + videoName + Environment.NewLine +
                "Video Url : " + url + Environment.NewLine +
                "Date : " + date + Environment.NewLine +
                "Video Views : " + videoViewCount + Environment.NewLine +
                "I Like : " + iLike + Environment.NewLine +
                "I dislike : " + iDislike + Environment.NewLine +
                "Description : " + description + Environment.NewLine +
                "Tags : " + tags + Environment.NewLine);
        }

        /// <summary>Returns the inner text of the first node matching the XPath, or an empty string.</summary>
        private static string ReadNodeText(XmlDocument doc, string xpath, XmlNamespaceManager namespaceManager)
        {
            XmlNode node = doc.SelectSingleNode(xpath, namespaceManager);
            return node != null ? node.InnerText : string.Empty;
        }

        /// <summary>Returns the named attribute of the first node matching the XPath, or an empty string.</summary>
        private static string ReadNodeAttribute(XmlDocument doc, string xpath, string attributeName, XmlNamespaceManager namespaceManager)
        {
            XmlNode node = doc.SelectSingleNode(xpath, namespaceManager);
            if (node == null || node.Attributes == null || node.Attributes[attributeName] == null)
            {
                return string.Empty;
            }
            return node.Attributes[attributeName].Value;
        }
コード例 #9
0
ファイル: Channel.cs プロジェクト: gcnonato/Crawler
        public static void WriteVideoLists(string pChannelName, string pChannelId, int startIndex, Dictionary<string, VideoWrapper> videoDictionary, Enumeration.VideoRequestType requestType)
        {
            try
            {
                //Base Case of Recursion
                //if (startIndex >= 1000)
                //    return;
                //Base Case Ended of Recursion
                string videoName = String.Empty;
                string videoUrl = String.Empty;
                //string url = String.Empty;
                string videoId = String.Empty;

                string videoFileName = ConfigurationManager.AppSettings["channelsVideoFile"].ToString();
                string videFileNameXML = ConfigurationManager.AppSettings["channelsVideoFileXML"].ToString();
                string channelFileName = ConfigurationManager.AppSettings["channelsFileName"].ToString();
                string channelCleanedName = Common.CleanFileName(pChannelName);
                string channelUrl = String.Empty;
                if (requestType == Enumeration.VideoRequestType.All)
                {
                    //http://gdata.youtube.com/feeds/api/users/machinima/uploads?start-index=4000
                    channelUrl = "https://gdata.youtube.com/feeds/api/users/" + pChannelName + "/uploads?&start-index=" + startIndex;
                }

                HttpWebRequest nameRequest = (HttpWebRequest)WebRequest.Create(channelUrl);
                nameRequest.KeepAlive = false;
                nameRequest.ProtocolVersion = HttpVersion.Version10;
                HttpWebResponse nameResponse = (HttpWebResponse)nameRequest.GetResponse();

                Stream nameStream = nameResponse.GetResponseStream();
                StreamReader nameReader = new StreamReader(nameStream);

                string xmlData = nameReader.ReadToEnd();
                File.WriteAllText(channelCleanedName + "/" + videFileNameXML, xmlData);

                XmlDocument doc = new XmlDocument();
                doc.Load(channelCleanedName + "/" + videFileNameXML);
                XmlNamespaceManager namespaceManager = new XmlNamespaceManager(doc.NameTable);
                namespaceManager.AddNamespace("Atom", "http://www.w3.org/2005/Atom");
                XmlNodeList listResult = doc.SelectNodes(channelAtomEntry, namespaceManager);

                ////Getting total Record
                XmlNamespaceManager namespaceManager1 = new XmlNamespaceManager(doc.NameTable);
                namespaceManager1.AddNamespace("openSearch", "http://a9.com/-/spec/opensearchrss/1.0/");

                XmlNode nodeTotal = doc.SelectSingleNode("//openSearch:totalResults", namespaceManager1);
                int total = Int32.Parse(nodeTotal.InnerText);

                //Base Case Started

                if (total == 0)
                {
                    Constant.tempFiles.Add(videFileNameXML);
                    return;
                }
                if (listResult == null)
                {
                    Constant.tempFiles.Add(videFileNameXML);
                    return;
                }

                string flag = ConfigurationManager.AppSettings["testingFlag"].ToString();
                if (flag.Equals("true", StringComparison.CurrentCultureIgnoreCase))
                {
                    if (startIndex >= 26)
                    {
                        Constant.tempFiles.Add(videFileNameXML);
                        return;
                    }
                }
                else
                {
                    if (ConfigurationManager.AppSettings["ExtractAllVideosFlag"].ToString().Equals("False", StringComparison.InvariantCultureIgnoreCase))
                    {
                        int totalVideo = Int32.Parse(ConfigurationManager.AppSettings["totalVideos"].ToString());
                        if (totalVideo <= recordCount)
                        {
                            Constant.tempFiles.Add(videFileNameXML);
                            return;
                        }
                    }
                    else
                    {
                        //This check is not needed as base case is checked already.
                        if (total <= startIndex)
                        {
                            Constant.tempFiles.Add(videFileNameXML);
                            return;
                        }
                    }
                }

                //Base Case Ended

                //File.AppendAllText(channelCleanedName + "/" + log, "\t\tTotal Record : " + total + "; Start Index : " + startIndex + Environment.NewLine);
                foreach (XmlNode entry in listResult)
                {
                    bool idFound = false;
                    bool titleFound = false;
                    foreach (XmlNode node in entry.ChildNodes)
                    {
                        if (node.Name.Equals("id"))
                        {
                            videoUrl = node.InnerText;
                            string id = videoUrl;
                            string[] arrId = id.Split(new Char[] { '/' }, StringSplitOptions.RemoveEmptyEntries);
                            videoId = arrId[arrId.Length - 1];
                            idFound = true;
                        }
                        else if (node.Name.Equals("title"))
                        {
                            videoName = node.InnerText;
                            titleFound = true;
                        }
                        if (idFound && titleFound)
                        {
                            if (videoDictionary != null && !videoDictionary.ContainsKey(videoId))
                            {
                                VideoWrapper vWrapper = new VideoWrapper(videoName, videoId, videoUrl, channelUrlMain);
                                videoDictionary.Add(videoId, vWrapper);
                                File.AppendAllText(channelCleanedName + "/" + channelFileName, "\t" + videoName + "\r\n");

                                //Multithreading 2/5/2013
                                Produce(vWrapper);
                                //Done 2/5/2013

                                recordCount++;
                            }
                            break;
                        }
                    }
                    int totalVideo = Int32.Parse(ConfigurationManager.AppSettings["totalVideos"].ToString());
                    if (totalVideo <= recordCount && ConfigurationManager.AppSettings["ExtractAllVideosFlag"].ToString().Equals("False", StringComparison.InvariantCultureIgnoreCase))
                    {
                        Constant.tempFiles.Add(videFileNameXML);
                        return;
                    }
                }
                startIndex += 25;
                if (requestType == Enumeration.VideoRequestType.All)
                {
                    WriteVideoLists(pChannelName, channelId, startIndex, videoDictionary, Enumeration.VideoRequestType.All); //Recursive Call
                }
            }
            catch (Exception ex)
            {
                //exceptionCounter++;
                //if (exceptionCounter == 2)
                //{
                //    exceptionCounter = 0;
                //    return;
                //}
                //File.AppendAllText(Common.CleanFileName(pChannelName) + "/zaheerexception videolist " + log, "\t\tException Found : " + ex.Message + Environment.NewLine + "startIndex = " + startIndex + Environment.NewLine + Environment.NewLine);
                //startIndex += 25;
                Thread.Sleep(10000);
                if (requestType == Enumeration.VideoRequestType.All)
                {
                    WriteVideoLists(pChannelName, channelId, startIndex, videoDictionary, Enumeration.VideoRequestType.All); //Recursive Call
                }
            }
        }
コード例 #10
0
ファイル: Channel.cs プロジェクト: gcnonato/Crawler
 /// <summary>
 /// Registers one video as outstanding work and queues it on the thread pool,
 /// where <c>Consume</c> will process it.
 /// </summary>
 /// <param name="videoDictionary">The video to hand to <c>Consume</c>.</param>
 public static void Produce(VideoWrapper videoDictionary)
 {
     // Count the item BEFORE it becomes visible to the pool. If the increment ran after
     // queueing, a fast worker could decrement first and the "QueueLength == 0" completion
     // check could fire too early or be missed entirely.
     // NOTE(review): assumes QueueLength is a static integer field; the decrement in Consume
     // should use Interlocked.Decrement as well so both sides are atomic.
     var pending = Interlocked.Increment(ref QueueLength);

     ThreadPool.QueueUserWorkItem(new WaitCallback(Consume), videoDictionary);

     Console.WriteLine("producing" + pending);
 }
コード例 #11
0
ファイル: ChannelComment.cs プロジェクト: gcnonato/Crawler
        /// <summary>
        /// Parses the saved comment HTML pages of one video and appends every comment that has
        /// not been recorded yet to the video's text file in the channel's "Comments" folder.
        /// When all pages are processed (or the configured comment limit is hit) the HTML pages
        /// are handed to <c>Common.RemoveTempFiles</c>.
        /// </summary>
        /// <param name="pVideoWrapper">Video whose key and name are used for the URL header and the output file name.</param>
        /// <param name="pChannelName">Directory that contains the "Comments" folder.</param>
        /// <param name="pHtmlFiles">File names (values) of the downloaded comment pages inside the "Comments" folder.</param>
        public static void GetAllComments(VideoWrapper pVideoWrapper, string pChannelName, Dictionary<int, string> pHtmlFiles)
        {
            string videoUrl = "https://www.youtube.com/watch?v=" + pVideoWrapper.getVideoKey();
            string commentsDirectory = pChannelName + "/" + "Comments";
            bool videoUrlFlag = false;
            List<string> tempFiles = new List<string>();
            foreach (KeyValuePair<int, string> pair in pHtmlFiles)
            {
                try
                {
                    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();

                    // Dispose the reader as soon as the markup is in memory so the file handle is
                    // released even when parsing below throws; an open handle would otherwise
                    // survive until finalization and could block the temp-file removal.
                    using (StreamReader reader = new StreamReader(File.OpenRead(pChannelName + "/Comments/" + pair.Value)))
                    {
                        doc.LoadHtml(reader.ReadToEnd());
                    }

                    HtmlNodeCollection totalCollection = doc.DocumentNode.SelectNodes("//ul[@id='all-comments']//li[@class='comment']//div[@class='content']");
                    if (totalCollection == null)
                    {
                        // HtmlAgilityPack reports "no match" as null; nothing to parse in this page.
                        continue;
                    }

                    bool breakLoop = false;
                    foreach (HtmlNode node in totalCollection)
                    {
                        string user = string.Empty;
                        string displayName = string.Empty;
                        string date = string.Empty;
                        string comment = string.Empty;

                        // The ids are read by attribute position on the parent element.
                        // NOTE(review): presumably the parent is the <li class="comment"> item - confirm the attribute order.
                        HtmlNode nodeData = node.ParentNode;
                        string dataId = nodeData.Attributes[2].Value.Trim();
                        string authorId = nodeData.Attributes[1].Value.Trim();

                        int divCount = 0;
                        foreach (HtmlNode child in node.ChildNodes)
                        {
                            if (child.Name.Equals("p"))
                            {
                                // First <span> carries the author link and display name,
                                // the second one the comment date.
                                bool userFlag = false;
                                foreach (HtmlNode n in child.ChildNodes)
                                {
                                    if (n.Name.Equals("span") && !userFlag)
                                    {
                                        foreach (HtmlNode nNode in n.ChildNodes)
                                        {
                                            if (nNode.Name.Equals("a"))
                                            {
                                                user = nNode.Attributes["href"].Value.Split(new Char[] { '/' }, StringSplitOptions.RemoveEmptyEntries)[1];
                                                break;
                                            }
                                        }
                                        displayName = n.InnerText.Trim();
                                        userFlag = true;
                                    }
                                    else if (n.Name.Equals("span"))
                                    {
                                        date = n.InnerText.Trim();
                                        break;
                                    }
                                }
                            }
                            else if (child.Name.Equals("div"))
                            {
                                if (divCount == 0)
                                {
                                    // Only the first <div> is taken as the comment text.
                                    comment = child.InnerText.Trim();
                                    divCount++;
                                }
                            }
                        }

                        if (!displayName.Equals("") && !comment.Equals("") && !dataId.Equals("") && !authorId.Equals("") && !user.Equals("") && !GlobalConstants.commentDictionary.ContainsKey(dataId))
                        {
                            VideoCommentWrapper commentWrapper = new VideoCommentWrapper();

                            commentWrapper.authorId = authorId;
                            commentWrapper.commentId = dataId;
                            commentWrapper.commentText = comment;
                            commentWrapper.time = date;
                            commentWrapper.displayName = displayName;
                            commentWrapper.userName = user;

                            GlobalConstants.commentDictionary.Add(dataId, commentWrapper);

                            string outputName = Common.CleanFileName(pVideoWrapper.getVideoName() + "-" + fileComment) + ".txt";
                            string outputPath = commentsDirectory + "/" + outputName;
                            if (!Directory.Exists(commentsDirectory))
                            {
                                Directory.CreateDirectory(commentsDirectory);
                            }
                            commentCount++;
                            if (!videoUrlFlag)
                            {
                                // The video URL header is written once, before the first comment.
                                File.AppendAllText(outputPath, "Video Url : " + videoUrl + Environment.NewLine + "\r\n");
                                videoUrlFlag = true;
                            }

                            // One append per comment instead of three separate open/close cycles.
                            File.AppendAllText(
                                outputPath,
                                "User name : " + displayName + Environment.NewLine +
                                "Comment Date : " + date + Environment.NewLine +
                                "Comment : " + comment + Environment.NewLine);
                        }

                        if (parseAllComments.Equals("false", StringComparison.CurrentCultureIgnoreCase))
                        {
                            if (totalCommentsParse <= commentCount)
                            {
                                // Configured comment limit reached: stop this page and all remaining pages.
                                breakLoop = true;
                                break;
                            }
                        }
                    }

                    if (breakLoop)
                    {
                        break;
                    }
                }
                catch (Exception ex)
                {
                    // Best effort: a page that cannot be read or parsed is skipped, but no longer silently.
                    Console.Error.WriteLine("GetAllComments: skipping '" + pair.Value + "': " + ex.Message);
                    continue;
                }
            }

            foreach (KeyValuePair<int, string> file in pHtmlFiles)
            {
                tempFiles.Add("/Comments/" + file.Value);
            }

            Common.RemoveTempFiles(tempFiles, pChannelName);
        }
コード例 #12
0
        /// <summary>
        /// Downloads one page of a user's favorites feed starting at <paramref name="pStartIndex"/>,
        /// records every video not seen on this page in the channel file, passes the collected
        /// videos to <c>ChannelVideo.parseVideo</c> and <c>ChannelComment.CrawlComments</c>, and then
        /// recurses for the next page (start index + 25).
        /// </summary>
        /// <param name="pChannelName">Channel name; its cleaned form is the output directory.</param>
        /// <param name="pUserId">User id inserted into the favorites feed URL.</param>
        /// <param name="pStartIndex">Value sent as the feed's start-index query parameter.</param>
        /// <remarks>
        /// Any exception is caught and reported on standard error; the method then returns without
        /// retrying. Paging ends when the feed reports zero results, returns no entries, or the
        /// configured "totalVideos" limit is reached while "ExtractAllVideosFlag" is "False".
        /// </remarks>
        public static void ExtractFromUserFavourite(string pChannelName, string pUserId, int pStartIndex)
        {
            try
            {
                string channelFileName    = ConfigurationManager.AppSettings["channelsFileName"].ToString();
                string channelFileNameXML = ConfigurationManager.AppSettings["channelsFileNameXML"].ToString();
                string channelCleanedName = Common.CleanFileName(pChannelName);

                // Stop paging once the configured number of videos has been collected.
                if (ConfigurationManager.AppSettings["ExtractAllVideosFlag"].ToString().Equals("False", StringComparison.InvariantCultureIgnoreCase))
                {
                    int totalVideo = Int32.Parse(ConfigurationManager.AppSettings["totalVideos"].ToString());
                    if (totalVideo <= recordCount)
                    {
                        return;
                    }
                }

                // Favorites feed of the user, one page starting at pStartIndex.
                string favouriteUrl = "https://gdata.youtube.com/feeds/api/users/" + pUserId + "/favorites?start-index=" + pStartIndex + "&v=2";
                WebRequest nameRequest = WebRequest.Create(favouriteUrl);

                // Dispose response and reader deterministically; undisposed responses keep the
                // underlying connection occupied.
                string xmlData;
                using (HttpWebResponse nameResponse = (HttpWebResponse)nameRequest.GetResponse())
                using (StreamReader nameReader = new StreamReader(nameResponse.GetResponseStream()))
                {
                    xmlData = nameReader.ReadToEnd();
                }

                string xmlPath = channelCleanedName + "/" + channelFileNameXML;
                File.WriteAllText(xmlPath, xmlData);
                Constant.tempFiles.Add(channelFileNameXML);

                XmlDocument doc = new XmlDocument();
                doc.Load(xmlPath);
                XmlNamespaceManager namespaceManager = new XmlNamespaceManager(doc.NameTable);
                namespaceManager.AddNamespace("Atom", "http://www.w3.org/2005/Atom");

                XmlNamespaceManager openSearchNameSpace = new XmlNamespaceManager(doc.NameTable);
                openSearchNameSpace.AddNamespace("openSearch", "http://a9.com/-/spec/opensearch/1.1/");
                XmlNode totalRecordNode = doc.SelectSingleNode("//openSearch:totalResults", openSearchNameSpace);

                // Check for null BEFORE touching InnerText (the previous order dereferenced first).
                if (totalRecordNode == null || totalRecordNode.InnerText.Equals("0"))
                {
                    return;
                }

                XmlNode titleNode = doc.SelectSingleNode("//Atom:title", namespaceManager);
                File.AppendAllText(channelCleanedName + "/" + channelFileName, titleNode.InnerText + "\r\n");

                XmlNodeList listNodes = doc.SelectNodes("//Atom:entry", namespaceManager);
                if (listNodes.Count == 0)
                {
                    return;
                }

                // The section header is flushed together with the first new video of the page.
                StringBuilder strBuilder = new StringBuilder();
                strBuilder.Append("\tFavourite Videos:\r\n");
                string title = String.Empty;
                string key   = String.Empty;
                string url   = String.Empty;
                Dictionary <string, VideoWrapper> dictionaryVideoWrapper = new Dictionary <string, VideoWrapper>();
                foreach (XmlNode n in listNodes)
                {
                    foreach (XmlNode node in n.ChildNodes)
                    {
                        if (node.Name.Equals("title"))
                        {
                            title = node.InnerText;
                        }
                        else if (node.Name.Equals("link"))
                        {
                            if (node.Attributes["rel"].Value.Equals("alternate", StringComparison.CurrentCultureIgnoreCase))
                            {
                                // href up to the first '&' is the watch URL; the text after '=' is the video key.
                                url = node.Attributes["href"].Value.Split(new Char[] { '&' }, StringSplitOptions.RemoveEmptyEntries)[0];
                                key = url.Split(new Char[] { '=' }, StringSplitOptions.RemoveEmptyEntries)[1];
                            }
                        }
                    }

                    strBuilder.Append("\t\tVideo Name: " + title + "\r\n");
                    if (!dictionaryVideoWrapper.ContainsKey(key))
                    {
                        VideoWrapper vWrapper = new VideoWrapper();
                        vWrapper.setVideoKey(key);
                        vWrapper.setVideoName(title);
                        vWrapper.setVideoUrl(url);
                        dictionaryVideoWrapper.Add(key, vWrapper);
                        updatedFlag = true;
                        recordCount++;
                    }
                    if (updatedFlag)
                    {
                        File.AppendAllText(channelCleanedName + "/" + channelFileName, strBuilder.ToString());
                    }
                    updatedFlag = false;
                    strBuilder.Remove(0, strBuilder.Length);
                }

                ChannelVideo.parseVideo(dictionaryVideoWrapper, pChannelName);
                ChannelComment.CrawlComments(dictionaryVideoWrapper, pChannelName);
                pStartIndex += 25;
                ExtractFromUserFavourite(pChannelName, pUserId, pStartIndex);

                Common.RemoveTempFiles(Constant.tempFiles, channelCleanedName);
            }
            catch (Exception ex)
            {
                // Best effort: give up on this feed, but leave a trace instead of failing silently.
                Console.Error.WriteLine("ExtractFromUserFavourite: failed at start-index " + pStartIndex + ": " + ex.Message);
            }
        }
コード例 #13
0
        /// <summary>
        /// Pages through a playlist feed starting at <paramref name="pStartIndex"/> and adds every
        /// video that is not yet in <paramref name="pDictionaryVideoWrapper"/>, appending its title
        /// to <paramref name="strBuilder"/>.
        /// </summary>
        /// <param name="pChannelName">Channel name; its cleaned form is the directory for the temporary XML file.</param>
        /// <param name="pPlaylistURL">Playlist feed URL; "?start-index=N" is appended to it.</param>
        /// <param name="pDictionaryVideoWrapper">Receives the discovered videos, keyed by video key.</param>
        /// <param name="strBuilder">Receives one "\t\ttitle" line per newly added video.</param>
        /// <param name="pStartIndex">Value sent as start-index for the first page.</param>
        /// <remarks>
        /// Implemented as a loop rather than recursion. The previous version retried a failed page by
        /// recursing from the catch block with identical arguments, so a persistent failure ended in a
        /// stack overflow; failures are now retried a bounded number of times. The start index advances
        /// by the number of entries actually returned instead of by one, so a page is not fetched again
        /// for each of its entries.
        /// </remarks>
        public static void GetPlaylistVideos(string pChannelName, string pPlaylistURL, Dictionary <string, VideoWrapper> pDictionaryVideoWrapper, StringBuilder strBuilder, int pStartIndex)
        {
            const int maxConsecutiveFailures = 3;

            int  consecutiveFailures = 0;
            bool anyPageProcessed    = false;

            while (true)
            {
                try
                {
                    // Stop paging once the configured number of videos has been collected.
                    if (ConfigurationManager.AppSettings["ExtractAllVideosFlag"].ToString().Equals("False", StringComparison.InvariantCultureIgnoreCase))
                    {
                        int totalVideo = Int32.Parse(ConfigurationManager.AppSettings["totalVideos"].ToString());
                        if (totalVideo <= recordCount)
                        {
                            break;
                        }
                    }

                    string channelFileNameXML = "Playlist-" + ConfigurationManager.AppSettings["channelsFileNameXML"].ToString();
                    string xmlPath            = Common.CleanFileName(pChannelName) + "/" + channelFileNameXML;

                    WebRequest nameRequest = WebRequest.Create(pPlaylistURL + "?start-index=" + pStartIndex);

                    // Dispose response and reader deterministically so the connection is released.
                    string xmlData;
                    using (HttpWebResponse nameResponse = (HttpWebResponse)nameRequest.GetResponse())
                    using (StreamReader nameReader = new StreamReader(nameResponse.GetResponseStream()))
                    {
                        xmlData = nameReader.ReadToEnd();
                    }

                    File.WriteAllText(xmlPath, xmlData);
                    Constant.tempFiles.Add(channelFileNameXML);

                    XmlDocument doc = new XmlDocument();
                    doc.Load(xmlPath);
                    XmlNamespaceManager namespaceManager = new XmlNamespaceManager(doc.NameTable);
                    namespaceManager.AddNamespace("Atom", "http://www.w3.org/2005/Atom");
                    namespaceManager.AddNamespace("openSearch", "http://a9.com/-/spec/opensearchrss/1.0/");

                    // Null is checked before InnerText is read (the previous order dereferenced first).
                    XmlNode totalRecordNode = doc.SelectSingleNode("//openSearch:totalResults", namespaceManager);
                    if (totalRecordNode == null || totalRecordNode.InnerText.Equals("0"))
                    {
                        break;
                    }

                    XmlNodeList listNodes = doc.SelectNodes("//Atom:entry", namespaceManager);
                    if (listNodes.Count == 0)
                    {
                        // Start index is past the last entry: nothing more to fetch.
                        break;
                    }

                    string title = String.Empty;
                    string url   = String.Empty;
                    string key   = string.Empty;
                    foreach (XmlNode n in listNodes)
                    {
                        foreach (XmlNode node in n.ChildNodes)
                        {
                            if (node.Name.Equals("title"))
                            {
                                title = node.InnerText;
                            }
                            else if (node.Name.Equals("link"))
                            {
                                if (node.Attributes["rel"].Value.Equals("alternate", StringComparison.CurrentCultureIgnoreCase))
                                {
                                    // Second token after splitting the href on '=' and '&' is the video key.
                                    string[] linkArr = node.Attributes["href"].Value.Split(new Char[] { '=', '&' }, StringSplitOptions.RemoveEmptyEntries);
                                    key = linkArr[1];
                                    url = "http://www.youtube.com/watch?v=" + key;
                                }
                            }
                        }
                        if (!pDictionaryVideoWrapper.ContainsKey(key))
                        {
                            recordCount++;
                            VideoWrapper vWrapper = new VideoWrapper();
                            vWrapper.setVideoKey(key);
                            vWrapper.setVideoName(title);
                            vWrapper.setVideoUrl(url);

                            pDictionaryVideoWrapper.Add(key, vWrapper);
                            strBuilder.Append("\t\t" + title + "\r\n");
                            updatedFlag = true;
                        }
                    }

                    // Continue right after the last entry of this page.
                    pStartIndex += listNodes.Count;
                    anyPageProcessed = true;
                    consecutiveFailures = 0;
                }
                catch (Exception ex)
                {
                    // Retry the same page, but only a bounded number of times in a row.
                    consecutiveFailures++;
                    Console.Error.WriteLine("GetPlaylistVideos: failed at start-index " + pStartIndex + " (attempt " + consecutiveFailures + "): " + ex.Message);
                    if (consecutiveFailures >= maxConsecutiveFailures)
                    {
                        break;
                    }
                }
            }

            // Temp files are removed only when at least one page was fully processed,
            // matching when the recursive version reached its cleanup call.
            if (anyPageProcessed)
            {
                try
                {
                    Common.RemoveTempFiles(Constant.tempFiles, Common.CleanFileName(pChannelName));
                }
                catch (Exception ex)
                {
                    // Cleanup is best effort; failures here never propagated to callers before either.
                    Console.Error.WriteLine("GetPlaylistVideos: temp file cleanup failed: " + ex.Message);
                }
            }
        }
コード例 #14
0
        /// <summary>
        /// Pages through a channel's uploads feed in steps of 25, starting at <paramref name="startIndex"/>.
        /// Every video not yet in <paramref name="videoDictionary"/> is added to it, written to the
        /// channel file and handed to <c>Produce</c>.
        /// </summary>
        /// <param name="pChannelName">Channel name used in the feed URL; its cleaned form is the output directory.</param>
        /// <param name="pChannelId">Not used by this method.</param>
        /// <param name="startIndex">Value sent as start-index for the first page.</param>
        /// <param name="videoDictionary">Known videos keyed by video id; when null, no videos are recorded.</param>
        /// <param name="requestType">Only <c>Enumeration.VideoRequestType.All</c> is handled; other values return immediately.</param>
        /// <remarks>
        /// Implemented as a loop rather than recursion. A failed page is retried after a 10 second
        /// pause, at most <c>maxConsecutiveFailures</c> times in a row; the previous version retried by
        /// recursing from the catch block without any bound. Whenever paging ends normally the temporary
        /// XML file name is added to <c>Constant.tempFiles</c>.
        /// </remarks>
        public static void WriteVideoLists(string pChannelName, string pChannelId, int startIndex, Dictionary <string, VideoWrapper> videoDictionary, Enumeration.VideoRequestType requestType)
        {
            const int pageSize               = 25;
            const int maxConsecutiveFailures = 5;
            const int retryDelayMilliseconds = 10000;

            // No feed URL exists for other request types; previously this path ran into an
            // exception and a 10 second sleep before returning without doing anything.
            if (requestType != Enumeration.VideoRequestType.All)
            {
                return;
            }

            int consecutiveFailures = 0;
            while (true)
            {
                try
                {
                    string videFileNameXML    = ConfigurationManager.AppSettings["channelsVideoFileXML"].ToString();
                    string channelFileName    = ConfigurationManager.AppSettings["channelsFileName"].ToString();
                    string channelCleanedName = Common.CleanFileName(pChannelName);
                    string xmlPath            = channelCleanedName + "/" + videFileNameXML;
                    string channelUrl         = "https://gdata.youtube.com/feeds/api/users/" + pChannelName + "/uploads?&start-index=" + startIndex;

                    HttpWebRequest nameRequest = (HttpWebRequest)WebRequest.Create(channelUrl);
                    nameRequest.KeepAlive       = false;
                    nameRequest.ProtocolVersion = HttpVersion.Version10;

                    // Dispose response and reader deterministically; undisposed responses keep the
                    // connection occupied and can make later requests to the same host stall.
                    string xmlData;
                    using (HttpWebResponse nameResponse = (HttpWebResponse)nameRequest.GetResponse())
                    using (StreamReader nameReader = new StreamReader(nameResponse.GetResponseStream()))
                    {
                        xmlData = nameReader.ReadToEnd();
                    }
                    File.WriteAllText(xmlPath, xmlData);

                    XmlDocument doc = new XmlDocument();
                    doc.Load(xmlPath);
                    XmlNamespaceManager namespaceManager = new XmlNamespaceManager(doc.NameTable);
                    namespaceManager.AddNamespace("Atom", "http://www.w3.org/2005/Atom");
                    namespaceManager.AddNamespace("openSearch", "http://a9.com/-/spec/opensearchrss/1.0/");
                    XmlNodeList listResult = doc.SelectNodes(channelAtomEntry, namespaceManager);

                    XmlNode nodeTotal = doc.SelectSingleNode("//openSearch:totalResults", namespaceManager);
                    int     total     = Int32.Parse(nodeTotal.InnerText);

                    // ---- Stop conditions ----

                    // An empty page means there is nothing left to read. Without this check the
                    // limited mode never stopped for channels with fewer videos than "totalVideos".
                    if (total == 0 || listResult == null || listResult.Count == 0)
                    {
                        Constant.tempFiles.Add(videFileNameXML);
                        return;
                    }

                    string flag    = ConfigurationManager.AppSettings["testingFlag"].ToString();
                    bool   testing = flag.Equals("true", StringComparison.CurrentCultureIgnoreCase);
                    if (testing && startIndex >= 26)
                    {
                        // Testing mode reads the first page only.
                        Constant.tempFiles.Add(videFileNameXML);
                        return;
                    }

                    bool limited    = ConfigurationManager.AppSettings["ExtractAllVideosFlag"].ToString().Equals("False", StringComparison.InvariantCultureIgnoreCase);
                    int  totalVideo = limited ? Int32.Parse(ConfigurationManager.AppSettings["totalVideos"].ToString()) : 0;

                    if (!testing)
                    {
                        // start-index is 1-based, so index == total still addresses the last video;
                        // the previous "total <= startIndex" test skipped it.
                        bool finished = limited ? totalVideo <= recordCount : total < startIndex;
                        if (finished)
                        {
                            Constant.tempFiles.Add(videFileNameXML);
                            return;
                        }
                    }

                    // ---- Process the entries of this page ----

                    string videoName = String.Empty;
                    string videoUrl  = String.Empty;
                    string videoId   = String.Empty;
                    foreach (XmlNode entry in listResult)
                    {
                        bool idFound    = false;
                        bool titleFound = false;
                        foreach (XmlNode node in entry.ChildNodes)
                        {
                            if (node.Name.Equals("id"))
                            {
                                // The video id is the last path segment of the entry's id URL.
                                videoUrl = node.InnerText;
                                string[] arrId = videoUrl.Split(new Char[] { '/' }, StringSplitOptions.RemoveEmptyEntries);
                                videoId = arrId[arrId.Length - 1];
                                idFound = true;
                            }
                            else if (node.Name.Equals("title"))
                            {
                                videoName  = node.InnerText;
                                titleFound = true;
                            }
                            if (idFound && titleFound)
                            {
                                if (videoDictionary != null && !videoDictionary.ContainsKey(videoId))
                                {
                                    VideoWrapper vWrapper = new VideoWrapper(videoName, videoId, videoUrl, channelUrlMain);
                                    videoDictionary.Add(videoId, vWrapper);
                                    File.AppendAllText(channelCleanedName + "/" + channelFileName, "\t" + videoName + "\r\n");

                                    // Hand the video to the thread pool for processing.
                                    Produce(vWrapper);

                                    recordCount++;
                                }
                                break;
                            }
                        }

                        if (limited && totalVideo <= recordCount)
                        {
                            // Configured video limit reached in the middle of the page.
                            Constant.tempFiles.Add(videFileNameXML);
                            return;
                        }
                    }

                    startIndex += pageSize;
                    consecutiveFailures = 0;
                }
                catch (Exception ex)
                {
                    consecutiveFailures++;
                    Console.Error.WriteLine("WriteVideoLists: failed at start-index " + startIndex + " (attempt " + consecutiveFailures + "): " + ex.Message);
                    if (consecutiveFailures >= maxConsecutiveFailures)
                    {
                        // Persistent failure: give up instead of retrying forever.
                        return;
                    }

                    // Pause before retrying the same page.
                    Thread.Sleep(retryDelayMilliseconds);
                }
            }
        }
コード例 #15
0
ファイル: ChannelComment.cs プロジェクト: gcnonato/Crawler
 /// <summary>
 /// Hands a video to the thread pool for processing by <c>Consume</c> and counts it as
 /// outstanding work in <c>QueueLength</c>.
 /// </summary>
 /// <param name="videoDictionary">
 /// The single video wrapper to process (passed to <c>Consume</c> as its state object).
 /// </param>
 public static void Produce(VideoWrapper videoDictionary)
 {
     // Count the item BEFORE it is queued. The worker decrements QueueLength when it
     // finishes; if it ran before the increment, the counter would dip below zero and
     // the "QueueLength == 0" completion check in Consume could be missed.
     // NOTE(review): ++ on a shared counter is still not atomic; if QueueLength is a
     // plain field, consider Interlocked.Increment/Decrement in both Produce and Consume.
     QueueLength++;
     ThreadPool.QueueUserWorkItem(
     new WaitCallback(Consume), videoDictionary);
     Console.WriteLine("producing" + QueueLength);
 }
コード例 #16
0
ファイル: Channel.cs プロジェクト: gcnonato/Crawler
        /// <summary>
        /// Crawls one page of a user's favourite-videos feed, then recurses for the next page.
        /// </summary>
        /// <remarks>
        /// For the page starting at <paramref name="pStartIndex"/> the method downloads the feed XML,
        /// stores it as a temp file under the cleaned channel directory, appends the feed title and
        /// the names of newly seen videos to the channel file, and passes the collected videos to
        /// <c>ChannelVideo.parseVideo</c> and <c>ChannelComment.CrawlComments</c>. It stops when the
        /// feed reports zero results, the page has no entries, or the configured debugging cap
        /// (<c>totalVideos</c> with <c>ExtractAllVideosFlag</c> = False) has been reached.
        /// The crawl is best effort: any exception ends processing silently.
        /// </remarks>
        /// <param name="pChannelName">Channel name; its cleaned form is used as the output directory.</param>
        /// <param name="pUserId">User id inserted into the favourites feed URL.</param>
        /// <param name="pStartIndex">Value sent as the feed's <c>start-index</c>; advanced by 25 per page.</param>
        public static void ExtractFromUserFavourite(string pChannelName, string pUserId, int pStartIndex)
        {
            try
            {
                string channelFileName = ConfigurationManager.AppSettings["channelsFileName"].ToString();
                string channelFileNameXML = ConfigurationManager.AppSettings["channelsFileNameXML"].ToString();
                string channelCleanedName = Common.CleanFileName(pChannelName);

                // Debugging cap: stop once the configured number of videos has been recorded.
                if (ConfigurationManager.AppSettings["ExtractAllVideosFlag"].ToString().Equals("False", StringComparison.InvariantCultureIgnoreCase))
                {
                    int totalVideo = Int32.Parse(ConfigurationManager.AppSettings["totalVideos"].ToString());
                    if (totalVideo <= recordCount)
                    {
                        return;
                    }
                }

                string favouriteUrl = "https://gdata.youtube.com/feeds/api/users/" + pUserId + "/favorites?start-index=" + pStartIndex + "&v=2";

                // Dispose the response chain so the underlying connection is released; the
                // original left it open for every page fetched.
                string xmlData;
                WebRequest nameRequest = WebRequest.Create(favouriteUrl);
                using (HttpWebResponse nameResponse = (HttpWebResponse)nameRequest.GetResponse())
                using (Stream nameStream = nameResponse.GetResponseStream())
                using (StreamReader nameReader = new StreamReader(nameStream))
                {
                    xmlData = nameReader.ReadToEnd();
                }

                File.WriteAllText(channelCleanedName + "/" + channelFileNameXML, xmlData);
                Constant.tempFiles.Add(channelFileNameXML);

                XmlDocument doc = new XmlDocument();
                doc.Load(channelCleanedName + "/" + channelFileNameXML);
                XmlNamespaceManager namespaceManager = new XmlNamespaceManager(doc.NameTable);
                namespaceManager.AddNamespace("Atom", "http://www.w3.org/2005/Atom");

                XmlNamespaceManager openSearchNameSpace = new XmlNamespaceManager(doc.NameTable);
                openSearchNameSpace.AddNamespace("openSearch", "http://a9.com/-/spec/opensearch/1.1/");
                XmlNode totalRecordNode = doc.SelectSingleNode("//openSearch:totalResults", openSearchNameSpace);

                // Check for null BEFORE reading InnerText. Previously a missing node raised a
                // NullReferenceException that was only absorbed by the catch-all below.
                if (totalRecordNode == null || totalRecordNode.InnerText.Equals("0"))
                {
                    return;
                }

                XmlNode titleNode = doc.SelectSingleNode("//Atom:title", namespaceManager);
                File.AppendAllText(channelCleanedName + "/" + channelFileName, titleNode.InnerText + "\r\n");

                XmlNodeList listNodes = doc.SelectNodes("//Atom:entry", namespaceManager);
                if (listNodes.Count == 0)
                {
                    return;
                }

                StringBuilder strBuilder = new StringBuilder();
                strBuilder.Append("\tFavourite Videos:\r\n");

                // Declared outside the loop on purpose: an entry without a title or alternate
                // link keeps the values of the previous entry, exactly as before.
                string title = String.Empty;
                string key = String.Empty;
                string url = String.Empty;
                Dictionary<string, VideoWrapper> dictionaryVideoWrapper = new Dictionary<string, VideoWrapper>();

                foreach (XmlNode n in listNodes)
                {
                    foreach (XmlNode node in n.ChildNodes)
                    {
                        if (node.Name.Equals("title"))
                        {
                            title = node.InnerText;
                        }
                        else if (node.Name.Equals("link"))
                        {
                            if (node.Attributes["rel"].Value.Equals("alternate", StringComparison.CurrentCultureIgnoreCase))
                            {
                                // Watch URL without trailing query parameters; the video key is
                                // whatever follows the first '='.
                                url = node.Attributes["href"].Value.Split(new Char[] { '&' }, StringSplitOptions.RemoveEmptyEntries)[0];
                                key = url.Split(new Char[] { '=' }, StringSplitOptions.RemoveEmptyEntries)[1];
                            }
                        }
                    }

                    strBuilder.Append("\t\tVideo Name: " + title + "\r\n");
                    if (!dictionaryVideoWrapper.ContainsKey(key))
                    {
                        VideoWrapper vWrapper = new VideoWrapper();
                        vWrapper.setVideoKey(key);
                        vWrapper.setVideoName(title);
                        vWrapper.setVideoUrl(url);
                        dictionaryVideoWrapper.Add(key, vWrapper);
                        updatedFlag = true;
                        recordCount++;
                    }

                    // updatedFlag is shared state and may already be set on entry, so it is
                    // consulted (and reset) per entry rather than folded into the branch above.
                    if (updatedFlag)
                    {
                        File.AppendAllText(channelCleanedName + "/" + channelFileName, strBuilder.ToString());
                    }
                    updatedFlag = false;
                    strBuilder.Remove(0, strBuilder.Length);
                }

                ChannelVideo.parseVideo(dictionaryVideoWrapper, pChannelName);
                ChannelComment.CrawlComments(dictionaryVideoWrapper, pChannelName);

                // Next page of the feed.
                pStartIndex += 25;
                ExtractFromUserFavourite(pChannelName, pUserId, pStartIndex);

                Common.RemoveTempFiles(Constant.tempFiles, channelCleanedName);
            }
            catch (Exception)
            {
                // Deliberately best effort: a failure on any page ends the favourites crawl
                // without propagating to the caller.
            }
        }
コード例 #17
0
ファイル: Channel.cs プロジェクト: gcnonato/Crawler
        /// <summary>
        /// Walks a playlist feed page by page, adding every video not yet present to
        /// <paramref name="pDictionaryVideoWrapper"/> and one line per added video to
        /// <paramref name="strBuilder"/>.
        /// </summary>
        /// <remarks>
        /// Stops when the feed reports zero results, a page has no entries, the configured
        /// debugging cap (<c>totalVideos</c> with <c>ExtractAllVideosFlag</c> = False) is reached,
        /// or a page keeps failing after <c>PlaylistPageRetryLimit</c> immediate retries.
        /// Exceptions are never propagated to the caller.
        /// </remarks>
        /// <param name="pChannelName">Channel name; its cleaned form is the directory for the temp XML file.</param>
        /// <param name="pPlaylistURL">Playlist feed URL; <c>?start-index=</c> is appended to it.</param>
        /// <param name="pDictionaryVideoWrapper">Receives the discovered videos keyed by video key.</param>
        /// <param name="strBuilder">Receives a tab-indented title line for each newly added video.</param>
        /// <param name="pStartIndex">Value sent as the feed's <c>start-index</c> for the first page.</param>
        public static void GetPlaylistVideos(string pChannelName, string pPlaylistURL, Dictionary<string, VideoWrapper> pDictionaryVideoWrapper, StringBuilder strBuilder, int pStartIndex)
        {
            GetPlaylistVideosPage(pChannelName, pPlaylistURL, pDictionaryVideoWrapper, strBuilder, pStartIndex, PlaylistPageRetryLimit);
        }

        // How many times a failing playlist page is retried before the crawl gives up.
        private const int PlaylistPageRetryLimit = 3;

        // Processes the page at pStartIndex and recurses for the following one; retriesLeft
        // bounds the retry-on-exception path that previously recursed without limit.
        private static void GetPlaylistVideosPage(string pChannelName, string pPlaylistURL, Dictionary<string, VideoWrapper> pDictionaryVideoWrapper, StringBuilder strBuilder, int pStartIndex, int retriesLeft)
        {
            try
            {
                string channelFileNameXML = "Playlist-" + ConfigurationManager.AppSettings["channelsFileNameXML"].ToString();
                string channelCleanedName = Common.CleanFileName(pChannelName);

                // Debugging cap: stop once the configured number of videos has been recorded.
                if (ConfigurationManager.AppSettings["ExtractAllVideosFlag"].ToString().Equals("False", StringComparison.InvariantCultureIgnoreCase))
                {
                    int totalVideo = Int32.Parse(ConfigurationManager.AppSettings["totalVideos"].ToString());
                    if (totalVideo <= recordCount)
                    {
                        return;
                    }
                }

                // Dispose the response chain so the connection is released after each page.
                string xmlData;
                WebRequest nameRequest = WebRequest.Create(pPlaylistURL + "?start-index=" + pStartIndex);
                using (HttpWebResponse nameResponse = (HttpWebResponse)nameRequest.GetResponse())
                using (Stream nameStream = nameResponse.GetResponseStream())
                using (StreamReader nameReader = new StreamReader(nameStream))
                {
                    xmlData = nameReader.ReadToEnd();
                }

                File.WriteAllText(channelCleanedName + "/" + channelFileNameXML, xmlData);
                Constant.tempFiles.Add(channelFileNameXML);

                XmlDocument doc = new XmlDocument();
                doc.Load(channelCleanedName + "/" + channelFileNameXML);
                XmlNamespaceManager namespaceManager = new XmlNamespaceManager(doc.NameTable);
                namespaceManager.AddNamespace("Atom", "http://www.w3.org/2005/Atom");
                namespaceManager.AddNamespace("openSearch", "http://a9.com/-/spec/opensearchrss/1.0/");

                // The list indexer yields null when nothing matched, so test for null before
                // reading InnerText instead of relying on the catch block.
                XmlNode totalRecordNode = doc.SelectNodes("//openSearch:totalResults", namespaceManager)[0];
                if (totalRecordNode == null || totalRecordNode.InnerText.Equals("0"))
                {
                    return;
                }

                XmlNodeList listNodes = doc.SelectNodes("//Atom:entry", namespaceManager);
                if (listNodes.Count == 0)
                {
                    return;
                }

                // Declared outside the loop on purpose: an entry without a title or alternate
                // link keeps the values of the previous entry, exactly as before.
                string title = String.Empty;
                string url = String.Empty;
                string key = string.Empty;
                foreach (XmlNode n in listNodes)
                {
                    foreach (XmlNode node in n.ChildNodes)
                    {
                        if (node.Name.Equals("title"))
                        {
                            title = node.InnerText;
                        }
                        else if (node.Name.Equals("link"))
                        {
                            if (node.Attributes["rel"].Value.Equals("alternate", StringComparison.CurrentCultureIgnoreCase))
                            {
                                // The second token after splitting on '=' and '&' is the video key.
                                string[] linkArr = node.Attributes["href"].Value.Split(new Char[] { '=', '&' }, StringSplitOptions.RemoveEmptyEntries);
                                key = linkArr[1];
                                url = "http://www.youtube.com/watch?v=" + key;
                            }
                        }
                    }

                    if (!pDictionaryVideoWrapper.ContainsKey(key))
                    {
                        recordCount++;
                        VideoWrapper vWrapper = new VideoWrapper();
                        vWrapper.setVideoKey(key);
                        vWrapper.setVideoName(title);
                        vWrapper.setVideoUrl(url);

                        pDictionaryVideoWrapper.Add(key, vWrapper);
                        strBuilder.Append("\t\t" + title + "\r\n");
                        updatedFlag = true;
                    }
                }

                // NOTE(review): the start index advances by 1 here while the other feed crawlers
                // in this project step by 25; consecutive pages therefore overlap and rely on the
                // ContainsKey check above. Kept as-is; confirm whether a larger step is intended.
                pStartIndex++;
                GetPlaylistVideosPage(pChannelName, pPlaylistURL, pDictionaryVideoWrapper, strBuilder, pStartIndex, PlaylistPageRetryLimit);
                Common.RemoveTempFiles(Constant.tempFiles, channelCleanedName);
            }
            catch (Exception)
            {
                // Retry the same page, but only a bounded number of times. The original retried
                // unconditionally, so a persistent failure recursed until the stack overflowed.
                if (retriesLeft > 0)
                {
                    GetPlaylistVideosPage(pChannelName, pPlaylistURL, pDictionaryVideoWrapper, strBuilder, pStartIndex, retriesLeft - 1);
                }
            }
        }
コード例 #18
0
ファイル: ChannelVideo.cs プロジェクト: QiseD/Crawler
        /// <summary>
        /// Downloads the metadata feed entry for one video and appends a summary (channel, URLs,
        /// title, date, views, likes/dislikes, description, tags) to
        /// <c>&lt;cleaned channel&gt;/Videos/channel_video_&lt;cleaned title&gt;-&lt;channelVideo setting&gt;</c>.
        /// </summary>
        /// <remarks>
        /// A failed attempt is retried after a 10 second pause, up to three attempts in total;
        /// after that the video is skipped. The summary is written with a single append once all
        /// fields are known, so a failed attempt leaves no partial or duplicated output.
        /// </remarks>
        /// <param name="pVideoWrapper">Video to describe; supplies the video key and channel URL.</param>
        /// <param name="pChannelName">Channel name; its cleaned form is the output directory.</param>
        public static void parseVideo(VideoWrapper pVideoWrapper, string pChannelName)
        {
            // The original retried forever through recursion, which never returned for a
            // permanently failing video and would eventually overflow the stack.
            const int maxAttempts = 3;
            const int retryDelayMilliseconds = 10000;

            string fileVideo = ConfigurationManager.AppSettings["channelVideo"].ToString();
            string videoChannelName = Common.CleanFileName(pChannelName);

            for (int attempt = 1; ; attempt++)
            {
                try
                {
                    string videoUrl = string.Format("https://gdata.youtube.com/feeds/api/videos/{0}?v=2", pVideoWrapper.getVideoKey());

                    // Dispose the response chain so the connection is released.
                    string xmlData;
                    WebRequest nameRequest = WebRequest.Create(videoUrl);
                    using (HttpWebResponse nameResponse = (HttpWebResponse)nameRequest.GetResponse())
                    using (Stream nameStream = nameResponse.GetResponseStream())
                    using (StreamReader nameReader = new StreamReader(nameStream))
                    {
                        xmlData = nameReader.ReadToEnd();
                    }

                    // Parse straight from memory. The former round trip through a temp file in the
                    // working directory (named only after the video title) could collide when two
                    // worker threads handled same-titled videos and was left behind on failure.
                    XmlDocument doc = new XmlDocument();
                    doc.LoadXml(xmlData);

                    XmlNamespaceManager namespaceManager = new XmlNamespaceManager(doc.NameTable);
                    namespaceManager.AddNamespace("Atom", "http://www.w3.org/2005/Atom");
                    namespaceManager.AddNamespace("yt", "http://gdata.youtube.com/schemas/2007");
                    namespaceManager.AddNamespace("media", "http://search.yahoo.com/mrss/");

                    // Only the first <link> of the entry is inspected, as before.
                    string url = string.Empty;
                    XmlNode linkNode = doc.SelectSingleNode("//Atom:entry/Atom:link", namespaceManager);
                    if (linkNode != null && linkNode.Attributes["rel"].Value.Equals("alternate", StringComparison.CurrentCultureIgnoreCase))
                    {
                        string[] urlArr = linkNode.Attributes["href"].Value.Split(new Char[] { '&' }, StringSplitOptions.RemoveEmptyEntries);
                        url = urlArr[0];
                    }

                    XmlNode titleNode = doc.SelectSingleNode("//Atom:entry/Atom:title", namespaceManager);
                    string videoName = titleNode != null ? titleNode.InnerText : string.Empty;

                    XmlNode publishedNode = doc.SelectSingleNode("//Atom:entry/Atom:published", namespaceManager);
                    string date = publishedNode != null ? publishedNode.InnerText : string.Empty;

                    XmlNode ratingNode = doc.SelectSingleNode("//Atom:entry/yt:rating", namespaceManager);
                    XmlAttribute dislikesAttribute = ratingNode != null ? ratingNode.Attributes["numDislikes"] : null;
                    XmlAttribute likesAttribute = ratingNode != null ? ratingNode.Attributes["numLikes"] : null;
                    string iDislike = dislikesAttribute != null ? dislikesAttribute.Value : string.Empty;
                    string iLike = likesAttribute != null ? likesAttribute.Value : string.Empty;

                    XmlNode descriptionNode = doc.SelectSingleNode(" //Atom:entry/media:group/media:description", namespaceManager);
                    string description = descriptionNode != null ? descriptionNode.InnerText : string.Empty;

                    XmlNode statisticsNode = doc.SelectSingleNode("//Atom:entry/yt:statistics", namespaceManager);
                    XmlAttribute viewCountAttribute = statisticsNode != null ? statisticsNode.Attributes["viewCount"] : null;
                    string videoViewCount = viewCountAttribute != null ? viewCountAttribute.Value : string.Empty;

                    // The tag helper may return null; treat that as "no tags" instead of
                    // dereferencing it, which previously failed every attempt for such videos.
                    List<string> videoTags = preapreParamsTags(doc.SelectNodes("//Atom:entry/Atom:category", namespaceManager));
                    string tags = videoTags != null ? string.Join(",", videoTags.ToArray()) : string.Empty;

                    string videoNameFile = Common.CleanFileName(videoName + "-" + fileVideo);
                    string videosDirectory = videoChannelName + "/" + "Videos";
                    if (!Directory.Exists(videosDirectory))
                    {
                        Directory.CreateDirectory(videosDirectory);
                    }

                    string summary = string.Join(Environment.NewLine, new string[]
                    {
                        "Video Channel : " + videoChannelName,
                        "Channel Url : " + pVideoWrapper.getChannelUrl(),
                        "Video Name : " + videoName,
                        "Video Url : " + url,
                        "Date : " + date,
                        "Video Views : " + videoViewCount,
                        "I Like : " + iLike,
                        "I dislike : " + iDislike,
                        "Description : " + description,
                        "Tags : " + tags
                    }) + Environment.NewLine;

                    File.AppendAllText(videosDirectory + "/" + "channel_video_" + videoNameFile, summary);
                    return;
                }
                catch (Exception)
                {
                    if (attempt >= maxAttempts)
                    {
                        // Give up on this video so the calling worker can finish.
                        return;
                    }

                    // Pause before the next attempt, as the original did.
                    Thread.Sleep(retryDelayMilliseconds);
                }
            }
        }
コード例 #19
0
ファイル: ChannelComment.cs プロジェクト: gcnonato/Crawler
        /// <summary>
        /// Scrapes a video's "all comments" pages starting at <paramref name="pPageNo"/> and appends
        /// each comment not yet in <c>GlobalConstants.commentDictionary</c> to
        /// <c>&lt;channel&gt;/Comments/&lt;cleaned video name&gt;-&lt;fileComment&gt;.txt</c>.
        /// </summary>
        /// <remarks>
        /// Paging continues only while <c>parseAllComments</c> is "true"; otherwise a single page is
        /// read and extraction stops early once <c>commentCount</c> reaches <c>totalCommentsParse</c>.
        /// Processing ends when a page contains no comment items. A page that throws is retried
        /// immediately, and the crawl of this video is abandoned after three consecutive failures.
        /// Exceptions are never propagated to the caller.
        /// </remarks>
        /// <param name="pChannelName">Directory under which the <c>Comments</c> folder is created.</param>
        /// <param name="pVideo">Video whose comments are scraped; supplies the key and the name.</param>
        /// <param name="pHtmlFiles">Not used by the current implementation; kept for existing callers.</param>
        /// <param name="pPageNo">First comment page to request.</param>
        public static void DownloadHtmls(string pChannelName, VideoWrapper pVideo, Dictionary<int, string> pHtmlFiles, int pPageNo)
        {
            // The original recursed both for the next page and, without any limit, for a retry
            // after an exception; a page that failed persistently overflowed the stack.
            const int maxConsecutiveFailures = 3;
            int consecutiveFailures = 0;

            while (true)
            {
                try
                {
                    string url = ConfigurationManager.AppSettings["VideoAllCommentsUrl"].ToString() + pVideo.getVideoKey() + "&page=" + pPageNo;

                    HtmlWeb hwObject = new HtmlWeb();
                    HtmlDocument doc = hwObject.Load(url);

                    // Termination: a page without comment items means there is nothing left.
                    HtmlNodeCollection totalCollection = doc.DocumentNode.SelectNodes("//ul[@id='all-comments']//li[@class='comment']");
                    if (totalCollection == null || totalCollection.Count <= 0)
                    {
                        return;
                    }

                    totalCollection = doc.DocumentNode.SelectNodes("//ul[@id='all-comments']//li[@class='comment']//div[@class='content']");
                    string videoUrl = "https://www.youtube.com/watch?v=" + pVideo.getVideoKey();

                    // Reset per page: the "Video Url" header precedes the first new comment of
                    // every page, matching the original per-call behaviour.
                    bool videoUrlFlag = false;

                    foreach (HtmlNode node in totalCollection)
                    {
                        string user = string.Empty;
                        string displayName = string.Empty;
                        string date = string.Empty;
                        string comment = string.Empty;

                        // Ids are read positionally from the parent element's attributes.
                        // NOTE(review): presumably the enclosing <li class='comment'>, with the
                        // author id second and the comment id third; verify against the markup.
                        HtmlNode nodeData = node.ParentNode;
                        string dataId = nodeData.Attributes[2].Value.Trim();
                        string authorId = nodeData.Attributes[1].Value.Trim();

                        bool commentCaptured = false;
                        foreach (HtmlNode child in node.ChildNodes)
                        {
                            if (child.Name.Equals("p"))
                            {
                                // First <span> carries the author (link + display name), the
                                // second one the comment date.
                                bool userFlag = false;
                                foreach (HtmlNode n in child.ChildNodes)
                                {
                                    if (n.Name.Equals("span") && !userFlag)
                                    {
                                        foreach (HtmlNode nNode in n.ChildNodes)
                                        {
                                            if (nNode.Name.Equals("a"))
                                            {
                                                user = nNode.Attributes["href"].Value.Split(new Char[] { '/' }, StringSplitOptions.RemoveEmptyEntries)[1];
                                                break;
                                            }
                                        }
                                        displayName = n.InnerText.Trim();
                                        userFlag = true;
                                    }
                                    else if (n.Name.Equals("span"))
                                    {
                                        date = n.InnerText.Trim();
                                        break;
                                    }
                                }
                            }
                            else if (child.Name.Equals("div") && !commentCaptured)
                            {
                                // Only the first <div> holds the comment text.
                                comment = child.InnerText.Trim();
                                commentCaptured = true;
                            }
                        }

                        if (!displayName.Equals("") && !comment.Equals("") && !dataId.Equals("") && !authorId.Equals("") && !user.Equals("") && !GlobalConstants.commentDictionary.ContainsKey(dataId))
                        {
                            VideoCommentWrapper commentWrapper = new VideoCommentWrapper();
                            commentWrapper.authorId = authorId;
                            commentWrapper.commentId = dataId;
                            commentWrapper.commentText = comment;
                            commentWrapper.time = date;
                            commentWrapper.displayName = displayName;
                            commentWrapper.userName = user;

                            GlobalConstants.commentDictionary.Add(dataId, commentWrapper);

                            string videoName = Common.CleanFileName(pVideo.getVideoName() + "-" + fileComment) + ".txt";
                            string commentsDirectory = pChannelName + "/" + "Comments";
                            if (!Directory.Exists(commentsDirectory))
                            {
                                Directory.CreateDirectory(commentsDirectory);
                            }
                            commentCount++;

                            // Build the record once and write it with a single append.
                            string record = string.Empty;
                            if (!videoUrlFlag)
                            {
                                record = "Video Url : " + videoUrl + Environment.NewLine + "\r\n";
                                videoUrlFlag = true;
                            }
                            record += "User name : " + displayName + Environment.NewLine
                                    + "Comment Date : " + date + Environment.NewLine
                                    + "Comment : " + comment + Environment.NewLine;
                            File.AppendAllText(commentsDirectory + "/" + videoName, record);
                        }

                        // With a comment cap configured, stop as soon as it is reached.
                        if (parseAllComments.Equals("false", StringComparison.CurrentCultureIgnoreCase) && totalCommentsParse <= commentCount)
                        {
                            break;
                        }
                    }

                    pPageNo++;
                    if (!parseAllComments.Equals("true", StringComparison.CurrentCultureIgnoreCase))
                    {
                        return;
                    }

                    // The page succeeded; the next one starts with a fresh failure budget.
                    consecutiveFailures = 0;
                }
                catch (Exception)
                {
                    // Retry the current page, but give up after repeated failures.
                    consecutiveFailures++;
                    if (consecutiveFailures >= maxConsecutiveFailures)
                    {
                        return;
                    }
                }
            }
        }