/// <summary>
/// Thread-pool callback that crawls the description of one queued video and
/// then marks that work item as finished.
/// </summary>
/// <param name="obj">The queued work item; it is cast to <c>VideoWrapper</c> and ignored when the cast fails.</param>
public static void Consume(Object obj)
{
    try
    {
        try
        {
            VideoWrapper video = obj as VideoWrapper;
            if (video != null)
            {
                ChannelVideo.parseVideo(video, multithreadingChannelName);
            }
        }
        finally
        {
            // Always account for the finished item, even when parsing failed;
            // otherwise anything polling QueueLength would wait forever.
            // NOTE(review): QueueLength is updated from several pool threads without
            // synchronisation; if it is a plain int field, Interlocked would be safer - confirm.
            QueueLength--;
            Console.WriteLine("consuming" + QueueLength);

            if (WaitForComplete && QueueLength == 0)
            {
                Event.Set();
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort: one failing video must not take down the pool thread,
        // but the failure should at least be visible.
        Console.Error.WriteLine("Consume failed: " + ex.Message);
    }
}
/// <summary>
/// Queues one video for processing by <c>Consume</c> on the thread pool and
/// counts it as outstanding work.
/// </summary>
/// <param name="videoDictionary">The video to hand to the consumer callback.</param>
public static void Produce(VideoWrapper videoDictionary)
{
    // Count the item BEFORE it is queued: a fast pool thread could otherwise run
    // Consume and decrement first, observe -1 instead of 0 and never signal completion.
    QueueLength++;
    try
    {
        ThreadPool.QueueUserWorkItem(new WaitCallback(Consume), videoDictionary);
    }
    catch
    {
        // The item never reached the pool, so it must not stay counted.
        QueueLength--;
        throw;
    }

    Console.WriteLine("producing" + QueueLength);
}
/// <summary>
/// Queues one video for processing by <c>Consume</c> on the thread pool and
/// counts it as outstanding work.
/// </summary>
/// <param name="videoDictionary">The video to hand to the consumer callback.</param>
public static void Produce(VideoWrapper videoDictionary)
{
    // Count the item BEFORE it is queued: a fast pool thread could otherwise run
    // Consume and decrement first, observe -1 instead of 0 and never signal completion.
    QueueLength++;
    try
    {
        ThreadPool.QueueUserWorkItem(new WaitCallback(Consume), videoDictionary);
    }
    catch
    {
        // The item never reached the pool, so it must not stay counted.
        QueueLength--;
        throw;
    }

    Console.WriteLine("producing" + QueueLength);
}
/// <summary>
/// Queues every video of the dictionary for comment crawling, throttling the
/// producer so that no more than the configured number of items is outstanding,
/// and blocks until the outstanding-work counter has dropped back to zero.
/// </summary>
/// <param name="videoDictionary">Videos to crawl; only the values are used.</param>
/// <param name="pChannelName">Channel name, stored in the shared <c>channelName</c> for the consumers.</param>
/// <returns>Always <c>true</c> once all queued work has drained.</returns>
public static bool CrawlComments(Dictionary<string, VideoWrapper> videoDictionary, string pChannelName)
{
    channelName = pChannelName;

    int maxOutstanding = Int32.Parse(ConfigurationManager.AppSettings["totalThreadsAtOneTime"].ToString());

    foreach (VideoWrapper queuedVideo in videoDictionary.Values)
    {
        try
        {
            Produce(queuedVideo);

            // Throttle: poll until a slot is free again.
            while (true)
            {
                if (QueueLength < maxOutstanding)
                {
                    break;
                }
                Thread.Sleep(2000);
            }
        }
        catch (Exception)
        {
            // A failure for one video is ignored; move on to the next one.
        }
    }

    // Drain: wait for every queued item to be consumed.
    while (true)
    {
        if (QueueLength <= 0)
        {
            break;
        }
        Thread.Sleep(1000);
    }

    return true;
}
/// <summary>
/// Thread-pool callback that crawls the comments of one queued video, starting
/// at page 1, and then marks that work item as finished.
/// </summary>
/// <param name="obj">The queued work item; it is cast to <c>VideoWrapper</c> and ignored when the cast fails.</param>
public static void Consume(Object obj)
{
    try
    {
        try
        {
            VideoWrapper video = obj as VideoWrapper;
            if (video != null)
            {
                Dictionary<int, string> htmlFiles = new Dictionary<int, string>();
                DownloadHtmls(channelName, video, htmlFiles, 1);
                commentCount = 0;
            }
        }
        finally
        {
            // Always account for the finished item, even when the crawl failed;
            // otherwise the loops polling QueueLength would wait forever.
            // NOTE(review): QueueLength and commentCount are shared between pool threads
            // without synchronisation - confirm whether that is acceptable.
            QueueLength--;
            Console.WriteLine("consuming" + QueueLength);

            if (WaitForComplete && QueueLength == 0)
            {
                Event.Set();
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort: one failing video must not take down the pool thread,
        // but the failure should at least be visible.
        Console.Error.WriteLine("Consume failed: " + ex.Message);
    }
}
/// <summary>
/// Parses the previously downloaded comment pages of one video and appends every
/// not-yet-seen comment to the video's text file under "&lt;channel&gt;/Comments".
/// The downloaded pages are handed to <c>Common.RemoveTempFiles</c> afterwards.
/// </summary>
/// <param name="pVideoWrapper">Video whose comments are parsed.</param>
/// <param name="pChannelName">Channel directory that contains the "Comments" folder.</param>
/// <param name="pHtmlFiles">Page number to downloaded HTML file name (relative to the "Comments" folder).</param>
public static void GetAllComments(VideoWrapper pVideoWrapper, string pChannelName, Dictionary<int, string> pHtmlFiles)
{
    string videoUrl = "https://www.youtube.com/watch?v=" + pVideoWrapper.getVideoKey();
    bool videoUrlFlag = false;
    List<string> tempFiles = new List<string>();

    foreach (KeyValuePair<int, string> pair in pHtmlFiles)
    {
        try
        {
            bool breakLoop = false;

            // Read the page and release the file handle immediately, so that a parse
            // failure cannot leave the file locked when it is removed below.
            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            using (Stream stream = File.OpenRead(pChannelName + "/Comments/" + pair.Value))
            using (StreamReader reader = new StreamReader(stream))
            {
                doc.LoadHtml(reader.ReadToEnd());
            }

            HtmlNodeCollection totalCollection = doc.DocumentNode.SelectNodes("//ul[@id='all-comments']//li[@class='comment']//div[@class='content']");
            if (totalCollection == null)
            {
                // SelectNodes yields null when nothing matches: no comments on this page.
                continue;
            }

            foreach (HtmlNode node in totalCollection)
            {
                string user = string.Empty;
                string displayName = string.Empty;
                string date = string.Empty;
                string comment = string.Empty;

                // NOTE(review): the ids are read by attribute position on the parent node,
                // which depends on the exact markup - confirm against a sample page.
                HtmlNode nodeData = node.ParentNode;
                string dataId = nodeData.Attributes[2].Value.Trim();
                string authorId = nodeData.Attributes[1].Value.Trim();

                int divCount = 0;
                foreach (HtmlNode child in node.ChildNodes)
                {
                    if (child.Name.Equals("p"))
                    {
                        // First span of the paragraph carries the author, the second the date.
                        bool userFlag = false;
                        foreach (HtmlNode n in child.ChildNodes)
                        {
                            if (n.Name.Equals("span") && !userFlag)
                            {
                                foreach (HtmlNode nNode in n.ChildNodes)
                                {
                                    if (nNode.Name.Equals("a"))
                                    {
                                        user = nNode.Attributes["href"].Value.Split(new Char[] { '/' }, StringSplitOptions.RemoveEmptyEntries)[1];
                                        break;
                                    }
                                }
                                displayName = n.InnerText.Trim();
                                userFlag = true;
                            }
                            else if (n.Name.Equals("span"))
                            {
                                date = n.InnerText.Trim();
                                break;
                            }
                        }
                    }
                    else if (child.Name.Equals("div"))
                    {
                        if (divCount == 0)
                        {
                            // Only the first div holds the comment text.
                            comment = child.InnerText.Trim();
                            divCount++;
                        }
                    }
                }

                if (!displayName.Equals("") && !comment.Equals("") && !dataId.Equals("") && !authorId.Equals("") && !user.Equals("") && !GlobalConstants.commentDictionary.ContainsKey(dataId))
                {
                    VideoCommentWrapper commentWrapper = new VideoCommentWrapper();
                    commentWrapper.authorId = authorId;
                    commentWrapper.commentId = dataId;
                    commentWrapper.commentText = comment;
                    commentWrapper.time = date;
                    commentWrapper.displayName = displayName;
                    commentWrapper.userName = user;
                    GlobalConstants.commentDictionary.Add(dataId, commentWrapper);

                    string commentsDirectory = pChannelName + "/" + "Comments";
                    string outputPath = commentsDirectory + "/" + Common.CleanFileName(pVideoWrapper.getVideoName() + "-" + fileComment) + ".txt";
                    Directory.CreateDirectory(commentsDirectory); // no-op when it already exists
                    commentCount++;

                    // One append per comment; the video URL header is written once per video.
                    string entryText = string.Empty;
                    if (!videoUrlFlag)
                    {
                        entryText = "Video Url : " + videoUrl + Environment.NewLine + "\r\n";
                        videoUrlFlag = true;
                    }
                    entryText += "User name : " + displayName + Environment.NewLine
                        + "Comment Date : " + date + Environment.NewLine
                        + "Comment : " + comment + Environment.NewLine;
                    File.AppendAllText(outputPath, entryText);
                }

                if (parseAllComments.Equals("false", StringComparison.CurrentCultureIgnoreCase) && totalCommentsParse <= commentCount)
                {
                    breakLoop = true;
                    break;
                }
            }

            if (breakLoop)
            {
                break;
            }
        }
        catch (Exception ex)
        {
            // Best effort: a broken page is skipped, but the reason is reported.
            Console.Error.WriteLine("GetAllComments: skipping " + pair.Value + ": " + ex.Message);
            continue;
        }
    }

    foreach (KeyValuePair<int, string> file in pHtmlFiles)
    {
        tempFiles.Add("/Comments/" + file.Value);
    }
    Common.RemoveTempFiles(tempFiles, pChannelName);
}
/// <summary>
/// Downloads the "all comments" pages of a video, starting at <paramref name="pPageNo"/>,
/// and appends every not-yet-seen comment to the video's text file under
/// "&lt;channel&gt;/Comments". Continues with the following pages only while the
/// <c>parseAllComments</c> setting is "true"; stops at the first page without comments.
/// </summary>
/// <param name="pChannelName">Channel directory that receives the "Comments" folder.</param>
/// <param name="pVideo">Video whose comments are crawled.</param>
/// <param name="pHtmlFiles">Not used by the current implementation; kept for the existing callers.</param>
/// <param name="pPageNo">First page number to request.</param>
public static void DownloadHtmls(string pChannelName, VideoWrapper pVideo, Dictionary<int, string> pHtmlFiles, int pPageNo)
{
    // A failing page used to be retried by unbounded, immediate recursion, which ends in a
    // stack overflow whenever the failure is deterministic. Retry a bounded number of times.
    const int maxAttemptsPerPage = 3;
    int failedAttempts = 0;

    while (true)
    {
        try
        {
            string url = ConfigurationManager.AppSettings["VideoAllCommentsUrl"].ToString() + pVideo.getVideoKey() + "&page=" + pPageNo;

            HtmlWeb hwObject = new HtmlWeb();
            HtmlDocument doc = hwObject.Load(url);

            // Base case: a page without any comment entries ends the crawl.
            HtmlNodeCollection commentItems = doc.DocumentNode.SelectNodes("//ul[@id='all-comments']//li[@class='comment']");
            if (commentItems == null || commentItems.Count <= 0)
            {
                return;
            }

            HtmlNodeCollection totalCollection = doc.DocumentNode.SelectNodes("//ul[@id='all-comments']//li[@class='comment']//div[@class='content']");
            string videoUrl = "https://www.youtube.com/watch?v=" + pVideo.getVideoKey();
            bool videoUrlFlag = false; // reset per page, as before

            // SelectNodes yields null when nothing matches; treat that as an empty page.
            if (totalCollection != null)
            {
                foreach (HtmlNode node in totalCollection)
                {
                    string user = string.Empty;
                    string displayName = string.Empty;
                    string date = string.Empty;
                    string comment = string.Empty;

                    // NOTE(review): the ids are read by attribute position on the parent node,
                    // which depends on the exact markup - confirm against a sample page.
                    HtmlNode nodeData = node.ParentNode;
                    string dataId = nodeData.Attributes[2].Value.Trim();
                    string authorId = nodeData.Attributes[1].Value.Trim();

                    int divCount = 0;
                    foreach (HtmlNode child in node.ChildNodes)
                    {
                        if (child.Name.Equals("p"))
                        {
                            // First span of the paragraph carries the author, the second the date.
                            bool userFlag = false;
                            foreach (HtmlNode n in child.ChildNodes)
                            {
                                if (n.Name.Equals("span") && !userFlag)
                                {
                                    foreach (HtmlNode nNode in n.ChildNodes)
                                    {
                                        if (nNode.Name.Equals("a"))
                                        {
                                            user = nNode.Attributes["href"].Value.Split(new Char[] { '/' }, StringSplitOptions.RemoveEmptyEntries)[1];
                                            break;
                                        }
                                    }
                                    displayName = n.InnerText.Trim();
                                    userFlag = true;
                                }
                                else if (n.Name.Equals("span"))
                                {
                                    date = n.InnerText.Trim();
                                    break;
                                }
                            }
                        }
                        else if (child.Name.Equals("div"))
                        {
                            if (divCount == 0)
                            {
                                // Only the first div holds the comment text.
                                comment = child.InnerText.Trim();
                                divCount++;
                            }
                        }
                    }

                    if (!displayName.Equals("") && !comment.Equals("") && !dataId.Equals("") && !authorId.Equals("") && !user.Equals("") && !GlobalConstants.commentDictionary.ContainsKey(dataId))
                    {
                        VideoCommentWrapper commentWrapper = new VideoCommentWrapper();
                        commentWrapper.authorId = authorId;
                        commentWrapper.commentId = dataId;
                        commentWrapper.commentText = comment;
                        commentWrapper.time = date;
                        commentWrapper.displayName = displayName;
                        commentWrapper.userName = user;
                        GlobalConstants.commentDictionary.Add(dataId, commentWrapper);

                        string commentsDirectory = pChannelName + "/" + "Comments";
                        string outputPath = commentsDirectory + "/" + Common.CleanFileName(pVideo.getVideoName() + "-" + fileComment) + ".txt";
                        Directory.CreateDirectory(commentsDirectory); // no-op when it already exists
                        commentCount++;

                        // One append per comment instead of up to four separate file opens.
                        string entryText = string.Empty;
                        if (!videoUrlFlag)
                        {
                            entryText = "Video Url : " + videoUrl + Environment.NewLine + "\r\n";
                            videoUrlFlag = true;
                        }
                        entryText += "User name : " + displayName + Environment.NewLine
                            + "Comment Date : " + date + Environment.NewLine
                            + "Comment : " + comment + Environment.NewLine;
                        File.AppendAllText(outputPath, entryText);
                    }

                    // NOTE(review): commentCount is a shared static also touched by other
                    // worker threads - confirm the limit is meant to be global.
                    if (parseAllComments.Equals("false", StringComparison.CurrentCultureIgnoreCase) && totalCommentsParse <= commentCount)
                    {
                        break;
                    }
                }
            }

            pPageNo++;
            failedAttempts = 0;

            // Only keep paging when all comments were requested.
            if (!parseAllComments.Equals("true", StringComparison.CurrentCultureIgnoreCase))
            {
                return;
            }
        }
        catch (Exception ex)
        {
            failedAttempts++;
            Console.Error.WriteLine("DownloadHtmls: page " + pPageNo + " failed (attempt " + failedAttempts + "): " + ex.Message);
            if (failedAttempts >= maxAttemptsPerPage)
            {
                return;
            }
            // Otherwise loop around and retry the same page; comments that were already
            // stored are skipped through the commentDictionary check.
        }
    }
}
/// <summary>
/// Fetches the GData entry of one video and appends its details (channel, name,
/// URL, date, views, likes, dislikes, description, tags) to
/// "&lt;channel&gt;/Videos/channel_video_&lt;name&gt;".
/// A failing attempt is retried after a pause, up to a fixed number of attempts.
/// </summary>
/// <param name="pVideoWrapper">Video to look up; its key selects the feed entry.</param>
/// <param name="pChannelName">Channel name; its cleaned form is the output directory.</param>
public static void parseVideo(VideoWrapper pVideoWrapper, string pChannelName)
{
    // The retry used to be unbounded recursion, which never returns for a permanently
    // failing video and re-appended the already written lines on every pass.
    const int maxAttempts = 3;
    const int retryDelayMilliseconds = 10000;

    string fileVideo = ConfigurationManager.AppSettings["channelVideo"].ToString();
    string videoChannelName = Common.CleanFileName(pChannelName);

    for (int attempt = 1; attempt <= maxAttempts; attempt++)
    {
        try
        {
            string videoUrl = string.Format("https://gdata.youtube.com/feeds/api/videos/{0}?v=2", pVideoWrapper.getVideoKey());

            // Dispose response, stream and reader so that connections are not leaked.
            string xmlData;
            WebRequest nameRequest = WebRequest.Create(videoUrl);
            using (HttpWebResponse nameResponse = (HttpWebResponse)nameRequest.GetResponse())
            using (Stream nameStream = nameResponse.GetResponseStream())
            using (StreamReader nameReader = new StreamReader(nameStream))
            {
                xmlData = nameReader.ReadToEnd();
            }

            // Parse straight from memory: no temporary XML file that could be left behind
            // on failure or collide between worker threads.
            XmlDocument doc = new XmlDocument();
            doc.LoadXml(xmlData);

            XmlNamespaceManager namespaceManager = new XmlNamespaceManager(doc.NameTable);
            namespaceManager.AddNamespace("Atom", "http://www.w3.org/2005/Atom");
            namespaceManager.AddNamespace("yt", "http://gdata.youtube.com/schemas/2007");
            namespaceManager.AddNamespace("media", "http://search.yahoo.com/mrss/");

            // Look every node up once instead of once per use.
            XmlNode linkNode = doc.SelectSingleNode("//Atom:entry/Atom:link", namespaceManager);
            XmlNode titleNode = doc.SelectSingleNode("//Atom:entry/Atom:title", namespaceManager);
            XmlNode publishedNode = doc.SelectSingleNode("//Atom:entry/Atom:published", namespaceManager);
            XmlNode ratingNode = doc.SelectSingleNode("//Atom:entry/yt:rating", namespaceManager);
            XmlNode descriptionNode = doc.SelectSingleNode(" //Atom:entry/media:group/media:description", namespaceManager);
            XmlNode statisticsNode = doc.SelectSingleNode("//Atom:entry/yt:statistics", namespaceManager);

            // Only the first link is considered, and only when it is the "alternate" one.
            string url = string.Empty;
            if (linkNode != null && linkNode.Attributes["rel"] != null && linkNode.Attributes["href"] != null
                && linkNode.Attributes["rel"].Value.Equals("alternate", StringComparison.CurrentCultureIgnoreCase))
            {
                string[] urlArr = linkNode.Attributes["href"].Value.Split(new Char[] { '&' }, StringSplitOptions.RemoveEmptyEntries);
                if (urlArr.Length > 0)
                {
                    url = urlArr[0];
                }
            }

            string videoName = titleNode != null ? titleNode.InnerText : string.Empty;
            string date = publishedNode != null ? publishedNode.InnerText : string.Empty;
            string iDislike = ratingNode != null && ratingNode.Attributes["numDislikes"] != null ? ratingNode.Attributes["numDislikes"].Value : string.Empty;
            string iLike = ratingNode != null && ratingNode.Attributes["numLikes"] != null ? ratingNode.Attributes["numLikes"].Value : string.Empty;
            string description = descriptionNode != null ? descriptionNode.InnerText : string.Empty;
            string videoViewCount = statisticsNode != null && statisticsNode.Attributes["viewCount"] != null ? statisticsNode.Attributes["viewCount"].Value : string.Empty;

            // The tag helper may return null; that must not abort the whole write.
            List<string> videoTags = preapreParamsTags(doc.SelectNodes("//Atom:entry/Atom:category", namespaceManager));
            string tagsText = videoTags != null ? string.Join(",", videoTags.ToArray()) : string.Empty;

            string videosDirectory = videoChannelName + "/" + "Videos";
            string outputPath = videosDirectory + "/" + "channel_video_" + Common.CleanFileName(videoName + "-" + fileVideo);
            Directory.CreateDirectory(videosDirectory); // no-op when it already exists

            // Build the whole record first and append it once, so a failure can never
            // leave (or duplicate) a partially written record.
            string details =
                "Video Channel : " + videoChannelName + Environment.NewLine
                + "Channel Url : " + pVideoWrapper.getChannelUrl() + Environment.NewLine
                + "Video Name : " + videoName + Environment.NewLine
                + "Video Url : " + url + Environment.NewLine
                + "Date : " + date + Environment.NewLine
                + "Video Views : " + videoViewCount + Environment.NewLine
                + "I Like : " + iLike + Environment.NewLine
                + "I dislike : " + iDislike + Environment.NewLine
                + "Description : " + description + Environment.NewLine
                + "Tags : " + tagsText + Environment.NewLine;
            File.AppendAllText(outputPath, details);
            return;
        }
        catch (Exception ex)
        {
            Console.Error.WriteLine("parseVideo: attempt " + attempt + " of " + maxAttempts + " failed: " + ex.Message);
            if (attempt < maxAttempts)
            {
                Thread.Sleep(retryDelayMilliseconds);
            }
        }
    }
}
/// <summary>
/// Pages through the uploads feed of a channel, 25 entries at a time, starting at
/// <paramref name="startIndex"/>. Every video that is not yet in
/// <paramref name="videoDictionary"/> is added to it, written to the channel file
/// and queued through <c>Produce</c>. Paging stops when the feed is exhausted or
/// one of the configured limits is reached; a failing page is retried after a
/// pause, up to a fixed number of attempts.
/// </summary>
/// <param name="pChannelName">Channel (user) name used in the feed URL and for the output directory.</param>
/// <param name="pChannelId">Not used by the current implementation; kept for the existing callers.</param>
/// <param name="startIndex">Feed start index of the first page to request.</param>
/// <param name="videoDictionary">Collects the videos found, keyed by video id; nothing is collected when null.</param>
/// <param name="requestType">Only <c>All</c> produces a request URL and continues paging.</param>
public static void WriteVideoLists(string pChannelName, string pChannelId, int startIndex, Dictionary<string, VideoWrapper> videoDictionary, Enumeration.VideoRequestType requestType)
{
    const int pageSize = 25;
    // The retry used to be unbounded recursion, which never returns for a permanently
    // failing feed. Retry a bounded number of times instead.
    const int maxAttemptsPerPage = 3;
    const int retryDelayMilliseconds = 10000;
    int failedAttempts = 0;

    while (true)
    {
        try
        {
            string videoName = String.Empty;
            string videoUrl = String.Empty;
            string videoId = String.Empty;

            string videFileNameXML = ConfigurationManager.AppSettings["channelsVideoFileXML"].ToString();
            string channelFileName = ConfigurationManager.AppSettings["channelsFileName"].ToString();
            string channelCleanedName = Common.CleanFileName(pChannelName);

            string channelUrl = String.Empty;
            if (requestType == Enumeration.VideoRequestType.All)
            {
                channelUrl = "https://gdata.youtube.com/feeds/api/users/" + pChannelName + "/uploads?&start-index=" + startIndex;
            }

            HttpWebRequest nameRequest = (HttpWebRequest)WebRequest.Create(channelUrl);
            nameRequest.KeepAlive = false;
            nameRequest.ProtocolVersion = HttpVersion.Version10;

            // Dispose response, stream and reader so that connections are not leaked.
            string xmlData;
            using (HttpWebResponse nameResponse = (HttpWebResponse)nameRequest.GetResponse())
            using (Stream nameStream = nameResponse.GetResponseStream())
            using (StreamReader nameReader = new StreamReader(nameStream))
            {
                xmlData = nameReader.ReadToEnd();
            }

            string xmlPath = channelCleanedName + "/" + videFileNameXML;
            File.WriteAllText(xmlPath, xmlData);

            XmlDocument doc = new XmlDocument();
            doc.Load(xmlPath);

            XmlNamespaceManager namespaceManager = new XmlNamespaceManager(doc.NameTable);
            namespaceManager.AddNamespace("Atom", "http://www.w3.org/2005/Atom");
            XmlNodeList listResult = doc.SelectNodes(channelAtomEntry, namespaceManager);

            XmlNamespaceManager namespaceManager1 = new XmlNamespaceManager(doc.NameTable);
            namespaceManager1.AddNamespace("openSearch", "http://a9.com/-/spec/opensearchrss/1.0/");
            XmlNode nodeTotal = doc.SelectSingleNode("//openSearch:totalResults", namespaceManager1);
            if (nodeTotal == null)
            {
                // Handled by the retry logic below, with a meaningful message.
                throw new XmlException("openSearch:totalResults is missing from the feed");
            }
            int total = Int32.Parse(nodeTotal.InnerText);

            // Base cases: nothing (more) to read.
            if (total == 0 || listResult == null)
            {
                Constant.tempFiles.Add(videFileNameXML);
                return;
            }

            // Read the limit configuration once per page instead of once per entry.
            bool limitVideos = ConfigurationManager.AppSettings["ExtractAllVideosFlag"].ToString().Equals("False", StringComparison.InvariantCultureIgnoreCase);
            int totalVideo = limitVideos ? Int32.Parse(ConfigurationManager.AppSettings["totalVideos"].ToString()) : 0;

            bool stop;
            string flag = ConfigurationManager.AppSettings["testingFlag"].ToString();
            if (flag.Equals("true", StringComparison.CurrentCultureIgnoreCase))
            {
                // Testing mode: only the first page is read.
                stop = startIndex >= 26;
            }
            else if (limitVideos)
            {
                stop = totalVideo <= recordCount;
            }
            else
            {
                stop = total <= startIndex;
            }

            if (stop)
            {
                Constant.tempFiles.Add(videFileNameXML);
                return;
            }

            foreach (XmlNode entry in listResult)
            {
                bool idFound = false;
                bool titleFound = false;

                foreach (XmlNode node in entry.ChildNodes)
                {
                    if (node.Name.Equals("id"))
                    {
                        // The video id is the last path segment of the entry id.
                        videoUrl = node.InnerText;
                        string[] arrId = videoUrl.Split(new Char[] { '/' }, StringSplitOptions.RemoveEmptyEntries);
                        videoId = arrId[arrId.Length - 1];
                        idFound = true;
                    }
                    else if (node.Name.Equals("title"))
                    {
                        videoName = node.InnerText;
                        titleFound = true;
                    }

                    if (idFound && titleFound)
                    {
                        if (videoDictionary != null && !videoDictionary.ContainsKey(videoId))
                        {
                            VideoWrapper vWrapper = new VideoWrapper(videoName, videoId, videoUrl, channelUrlMain);
                            videoDictionary.Add(videoId, vWrapper);
                            File.AppendAllText(channelCleanedName + "/" + channelFileName, "\t" + videoName + "\r\n");
                            Produce(vWrapper);
                            recordCount++;
                        }
                        break;
                    }
                }

                if (limitVideos && totalVideo <= recordCount)
                {
                    Constant.tempFiles.Add(videFileNameXML);
                    return;
                }
            }

            startIndex += pageSize;
            failedAttempts = 0;

            if (requestType != Enumeration.VideoRequestType.All)
            {
                return;
            }
        }
        catch (Exception ex)
        {
            failedAttempts++;
            Console.Error.WriteLine("WriteVideoLists: start-index " + startIndex + " failed (attempt " + failedAttempts + "): " + ex.Message);
            if (failedAttempts >= maxAttemptsPerPage)
            {
                return;
            }

            Thread.Sleep(retryDelayMilliseconds);
            if (requestType != Enumeration.VideoRequestType.All)
            {
                return;
            }
            // Otherwise loop around and retry the same page; videos that were already
            // collected are skipped through the dictionary check.
        }
    }
}
/// <summary>
/// Parses the previously downloaded comment pages of one video and appends every
/// not-yet-seen comment to the video's text file under "&lt;channel&gt;/Comments".
/// The downloaded pages are handed to <c>Common.RemoveTempFiles</c> afterwards.
/// </summary>
/// <param name="pVideoWrapper">Video whose comments are parsed.</param>
/// <param name="pChannelName">Channel directory that contains the "Comments" folder.</param>
/// <param name="pHtmlFiles">Page number to downloaded HTML file name (relative to the "Comments" folder).</param>
public static void GetAllComments(VideoWrapper pVideoWrapper, string pChannelName, Dictionary<int, string> pHtmlFiles)
{
    string videoUrl = "https://www.youtube.com/watch?v=" + pVideoWrapper.getVideoKey();
    bool videoUrlFlag = false;
    List<string> tempFiles = new List<string>();

    foreach (KeyValuePair<int, string> pair in pHtmlFiles)
    {
        try
        {
            bool breakLoop = false;

            // Read the page and release the file handle immediately, so that a parse
            // failure cannot leave the file locked when it is removed below.
            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            using (Stream stream = File.OpenRead(pChannelName + "/Comments/" + pair.Value))
            using (StreamReader reader = new StreamReader(stream))
            {
                doc.LoadHtml(reader.ReadToEnd());
            }

            HtmlNodeCollection totalCollection = doc.DocumentNode.SelectNodes("//ul[@id='all-comments']//li[@class='comment']//div[@class='content']");
            if (totalCollection == null)
            {
                // SelectNodes yields null when nothing matches: no comments on this page.
                continue;
            }

            foreach (HtmlNode node in totalCollection)
            {
                string user = string.Empty;
                string displayName = string.Empty;
                string date = string.Empty;
                string comment = string.Empty;

                // NOTE(review): the ids are read by attribute position on the parent node,
                // which depends on the exact markup - confirm against a sample page.
                HtmlNode nodeData = node.ParentNode;
                string dataId = nodeData.Attributes[2].Value.Trim();
                string authorId = nodeData.Attributes[1].Value.Trim();

                int divCount = 0;
                foreach (HtmlNode child in node.ChildNodes)
                {
                    if (child.Name.Equals("p"))
                    {
                        // First span of the paragraph carries the author, the second the date.
                        bool userFlag = false;
                        foreach (HtmlNode n in child.ChildNodes)
                        {
                            if (n.Name.Equals("span") && !userFlag)
                            {
                                foreach (HtmlNode nNode in n.ChildNodes)
                                {
                                    if (nNode.Name.Equals("a"))
                                    {
                                        user = nNode.Attributes["href"].Value.Split(new Char[] { '/' }, StringSplitOptions.RemoveEmptyEntries)[1];
                                        break;
                                    }
                                }
                                displayName = n.InnerText.Trim();
                                userFlag = true;
                            }
                            else if (n.Name.Equals("span"))
                            {
                                date = n.InnerText.Trim();
                                break;
                            }
                        }
                    }
                    else if (child.Name.Equals("div"))
                    {
                        if (divCount == 0)
                        {
                            // Only the first div holds the comment text.
                            comment = child.InnerText.Trim();
                            divCount++;
                        }
                    }
                }

                if (!displayName.Equals("") && !comment.Equals("") && !dataId.Equals("") && !authorId.Equals("") && !user.Equals("") && !GlobalConstants.commentDictionary.ContainsKey(dataId))
                {
                    VideoCommentWrapper commentWrapper = new VideoCommentWrapper();
                    commentWrapper.authorId = authorId;
                    commentWrapper.commentId = dataId;
                    commentWrapper.commentText = comment;
                    commentWrapper.time = date;
                    commentWrapper.displayName = displayName;
                    commentWrapper.userName = user;
                    GlobalConstants.commentDictionary.Add(dataId, commentWrapper);

                    string commentsDirectory = pChannelName + "/" + "Comments";
                    string outputPath = commentsDirectory + "/" + Common.CleanFileName(pVideoWrapper.getVideoName() + "-" + fileComment) + ".txt";
                    Directory.CreateDirectory(commentsDirectory); // no-op when it already exists
                    commentCount++;

                    // One append per comment; the video URL header is written once per video.
                    string entryText = string.Empty;
                    if (!videoUrlFlag)
                    {
                        entryText = "Video Url : " + videoUrl + Environment.NewLine + "\r\n";
                        videoUrlFlag = true;
                    }
                    entryText += "User name : " + displayName + Environment.NewLine
                        + "Comment Date : " + date + Environment.NewLine
                        + "Comment : " + comment + Environment.NewLine;
                    File.AppendAllText(outputPath, entryText);
                }

                if (parseAllComments.Equals("false", StringComparison.CurrentCultureIgnoreCase) && totalCommentsParse <= commentCount)
                {
                    breakLoop = true;
                    break;
                }
            }

            if (breakLoop)
            {
                break;
            }
        }
        catch (Exception ex)
        {
            // Best effort: a broken page is skipped, but the reason is reported.
            Console.Error.WriteLine("GetAllComments: skipping " + pair.Value + ": " + ex.Message);
            continue;
        }
    }

    foreach (KeyValuePair<int, string> file in pHtmlFiles)
    {
        tempFiles.Add("/Comments/" + file.Value);
    }
    Common.RemoveTempFiles(tempFiles, pChannelName);
}
/// <summary>
/// Reads one page of a user's favourites feed starting at <paramref name="pStartIndex"/>,
/// writes the newly found videos to the channel file, runs video and comment
/// crawling for them and then continues with the next page (25 entries further).
/// Stops when the feed reports no results, a page has no entries, or the
/// configured video limit is reached. Any failure ends the extraction.
/// </summary>
/// <param name="pChannelName">Channel name; its cleaned form is the output directory.</param>
/// <param name="pUserId">User whose favourites feed is requested.</param>
/// <param name="pStartIndex">Feed start index of the page to request.</param>
public static void ExtractFromUserFavourite(string pChannelName, string pUserId, int pStartIndex)
{
    try
    {
        string channelFileName = ConfigurationManager.AppSettings["channelsFileName"].ToString();
        string channelFileNameXML = ConfigurationManager.AppSettings["channelsFileNameXML"].ToString();
        string channelCleanedName = Common.CleanFileName(pChannelName);

        if (ConfigurationManager.AppSettings["ExtractAllVideosFlag"].ToString().Equals("False", StringComparison.InvariantCultureIgnoreCase))
        {
            int totalVideo = Int32.Parse(ConfigurationManager.AppSettings["totalVideos"].ToString());
            if (totalVideo <= recordCount)
            {
                return;
            }
        }

        string favouriteUrl = "https://gdata.youtube.com/feeds/api/users/" + pUserId + "/favorites?start-index=" + pStartIndex + "&v=2";

        // Dispose response, stream and reader so that connections are not leaked.
        string xmlData;
        WebRequest nameRequest = WebRequest.Create(favouriteUrl);
        using (HttpWebResponse nameResponse = (HttpWebResponse)nameRequest.GetResponse())
        using (Stream nameStream = nameResponse.GetResponseStream())
        using (StreamReader nameReader = new StreamReader(nameStream))
        {
            xmlData = nameReader.ReadToEnd();
        }

        string xmlPath = channelCleanedName + "/" + channelFileNameXML;
        File.WriteAllText(xmlPath, xmlData);
        Constant.tempFiles.Add(channelFileNameXML);

        XmlDocument doc = new XmlDocument();
        doc.Load(xmlPath);

        XmlNamespaceManager namespaceManager = new XmlNamespaceManager(doc.NameTable);
        namespaceManager.AddNamespace("Atom", "http://www.w3.org/2005/Atom");
        XmlNamespaceManager openSearchNameSpace = new XmlNamespaceManager(doc.NameTable);
        openSearchNameSpace.AddNamespace("openSearch", "http://a9.com/-/spec/opensearch/1.1/");

        // The null check has to come BEFORE the node is read; a missing node skips the
        // processing and goes straight to the clean-up at the end.
        XmlNode totalRecordNode = doc.SelectSingleNode("//openSearch:totalResults", openSearchNameSpace);
        if (totalRecordNode != null && totalRecordNode.InnerText.Equals("0"))
        {
            return;
        }

        if (totalRecordNode != null)
        {
            XmlNode titleNode = doc.SelectSingleNode("//Atom:title", namespaceManager);
            if (titleNode != null)
            {
                File.AppendAllText(channelCleanedName + "/" + channelFileName, titleNode.InnerText + "\r\n");
            }

            XmlNodeList listNodes = doc.SelectNodes("//Atom:entry", namespaceManager);
            if (listNodes.Count == 0)
            {
                return;
            }

            // The section header is emitted together with the first new video only.
            StringBuilder strBuilder = new StringBuilder();
            strBuilder.Append("\tFavourite Videos:\r\n");

            // NOTE(review): title/key/url deliberately keep their previous value when an
            // entry lacks the element, exactly as before - confirm this is intended.
            string title = String.Empty;
            string key = String.Empty;
            string url = String.Empty;
            Dictionary<string, VideoWrapper> dictionaryVideoWrapper = new Dictionary<string, VideoWrapper>();

            foreach (XmlNode n in listNodes)
            {
                foreach (XmlNode node in n.ChildNodes)
                {
                    if (node.Name.Equals("title"))
                    {
                        title = node.InnerText;
                    }
                    else if (node.Name.Equals("link"))
                    {
                        XmlAttribute rel = node.Attributes["rel"];
                        if (rel != null && rel.Value.Equals("alternate", StringComparison.CurrentCultureIgnoreCase))
                        {
                            url = node.Attributes["href"].Value.Split(new Char[] { '&' }, StringSplitOptions.RemoveEmptyEntries)[0];
                            key = url.Split(new Char[] { '=' }, StringSplitOptions.RemoveEmptyEntries)[1];
                        }
                    }
                }

                strBuilder.Append("\t\tVideo Name: " + title + "\r\n");

                if (!dictionaryVideoWrapper.ContainsKey(key))
                {
                    VideoWrapper vWrapper = new VideoWrapper();
                    vWrapper.setVideoKey(key);
                    vWrapper.setVideoName(title);
                    vWrapper.setVideoUrl(url);
                    dictionaryVideoWrapper.Add(key, vWrapper);
                    updatedFlag = true;
                    recordCount++;
                }

                if (updatedFlag)
                {
                    File.AppendAllText(channelCleanedName + "/" + channelFileName, strBuilder.ToString());
                }
                updatedFlag = false;
                strBuilder.Remove(0, strBuilder.Length);
            }

            ChannelVideo.parseVideo(dictionaryVideoWrapper, pChannelName);
            ChannelComment.CrawlComments(dictionaryVideoWrapper, pChannelName);

            pStartIndex += 25;
            ExtractFromUserFavourite(pChannelName, pUserId, pStartIndex);
        }

        Common.RemoveTempFiles(Constant.tempFiles, channelCleanedName);
    }
    catch (Exception ex)
    {
        // Best effort: the extraction ends here, but the reason is reported.
        Console.Error.WriteLine("ExtractFromUserFavourite: start-index " + pStartIndex + " failed: " + ex.Message);
    }
}
/// <summary>
/// Pages through a playlist feed and adds every video that is not yet present in
/// <paramref name="pDictionaryVideoWrapper"/>, appending each new title to <paramref name="strBuilder"/>.
/// </summary>
/// <remarks>
/// Every page is downloaded, saved as a temporary XML file inside the cleaned channel folder, registered in
/// <c>Constant.tempFiles</c>, and parsed for Atom entries. Paging stops when the configured video limit is
/// reached, when the feed reports zero total results or contains no entries, or after
/// <c>maxConsecutiveFailures</c> failed attempts in a row. Failures are reported on standard error; no
/// exception is propagated to the caller.
/// </remarks>
/// <param name="pChannelName">Channel name; its cleaned form is the folder that receives the temporary XML file.</param>
/// <param name="pPlaylistURL">Playlist feed URL; <c>?start-index=N</c> is appended to it for each request.</param>
/// <param name="pDictionaryVideoWrapper">Videos collected so far, keyed by video key; new videos are added to it.</param>
/// <param name="strBuilder">Receives one tab-indented line with the title of every newly added video.</param>
/// <param name="pStartIndex">Value of <c>start-index</c> used for the first request.</param>
public static void GetPlaylistVideos(string pChannelName, string pPlaylistURL, Dictionary<string, VideoWrapper> pDictionaryVideoWrapper, StringBuilder strBuilder, int pStartIndex)
{
    // Cap on back-to-back failures. The previous implementation retried by calling itself from the
    // catch block with the same arguments, which never terminated on a permanent error.
    const int maxConsecutiveFailures = 3;

    int consecutiveFailures = 0;
    bool processedAnyPage = false;
    string channelCleanedName = null;

    while (true)
    {
        try
        {
            string channelFileNameXML = "Playlist-" + ConfigurationManager.AppSettings["channelsFileNameXML"].ToString();
            channelCleanedName = Common.CleanFileName(pChannelName);

            // Limited extraction: stop once the configured number of videos has been recorded.
            if (ConfigurationManager.AppSettings["ExtractAllVideosFlag"].ToString().Equals("False", StringComparison.InvariantCultureIgnoreCase))
            {
                int totalVideo = Int32.Parse(ConfigurationManager.AppSettings["totalVideos"].ToString());
                if (totalVideo <= recordCount)
                {
                    break;
                }
            }

            // Dispose the response chain so the underlying connection is released after every page.
            string xmlData;
            WebRequest nameRequest = WebRequest.Create(pPlaylistURL + "?start-index=" + pStartIndex);
            using (HttpWebResponse nameResponse = (HttpWebResponse)nameRequest.GetResponse())
            using (Stream nameStream = nameResponse.GetResponseStream())
            using (StreamReader nameReader = new StreamReader(nameStream))
            {
                xmlData = nameReader.ReadToEnd();
            }

            string xmlPath = channelCleanedName + "/" + channelFileNameXML;
            File.WriteAllText(xmlPath, xmlData);
            Constant.tempFiles.Add(channelFileNameXML);

            XmlDocument doc = new XmlDocument();
            doc.Load(xmlPath);

            XmlNamespaceManager namespaceManager = new XmlNamespaceManager(doc.NameTable);
            namespaceManager.AddNamespace("Atom", "http://www.w3.org/2005/Atom");
            namespaceManager.AddNamespace("openSearch", "http://a9.com/-/spec/opensearchrss/1.0/");

            // A feed without a totalResults element is treated like an empty feed; the node used to be
            // dereferenced before it was null-checked.
            XmlNodeList totalNodes = doc.SelectNodes("//openSearch:totalResults", namespaceManager);
            if (totalNodes.Count == 0 || totalNodes[0].InnerText.Equals("0"))
            {
                break;
            }

            XmlNodeList listNodes = doc.SelectNodes("//Atom:entry", namespaceManager);
            if (listNodes.Count == 0)
            {
                break;
            }

            foreach (XmlNode n in listNodes)
            {
                // Per-entry state, so an entry without a title or link cannot inherit the previous one's.
                string title = String.Empty;
                string url = String.Empty;
                string key = String.Empty;

                foreach (XmlNode node in n.ChildNodes)
                {
                    if (node.Name.Equals("title"))
                    {
                        title = node.InnerText;
                    }
                    else if (node.Name.Equals("link"))
                    {
                        XmlAttribute rel = node.Attributes["rel"];
                        XmlAttribute href = node.Attributes["href"];
                        if (rel != null && href != null && rel.Value.Equals("alternate", StringComparison.CurrentCultureIgnoreCase))
                        {
                            // The second token after splitting on '=' and '&' is taken as the video key.
                            string[] linkArr = href.Value.Split(new Char[] { '=', '&' }, StringSplitOptions.RemoveEmptyEntries);
                            if (linkArr.Length > 1)
                            {
                                key = linkArr[1];
                                url = "http://www.youtube.com/watch?v=" + key;
                            }
                        }
                    }
                }

                if (key.Length == 0)
                {
                    // No usable alternate link: nothing to register for this entry.
                    continue;
                }

                if (!pDictionaryVideoWrapper.ContainsKey(key))
                {
                    recordCount++;
                    VideoWrapper vWrapper = new VideoWrapper();
                    vWrapper.setVideoKey(key);
                    vWrapper.setVideoName(title);
                    vWrapper.setVideoUrl(url);
                    pDictionaryVideoWrapper.Add(key, vWrapper);
                    strBuilder.Append("\t\t" + title + "\r\n");
                    updatedFlag = true;
                }
            }

            // NOTE(review): the index advances by one per request even though a page can hold several
            // entries, so consecutive pages overlap and the ContainsKey check absorbs the repeats.
            // Confirm whether advancing by the page size was intended.
            pStartIndex++;
            processedAnyPage = true;
            consecutiveFailures = 0;
        }
        catch (Exception ex)
        {
            consecutiveFailures++;
            if (consecutiveFailures >= maxConsecutiveFailures)
            {
                Console.Error.WriteLine("GetPlaylistVideos: giving up on " + pPlaylistURL + " at start-index " + pStartIndex + " : " + ex.Message);
                break;
            }
        }
    }

    // The recursive version ran the cleanup only from calls that had fully processed a page; keep that condition.
    if (processedAnyPage)
    {
        try
        {
            Common.RemoveTempFiles(Constant.tempFiles, channelCleanedName);
        }
        catch (Exception ex)
        {
            Console.Error.WriteLine("GetPlaylistVideos: temp file cleanup failed : " + ex.Message);
        }
    }
}
/// <summary>
/// Walks a channel's uploads feed page by page, records every video that is not yet present in
/// <paramref name="videoDictionary"/>, writes its title to the channel file, and queues it through <c>Produce</c>.
/// </summary>
/// <remarks>
/// Each page is saved as a temporary XML file in the cleaned channel folder; the file name is registered in
/// <c>Constant.tempFiles</c> whenever the method stops on one of its end conditions. A failed page is retried
/// after a pause, up to <c>maxConsecutiveFailures</c> attempts in a row; after that the failure is reported on
/// standard error and the method returns. No exception is propagated to the caller.
/// </remarks>
/// <param name="pChannelName">Channel (user) name placed in the feed URL; its cleaned form is the output folder.</param>
/// <param name="pChannelId">Not read by this method; kept so the signature stays compatible with existing callers.</param>
/// <param name="startIndex">Feed <c>start-index</c> of the first page to request.</param>
/// <param name="videoDictionary">Videos seen so far, keyed by video id. When null, entries are read but nothing is recorded.</param>
/// <param name="requestType">Only <c>Enumeration.VideoRequestType.All</c> is handled; any other value returns immediately.</param>
public static void WriteVideoLists(string pChannelName, string pChannelId, int startIndex, Dictionary<string, VideoWrapper> videoDictionary, Enumeration.VideoRequestType requestType)
{
    // No feed URL exists for other request types. The old code passed an empty URL to
    // WebRequest.Create, caught the resulting exception, slept, and returned without doing any work.
    if (requestType != Enumeration.VideoRequestType.All)
    {
        return;
    }

    // Retry policy. The previous implementation recursed from the catch block without any bound,
    // adding a stack frame per failure and never finishing on a permanent error.
    const int maxConsecutiveFailures = 5;
    const int retryDelayMilliseconds = 10000;
    const int pageSize = 25;

    int consecutiveFailures = 0;

    while (true)
    {
        try
        {
            string videFileNameXML = ConfigurationManager.AppSettings["channelsVideoFileXML"].ToString();
            string channelFileName = ConfigurationManager.AppSettings["channelsFileName"].ToString();
            string channelCleanedName = Common.CleanFileName(pChannelName);

            //http://gdata.youtube.com/feeds/api/users/machinima/uploads?start-index=4000
            string channelUrl = "https://gdata.youtube.com/feeds/api/users/" + pChannelName + "/uploads?&start-index=" + startIndex;

            HttpWebRequest nameRequest = (HttpWebRequest)WebRequest.Create(channelUrl);
            nameRequest.KeepAlive = false;
            nameRequest.ProtocolVersion = HttpVersion.Version10;

            // Dispose the response chain so the connection is released after every page.
            string xmlData;
            using (HttpWebResponse nameResponse = (HttpWebResponse)nameRequest.GetResponse())
            using (Stream nameStream = nameResponse.GetResponseStream())
            using (StreamReader nameReader = new StreamReader(nameStream))
            {
                xmlData = nameReader.ReadToEnd();
            }

            string xmlPath = channelCleanedName + "/" + videFileNameXML;
            File.WriteAllText(xmlPath, xmlData);

            XmlDocument doc = new XmlDocument();
            doc.Load(xmlPath);

            XmlNamespaceManager namespaceManager = new XmlNamespaceManager(doc.NameTable);
            namespaceManager.AddNamespace("Atom", "http://www.w3.org/2005/Atom");
            namespaceManager.AddNamespace("openSearch", "http://a9.com/-/spec/opensearchrss/1.0/");

            XmlNodeList listResult = doc.SelectNodes(channelAtomEntry, namespaceManager);
            XmlNode nodeTotal = doc.SelectSingleNode("//openSearch:totalResults", namespaceManager);

            // End condition: no total reported (previously a null dereference), zero results, or no entry list.
            if (nodeTotal == null || listResult == null)
            {
                Constant.tempFiles.Add(videFileNameXML);
                return;
            }

            int total = Int32.Parse(nodeTotal.InnerText);
            if (total == 0)
            {
                Constant.tempFiles.Add(videFileNameXML);
                return;
            }

            bool testing = ConfigurationManager.AppSettings["testingFlag"].ToString().Equals("true", StringComparison.CurrentCultureIgnoreCase);
            bool limitVideos = ConfigurationManager.AppSettings["ExtractAllVideosFlag"].ToString().Equals("False", StringComparison.InvariantCultureIgnoreCase);

            // The configured limit is parsed once per page, and only when a limit applies.
            int totalVideo = limitVideos ? Int32.Parse(ConfigurationManager.AppSettings["totalVideos"].ToString()) : 0;

            bool finished;
            if (testing)
            {
                // Testing mode stops once the start index reaches 26.
                finished = startIndex >= 26;
            }
            else if (limitVideos)
            {
                finished = totalVideo <= recordCount;
            }
            else
            {
                // NOTE(review): if start-index is 1-based, total == startIndex still has one entry left,
                // so this looks like it may skip the last video. Confirm the caller's initial index.
                finished = total <= startIndex;
            }

            if (finished)
            {
                Constant.tempFiles.Add(videFileNameXML);
                return;
            }

            foreach (XmlNode entry in listResult)
            {
                string videoName = String.Empty;
                string videoUrl = String.Empty;
                string videoId = String.Empty;
                bool idFound = false;
                bool titleFound = false;

                foreach (XmlNode node in entry.ChildNodes)
                {
                    if (node.Name.Equals("id"))
                    {
                        // The video id is the last path segment of the entry id.
                        videoUrl = node.InnerText;
                        string[] arrId = videoUrl.Split(new Char[] { '/' }, StringSplitOptions.RemoveEmptyEntries);
                        if (arrId.Length > 0)
                        {
                            videoId = arrId[arrId.Length - 1];
                            idFound = true;
                        }
                    }
                    else if (node.Name.Equals("title"))
                    {
                        videoName = node.InnerText;
                        titleFound = true;
                    }

                    if (idFound && titleFound)
                    {
                        if (videoDictionary != null && !videoDictionary.ContainsKey(videoId))
                        {
                            VideoWrapper vWrapper = new VideoWrapper(videoName, videoId, videoUrl, channelUrlMain);
                            videoDictionary.Add(videoId, vWrapper);
                            File.AppendAllText(channelCleanedName + "/" + channelFileName, "\t" + videoName + "\r\n");

                            // Hand the video to the worker queue.
                            Produce(vWrapper);
                            recordCount++;
                        }
                        break;
                    }
                }

                if (limitVideos && totalVideo <= recordCount)
                {
                    Constant.tempFiles.Add(videFileNameXML);
                    return;
                }
            }

            startIndex += pageSize;
            consecutiveFailures = 0;
        }
        catch (Exception ex)
        {
            consecutiveFailures++;
            if (consecutiveFailures >= maxConsecutiveFailures)
            {
                Console.Error.WriteLine("WriteVideoLists: giving up on channel " + pChannelName + " at start-index " + startIndex + " : " + ex.Message);
                return;
            }

            // Pause before asking for the same page again.
            Thread.Sleep(retryDelayMilliseconds);
        }
    }
}
/// <summary>
/// Reads one page of a user's favourites feed, writes the feed title and the newly seen video names to the
/// channel file, passes the page's videos to <c>ChannelVideo.parseVideo</c> and
/// <c>ChannelComment.CrawlComments</c>, and then calls itself for the next page (start index + 25).
/// </summary>
/// <remarks>
/// The recursion ends when the configured video limit is reached, when the feed reports zero total results,
/// or when a page has no entries. Any exception stops the extraction at the current page; it is reported on
/// standard error and not propagated.
/// </remarks>
/// <param name="pChannelName">Channel name; its cleaned form is the folder holding the channel file and the temporary XML file.</param>
/// <param name="pUserId">User id placed in the favourites feed URL.</param>
/// <param name="pStartIndex">Feed <c>start-index</c> of the page to read.</param>
public static void ExtractFromUserFavourite(string pChannelName, string pUserId, int pStartIndex)
{
    try
    {
        string channelFileName = ConfigurationManager.AppSettings["channelsFileName"].ToString();
        string channelFileNameXML = ConfigurationManager.AppSettings["channelsFileNameXML"].ToString();
        string channelCleanedName = Common.CleanFileName(pChannelName);
        string channelFilePath = channelCleanedName + "/" + channelFileName;

        // Limited extraction: stop once the configured number of videos has been recorded.
        if (ConfigurationManager.AppSettings["ExtractAllVideosFlag"].ToString().Equals("False", StringComparison.InvariantCultureIgnoreCase))
        {
            int totalVideo = Int32.Parse(ConfigurationManager.AppSettings["totalVideos"].ToString());
            if (totalVideo <= recordCount)
            {
                return;
            }
        }

        string favouriteUrl = "https://gdata.youtube.com/feeds/api/users/" + pUserId + "/favorites?start-index=" + pStartIndex + "&v=2";

        // Dispose the response chain so the connection is released before the next page is requested.
        string xmlData;
        WebRequest nameRequest = WebRequest.Create(favouriteUrl);
        using (HttpWebResponse nameResponse = (HttpWebResponse)nameRequest.GetResponse())
        using (Stream nameStream = nameResponse.GetResponseStream())
        using (StreamReader nameReader = new StreamReader(nameStream))
        {
            xmlData = nameReader.ReadToEnd();
        }

        string xmlPath = channelCleanedName + "/" + channelFileNameXML;
        File.WriteAllText(xmlPath, xmlData);
        Constant.tempFiles.Add(channelFileNameXML);

        XmlDocument doc = new XmlDocument();
        doc.Load(xmlPath);

        XmlNamespaceManager namespaceManager = new XmlNamespaceManager(doc.NameTable);
        namespaceManager.AddNamespace("Atom", "http://www.w3.org/2005/Atom");
        namespaceManager.AddNamespace("openSearch", "http://a9.com/-/spec/opensearch/1.1/");

        XmlNode totalRecordNode = doc.SelectSingleNode("//openSearch:totalResults", namespaceManager);

        // The null check now comes before the first dereference; a missing node skips the page
        // and falls through to the cleanup below.
        if (totalRecordNode != null)
        {
            if (totalRecordNode.InnerText.Equals("0"))
            {
                return;
            }

            XmlNode titleNode = doc.SelectSingleNode("//Atom:title", namespaceManager);
            if (titleNode != null)
            {
                File.AppendAllText(channelFilePath, titleNode.InnerText + "\r\n");
            }

            XmlNodeList listNodes = doc.SelectNodes("//Atom:entry", namespaceManager);
            if (listNodes.Count == 0)
            {
                return;
            }

            // The section header is emitted together with the first entry that gets written.
            StringBuilder strBuilder = new StringBuilder();
            strBuilder.Append("\tFavourite Videos:\r\n");

            Dictionary<string, VideoWrapper> dictionaryVideoWrapper = new Dictionary<string, VideoWrapper>();

            foreach (XmlNode n in listNodes)
            {
                // Per-entry state, so an entry without a title or link cannot inherit the previous one's.
                string title = String.Empty;
                string key = String.Empty;
                string url = String.Empty;

                foreach (XmlNode node in n.ChildNodes)
                {
                    if (node.Name.Equals("title"))
                    {
                        title = node.InnerText;
                    }
                    else if (node.Name.Equals("link"))
                    {
                        XmlAttribute rel = node.Attributes["rel"];
                        XmlAttribute href = node.Attributes["href"];
                        if (rel != null && href != null && rel.Value.Equals("alternate", StringComparison.CurrentCultureIgnoreCase))
                        {
                            // URL is the href up to the first '&'; the key is what follows the first '=' in it.
                            string[] hrefParts = href.Value.Split(new Char[] { '&' }, StringSplitOptions.RemoveEmptyEntries);
                            if (hrefParts.Length > 0)
                            {
                                string[] urlParts = hrefParts[0].Split(new Char[] { '=' }, StringSplitOptions.RemoveEmptyEntries);
                                if (urlParts.Length > 1)
                                {
                                    url = hrefParts[0];
                                    key = urlParts[1];
                                }
                            }
                        }
                    }
                }

                strBuilder.Append("\t\tVideo Name: " + title + "\r\n");

                if (key.Length > 0 && !dictionaryVideoWrapper.ContainsKey(key))
                {
                    VideoWrapper vWrapper = new VideoWrapper();
                    vWrapper.setVideoKey(key);
                    vWrapper.setVideoName(title);
                    vWrapper.setVideoUrl(url);
                    dictionaryVideoWrapper.Add(key, vWrapper);
                    updatedFlag = true;
                    recordCount++;
                }

                if (updatedFlag)
                {
                    File.AppendAllText(channelFilePath, strBuilder.ToString());
                }

                // The buffer is cleared after every entry, whether or not it was written.
                updatedFlag = false;
                strBuilder.Remove(0, strBuilder.Length);
            }

            ChannelVideo.parseVideo(dictionaryVideoWrapper, pChannelName);
            ChannelComment.CrawlComments(dictionaryVideoWrapper, pChannelName);

            pStartIndex += 25;
            ExtractFromUserFavourite(pChannelName, pUserId, pStartIndex);
        }

        Common.RemoveTempFiles(Constant.tempFiles, channelCleanedName);
    }
    catch (Exception ex)
    {
        // Extraction stays best-effort, but the failure is no longer silent.
        Console.Error.WriteLine("ExtractFromUserFavourite: stopped for user " + pUserId + " at start-index " + pStartIndex + " : " + ex.Message);
    }
}
/// <summary>
/// Pages through a playlist feed and adds every video that is not yet present in
/// <paramref name="pDictionaryVideoWrapper"/>, appending each new title to <paramref name="strBuilder"/>.
/// </summary>
/// <remarks>
/// Every page is downloaded, saved as a temporary XML file inside the cleaned channel folder, registered in
/// <c>Constant.tempFiles</c>, and parsed for Atom entries. Paging stops when the configured video limit is
/// reached, when the feed reports zero total results or contains no entries, or after
/// <c>maxConsecutiveFailures</c> failed attempts in a row. Failures are reported on standard error; no
/// exception is propagated to the caller.
/// </remarks>
/// <param name="pChannelName">Channel name; its cleaned form is the folder that receives the temporary XML file.</param>
/// <param name="pPlaylistURL">Playlist feed URL; <c>?start-index=N</c> is appended to it for each request.</param>
/// <param name="pDictionaryVideoWrapper">Videos collected so far, keyed by video key; new videos are added to it.</param>
/// <param name="strBuilder">Receives one tab-indented line with the title of every newly added video.</param>
/// <param name="pStartIndex">Value of <c>start-index</c> used for the first request.</param>
public static void GetPlaylistVideos(string pChannelName, string pPlaylistURL, Dictionary<string, VideoWrapper> pDictionaryVideoWrapper, StringBuilder strBuilder, int pStartIndex)
{
    // Cap on back-to-back failures. The previous implementation retried by calling itself from the
    // catch block with the same arguments, which never terminated on a permanent error.
    const int maxConsecutiveFailures = 3;

    int consecutiveFailures = 0;
    bool processedAnyPage = false;
    string channelCleanedName = null;

    while (true)
    {
        try
        {
            string channelFileNameXML = "Playlist-" + ConfigurationManager.AppSettings["channelsFileNameXML"].ToString();
            channelCleanedName = Common.CleanFileName(pChannelName);

            // Limited extraction: stop once the configured number of videos has been recorded.
            if (ConfigurationManager.AppSettings["ExtractAllVideosFlag"].ToString().Equals("False", StringComparison.InvariantCultureIgnoreCase))
            {
                int totalVideo = Int32.Parse(ConfigurationManager.AppSettings["totalVideos"].ToString());
                if (totalVideo <= recordCount)
                {
                    break;
                }
            }

            // Dispose the response chain so the underlying connection is released after every page.
            string xmlData;
            WebRequest nameRequest = WebRequest.Create(pPlaylistURL + "?start-index=" + pStartIndex);
            using (HttpWebResponse nameResponse = (HttpWebResponse)nameRequest.GetResponse())
            using (Stream nameStream = nameResponse.GetResponseStream())
            using (StreamReader nameReader = new StreamReader(nameStream))
            {
                xmlData = nameReader.ReadToEnd();
            }

            string xmlPath = channelCleanedName + "/" + channelFileNameXML;
            File.WriteAllText(xmlPath, xmlData);
            Constant.tempFiles.Add(channelFileNameXML);

            XmlDocument doc = new XmlDocument();
            doc.Load(xmlPath);

            XmlNamespaceManager namespaceManager = new XmlNamespaceManager(doc.NameTable);
            namespaceManager.AddNamespace("Atom", "http://www.w3.org/2005/Atom");
            namespaceManager.AddNamespace("openSearch", "http://a9.com/-/spec/opensearchrss/1.0/");

            // A feed without a totalResults element is treated like an empty feed; the node used to be
            // dereferenced before it was null-checked.
            XmlNodeList totalNodes = doc.SelectNodes("//openSearch:totalResults", namespaceManager);
            if (totalNodes.Count == 0 || totalNodes[0].InnerText.Equals("0"))
            {
                break;
            }

            XmlNodeList listNodes = doc.SelectNodes("//Atom:entry", namespaceManager);
            if (listNodes.Count == 0)
            {
                break;
            }

            foreach (XmlNode n in listNodes)
            {
                // Per-entry state, so an entry without a title or link cannot inherit the previous one's.
                string title = String.Empty;
                string url = String.Empty;
                string key = String.Empty;

                foreach (XmlNode node in n.ChildNodes)
                {
                    if (node.Name.Equals("title"))
                    {
                        title = node.InnerText;
                    }
                    else if (node.Name.Equals("link"))
                    {
                        XmlAttribute rel = node.Attributes["rel"];
                        XmlAttribute href = node.Attributes["href"];
                        if (rel != null && href != null && rel.Value.Equals("alternate", StringComparison.CurrentCultureIgnoreCase))
                        {
                            // The second token after splitting on '=' and '&' is taken as the video key.
                            string[] linkArr = href.Value.Split(new Char[] { '=', '&' }, StringSplitOptions.RemoveEmptyEntries);
                            if (linkArr.Length > 1)
                            {
                                key = linkArr[1];
                                url = "http://www.youtube.com/watch?v=" + key;
                            }
                        }
                    }
                }

                if (key.Length == 0)
                {
                    // No usable alternate link: nothing to register for this entry.
                    continue;
                }

                if (!pDictionaryVideoWrapper.ContainsKey(key))
                {
                    recordCount++;
                    VideoWrapper vWrapper = new VideoWrapper();
                    vWrapper.setVideoKey(key);
                    vWrapper.setVideoName(title);
                    vWrapper.setVideoUrl(url);
                    pDictionaryVideoWrapper.Add(key, vWrapper);
                    strBuilder.Append("\t\t" + title + "\r\n");
                    updatedFlag = true;
                }
            }

            // NOTE(review): the index advances by one per request even though a page can hold several
            // entries, so consecutive pages overlap and the ContainsKey check absorbs the repeats.
            // Confirm whether advancing by the page size was intended.
            pStartIndex++;
            processedAnyPage = true;
            consecutiveFailures = 0;
        }
        catch (Exception ex)
        {
            consecutiveFailures++;
            if (consecutiveFailures >= maxConsecutiveFailures)
            {
                Console.Error.WriteLine("GetPlaylistVideos: giving up on " + pPlaylistURL + " at start-index " + pStartIndex + " : " + ex.Message);
                break;
            }
        }
    }

    // The recursive version ran the cleanup only from calls that had fully processed a page; keep that condition.
    if (processedAnyPage)
    {
        try
        {
            Common.RemoveTempFiles(Constant.tempFiles, channelCleanedName);
        }
        catch (Exception ex)
        {
            Console.Error.WriteLine("GetPlaylistVideos: temp file cleanup failed : " + ex.Message);
        }
    }
}
/// <summary>
/// Requests <c>https://gdata.youtube.com/feeds/api/videos/{key}?v=2</c> for one video and appends its details
/// (channel, channel URL, name, URL, date, views, likes, dislikes, description, tags) to
/// <c>{cleaned channel}/Videos/channel_video_{cleaned video file name}</c>.
/// </summary>
/// <remarks>
/// The feed is parsed directly from the response instead of going through a temporary file named after the
/// video title, so workers handling videos with the same title no longer share a file. A failed attempt is
/// retried after a pause, up to <c>maxAttempts</c> attempts; after the last one the failure is reported on
/// standard error and the method returns. All details are written with a single append, so a retry cannot
/// leave duplicated lines behind.
/// </remarks>
/// <param name="pVideoWrapper">Video to describe; its key selects the feed entry and its channel URL is written out.</param>
/// <param name="pChannelName">Channel name; its cleaned form is the output folder and the "Video Channel" value.</param>
public static void parseVideo(VideoWrapper pVideoWrapper, string pChannelName)
{
    // Retry policy. The previous implementation slept and then called itself from the catch block
    // without any bound, so a permanent failure never finished.
    const int maxAttempts = 5;
    const int retryDelayMilliseconds = 10000;

    if (pVideoWrapper == null)
    {
        Console.Error.WriteLine("parseVideo: no video supplied for channel " + pChannelName);
        return;
    }

    string fileVideo = ConfigurationManager.AppSettings["channelVideo"].ToString();
    string videoChannelName = Common.CleanFileName(pChannelName);

    for (int attempt = 1; attempt <= maxAttempts; attempt++)
    {
        try
        {
            string videoUrl = string.Format("https://gdata.youtube.com/feeds/api/videos/{0}?v=2", pVideoWrapper.getVideoKey());

            // Dispose the response chain so the connection is released after every request.
            XmlDocument doc = new XmlDocument();
            WebRequest nameRequest = WebRequest.Create(videoUrl);
            using (HttpWebResponse nameResponse = (HttpWebResponse)nameRequest.GetResponse())
            using (Stream nameStream = nameResponse.GetResponseStream())
            using (StreamReader nameReader = new StreamReader(nameStream))
            {
                doc.Load(nameReader);
            }

            XmlNamespaceManager namespaceManager = new XmlNamespaceManager(doc.NameTable);
            namespaceManager.AddNamespace("Atom", "http://www.w3.org/2005/Atom");
            namespaceManager.AddNamespace("yt", "http://gdata.youtube.com/schemas/2007");
            namespaceManager.AddNamespace("media", "http://search.yahoo.com/mrss/");

            // Only the first link of the entry is inspected; it is used when its rel is "alternate".
            string url = string.Empty;
            XmlNode linkNode = doc.SelectSingleNode("//Atom:entry/Atom:link", namespaceManager);
            if (linkNode != null)
            {
                XmlAttribute rel = linkNode.Attributes["rel"];
                XmlAttribute href = linkNode.Attributes["href"];
                if (rel != null && href != null && rel.Value.Equals("alternate", StringComparison.CurrentCultureIgnoreCase))
                {
                    string[] urlArr = href.Value.Split(new Char[] { '&' }, StringSplitOptions.RemoveEmptyEntries);
                    url = urlArr.Length > 0 ? urlArr[0] : href.Value;
                }
            }

            // Each node is looked up once; a missing node or attribute yields an empty value.
            XmlNode titleNode = doc.SelectSingleNode("//Atom:entry/Atom:title", namespaceManager);
            string videoName = titleNode != null ? titleNode.InnerText : string.Empty;

            XmlNode publishedNode = doc.SelectSingleNode("//Atom:entry/Atom:published", namespaceManager);
            string date = publishedNode != null ? publishedNode.InnerText : string.Empty;

            XmlNode ratingNode = doc.SelectSingleNode("//Atom:entry/yt:rating", namespaceManager);
            XmlAttribute dislikesAttribute = ratingNode != null ? ratingNode.Attributes["numDislikes"] : null;
            XmlAttribute likesAttribute = ratingNode != null ? ratingNode.Attributes["numLikes"] : null;
            string iDislike = dislikesAttribute != null ? dislikesAttribute.Value : string.Empty;
            string iLike = likesAttribute != null ? likesAttribute.Value : string.Empty;

            XmlNode descriptionNode = doc.SelectSingleNode(" //Atom:entry/media:group/media:description", namespaceManager);
            string description = descriptionNode != null ? descriptionNode.InnerText : string.Empty;

            XmlNode statisticsNode = doc.SelectSingleNode("//Atom:entry/yt:statistics", namespaceManager);
            XmlAttribute viewCountAttribute = statisticsNode != null ? statisticsNode.Attributes["viewCount"] : null;
            string videoViewCount = viewCountAttribute != null ? viewCountAttribute.Value : string.Empty;

            // A null tag list is written as an empty value; it used to throw after the other lines
            // had already been appended.
            List<string> videoTags = preapreParamsTags(doc.SelectNodes("//Atom:entry/Atom:category", namespaceManager));
            string tags = videoTags != null ? string.Join(",", videoTags.ToArray()) : string.Empty;

            string videoNameFile = Common.CleanFileName(videoName + "-" + fileVideo);
            string videosFolder = videoChannelName + "/" + "Videos";
            if (!Directory.Exists(videosFolder))
            {
                Directory.CreateDirectory(videosFolder);
            }

            string report =
                "Video Channel : " + videoChannelName + Environment.NewLine +
                "Channel Url : " + pVideoWrapper.getChannelUrl() + Environment.NewLine +
                "Video Name : " + videoName + Environment.NewLine +
                "Video Url : " + url + Environment.NewLine +
                "Date : " + date + Environment.NewLine +
                "Video Views : " + videoViewCount + Environment.NewLine +
                "I Like : " + iLike + Environment.NewLine +
                "I dislike : " + iDislike + Environment.NewLine +
                "Description : " + description + Environment.NewLine +
                "Tags : " + tags + Environment.NewLine;

            File.AppendAllText(videosFolder + "/" + "channel_video_" + videoNameFile, report);
            return;
        }
        catch (Exception ex)
        {
            Console.Error.WriteLine("parseVideo: attempt " + attempt + " of " + maxAttempts + " failed for video " + pVideoWrapper.getVideoKey() + " : " + ex.Message);
            if (attempt < maxAttempts)
            {
                // Pause before asking for the same video again.
                Thread.Sleep(retryDelayMilliseconds);
            }
        }
    }
}
/// <summary>
/// Loads the "all comments" pages of a video, starting at <paramref name="pPageNo"/>, and appends every
/// comment not yet present in <c>GlobalConstants.commentDictionary</c> to
/// <c>{pChannelName}/Comments/{cleaned video name}.txt</c>.
/// </summary>
/// <remarks>
/// Further pages are loaded only while <c>parseAllComments</c> equals "true". When it equals "false",
/// processing of the current page stops once <c>commentCount</c> reaches <c>totalCommentsParse</c>. The
/// method returns when a page contains no comment items. A failed page is retried immediately, up to
/// <c>maxConsecutiveFailures</c> attempts in a row; after that the failure is reported on standard error and
/// the method returns. No exception is propagated to the caller.
/// </remarks>
/// <param name="pChannelName">Folder under which the <c>Comments</c> directory is created.</param>
/// <param name="pVideo">Video whose comments are read; supplies the video key and the output file name.</param>
/// <param name="pHtmlFiles">Not read by this method; kept so the signature stays compatible with existing callers.</param>
/// <param name="pPageNo">Number of the first comment page to load.</param>
public static void DownloadHtmls(string pChannelName, VideoWrapper pVideo, Dictionary<int, string> pHtmlFiles, int pPageNo)
{
    // Cap on back-to-back failures. The previous implementation retried by calling itself from the
    // catch block with the same page number, which never terminated on a permanent error.
    const int maxConsecutiveFailures = 3;

    int consecutiveFailures = 0;

    while (true)
    {
        string url = string.Empty;
        try
        {
            url = ConfigurationManager.AppSettings["VideoAllCommentsUrl"].ToString() + pVideo.getVideoKey() + "&page=" + pPageNo;

            HtmlWeb hwObject = new HtmlWeb();
            HtmlDocument doc = hwObject.Load(url);

            // End condition: the page has no comment items.
            HtmlNodeCollection commentItems = doc.DocumentNode.SelectNodes("//ul[@id='all-comments']//li[@class='comment']");
            if (commentItems == null || commentItems.Count <= 0)
            {
                return;
            }

            HtmlNodeCollection contentNodes = doc.DocumentNode.SelectNodes("//ul[@id='all-comments']//li[@class='comment']//div[@class='content']");
            string videoUrl = "https://www.youtube.com/watch?v=" + pVideo.getVideoKey();

            // The "Video Url" header is written once per loaded page, before its first new comment.
            bool videoUrlFlag = false;

            // SelectNodes yields null when nothing matches; such a page simply contributes no comments.
            if (contentNodes != null)
            {
                foreach (HtmlNode node in contentNodes)
                {
                    string user = string.Empty;
                    string displayName = string.Empty;
                    string date = string.Empty;
                    string comment = string.Empty;

                    // Ids are read by attribute position on the content node's parent.
                    // NOTE(review): this depends on the attribute order in the page markup - confirm.
                    HtmlNode nodeData = node.ParentNode;
                    string dataId = nodeData.Attributes[2].Value.Trim();
                    string authorId = nodeData.Attributes[1].Value.Trim();

                    bool commentTextTaken = false;
                    foreach (HtmlNode child in node.ChildNodes)
                    {
                        if (child.Name.Equals("p"))
                        {
                            // Within a paragraph the first span carries the author, the next span the date.
                            bool userFlag = false;
                            foreach (HtmlNode n in child.ChildNodes)
                            {
                                if (n.Name.Equals("span") && !userFlag)
                                {
                                    foreach (HtmlNode nNode in n.ChildNodes)
                                    {
                                        if (nNode.Name.Equals("a"))
                                        {
                                            // User name is the second path segment of the author link.
                                            user = nNode.Attributes["href"].Value.Split(new Char[] { '/' }, StringSplitOptions.RemoveEmptyEntries)[1];
                                            break;
                                        }
                                    }
                                    displayName = n.InnerText.Trim();
                                    userFlag = true;
                                }
                                else if (n.Name.Equals("span"))
                                {
                                    date = n.InnerText.Trim();
                                    break;
                                }
                            }
                        }
                        else if (child.Name.Equals("div"))
                        {
                            // Only the first div child is taken as the comment text.
                            if (!commentTextTaken)
                            {
                                comment = child.InnerText.Trim();
                                commentTextTaken = true;
                            }
                        }
                    }

                    if (!displayName.Equals("") && !comment.Equals("") && !dataId.Equals("") && !authorId.Equals("") && !user.Equals("") && !GlobalConstants.commentDictionary.ContainsKey(dataId))
                    {
                        VideoCommentWrapper commentWrapper = new VideoCommentWrapper();
                        commentWrapper.authorId = authorId;
                        commentWrapper.commentId = dataId;
                        commentWrapper.commentText = comment;
                        commentWrapper.time = date;
                        commentWrapper.displayName = displayName;
                        commentWrapper.userName = user;
                        GlobalConstants.commentDictionary.Add(dataId, commentWrapper);

                        string videoName = Common.CleanFileName(pVideo.getVideoName() + "-" + fileComment) + ".txt";
                        string commentsFolder = pChannelName + "/" + "Comments";
                        if (!Directory.Exists(commentsFolder))
                        {
                            Directory.CreateDirectory(commentsFolder);
                        }

                        commentCount++;

                        // Same text as before, written with one append per comment.
                        string entryText = string.Empty;
                        if (!videoUrlFlag)
                        {
                            entryText = "Video Url : " + videoUrl + Environment.NewLine + "\r\n";
                            videoUrlFlag = true;
                        }
                        entryText +=
                            "User name : " + displayName + Environment.NewLine +
                            "Comment Date : " + date + Environment.NewLine +
                            "Comment : " + comment + Environment.NewLine;

                        File.AppendAllText(commentsFolder + "/" + videoName, entryText);
                    }

                    // Limited mode: stop reading this page once enough comments have been collected.
                    if (parseAllComments.Equals("false", StringComparison.CurrentCultureIgnoreCase))
                    {
                        if (totalCommentsParse <= commentCount)
                        {
                            break;
                        }
                    }
                }
            }

            pPageNo++;
            consecutiveFailures = 0;

            // Only "true" continues with the next page; any other value ends after the current one.
            if (!parseAllComments.Equals("true", StringComparison.CurrentCultureIgnoreCase))
            {
                return;
            }
        }
        catch (Exception ex)
        {
            consecutiveFailures++;
            if (consecutiveFailures >= maxConsecutiveFailures)
            {
                Console.Error.WriteLine("DownloadHtmls: giving up at URL " + url + " : " + ex.Message);
                return;
            }
        }
    }
}