/// <summary>
/// Downloads the GData (v2) Atom entry for a video, extracts its metadata and appends a
/// plain-text summary to "{channel}/Videos/channel_video_{video title}-{channelVideo setting}".
/// </summary>
/// <param name="pVideoWrapper">Supplies the video name, video key and channel URL.</param>
/// <param name="pChannelName">
/// Channel name; it is passed through <c>Common.CleanFileName</c> and then used both as the
/// output directory root and as the "Video Channel" value in the summary.
/// </param>
/// <remarks>
/// A failed attempt is retried after a 10 second pause, for a bounded number of attempts.
/// Failures inside the retry loop are reported through <c>System.Diagnostics.Trace</c> and are
/// not thrown. The "channelVideo" setting and the channel-name cleanup are evaluated before the
/// retry loop, so an exception from either of those propagates to the caller.
/// </remarks>
public static void parseVideo(VideoWrapper pVideoWrapper, string pChannelName)
{
    // Bounded retry: the previous implementation retried by unbounded recursion, which never
    // returned (and kept growing the stack) when the failure was permanent.
    const int maxAttempts = 5;
    const int retryDelayMilliseconds = 10000;

    string fileVideo = ConfigurationManager.AppSettings["channelVideo"].ToString();
    string videoChannelName = Common.CleanFileName(pChannelName);

    for (int attempt = 1; attempt <= maxAttempts; attempt++)
    {
        try
        {
            string channelFileNameXML = Common.CleanFileName(pVideoWrapper.getVideoName()) + "-" + ConfigurationManager.AppSettings["channelsFileNameXML"].ToString();
            string videoUrl = string.Format("https://gdata.youtube.com/feeds/api/videos/{0}?v=2", pVideoWrapper.getVideoKey());

            // Dispose the response, stream and reader even when reading fails.
            string xmlData;
            WebRequest nameRequest = WebRequest.Create(videoUrl);
            using (WebResponse nameResponse = nameRequest.GetResponse())
            using (Stream nameStream = nameResponse.GetResponseStream())
            using (StreamReader nameReader = new StreamReader(nameStream))
            {
                xmlData = nameReader.ReadToEnd();
            }

            // The feed is round-tripped through a temporary file as before, but the file is
            // now removed even when writing or parsing fails.
            XmlDocument doc = new XmlDocument();
            try
            {
                File.WriteAllText(channelFileNameXML, xmlData);
                doc.Load(channelFileNameXML);
            }
            finally
            {
                File.Delete(channelFileNameXML);
            }

            XmlNamespaceManager namespaceManager = new XmlNamespaceManager(doc.NameTable);
            namespaceManager.AddNamespace("Atom", "http://www.w3.org/2005/Atom");
            namespaceManager.AddNamespace("yt", "http://gdata.youtube.com/schemas/2007");
            namespaceManager.AddNamespace("media", "http://search.yahoo.com/mrss/");

            // Each node is looked up once and reused.
            XmlNode linkNode = doc.SelectSingleNode("//Atom:entry/Atom:link", namespaceManager);
            XmlNode titleNode = doc.SelectSingleNode("//Atom:entry/Atom:title", namespaceManager);
            XmlNode publishedNode = doc.SelectSingleNode("//Atom:entry/Atom:published", namespaceManager);
            XmlNode ratingNode = doc.SelectSingleNode("//Atom:entry/yt:rating", namespaceManager);
            XmlNode descriptionNode = doc.SelectSingleNode(" //Atom:entry/media:group/media:description", namespaceManager);
            XmlNode statisticsNode = doc.SelectSingleNode("//Atom:entry/yt:statistics", namespaceManager);

            // Only the first <link> of the entry is considered, and only when it is the
            // "alternate" link. Everything from the first '&' onwards is dropped.
            string url = string.Empty;
            if (linkNode != null && linkNode.Attributes != null)
            {
                XmlAttribute relAttribute = linkNode.Attributes["rel"];
                XmlAttribute hrefAttribute = linkNode.Attributes["href"];
                if (relAttribute != null && hrefAttribute != null
                    && relAttribute.Value.Equals("alternate", StringComparison.CurrentCultureIgnoreCase))
                {
                    string[] urlArr = hrefAttribute.Value.Split(new Char[] { '&' }, StringSplitOptions.RemoveEmptyEntries);
                    if (urlArr.Length > 0)
                    {
                        url = urlArr[0];
                    }
                }
            }

            string videoName = titleNode != null ? titleNode.InnerText : string.Empty;
            string date = publishedNode != null ? publishedNode.InnerText : string.Empty;
            string description = descriptionNode != null ? descriptionNode.InnerText : string.Empty;

            string iDislike = string.Empty;
            string iLike = string.Empty;
            if (ratingNode != null && ratingNode.Attributes != null)
            {
                XmlAttribute dislikesAttribute = ratingNode.Attributes["numDislikes"];
                XmlAttribute likesAttribute = ratingNode.Attributes["numLikes"];
                if (dislikesAttribute != null)
                {
                    iDislike = dislikesAttribute.Value;
                }
                if (likesAttribute != null)
                {
                    iLike = likesAttribute.Value;
                }
            }

            string videoViewCount = string.Empty;
            if (statisticsNode != null && statisticsNode.Attributes != null)
            {
                XmlAttribute viewCountAttribute = statisticsNode.Attributes["viewCount"];
                if (viewCountAttribute != null)
                {
                    videoViewCount = viewCountAttribute.Value;
                }
            }

            // A null tag list is written as an empty "Tags" line instead of throwing.
            List<string> videoTags = preapreParamsTags(doc.SelectNodes("//Atom:entry/Atom:category", namespaceManager));
            string tags = videoTags != null ? string.Join(",", videoTags.ToArray()) : string.Empty;

            string videoNameFile = Common.CleanFileName(videoName + "-" + fileVideo);
            string videosDirectory = videoChannelName + "/" + "Videos";
            Directory.CreateDirectory(videosDirectory); // no-op when the directory already exists

            // The summary is written with a single append so a failed attempt cannot leave a
            // partial summary that the next attempt would then duplicate.
            string newLine = Environment.NewLine;
            string summary =
                "Video Channel : " + videoChannelName + newLine +
                "Channel Url : " + pVideoWrapper.getChannelUrl() + newLine +
                "Video Name : " + videoName + newLine +
                "Video Url : " + url + newLine +
                "Date : " + date + newLine +
                "Video Views : " + videoViewCount + newLine +
                "I Like : " + iLike + newLine +
                "I dislike : " + iDislike + newLine +
                "Description : " + description + newLine +
                "Tags : " + tags + newLine;
            File.AppendAllText(videosDirectory + "/" + "channel_video_" + videoNameFile, summary);
            return;
        }
        catch (Exception ex)
        {
            System.Diagnostics.Trace.TraceWarning("parseVideo: attempt {0} of {1} failed: {2}", attempt, maxAttempts, ex);
            if (attempt < maxAttempts)
            {
                Thread.Sleep(retryDelayMilliseconds);
            }
        }
    }

    System.Diagnostics.Trace.TraceError("parseVideo: giving up after {0} attempts.", maxAttempts);
}
/// <summary>
/// Parses previously saved comment pages ("{channel}/Comments/{file}") and appends every
/// comment not yet present in <c>GlobalConstants.commentDictionary</c> to the video's comment
/// text file. Afterwards all files listed in <paramref name="pHtmlFiles"/> are handed to
/// <c>Common.RemoveTempFiles</c>.
/// </summary>
/// <param name="pVideoWrapper">Supplies the video key (for the URL header) and the video name (for the output file name).</param>
/// <param name="pChannelName">Directory that contains the "Comments" folder.</param>
/// <param name="pHtmlFiles">Saved page file names; only the values are used.</param>
/// <remarks>
/// A page that cannot be read or parsed is skipped and reported through
/// <c>System.Diagnostics.Trace</c>. When <c>parseAllComments</c> is "false", processing stops
/// as soon as <c>commentCount</c> reaches <c>totalCommentsParse</c>.
/// </remarks>
public static void GetAllComments(VideoWrapper pVideoWrapper, string pChannelName, Dictionary<int, string> pHtmlFiles)
{
    string videoUrl = "https://www.youtube.com/watch?v=" + pVideoWrapper.getVideoKey();
    string commentsDirectory = pChannelName + "/" + "Comments";
    bool videoUrlFlag = false; // the "Video Url" header is written once per call

    foreach (KeyValuePair<int, string> pair in pHtmlFiles)
    {
        bool breakLoop = false;
        try
        {
            // Read the page and release the file handle immediately, so the file is closed
            // even when parsing fails and can be removed at the end of this method.
            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            using (Stream stream = File.OpenRead(pChannelName + "/Comments/" + pair.Value))
            using (StreamReader reader = new StreamReader(stream))
            {
                doc.LoadHtml(reader.ReadToEnd());
            }

            // SelectNodes yields null (not an empty collection) when nothing matches.
            HtmlNodeCollection totalCollection = doc.DocumentNode.SelectNodes("//ul[@id='all-comments']//li[@class='comment']//div[@class='content']");
            if (totalCollection == null)
            {
                continue;
            }

            foreach (HtmlNode node in totalCollection)
            {
                string user = string.Empty;
                string displayName = string.Empty;
                string date = string.Empty;
                string comment = string.Empty;
                string dataId = string.Empty;
                string authorId = string.Empty;

                // The ids are read by attribute position on the parent of the content div.
                // NOTE(review): positional access depends on the page's attribute order - confirm.
                // When the parent has fewer than three attributes the ids stay empty and the
                // comment is skipped by the completeness check below.
                HtmlNode nodeData = node.ParentNode;
                if (nodeData != null && nodeData.Attributes.Count > 2)
                {
                    dataId = (nodeData.Attributes[2].Value ?? string.Empty).Trim();
                    authorId = (nodeData.Attributes[1].Value ?? string.Empty).Trim();
                }

                bool commentTextTaken = false;
                foreach (HtmlNode child in node.ChildNodes)
                {
                    if (child.Name.Equals("p"))
                    {
                        // First <span>: author (display name plus profile link); second <span>: date.
                        bool userFlag = false;
                        foreach (HtmlNode n in child.ChildNodes)
                        {
                            if (!n.Name.Equals("span"))
                            {
                                continue;
                            }

                            if (userFlag)
                            {
                                date = n.InnerText.Trim();
                                break;
                            }

                            foreach (HtmlNode nNode in n.ChildNodes)
                            {
                                if (nNode.Name.Equals("a"))
                                {
                                    // The user name is the second non-empty path segment of the link.
                                    string[] hrefParts = nNode.GetAttributeValue("href", string.Empty).Split(new Char[] { '/' }, StringSplitOptions.RemoveEmptyEntries);
                                    if (hrefParts.Length > 1)
                                    {
                                        user = hrefParts[1];
                                    }
                                    break;
                                }
                            }
                            displayName = n.InnerText.Trim();
                            userFlag = true;
                        }
                    }
                    else if (child.Name.Equals("div") && !commentTextTaken)
                    {
                        // Only the first <div> child holds the comment text.
                        comment = child.InnerText.Trim();
                        commentTextTaken = true;
                    }
                }

                bool isComplete = displayName.Length > 0 && comment.Length > 0 && dataId.Length > 0
                    && authorId.Length > 0 && user.Length > 0;
                if (isComplete && !GlobalConstants.commentDictionary.ContainsKey(dataId))
                {
                    VideoCommentWrapper commentWrapper = new VideoCommentWrapper();
                    commentWrapper.authorId = authorId;
                    commentWrapper.commentId = dataId;
                    commentWrapper.commentText = comment;
                    commentWrapper.time = date;
                    commentWrapper.displayName = displayName;
                    commentWrapper.userName = user;
                    GlobalConstants.commentDictionary.Add(dataId, commentWrapper);

                    string commentsFile = commentsDirectory + "/" + Common.CleanFileName(pVideoWrapper.getVideoName() + "-" + fileComment) + ".txt";
                    Directory.CreateDirectory(commentsDirectory); // no-op when it already exists
                    commentCount++;

                    string entry = string.Empty;
                    if (!videoUrlFlag)
                    {
                        entry = "Video Url : " + videoUrl + Environment.NewLine + "\r\n";
                    }
                    entry += "User name : " + displayName + Environment.NewLine
                        + "Comment Date : " + date + Environment.NewLine
                        + "Comment : " + comment + Environment.NewLine;
                    File.AppendAllText(commentsFile, entry);
                    videoUrlFlag = true; // set only after the header has really been written
                }

                if (parseAllComments.Equals("false", StringComparison.CurrentCultureIgnoreCase)
                    && totalCommentsParse <= commentCount)
                {
                    breakLoop = true;
                    break;
                }
            }
        }
        catch (Exception ex)
        {
            // Best effort: a broken page must not stop the remaining pages, but it is reported.
            System.Diagnostics.Trace.TraceWarning("GetAllComments: skipping '{0}': {1}", pair.Value, ex);
            continue;
        }

        if (breakLoop)
        {
            break;
        }
    }

    List<string> tempFiles = new List<string>();
    foreach (KeyValuePair<int, string> file in pHtmlFiles)
    {
        tempFiles.Add("/Comments/" + file.Value);
    }
    Common.RemoveTempFiles(tempFiles, pChannelName);
}
/// <summary>
/// Loads the "all comments" pages of a video, starting at <paramref name="pPageNo"/>, and
/// appends every comment not yet present in <c>GlobalConstants.commentDictionary</c> to
/// "{channel}/Comments/{video name}-{fileComment}.txt".
/// </summary>
/// <param name="pChannelName">Directory under which the "Comments" folder is created.</param>
/// <param name="pVideo">Supplies the video key (for the page URL) and the video name (for the output file name).</param>
/// <param name="pHtmlFiles">Not used by this method; retained so existing callers keep compiling.</param>
/// <param name="pPageNo">Number of the first page to load.</param>
/// <remarks>
/// Paging stops at the first page without comment items. Further pages are only loaded when
/// <c>parseAllComments</c> is "true"; when it is "false", a page is abandoned as soon as
/// <c>commentCount</c> reaches <c>totalCommentsParse</c>. A failing page is retried a bounded
/// number of times with a short pause; failures are reported through
/// <c>System.Diagnostics.Trace</c> and are not thrown.
/// </remarks>
public static void DownloadHtmls(string pChannelName, VideoWrapper pVideo, Dictionary<int, string> pHtmlFiles, int pPageNo)
{
    // Paging and retrying used to be done by recursion; the retry had neither a limit nor a
    // delay, so a deterministic failure ended in a stack overflow. Both are now a bounded loop.
    const int maxAttemptsPerPage = 5;
    const int retryDelayMilliseconds = 2000;

    string url = string.Empty;
    int failedAttempts = 0;

    while (true)
    {
        try
        {
            url = ConfigurationManager.AppSettings["VideoAllCommentsUrl"].ToString() + pVideo.getVideoKey() + "&page=" + pPageNo;
            HtmlWeb hwObject = new HtmlWeb();
            HtmlDocument doc = hwObject.Load(url);

            // Base case: a page without comment items ends the paging.
            HtmlNodeCollection commentItems = doc.DocumentNode.SelectNodes("//ul[@id='all-comments']//li[@class='comment']");
            if (commentItems == null || commentItems.Count <= 0)
            {
                return;
            }

            string videoUrl = "https://www.youtube.com/watch?v=" + pVideo.getVideoKey();
            string commentsDirectory = pChannelName + "/" + "Comments";
            // The header flag starts fresh for every page load, as it did when every page was
            // handled by its own call. NOTE(review): GetAllComments writes the header once per
            // call instead - confirm which is intended.
            bool videoUrlFlag = false;

            // SelectNodes yields null when nothing matches; such a page has nothing to extract.
            HtmlNodeCollection totalCollection = doc.DocumentNode.SelectNodes("//ul[@id='all-comments']//li[@class='comment']//div[@class='content']");
            if (totalCollection != null)
            {
                foreach (HtmlNode node in totalCollection)
                {
                    string user = string.Empty;
                    string displayName = string.Empty;
                    string date = string.Empty;
                    string comment = string.Empty;
                    string dataId = string.Empty;
                    string authorId = string.Empty;

                    // The ids are read by attribute position on the parent of the content div.
                    // NOTE(review): positional access depends on the page's attribute order - confirm.
                    // With fewer than three attributes the ids stay empty and the comment is
                    // skipped by the completeness check below.
                    HtmlNode nodeData = node.ParentNode;
                    if (nodeData != null && nodeData.Attributes.Count > 2)
                    {
                        dataId = (nodeData.Attributes[2].Value ?? string.Empty).Trim();
                        authorId = (nodeData.Attributes[1].Value ?? string.Empty).Trim();
                    }

                    bool commentTextTaken = false;
                    foreach (HtmlNode child in node.ChildNodes)
                    {
                        if (child.Name.Equals("p"))
                        {
                            // First <span>: author (display name plus profile link); second <span>: date.
                            bool userFlag = false;
                            foreach (HtmlNode n in child.ChildNodes)
                            {
                                if (!n.Name.Equals("span"))
                                {
                                    continue;
                                }

                                if (userFlag)
                                {
                                    date = n.InnerText.Trim();
                                    break;
                                }

                                foreach (HtmlNode nNode in n.ChildNodes)
                                {
                                    if (nNode.Name.Equals("a"))
                                    {
                                        // The user name is the second non-empty path segment of the link.
                                        string[] hrefParts = nNode.GetAttributeValue("href", string.Empty).Split(new Char[] { '/' }, StringSplitOptions.RemoveEmptyEntries);
                                        if (hrefParts.Length > 1)
                                        {
                                            user = hrefParts[1];
                                        }
                                        break;
                                    }
                                }
                                displayName = n.InnerText.Trim();
                                userFlag = true;
                            }
                        }
                        else if (child.Name.Equals("div") && !commentTextTaken)
                        {
                            // Only the first <div> child holds the comment text.
                            comment = child.InnerText.Trim();
                            commentTextTaken = true;
                        }
                    }

                    bool isComplete = displayName.Length > 0 && comment.Length > 0 && dataId.Length > 0
                        && authorId.Length > 0 && user.Length > 0;
                    if (isComplete && !GlobalConstants.commentDictionary.ContainsKey(dataId))
                    {
                        VideoCommentWrapper commentWrapper = new VideoCommentWrapper();
                        commentWrapper.authorId = authorId;
                        commentWrapper.commentId = dataId;
                        commentWrapper.commentText = comment;
                        commentWrapper.time = date;
                        commentWrapper.displayName = displayName;
                        commentWrapper.userName = user;
                        GlobalConstants.commentDictionary.Add(dataId, commentWrapper);

                        string commentsFile = commentsDirectory + "/" + Common.CleanFileName(pVideo.getVideoName() + "-" + fileComment) + ".txt";
                        Directory.CreateDirectory(commentsDirectory); // no-op when it already exists
                        commentCount++;

                        string entry = string.Empty;
                        if (!videoUrlFlag)
                        {
                            entry = "Video Url : " + videoUrl + Environment.NewLine + "\r\n";
                        }
                        entry += "User name : " + displayName + Environment.NewLine
                            + "Comment Date : " + date + Environment.NewLine
                            + "Comment : " + comment + Environment.NewLine;
                        File.AppendAllText(commentsFile, entry);
                        videoUrlFlag = true; // set only after the header has really been written
                    }

                    if (parseAllComments.Equals("false", StringComparison.CurrentCultureIgnoreCase)
                        && totalCommentsParse <= commentCount)
                    {
                        break;
                    }
                }
            }

            pPageNo++;
            if (!parseAllComments.Equals("true", StringComparison.CurrentCultureIgnoreCase))
            {
                return;
            }
            failedAttempts = 0; // the page succeeded; the next page gets a fresh retry budget
        }
        catch (Exception ex)
        {
            failedAttempts++;
            System.Diagnostics.Trace.TraceWarning("DownloadHtmls: attempt {0} of {1} failed for '{2}': {3}", failedAttempts, maxAttemptsPerPage, url, ex);
            if (failedAttempts >= maxAttemptsPerPage)
            {
                return;
            }
            System.Threading.Thread.Sleep(retryDelayMilliseconds);
        }
    }
}
/// <summary>
/// Downloads the GData (v2) Atom entry for a video, extracts its metadata and appends a
/// plain-text summary to "{channel}/Videos/channel_video_{video title}-{channelVideo setting}".
/// </summary>
/// <param name="pVideoWrapper">Supplies the video name, video key and channel URL.</param>
/// <param name="pChannelName">
/// Channel name; it is passed through <c>Common.CleanFileName</c> and then used both as the
/// output directory root and as the "Video Channel" value in the summary.
/// </param>
/// <remarks>
/// A failed attempt is retried after a 10 second pause, for a bounded number of attempts.
/// Failures inside the retry loop are reported through <c>System.Diagnostics.Trace</c> and are
/// not thrown. The "channelVideo" setting and the channel-name cleanup are evaluated before the
/// retry loop, so an exception from either of those propagates to the caller.
/// </remarks>
public static void parseVideo(VideoWrapper pVideoWrapper, string pChannelName)
{
    // Bounded retry: the previous implementation retried by unbounded recursion, which never
    // returned (and kept growing the stack) when the failure was permanent.
    const int maxAttempts = 5;
    const int retryDelayMilliseconds = 10000;

    string fileVideo = ConfigurationManager.AppSettings["channelVideo"].ToString();
    string videoChannelName = Common.CleanFileName(pChannelName);

    for (int attempt = 1; attempt <= maxAttempts; attempt++)
    {
        try
        {
            string channelFileNameXML = Common.CleanFileName(pVideoWrapper.getVideoName()) + "-" + ConfigurationManager.AppSettings["channelsFileNameXML"].ToString();
            string videoUrl = string.Format("https://gdata.youtube.com/feeds/api/videos/{0}?v=2", pVideoWrapper.getVideoKey());

            // Dispose the response, stream and reader even when reading fails.
            string xmlData;
            WebRequest nameRequest = WebRequest.Create(videoUrl);
            using (WebResponse nameResponse = nameRequest.GetResponse())
            using (Stream nameStream = nameResponse.GetResponseStream())
            using (StreamReader nameReader = new StreamReader(nameStream))
            {
                xmlData = nameReader.ReadToEnd();
            }

            // The feed is round-tripped through a temporary file as before, but the file is
            // now removed even when writing or parsing fails.
            XmlDocument doc = new XmlDocument();
            try
            {
                File.WriteAllText(channelFileNameXML, xmlData);
                doc.Load(channelFileNameXML);
            }
            finally
            {
                File.Delete(channelFileNameXML);
            }

            XmlNamespaceManager namespaceManager = new XmlNamespaceManager(doc.NameTable);
            namespaceManager.AddNamespace("Atom", "http://www.w3.org/2005/Atom");
            namespaceManager.AddNamespace("yt", "http://gdata.youtube.com/schemas/2007");
            namespaceManager.AddNamespace("media", "http://search.yahoo.com/mrss/");

            // Each node is looked up once and reused.
            XmlNode linkNode = doc.SelectSingleNode("//Atom:entry/Atom:link", namespaceManager);
            XmlNode titleNode = doc.SelectSingleNode("//Atom:entry/Atom:title", namespaceManager);
            XmlNode publishedNode = doc.SelectSingleNode("//Atom:entry/Atom:published", namespaceManager);
            XmlNode ratingNode = doc.SelectSingleNode("//Atom:entry/yt:rating", namespaceManager);
            XmlNode descriptionNode = doc.SelectSingleNode(" //Atom:entry/media:group/media:description", namespaceManager);
            XmlNode statisticsNode = doc.SelectSingleNode("//Atom:entry/yt:statistics", namespaceManager);

            // Only the first <link> of the entry is considered, and only when it is the
            // "alternate" link. Everything from the first '&' onwards is dropped.
            string url = string.Empty;
            if (linkNode != null && linkNode.Attributes != null)
            {
                XmlAttribute relAttribute = linkNode.Attributes["rel"];
                XmlAttribute hrefAttribute = linkNode.Attributes["href"];
                if (relAttribute != null && hrefAttribute != null
                    && relAttribute.Value.Equals("alternate", StringComparison.CurrentCultureIgnoreCase))
                {
                    string[] urlArr = hrefAttribute.Value.Split(new Char[] { '&' }, StringSplitOptions.RemoveEmptyEntries);
                    if (urlArr.Length > 0)
                    {
                        url = urlArr[0];
                    }
                }
            }

            string videoName = titleNode != null ? titleNode.InnerText : string.Empty;
            string date = publishedNode != null ? publishedNode.InnerText : string.Empty;
            string description = descriptionNode != null ? descriptionNode.InnerText : string.Empty;

            string iDislike = string.Empty;
            string iLike = string.Empty;
            if (ratingNode != null && ratingNode.Attributes != null)
            {
                XmlAttribute dislikesAttribute = ratingNode.Attributes["numDislikes"];
                XmlAttribute likesAttribute = ratingNode.Attributes["numLikes"];
                if (dislikesAttribute != null)
                {
                    iDislike = dislikesAttribute.Value;
                }
                if (likesAttribute != null)
                {
                    iLike = likesAttribute.Value;
                }
            }

            string videoViewCount = string.Empty;
            if (statisticsNode != null && statisticsNode.Attributes != null)
            {
                XmlAttribute viewCountAttribute = statisticsNode.Attributes["viewCount"];
                if (viewCountAttribute != null)
                {
                    videoViewCount = viewCountAttribute.Value;
                }
            }

            // A null tag list is written as an empty "Tags" line instead of throwing.
            List<string> videoTags = preapreParamsTags(doc.SelectNodes("//Atom:entry/Atom:category", namespaceManager));
            string tags = videoTags != null ? string.Join(",", videoTags.ToArray()) : string.Empty;

            string videoNameFile = Common.CleanFileName(videoName + "-" + fileVideo);
            string videosDirectory = videoChannelName + "/" + "Videos";
            Directory.CreateDirectory(videosDirectory); // no-op when the directory already exists

            // The summary is written with a single append so a failed attempt cannot leave a
            // partial summary that the next attempt would then duplicate.
            string newLine = Environment.NewLine;
            string summary =
                "Video Channel : " + videoChannelName + newLine +
                "Channel Url : " + pVideoWrapper.getChannelUrl() + newLine +
                "Video Name : " + videoName + newLine +
                "Video Url : " + url + newLine +
                "Date : " + date + newLine +
                "Video Views : " + videoViewCount + newLine +
                "I Like : " + iLike + newLine +
                "I dislike : " + iDislike + newLine +
                "Description : " + description + newLine +
                "Tags : " + tags + newLine;
            File.AppendAllText(videosDirectory + "/" + "channel_video_" + videoNameFile, summary);
            return;
        }
        catch (Exception ex)
        {
            System.Diagnostics.Trace.TraceWarning("parseVideo: attempt {0} of {1} failed: {2}", attempt, maxAttempts, ex);
            if (attempt < maxAttempts)
            {
                Thread.Sleep(retryDelayMilliseconds);
            }
        }
    }

    System.Diagnostics.Trace.TraceError("parseVideo: giving up after {0} attempts.", maxAttempts);
}
/// <summary>
/// Parses previously saved comment pages ("{channel}/Comments/{file}") and appends every
/// comment not yet present in <c>GlobalConstants.commentDictionary</c> to the video's comment
/// text file. Afterwards all files listed in <paramref name="pHtmlFiles"/> are handed to
/// <c>Common.RemoveTempFiles</c>.
/// </summary>
/// <param name="pVideoWrapper">Supplies the video key (for the URL header) and the video name (for the output file name).</param>
/// <param name="pChannelName">Directory that contains the "Comments" folder.</param>
/// <param name="pHtmlFiles">Saved page file names; only the values are used.</param>
/// <remarks>
/// A page that cannot be read or parsed is skipped and reported through
/// <c>System.Diagnostics.Trace</c>. When <c>parseAllComments</c> is "false", processing stops
/// as soon as <c>commentCount</c> reaches <c>totalCommentsParse</c>.
/// </remarks>
public static void GetAllComments(VideoWrapper pVideoWrapper, string pChannelName, Dictionary<int, string> pHtmlFiles)
{
    string videoUrl = "https://www.youtube.com/watch?v=" + pVideoWrapper.getVideoKey();
    string commentsDirectory = pChannelName + "/" + "Comments";
    bool videoUrlFlag = false; // the "Video Url" header is written once per call

    foreach (KeyValuePair<int, string> pair in pHtmlFiles)
    {
        bool breakLoop = false;
        try
        {
            // Read the page and release the file handle immediately, so the file is closed
            // even when parsing fails and can be removed at the end of this method.
            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            using (Stream stream = File.OpenRead(pChannelName + "/Comments/" + pair.Value))
            using (StreamReader reader = new StreamReader(stream))
            {
                doc.LoadHtml(reader.ReadToEnd());
            }

            // SelectNodes yields null (not an empty collection) when nothing matches.
            HtmlNodeCollection totalCollection = doc.DocumentNode.SelectNodes("//ul[@id='all-comments']//li[@class='comment']//div[@class='content']");
            if (totalCollection == null)
            {
                continue;
            }

            foreach (HtmlNode node in totalCollection)
            {
                string user = string.Empty;
                string displayName = string.Empty;
                string date = string.Empty;
                string comment = string.Empty;
                string dataId = string.Empty;
                string authorId = string.Empty;

                // The ids are read by attribute position on the parent of the content div.
                // NOTE(review): positional access depends on the page's attribute order - confirm.
                // When the parent has fewer than three attributes the ids stay empty and the
                // comment is skipped by the completeness check below.
                HtmlNode nodeData = node.ParentNode;
                if (nodeData != null && nodeData.Attributes.Count > 2)
                {
                    dataId = (nodeData.Attributes[2].Value ?? string.Empty).Trim();
                    authorId = (nodeData.Attributes[1].Value ?? string.Empty).Trim();
                }

                bool commentTextTaken = false;
                foreach (HtmlNode child in node.ChildNodes)
                {
                    if (child.Name.Equals("p"))
                    {
                        // First <span>: author (display name plus profile link); second <span>: date.
                        bool userFlag = false;
                        foreach (HtmlNode n in child.ChildNodes)
                        {
                            if (!n.Name.Equals("span"))
                            {
                                continue;
                            }

                            if (userFlag)
                            {
                                date = n.InnerText.Trim();
                                break;
                            }

                            foreach (HtmlNode nNode in n.ChildNodes)
                            {
                                if (nNode.Name.Equals("a"))
                                {
                                    // The user name is the second non-empty path segment of the link.
                                    string[] hrefParts = nNode.GetAttributeValue("href", string.Empty).Split(new Char[] { '/' }, StringSplitOptions.RemoveEmptyEntries);
                                    if (hrefParts.Length > 1)
                                    {
                                        user = hrefParts[1];
                                    }
                                    break;
                                }
                            }
                            displayName = n.InnerText.Trim();
                            userFlag = true;
                        }
                    }
                    else if (child.Name.Equals("div") && !commentTextTaken)
                    {
                        // Only the first <div> child holds the comment text.
                        comment = child.InnerText.Trim();
                        commentTextTaken = true;
                    }
                }

                bool isComplete = displayName.Length > 0 && comment.Length > 0 && dataId.Length > 0
                    && authorId.Length > 0 && user.Length > 0;
                if (isComplete && !GlobalConstants.commentDictionary.ContainsKey(dataId))
                {
                    VideoCommentWrapper commentWrapper = new VideoCommentWrapper();
                    commentWrapper.authorId = authorId;
                    commentWrapper.commentId = dataId;
                    commentWrapper.commentText = comment;
                    commentWrapper.time = date;
                    commentWrapper.displayName = displayName;
                    commentWrapper.userName = user;
                    GlobalConstants.commentDictionary.Add(dataId, commentWrapper);

                    string commentsFile = commentsDirectory + "/" + Common.CleanFileName(pVideoWrapper.getVideoName() + "-" + fileComment) + ".txt";
                    Directory.CreateDirectory(commentsDirectory); // no-op when it already exists
                    commentCount++;

                    string entry = string.Empty;
                    if (!videoUrlFlag)
                    {
                        entry = "Video Url : " + videoUrl + Environment.NewLine + "\r\n";
                    }
                    entry += "User name : " + displayName + Environment.NewLine
                        + "Comment Date : " + date + Environment.NewLine
                        + "Comment : " + comment + Environment.NewLine;
                    File.AppendAllText(commentsFile, entry);
                    videoUrlFlag = true; // set only after the header has really been written
                }

                if (parseAllComments.Equals("false", StringComparison.CurrentCultureIgnoreCase)
                    && totalCommentsParse <= commentCount)
                {
                    breakLoop = true;
                    break;
                }
            }
        }
        catch (Exception ex)
        {
            // Best effort: a broken page must not stop the remaining pages, but it is reported.
            System.Diagnostics.Trace.TraceWarning("GetAllComments: skipping '{0}': {1}", pair.Value, ex);
            continue;
        }

        if (breakLoop)
        {
            break;
        }
    }

    List<string> tempFiles = new List<string>();
    foreach (KeyValuePair<int, string> file in pHtmlFiles)
    {
        tempFiles.Add("/Comments/" + file.Value);
    }
    Common.RemoveTempFiles(tempFiles, pChannelName);
}
/// <summary>
/// Loads the "all comments" pages of a video, starting at <paramref name="pPageNo"/>, and
/// appends every comment not yet present in <c>GlobalConstants.commentDictionary</c> to
/// "{channel}/Comments/{video name}-{fileComment}.txt".
/// </summary>
/// <param name="pChannelName">Directory under which the "Comments" folder is created.</param>
/// <param name="pVideo">Supplies the video key (for the page URL) and the video name (for the output file name).</param>
/// <param name="pHtmlFiles">Not used by this method; retained so existing callers keep compiling.</param>
/// <param name="pPageNo">Number of the first page to load.</param>
/// <remarks>
/// Paging stops at the first page without comment items. Further pages are only loaded when
/// <c>parseAllComments</c> is "true"; when it is "false", a page is abandoned as soon as
/// <c>commentCount</c> reaches <c>totalCommentsParse</c>. A failing page is retried a bounded
/// number of times with a short pause; failures are reported through
/// <c>System.Diagnostics.Trace</c> and are not thrown.
/// </remarks>
public static void DownloadHtmls(string pChannelName, VideoWrapper pVideo, Dictionary<int, string> pHtmlFiles, int pPageNo)
{
    // Paging and retrying used to be done by recursion; the retry had neither a limit nor a
    // delay, so a deterministic failure ended in a stack overflow. Both are now a bounded loop.
    const int maxAttemptsPerPage = 5;
    const int retryDelayMilliseconds = 2000;

    string url = string.Empty;
    int failedAttempts = 0;

    while (true)
    {
        try
        {
            url = ConfigurationManager.AppSettings["VideoAllCommentsUrl"].ToString() + pVideo.getVideoKey() + "&page=" + pPageNo;
            HtmlWeb hwObject = new HtmlWeb();
            HtmlDocument doc = hwObject.Load(url);

            // Base case: a page without comment items ends the paging.
            HtmlNodeCollection commentItems = doc.DocumentNode.SelectNodes("//ul[@id='all-comments']//li[@class='comment']");
            if (commentItems == null || commentItems.Count <= 0)
            {
                return;
            }

            string videoUrl = "https://www.youtube.com/watch?v=" + pVideo.getVideoKey();
            string commentsDirectory = pChannelName + "/" + "Comments";
            // The header flag starts fresh for every page load, as it did when every page was
            // handled by its own call. NOTE(review): GetAllComments writes the header once per
            // call instead - confirm which is intended.
            bool videoUrlFlag = false;

            // SelectNodes yields null when nothing matches; such a page has nothing to extract.
            HtmlNodeCollection totalCollection = doc.DocumentNode.SelectNodes("//ul[@id='all-comments']//li[@class='comment']//div[@class='content']");
            if (totalCollection != null)
            {
                foreach (HtmlNode node in totalCollection)
                {
                    string user = string.Empty;
                    string displayName = string.Empty;
                    string date = string.Empty;
                    string comment = string.Empty;
                    string dataId = string.Empty;
                    string authorId = string.Empty;

                    // The ids are read by attribute position on the parent of the content div.
                    // NOTE(review): positional access depends on the page's attribute order - confirm.
                    // With fewer than three attributes the ids stay empty and the comment is
                    // skipped by the completeness check below.
                    HtmlNode nodeData = node.ParentNode;
                    if (nodeData != null && nodeData.Attributes.Count > 2)
                    {
                        dataId = (nodeData.Attributes[2].Value ?? string.Empty).Trim();
                        authorId = (nodeData.Attributes[1].Value ?? string.Empty).Trim();
                    }

                    bool commentTextTaken = false;
                    foreach (HtmlNode child in node.ChildNodes)
                    {
                        if (child.Name.Equals("p"))
                        {
                            // First <span>: author (display name plus profile link); second <span>: date.
                            bool userFlag = false;
                            foreach (HtmlNode n in child.ChildNodes)
                            {
                                if (!n.Name.Equals("span"))
                                {
                                    continue;
                                }

                                if (userFlag)
                                {
                                    date = n.InnerText.Trim();
                                    break;
                                }

                                foreach (HtmlNode nNode in n.ChildNodes)
                                {
                                    if (nNode.Name.Equals("a"))
                                    {
                                        // The user name is the second non-empty path segment of the link.
                                        string[] hrefParts = nNode.GetAttributeValue("href", string.Empty).Split(new Char[] { '/' }, StringSplitOptions.RemoveEmptyEntries);
                                        if (hrefParts.Length > 1)
                                        {
                                            user = hrefParts[1];
                                        }
                                        break;
                                    }
                                }
                                displayName = n.InnerText.Trim();
                                userFlag = true;
                            }
                        }
                        else if (child.Name.Equals("div") && !commentTextTaken)
                        {
                            // Only the first <div> child holds the comment text.
                            comment = child.InnerText.Trim();
                            commentTextTaken = true;
                        }
                    }

                    bool isComplete = displayName.Length > 0 && comment.Length > 0 && dataId.Length > 0
                        && authorId.Length > 0 && user.Length > 0;
                    if (isComplete && !GlobalConstants.commentDictionary.ContainsKey(dataId))
                    {
                        VideoCommentWrapper commentWrapper = new VideoCommentWrapper();
                        commentWrapper.authorId = authorId;
                        commentWrapper.commentId = dataId;
                        commentWrapper.commentText = comment;
                        commentWrapper.time = date;
                        commentWrapper.displayName = displayName;
                        commentWrapper.userName = user;
                        GlobalConstants.commentDictionary.Add(dataId, commentWrapper);

                        string commentsFile = commentsDirectory + "/" + Common.CleanFileName(pVideo.getVideoName() + "-" + fileComment) + ".txt";
                        Directory.CreateDirectory(commentsDirectory); // no-op when it already exists
                        commentCount++;

                        string entry = string.Empty;
                        if (!videoUrlFlag)
                        {
                            entry = "Video Url : " + videoUrl + Environment.NewLine + "\r\n";
                        }
                        entry += "User name : " + displayName + Environment.NewLine
                            + "Comment Date : " + date + Environment.NewLine
                            + "Comment : " + comment + Environment.NewLine;
                        File.AppendAllText(commentsFile, entry);
                        videoUrlFlag = true; // set only after the header has really been written
                    }

                    if (parseAllComments.Equals("false", StringComparison.CurrentCultureIgnoreCase)
                        && totalCommentsParse <= commentCount)
                    {
                        break;
                    }
                }
            }

            pPageNo++;
            if (!parseAllComments.Equals("true", StringComparison.CurrentCultureIgnoreCase))
            {
                return;
            }
            failedAttempts = 0; // the page succeeded; the next page gets a fresh retry budget
        }
        catch (Exception ex)
        {
            failedAttempts++;
            System.Diagnostics.Trace.TraceWarning("DownloadHtmls: attempt {0} of {1} failed for '{2}': {3}", failedAttempts, maxAttemptsPerPage, url, ex);
            if (failedAttempts >= maxAttemptsPerPage)
            {
                return;
            }
            System.Threading.Thread.Sleep(retryDelayMilliseconds);
        }
    }
}