/// <summary>
/// Reads the article body text, its images and its comments out of the raw page markup.
/// </summary>
/// <param name="input">Raw page markup to scan.</param>
/// <param name="pictures">Receives one picture per match of DCRegexManager.TextImage.</param>
/// <param name="comments">Receives the comments that could be parsed completely.</param>
/// <param name="commentUserID">Receives group 1 of the DCRegexManager.TextCommentUserID match, or an empty string.</param>
/// <param name="text">Receives the trimmed body markup, or an empty string when the body start is not found.</param>
/// <returns>false when DCRegexManager.TextStart does not match; otherwise true.</returns>
private bool UpdateArticleTextAndComments(string input, out List<Picture> pictures, out List<DCInsideComment> comments, out string commentUserID, out string text)
{
    commentUserID = string.Empty;
    text = string.Empty;
    comments = new List<DCInsideComment>();
    pictures = new List<Picture>();

    StringEngine scanner = new StringEngine(input);
    Match hit;

    // 1. Find the images, e.g. <img id=dc_image_elm0 src='http://dcimg1.dcinside.com/viewimage.php?id=...&no=...&f_no=...' width=550 />.
    //    The full src value is kept as the browse URL; the picture URL is the same value cut off at "&f_no=".
    while (scanner.Next(DCRegexManager.TextImage, out hit))
    {
        string browseUrl = hit.Groups[1].Value;
        int cut = browseUrl.IndexOf("&f_no=");
        string imageUrl = cut == -1 ? browseUrl : browseUrl.Substring(0, cut);
        pictures.Add(new Picture(imageUrl, browseUrl, "http://gall.dcinside.com"));
    }

    // 2. Find where the body starts.
    if (!scanner.Next(DCRegexManager.TextStart, out hit))
    {
        return false;
    }

    // Count nested divs to find the one that closes the body: a TextDIV match whose
    // first group is non-empty goes one level deeper, any other match goes one level up.
    int bodyStart = scanner.Cursor;
    int depth = 1;
    while (depth > 0 && scanner.Next(DCRegexManager.TextDIV, out hit))
    {
        if (hit.Groups[1].Value.Length == 0)
        {
            depth--;
        }
        else
        {
            depth++;
        }
    }

    if (depth != 0)
    {
        // The tags never balanced: take everything after the start marker and skip comment parsing.
        text = input.Substring(bodyStart).Trim();
        return true;
    }

    // Balanced: "hit" is the match that brought the depth back to zero.
    text = input.Substring(bodyStart, hit.Index - bodyStart).Trim();

    // 3. Collect the comments.
    comments.Clear();
    while (scanner.Next(DCRegexManager.CommentStart, out hit))
    {
        string header;
        if (!scanner.GetNextLine(out header))
        {
            continue;
        }

        Match nameHit = DCRegexManager.CommentName.Match(header);
        if (!nameHit.Success)
        {
            continue;
        }

        // The member status is derived from which icon file name appears on the header line.
        var entry = new DCInsideComment
        {
            Level = 0,
            Name = nameHit.Groups[2].Value.Trim(),
            MemberStatus = header.Contains("gallercon.gif")
                ? MemberStatus.Fix
                : (header.Contains("gallercon1.gif") ? MemberStatus.Default : MemberStatus.Anonymous)
        };

        // Comment content; a comment without it is dropped.
        if (!scanner.Next(DCRegexManager.CommentText, out hit))
        {
            continue;
        }

        entry.Text = hit.Groups[1].Value.Trim();
        comments.Add(entry);
    }

    // 4. Pick up the comment user id, if present.
    if (scanner.Next(DCRegexManager.TextCommentUserID, out hit))
    {
        commentUserID = hit.Groups[1].Value;
    }

    return true;
}
/// <summary>
/// Requests the next page of the board listing and parses its rows into articles.
/// </summary>
/// <param name="ct">Cancellation token handed to the download call.</param>
/// <param name="elems">Receives the articles parsed from the page; may be empty.</param>
/// <returns>Always true.</returns>
/// <remarks>
/// A row is skipped when any of its expected lines is missing or does not match,
/// or when its id is not lower than the recentArticle field. The page field is
/// incremented after every call.
/// </remarks>
public bool Next(System.Threading.CancellationToken ct, out System.Collections.Generic.IEnumerable<IArticle> elems)
{
    WebClientEx client = new WebClientEx();

    // The trailing DateTime.Now.Ticks query argument makes every URL unique
    // (presumably to defeat caching - confirm).
    string result = client.DownloadStringAsyncTask(
        new Uri(string.Format("http://clien.career.co.kr/cs2/bbs/board.php?bo_table={0}&page={1}&{2}", id, page + 1, DateTime.Now.Ticks), UriKind.Absolute),
        ct).GetResult();

    StringEngine se = new StringEngine(result);
    List<IArticle> articles = new List<IArticle>();

    // Patterns are built once instead of once per row.
    Regex rowStartRegex = new Regex("<tr class=\"mytr\">");
    Regex articleIdRegex = new Regex(@"<td>(\d+)</td>");
    Regex titleRegex = new Regex(@"<a[^>]*?>(.*?)</a>\s*(<span>\[(\d+)\]</span>)?");
    Regex memberNameRegex = new Regex(@"<span class='member'>(.*?)</span>");
    // The dot before "gif" is escaped so that a member id ending in "gif" is not cut short.
    Regex memberImageRegex = new Regex(@"<img src='/cs2/data/member/.*?/(.*?)\.gif");
    Regex dateRegex = new Regex("<span title=\"([^\"]*?)\">");

    bool curBool = true;

    while (true)
    {
        Match match;
        if (!se.Next(rowStartRegex, out match))
        {
            break;
        }

        // Article id
        string line;
        if (!se.GetNextLine(out line))
        {
            continue;
        }

        match = articleIdRegex.Match(line);
        if (!match.Success)
        {
            continue;
        }

        string articleID = match.Groups[1].Value;

        // An id that does not fit in an int skips the row instead of aborting the whole page.
        int curArticleID;
        if (!int.TryParse(articleID, out curArticleID))
        {
            continue;
        }

        // Only rows with an id lower than the last accepted one are taken.
        if (recentArticle <= curArticleID)
        {
            continue;
        }

        recentArticle = curArticleID;

        ClienArticle article = new ClienArticle(board, articleID);

        // NOTE(review): HasImage just alternates from row to row; it is not read from the markup - confirm this is intended.
        article.HasImage = curBool;
        curBool = !curBool;

        // Title and comment count
        if (!se.GetNextLine(out line))
        {
            continue;
        }

        match = titleRegex.Match(line);
        if (!match.Success)
        {
            continue;
        }

        article.Title = match.Groups[1].Value;

        int commentCount = 0;
        if (match.Groups[3].Success && !int.TryParse(match.Groups[3].Value, out commentCount))
        {
            commentCount = 0;
        }

        article.CommentCount = commentCount;

        // Author name: either a text span or the file name of an image
        if (!se.GetNextLine(out line))
        {
            continue;
        }

        match = memberNameRegex.Match(line);
        if (!match.Success)
        {
            match = memberImageRegex.Match(line);
            if (!match.Success)
            {
                continue;
            }
        }

        article.Name = match.Groups[1].Value;

        // Date
        if (!se.GetNextLine(out line))
        {
            continue;
        }

        match = dateRegex.Match(line);
        if (!match.Success)
        {
            continue;
        }

        // An unparsable date skips the row, like every other parse failure in this loop.
        DateTime date;
        if (!DateTime.TryParse(match.Groups[1].Value, out date))
        {
            continue;
        }

        article.Date = date;

        articles.Add(article);
    }

    elems = articles;
    page++;
    return true;
}
/// <summary>
/// Requests the article page and extracts the body markup, the first attached image and the comments.
/// </summary>
/// <param name="ct">Cancellation token handed to the download call.</param>
/// <param name="text">Receives the trimmed body markup, or an empty string when the body is not found.</param>
/// <returns>false when the "writeContents" span is not found; otherwise true.</returns>
/// <remarks>
/// The pictures list is always cleared first. The comments list is only cleared and refilled
/// when the body's span tags balance; otherwise the rest of the page is returned as the text
/// and the comments list is left untouched.
/// </remarks>
public bool GetText(System.Threading.CancellationToken ct, out string text)
{
    text = string.Empty;

    WebClientEx client = new WebClientEx();
    var result = client.DownloadStringAsyncTask(
        new Uri(string.Format("http://clien.career.co.kr/cs2/bbs/board.php?bo_table={0}&wr_id={1}", board.ID, id), UriKind.Absolute),
        ct).GetResult();

    StringEngine se = new StringEngine(result);
    Match match;

    // Attached image: only the first match is taken.
    pictures.Clear();
    if (se.Next(new Regex("<div class=\"attachedImage\"><img.*?src=(?<quote>'|\")(?<url>.*?)\\k<quote>"), out match))
    {
        Picture pic = new Picture(string.Format("http://clien.career.co.kr/cs2/bbs/{0}", match.Groups["url"].Value), Uri.ToString());
        pictures.Add(pic);
        HasImage = true;
    }

    // Body start
    var textRegex = new Regex("<span id=\"writeContents\"(.*?)>");
    if (!se.Next(textRegex, out match))
    {
        return false;
    }

    // Walk the following span tags, tracking nesting depth, to find the tag that closes the body span.
    int start = se.Cursor;
    int count = 1;
    var spanTagRegex = new Regex("(<\\s*span[^>]*>)|(<\\s*/\\s*span\\s*>)", RegexOptions.IgnoreCase); // opening or closing span tag
    while (count > 0)
    {
        if (!se.Next(spanTagRegex, out match))
        {
            break;
        }

        if (match.Groups[1].Value.Length != 0)
        {
            count++; // opening tag
        }
        else
        {
            count--; // closing tag
        }
    }

    if (count != 0)
    {
        // The tags never balanced: return everything after the start marker and skip the comments.
        text = result.Substring(start).Trim();
        return true;
    }

    // Balanced: "match" is the closing tag of the body span.
    text = result.Substring(start, match.Index - start).Trim();

    // Comments. Patterns are built once instead of once per comment.
    Regex replyStartRegex = new Regex("<ul class=\"reply_info\">");
    Regex replyContentRegex = new Regex("<div class=\"reply_content\">");
    // The dot before "gif" is escaped so that a member id ending in "gif" is not cut short.
    Regex memberImageRegex = new Regex(@"<img src='/cs2/data/member/.*?/(.*?)\.gif");
    Regex memberNameRegex = new Regex(@"<span class='member'>(.*?)</span>");
    Regex editMarkerRegex = new Regex("(.*?)<span id='edit");

    comments.Clear();
    while (se.Next(replyStartRegex, out match))
    {
        string line;
        if (!se.GetNextLine(out line))
        {
            continue;
        }

        ClienComment comment = new ClienComment();

        // A line carrying the blet_re2.gif image is stored with level 1, anything else with level 0.
        comment.Level = 0;
        if (line.Contains("<img src=\"../skin/board/cheditor/img/blet_re2.gif\">"))
        {
            comment.Level = 1;
        }

        // Author name: the file name of a member image, or else a text span.
        match = memberImageRegex.Match(line);
        if (!match.Success)
        {
            match = memberNameRegex.Match(line);
            if (!match.Success)
            {
                continue;
            }
        }

        comment.Name = match.Groups[1].Value;

        if (!se.Next(replyContentRegex, out match))
        {
            continue;
        }

        // Accumulate lines until the one containing the "edit" span; that line contributes only the part before the span.
        StringBuilder sb = new StringBuilder();
        while (se.GetNextLine(out line))
        {
            match = editMarkerRegex.Match(line);
            if (match.Success)
            {
                sb.Append(match.Groups[1].Value);
                break;
            }

            sb.Append(line);
        }

        comment.Text = sb.ToString().Trim();
        comments.Add(comment);
    }

    return true;
}