// Loads the article page, detects deleted posts, and fills in the revision's
// plain-text details, HTML details and image source records.
//
// contentrevision: revision to populate. Reads Content.Contents_URL; writes
// isDepricate, Details, Details_Html and SrcDatas.
protected override void ParseContent(ContentRevisionDTO contentrevision)
{
    // Upper bound on load/parse attempts. The previous implementation retried by
    // calling itself with no limit, so a failure that repeats on every attempt
    // ended in a StackOverflowException.
    const int MaxAttempts = 3;

    for (var attempt = 1; attempt <= MaxAttempts; attempt++)
    {
        try
        {
            var loadedContent = webGetutf.Load(contentrevision.Content.Contents_URL);

            // The page shows this notice ("This post has been deleted.") in an
            // li.hx_cate element when the article was removed.
            var deletedMarkers = loadedContent.DocumentNode.SelectNodes("//li[@class = 'hx_cate']");
            var deletedMarker = deletedMarkers == null ? null : deletedMarkers.FirstOrDefault();
            if (deletedMarker != null && deletedMarker.InnerText.Trim() == "삭제된 글입니다.")
            {
                contentrevision.isDepricate = true;
                return;
            }

            // SelectNodes yields null when nothing matches. A page without the
            // article container is treated as removed, which is what the old
            // ArgumentNullException path did implicitly.
            var bodyNodes = loadedContent.DocumentNode.SelectNodes("//div[@id = 'copy_layer_1']");
            var articlecontent = bodyNodes == null ? null : bodyNodes.LastOrDefault();
            if (articlecontent == null)
            {
                contentrevision.isDepricate = true;
                return;
            }

            contentrevision.Details = articlecontent.InnerText.Trim();
            contentrevision.Details_Html = articlecontent.InnerHtml.Trim();
            contentrevision.isDepricate = false;

            var imgnodes = articlecontent.SelectNodes(".//img");
            contentrevision.SrcDatas = new List<SrcdataDTO>();
            if (imgnodes == null)
            {
                return;
            }

            foreach (var img in imgnodes)
            {
                // Skip images whose src is missing or not an absolute URI instead of
                // abandoning every image that follows the first bad one.
                Uri srcurl;
                if (!Uri.TryCreate(img.GetAttributeValue("src", "default"), UriKind.Absolute, out srcurl))
                {
                    continue;
                }

                var srcdata = new SrcdataDTO
                {
                    SourceUrl = srcurl.AbsoluteUri,
                    IsDepricated = false,
                    FileName = System.IO.Path.GetFileName(srcurl.LocalPath),
                    SrcGuId = Guid.NewGuid(),
                };

                // NOTE(review): Details_Html was captured above, so this attribute is
                // only present on the in-memory node, not in the stored HTML - confirm
                // whether that is intended.
                img.SetAttributeValue("guid", srcdata.SrcGuId.ToString());
                contentrevision.SrcDatas.Add(srcdata);
            }

            return;
        }
        catch (ArgumentNullException)
        {
            contentrevision.isDepricate = true;
            return;
        }
        catch (UriFormatException)
        {
            return;
        }
        catch (Exception e)
        {
            if (attempt == MaxAttempts)
            {
                Console.WriteLine("ParseContent gave up after {0} attempts: {1}", MaxAttempts, e.Message);
                return;
            }

            Console.WriteLine("ReStart");
        }
    }
}
// Loads the article page and fills in the revision's plain-text details,
// HTML details and image source records from the div.view_content element.
//
// contentrevision: revision to populate. Reads Content.Contents_URL; writes
// isDepricate, Details, Details_Html and SrcDatas.
protected override void ParseContent(ContentRevisionDTO contentrevision)
{
    // Upper bound on load/parse attempts. The previous implementation retried by
    // calling itself with no limit, so a failure that repeats on every attempt
    // (for example SingleOrDefault finding two matching divs) ended in a
    // StackOverflowException.
    const int MaxAttempts = 3;

    for (var attempt = 1; attempt <= MaxAttempts; attempt++)
    {
        try
        {
            var loadedContent = webGetutf.Load(contentrevision.Content.Contents_URL);

            // SelectNodes yields null when nothing matches. A page without the
            // article container is treated as removed, which is what the old
            // ArgumentNullException path did implicitly.
            var bodyNodes = loadedContent.DocumentNode.SelectNodes("//div[@class = 'view_content']");
            if (bodyNodes == null)
            {
                contentrevision.isDepricate = true;
                return;
            }

            // Still throws InvalidOperationException on more than one match; that
            // now goes through the bounded retry below.
            var articlecontent = bodyNodes.SingleOrDefault();
            if (articlecontent == null)
            {
                contentrevision.isDepricate = true;
                return;
            }

            contentrevision.Details = articlecontent.InnerText.Trim();
            contentrevision.Details_Html = articlecontent.InnerHtml.Trim();
            contentrevision.isDepricate = false;

            var imgnodes = articlecontent.SelectNodes(".//img");
            contentrevision.SrcDatas = new List<SrcdataDTO>();
            if (imgnodes == null)
            {
                return;
            }

            foreach (var img in imgnodes)
            {
                // Skip images whose src is missing or not an absolute URI instead of
                // abandoning every image that follows the first bad one.
                Uri srcurl;
                if (!Uri.TryCreate(img.GetAttributeValue("src", "default"), UriKind.Absolute, out srcurl))
                {
                    continue;
                }

                var srcdata = new SrcdataDTO
                {
                    SourceUrl = srcurl.AbsoluteUri,
                    IsDepricated = false,
                    FileName = System.IO.Path.GetFileName(srcurl.LocalPath),
                    SrcGuId = Guid.NewGuid(),
                };

                // NOTE(review): Details_Html was captured above, so this attribute is
                // only present on the in-memory node, not in the stored HTML - confirm
                // whether that is intended.
                img.SetAttributeValue("guid", srcdata.SrcGuId.ToString());
                contentrevision.SrcDatas.Add(srcdata);
            }

            return;
        }
        catch (ArgumentNullException)
        {
            contentrevision.isDepricate = true;
            return;
        }
        catch (UriFormatException)
        {
            return;
        }
        catch (Exception e)
        {
            if (attempt == MaxAttempts)
            {
                Console.WriteLine("ParseContent gave up after {0} attempts: {1}", MaxAttempts, e.Message);
                return;
            }

            Console.WriteLine("ReStart");
        }
    }
}
// Fetches the article text and HTML from the div#body_frame element and
// collects its image source records.
//
// contentrevision: revision to populate. Reads Content.Contents_URL; writes
// isDepricate, Details, Details_Html and SrcDatas.
protected override void ParseContent(ContentRevisionDTO contentrevision)
{
    // Upper bound on load/parse attempts. The previous implementation retried by
    // calling itself with no limit, so a failure that repeats on every attempt
    // ended in a StackOverflowException.
    const int MaxAttempts = 3;

    for (var attempt = 1; attempt <= MaxAttempts; attempt++)
    {
        try
        {
            var loadedContent = webGetutf.Load(contentrevision.Content.Contents_URL);

            // SelectNodes yields null when nothing matches. A page without the
            // article container is treated as removed, which is what the old
            // ArgumentNullException path did implicitly.
            var bodyNodes = loadedContent.DocumentNode.SelectNodes("//div[@id = 'body_frame']");
            var articlecontent = bodyNodes == null ? null : bodyNodes.FirstOrDefault();
            if (articlecontent == null)
            {
                contentrevision.isDepricate = true;
                return;
            }

            // Blank out embedded <style> blocks so their CSS does not end up in the
            // extracted text. The matches are copied to a list first so the tree is
            // not modified while Descendants() is still being enumerated.
            var styleNodes = articlecontent.Descendants().Where(node => node.Name == "style").ToList();
            foreach (var styleNode in styleNodes)
            {
                styleNode.InnerHtml = "";
            }

            contentrevision.Details = articlecontent.InnerText.Trim();
            contentrevision.Details_Html = articlecontent.InnerHtml.Trim();
            contentrevision.isDepricate = false;

            // NOTE(review): "./img" matches only direct children of the body node;
            // confirm that nested images are meant to be ignored here.
            var imgnodes = articlecontent.SelectNodes("./img");
            contentrevision.SrcDatas = new List<SrcdataDTO>();
            if (imgnodes == null)
            {
                return;
            }

            foreach (var img in imgnodes)
            {
                // Skip images whose src is missing or not an absolute URI instead of
                // abandoning every image that follows the first bad one.
                Uri srcurl;
                if (!Uri.TryCreate(img.GetAttributeValue("src", "default"), UriKind.Absolute, out srcurl))
                {
                    continue;
                }

                var srcdata = new SrcdataDTO
                {
                    SourceUrl = srcurl.AbsoluteUri,
                    IsDepricated = false,
                    FileName = System.IO.Path.GetFileName(srcurl.LocalPath),
                    SrcGuId = Guid.NewGuid(),
                };

                // NOTE(review): Details_Html was captured above, so this attribute is
                // only present on the in-memory node, not in the stored HTML - confirm
                // whether that is intended.
                img.SetAttributeValue("guid", srcdata.SrcGuId.ToString());
                contentrevision.SrcDatas.Add(srcdata);
            }

            return;
        }
        catch (ArgumentNullException)
        {
            contentrevision.isDepricate = true;
            return;
        }
        catch (UriFormatException)
        {
            return;
        }
        catch (Exception e)
        {
            if (attempt == MaxAttempts)
            {
                Console.WriteLine("ParseContent gave up after {0} attempts: {1}", MaxAttempts, e.Message);
                return;
            }

            Console.WriteLine("ReStart");
        }
    }
}
// Loads the article page and fills in the revision's plain-text details,
// HTML details and image source records from the div#pann-content element.
//
// contentrevision: revision to populate. Reads Content.Contents_URL; writes
// isDepricate, Details, Details_Html and SrcDatas.
protected override void ParseContent(ContentRevisionDTO contentrevision)
{
    // Upper bound on load/parse attempts. The previous implementation retried by
    // calling itself with no limit, so a failure that repeats on every attempt
    // ended in a StackOverflowException.
    const int MaxAttempts = 3;

    for (var attempt = 1; attempt <= MaxAttempts; attempt++)
    {
        try
        {
            var loadedContent = webGetutf.Load(contentrevision.Content.Contents_URL);

            // SelectNodes yields null when nothing matches. A page without the
            // article container is treated as removed, which is what the old
            // ArgumentNullException path did implicitly.
            var bodyNodes = loadedContent.DocumentNode.SelectNodes("//div[@id = 'pann-content']");
            var articlecontent = bodyNodes == null ? null : bodyNodes.FirstOrDefault();
            if (articlecontent == null)
            {
                contentrevision.isDepricate = true;
                return;
            }

            contentrevision.Details = articlecontent.InnerText.Trim();
            contentrevision.Details_Html = articlecontent.InnerHtml.Trim();
            contentrevision.isDepricate = false;

            var imgnodes = articlecontent.SelectNodes(".//img");
            contentrevision.SrcDatas = new List<SrcdataDTO>();
            if (imgnodes == null)
            {
                return;
            }

            foreach (var img in imgnodes)
            {
                // Skip images whose src is missing or not an absolute URI instead of
                // abandoning every image that follows the first bad one.
                Uri srcurl;
                if (!Uri.TryCreate(img.GetAttributeValue("src", "default"), UriKind.Absolute, out srcurl))
                {
                    continue;
                }

                var srcdata = new SrcdataDTO
                {
                    SourceUrl = srcurl.AbsoluteUri,
                    IsDepricated = false,
                    FileName = System.IO.Path.GetFileName(srcurl.LocalPath),
                    SrcGuId = Guid.NewGuid(),
                };

                // NOTE(review): Details_Html was captured above, so this attribute is
                // only present on the in-memory node, not in the stored HTML - confirm
                // whether that is intended.
                img.SetAttributeValue("guid", srcdata.SrcGuId.ToString());
                contentrevision.SrcDatas.Add(srcdata);
            }

            return;
        }
        catch (ArgumentNullException)
        {
            contentrevision.isDepricate = true;
            return;
        }
        catch (UriFormatException)
        {
            return;
        }
        catch (WebException wex)
        {
            // WebException.Response is null when no HTTP response was received, so
            // the former unconditional cast-and-dereference could throw a
            // NullReferenceException out of this handler.
            var response = wex.Response as HttpWebResponse;
            if (response != null && response.StatusCode == HttpStatusCode.NotFound)
            {
                // HTTP 404: placeholder kept from the original code; no specific
                // handling has been defined for it yet.
            }

            // As before, web errors are not retried for this page type.
            return;
        }
        catch (Exception e)
        {
            if (attempt == MaxAttempts)
            {
                Console.WriteLine("ParseContent gave up after {0} attempts: {1}", MaxAttempts, e.Message);
                return;
            }

            Console.WriteLine("ReStart");
        }
    }
}
// Loads the article page, detects deleted posts, and fills in the revision's
// plain-text details, HTML details and image source records from the
// div#DocContent element.
//
// contentrevision: revision to populate. Reads Content.Contents_URL; writes
// isDepricate, Details, Details_Html and SrcDatas.
protected override void ParseContent(ContentRevisionDTO contentrevision)
{
    // Upper bound on load/parse attempts. The previous implementation retried by
    // calling itself with no limit, so a failure that repeats on every attempt
    // (for example SingleOrDefault finding two matching divs) ended in a
    // StackOverflowException.
    const int MaxAttempts = 3;

    for (var attempt = 1; attempt <= MaxAttempts; attempt++)
    {
        try
        {
            var ruiwebContents = webGetkr.Load(contentrevision.Content.Contents_URL);

            // The page shows this notice ("This post has already been deleted.") in a
            // td.te2 cell when the article was removed.
            var deletedMarkers = ruiwebContents.DocumentNode.SelectNodes("//td[@class = 'te2']");
            var deletedMarker = deletedMarkers == null ? null : deletedMarkers.FirstOrDefault();
            if (deletedMarker != null && deletedMarker.InnerText.Trim() == "이미 삭제 된 게시글 입니다.")
            {
                contentrevision.isDepricate = true;
                return;
            }

            // SelectNodes yields null when nothing matches. A page without the
            // article container is treated as removed, which is what the old
            // ArgumentNullException path did implicitly.
            var bodyNodes = ruiwebContents.DocumentNode.SelectNodes("//div[@id = 'DocContent']");
            if (bodyNodes == null)
            {
                contentrevision.isDepricate = true;
                return;
            }

            // Still throws InvalidOperationException on more than one match; that
            // now goes through the bounded retry below.
            var content = bodyNodes.SingleOrDefault();
            if (content == null)
            {
                contentrevision.isDepricate = true;
                return;
            }

            contentrevision.Details = content.InnerText.Trim();
            contentrevision.Details_Html = content.InnerHtml.Trim();
            contentrevision.isDepricate = false;

            var imgnodes = content.SelectNodes(".//img");
            contentrevision.SrcDatas = new List<SrcdataDTO>();
            if (imgnodes == null)
            {
                return;
            }

            foreach (var img in imgnodes)
            {
                // Skip images whose src is missing or not an absolute URI instead of
                // abandoning every image that follows the first bad one.
                Uri srcurl;
                if (!Uri.TryCreate(img.GetAttributeValue("src", "default"), UriKind.Absolute, out srcurl))
                {
                    continue;
                }

                var srcdata = new SrcdataDTO
                {
                    SourceUrl = srcurl.AbsoluteUri,
                    IsDepricated = false,
                    FileName = System.IO.Path.GetFileName(srcurl.LocalPath),
                    SrcGuId = Guid.NewGuid(),
                };

                // NOTE(review): Details_Html was captured above, so this attribute is
                // only present on the in-memory node, not in the stored HTML - confirm
                // whether that is intended.
                img.SetAttributeValue("guid", srcdata.SrcGuId.ToString());
                contentrevision.SrcDatas.Add(srcdata);
            }

            return;
        }
        catch (ArgumentNullException)
        {
            contentrevision.isDepricate = true;
            return;
        }
        catch (UriFormatException)
        {
            return;
        }
        catch (Exception e)
        {
            if (attempt == MaxAttempts)
            {
                Console.WriteLine("ParseContent gave up after {0} attempts: {1}", MaxAttempts, e.Message);
                return;
            }

            Console.WriteLine("ReStart");
        }
    }
}
// When the revision has SrcDatas, downloads each image and stores the bytes and
// their length on the corresponding entry. Entries that cannot be downloaded are
// flagged with IsDepricated = true.
//
// ContentRevision: revision whose SrcDatas are processed in parallel; its
// Content.Contents_URL is sent as the Referer header of every request.
protected void CacheImage(ContentRevisionDTO ContentRevision)
{
    if (ContentRevision.SrcDatas == null)
    {
        return;
    }

    // The original nested three download attempts, but keyed the retries on
    // ArgumentNullException, which DownloadData only raises for a null address,
    // so no retry ever ran. Retries are now driven by WebException.
    const int MaxAttempts = 3;

    Parallel.ForEach(ContentRevision.SrcDatas, srcdata =>
    {
        try
        {
            // Built inside the try so that one malformed or missing URL flags only
            // this entry instead of faulting the whole Parallel.ForEach.
            var url = new Uri(HttpUtility.HtmlDecode(srcdata.SourceUrl));

            for (var attempt = 1; ; attempt++)
            {
                // A fresh client per attempt; disposed deterministically (the
                // original never disposed its WebClient).
                using (var client = new WebClient())
                {
                    client.Headers.Add("Accept", @"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
                    client.Headers.Add("Referer", ContentRevision.Content.Contents_URL);
                    // NOTE(review): WebClient does not decompress responses by itself;
                    // confirm the servers never return gzip/deflate-encoded image bytes.
                    client.Headers.Add("Accept-Encoding", @"gzip, deflate, sdch");
                    client.Headers.Add("Accept-Language", @"ko,en-US;q=0.8,en;q=0.6");
                    client.Headers.Add("User-Agent", webGetkr.UserAgent);
                    client.UseDefaultCredentials = true;

                    try
                    {
                        var data = client.DownloadData(url);
                        srcdata.OriginalPayload = data;
                        srcdata.OriginalPayload_Size = data.LongLength;
                        return;
                    }
                    catch (WebException wex)
                    {
                        // Response is null when no HTTP response was received, hence
                        // the safe cast. A 404 will not succeed on retry.
                        var response = wex.Response as HttpWebResponse;
                        var notFound = response != null && response.StatusCode == HttpStatusCode.NotFound;
                        if (notFound || attempt == MaxAttempts)
                        {
                            throw;
                        }
                    }
                }
            }
        }
        catch (Exception)
        {
            // Error reporting was already disabled in the original (a commented-out
            // SendErrorLog call); the entry is only flagged as unavailable.
            srcdata.IsDepricated = true;
        }
    });
}
// Fetches the Details, Details_Html and related information of a ContentRevision.
// Implemented by each derived class for the page layout it handles.
protected abstract void ParseContent(ContentRevisionDTO contentrevision);