/// <summary>
/// Builds a <see cref="DaumCafePageBody"/> from the HTML of a comment-type board page.
/// </summary>
/// <param name="content">Raw HTML of the page.</param>
/// <returns>
/// A body whose <c>Text</c> is the plain text of the first <c>span</c> with class <c>txt_detail</c>
/// and whose <c>ImageUrl</c> is derived from the <c>src</c> of the first <c>img</c> with class
/// <c>thumb_info</c>. Either value is <c>null</c> when the element (or its <c>src</c>) is missing.
/// </returns>
public static DaumCafePageBody CreateFromComment(string content)
{
    const string FileNameParameter = "fname=";

    var doc = new HtmlDocument();
    doc.LoadHtml(content);

    var result = new DaumCafePageBody();
    result.Text = doc.DocumentNode.Descendants("span")
        .FirstOrDefault(x => x.HasClass("txt_detail"))
        ?.ToPlainText()
        .Trim();

    // Ask for the larger rendition of the thumbnail (only matters when no "fname=" is present below).
    var imageUrl = doc.DocumentNode.Descendants("img")
        .FirstOrDefault(x => x.HasClass("thumb_info"))
        ?.GetAttributeValue("src", null)
        ?.Trim()
        .Replace("C120x120", "R640x0");

    if (imageUrl != null)
    {
        // Discord stopped embedding the scaled down links
        // (eg. https://img1.daumcdn.net/thumb/R640x0/?fname=http://cfile277.uf.daum.net/image/99D447415BA4896424BC9D),
        // so keep only the address that follows the last "fname=".
        // URLs are not linguistic text, hence the ordinal comparisons.
        var i = imageUrl.LastIndexOf(FileNameParameter, StringComparison.Ordinal);
        if (i >= 0)
        {
            imageUrl = imageUrl.Substring(i + FileNameParameter.Length);
        }

        // Protocol sometimes missing
        if (imageUrl.StartsWith("//", StringComparison.Ordinal))
        {
            imageUrl = "https:" + imageUrl;
        }
    }

    result.ImageUrl = imageUrl;
    return result;
}
/// <summary>
/// Downloads a Daum Cafe mobile page and turns its meta properties and HTML into a <see cref="DaumCafePage"/>.
/// </summary>
/// <param name="mobileUrl">Address of the mobile page to download.</param>
/// <param name="ct">Token used to cancel the HTTP requests, including the manual redirect follow-up.</param>
/// <returns>
/// A page of type <c>"comment"</c> when the <c>og:url</c> property contains <c>"comments"</c>;
/// otherwise a page populated from the <c>og:type</c>, <c>og:title</c>, <c>og:image</c> and
/// <c>og:description</c> properties (regular board is assumed).
/// </returns>
/// <exception cref="System.Net.Http.HttpRequestException">
/// The server answered 308 without a <c>Location</c> header, or the redirect target returned a failure status.
/// </exception>
public async Task<DaumCafePage> GetPage(Uri mobileUrl, CancellationToken ct)
{
    string content;
    using (var response = await _client.GetAsync(mobileUrl, ct))
    {
        if (response.StatusCode == (HttpStatusCode)308)
        {
            // Deal with the wonky 308 status code (permanent redirect) - HttpClient should redirect,
            // but it doesn't (probably because 308 is not even in .NET docs)
            var location = response.Headers.Location;
            if (location == null)
            {
                throw new System.Net.Http.HttpRequestException(
                    "Received a 308 redirect without a Location header from " + mobileUrl);
            }

            // Relative targets are resolved against the scheme and authority of the requested URL.
            var absoluteLocation = location.IsAbsoluteUri
                ? location
                : new Uri(new Uri(mobileUrl.GetComponents(UriComponents.Scheme | UriComponents.StrongAuthority, UriFormat.Unescaped)), location);

            // GetAsync (unlike the GetStringAsync(Uri) overload) lets the caller's token cancel this request too;
            // EnsureSuccessStatusCode keeps the "throw on failure status" behaviour GetStringAsync had.
            using (var redirected = await _client.GetAsync(absoluteLocation, ct))
            {
                redirected.EnsureSuccessStatusCode();
                content = await redirected.Content.ReadAsStringAsync();
            }
        }
        else
        {
            content = await response.Content.ReadAsStringAsync();
        }
    }

    // Collect (name, value) pairs in document order; group 1 is the name, group 2 the value.
    var properties = new List<Tuple<string, string>>();
    foreach (Match match in _metaPropertyRegex.Matches(content))
    {
        properties.Add(Tuple.Create(match.Groups[1].Value, match.Groups[2].Value));
    }

    // First occurrence wins; null when the property is absent.
    Func<string, string> property = name => properties.FirstOrDefault(x => x.Item1 == name)?.Item2;

    var url = property("og:url");
    if (!string.IsNullOrEmpty(url) && url.Contains("comments"))
    {
        // Comment type board
        return new DaumCafePage()
        {
            RelativeUrl = url,
            Type = "comment",
            Body = DaumCafePageBody.CreateFromComment(content)
        };
    }

    // Assume regular board
    return new DaumCafePage()
    {
        RelativeUrl = url,
        Type = property("og:type"),
        Title = WebUtility.HtmlDecode(property("og:title") ?? ""),
        ImageUrl = property("og:image"),
        Description = WebUtility.HtmlDecode(property("og:description") ?? ""),
        Body = DaumCafePageBody.Create(content)
    };
}
/// <summary>
/// Builds a <see cref="DaumCafePageBody"/> from the HTML of a regular board page.
/// </summary>
/// <param name="content">Raw HTML of the page.</param>
/// <returns>
/// A body whose <c>Subject</c> is the trimmed inner text of the first <c>h3</c> whose class attribute is
/// exactly <c>tit_subject</c> (or <c>null</c> if there is none). When a <c>div</c> with id <c>article</c>
/// exists, <c>ImageUrl</c> is the trimmed <c>src</c> of its first image that has one and <c>Text</c> is
/// the plain text of that <c>div</c>; otherwise both are left unset.
/// </returns>
public static DaumCafePageBody Create(string content)
{
    var html = new HtmlDocument();
    html.LoadHtml(content);
    var root = html.DocumentNode;

    var body = new DaumCafePageBody();

    var subjectNode = root.Descendants("h3")
        .FirstOrDefault(h => h.GetAttributeValue("class", "") == "tit_subject");
    body.Subject = subjectNode?.InnerText.Trim();

    var article = root.Descendants("div")
        .FirstOrDefault(d => d.GetAttributeValue("id", "") == "article");
    if (article == null)
    {
        // No article container: only the subject (if any) is available.
        return body;
    }

    // Only images that actually carry a src attribute are candidates.
    var firstImage = article.Descendants("img")
        .FirstOrDefault(img => img.Attributes.Contains("src"));
    body.ImageUrl = firstImage?.GetAttributeValue("src", "").Trim();
    body.Text = article.ToPlainText().Trim();

    return body;
}