Example #1
0
        /// <summary>
        /// Builds a <see cref="DaumCafePageBody"/> from the HTML of a comment-type board page.
        /// </summary>
        /// <param name="content">Raw HTML of the page.</param>
        /// <returns>
        /// A body whose <c>Text</c> is taken from the first <c>span</c> with class <c>txt_detail</c> and whose
        /// <c>ImageUrl</c> is taken from the <c>src</c> of the first <c>img</c> with class <c>thumb_info</c>.
        /// Either property is <c>null</c> when the corresponding element (or the <c>src</c> attribute) is missing.
        /// </returns>
        public static DaumCafePageBody CreateFromComment(string content)
        {
            const string fileNameParameter = "fname=";

            var doc = new HtmlDocument();
            doc.LoadHtml(content);

            var textNode  = doc.DocumentNode.Descendants("span").FirstOrDefault(x => x.HasClass("txt_detail"));
            var imageNode = doc.DocumentNode.Descendants("img").FirstOrDefault(x => x.HasClass("thumb_info"));

            // Ask for the larger rendition instead of the 120x120 thumbnail.
            var imageUrl = imageNode?.GetAttributeValue("src", null)?.Trim().Replace("C120x120", "R640x0");

            if (imageUrl != null)
            {
                // Discord stopped embedding the scaled down links (eg. https://img1.daumcdn.net/thumb/R640x0/?fname=http://cfile277.uf.daum.net/image/99D447415BA4896424BC9D)
                // so keep only the original image URL carried in the fname parameter.
                // URLs are not linguistic text: compare ordinally so the result does not depend on the current culture.
                var parameterIndex = imageUrl.LastIndexOf(fileNameParameter, System.StringComparison.Ordinal);

                if (parameterIndex >= 0)
                {
                    imageUrl = imageUrl.Substring(parameterIndex + fileNameParameter.Length);
                }

                // Protocol sometimes missing
                if (imageUrl.StartsWith("//", System.StringComparison.Ordinal))
                {
                    imageUrl = "https:" + imageUrl;
                }
            }

            var result = new DaumCafePageBody();

            result.Text     = textNode?.ToPlainText().Trim();
            result.ImageUrl = imageUrl;

            return result;
        }
Example #2
0
        /// <summary>
        /// Downloads a Daum Cafe mobile page and builds a <see cref="DaumCafePage"/> from the
        /// <c>og:*</c> properties matched by <c>_metaPropertyRegex</c> and from the page HTML.
        /// </summary>
        /// <param name="mobileUrl">URL of the mobile page to download.</param>
        /// <param name="ct">Token used to cancel the HTTP requests.</param>
        /// <returns>
        /// When the <c>og:url</c> property contains <c>comments</c>, a page of type <c>comment</c> whose body is
        /// produced by <c>DaumCafePageBody.CreateFromComment</c>; otherwise a page populated from the
        /// <c>og:type</c>, <c>og:title</c>, <c>og:image</c> and <c>og:description</c> properties whose body is
        /// produced by <c>DaumCafePageBody.Create</c>.
        /// </returns>
        /// <exception cref="System.Net.Http.HttpRequestException">
        /// A 308 response carried no <c>Location</c> header, or the redirected request returned a non-success status code.
        /// </exception>
        public async Task <DaumCafePage> GetPage(Uri mobileUrl, CancellationToken ct)
        {
            string content        = null;
            Uri    redirectTarget = null;

            // The response owns the underlying content stream, so dispose it once we are done with it.
            using (var response = await _client.GetAsync(mobileUrl, ct))
            {
                if (response.StatusCode == (HttpStatusCode)308)
                {
                    // Deal with the wonky 308 status code (permanent redirect) - HttpClient should redirect, but it doesn't (probably because 308 is not even in .NET docs)
                    var location = response.Headers.Location;

                    if (location == null)
                    {
                        // Without a Location header there is nothing to follow; fail with a descriptive error instead of a NullReferenceException.
                        throw new System.Net.Http.HttpRequestException("Received a 308 (permanent redirect) response without a Location header from " + mobileUrl);
                    }

                    // Relative locations are resolved against the scheme and authority of the requested URL.
                    redirectTarget = location.IsAbsoluteUri
                        ? location
                        : new Uri(new Uri(mobileUrl.GetComponents(UriComponents.Scheme | UriComponents.StrongAuthority, UriFormat.Unescaped)), location);
                }
                else
                {
                    content = await response.Content.ReadAsStringAsync();
                }
            }

            if (redirectTarget != null)
            {
                // Follow the redirect manually, honouring the caller's cancellation token.
                using (var redirected = await _client.GetAsync(redirectTarget, ct))
                {
                    // Keep the failure behaviour of GetStringAsync: a non-success status throws.
                    redirected.EnsureSuccessStatusCode();
                    content = await redirected.Content.ReadAsStringAsync();
                }
            }

            // Collect (property, content) pairs in document order; group 1 is the name, group 2 the value.
            var properties = _metaPropertyRegex.Matches(content)
                                               .Cast<Match>()
                                               .Select(match => Tuple.Create(match.Groups[1].Value, match.Groups[2].Value))
                                               .ToList();

            // First value recorded for the given property name, or null when the page does not declare it.
            Func<string, string> property = name => properties.FirstOrDefault(x => x.Item1 == name)?.Item2;

            var url = property("og:url");

            if (!string.IsNullOrEmpty(url) && url.Contains("comments"))
            {
                // Comment type board
                return new DaumCafePage()
                {
                    RelativeUrl = url,
                    Type = "comment",
                    Body = DaumCafePageBody.CreateFromComment(content)
                };
            }

            // Assume regular board
            return new DaumCafePage()
            {
                RelativeUrl = url,
                Type = property("og:type"),
                Title = WebUtility.HtmlDecode(property("og:title") ?? ""),
                ImageUrl = property("og:image"),
                Description = WebUtility.HtmlDecode(property("og:description") ?? ""),
                Body = DaumCafePageBody.Create(content)
            };
        }
Example #3
0
        /// <summary>
        /// Builds a <see cref="DaumCafePageBody"/> from the HTML of a regular board page.
        /// </summary>
        /// <param name="content">Raw HTML of the page.</param>
        /// <returns>
        /// A body whose <c>Subject</c> comes from the first <c>h3</c> whose class attribute is exactly
        /// <c>tit_subject</c>. <c>ImageUrl</c> and <c>Text</c> are only filled in when a <c>div</c> with
        /// id <c>article</c> exists: the image is the <c>src</c> of the first <c>img</c> inside it that
        /// has a <c>src</c> attribute, and the text is the article converted by <c>ToPlainText</c>.
        /// </returns>
        public static DaumCafePageBody Create(string content)
        {
            var document = new HtmlDocument();
            document.LoadHtml(content);

            var root = document.DocumentNode;
            var body = new DaumCafePageBody();

            var subjectNode = root.Descendants("h3")
                                  .FirstOrDefault(node => node.GetAttributeValue("class", "") == "tit_subject");
            body.Subject = subjectNode?.InnerText.Trim();

            var article = root.Descendants("div")
                              .FirstOrDefault(node => node.GetAttributeValue("id", "") == "article");

            // No article container: only the subject (if any) can be reported.
            if (article == null)
            {
                return body;
            }

            var firstImage = article.Descendants("img")
                                    .FirstOrDefault(node => node.Attributes.Contains("src"));

            body.ImageUrl = firstImage?.GetAttributeValue("src", "").Trim();
            body.Text     = article.ToPlainText().Trim();

            return body;
        }