Exemplo n.º 1
0
        /// <summary>
        /// Derives <c>PossibleTitle</c> and <c>Body</c> of <paramref name="post"/> from the
        /// HTML of its message text.
        /// </summary>
        /// <remarks>
        /// When the text contains a <c>&lt;br</c> tag, everything before the first one (with HTML
        /// tags removed) becomes the title. For text posts the body is what follows that tag;
        /// for other post types the body is the whole text. When there is no <c>&lt;br</c>, the
        /// tag-stripped text is the title, truncated to <c>MAX_TITLE_LENGTH</c> characters if needed.
        /// </remarks>
        /// <param name="post">Post to fill in; nothing happens when it is <c>null</c>.</param>
        /// <param name="txt">Message HTML; <c>null</c> is treated as an empty string.</param>
        private static void SetTitleBody(TelegramPost post, string txt)
        {
            if (post == null)
            {
                return;
            }

            // The text may be missing altogether (a post without a text node); handle it as
            // empty text rather than throwing a NullReferenceException on txt.IndexOf.
            if (txt == null)
            {
                txt = string.Empty;
            }

            int brIndex = txt.IndexOf("<br", StringComparison.Ordinal);

            if (brIndex < 0)
            {
                var possibleTitle = txt.RemoveHtmlTags();

                if (possibleTitle.Length > MAX_TITLE_LENGTH)
                {
                    // Title is cut, so keep the full text as body to avoid losing content.
                    post.PossibleTitle = possibleTitle.Substring(0, MAX_TITLE_LENGTH);
                    post.Body          = txt;
                }
                else
                {
                    post.PossibleTitle = possibleTitle;
                    post.Body          = post.PostType == TelegramPostType.Text ? txt : "";
                }

                return;
            }

            post.PossibleTitle = txt.Substring(0, brIndex).RemoveHtmlTags();

            if (post.PostType != TelegramPostType.Text)
            {
                post.Body = txt;
                return;
            }

            // Skip the whole tag whatever its spelling (<br>, <br/>, <br />) by locating its
            // closing '>' instead of assuming a fixed length of 4. An unterminated "<br" at the
            // end of the text yields an empty body instead of an out-of-range Substring call.
            int tagEnd = txt.IndexOf('>', brIndex);
            post.Body = tagEnd < 0 ? string.Empty : txt.Substring(tagEnd + 1);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Resolves the attachment URL that matches the post's type and, when it is a valid
        /// absolute URI, stores it in <c>AttachmentUri</c>.
        /// </summary>
        /// <param name="post">Post whose <c>PostType</c> selects the source node and which receives the URI.</param>
        /// <param name="photoNode">Node handed to <c>ParsePhotoUrl</c> for photo posts.</param>
        /// <param name="videoNode">Node whose <c>src</c> attribute is used for video posts; may be <c>null</c>.</param>
        /// <param name="stickerNode">Node handed to <c>ParseStickerUrl</c> for sticker posts.</param>
        private void SetAttachmentUri(TelegramPost post, HtmlNode photoNode, HtmlNode videoNode, HtmlNode stickerNode)
        {
            var type = post.PostType;
            string rawUrl;

            if (type == TelegramPostType.Photo)
            {
                rawUrl = ParsePhotoUrl(photoNode);
            }
            else if (type == TelegramPostType.Video)
            {
                rawUrl = videoNode?.Attributes["src"]?.Value;
            }
            else if (type == TelegramPostType.Sticker)
            {
                rawUrl = ParseStickerUrl(stickerNode);
            }
            else
            {
                // Any other post type has no attachment URL to look up.
                rawUrl = null;
            }

            if (string.IsNullOrWhiteSpace(rawUrl))
            {
                return;
            }

            // Only absolute URIs are accepted; anything else leaves AttachmentUri untouched.
            if (Uri.TryCreate(rawUrl, UriKind.Absolute, out var attachmentUri))
            {
                post.AttachmentUri = attachmentUri;
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Builds a <c>TelegramPost</c> from a parsed HTML document by locating the elements
        /// marked with the <c>tgme_widget_message_*</c> CSS classes.
        /// </summary>
        /// <param name="htmlDocument">Document to inspect; may be <c>null</c>.</param>
        /// <returns>
        /// The populated post, or <c>null</c> when <paramref name="htmlDocument"/> is <c>null</c>
        /// or contains none of the text, video, photo, sticker or document elements.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown by <c>SingleOrDefault</c> when more than one meta span, or more than one date
        /// link inside it, is found.
        /// </exception>
        protected TelegramPost Parse(HtmlDocument htmlDocument)
        {
            if (htmlDocument == null)
            {
                return null;
            }

            var root = htmlDocument.DocumentNode;

            var divText = root.Descendants("div")
                          .LastOrDefault(m => m.HasClass("tgme_widget_message_text"));

            var video = root.Descendants("video").LastOrDefault();

            // Prefer the photo wrapper link; fall back to a link-preview image.
            var aPhoto = root.Descendants("a")
                         .FirstOrDefault(m => m.HasClass("tgme_widget_message_photo_wrap"))
                         ?? root.Descendants("i")
                         .FirstOrDefault(m => m.HasClass("link_preview_image"));

            var iSticker = root.Descendants("i")
                           .FirstOrDefault(m => m.HasClass("tgme_widget_message_sticker"));

            var aDocument = root.Descendants("a")
                            .FirstOrDefault(m => m.HasClass("tgme_widget_message_document_wrap"));

            if (aPhoto == null && divText == null && video == null && iSticker == null && aDocument == null)
            {
                // log ("Post {0} has no photo and no message text.", postId);
                return null;
            }

            // log "Parsing post {0}. ", postId;
            var spans = root.Descendants("span").ToList();

            var authorNode = spans.LastOrDefault(m => m.HasClass("tgme_widget_message_from_author"));
            var viewNode   = spans.LastOrDefault(m => m.HasClass("tgme_widget_message_views"));

            // The date lives in span.meta > a.date > time.
            var metaNode = spans.SingleOrDefault(s => s.HasClass("tgme_widget_message_meta"));
            var dateLink = metaNode?.Descendants("a")
                           .SingleOrDefault(s => s.HasClass("tgme_widget_message_date"));
            var dateNode = dateLink?.Descendants("time").FirstOrDefault();

            var post = new TelegramPost
            {
                Id       = ParsePostId(htmlDocument),
                // Type priority: video, then photo, then sticker, then file; plain text otherwise.
                PostType = video != null ? TelegramPostType.Video :
                           aPhoto != null ? TelegramPostType.Photo :
                           iSticker != null ? TelegramPostType.Sticker :
                           aDocument != null ? TelegramPostType.File : TelegramPostType.Text,
                WebRaw     = htmlDocument.ParsedText,
                TextRaw    = divText?.InnerText,
                Author     = authorNode?.InnerText ?? string.Empty,
                DateString = dateNode?.InnerText,
                ViewCount  = viewNode?.InnerText
            };

            SetAttachmentUri(post, aPhoto, video, iSticker);

            // Posts without a text div (e.g. media only) must not hand a null string to
            // SetTitleBody, which dereferences its text argument.
            SetTitleBody(post, divText?.InnerHtml ?? string.Empty);

            // The datetime attribute is machine-readable, so parse it with the invariant culture
            // rather than whatever culture (and calendar) the current thread happens to use.
            var dateValue = dateNode?.Attributes["datetime"]?.Value;

            if (DateTime.TryParse(dateValue,
                                  System.Globalization.CultureInfo.InvariantCulture,
                                  System.Globalization.DateTimeStyles.None,
                                  out var date))
            {
                post.Date = date;
            }

            return post;
        }