Exemple #1
0
        /// <summary>
        /// Reports the page's charset information, extracts the HTML title and, if the title is usable,
        /// stores it on the request and hands the page content to <paramref name="handler"/>.
        /// </summary>
        /// <param name="request">Request that receives diagnostic messages and the extracted title.</param>
        /// <param name="page">Page whose <c>Content</c> is searched for a title.</param>
        /// <param name="handler">Callback invoked with the request and the page content once the title is accepted.</param>
        /// <returns>
        /// <c>request.CreateResult(false)</c> when the title is null/whitespace or longer than 1024 characters;
        /// otherwise whatever <paramref name="handler"/> returns.
        /// </returns>
        static TitlingResult HandleHtml(
            TitlingRequest request,
            HtmlPage page,
            Func<TitlingRequest, string, TitlingResult> handler)
        {
            const int maxTitleLength = 1024;

            ReportCharsets(request, page);

            string title = WebTools.GetTitle(page.Content);
            bool titleMissing = string.IsNullOrWhiteSpace(title);

            // Happy path: a title exists and is not of ridiculous length, so expose it to TitleBuilder.
            if (!titleMissing && title.Length <= maxTitleLength)
            {
                request.IrcTitle.HtmlTitle = title;
                return handler(request, page.Content);
            }

            // Rejected: record why, then report failure.
            request.AddMessage(
                titleMissing
                    ? "No <title> found, or title element was empty/whitespace."
                    : "HTML title length was in excess of 1024 characters, assuming spam.");
            return request.CreateResult(false);
        }
        /// <summary>
        /// Parses <paramref name="htmlDoc"/> as a Wikipedia article and sets the IRC title to a shortened
        /// paragraph: the first paragraph of the section named by the URL's <c>#anchor</c> if one can be
        /// extracted, otherwise the first summary paragraph of the article.
        /// </summary>
        /// <param name="req">Request whose URL is inspected for an anchor and whose IRC title is formatted.</param>
        /// <param name="htmlDoc">HTML of the article.</param>
        /// <returns><c>req.CreateResult(true)</c>, whether or not a paragraph was found.</returns>
        public TitlingResult WikipediaSummarize(TitlingRequest req, string htmlDoc)
        {
            WikipediaArticle article = WikipediaTools.Parse(htmlDoc);
            var url = req.Url;

            // Paragraph chosen for summarizing; stays null until one is found.
            string paragraph = null;

            // An anchor only counts when at least one character follows the '#'.
            int hashPos = url.IndexOf("#", StringComparison.OrdinalIgnoreCase);
            bool hasAnchor = hashPos >= 0 && (hashPos + 1) < url.Length;
            if (hasAnchor)
            {
                paragraph = article.GetFirstParagraph(url.Substring(hashPos + 1));
            }

            // No anchor, or nothing could be extracted for it: fall back to the start of the article.
            if (paragraph == null && article.SummaryParagraphs.Length > 0)
            {
                paragraph = article.SummaryParagraphs[0];
            }

            if (string.IsNullOrWhiteSpace(paragraph))
            {
                return req.CreateResult(true);
            }

            req.IrcTitle.SetFormat("[ {0} ]", Format.Shorten(paragraph, MaxCharacters, ContinuationSymbol));
            return req.CreateResult(true);
        }
Exemple #3
0
        /// <summary>
        /// Decides whether a title should be printed by comparing the similarity between the request's URL
        /// and its HTML title against <c>Threshold</c>.
        /// </summary>
        /// <param name="req">Request providing the URL and the previously extracted HTML title.</param>
        /// <param name="htmlDoc">Unused; accepted only so the method conforms to the handler signature.</param>
        /// <returns>
        /// A positive result when <c>Threshold</c> is 1 or higher, or when the similarity is less than or
        /// equal to <c>Threshold</c>; a negative result when <c>Threshold</c> is negative or the similarity
        /// exceeds it.
        /// </returns>
        TitlingResult GenericHandler(TitlingRequest req, string htmlDoc)
        {
            // Two settings make the similarity irrelevant:
            //  - 1 or higher: similarity can never exceed 1, so every title is printed.
            //  - negative: never print a title. (A Threshold of 0 still prints titles with 0 similarity.)
            bool similarityIrrelevant = Threshold >= 1 || Threshold < 0;
            if (similarityIrrelevant)
            {
                return req.CreateResult(Threshold >= 1);
            }

            double similarity = UrlTitle.Similarity(req.Url, req.IrcTitle.HtmlTitle);
            req.AddMessage($"URL-Title Similarity: {similarity} [Threshold: {Threshold}]");

            return req.CreateResult(similarity <= Threshold);
        }
Exemple #4
0
        /// <summary>
        /// Adds a message to <paramref name="req"/> listing the page's HTTP header charset and HTML charset
        /// next to the corresponding <c>EncHeaders</c> and <c>EncHtml</c> values.
        /// </summary>
        /// <param name="req">Request that receives the message.</param>
        /// <param name="page">Page whose charset and encoding properties are reported.</param>
        static void ReportCharsets(TitlingRequest req, HtmlPage page)
        {
            req.AddMessage(
                $"(HTTP) \"{page.HeadersCharset}\" -> {page.EncHeaders} ; (HTML) \"{page.HtmlCharset}\" -> {page.EncHtml}");
        }
Exemple #5
0
        /// <summary>
        /// Sets the IRC title from the HTML title and appends the time value obtained from
        /// <c>WebTools.GetYoutubeTime</c> for <paramref name="htmlDoc"/>.
        /// </summary>
        /// <param name="req">Request whose IRC title is built.</param>
        /// <param name="htmlDoc">HTML document that is searched for the video time.</param>
        /// <returns><c>req.CreateResult(true)</c>.</returns>
        public static TitlingResult YoutubeWithDuration(TitlingRequest req, string htmlDoc)
        {
            // Presumably AppendTime copes with a missing duration - confirm against its implementation.
            var duration = WebTools.GetYoutubeTime(htmlDoc);

            var title = req.IrcTitle.SetHtmlTitle();
            title.AppendTime(duration);

            return req.CreateResult(true);
        }
Exemple #6
0
        /// <summary>
        /// Handles <paramref name="req"/> via <c>ThreadTopicToIrc</c> when <c>Supports</c> accepts it.
        /// </summary>
        /// <param name="req">Request to inspect.</param>
        /// <returns>The result of <c>ThreadTopicToIrc</c>, or <c>null</c> when the request is not supported.</returns>
        public TitlingResult HandleRequest(TitlingRequest req)
        {
            return Supports(req) ? ThreadTopicToIrc(req) : null;
        }
Exemple #7
0
        /// <summary>
        /// Handles <paramref name="req"/> via <c>PostToIrc</c> when its URL contains the Gelbooru post-view
        /// path (compared case-insensitively).
        /// </summary>
        /// <param name="req">Request to inspect.</param>
        /// <returns>The result of <c>PostToIrc</c>, or <c>null</c> when the URL does not match.</returns>
        public TitlingResult HandleRequest(TitlingRequest req)
        {
            const string postViewMarker = "gelbooru.com/index.php?page=post&s=view&id=";

            if (!req.Url.Contains(postViewMarker, StringComparison.OrdinalIgnoreCase))
            {
                return null;
            }

            return PostToIrc(req);
        }
Exemple #8
0
        /// <summary>
        /// Handles <paramref name="req"/> via <c>PostToIrc</c> when its URL contains the Danbooru posts path
        /// (compared case-insensitively).
        /// </summary>
        /// <param name="req">Request to inspect.</param>
        /// <returns>The result of <c>PostToIrc</c>, or <c>null</c> when the URL does not match.</returns>
        public TitlingResult HandleRequest(TitlingRequest req)
        {
            const string postsMarker = "donmai.us/posts/";

            if (!req.Url.Contains(postsMarker, StringComparison.OrdinalIgnoreCase))
            {
                return null;
            }

            return PostToIrc(req);
        }
Exemple #9
0
        /// <summary>
        /// Tells whether <c>GetSource</c> recognizes the URL of <paramref name="req"/>.
        /// </summary>
        /// <param name="req">Request whose URL is checked.</param>
        /// <returns><c>true</c> when <c>GetSource</c> yields a non-null source; otherwise <c>false</c>.</returns>
        public static bool Supports(TitlingRequest req)
        {
            return GetSource(req.Url) != null;
        }
Exemple #10
0
        /// <summary>
        /// Produces a titling result for <paramref name="request"/>: the pre-HTML handlers are tried first,
        /// then the first matching URL instruction decides how the resource is fetched and handled.
        /// </summary>
        /// <param name="request">The request to process; must not be <c>null</c>.</param>
        /// <returns>
        /// The first non-null result of a pre-HTML handler; otherwise the result of HTML handling, binary
        /// handling, or <c>request.CreateResult(false)</c> for the first instruction matching the request's URI.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="request"/> is <c>null</c>.</exception>
        /// <exception cref="InvalidOperationException">No URL instruction matched the request's URI.</exception>
        public TitlingResult WebInfo(TitlingRequest request)
        {
            if (request == null)
            {
                throw new ArgumentNullException(nameof(request));
            }

            // TitlingRequest guarantees an absolute URI with a supported scheme, so the individual handlers
            // do not have to check for those conditions themselves.

            foreach (var preHandler in preHtmlHandlers)
            {
                var handled = preHandler(request);
                if (handled != null)
                {
                    return handled;
                }
            }

            foreach (var rule in urlInstructions)
            {
                if (!rule.Match(request.Uri))
                {
                    continue;
                }

                // Configure the fetcher as the matching instruction dictates.
                urlFetcher.MaxSizeHtml         = rule.FetchSize;
                urlFetcher.FollowMetaRefreshes = rule.FollowMetaRefreshes;

                var fetched = urlFetcher.Load(request.Uri);
                request.Resource = fetched.Page;

                // HTML: use the instruction's own handler, falling back to the generic one.
                if (fetched.IsHtml)
                {
                    return HandleHtml(request, fetched.Page, rule.Handler ?? GenericHandler);
                }

                // Media/binary: only when enabled and the bytes were fetched successfully.
                if (ParseMedia && fetched.Bytes.Success)
                {
                    return BinaryHandler.BinaryToIrc(request, fetched.Bytes);
                }

                return request.CreateResult(false);
            }

            throw new InvalidOperationException(
                "Reached end of method, this should not happen. There should be a catch-all in urlInstructions.");
        }
Exemple #11
0
        /// <summary>
        /// Fetches the Gelbooru post info for the request's URL and formats an IRC message, but only when a
        /// non-empty warning can be constructed from the post's tags.
        /// </summary>
        /// <param name="req">Request whose URL identifies the post; its <c>Resource</c> is set to the post info.</param>
        /// <returns>
        /// A positive result when the post was retrieved and produced a warning; otherwise a negative result.
        /// </returns>
        public TitlingResult PostToIrc(TitlingRequest req)
        {
            BooruPost post = GelboTools.GetPostInfo(req.Url);
            req.Resource = post;

            if (!post.Success)
            {
                return req.CreateResult(false);
            }

            string warning = ConstructWarning(post.Tags);
            if (string.IsNullOrEmpty(warning))
            {
                // Nothing to warn about, so nothing is printed.
                return req.CreateResult(false);
            }

            FormatMessage(req.IrcTitle, post.Rated, warning, post.PostNo);
            return req.CreateResult(true);
        }
Exemple #12
0
        /// <summary>
        /// Parses the downloaded bytes as media and formats an IRC title describing the type, the parsed
        /// media info and the content length.
        /// </summary>
        /// <param name="req">Request whose <c>Resource</c> is set to <paramref name="wb"/> and whose IRC title is formatted.</param>
        /// <param name="wb">The downloaded bytes together with their content type and length.</param>
        /// <returns>
        /// A negative result when <paramref name="wb"/> reports no success; otherwise a positive result. For
        /// media types without a known label, the content type is used and a message is added to the request.
        /// </returns>
        public static TitlingResult BinaryToIrc(TitlingRequest req, WebBytes wb)
        {
            req.Resource = wb;
            if (!wb.Success)
            {
                return req.CreateResult(false);
            }

            var media = MediaDispatch.Parse(wb.Data);

            // Display label for the recognized formats; remains null for everything else.
            string label = null;
            switch (media.Type)
            {
            case MediaType.Jpeg:     label = "JPEG";     break;
            case MediaType.Png:      label = "PNG";      break;
            case MediaType.Gif:      label = "GIF";      break;
            case MediaType.Matroska: label = "Matroska"; break;
            case MediaType.Webm:     label = "WebM";     break;
            }

            if (label == null)
            {
                // Unrecognized format: fall back to the reported content type and note it on the request.
                label = wb.ContentType;
                req.AddMessage("Binary format not supported.");
            }

            FormatBinaryInfo(req.IrcTitle, label, media, wb.ContentLength);
            return req.CreateResult(true);
        }
Exemple #13
0
        /// <summary>
        /// Produces a titling result for an absolute <paramref name="uri"/> by wrapping it in a
        /// <c>TitlingRequest</c>, provided its scheme is supported.
        /// </summary>
        /// <param name="uri">Absolute URI to look up; must not be <c>null</c>.</param>
        /// <returns>
        /// The result of processing the wrapped request, or a failure result carrying a
        /// <see cref="NotSupportedException"/> when the scheme is not supported.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="uri"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentException"><paramref name="uri"/> is not absolute.</exception>
        public TitlingResult WebInfo(Uri uri)
        {
            if (uri == null)
            {
                throw new ArgumentNullException(nameof(uri));
            }
            if (!uri.IsAbsoluteUri)
            {
                throw new ArgumentException("Uri must be absolute: " + uri, nameof(uri));
            }

            // An unsupported scheme is reported through the result rather than thrown.
            if (!TitlingRequest.IsSchemeSupported(uri))
            {
                var unsupported = new NotSupportedException("Unsupported scheme: " + uri.Scheme);
                return TitlingResult.Failure(uri.OriginalString, unsupported);
            }

            return WebInfo(new TitlingRequest(uri));
        }
Exemple #14
0
        /// <summary>
        /// Fetches the Danbooru post info for the request's URL and formats an IRC message from its rating,
        /// warning and - when present - character, copyright and artist tags.
        /// </summary>
        /// <param name="req">Request whose URL identifies the post; its <c>Resource</c> is set to the post info.</param>
        /// <returns>A positive result when the post was retrieved; otherwise a negative result.</returns>
        public TitlingResult PostToIrc(TitlingRequest req)
        {
            DanboPost post = DanboTools.GetPostInfo(req.Url);
            req.Resource = post;

            if (!post.Success)
            {
                return req.CreateResult(false);
            }

            string warning = ConstructWarning(post.GeneralTags, post.MetaTags);

            // Without any character, copyright or artist tags only the post ID, rating and possible
            // warning are shown.
            bool lacksDescriptiveTags =
                post.CopyrightTags.Length == 0 &&
                post.CharacterTags.Length == 0 &&
                post.ArtistTags.Length == 0;

            if (lacksDescriptiveTags)
            {
                FormatMessage(req.IrcTitle, post.Rated, warning, post.PostNo);
                return req.CreateResult(true);
            }

            DanboTools.CleanupCharacterTags(post.CharacterTags, post.CopyrightTags);

            // Turn each tag array into its display string, passing the matching colour code along.
            // (The original notes say this also limits the tag count per `MaxTagCount`.)
            var characterText = TagArrayToString(post.CharacterTags, CharacterCode);
            var copyrightText = TagArrayToString(post.CopyrightTags, CopyrightCode);
            var artistText    = TagArrayToString(post.ArtistTags, ArtistCode);

            FormatMessage(req.IrcTitle, post.Rated, warning);
            FormatDanboInfo(req.IrcTitle, characterText, copyrightText, artistText);

            return req.CreateResult(true);
        }
Exemple #15
0
        /// <summary>
        /// Retrieves the post behind the request's URL from the matching source and formats an IRC title
        /// from the board, board name and the constructed topic.
        /// </summary>
        /// <param name="req">Request whose URL identifies the post; its <c>Resource</c> is set to the post.</param>
        /// <returns>
        /// A positive result when the post was retrieved and a topic could be constructed; otherwise a
        /// negative result.
        /// </returns>
        /// <exception cref="NotSupportedException">The URL belongs to neither of the handled sources.</exception>
        public TitlingResult ThreadTopicToIrc(TitlingRequest req)
        {
            ChanPost fetched;
            var source = GetSource(req.Url);

            switch (source)
            {
            case Source.FourChan:
                fetched = FourChan.GetPost(req.Url);
                break;

            case Source.ArchiveMoe:
                fetched = ArchiveMoe.GetPost(req.Url);
                break;

            default:
                // Fail loudly for now, this might change in the future.
                throw new NotSupportedException("Passed TitlingRequest not supported.");
            }
            req.Resource = fetched;

            if (!fetched.Success)
            {
                return req.CreateResult(false);
            }

            string topic = ConstructTopic(fetched, req.Url);
            if (topic != null)
            {
                req.IrcTitle.SetFormat("[ /{0}/ - {1} ] [ {2} ]", fetched.Board, fetched.BoardName, topic);
                return req.CreateResult(true);
            }

            // A null topic means there was nothing usable to display.
            req.AddMessage("Post contained neither subject or comment.");
            return req.CreateResult(false);
        }