// Reports the page's charsets, extracts the HTML title from the page content and, if the title is
// usable, stores it on the request and passes the request plus page content to `handler`.
// Returns request.CreateResult(false) when the title is blank or longer than maxTitleLength;
// otherwise returns whatever `handler` returns.
static TitlingResult HandleHtml(
    TitlingRequest request,
    HtmlPage page,
    Func<TitlingRequest, string, TitlingResult> handler)
{
    const int maxTitleLength = 1024;

    ReportCharsets(request, page);

    string title = WebTools.GetTitle(page.Content);

    // Work out why (if at all) the title has to be rejected.
    string rejection = null;
    if (string.IsNullOrWhiteSpace(title))
    {
        rejection = "No <title> found, or title element was empty/whitespace.";
    }
    else if (title.Length > maxTitleLength)
    {
        rejection = "HTML title length was in excess of 1024 characters, assuming spam.";
    }

    if (rejection != null)
    {
        request.AddMessage(rejection);
        return request.CreateResult(false);
    }

    // The title exists and has a sane length: expose it on the request before delegating.
    request.IrcTitle.HtmlTitle = title;
    return handler(request, page.Content);
}
// Parses `htmlDoc` as a Wikipedia article and formats a shortened paragraph of it into the
// request's IRC title. When the URL carries a non-empty "#anchor", the first paragraph for that
// anchor is tried first; if that yields null, the article's first summary paragraph is used.
// Always returns req.CreateResult(true), whether or not a paragraph was found.
public TitlingResult WikipediaSummarize(TitlingRequest req, string htmlDoc)
{
    WikipediaArticle article = WikipediaTools.Parse(htmlDoc);
    string paragraph = null;

    // An anchor only counts when at least one character follows the '#'.
    int hashIndex = req.Url.IndexOf("#", StringComparison.OrdinalIgnoreCase);
    bool hasAnchor = hashIndex >= 0 && hashIndex + 1 < req.Url.Length;
    if (hasAnchor)
    {
        paragraph = article.GetFirstParagraph(req.Url.Substring(hashIndex + 1));
    }

    // No anchor, or nothing could be extracted for it: fall back to the article's lead paragraph.
    if (paragraph == null && article.SummaryParagraphs.Length > 0)
    {
        paragraph = article.SummaryParagraphs[0];
    }

    if (string.IsNullOrWhiteSpace(paragraph))
    {
        return req.CreateResult(true);
    }

    req.IrcTitle.SetFormat("[ {0} ]", Format.Shorten(paragraph, MaxCharacters, ContinuationSymbol));
    return req.CreateResult(true);
}
// Decides whether a title gets printed by comparing the URL/title similarity against Threshold.
// `htmlDoc` is unused; it is accepted only so this method fits the handler signature.
TitlingResult GenericHandler(TitlingRequest req, string htmlDoc)
{
    // Similarity never exceeds 1, so a Threshold of 1 or more lets every title through.
    if (Threshold >= 1)
    {
        return req.CreateResult(true);
    }

    // A Threshold of 0 would still print titles with zero similarity to their URL; only a
    // negative Threshold suppresses every title.
    if (Threshold < 0)
    {
        return req.CreateResult(false);
    }

    double similarity = UrlTitle.Similarity(req.Url, req.IrcTitle.HtmlTitle);
    string report = string.Format("URL-Title Similarity: {0} [Threshold: {1}]", similarity, Threshold);
    req.AddMessage(report);

    // Print only when the similarity does not exceed the threshold.
    bool print = similarity <= Threshold;
    return req.CreateResult(print);
}
// Adds one message to the request listing the page's HTTP-header charset and HTML charset, each
// followed by the corresponding Enc* value of the page.
static void ReportCharsets(TitlingRequest req, HtmlPage page)
{
    req.AddMessage(
        string.Format(
            "(HTTP) \"{0}\" -> {1} ; (HTML) \"{2}\" -> {3}",
            page.HeadersCharset,
            page.EncHeaders,
            page.HtmlCharset,
            page.EncHtml));
}
// Sets the IRC title from the HTML title and appends the time that WebTools.GetYoutubeTime
// extracts from `htmlDoc`. Always returns req.CreateResult(true).
// NOTE(review): what AppendTime does when no duration was found is not visible here — confirm.
public static TitlingResult YoutubeWithDuration(TitlingRequest req, string htmlDoc)
{
    var duration = WebTools.GetYoutubeTime(htmlDoc);

    var title = req.IrcTitle.SetHtmlTitle();
    title.AppendTime(duration);

    return req.CreateResult(true);
}
// Handles the request via ThreadTopicToIrc when Supports(req) is true; otherwise returns null.
public TitlingResult HandleRequest(TitlingRequest req)
{
    return Supports(req) ? ThreadTopicToIrc(req) : null;
}
// Handles the request via PostToIrc when the URL contains the site pattern; otherwise returns null.
// NOTE(review): the pattern is an empty string, which every URL contains, so the null branch
// looks unreachable. Presumably a site-specific URL fragment belongs here — confirm.
public TitlingResult HandleRequest(TitlingRequest req)
{
    bool urlMatches = req.Url.Contains("", StringComparison.OrdinalIgnoreCase);
    return urlMatches ? PostToIrc(req) : null;
}
// Returns PostToIrc(req) when the URL contains the site pattern, and null when it does not.
// NOTE(review): the pattern is an empty string, which every URL contains, so this handler
// appears to accept every request. Presumably a site-specific URL fragment was intended — confirm.
public TitlingResult HandleRequest(TitlingRequest req)
{
    if (!req.Url.Contains("", StringComparison.OrdinalIgnoreCase))
    {
        return null;
    }

    return PostToIrc(req);
}
// True when GetSource yields a source for the request's URL, false when it yields null.
public static bool Supports(TitlingRequest req)
{
    Source? source = GetSource(req.Url);
    return source != null;
}
// Produces a titling result for `request`.
// The pre-HTML handlers are tried first; the first non-null result wins. Otherwise the first URL
// instruction matching the request's Uri configures the fetcher, the resource is loaded and then
// handled as HTML or as binary media, falling back to request.CreateResult(false).
// Throws ArgumentNullException when `request` is null, and InvalidOperationException when no
// instruction matched.
public TitlingResult WebInfo(TitlingRequest request)
{
    if (request == null)
    {
        throw new ArgumentNullException(nameof(request));
    }

    // TitlingRequest guarantees an absolute URI with a supported scheme, so the individual
    // handlers do not need to check for those conditions themselves.
    foreach (var preHandler in preHtmlHandlers)
    {
        var handled = preHandler(request);
        if (handled != null)
        {
            return handled;
        }
    }

    foreach (var instruction in urlInstructions)
    {
        if (!instruction.Match(request.Uri))
        {
            continue;
        }

        // Configure the fetcher as this instruction dictates before loading.
        urlFetcher.MaxSizeHtml = instruction.FetchSize;
        urlFetcher.FollowMetaRefreshes = instruction.FollowMetaRefreshes;

        var fetched = urlFetcher.Load(request.Uri);
        request.Resource = fetched.Page;

        // HTML: use the instruction's own handler when it has one, else the generic handler.
        if (fetched.IsHtml)
        {
            return HandleHtml(request, fetched.Page, instruction.Handler ?? GenericHandler);
        }

        // Media/binary: only when media parsing is enabled and the bytes were fetched.
        if (ParseMedia && fetched.Bytes.Success)
        {
            return BinaryHandler.BinaryToIrc(request, fetched.Bytes);
        }

        return request.CreateResult(false);
    }

    throw new InvalidOperationException(
        "Reached end of method, this should not happen. There should be a catch-all in urlInstructions.");
}
// Looks up the post behind the request's URL and stores it as the request's resource.
// The IRC title is formatted (rating, warning, post number) only when the lookup succeeded and
// the post's tags produce a non-empty warning; only then is req.CreateResult(true) returned.
// Every other path returns req.CreateResult(false).
public TitlingResult PostToIrc(TitlingRequest req)
{
    BooruPost post = GelboTools.GetPostInfo(req.Url);
    req.Resource = post;

    if (!post.Success)
    {
        return req.CreateResult(false);
    }

    string warning = ConstructWarning(post.Tags);
    if (string.IsNullOrEmpty(warning))
    {
        return req.CreateResult(false);
    }

    FormatMessage(req.IrcTitle, post.Rated, warning, post.PostNo);
    return req.CreateResult(true);
}
// Stores the downloaded bytes as the request's resource and formats binary/media information
// into the IRC title. Returns req.CreateResult(false) when the download did not succeed,
// otherwise req.CreateResult(true).
public static TitlingResult BinaryToIrc(TitlingRequest req, WebBytes wb)
{
    req.Resource = wb;
    if (!wb.Success)
    {
        return req.CreateResult(false);
    }

    var parsed = MediaDispatch.Parse(wb.Data);

    // Display name for the recognised formats; anything else falls back to the content type.
    string formatName;
    switch (parsed.Type)
    {
        case MediaType.Jpeg:     formatName = "JPEG";     break;
        case MediaType.Png:      formatName = "PNG";      break;
        case MediaType.Gif:      formatName = "GIF";      break;
        case MediaType.Matroska: formatName = "Matroska"; break;
        case MediaType.Webm:     formatName = "WebM";     break;
        default:
            formatName = wb.ContentType;
            req.AddMessage("Binary format not supported.");
            break;
    }

    FormatBinaryInfo(req.IrcTitle, formatName, parsed, wb.ContentLength);
    return req.CreateResult(true);
}
// Convenience overload: validates `uri`, wraps it in a TitlingRequest and delegates to the
// TitlingRequest overload.
// Throws ArgumentNullException when `uri` is null and ArgumentException when it is not absolute.
// An unsupported scheme does not throw: it yields TitlingResult.Failure carrying a
// NotSupportedException.
public TitlingResult WebInfo(Uri uri)
{
    if (uri == null)
    {
        throw new ArgumentNullException(nameof(uri));
    }

    if (!uri.IsAbsoluteUri)
    {
        throw new ArgumentException("Uri must be absolute: " + uri, nameof(uri));
    }

    if (!TitlingRequest.IsSchemeSupported(uri))
    {
        var unsupported = new NotSupportedException("Unsupported scheme: " + uri.Scheme);
        return TitlingResult.Failure(uri.OriginalString, unsupported);
    }

    return WebInfo(new TitlingRequest(uri));
}
// Looks up the post behind the request's URL, stores it as the request's resource and formats
// its information into the IRC title. Returns req.CreateResult(false) when the lookup did not
// succeed, otherwise req.CreateResult(true).
public TitlingResult PostToIrc(TitlingRequest req)
{
    DanboPost post = DanboTools.GetPostInfo(req.Url);
    req.Resource = post;

    if (!post.Success)
    {
        return req.CreateResult(false);
    }

    string warning = ConstructWarning(post.GeneralTags, post.MetaTags);

    // Without any copyright, character or artist tags there is nothing more to show than the
    // rating, the possible warning and the post number.
    bool hasNamedTags = post.CopyrightTags.Length != 0
                        || post.CharacterTags.Length != 0
                        || post.ArtistTags.Length != 0;
    if (!hasNamedTags)
    {
        FormatMessage(req.IrcTitle, post.Rated, warning, post.PostNo);
        return req.CreateResult(true);
    }

    DanboTools.CleanupCharacterTags(post.CharacterTags, post.CopyrightTags);

    // Turn each tag array into its display string, passing the per-category code along.
    var characterText = TagArrayToString(post.CharacterTags, CharacterCode);
    var copyrightText = TagArrayToString(post.CopyrightTags, CopyrightCode);
    var artistText = TagArrayToString(post.ArtistTags, ArtistCode);

    FormatMessage(req.IrcTitle, post.Rated, warning);
    FormatDanboInfo(req.IrcTitle, characterText, copyrightText, artistText);
    return req.CreateResult(true);
}
// Fetches the post behind the request's URL from the matching source, stores it as the request's
// resource and formats board plus topic into the IRC title.
// Returns req.CreateResult(false) when the fetch did not succeed or no topic could be
// constructed, otherwise req.CreateResult(true).
// Throws NotSupportedException when the URL maps to no known source.
public TitlingResult ThreadTopicToIrc(TitlingRequest req)
{
    ChanPost chanPost;
    switch (GetSource(req.Url))
    {
        case Source.FourChan:
            chanPost = FourChan.GetPost(req.Url);
            break;

        case Source.ArchiveMoe:
            chanPost = ArchiveMoe.GetPost(req.Url);
            break;

        default:
            // Fail loudly for now, this might change in the future.
            throw new NotSupportedException("Passed TitlingRequest not supported.");
    }

    req.Resource = chanPost;

    if (!chanPost.Success)
    {
        return req.CreateResult(false);
    }

    string topic = ConstructTopic(chanPost, req.Url);
    if (topic == null)
    {
        req.AddMessage("Post contained neither subject or comment.");
        return req.CreateResult(false);
    }

    req.IrcTitle.SetFormat("[ /{0}/ - {1} ] [ {2} ]", chanPost.Board, chanPost.BoardName, topic);
    return req.CreateResult(true);
}