public LinkAndInfo ResolveLink(LinkToResolve link) { if (link.ContentType.MediaType != "text/html" && link.ContentType.MediaType != "application/xhtml+xml") { return(null); } // HTML? parse it and get the title var respStr = EncodingGuesser.GuessEncodingAndDecode(link.ResponseBytes, link.ContentType); var htmlDoc = new HtmlDocument(); htmlDoc.LoadHtml(respStr); var titleElement = htmlDoc.DocumentNode.SelectSingleNode(".//title"); if (titleElement != null) { return(link.ToResult(FetchErrorLevel.Success, FoldWhitespace(HtmlEntity.DeEntitize(titleElement.InnerText)).Trim())); } var h1Element = htmlDoc.DocumentNode.SelectSingleNode(".//h1"); if (h1Element != null) { return(link.ToResult(FetchErrorLevel.Success, FoldWhitespace(HtmlEntity.DeEntitize(h1Element.InnerText)).Trim())); } return(link.ToResult(FetchErrorLevel.Success, "(HTML without a title O_o)")); }
public LinkAndInfo ResolveLink(LinkToResolve link) { if (link.ContentType?.MediaType == null) { return(null); } string typeDescription; if (!DetectedMimeTypes.TryGetValue(link.ContentType.MediaType, out typeDescription)) { // you're not my type return(null); } string description = ResolveLinkText(link, typeDescription); return(link.ToResult(FetchErrorLevel.Success, description)); }
public LinkAndInfo ResolveLink(LinkToResolve link) { string absoluteUri = link.Link.AbsoluteUri; Match z0rMatch = Z0rUrlPattern.Match(absoluteUri); if (!z0rMatch.Success) { // can't handle this return(null); } // obtain the ID long z0rID; if (!long.TryParse(z0rMatch.Groups["id"].Value, NumberStyles.None, CultureInfo.InvariantCulture, out z0rID)) { // unparseable ID, probably too many digits return(null); } Z0rEntry entry; if (EntryCache.TryGetValue(z0rID, out entry)) { // fast-path return(link.ToResult(FetchErrorLevel.Success, FormatEntry(entry))); } Z0rRange range = RangeForID(z0rID); if (!MaxPage.HasValue) { MaxPage = ObtainMaxPageValue(); } if (!MaxPage.HasValue) { // bad return(link.ToResult( FetchErrorLevel.TransientError, string.Format(CultureInfo.InvariantCulture, "z0r #{0}; fetching index page list failed", z0rID) )); } if (range.Page > MaxPage) { // the index does not contain this page entry = new Z0rEntry(z0rID, null, null, null, null); return(link.ToResult(FetchErrorLevel.Success, FormatEntry(entry))); } LoadFromPage(range.Page); if (EntryCache.TryGetValue(z0rID, out entry)) { return(link.ToResult(FetchErrorLevel.Success, FormatEntry(entry))); } return(link.ToResult( FetchErrorLevel.TransientError, string.Format(CultureInfo.InvariantCulture, "z0r #{0}; fetching failed", z0rID) )); }