Esempio n. 1
0
        /// <summary>
        /// Describes an HTML or XHTML response by the text of its title (or, failing that, a
        /// top-level heading).
        /// </summary>
        /// <param name="link">The fetched link, including its content type and raw response bytes.</param>
        /// <returns>
        /// <c>null</c> if the response has no media type or its media type is neither
        /// <c>text/html</c> nor <c>application/xhtml+xml</c>; otherwise a successful result whose
        /// text is taken from a <c>title</c> element, else from an <c>h1</c> element, else a fixed
        /// placeholder.
        /// </returns>
        public LinkAndInfo ResolveLink(LinkToResolve link)
        {
            // A response without a content type cannot be identified as HTML; decline it instead
            // of dereferencing a null ContentType.
            string mediaType = link.ContentType?.MediaType;

            // Media type names are case-insensitive, so "Text/HTML" must be accepted as well.
            // (string.Equals returns false for a null mediaType.)
            bool isHtml =
                string.Equals(mediaType, "text/html", System.StringComparison.OrdinalIgnoreCase) ||
                string.Equals(mediaType, "application/xhtml+xml", System.StringComparison.OrdinalIgnoreCase);

            if (!isHtml)
            {
                // not ours to handle
                return null;
            }

            // HTML? decode the bytes and parse the markup
            var respStr = EncodingGuesser.GuessEncodingAndDecode(link.ResponseBytes, link.ContentType);

            var htmlDoc = new HtmlDocument();
            htmlDoc.LoadHtml(respStr);

            // Candidates in order of preference; later ones are only queried if earlier ones are absent.
            foreach (string xpath in new[] { ".//title", ".//h1" })
            {
                var element = htmlDoc.DocumentNode.SelectSingleNode(xpath);

                if (element != null)
                {
                    // decode entities first, then normalize whitespace in the decoded text
                    string text = FoldWhitespace(HtmlEntity.DeEntitize(element.InnerText)).Trim();
                    return link.ToResult(FetchErrorLevel.Success, text);
                }
            }

            return link.ToResult(FetchErrorLevel.Success, "(HTML without a title O_o)");
        }
Esempio n. 2
0
        /// <summary>
        /// Describes a response whose media type is listed in <c>DetectedMimeTypes</c>.
        /// </summary>
        /// <param name="link">The fetched link and its content type.</param>
        /// <returns>
        /// <c>null</c> if the response has no media type or the media type is not a key of
        /// <c>DetectedMimeTypes</c>; otherwise a successful result carrying the text produced by
        /// <c>ResolveLinkText</c>.
        /// </returns>
        public LinkAndInfo ResolveLink(LinkToResolve link)
        {
            var mediaType = link.ContentType?.MediaType;
            string knownTypeLabel;

            // Decline anything without a media type or with one we have no description for.
            if (mediaType == null || !DetectedMimeTypes.TryGetValue(mediaType, out knownTypeLabel))
            {
                return null;
            }

            return link.ToResult(FetchErrorLevel.Success, ResolveLinkText(link, knownTypeLabel));
        }
Esempio n. 3
0
        /// <summary>
        /// Describes a link whose absolute URI matches <c>Z0rUrlPattern</c>, using the entry cache
        /// and loading the relevant index page when the entry is not cached yet.
        /// </summary>
        /// <param name="link">The link to resolve.</param>
        /// <returns>
        /// <c>null</c> if the URI does not match the pattern or its <c>id</c> group cannot be parsed
        /// as a <c>long</c>; a successful result with the formatted entry when the entry is known
        /// (or lies beyond the last index page); a transient-error result when the maximum page
        /// number or the entry itself could not be obtained.
        /// </returns>
        public LinkAndInfo ResolveLink(LinkToResolve link)
        {
            Match urlMatch = Z0rUrlPattern.Match(link.Link.AbsoluteUri);
            long entryID;

            // Not one of our URLs, or the ID is unparseable (probably too many digits).
            if (!urlMatch.Success
                || !long.TryParse(urlMatch.Groups["id"].Value, NumberStyles.None, CultureInfo.InvariantCulture, out entryID))
            {
                return null;
            }

            Z0rEntry knownEntry;

            // Fast path: the entry has already been cached.
            if (EntryCache.TryGetValue(entryID, out knownEntry))
            {
                return link.ToResult(FetchErrorLevel.Success, FormatEntry(knownEntry));
            }

            Z0rRange idRange = RangeForID(entryID);

            // Obtain the maximum page number on first use; give up if that fails.
            if (!MaxPage.HasValue)
            {
                MaxPage = ObtainMaxPageValue();

                if (!MaxPage.HasValue)
                {
                    string indexFailure = string.Format(
                        CultureInfo.InvariantCulture,
                        "z0r #{0}; fetching index page list failed",
                        entryID);
                    return link.ToResult(FetchErrorLevel.TransientError, indexFailure);
                }
            }

            // The index does not contain this page: report the entry with only its ID set.
            if (idRange.Page > MaxPage)
            {
                return link.ToResult(
                    FetchErrorLevel.Success,
                    FormatEntry(new Z0rEntry(entryID, null, null, null, null)));
            }

            LoadFromPage(idRange.Page);

            // Check the cache again now that the page load has been attempted.
            if (!EntryCache.TryGetValue(entryID, out knownEntry))
            {
                string fetchFailure = string.Format(
                    CultureInfo.InvariantCulture,
                    "z0r #{0}; fetching failed",
                    entryID);
                return link.ToResult(FetchErrorLevel.TransientError, fetchFailure);
            }

            return link.ToResult(FetchErrorLevel.Success, FormatEntry(knownEntry));
        }