Example #1
0
        public void TestMakeUrlAbsoluteUrls()
        {
            // Relative URL -> absolute URL expected after resolving it against
            // the page URL assembled below.
            var ExpectedByRelativeUrl = new Dictionary<string, string>
            {
                {
                    @"path/to/images/picture.gif",
                    @"http://www.host.com/path/to/page/path/to/images/picture.gif"
                },
                {
                    @"../path/to/images/picture.gif",
                    @"http://www.host.com/path/to/path/to/images/picture.gif"
                },
                {
                    @"../../path/to/images/picture.gif",
                    @"http://www.host.com/path/path/to/images/picture.gif"
                }
            };

            const string BaseUrl  = "http://www.host.com/path/to/page/";
            const string Filename = "index.html";
            const string PageUrl  = BaseUrl + Filename;

            foreach (KeyValuePair<string, string> Expectation in ExpectedByRelativeUrl)
            {
                string Resolved = MacroscopeHttpUrlUtils.MakeUrlAbsolute(PageUrl, Expectation.Key);

                Assert.AreEqual(Expectation.Value, Resolved, "DO NOT MATCH");
            }
        }
        /**************************************************************************/

        /// <summary>
        /// Cleans a CSS URL value with <c>CleanUrlCss</c> and resolves the result
        /// against this document's URL.
        /// </summary>
        /// <param name="BackgroundImageUrl">Raw CSS value to clean and resolve.</param>
        /// <returns>
        /// The absolute URL, or null when cleaning yields null or when resolution
        /// throws a <c>MacroscopeUriFormatException</c>.
        /// </returns>
        private string ProcessCssBackImageUrl(string BackgroundImageUrl)
        {
            string CleanedUrl = MacroscopeHttpUrlUtils.CleanUrlCss(BackgroundImageUrl);

            // Nothing usable was extracted; there is nothing to resolve or log.
            if (CleanedUrl == null)
            {
                return(null);
            }

            string ResolvedUrl = null;

            try
            {
                ResolvedUrl = MacroscopeHttpUrlUtils.MakeUrlAbsolute(BaseUrl: this.DocUrl, Url: CleanedUrl);
            }
            catch (MacroscopeUriFormatException ex)
            {
                // Resolution failure is only logged; the method then returns null.
                DebugMsg(string.Format("ProcessCssBackImageUrl: {0}", ex.Message));
            }

            DebugMsg(string.Format("ProcessCssBackImageUrl: {0}", CleanedUrl));
            DebugMsg(string.Format("ProcessCssBackImageUrl: this.DocUrl: {0}", this.DocUrl));
            DebugMsg(string.Format("ProcessCssBackImageUrl: LinkUrlAbs: {0}", ResolvedUrl));

            return(ResolvedUrl);
        }
        /** Sitemaps **************************************************************/

        /// <summary>
        /// Collects the sitemap URLs listed by the robots data fetched for <paramref name="Url"/>,
        /// each resolved to an absolute URL against <paramref name="Url"/>.
        /// </summary>
        /// <param name="Url">URL used both to fetch the robots data and as the base for resolution.</param>
        /// <returns>
        /// The collected absolute sitemap URLs. The list is empty when the robots
        /// protocol preference is off or no sitemaps are available; it may be
        /// partial if an exception interrupts the loop (the exception is only logged).
        /// </returns>
        public async Task <List <string> > GetSitemapsAsList(string Url)
        {
            List<string> Collected = new List<string>();

            if (!MacroscopePreferencesManager.GetFollowRobotsProtocol())
            {
                return(Collected);
            }

            Robots robot = await this.FetchRobot(Url: Url);

            try
            {
                bool HasSitemaps = (robot != null) && (robot.Sitemaps != null);

                if (HasSitemaps)
                {
                    foreach (Sitemap Entry in robot.Sitemaps)
                    {
                        string RawSitemapUrl = Entry.Url.ToString();

                        Collected.Add(
                            MacroscopeHttpUrlUtils.MakeUrlAbsolute(BaseUrl: Url, Url: RawSitemapUrl)
                            );

                        this.DebugMsg(string.Format("ROBOTS SitemapUrl: {0}", RawSitemapUrl));
                    }
                }
            }
            catch (Exception ex)
            {
                // Any failure while walking the entries ends the loop; entries
                // already added are still returned.
                this.DebugMsg(ex.Message);
            }

            return(Collected);
        }
Example #4
0
        public void TestMakeUrlAbsoluteUrlsWithBaseHref()
        {
            // Each row holds, in order:
            //   [0] Base HREF
            //   [1] Base URL
            //   [2] Page URL
            //   [3] Absolute URL expected from the resolution
            string[][] UrlCases =
            {
                new[]
                {
                    "http://www.host.com/BASEHREF/index.html",
                    "http://www.host.com/path/to/page/",
                    "http://www.host.com/path/to/page/to/pages/index.html",
                    "http://www.host.com/path/to/page/to/pages/index.html"
                },
                new[]
                {
                    "http://www.host.com/BASEHREF/index.html",
                    "http://www.host.com/path/to/page/",
                    "path/to/pages/index.html",
                    "http://www.host.com/BASEHREF/path/to/pages/index.html"
                },
                new[]
                {
                    "http://www.host.com/BASEHREF/index.html",
                    "http://www.host.com/path/to/page/",
                    "../path/to/pages/index.html",
                    "http://www.host.com/path/to/pages/index.html"
                },
                new[]
                {
                    "http://www.host.com/BASEHREF/index.html",
                    "http://www.host.com/path/to/page/",
                    "../../path/to/pages/index.html",
                    "http://www.host.com/path/to/pages/index.html"
                }
            };

            foreach (string[] UrlCase in UrlCases)
            {
                string Resolved = MacroscopeHttpUrlUtils.MakeUrlAbsolute(
                    BaseHref: UrlCase[0],
                    BaseUrl: UrlCase[1],
                    Url: UrlCase[2]
                    );

                Assert.AreEqual(UrlCase[3], Resolved, "DO NOT MATCH");
            }
        }
Example #5
0
        public void TestIsWithinChildDirectory()
        {
            const string StartUrl = "http://www.companyname.com/path/to/some/deep/folder/index.html";

            // Every candidate sits below the folder that contains StartUrl.
            string[] Candidates =
            {
                "http://www.companyname.com/path/to/some/deep/folder/sub-folder/sub-folder/index.html",
                "http://www.companyname.com/path/to/some/deep/folder/sub-folder/image"
            };

            foreach (string Candidate in Candidates)
            {
                bool IsChild = MacroscopeHttpUrlUtils.IsWithinChildDirectory(StartUrl: StartUrl, Url: Candidate);

                Assert.IsTrue(IsChild, string.Format("FAIL: {0}", Candidate));
            }
        }
        /**************************************************************************/

        /// <summary>
        /// Reports whether the MIME type obtained for <paramref name="Url"/> is
        /// <c>application/pdf</c>, compared case-insensitively.
        /// </summary>
        /// <param name="JobMaster">Passed through to <c>GetMimeTypeOfUrl</c>.</param>
        /// <param name="Url">URL to probe; it is parsed with the <see cref="Uri"/> constructor.</param>
        /// <returns>True when a non-empty MIME type matches; otherwise false.</returns>
        public async Task <bool> IsPdfUrl(MacroscopeJobMaster JobMaster, string Url)
        {
            Uri ProbeUri = new Uri(Url);

            string ReportedMimeType = await MacroscopeHttpUrlUtils.GetMimeTypeOfUrl(
                JobMaster: JobMaster,
                TargetUri: ProbeUri
                );

            // A missing or empty MIME type can never be a PDF match.
            return(
                !string.IsNullOrEmpty(ReportedMimeType) &&
                Regex.IsMatch(ReportedMimeType, "^application/pdf$", RegexOptions.IgnoreCase)
                );
        }
Example #7
0
        public void TestDowncaseUrl()
        {
            // Input URL -> expected result. Per these cases only the path is
            // lower-cased; query-string casing is expected to survive.
            var ExpectedByInputUrl = new Dictionary<string, string>
            {
                { "https://nazuke.github.io/", "https://nazuke.github.io/" },
                { "https://nazuke.github.io/ABC.html", "https://nazuke.github.io/abc.html" },
                { "https://nazuke.github.io/ABC/ABC.html", "https://nazuke.github.io/abc/abc.html" },
                { "https://nazuke.github.io/ABC/ABC/ABC.HTML", "https://nazuke.github.io/abc/abc/abc.html" },
                { "https://nazuke.github.io/ABC/ABC/ABC/ABC/ABC.html?key=value", "https://nazuke.github.io/abc/abc/abc/abc/abc.html?key=value" },
                { "https://nazuke.github.io/ABC/ABC/ABC/ABC/ABC.html?key=value&name=bongo", "https://nazuke.github.io/abc/abc/abc/abc/abc.html?key=value&name=bongo" },
                { "https://nazuke.github.io/ABC/ABC/ABC/ABC/ABC.html?KEY=value&Name=Bongo", "https://nazuke.github.io/abc/abc/abc/abc/abc.html?KEY=value&Name=Bongo" }
            };

            foreach (KeyValuePair<string, string> Expectation in ExpectedByInputUrl)
            {
                string Expected = Expectation.Value;
                string Actual   = MacroscopeHttpUrlUtils.DowncaseUrl(Url: Expectation.Key);

                Assert.AreEqual(Expected, Actual);
            }
        }
Example #8
0
        public void TestParentFolderUrlsDepth()
        {
            // URL -> number of parent-folder URLs GetParentFolderUrls should return.
            var ExpectedCountByUrl = new Dictionary<string, int>
            {
                { "https://nazuke.github.io/", 0 },
                { "https://nazuke.github.io/0.html", 0 },
                { "https://nazuke.github.io/0/1.html", 1 },
                { "https://nazuke.github.io/0/1/2.html", 2 },
                { "https://nazuke.github.io/0/1/2/", 3 },
                { "https://nazuke.github.io/0/1/2/3.html", 3 },
                { "https://nazuke.github.io/0/1/2/3.html/", 4 },
                { "https://nazuke.github.io/0/1/2/3/4.html?key=value", 4 }
            };

            foreach (KeyValuePair<string, int> Expectation in ExpectedCountByUrl)
            {
                List<string> Parents = MacroscopeHttpUrlUtils.GetParentFolderUrls(Url: Expectation.Key);
                int          Found   = Parents.Count;

                Assert.AreEqual(Expectation.Value, Found);
            }
        }
Example #9
0
        /** Target URL ************************************************************/

        /// <summary>
        /// Stores the target URL. When the downcase-links preference is on, the
        /// value from <c>DowncaseUrl</c> is stored instead, unless that value is null.
        /// </summary>
        /// <param name="TargetUrl">URL to store.</param>
        public void SetTargetUrl(string TargetUrl)
        {
            string Effective = TargetUrl;

            if (MacroscopePreferencesManager.GetDowncaseLinks())
            {
                // Keep the caller's URL when downcasing yields nothing.
                Effective = MacroscopeHttpUrlUtils.DowncaseUrl(Url: TargetUrl) ?? TargetUrl;
            }

            this.TargetUrl = Effective;
        }
Example #10
0
        public void TestFindUrlDepth()
        {
            // URL -> depth FindUrlDepth is expected to report.
            var ExpectedDepthByUrl = new Dictionary<string, int>
            {
                { "https://nazuke.github.io/", 0 },
                { "https://nazuke.github.io/0.html", 0 },
                { "https://nazuke.github.io/0/1.html", 1 },
                { "https://nazuke.github.io/0/1/2.html", 2 },
                { "https://nazuke.github.io/0/1/2/", 2 },
                { "https://nazuke.github.io/0/1/2/3.html", 3 },
                { "https://nazuke.github.io/0/1/2/3.html/", 3 },
                { "https://nazuke.github.io/0/1/2/3/4.html?key=value", 4 }
            };

            foreach (KeyValuePair<string, int> Expectation in ExpectedDepthByUrl)
            {
                string CandidateUrl  = Expectation.Key;
                int    ExpectedDepth = Expectation.Value;

                this.DebugMsg(string.Format("{0}: {1}", ExpectedDepth, CandidateUrl));

                int ActualDepth = MacroscopeHttpUrlUtils.FindUrlDepth(Url: CandidateUrl);

                Assert.AreEqual(ExpectedDepth, ActualDepth);
            }
        }
        /**************************************************************************/

        /// <summary>
        /// Registers every <c>loc</c> element of a sitemap XML document (in the
        /// sitemap namespace) as a followed SITEMAPXML outlink of this document.
        /// </summary>
        /// <param name="XmlDoc">Parsed sitemap XML document to scan.</param>
        private void ProcessSitemapXmlOutlinks(XmlDocument XmlDoc)
        {
            XmlNodeList OutlinksList = XmlDoc.GetElementsByTagName("loc", MacroscopeConstants.SitemapXmlNamespace);

            // Check for null BEFORE touching the list: the count used to be
            // logged first, so the null check could never protect anything.
            if (OutlinksList == null)
            {
                return;
            }

            DebugMsg(string.Format("ProcessSitemapXmlOutlinks nlOutlinks: {0}", OutlinksList.Count));

            foreach (XmlNode LinkNode in OutlinksList)
            {
                string LinkUrl = null;

                try
                {
                    LinkUrl = LinkNode.InnerText;
                    DebugMsg(string.Format("ProcessSitemapXmlOutlinks sLinkUrl: {0}", LinkUrl));
                }
                catch (Exception ex)
                {
                    // A node whose text cannot be read is logged and skipped.
                    DebugMsg(string.Format("ProcessSitemapXmlOutlinks: {0}", ex.Message));
                }

                if (LinkUrl == null)
                {
                    continue;
                }

                string LinkUrlAbs = MacroscopeHttpUrlUtils.MakeUrlAbsolute(BaseUrl: this.GetUrl(), Url: LinkUrl);

                MacroscopeLink Outlink = this.AddDocumentOutlink(
                    AbsoluteUrl: LinkUrlAbs,
                    LinkType: MacroscopeConstants.InOutLinkType.SITEMAPXML,
                    Follow: true
                    );

                // Remember the URL exactly as written in the sitemap, when a link was created.
                if (Outlink != null)
                {
                    Outlink.SetRawTargetUrl(LinkUrl);
                }
            }
        }
Example #12
0
        public void TestValidateUrls()
        {
            // URL -> whether ValidateUrl is expected to accept it.
            var ExpectedValidityByUrl = new Dictionary<string, bool>
            {
                { "http://www.host.com/", true },
                { "http://www.host.com/index.html", true },
                { "http://www.host.com/path/path/to/images/picture.gif", true },
                { "http://www.host.com/??", true },
                { "http://www.host.com/ ", true },
                { "http://   www.host.com/", false }
            };

            foreach (KeyValuePair<string, bool> Expectation in ExpectedValidityByUrl)
            {
                string CandidateUrl = Expectation.Key;
                bool   IsValid      = MacroscopeHttpUrlUtils.ValidateUrl(CandidateUrl);

                Assert.AreEqual(Expectation.Value, IsValid, string.Format("NOT VALID: {0}", CandidateUrl));
            }
        }
Example #13
0
        public void TestStripHashFragment()
        {
            // Input URL -> the same URL with any "#fragment" removed.
            var ExpectedByInputUrl = new Dictionary<string, string>
            {
                { "http://www.host.com/#aberdeen-angus", "http://www.host.com/" },
                { "http://www.host.com/product/list/#boris", "http://www.host.com/product/list/" },
                { "http://www.host.com/product/list/index.html#boris", "http://www.host.com/product/list/index.html" },
                { "http://www.host.com/?key1=value1&key2=value2&key3=value3", "http://www.host.com/?key1=value1&key2=value2&key3=value3" },
                { "http://www.host.com/?key1=value1&key2=value2&key3=value3#gonzo", "http://www.host.com/?key1=value1&key2=value2&key3=value3" },
                { "http://www.host.com/index.html?key1=value1&key2=value2&key3=value3#gonzo", "http://www.host.com/index.html?key1=value1&key2=value2&key3=value3" }
            };

            foreach (KeyValuePair<string, string> Expectation in ExpectedByInputUrl)
            {
                string InputUrl = Expectation.Key;
                string Stripped = MacroscopeHttpUrlUtils.StripHashFragment(InputUrl);

                Assert.AreEqual(Expectation.Value, Stripped, string.Format("NOT VALID: {0}", InputUrl));
            }
        }
        /** -------------------------------------------------------------------- **/

        /// <summary>
        /// Appends to <paramref name="SitemapText"/> the target URL of every outgoing
        /// hyperlink of <paramref name="msDoc"/> whose path ends in ".pdf", that is on
        /// an allowed host, and that passes <c>VerifySameHost</c> against the document URL.
        /// </summary>
        /// <param name="msDoc">Document whose outgoing hyperlinks are scanned.</param>
        /// <param name="SitemapText">Output list receiving the accepted URLs.</param>
        /// <param name="Dedupe">
        /// URLs already considered. Every newly seen URL is recorded here, whether
        /// or not it ends up in <paramref name="SitemapText"/>.
        /// </param>
        private void GenerateTextSitemapPdfEntries(
            MacroscopeDocument msDoc,
            List <string> SitemapText,
            Dictionary <string, bool> Dedupe
            )
        {
            foreach (MacroscopeHyperlinkOut HyperlinkOut in msDoc.IterateHyperlinksOut())
            {
                string Url = HyperlinkOut.GetTargetUrl();

                // Skip duplicates first so no Uri is built for a URL that is
                // about to be discarded anyway.
                if (Dedupe.ContainsKey(Url))
                {
                    continue;
                }

                // Parse before recording, so a URL that fails to parse is not
                // left behind in Dedupe when the exception propagates.
                Uri UrlParsed = new Uri(uriString: Url);

                Dedupe.Add(Url, true);

                // A file-extension test is non-linguistic: compare ordinally and
                // case-insensitively, without lower-casing the path first.
                if (!UrlParsed.AbsolutePath.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                if (!this.DocCollection.GetAllowedHosts().IsAllowedFromUrl(Url: Url))
                {
                    continue;
                }

                if (!MacroscopeHttpUrlUtils.VerifySameHost(BaseUrl: msDoc.GetUrl(), Url: Url))
                {
                    continue;
                }

                SitemapText.Add(Url);
            }
        }
        /**************************************************************************/

        /*
         *
         * Reference: https://www.w3.org/TR/html5/document-metadata.html#the-base-element
         *
         */

        /// <summary>
        /// Resolves <paramref name="Url"/> to an absolute URL, honouring an optional base HREF.
        /// </summary>
        /// <param name="BaseHref">
        /// Base HREF value. When null or empty it is ignored; otherwise it is first
        /// resolved against <paramref name="BaseUrl"/> and the result becomes the
        /// base for <paramref name="Url"/>.
        /// </param>
        /// <param name="BaseUrl">Base URL used when no base HREF applies.</param>
        /// <param name="Url">URL to resolve.</param>
        /// <returns>The result of the two-argument <c>MakeUrlAbsolute</c> overload.</returns>
        public static string MakeUrlAbsolute(
            string BaseHref,
            string BaseUrl,
            string Url
            )
        {
            // No base HREF: plain resolution against the base URL.
            if (string.IsNullOrEmpty(BaseHref))
            {
                return(MacroscopeHttpUrlUtils.MakeUrlAbsolute(BaseUrl: BaseUrl, Url: Url));
            }

            string ResolvedBaseHref = MacroscopeHttpUrlUtils.MakeUrlAbsolute(BaseUrl: BaseUrl, Url: BaseHref);

            DebugMsgStatic(string.Format("BASEHREF: {0}", BaseHref));
            DebugMsgStatic(string.Format("ABSOLUTEBASEHREF: {0}", ResolvedBaseHref));

            string Resolved = MacroscopeHttpUrlUtils.MakeUrlAbsolute(BaseUrl: ResolvedBaseHref, Url: Url);

            DebugMsgStatic(string.Format("URL: {0}", Url));
            DebugMsgStatic(string.Format("URLFIXED: {0}", Resolved));

            return(Resolved);
        }
Example #16
0
        public void TestCleanUrlCss()
        {
            // CSS property text -> URL that CleanUrlCss is expected to return;
            // null means no usable URL is expected.
            var ExpectedByProperty = new Dictionary<string, string>
            {
                {
                    "background-image:none;",
                    null
                },
                {
                    "background: #0b7bee url(none) no-repeat center center/cover;",
                    null
                },
                {
                    "background: #0b7bee url(images/video-bg.jpg) no-repeat center center/cover;",
                    "images/video-bg.jpg"
                },
                {
                    "background: #0b7bee url(\"images/video-bg.jpg\") no-repeat center center/cover;",
                    "images/video-bg.jpg"
                },
                {
                    "src: url(\"fonts/company/latin-e-bold-eot.eot\");",
                    "fonts/company/latin-e-bold-eot.eot"
                },
                {
                    "src: url(\"fonts/company/latin-e-bold-eot.eot?#iefix\") format(\"embedded-opentype\"),url(\"fonts/company/latin-e-bold-woff.woff\") format(\"woff\"),url(\"fonts/company/latin-e-bold-ttf.ttf\") format(\"truetype\");",
                    "fonts/company/latin-e-bold-eot.eot?#iefix"
                },
                {
                    "background: #ffffff url(images/services/features-background.png) no-repeat left bottom;",
                    "images/services/features-background.png"
                },
                {
                    "background: transparent url(\"images/home/mouse.png\") no-repeat 90% top;",
                    "images/home/mouse.png"
                },
                {
                    "background: #0b7bee url(images/services/features-background_hover.png) no-repeat left bottom;",
                    "images/services/features-background_hover.png"
                },
                {
                    "background-image: url(\"images/global/page-head-trans.png\");",
                    "images/global/page-head-trans.png"
                },
                {
                    "background-image: url(\"images/heroes/hero.jpg\");",
                    "images/heroes/hero.jpg"
                }
            };

            foreach (KeyValuePair<string, string> Expectation in ExpectedByProperty)
            {
                string PropertyKey = Expectation.Key;
                string Cleaned     = MacroscopeHttpUrlUtils.CleanUrlCss(PropertyKey);

                // Report the INPUT property on failure: the actual value is
                // already printed by Assert.AreEqual, and the input is what
                // identifies the failing case.
                Assert.AreEqual(Expectation.Value, Cleaned, string.Format("NOT VALID: {0}", PropertyKey));
            }
        }
Example #17
0
        /**************************************************************************/

        // Fetches a single URL: obtains (or creates) its document, applies the
        // hostname, robots, already-seen and depth checks, executes the document,
        // then queues follow-up work (credential requests, redirects, outlinks).
        //
        // Url:               URL to fetch.
        // RedirectedFromUrl: optional URL that redirected to Url; when non-empty it
        //                    is recorded on the document.
        //
        // Returns the FetchStatus describing the outcome. Early exits return VOID
        // (page limit reached), ALREADY_SEEN or SKIPPED (too deep).
        private async Task <MacroscopeConstants.FetchStatus> Fetch(string Url, string RedirectedFromUrl = null)
        {
            MacroscopeDocument msDoc = null;

            MacroscopeConstants.FetchStatus FetchStatus = MacroscopeConstants.FetchStatus.VOID;
            bool BlockedByRobotsRule;

            // A page limit of -1 or lower disables this check. Once the limit is
            // reached, return immediately with the initial VOID status.
            if (MacroscopePreferencesManager.GetPageLimit() > -1)
            {
                int PagesFound = this.JobMaster.GetPagesFound();
                int PageLimit  = MacroscopePreferencesManager.GetPageLimit();
                if (PagesFound >= PageLimit)
                {
                    this.DebugMsg(string.Format("PAGE LIMIT REACHED: {0} :: {1}", PageLimit, PagesFound));
                    return(FetchStatus);
                }
            }

            // Reuse the known document for this URL, or create a new one.
            if (this.DocCollection.ContainsDocument(Url: Url))
            {
                msDoc = this.DocCollection.GetDocumentByUrl(Url: Url);

                // If the known document reports a BASIC authentication realm and a
                // matching credential is stored, recreate the document with that
                // credential.
                if (msDoc.GetAuthenticationRealm() != null)
                {
                    if (msDoc.GetAuthenticationType() == MacroscopeConstants.AuthenticationType.BASIC)
                    {
                        MacroscopeCredential Credential;

                        Credential = this.JobMaster.GetCredentialsHttp().GetCredential(
                            msDoc.GetHostAndPort(),
                            msDoc.GetAuthenticationRealm()
                            );

                        if (Credential != null)
                        {
                            msDoc = this.DocCollection.CreateDocument(
                                Credential: Credential,
                                Url: Url
                                );
                        }
                    }
                }
            }
            else
            {
                msDoc = this.DocCollection.CreateDocument(Url: Url);
            }

            if (!string.IsNullOrEmpty(RedirectedFromUrl))
            {
                msDoc.SetUrlRedirectFrom(Url: RedirectedFromUrl);
            }

            // Start from OK; the checks below may replace this status.
            msDoc.SetFetchStatus(MacroscopeConstants.FetchStatus.OK);

            // NOTE(review): a failed hostname check marks the document with
            // BadGateway / NETWORK_ERROR, but the method does not return here and
            // carries on with the remaining steps - confirm this is intended.
            if (!MacroscopeDnsTools.CheckValidHostname(Url: Url))
            {
                this.DebugMsg(string.Format("Fetch :: CheckValidHostname: {0}", "NOT OK"));
                msDoc.SetStatusCode(HttpStatusCode.BadGateway);
                FetchStatus = MacroscopeConstants.FetchStatus.NETWORK_ERROR;
                msDoc.SetFetchStatus(FetchStatus);
            }

            // Record on the document what CheckRobotRule reports for this URL.
            if (await this.JobMaster.GetRobots().CheckRobotRule(Url: Url))
            {
                msDoc.SetAllowedByRobots(true);
            }
            else
            {
                msDoc.SetAllowedByRobots(false);
            }

            BlockedByRobotsRule = await this.JobMaster.GetRobots().ApplyRobotRule(Url: Url);

            // NOTE(review): despite the variable's name, a FALSE result from
            // ApplyRobotRule is the case handled as "disallowed" below - confirm
            // the return-value semantics of ApplyRobotRule.
            if (!BlockedByRobotsRule)
            {
                this.DebugMsg(string.Format("Disallowed by robots.txt: {0}", Url));

                this.JobMaster.AddToBlockedByRobots(Url);

                FetchStatus = MacroscopeConstants.FetchStatus.ROBOTS_DISALLOWED;

                msDoc.SetFetchStatus(FetchStatus);

                JobHistory.VisitedHistoryItem(Url: msDoc.GetUrl());
            }
            else
            {
                this.JobMaster.RemoveFromBlockedByRobots(Url);
            }

            if (this.AllowedHosts.IsExternalUrl(Url: Url))
            {
                this.DebugMsg(string.Format("IsExternalUrl: {0}", Url));
                msDoc.SetIsExternal(State: true);
            }

            // A document already held in the collection that is not dirty is
            // reported as ALREADY_SEEN and is not fetched again.
            if (this.DocCollection.ContainsDocument(Url: Url))
            {
                if (!this.DocCollection.GetDocumentByUrl(Url: Url).GetIsDirty())
                {
                    FetchStatus = MacroscopeConstants.FetchStatus.ALREADY_SEEN;
                    return(FetchStatus);
                }
            }

            // A negative depth preference disables the depth check.
            if (MacroscopePreferencesManager.GetDepth() >= 0)
            {
                int Depth = MacroscopeHttpUrlUtils.FindUrlDepth(Url: Url);
                if (Depth > MacroscopePreferencesManager.GetDepth())
                {
                    this.DebugMsg(string.Format("URL Too Deep: {0}", Depth));
                    FetchStatus = MacroscopeConstants.FetchStatus.SKIPPED;
                    return(FetchStatus);
                }
            }

            /** ------------------------------------------------------------------ **/

            // Execute the document; a false result is recorded as ERROR.
            if (!await msDoc.Execute())
            {
                this.DebugMsg(string.Format("EXECUTE FAILED: {0}", Url));
                FetchStatus = MacroscopeConstants.FetchStatus.ERROR;
            }

            /** ------------------------------------------------------------------ **/



            /** ------------------------------------------------------------------ **/

            // Post-processing. This bare block runs unconditionally, including
            // after a failed Execute() above.
            {
                // On 401 with BASIC authentication: request credentials for the
                // document's host/realm and put the URL back on the queue.
                if (msDoc.GetStatusCode() == HttpStatusCode.Unauthorized)
                {
                    if (msDoc.GetAuthenticationType() == MacroscopeConstants.AuthenticationType.BASIC)
                    {
                        MacroscopeCredentialsHttp CredentialsHttp = this.JobMaster.GetCredentialsHttp();

                        CredentialsHttp.EnqueueCredentialRequest(
                            Domain: msDoc.GetHostAndPort(),
                            Realm: msDoc.GetAuthenticationRealm(),
                            Url: msDoc.GetUrl()
                            );

                        this.JobMaster.AddUrlQueueItem(Url: msDoc.GetUrl());
                    }
                }

                if (msDoc.GetIsRedirect())
                {
                    this.DebugMsg(string.Format("REDIRECTION DETECTED GetUrl: {0}", msDoc.GetUrl()));
                    this.DebugMsg(string.Format("REDIRECTION DETECTED From: {0}", msDoc.GetUrlRedirectFrom()));

                    if (MacroscopePreferencesManager.GetCheckRedirects())
                    {
                        // NOTE(review): Hostname and HostnameFrom are assigned but
                        // never read in this method.
                        string Hostname      = msDoc.GetHostAndPort();
                        string HostnameFrom  = MacroscopeAllowedHosts.ParseHostnameFromUrl(msDoc.GetUrlRedirectFrom());
                        string UrlRedirectTo = msDoc.GetUrlRedirectTo();
                        string HostnameTo    = MacroscopeAllowedHosts.ParseHostnameFromUrl(UrlRedirectTo);

                        this.DebugMsg(string.Format("REDIRECTION DETECTED UrlRedirectTo: {0}", UrlRedirectTo));
                        this.DebugMsg(string.Format("REDIRECTION DETECTED HostnameTo: {0}", HostnameTo));

                        // When redirects are followed, the redirect target is
                        // added to the allowed hosts: always if external links are
                        // checked, otherwise only if the target is internal.
                        if (MacroscopePreferencesManager.GetFollowRedirects())
                        {
                            if (MacroscopePreferencesManager.GetCheckExternalLinks())
                            {
                                this.AllowedHosts.AddFromUrl(Url: UrlRedirectTo);
                            }
                            else
                            {
                                if (this.AllowedHosts.IsInternalUrl(Url: UrlRedirectTo))
                                {
                                    this.AllowedHosts.AddFromUrl(Url: UrlRedirectTo);
                                }
                            }
                        }
                    }

                    // The redirect target is queued regardless of the
                    // check-redirects preference.
                    this.JobMaster.AddUrlQueueItem(Url: msDoc.GetUrlRedirectTo());
                }
                else
                {
                    this.ProcessHrefLangLanguages(msDoc);         // Process Languages from HrefLang

                    this.JobMaster.ProcessOutlinks(msDoc: msDoc); // Process Outlinks from document
                }

                // NOTE(review): this assignment is unconditional, so it replaces
                // any NETWORK_ERROR, ROBOTS_DISALLOWED or ERROR value stored in
                // the local FetchStatus earlier - confirm callers are meant to
                // see SUCCESS in those cases.
                FetchStatus = MacroscopeConstants.FetchStatus.SUCCESS;
            }

            /** ------------------------------------------------------------------ **/

            // Mark the URL as visited only if the collection holds this document.
            if (DocCollection.ContainsDocument(msDoc: msDoc))
            {
                JobHistory.VisitedHistoryItem(Url: Url);
            }
            else
            {
                this.DebugMsg(string.Format("OOPS: {0}", Url));
            }

            /** ------------------------------------------------------------------ **/

            return(FetchStatus);
        }
Example #18
0
        /**************************************************************************/

        public async void Execute()
        {
            int MaxFetches = MacroscopePreferencesManager.GetMaxFetchesPerWorker();

            while (MaxFetches > 0)
            {
                if (this.JobMaster.GetThreadsStop())
                {
                    this.DebugMsg(string.Format("JobMaster.GetThreadsStop: {0}", this.JobMaster.GetThreadsStop()));
                    break;
                }
                else
                {
                    MacroscopeJobItem JobItem           = this.JobMaster.GetUrlQueueItem();
                    string            Url               = null;
                    string            RedirectedFromUrl = null;

                    if (JobItem != null)
                    {
                        Url = JobItem.GetItemUrl();
                        RedirectedFromUrl = JobItem.GetItemRedirectedFromUrl();
                    }

                    if (!string.IsNullOrEmpty(Url))
                    {
                        if (!this.CheckIncludeExcludeUrl(Url))
                        {
                            Url = null;
                        }
                    }

                    if (!string.IsNullOrEmpty(Url))
                    {
                        if (
                            !MacroscopePreferencesManager.GetCrawlParentDirectories() &&
                            !MacroscopePreferencesManager.GetCrawlChildDirectories() &&
                            Url != this.JobMaster.GetStartUrl())
                        {
                            Url = null;
                        }
                        else if (
                            !MacroscopePreferencesManager.GetCrawlParentDirectories() ||
                            !MacroscopePreferencesManager.GetCrawlChildDirectories())
                        {
                            this.DebugMsg(string.Format("Running Parent/Child Check: {0}", Url));

                            if (
                                MacroscopePreferencesManager.GetCrawlParentDirectories() &&
                                (!string.IsNullOrEmpty(Url)))
                            {
                                if (!MacroscopeHttpUrlUtils.IsWithinParentDirectory(StartUrl: this.JobMaster.GetParentStartingDirectory(), Url: Url))
                                {
                                    Url = null;
                                }
                            }

                            if (
                                MacroscopePreferencesManager.GetCrawlChildDirectories() &&
                                (!string.IsNullOrEmpty(Url)))
                            {
                                if (!MacroscopeHttpUrlUtils.IsWithinChildDirectory(StartUrl: this.JobMaster.GetChildStartingDirectory(), Url: Url))
                                {
                                    Url = null;
                                }
                            }
                        }
                        else
                        {
                            this.DebugMsg(string.Format("Skipping Parent/Child Check: {0}", Url));
                        }
                    }

                    if (!string.IsNullOrEmpty(Url))
                    {
                        if (MacroscopePreferencesManager.GetDepth() >= 0)
                        {
                            if (MacroscopeHttpUrlUtils.FindUrlDepth(Url: Url) > MacroscopePreferencesManager.GetDepth())
                            {
                                this.DebugMsg(string.Format("URL Too Deep: {0}", Url));
                                Url = null;
                            }
                        }
                    }

                    if (!string.IsNullOrEmpty(Url))
                    {
                        this.DebugMsg(string.Format("Execute: {0}", Url));

                        int Tries = MacroscopePreferencesManager.GetMaxRetries();

                        JobHistory.AddHistoryItem(Url: Url);

                        do
                        {
                            this.DebugMsg(string.Format("Trying Fetch: {0} :: {1}", Tries, Url));

                            MacroscopeConstants.FetchStatus FetchStatus = MacroscopeConstants.FetchStatus.VOID;

                            try
                            {
                                if (!string.IsNullOrEmpty(RedirectedFromUrl))
                                {
                                    FetchStatus = await this.Fetch(Url, RedirectedFromUrl);
                                }
                                else
                                {
                                    FetchStatus = await this.Fetch(Url);
                                }
                            }
                            catch (Exception ex)
                            {
                                this.DebugMsg(string.Format("FetchStatus: {0}", ex.Message));
                                this.DebugMsg(string.Format("Url: {0}", Url));
                                this.DebugMsg(string.Format("FetchStatus: {0}", FetchStatus));
                            }

                            switch (FetchStatus)
                            {
                            case MacroscopeConstants.FetchStatus.ERROR:
                                this.DebugMsg(string.Format("Fetch Failed: {0} :: {1}", Tries, Url));
                                Thread.Sleep(25);
                                break;

                            case MacroscopeConstants.FetchStatus.NETWORK_ERROR:
                                this.DebugMsg(string.Format("Fetch Failed: {0} :: {1}", Tries, Url));
                                Thread.Sleep(25);
                                break;

                            default:
                                this.JobMaster.NotifyWorkersFetched(Url: Url);
                                Tries = 0;
                                break;
                            }

                            Tries--;
                        } while(Tries > 0);

                        if (this.CrawlDelay > 0)
                        {
                            this.DebugMsg(string.Format("CRAWL DELAY: Sleeping for {0} seconds...", this.CrawlDelay));
                            Thread.Sleep(CrawlDelay * 1000);
                        }
                    }
                }

                MaxFetches--;

                //Thread.Yield();
            }

            this.JobMaster.NotifyWorkersDone();
        }
        /** -------------------------------------------------------------------- **/

        /// <summary>
        /// Tests whether the directory portion of <paramref name="Url"/> starts with the
        /// starting directory derived from <paramref name="StartUrl"/>.
        /// </summary>
        /// <param name="StartUrl">URL handed to <c>DetermineStartingDirectory</c> to obtain the prefix.</param>
        /// <param name="Url">URL to test; must parse as an absolute http/https URI to qualify.</param>
        /// <returns>
        /// <c>true</c> when the rebuilt "scheme://host:port/directory/" string begins with the
        /// starting directory (ordinal comparison); <c>false</c> otherwise, including when
        /// <paramref name="Url"/> cannot be parsed or is not http/https.
        /// </returns>
        public static bool IsWithinChildDirectory(string StartUrl, string Url)
        {
            Uri    ParsedUri  = null;
            string PortSuffix = "";

            // Parsing failures are logged and reported as "not within" rather than thrown.
            try
            {
                ParsedUri = new Uri(Url);

                // NOTE(review): Uri.Port reports the scheme's default port when the URL has none,
                // so the rebuilt string normally carries an explicit port - confirm that
                // DetermineStartingDirectory produces the same form.
                if (ParsedUri.Port > 0)
                {
                    PortSuffix = string.Format(":{0}", ParsedUri.Port);
                }
            }
            catch (UriFormatException ex)
            {
                DebugMsgStatic(string.Format("UriFormatException: {0}", ex.Message));
            }
            catch (Exception ex)
            {
                DebugMsgStatic(string.Format("Exception: {0}", ex.Message));
            }

            if (ParsedUri == null)
            {
                return(false);
            }

            string SchemeLowered = ParsedUri.Scheme.ToLower();

            if ((SchemeLowered != "http") && (SchemeLowered != "https"))
            {
                return(false);
            }

            string StartingDirectory = MacroscopeHttpUrlUtils.DetermineStartingDirectory(Url: StartUrl);

            // Drop the final path segment (the "file" part) so only the directory remains.
            string DirectoryPath = Regex.Replace(ParsedUri.AbsolutePath, "/[^/]*$", "/", RegexOptions.IgnoreCase);

            if (DirectoryPath.Length == 0)
            {
                DirectoryPath = "/";
            }

            string CandidateDirectory = string.Concat(
                ParsedUri.Scheme,
                "://",
                ParsedUri.Host,
                PortSuffix,
                DirectoryPath
                );

            return(
                (CandidateDirectory.Length >= StartingDirectory.Length) &&
                CandidateDirectory.StartsWith(StartingDirectory, StringComparison.Ordinal));
        }
Example #20
0
        /**************************************************************************/

        /// <summary>
        /// Scans the collection for documents that no other valid document links to.
        /// </summary>
        /// <remarks>
        /// Each valid document is compared against the outgoing hyperlinks (both the target URL
        /// and the raw target URL) of every other valid document. Documents with no match get
        /// the ORPHAN1/ORPHAN2 remarks and are added to the result; documents with a match have
        /// those remarks removed.
        /// </remarks>
        /// <param name="DocCollection">Collection whose documents are iterated.</param>
        /// <returns>The list of documents found to be orphaned.</returns>
        public MacroscopeDocumentList AnalyzeOrphanedDocumentsInCollection(MacroscopeDocumentCollection DocCollection)
        {
            MacroscopeDocumentList Orphans = new MacroscopeDocumentList();

            foreach (MacroscopeDocument Candidate in DocCollection.IterateDocuments())
            {
                string CandidateUrl   = Candidate.GetUrl();
                bool   HasInboundLink = false;

                if (!IsValidDocument(msDoc: Candidate))
                {
                    continue;
                }

                foreach (MacroscopeDocument Referrer in DocCollection.IterateDocuments())
                {
                    // Skip the candidate itself, and any referrer that is not a valid document.
                    if (
                        MacroscopeHttpUrlUtils.CompareUrls(UrlLeft: CandidateUrl, UrlRight: Referrer.GetUrl()) ||
                        !this.IsValidDocument(msDoc: Referrer))
                    {
                        continue;
                    }

                    foreach (MacroscopeHyperlinkOut Link in Referrer.IterateHyperlinksOut())
                    {
                        string TargetUrl    = Link.GetTargetUrl();
                        string RawTargetUrl = Link.GetRawTargetUrl();

                        HasInboundLink =
                            MacroscopeHttpUrlUtils.CompareUrls(UrlLeft: CandidateUrl, UrlRight: TargetUrl) ||
                            MacroscopeHttpUrlUtils.CompareUrls(UrlLeft: CandidateUrl, UrlRight: RawTargetUrl);

                        if (HasInboundLink)
                        {
                            break;
                        }
                    }

                    // One inbound link is enough; stop scanning further referrers.
                    if (HasInboundLink)
                    {
                        break;
                    }
                }

                if (HasInboundLink)
                {
                    Candidate.RemoveRemark("ORPHAN1");
                    Candidate.RemoveRemark("ORPHAN2");
                }
                else
                {
                    Orphans.AddDocument(msDoc: Candidate);
                    Candidate.AddRemark("ORPHAN1", "This appears to be an orphaned page, not linked to from any other HTML page in this collection.");
                    Candidate.AddRemark("ORPHAN2", "This page appears to only be referenced from one or more sitemaps.");
                }
            }

            return(Orphans);
        }
        /** -------------------------------------------------------------------- **/

        /// <summary>
        /// Fetches this document as a PDF and fills in its metadata (title, author, description,
        /// keywords), body text, data-extractor results and outgoing links.
        /// </summary>
        /// <remarks>
        /// Request failures are recorded as a remark and passed to <c>ProcessErrorCondition</c>
        /// rather than rethrown. If the response body cannot be read or parsed, the status code is
        /// set to <see cref="HttpStatusCode.BadRequest"/>, the content length to 0, and every
        /// PDF-dependent step below is skipped.
        /// </remarks>
        private async Task _ProcessPdfPage()
        {
            MacroscopeHttpTwoClient         Client         = this.DocCollection.GetJobMaster().GetHttpClient();
            MacroscopeHttpTwoClientResponse ClientResponse = null;
            string ResponseErrorCondition = null;

            try
            {
                ClientResponse = await Client.Get(
                    this.GetUri(),
                    this.ConfigurePdfPageRequestHeadersCallback,
                    this.PostProcessRequestHttpHeadersCallback
                    );
            }
            catch (MacroscopeDocumentException ex)
            {
                this.DebugMsg(string.Format("_ProcessPdfPage :: MacroscopeDocumentException: {0}", ex.Message));
                ResponseErrorCondition = ex.Message;
                this.AddRemark("_ProcessPdfPage", ex.Message);
            }
            catch (Exception ex)
            {
                this.DebugMsg(string.Format("_ProcessPdfPage :: Exception: {0}", ex.Message));
                ResponseErrorCondition = ex.Message;
                this.AddRemark("_ProcessPdfPage", ex.Message);
            }

            if (ClientResponse != null)
            {
                MacroscopePdfTools PdfTools;

                this.ProcessResponseHttpHeaders(Response: ClientResponse);

                { // Probe Locale
                    this.Locale = "x-default"; // Implement locale probing
                    this.SetHreflang(HrefLangLocale: this.Locale, Url: this.DocUrl);
                }

                { // Canonical
                    this.Canonical = this.DocUrl;
                    this.DebugMsg(string.Format("CANONICAL: {0}", this.Canonical));
                }

                /** Get Response Body ---------------------------------------------- **/

                try
                {
                    byte[] RawData = ClientResponse.GetContentAsBytes();
                    this.SetContentLength(Length: RawData.Length);

                    PdfTools = new MacroscopePdfTools(PdfData: RawData);

                    if (PdfTools.GetHasError())
                    {
                        this.AddRemark("CORRUPT_PDF", Observation: PdfTools.GetErrorMessage());
                    }

                    this.SetWasDownloaded(true);
                }
                catch (Exception ex)
                {
                    // PdfTools stays null from here on; every later section must check for that.
                    this.DebugMsg(string.Format("Exception: {0}", ex.Message));
                    this.SetStatusCode(HttpStatusCode.BadRequest);
                    PdfTools = null;
                    this.SetContentLength(Length: 0);
                }

                /** Title ---------------------------------------------------------- **/

                if (PdfTools != null)
                {
                    string Text = PdfTools.GetTitle();

                    if (!string.IsNullOrEmpty(Text))
                    {
                        this.SetTitle(Text, MacroscopeConstants.TextProcessingMode.NO_PROCESSING);
                        this.DebugMsg(string.Format("TITLE: {0}", this.GetTitle()));
                    }
                    else
                    {
                        this.DebugMsg(string.Format("TITLE: {0}", "MISSING"));
                    }
                }

                /** Author --------------------------------------------------------- **/

                if (PdfTools != null)
                {
                    string Text = PdfTools.GetAuthor();

                    if (!string.IsNullOrEmpty(Text))
                    {
                        this.SetAuthor(AuthorText: Text, ProcessingMode: MacroscopeConstants.TextProcessingMode.NO_PROCESSING);
                        this.DebugMsg(string.Format("AUTHOR: {0}", this.GetAuthor()));
                    }
                    else
                    {
                        this.DebugMsg(string.Format("AUTHOR: {0}", "MISSING"));
                    }
                }

                /** Description ---------------------------------------------------- **/

                if (PdfTools != null)
                {
                    string Text = PdfTools.GetDescription();

                    // Log label fixed: this section previously logged under the "TITLE" label.
                    if (!string.IsNullOrEmpty(Text))
                    {
                        this.SetDescription(Text, MacroscopeConstants.TextProcessingMode.NO_PROCESSING);
                        this.DebugMsg(string.Format("DESCRIPTION: {0}", this.GetDescription()));
                    }
                    else
                    {
                        this.DebugMsg(string.Format("DESCRIPTION: {0}", "MISSING"));
                    }
                }

                /** Metadata Keywords ---------------------------------------------- **/

                if (PdfTools != null)
                {
                    string Text = PdfTools.GetKeywords();

                    if (!string.IsNullOrEmpty(Text))
                    {
                        this.SetKeywords(KeywordsText: Text);
                        this.DebugMsg(string.Format("KEYWORDS: {0}", this.GetKeywords()));
                    }
                    else
                    {
                        this.DebugMsg(string.Format("KEYWORDS: {0}", "MISSING"));
                    }
                }

                /** Body Text ------------------------------------------------------ **/

                if (PdfTools != null)
                {
                    this.SetBodyText(Text: "");

                    if (PdfTools.GetHasError())
                    {
                        this.AddRemark("PDF_ERROR", Observation: PdfTools.GetErrorMessage());
                    }
                    else
                    {
                        string Text = PdfTools.GetTextAsString();
                        if (!string.IsNullOrEmpty(Text))
                        {
                            this.SetDocumentText(Text: Text);
                            this.SetBodyText(Text: Text);
                        }
                    }

                    this.DebugMsg(string.Format("BODY TEXT: {0}", this.GetBodyTextRaw()));
                }

                /** Data Extractors ------------------------------------------------ **/

                if (!string.IsNullOrEmpty(this.GetBodyTextRaw()))
                {
                    if (MacroscopePreferencesManager.GetDataExtractorsEnable())
                    {
                        if (MacroscopePreferencesManager.GetDataExtractorsApplyToPdf())
                        {
                            string Text = this.GetBodyTextRaw();
                            this.ProcessGenericDataExtractors(GenericText: Text);
                        }
                    }
                }

                /** Out Links Text ------------------------------------------------- **/

                if (this.GetDocumentTextRawLength() > 0)
                {
                    if (this.GetIsInternal())
                    {
                        string Text = this.GetDocumentTextRaw();
                        this.ProcessPureTextOutlinks(TextDoc: Text, LinkType: MacroscopeConstants.InOutLinkType.PDF);
                    }
                }

                /** Out Links in Annotations --------------------------------------- **/

                // PdfTools is null when the body could not be parsed; guard it here so that
                // document text left over from elsewhere cannot lead to a null dereference.
                if ((PdfTools != null) && this.GetIsInternal() && (this.GetDocumentTextRawLength() > 0))
                {
                    List <KeyValuePair <string, string> > AnnotationOutLinks = PdfTools.GetOutLinks();

                    // TODO: Implement extraction of text that underlies the link annotation

                    if (AnnotationOutLinks != null)
                    {
                        foreach (KeyValuePair <string, string> AnnotationOutLinkPair in AnnotationOutLinks)
                        {
                            MacroscopeHyperlinkOut HyperlinkOut = null;
                            string AnnotationOutLinkUrlAbs;

                            AnnotationOutLinkUrlAbs = MacroscopeHttpUrlUtils.MakeUrlAbsolute(
                                BaseHref: this.BaseHref,
                                BaseUrl: this.DocUrl,
                                Url: AnnotationOutLinkPair.Key
                                );

                            HyperlinkOut = this.HyperlinksOut.Add(LinkType: MacroscopeConstants.HyperlinkType.PDF, UrlTarget: AnnotationOutLinkUrlAbs);
                            HyperlinkOut.SetRawTargetUrl(TargetUrl: AnnotationOutLinkUrlAbs);
                            HyperlinkOut.SetAltText(AnnotationOutLinkPair.Value);
                            HyperlinkOut.SetAnchorText(AnnotationOutLinkPair.Value);
                            HyperlinkOut.SetTitle(AnnotationOutLinkPair.Value);
                            HyperlinkOut.SetDoFollow();
                            HyperlinkOut.SetMethod(Method: "GET");

                            this.AddDocumentOutlink(AbsoluteUrl: AnnotationOutLinkUrlAbs, LinkType: MacroscopeConstants.InOutLinkType.PDF, Follow: true);
                        }
                    }
                }

                /** ---------------------------------------------------------------- **/
            }

            if (ResponseErrorCondition != null)
            {
                this.ProcessErrorCondition(ResponseErrorCondition);
            }
        }
        /** -------------------------------------------------------------------- **/

        /// <summary>
        /// Appends one sitemap <c>url</c> element to <paramref name="UrlSetNode"/> for each
        /// outgoing hyperlink of <paramref name="msDoc"/> that points at a PDF on an allowed host
        /// that is also the same host as the document.
        /// </summary>
        /// <param name="msDoc">Document whose outgoing hyperlinks are scanned.</param>
        /// <param name="SitemapXml">XML document used to create the new nodes.</param>
        /// <param name="UrlSetNode">Parent node that receives the generated <c>url</c> elements.</param>
        /// <param name="Dedupe">
        /// Caller-supplied set of URLs already seen. Every parseable link target is recorded here,
        /// whether or not it ends up in the sitemap.
        /// </param>
        private void GenerateXmlSitemapPdfEntries(
            MacroscopeDocument msDoc,
            XmlDocument SitemapXml,
            XmlElement UrlSetNode,
            Dictionary <string, bool> Dedupe
            )
        {
            foreach (MacroscopeHyperlinkOut HyperlinkOut in msDoc.IterateHyperlinksOut())
            {
                string Url = HyperlinkOut.GetTargetUrl();
                Uri    UrlParsed;

                // A null or malformed link target is skipped; constructing the Uri directly would
                // throw and abort sitemap generation for every remaining link.
                if (!Uri.TryCreate(Url, UriKind.Absolute, out UrlParsed))
                {
                    continue;
                }

                if (Dedupe.ContainsKey(Url))
                {
                    continue;
                }

                Dedupe.Add(Url, true);

                // Ordinal, case-insensitive suffix test; no lower-cased copy of the path is needed.
                if (!UrlParsed.AbsolutePath.EndsWith(".pdf", StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                if (!this.DocCollection.GetAllowedHosts().IsAllowedFromUrl(Url: Url))
                {
                    continue;
                }

                if (!MacroscopeHttpUrlUtils.VerifySameHost(BaseUrl: msDoc.GetUrl(), Url: Url))
                {
                    continue;
                }

                XmlElement UrlNode = SitemapXml.CreateElement(string.Empty, "url", MacroscopeSitemapGenerator.XmlNamespace);
                UrlSetNode.AppendChild(UrlNode);

                { // <loc>
                    XmlElement EntryNode = SitemapXml.CreateElement(string.Empty, "loc", MacroscopeSitemapGenerator.XmlNamespace);
                    XmlText    TextNode  = SitemapXml.CreateTextNode(Url);
                    UrlNode.AppendChild(EntryNode);
                    EntryNode.AppendChild(TextNode);
                }

                { // <changefreq>
                    XmlElement EntryNode = SitemapXml.CreateElement(string.Empty, "changefreq", MacroscopeSitemapGenerator.XmlNamespace);
                    XmlText    TextNode  = SitemapXml.CreateTextNode("daily");
                    UrlNode.AppendChild(EntryNode);
                    EntryNode.AppendChild(TextNode);
                }

                { // <priority>
                    XmlElement EntryNode = SitemapXml.CreateElement(string.Empty, "priority", MacroscopeSitemapGenerator.XmlNamespace);
                    XmlText    TextNode  = SitemapXml.CreateTextNode("1.0");
                    UrlNode.AppendChild(EntryNode);
                    EntryNode.AppendChild(TextNode);
                }
            }
        }
        /**************************************************************************/

        /// <summary>
        /// Follows a redirect chain hop by hop, probing each redirect target, and returns every
        /// step that was visited.
        /// </summary>
        /// <param name="StatusCode">Status code of the first response in the chain.</param>
        /// <param name="StartUrl">URL that produced the first response.</param>
        /// <param name="RedirectUrl">Redirect target reported by the first response.</param>
        /// <returns>
        /// The chain, beginning with the starting entry. Following stops at the first response
        /// that is not a redirect, at a redirect that carries no target URL, or once the
        /// configured maximum number of hops has been made. Exceptions are logged, not rethrown.
        /// </returns>
        public async Task <List <MacroscopeRedirectChainDocStruct> > AnalyzeRedirectChains(
            HttpStatusCode StatusCode,
            string StartUrl,
            string RedirectUrl
            )
        {
            // 308 Permanent Redirect; written as a cast so that it also compiles on frameworks
            // whose HttpStatusCode enum has no named member for it.
            const HttpStatusCode PermanentRedirect308 = (HttpStatusCode)308;

            List <MacroscopeRedirectChainDocStruct> RedirectChain = new List <MacroscopeRedirectChainDocStruct>();
            int MaxHops = MacroscopePreferencesManager.GetRedirectChainsMaxHops();
            MacroscopeRedirectChainDocStruct StructStart;
            int    IHOP    = 0;
            string PrevUrl = null;
            string NextUrl = null;

            try
            {
                try
                {
                    StructStart = new MacroscopeRedirectChainDocStruct(
                        NewStatusCode: StatusCode,
                        NewUrl: StartUrl,
                        NewRedirectUrl: RedirectUrl
                        );

                    RedirectChain.Add(StructStart);

                    PrevUrl = StructStart.Url;
                    NextUrl = StructStart.RedirectUrl;
                }
                catch (Exception ex)
                {
                    this.DebugMsg(ex.Message);
                }

                do
                {
                    MacroscopeRedirectChainDocStruct StructNext;

                    try
                    {
                        // Redirect targets may be relative; resolve them against the previous hop.
                        if (!string.IsNullOrEmpty(PrevUrl))
                        {
                            NextUrl = MacroscopeHttpUrlUtils.MakeUrlAbsolute(PrevUrl, NextUrl);
                        }

                        StructNext = await this.Probe(Url : NextUrl);

                        RedirectChain.Add(StructNext);

                        PrevUrl = StructNext.Url;
                        NextUrl = StructNext.RedirectUrl;

                        switch (StructNext.StatusCode)
                        {
                        case HttpStatusCode.Found:
                        case HttpStatusCode.Moved:
                        case HttpStatusCode.SeeOther:
                        case HttpStatusCode.TemporaryRedirect:
                        case PermanentRedirect308:
                            // A redirect without a target cannot be followed any further.
                            if (string.IsNullOrEmpty(NextUrl))
                            {
                                IHOP = MaxHops;
                            }
                            break;

                        default:
                            // Anything that is not a redirect ends the chain.
                            IHOP = MaxHops;
                            break;
                        }
                    }
                    catch (Exception ex)
                    {
                        this.DebugMsg(ex.Message);
                    }

                    IHOP++;
                }while(IHOP < MaxHops);
            }
            catch (Exception ex)
            {
                this.DebugMsg(ex.Message);
            }

            return(RedirectChain);
        }
        /**************************************************************************/

        /// <summary>
        /// Parses an HTTP <c>Link</c> header, applies each recognised relation (canonical,
        /// shortlink, first, prev, next, last) to this document, and records every linked URL
        /// as a RELATED outlink.
        /// </summary>
        /// <remarks>
        /// Expected form: <c>&lt;http://www.example.com/downloads/white-paper.pdf&gt;; rel="canonical"</c>,
        /// optionally repeated and comma-separated. Only links whose <c>rel</c> parameter directly
        /// follows the URL and is double-quoted are recognised.
        /// See https://webmasters.googleblog.com/2011/09/pagination-with-relnext-and-relprev.html
        /// </remarks>
        /// <param name="HttpLinkHeader">Raw header value; null or empty is ignored.</param>
        private void ProcessHttpLinkHeader(string HttpLinkHeader)
        {
            if (string.IsNullOrEmpty(HttpLinkHeader))
            {
                return;
            }

            // Match every <url>; rel="..." pair in the whole header, instead of splitting on
            // commas first: URLs may contain commas, and each link must be handled exactly once.
            MatchCollection LinkMatches = Regex.Matches(
                HttpLinkHeader,
                "<([^<>]+)>\\s*;\\s*rel=\"([^\"]+)\"",
                RegexOptions.IgnoreCase
                );

            foreach (Match LinkMatch in LinkMatches)
            {
                string Url = LinkMatch.Groups[1].Value;
                string Rel = LinkMatch.Groups[2].Value;
                string LinkUrl;
                string LinkUrlAbs = null;
                MacroscopeConstants.InOutLinkType LinkType = MacroscopeConstants.InOutLinkType.RELATED;

                if (string.IsNullOrEmpty(Rel) || string.IsNullOrEmpty(Url))
                {
                    continue;
                }

                switch (Rel.ToLowerInvariant())
                {
                case @"canonical":
                    this.SetCanonical(Url: Url);
                    break;

                case @"shortlink":
                    this.SetLinkShortLink(Url: Url);
                    break;

                case @"first":
                    this.SetLinkFirst(Url: Url);
                    break;

                case @"prev":
                    this.SetLinkPrev(Url: Url);
                    break;

                case @"next":
                    this.SetLinkNext(Url: Url);
                    break;

                case @"last":
                    this.SetLinkLast(Url: Url);
                    break;

                default:
                    this.DebugMsgForced(string.Format("Link Rel: {0} :: {1}", Rel, Url));
                    break;
                }

                LinkUrl = Uri.UnescapeDataString(stringToUnescape: Url);

                // Guard on LinkUrl: the absolute URL has not been computed yet at this point.
                if (string.IsNullOrEmpty(LinkUrl))
                {
                    continue;
                }

                try
                {
                    LinkUrlAbs = MacroscopeHttpUrlUtils.MakeUrlAbsolute(
                        BaseHref: this.GetBaseHref(),
                        BaseUrl: this.DocUrl,
                        Url: LinkUrl
                        );
                }
                catch (Exception ex)
                {
                    // One unresolvable URL must not stop the remaining links from being processed.
                    this.DebugMsg(string.Format("ProcessHttpLinkHeader: {0}", ex.Message));
                }

                if (!string.IsNullOrEmpty(LinkUrlAbs))
                {
                    this.AddDocumentOutlink(
                        AbsoluteUrl: LinkUrlAbs,
                        LinkType: LinkType,
                        Follow: true
                        );
                }
            }

            return;
        }