示例#1
0
        /**************************************************************************/

        /// <summary>
        /// Registers an outlink discovered in a text sitemap against this document.
        /// </summary>
        /// <param name="AbsoluteUrl">Target URL of the outlink.</param>
        /// <param name="LinkType">Link type recorded on the new link.</param>
        /// <param name="Follow">Follow flag recorded on the new link.</param>
        /// <returns>
        /// The link that was added to <c>Outlinks</c>, or <c>null</c> when external
        /// link checking is switched off and the allowed-hosts list rejects the URL.
        /// </returns>
        private MacroscopeLink AddSitemapTextOutlink(
            string AbsoluteUrl,
            MacroscopeConstants.InOutLinkType LinkType,
            Boolean Follow
            )
        {
            Boolean CheckExternal = MacroscopePreferencesManager.GetCheckExternalLinks();

            // The host filter is only consulted when external links are not being checked.
            if (!CheckExternal)
            {
                MacroscopeAllowedHosts Hosts = this.DocCollection.GetAllowedHosts();
                Boolean Blocked = (Hosts != null) && (!Hosts.IsAllowedFromUrl(Url: AbsoluteUrl));

                if (Blocked)
                {
                    return(null);
                }
            }

            MacroscopeLink NewLink = new MacroscopeLink(
                SourceUrl: this.GetUrl(),
                TargetUrl: AbsoluteUrl,
                LinkType: LinkType,
                Follow: Follow
                );

            this.Outlinks.Add(NewLink);

            return(NewLink);
        }
        /** Pure Text Out Links ***************************************************/

        /// <summary>
        /// Runs the single-string overload of <c>ProcessPureTextOutlinks</c> over
        /// every entry of the supplied list, in list order.
        /// </summary>
        /// <param name="TextDoc">Text fragments to scan.</param>
        /// <param name="LinkType">Link type forwarded unchanged for every fragment.</param>
        private void ProcessPureTextOutlinks(List <string> TextDoc, MacroscopeConstants.InOutLinkType LinkType)
        {
            TextDoc.ForEach(
                Fragment => this.ProcessPureTextOutlinks(TextDoc: Fragment, LinkType: LinkType)
                );
        }
        /**************************************************************************/

        /// <summary>
        /// Registers an outlink discovered in an XML sitemap against this document.
        /// </summary>
        /// <param name="AbsoluteUrl">Target URL of the outlink.</param>
        /// <param name="LinkType">Link type recorded on the new link.</param>
        /// <param name="Follow">Follow flag recorded on the new link.</param>
        /// <returns>
        /// The link that was added to <c>Outlinks</c>, or <c>null</c> when either the
        /// allowed-hosts filter rejects the URL (external link checking off) or the
        /// link type is <c>SITEMAPXML</c> and XML fetching is disabled.
        /// </returns>
        private MacroscopeLink AddSitemapXmlOutlink(
            string AbsoluteUrl,
            MacroscopeConstants.InOutLinkType LinkType,
            Boolean Follow
            )
        {
            // Condition 1: host filter, consulted only when external links are not checked.
            Boolean HostPermitted = true;

            if (!MacroscopePreferencesManager.GetCheckExternalLinks())
            {
                MacroscopeAllowedHosts Hosts = this.DocCollection.GetAllowedHosts();

                if (Hosts != null)
                {
                    HostPermitted = Hosts.IsAllowedFromUrl(Url: AbsoluteUrl);
                }
            }

            // Condition 2: SITEMAPXML links additionally require the fetch-XML preference.
            Boolean TypePermitted = true;

            if (LinkType == MacroscopeConstants.InOutLinkType.SITEMAPXML)
            {
                TypePermitted = MacroscopePreferencesManager.GetFetchXml();
            }

            if (!(HostPermitted && TypePermitted))
            {
                return(null);
            }

            MacroscopeLink NewLink = new MacroscopeLink(
                SourceUrl: this.GetUrl(),
                TargetUrl: AbsoluteUrl,
                LinkType: LinkType,
                Follow: Follow
                );

            this.Outlinks.Add(NewLink);

            return(NewLink);
        }
示例#4
0
        /**************************************************************************/

        /// <summary>
        /// Builds a link between a source URL and a target URL and gives it a new GUID.
        /// </summary>
        /// <param name="SourceUrl">URL the link originates from; also stored as the raw source URL.</param>
        /// <param name="TargetUrl">URL the link points to; also stored as the raw target URL.</param>
        /// <param name="LinkType">Link type stored on the instance.</param>
        /// <param name="Follow">Value stored in <c>DoFollow</c>.</param>
        public MacroscopeLink(
            string SourceUrl,
            string TargetUrl,
            MacroscopeConstants.InOutLinkType LinkType,
            Boolean Follow
            )
        {
            // Identity and classification.
            this.LinkGuid = Guid.NewGuid();
            this.LinkType = LinkType;
            this.DoFollow = Follow;

            // Each URL is stored twice: once as the working value, once as the raw value.
            this.SourceUrl    = SourceUrl;
            this.RawSourceUrl = SourceUrl;

            this.TargetUrl    = TargetUrl;
            this.RawTargetUrl = TargetUrl;
        }
示例#5
0
        /**************************************************************************/

        /// <summary>
        /// Builds a link between a source URL and a target URL and gives it a new GUID.
        /// </summary>
        /// <param name="SourceUrl">URL the link originates from; passed to <c>SetSourceUrl</c> and stored as the raw source URL.</param>
        /// <param name="TargetUrl">URL the link points to; passed to <c>SetTargetUrl</c> and stored as the raw target URL.</param>
        /// <param name="LinkType">Link type stored on the instance.</param>
        /// <param name="Follow">Value stored in <c>DoFollow</c>.</param>
        public MacroscopeLink(
            string SourceUrl,
            string TargetUrl,
            MacroscopeConstants.InOutLinkType LinkType,
            bool Follow
            )
        {
            // Every instance receives its own freshly generated identifier.
            this.LinkGuid = Guid.NewGuid();

            this.LinkType = LinkType;

            // Earlier direct assignments, kept for reference; the setter calls below replace them.
            //this.SourceUrl = SourceUrl;
            //this.TargetUrl = TargetUrl;
            this.SetSourceUrl(SourceUrl: SourceUrl);
            this.SetTargetUrl(TargetUrl: TargetUrl);

            this.DoFollow = Follow;

            // The raw values are taken straight from the constructor arguments.
            // NOTE(review): the setters are not visible here; confirm they do not also touch the raw fields.
            this.RawSourceUrl = SourceUrl;
            this.RawTargetUrl = TargetUrl;
        }
        /** -------------------------------------------------------------------- **/

        /// <summary>
        /// Scans plain text for absolute http(s) URLs and registers every well-formed
        /// one as an outlink of this document via <c>AddDocumentOutlink</c>.
        /// </summary>
        /// <param name="TextDoc">Text to scan. Null or empty text is ignored.</param>
        /// <param name="LinkType">Link type assigned to every outlink that is added.</param>
        public void ProcessPureTextOutlinks(string TextDoc, MacroscopeConstants.InOutLinkType LinkType)
        {
            // Regex.Match throws on null input; there is nothing to find in empty text either.
            if (string.IsNullOrEmpty(TextDoc))
            {
                return;
            }

            Regex UrlRegex = new Regex(
                @"(https?://[^/]+/[^\s]*)",
                RegexOptions.IgnoreCase
                );

            // Punctuation that commonly surrounds a URL in running text. Trimming the
            // whole set in one pass removes mixed runs such as ".)" or ".\"" which the
            // previous one-character-at-a-time trimming left behind.
            char[] EdgePunctuation = { ',', '.', '(', ')', '"', '\'' };

            // The for-header advances the match on every iteration, including after
            // "continue", so the loop can never spin on the same match.
            for (Match UrlMatch = UrlRegex.Match(TextDoc); UrlMatch.Success; UrlMatch = UrlMatch.NextMatch())
            {
                string UrlProcessing = UrlMatch.Value.Trim().Trim(EdgePunctuation);

                if (string.IsNullOrEmpty(UrlProcessing))
                {
                    continue;
                }

                string UrlCleaned = null;

                try
                {
                    // Constructed purely for validation: the Uri constructor throws on malformed input.
                    new Uri(UrlProcessing);
                    UrlCleaned = UrlProcessing;
                }
                catch (Exception ex)
                {
                    // Deliberately best-effort: a malformed candidate is logged and skipped
                    // so that the remaining URLs in the text are still processed.
                    this.DebugMsg(string.Format("ProcessPureTextOutlinks: {0}", ex.Message));
                }

                if (UrlCleaned == null)
                {
                    continue;
                }

                MacroscopeLink Outlink = this.AddDocumentOutlink(
                    AbsoluteUrl: UrlCleaned,
                    LinkType: LinkType,
                    Follow: true
                    );

                // AddDocumentOutlink may decline to create a link, hence the null check.
                if (Outlink != null)
                {
                    Outlink.SetRawTargetUrl(TargetUrl: UrlCleaned);
                }
            }
        }
示例#7
0
        /** Link Type *************************************************************/

        /// <summary>
        /// Replaces the link type stored on this instance.
        /// </summary>
        /// <param name="LinkType">New link type value.</param>
        public void SetLinkType(MacroscopeConstants.InOutLinkType LinkType)
        {
            // "this." is required here: the parameter has the same name as the member.
            this.LinkType = LinkType;
        }
        /**************************************************************************/

        /// <summary>
        /// Parses the value of an HTTP <c>Link</c> header, e.g.
        /// <c>&lt;http://www.example.com/downloads/white-paper.pdf&gt;; rel="canonical"</c>,
        /// records the recognised relations (canonical, shortlink, first, prev, next,
        /// last) on this document, and adds every linked URL as a RELATED outlink.
        /// </summary>
        /// <remarks>
        /// See https://webmasters.googleblog.com/2011/09/pagination-with-relnext-and-relprev.html
        /// Only entries of the form <c>&lt;url&gt;; rel="value"</c> are recognised, i.e. the
        /// rel parameter must directly follow the URL and must be double-quoted.
        /// </remarks>
        /// <param name="HttpLinkHeader">Raw header value. Null or empty input is ignored.</param>
        private void ProcessHttpLinkHeader(string HttpLinkHeader)
        {
            if (string.IsNullOrEmpty(HttpLinkHeader))
            {
                return;
            }

            // Match every <url>; rel="..." entry directly against the header and handle
            // each one exactly once. This avoids splitting on commas (which may legally
            // appear inside a URL) and fixes the earlier behaviour of re-processing only
            // the last entry once per comma-separated item. "\s*" also accepts
            // ";rel=" written without a space.
            MatchCollection LinkMatches = Regex.Matches(
                HttpLinkHeader,
                "<([^<>]+)>\\s*;\\s*rel=\"([^\"]+)\""
                );

            foreach (Match LinkMatch in LinkMatches)
            {
                // Both groups use "+" quantifiers, so neither value can be empty here.
                string Url = LinkMatch.Groups[1].Value;
                string Rel = LinkMatch.Groups[2].Value;

                // Invariant lower-casing keeps the comparison independent of the current culture.
                switch (Rel.ToLowerInvariant())
                {
                case @"canonical":
                    this.SetCanonical(Url: Url);
                    break;

                case @"shortlink":
                    this.SetLinkShortLink(Url: Url);
                    break;

                case @"first":
                    this.SetLinkFirst(Url: Url);
                    break;

                case @"prev":
                    this.SetLinkPrev(Url: Url);
                    break;

                case @"next":
                    this.SetLinkNext(Url: Url);
                    break;

                case @"last":
                    this.SetLinkLast(Url: Url);
                    break;

                default:
                    this.DebugMsgForced(string.Format("Link Rel: {0} :: {1}", Rel, Url));
                    break;
                }

                string LinkUrl = Uri.UnescapeDataString(stringToUnescape: Url);

                // The guard must test the unescaped URL. It previously tested the
                // not-yet-assigned absolute URL, so no outlink was ever added.
                if (string.IsNullOrEmpty(LinkUrl))
                {
                    continue;
                }

                string LinkUrlAbs = MacroscopeHttpUrlUtils.MakeUrlAbsolute(
                    BaseHref: this.GetBaseHref(),
                    BaseUrl: this.DocUrl,
                    Url: LinkUrl
                    );

                if (!string.IsNullOrEmpty(LinkUrlAbs))
                {
                    // Every header link is recorded as RELATED, whatever its rel value.
                    this.AddDocumentOutlink(
                        AbsoluteUrl: LinkUrlAbs,
                        LinkType: MacroscopeConstants.InOutLinkType.RELATED,
                        Follow: true
                        );
                }
            }
        }