Esempio n. 1
0
        /// <summary>
        /// Returns the <c>Localization</c> text that describes the given kind of broken link.
        /// </summary>
        /// <param name="brokenLinkType">The kind of broken link to describe.</param>
        /// <returns>The matching <c>Localization.BrokenLink_*</c> value.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown for every value other than External, MediaLibrary, Page, PageNotPublished and Relative.
        /// </exception>
        private string Describe(BrokenLinkType brokenLinkType)
        {
            if (brokenLinkType == BrokenLinkType.External)
            {
                return Localization.BrokenLink_External;
            }

            if (brokenLinkType == BrokenLinkType.MediaLibrary)
            {
                return Localization.BrokenLink_MediaLibrary;
            }

            if (brokenLinkType == BrokenLinkType.Page)
            {
                return Localization.BrokenLink_Page;
            }

            if (brokenLinkType == BrokenLinkType.PageNotPublished)
            {
                return Localization.BrokenLink_PageNotPublished;
            }

            if (brokenLinkType == BrokenLinkType.Relative)
            {
                return Localization.BrokenLink_Relative;
            }

            // Anything else has no description text.
            throw new InvalidOperationException("not supported link type " + brokenLinkType);
        }
Esempio n. 2
0
        /// <summary>
        /// Builds the <c>invalidContent</c> report element for a broken link, including a short
        /// preview of the content directly before and after the link.
        /// </summary>
        /// <param name="a">The link element; its value, previous node and next node are reported.</param>
        /// <param name="link">The original link address, written to the <c>originalLink</c> attribute.</param>
        /// <param name="brokenLinkType">The kind of failure; converted to text via <c>Describe</c>.</param>
        /// <returns>
        /// An <c>invalidContent</c> element with the attributes <c>previousNode</c>, <c>originalText</c>,
        /// <c>originalLink</c>, <c>nextNode</c> and <c>errorType</c>.
        /// </returns>
        private XElement DescribeBrokenLink(XElement a, string link, BrokenLinkType brokenLinkType)
        {
            string previousnode = ToPreviewString(a.PreviousNode);
            string nextnode = ToPreviewString(a.NextNode);

            // Previews longer than 50 characters are cut down to the 40 characters closest to the link.
            if (previousnode.Length > 50)
            {
                // Text before the link: keep its end, which is adjacent to the link.
                previousnode = "..." + previousnode.Substring(previousnode.Length - 40);
            }
            if (nextnode.Length > 50)
            {
                // Text after the link: keep its beginning, which is adjacent to the link.
                // (Keeping the tail here would show text far away from the link.)
                nextnode = nextnode.Substring(0, 40) + "...";
            }

            string errorText = Describe(brokenLinkType);

            return new XElement("invalidContent",
                new XAttribute("previousNode", previousnode),
                new XAttribute("originalText", a.Value),
                new XAttribute("originalLink", link),
                new XAttribute("nextNode", nextnode),
                new XAttribute("errorType", errorText));
        }
Esempio n. 3
0
        //private object GetHostNameSyncObject(string hostname)
        //{
        //    hostname = hostname.ToLowerInvariant();

        //    return _hostnameSync.GetOrAdd(hostname, h => new object());
        //}

        /// <summary>
        /// Stores the check result for a url in <c>_brokenLinks</c> and tells whether the link is valid.
        /// </summary>
        /// <param name="url">The checked url, used as the key.</param>
        /// <param name="brokenLinkType">The result of the check.</param>
        /// <returns><c>true</c> when <paramref name="brokenLinkType"/> is <c>BrokenLinkType.None</c>.</returns>
        private bool SaveLinkCheckResult(string url, BrokenLinkType brokenLinkType)
        {
            bool linkIsValid = brokenLinkType == BrokenLinkType.None;

            // The outcome of TryAdd is intentionally not inspected.
            _brokenLinks.TryAdd(url, brokenLinkType);

            return linkIsValid;
        }
Esempio n. 4
0
        /// <summary>
        /// Creates the data needed to validate a link by making a request to it.
        /// </summary>
        /// <param name="url">The link as found in the page; converted to an absolute url first.</param>
        /// <param name="baseUrl">Base url passed to <c>UrlHelper.ToAbsoluteUrl</c>.</param>
        /// <param name="serverUrl">Server url; when <c>null</c>, <c>_serverUrl</c> is used instead.</param>
        /// <param name="brokenLinkType">Link type stored in the result's <c>LinkType</c>.</param>
        /// <returns>
        /// The validation info holding the absolute url and its host name, or <c>null</c> when the
        /// absolute url cannot be parsed as an absolute <see cref="Uri"/>.
        /// </returns>
        private RequestValidationInfo GetRequestValidationInfo(string url, string baseUrl, string serverUrl, BrokenLinkType brokenLinkType)
        {
            url = UrlHelper.ToAbsoluteUrl(url, baseUrl, serverUrl ?? _serverUrl);

            // TryCreate avoids using an exception for the expected "malformed url" case and
            // also yields null (instead of throwing) should the absolute url be null.
            Uri absoluteUri;
            if (!Uri.TryCreate(url, UriKind.Absolute, out absoluteUri))
            {
                return null;
            }

            return new RequestValidationInfo
            {
                Hostname = absoluteUri.Host,
                LinkType = brokenLinkType,
                Url = url
            };
        }
Esempio n. 5
0
        /// <summary>
        /// Builds the broken links report for the published pages the current user is allowed to read.
        /// </summary>
        /// <remarks>
        /// The method filters the pages by Read permission, renders every page of the resulting
        /// sitemap subtree and collects its links, validates the links that require a request
        /// (in parallel, grouped by host name), adds a description of every broken link to the report
        /// element of its page and finally passes the report elements to <c>BuildReportTreeRec</c>.
        /// Pages that fail to render or are not valid XML get a rendering error node; this does not
        /// influence the return value.
        /// </remarks>
        /// <param name="infoDocumentRoot">Root element handed to <c>BuildReportTreeRec</c> together with the per-page report elements.</param>
        /// <returns><c>true</c> when no broken link was found; otherwise <c>false</c>.</returns>
        public bool BuildBrokenLinksReport(XElement infoDocumentRoot)
        {
            using (new DataScope(PublicationScope.Published))
            {
                bool noInvalidLinksFound = true;

                // Get all pages present in the console
                List<IPage> actionRequiredPages = DataFacade.GetData<IPage>().ToList();

                // Check security for each page (does the user have access - no need to bother the user with pages they do not have access to)
                UserToken userToken = UserValidationFacade.GetUserToken();
                var userPermissions = PermissionTypeFacade.GetUserPermissionDefinitions(userToken.Username).ToList();
                var userGroupPermissions =
                    PermissionTypeFacade.GetUserGroupPermissionDefinitions(userToken.Username).ToList();

                // Keep only the pages the user has Read access to
                actionRequiredPages = actionRequiredPages.Where(page =>
                    PermissionTypeFacade.GetCurrentPermissionTypes(userToken, page.GetDataEntityToken(), userPermissions,
                                                                   userGroupPermissions)
                        .Contains(PermissionType.Read)).ToList();

                var pageIdsWithAccessTo = new HashSet<Guid>(actionRequiredPages.Select(p => p.Id));

                var allSitemapElements = PageStructureInfo.GetSiteMap().DescendantsAndSelf();

                var relevantElements =
                    allSitemapElements.Where(f => pageIdsWithAccessTo.Contains(new Guid(f.Attribute("Id").Value)));

                // The accessible pages plus their ancestor "Page" elements, each element once
                var minimalTree =
                    relevantElements.AncestorsAndSelf().Where(f => f.Name.LocalName == "Page").Distinct().ToList();

                var reportElements = new Hashtable<Guid, XElement>();

                var linksToCheck = new List<LinkToCheck>();

                // Rendering all the C1 pages and collecting links
                foreach (XElement pageElement in minimalTree)
                {
                    Guid pageId = new Guid(pageElement.Attribute("Id").Value);

                    IPage page = PageManager.GetPageById(pageId);
                    Verify.IsNotNull(page, "Failed to get the page");

                    // Prefer the menu title, fall back to the page title
                    XAttribute menuTitleAttribute = pageElement.Attribute("MenuTitle");
                    string pageTitle = menuTitleAttribute != null
                        ? menuTitleAttribute.Value
                        : pageElement.Attribute("Title").Value;

                    var resultPageElement = new XElement(PageElementName,
                                                         new XAttribute("Id", pageId),
                                                         new XAttribute("Title", pageTitle));

                    reportElements[pageId] = resultPageElement;

                    string htmlDocument, errorCode;

                    string url = pageElement.Attribute("URL").Value;
                    string pageServerUrl = null;

                    // Scheme detection is a non-linguistic comparison, hence ordinal
                    if (url.StartsWith("http://", StringComparison.Ordinal)
                        || url.StartsWith("https://", StringComparison.Ordinal))
                    {
                        pageServerUrl = new UrlBuilder(url).ServerUrl;
                        if (pageServerUrl == string.Empty)
                        {
                            pageServerUrl = url; /* Bug in versions < C1 4.0 beta 2 */
                        }
                    }

                    pageServerUrl = pageServerUrl ?? _serverUrl;

                    PageRenderingResult result = RenderPage(url, out htmlDocument, out errorCode);
                    if (result == PageRenderingResult.Failed)
                    {
                        resultPageElement.Add(GetRenderingErrorNode(errorCode));
                        continue;
                    }

                    // Redirects and missing pages are skipped without reporting an error
                    if (result == PageRenderingResult.Redirect || result == PageRenderingResult.NotFound)
                    {
                        continue;
                    }

                    XDocument document;
                    try
                    {
                        document = XDocument.Parse(htmlDocument);
                    }
                    catch (Exception)
                    {
                        // Any parsing failure is reported on the page as "not valid XHTML"
                        resultPageElement.Add(GetRenderingErrorNode(Localization.BrokenLinkReport_NotValidXhml));
                        continue;
                    }

                    linksToCheck.AddRange(CollectLinksToCheck(document, resultPageElement, url, pageServerUrl));
                }

                linksToCheck = linksToCheck.OrderBy(o => Guid.NewGuid()).ToList(); // Shuffling links

                // Checking external and internal links in parallel tasks - one per hostname.
                // Having 100 tasks running in parallel would fill the app pool and make the site unresponsive
                var linksGroupedByHostname = linksToCheck.Where(l => l.RequestValidationInfo != null)
                                             .GroupBy(link => link.RequestValidationInfo.Hostname).ToList();

                ParallelFacade.ForEach(linksGroupedByHostname, linkGroup =>
                {
                    foreach (var linkToCheck in linkGroup)
                    {
                        linkToCheck.BrokenLinkType = ValidateByRequest(linkToCheck.RequestValidationInfo);
                        // linkToCheck.RequestValidationInfo = null;
                    }
                });

                // Adding a description of every broken link to the report element of its page
                foreach (var link in linksToCheck)
                {
                    if (!link.BrokenLinkType.HasValue)
                    {
                        // The link got no result at all: log it and report it as a broken relative link
                        Log.LogWarning(LogTitle, "Incorrectly processed link: " + link.Href);

                        link.BrokenLinkType = BrokenLinkType.Relative;
                    }

                    BrokenLinkType brokenLinkType = link.BrokenLinkType.Value;
                    if (brokenLinkType == BrokenLinkType.None)
                    {
                        continue;
                    }

                    var brokenLinkDescriptionElement = DescribeBrokenLink(link.LinkNode, link.Href, brokenLinkType);

                    link.ReportPageNode.Add(brokenLinkDescriptionElement);

                    noInvalidLinksFound = false;
                }

                BuildReportTreeRec(infoDocumentRoot, Guid.Empty, reportElements);

                return noInvalidLinksFound;
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Parses the url and checks whether it can be validated without making an http request
        /// (which is possible for internal urls).
        /// </summary>
        /// <param name="url">The link to examine; also the key of the <c>_brokenLinks</c> lookup.</param>
        /// <param name="pageUrl">Url passed to <c>GetRequestValidationInfo</c> as the base url.</param>
        /// <param name="serverUrl">Server url passed on to <c>GetRequestValidationInfo</c>.</param>
        /// <param name="brokenLinkType">
        /// Receives the detected kind of failure, or <c>BrokenLinkType.None</c> when the link is valid
        /// or still has to be validated by a request.
        /// </param>
        /// <param name="requestValidationInfo">
        /// Receives the result of <c>GetRequestValidationInfo</c> when a request based check was prepared;
        /// otherwise <c>null</c>.
        /// </param>
        /// <returns>Whether the link is valid, broken, or needs to be validated by a request.</returns>
        private UrlPreprocessResult PreprocessUrl(
            string url,
            string pageUrl,
            string serverUrl,
            out BrokenLinkType brokenLinkType,
            out RequestValidationInfo requestValidationInfo)
        {
            requestValidationInfo = null;

            // A result stored earlier for the same url is reused as is
            BrokenLinkType storedVerdict;
            if (_brokenLinks.TryGetValue(url, out storedVerdict))
            {
                brokenLinkType = storedVerdict;

                if (storedVerdict == BrokenLinkType.None)
                {
                    return UrlPreprocessResult.Valid;
                }

                return UrlPreprocessResult.Broken;
            }

            // Trying to parse as a page url first
            PageUrlData parsedPageUrl = null;
            try
            {
                // Workaround "if" for early versions of 4.0 beta
                if (IsKnownHostname(url))
                {
                    parsedPageUrl = PageUrls.ParseUrl(url);
                }
            }
            catch (UriFormatException)
            {
                // Ignored on purpose: the url is then handled by the checks further below
            }

            if (parsedPageUrl != null)
            {
                Guid targetPageId = parsedPageUrl.PageId;
                IPage targetPage;

                using (new DataScope(parsedPageUrl.PublicationScope, parsedPageUrl.LocalizationScope))
                {
                    targetPage = PageManager.GetPageById(targetPageId);
                }

                if (targetPage != null)
                {
                    // If no PathInfo - page link is already valid
                    if (string.IsNullOrEmpty(parsedPageUrl.PathInfo))
                    {
                        brokenLinkType = BrokenLinkType.None;
                        return UrlPreprocessResult.Valid;
                    }

                    // If there's pathInfo -> making a request to check whether the link is actually broken
                    requestValidationInfo = GetRequestValidationInfo(url, pageUrl, serverUrl, BrokenLinkType.Page);

                    if (requestValidationInfo != null)
                    {
                        brokenLinkType = BrokenLinkType.None;
                        return UrlPreprocessResult.NeedToBeValidatedByRequest;
                    }

                    brokenLinkType = BrokenLinkType.Page;
                    return UrlPreprocessResult.Broken;
                }

                // The page was not found in the requested scope; for published links,
                // check whether it exists in the unpublished scope
                if (parsedPageUrl.PublicationScope == PublicationScope.Published)
                {
                    using (new DataScope(PublicationScope.Unpublished, parsedPageUrl.LocalizationScope))
                    {
                        if (PageManager.GetPageById(targetPageId) != null)
                        {
                            brokenLinkType = BrokenLinkType.PageNotPublished;

                            if (SaveLinkCheckResult(url, brokenLinkType))
                            {
                                return UrlPreprocessResult.Valid;
                            }

                            return UrlPreprocessResult.Broken;
                        }
                    }
                }

                brokenLinkType = BrokenLinkType.Page;

                if (SaveLinkCheckResult(url, brokenLinkType))
                {
                    return UrlPreprocessResult.Valid;
                }

                return UrlPreprocessResult.Broken;
            }

            // Next: media urls, validated by looking up the media file
            MediaUrlData parsedMediaUrl = MediaUrls.ParseUrl(url);

            if (parsedMediaUrl != null)
            {
                Guid mediaId = parsedMediaUrl.MediaId;
                string mediastore = parsedMediaUrl.MediaStore;

                bool mediaFileFound = DataFacade.GetData<IMediaFile>().Any(f => f.StoreId == mediastore && f.Id == mediaId);

                if (mediaFileFound)
                {
                    brokenLinkType = BrokenLinkType.None;
                }
                else
                {
                    brokenLinkType = BrokenLinkType.MediaLibrary;
                }

                if (SaveLinkCheckResult(url, brokenLinkType))
                {
                    return UrlPreprocessResult.Valid;
                }

                return UrlPreprocessResult.Broken;
            }

            // Everything else has to be validated by a request
            BrokenLinkType fallbackLinkType;
            if (UrlHelper.IsAbsoluteLink(url))
            {
                fallbackLinkType = BrokenLinkType.External;
            }
            else
            {
                fallbackLinkType = BrokenLinkType.Relative;
            }

            requestValidationInfo = GetRequestValidationInfo(url, pageUrl, serverUrl, fallbackLinkType);

            if (requestValidationInfo != null)
            {
                brokenLinkType = BrokenLinkType.None;
                return UrlPreprocessResult.NeedToBeValidatedByRequest;
            }

            brokenLinkType = fallbackLinkType;
            return UrlPreprocessResult.Broken;
        }
        /// <summary>
        /// Prepares the information required to check a link by requesting it.
        /// </summary>
        /// <param name="url">The link as written in the page; it is made absolute before parsing.</param>
        /// <param name="baseUrl">Base url given to <c>UrlHelper.ToAbsoluteUrl</c>.</param>
        /// <param name="serverUrl">Server url; <c>_serverUrl</c> is used when this is <c>null</c>.</param>
        /// <param name="brokenLinkType">Value stored in <c>LinkType</c> of the result.</param>
        /// <returns>
        /// A <c>RequestValidationInfo</c> with the absolute url and its host name, or <c>null</c> when
        /// the absolute url is not a well-formed absolute <see cref="Uri"/>.
        /// </returns>
        private RequestValidationInfo GetRequestValidationInfo(string url, string baseUrl, string serverUrl, BrokenLinkType brokenLinkType)
        {
            url = UrlHelper.ToAbsoluteUrl(url, baseUrl, serverUrl ?? _serverUrl);

            // A malformed url is an expected input here, so parse without relying on exceptions;
            // a null absolute url is reported as null as well instead of throwing.
            Uri parsedUri;
            bool isWellFormed = Uri.TryCreate(url, UriKind.Absolute, out parsedUri);
            if (!isWellFormed)
            {
                return null;
            }

            return new RequestValidationInfo { Hostname = parsedUri.Host, LinkType = brokenLinkType, Url = url };
        }
        //private object GetHostNameSyncObject(string hostname)
        //{
        //    hostname = hostname.ToLowerInvariant();

        //    return _hostnameSync.GetOrAdd(hostname, h => new object());
        //}

        /// <summary>
        /// Remembers the result of a link check in <c>_brokenLinks</c>.
        /// </summary>
        /// <param name="url">Key under which the result is stored.</param>
        /// <param name="brokenLinkType">Result of the link check.</param>
        /// <returns><c>true</c> if the result is <c>BrokenLinkType.None</c>, otherwise <c>false</c>.</returns>
        private bool SaveLinkCheckResult(string url, BrokenLinkType brokenLinkType)
        {
            // Whether the entry was actually added is not relevant to the caller
            _brokenLinks.TryAdd(url, brokenLinkType);

            if (brokenLinkType == BrokenLinkType.None)
            {
                return true;
            }

            return false;
        }
        /// <summary>
        /// Looks up the description text from <c>Localization</c> for a broken link type.
        /// </summary>
        /// <param name="brokenLinkType">The broken link type to describe.</param>
        /// <returns>The corresponding <c>Localization.BrokenLink_*</c> value.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the link type is none of External, MediaLibrary, Page, PageNotPublished or Relative.
        /// </exception>
        private string Describe(BrokenLinkType brokenLinkType)
        {
            string description;

            switch (brokenLinkType)
            {
                case BrokenLinkType.External:
                    description = Localization.BrokenLink_External;
                    break;

                case BrokenLinkType.MediaLibrary:
                    description = Localization.BrokenLink_MediaLibrary;
                    break;

                case BrokenLinkType.Page:
                    description = Localization.BrokenLink_Page;
                    break;

                case BrokenLinkType.PageNotPublished:
                    description = Localization.BrokenLink_PageNotPublished;
                    break;

                case BrokenLinkType.Relative:
                    description = Localization.BrokenLink_Relative;
                    break;

                default:
                    throw new InvalidOperationException("not supported link type " + brokenLinkType);
            }

            return description;
        }
Esempio n. 10
0
        /// <summary>
        /// Parses the url and determines whether it can be validated without an http request
        /// (the case for internal urls).
        /// </summary>
        /// <param name="url">The link to examine; also used as the key into <c>_brokenLinks</c>.</param>
        /// <param name="pageUrl">Base url handed to <c>GetRequestValidationInfo</c>.</param>
        /// <param name="serverUrl">Server url handed to <c>GetRequestValidationInfo</c>.</param>
        /// <param name="brokenLinkType">
        /// Set to the kind of failure that was found, or to <c>BrokenLinkType.None</c> when the link
        /// is valid or must still be validated by a request.
        /// </param>
        /// <param name="requestValidationInfo">
        /// Set to the result of <c>GetRequestValidationInfo</c> if a request based check was prepared,
        /// <c>null</c> otherwise.
        /// </param>
        /// <returns>Valid, Broken or NeedToBeValidatedByRequest.</returns>
        private UrlPreprocessResult PreprocessUrl(
            string url,
            string pageUrl,
            string serverUrl,
            out BrokenLinkType brokenLinkType,
            out RequestValidationInfo requestValidationInfo)
        {
            requestValidationInfo = null;

            // Reuse the result of an earlier check of this url, if there is one
            BrokenLinkType earlierResult;
            bool alreadyChecked = _brokenLinks.TryGetValue(url, out earlierResult);
            if (alreadyChecked)
            {
                brokenLinkType = earlierResult;
                return earlierResult == BrokenLinkType.None
                    ? UrlPreprocessResult.Valid
                    : UrlPreprocessResult.Broken;
            }

            // Trying to parse as a page url first
            PageUrlData pageLink = null;
            try
            {
                // Workaround "if" for early versions of 4.0 beta
                if (IsKnownHostname(url))
                {
                    pageLink = PageUrls.ParseUrl(url);
                }
            }
            catch (UriFormatException)
            {
                // Deliberately ignored - the remaining checks below handle the url
            }

            if (pageLink != null)
            {
                Guid linkedPageId = pageLink.PageId;
                IPage linkedPage;

                using (new DataScope(pageLink.PublicationScope, pageLink.LocalizationScope))
                {
                    linkedPage = PageManager.GetPageById(linkedPageId);
                }

                if (linkedPage == null)
                {
                    bool linkTargetsPublishedScope = pageLink.PublicationScope == PublicationScope.Published;
                    if (linkTargetsPublishedScope)
                    {
                        // The page may exist but not be published yet
                        using (new DataScope(PublicationScope.Unpublished, pageLink.LocalizationScope))
                        {
                            IPage unpublishedPage = PageManager.GetPageById(linkedPageId);
                            if (unpublishedPage != null)
                            {
                                brokenLinkType = BrokenLinkType.PageNotPublished;
                                bool savedAsValid = SaveLinkCheckResult(url, brokenLinkType);
                                return savedAsValid ? UrlPreprocessResult.Valid : UrlPreprocessResult.Broken;
                            }
                        }
                    }

                    brokenLinkType = BrokenLinkType.Page;
                    bool pageSavedAsValid = SaveLinkCheckResult(url, brokenLinkType);
                    return pageSavedAsValid ? UrlPreprocessResult.Valid : UrlPreprocessResult.Broken;
                }

                // If no PathInfo - page link is already valid
                bool hasPathInfo = !string.IsNullOrEmpty(pageLink.PathInfo);
                if (!hasPathInfo)
                {
                    brokenLinkType = BrokenLinkType.None;
                    return UrlPreprocessResult.Valid;
                }

                // If there's pathInfo -> making a request to check whether the link is actually broken
                requestValidationInfo = GetRequestValidationInfo(url, pageUrl, serverUrl, BrokenLinkType.Page);
                if (requestValidationInfo == null)
                {
                    brokenLinkType = BrokenLinkType.Page;
                    return UrlPreprocessResult.Broken;
                }

                brokenLinkType = BrokenLinkType.None;
                return UrlPreprocessResult.NeedToBeValidatedByRequest;
            }

            // Not a page url - trying media urls next
            MediaUrlData mediaLink = MediaUrls.ParseUrl(url);

            if (mediaLink != null)
            {
                Guid mediaId = mediaLink.MediaId;
                string mediastore = mediaLink.MediaStore;

                bool mediaExist = DataFacade.GetData<IMediaFile>().Any(f => f.StoreId == mediastore && f.Id == mediaId);

                brokenLinkType = BrokenLinkType.MediaLibrary;
                if (mediaExist)
                {
                    brokenLinkType = BrokenLinkType.None;
                }

                bool mediaSavedAsValid = SaveLinkCheckResult(url, brokenLinkType);
                return mediaSavedAsValid ? UrlPreprocessResult.Valid : UrlPreprocessResult.Broken;
            }

            // Neither page nor media url: it can only be validated by a request
            bool isAbsolute = UrlHelper.IsAbsoluteLink(url);
            BrokenLinkType linkType = isAbsolute ? BrokenLinkType.External : BrokenLinkType.Relative;

            requestValidationInfo = GetRequestValidationInfo(url, pageUrl, serverUrl, linkType);
            if (requestValidationInfo == null)
            {
                brokenLinkType = linkType;
                return UrlPreprocessResult.Broken;
            }

            brokenLinkType = BrokenLinkType.None;
            return UrlPreprocessResult.NeedToBeValidatedByRequest;
        }
Esempio n. 11
0
        /// <summary>
        /// Creates the <c>invalidContent</c> element that describes a broken link in the report,
        /// together with a short preview of the content before and after the link.
        /// </summary>
        /// <param name="a">The link element whose value and neighbouring nodes are reported.</param>
        /// <param name="link">The original link address (attribute <c>originalLink</c>).</param>
        /// <param name="brokenLinkType">The kind of failure; turned into text by <c>Describe</c>.</param>
        /// <returns>
        /// The <c>invalidContent</c> element carrying the attributes <c>previousNode</c>,
        /// <c>originalText</c>, <c>originalLink</c>, <c>nextNode</c> and <c>errorType</c>.
        /// </returns>
        private XElement DescribeBrokenLink(XElement a, string link, BrokenLinkType brokenLinkType)
        {
            string previousnode = ToPreviewString(a.PreviousNode);
            string nextnode = ToPreviewString(a.NextNode);

            // Long previews (over 50 characters) are shortened to the 40 characters next to the link.
            if (previousnode.Length > 50)
            {
                // Preceding text: its last characters are the ones next to the link.
                previousnode = "..." + previousnode.Substring(previousnode.Length - 40);
            }
            if (nextnode.Length > 50)
            {
                // Following text: its first characters are the ones next to the link,
                // so the start is kept (keeping the end would show unrelated, distant text).
                nextnode = nextnode.Substring(0, 40) + "...";
            }

            string errorText = Describe(brokenLinkType);

            return new XElement("invalidContent",
                new XAttribute("previousNode", previousnode),
                new XAttribute("originalText", a.Value),
                new XAttribute("originalLink", link),
                new XAttribute("nextNode", nextnode),
                new XAttribute("errorType", errorText));
        }
Esempio n. 12
0
 /// <summary>
 /// Creates a broken link entry from a url and its link type.
 /// </summary>
 /// <param name="BrokenUrl">The url of the broken link; stored in <c>Url</c>.</param>
 /// <param name="TypeOfLink">The type of the link; stored in <c>LinkType</c>.</param>
 public BrokenLink(Uri BrokenUrl, BrokenLinkType TypeOfLink)
 {
     this.LinkType = TypeOfLink;
     this.Url = BrokenUrl;
 }
Esempio n. 13
0
        /// <summary>
        /// Validates a link by checking the cache to see if the link has already been checked and recording broken links
        /// </summary>
        /// <param name="PageUri">The Uri of the page the link is on</param>
        /// <param name="UriToValidate">The Uri of the link to check</param>
        /// <param name="originalUrl">
        /// The link as originally written (relative or absolute). When it is absolute and its authority matches
        /// neither <c>WWWUrl</c> nor the authority of <paramref name="PageUri"/>, the link is treated as external.
        /// </param>
        /// <param name="LinkType">The type of the link being checked; replaced by ExternalLink for external links</param>
        private void ValidateLink(Uri PageUri, Uri UriToValidate, string originalUrl, BrokenLinkType LinkType)
        {
            // Validate if original url is absolute and its pointing to authoring or publishing site instead of relative link
            Uri originalUri = new Uri(originalUrl, UriKind.RelativeOrAbsolute);

            if (originalUri.IsAbsoluteUri)
            {
                string originalAuthority = originalUri.GetLeftPart(UriPartial.Authority);
                string pageAuthority = PageUri.GetLeftPart(UriPartial.Authority);

                // Authorities are compared ordinally and case-insensitively; lower-casing with the
                // current culture gives wrong results for some cultures (e.g. Turkish dotted/dotless i).
                bool isWwwSite = string.Equals(originalAuthority, WWWUrl, StringComparison.OrdinalIgnoreCase);
                bool isPublishingSite = string.Equals(originalAuthority, pageAuthority, StringComparison.OrdinalIgnoreCase);

                if (!isWwwSite && !isPublishingSite)
                {
                    // External Url
                    LinkType = BrokenLinkType.ExternalLink;
                }
            }

            string linkKey = UriToValidate.ToString();

            // Every link is checked only once
            if (CheckedLinks.Contains(linkKey))
            {
                return;
            }

            if (linkKey.IndexOf("fixupredirect.aspx", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                // Links containing fixupredirect.aspx are recorded without being requested
                CheckedLinks.Add(linkKey, true);
            }
            else if (!LinkCheckerUtilities.LinkIsValid(UriToValidate))
            {
                Utility.LogInformation("Found broken link: " + linkKey);
                AddBrokenLink(PageUri, new BrokenLink(UriToValidate, LinkType));
                CheckedLinks.Add(linkKey, false);
            }
            else
            {
                // Valid link: the stored flag is false for external links and true otherwise
                CheckedLinks.Add(linkKey, LinkType != BrokenLinkType.ExternalLink);
            }
        }