/// <summary>
/// Maps a broken link type to its localized description text.
/// </summary>
/// <param name="brokenLinkType">The kind of broken link to describe.</param>
/// <returns>The matching <c>Localization.BrokenLink_*</c> string.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown for any value without a description (none of the five handled below).
/// </exception>
private string Describe(BrokenLinkType brokenLinkType)
{
    if (brokenLinkType == BrokenLinkType.External)
    {
        return Localization.BrokenLink_External;
    }

    if (brokenLinkType == BrokenLinkType.MediaLibrary)
    {
        return Localization.BrokenLink_MediaLibrary;
    }

    if (brokenLinkType == BrokenLinkType.Page)
    {
        return Localization.BrokenLink_Page;
    }

    if (brokenLinkType == BrokenLinkType.PageNotPublished)
    {
        return Localization.BrokenLink_PageNotPublished;
    }

    if (brokenLinkType == BrokenLinkType.Relative)
    {
        return Localization.BrokenLink_Relative;
    }

    throw new InvalidOperationException("not supported link type " + brokenLinkType);
}
/// <summary>
/// Builds the "invalidContent" report element describing a single broken link,
/// including a short preview of the content surrounding the link.
/// </summary>
/// <param name="a">The link element; its value becomes the "originalText" attribute.</param>
/// <param name="link">The link target, written to the "originalLink" attribute.</param>
/// <param name="brokenLinkType">The kind of breakage; converted to text via <c>Describe</c>.</param>
/// <returns>A new "invalidContent" element carrying the preview and error attributes.</returns>
private XElement DescribeBrokenLink(XElement a, string link, BrokenLinkType brokenLinkType)
{
    string previousnode = ToPreviewString(a.PreviousNode);
    string nextnode = ToPreviewString(a.NextNode);

    // Text before the link: keep the tail, which is the part adjacent to the link.
    if (previousnode.Length > 50)
    {
        previousnode = "..." + previousnode.Substring(previousnode.Length - 40);
    }

    // Text after the link: keep the head, which is the part adjacent to the link.
    // (Previously the last 40 characters were kept, showing the far end of the text.)
    if (nextnode.Length > 50)
    {
        nextnode = nextnode.Substring(0, 40) + "...";
    }

    string errorText = Describe(brokenLinkType);

    return new XElement("invalidContent",
        new XAttribute("previousNode", previousnode),
        new XAttribute("originalText", a.Value),
        new XAttribute("originalLink", link),
        new XAttribute("nextNode", nextnode),
        new XAttribute("errorType", errorText));
}
//private object GetHostNameSyncObject(string hostname)
//{
//    hostname = hostname.ToLowerInvariant();
//    return _hostnameSync.GetOrAdd(hostname, h => new object());
//}

/// <summary>
/// Records the check result for a url in <c>_brokenLinks</c> (via <c>TryAdd</c>)
/// and reports whether the link turned out to be valid.
/// </summary>
/// <param name="url">The url that was checked.</param>
/// <param name="brokenLinkType">The outcome of the check.</param>
/// <returns><c>true</c> when <paramref name="brokenLinkType"/> is <c>BrokenLinkType.None</c>.</returns>
private bool SaveLinkCheckResult(string url, BrokenLinkType brokenLinkType)
{
    bool linkIsValid = brokenLinkType == BrokenLinkType.None;

    _brokenLinks.TryAdd(url, brokenLinkType);

    return linkIsValid;
}
/// <summary>
/// Converts a link to an absolute url and packages it, together with its host name and link type,
/// for validation by an http request.
/// </summary>
/// <param name="url">The link as found in the page.</param>
/// <param name="baseUrl">Base url passed to <c>UrlHelper.ToAbsoluteUrl</c>.</param>
/// <param name="serverUrl">Server url; when null, <c>_serverUrl</c> is used instead.</param>
/// <param name="brokenLinkType">The link type to report if the request-based validation fails.</param>
/// <returns>
/// The validation info, or <c>null</c> when the resulting url cannot be parsed as an absolute uri.
/// </returns>
private RequestValidationInfo GetRequestValidationInfo(string url, string baseUrl, string serverUrl, BrokenLinkType brokenLinkType)
{
    url = UrlHelper.ToAbsoluteUrl(url, baseUrl, serverUrl ?? _serverUrl);

    // TryCreate avoids using an exception for the expected "malformed url" case,
    // and also yields null (instead of throwing) should the url itself be null.
    Uri absoluteUri;
    if (!Uri.TryCreate(url, UriKind.Absolute, out absoluteUri))
    {
        return null;
    }

    return new RequestValidationInfo
    {
        Hostname = absoluteUri.Host,
        LinkType = brokenLinkType,
        Url = url
    };
}
/// <summary>
/// Renders every page the current user has read access to, collects the links found in them,
/// validates those links, and appends a tree of report elements to <paramref name="infoDocumentRoot"/>.
/// </summary>
/// <param name="infoDocumentRoot">The element the report tree is built under.</param>
/// <returns>
/// <c>true</c> when no broken links were found. Page rendering/parsing errors are added to the report
/// but do not affect the returned value.
/// </returns>
public bool BuildBrokenLinksReport(XElement infoDocumentRoot)
{
    using (new DataScope(PublicationScope.Published))
    {
        bool noInvalidLinksFound = true;

        // Get all pages present in the console
        List<IPage> actionRequiredPages = DataFacade.GetData<IPage>().ToList();

        // Check security for each page (does the user have access - no need to bother the user with pages they do not have access to)
        UserToken userToken = UserValidationFacade.GetUserToken();
        var userPermissions = PermissionTypeFacade.GetUserPermissionDefinitions(userToken.Username).ToList();
        var userGroupPermissions = PermissionTypeFacade.GetUserGroupPermissionDefinitions(userToken.Username).ToList();

        // Loop all pages and remove the ones the user has no access to
        actionRequiredPages = actionRequiredPages.Where(page =>
            PermissionTypeFacade.GetCurrentPermissionTypes(userToken, page.GetDataEntityToken(), userPermissions, userGroupPermissions)
                .Contains(PermissionType.Read)).ToList();

        var pageIdsWithAccessTo = new HashSet<Guid>(actionRequiredPages.Select(p => p.Id));

        // Accessible pages plus their ancestors, so that the report can be shown as a tree
        var allSitemapElements = PageStructureInfo.GetSiteMap().DescendantsAndSelf();
        var relevantElements = allSitemapElements.Where(f => pageIdsWithAccessTo.Contains(new Guid(f.Attribute("Id").Value)));
        var minimalTree = relevantElements.AncestorsAndSelf().Where(f => f.Name.LocalName == "Page").Distinct().ToList();

        var reportElements = new Hashtable<Guid, XElement>();
        var linksToCheck = new List<LinkToCheck>();

        // Rendering all the C1 pages and collecting links
        foreach (XElement pageElement in minimalTree)
        {
            Guid pageId = new Guid(pageElement.Attribute("Id").Value);
            IPage page = PageManager.GetPageById(pageId);
            Verify.IsNotNull(page, "Failed to get the page");

            // Menu title is preferred; the page title is the fallback
            XAttribute menuTitleAttribute = pageElement.Attribute("MenuTitle");
            string pageTitle = menuTitleAttribute != null
                ? menuTitleAttribute.Value
                : pageElement.Attribute("Title").Value;

            var resultPageElement = new XElement(PageElementName,
                new XAttribute("Id", pageId),
                new XAttribute("Title", pageTitle));
            reportElements[pageId] = resultPageElement;

            string htmlDocument, errorCode;
            string url = pageElement.Attribute("URL").Value;
            string pageServerUrl = null;

            // Ordinal comparison: url schemes are not linguistic text, so culture rules must not apply
            if (url.StartsWith("http://", StringComparison.Ordinal)
                || url.StartsWith("https://", StringComparison.Ordinal))
            {
                pageServerUrl = new UrlBuilder(url).ServerUrl;
                if (pageServerUrl == string.Empty)
                {
                    pageServerUrl = url; /* Bug in versions < C1 4.0 beta 2 */
                }
            }
            pageServerUrl = pageServerUrl ?? _serverUrl;

            PageRenderingResult result = RenderPage(url, out htmlDocument, out errorCode);
            if (result == PageRenderingResult.Failed)
            {
                resultPageElement.Add(GetRenderingErrorNode(errorCode));
                continue;
            }

            if (result == PageRenderingResult.Redirect || result == PageRenderingResult.NotFound)
            {
                continue;
            }

            XDocument document;
            try
            {
                document = XDocument.Parse(htmlDocument);
            }
            catch (Exception)
            {
                // Any parse failure is reported against the page rather than aborting the whole report
                resultPageElement.Add(GetRenderingErrorNode(Localization.BrokenLinkReport_NotValidXhml));
                continue;
            }

            linksToCheck.AddRange(CollectLinksToCheck(document, resultPageElement, url, pageServerUrl));
        }

        linksToCheck = linksToCheck.OrderBy(o => Guid.NewGuid()).ToList(); // Shuffling links

        // Checking external and internal links in parallel tasks - one per hostname.
        // Having 100 tasks running in parallel would fill the app pool and make the site unresponsive
        var linksGroupedByHostname = linksToCheck
            .Where(l => l.RequestValidationInfo != null)
            .GroupBy(link => link.RequestValidationInfo.Hostname)
            .ToList();

        ParallelFacade.ForEach(linksGroupedByHostname, linkGroup =>
        {
            foreach (var linkToCheck in linkGroup)
            {
                linkToCheck.BrokenLinkType = ValidateByRequest(linkToCheck.RequestValidationInfo);
                // linkToCheck.RequestValidationInfo = null;
            }
        });

        foreach (var link in linksToCheck)
        {
            if (!link.BrokenLinkType.HasValue)
            {
                // A link that received no result is logged and reported as a broken relative link
                Log.LogWarning(LogTitle, "Incorrectly processed link: " + link.Href);
                link.BrokenLinkType = BrokenLinkType.Relative;
            }

            BrokenLinkType brokenLinkType = link.BrokenLinkType.Value;
            if (brokenLinkType == BrokenLinkType.None)
            {
                continue;
            }

            var brokenLinkDescriptionElement = DescribeBrokenLink(link.LinkNode, link.Href, brokenLinkType);
            link.ReportPageNode.Add(brokenLinkDescriptionElement);
            noInvalidLinksFound = false;
        }

        BuildReportTreeRec(infoDocumentRoot, Guid.Empty, reportElements);

        return noInvalidLinksFound;
    }
}
/// <summary>
/// Parses the url and checks whether it can be validated without making an http request (for internal urls).
/// </summary>
/// <param name="url">The link to examine.</param>
/// <param name="pageUrl">Passed as the base url when request validation info is built.</param>
/// <param name="serverUrl">Passed on to <c>GetRequestValidationInfo</c>.</param>
/// <param name="brokenLinkType">
/// Receives the kind of breakage detected, or <c>BrokenLinkType.None</c> when nothing was found to be broken.
/// </param>
/// <param name="requestValidationInfo">
/// Receives the request details when the link has to be validated by a request; otherwise <c>null</c>.
/// </param>
/// <returns>Whether the link is valid, broken, or still needs to be validated by a request.</returns>
private UrlPreprocessResult PreprocessUrl(
    string url,
    string pageUrl,
    string serverUrl,
    out BrokenLinkType brokenLinkType,
    out RequestValidationInfo requestValidationInfo)
{
    BrokenLinkType knownResult;
    requestValidationInfo = null;

    // A result recorded earlier for the same url is reused as is
    if (_brokenLinks.TryGetValue(url, out knownResult))
    {
        brokenLinkType = knownResult;
        if (brokenLinkType == BrokenLinkType.None)
        {
            return UrlPreprocessResult.Valid;
        }

        return UrlPreprocessResult.Broken;
    }

    // Trying to parse as a page url first
    PageUrlData parsedPageUrl = null;
    try
    {
        if (IsKnownHostname(url)) // Workaround "if" for early versions of 4.0 beta
        {
            parsedPageUrl = PageUrls.ParseUrl(url);
        }
    }
    catch (UriFormatException)
    {
        // Deliberately ignored: the url is then handled by the media/request checks below
    }

    if (parsedPageUrl != null)
    {
        Guid targetPageId = parsedPageUrl.PageId;

        IPage targetPage;
        using (new DataScope(parsedPageUrl.PublicationScope, parsedPageUrl.LocalizationScope))
        {
            targetPage = PageManager.GetPageById(targetPageId);
        }

        if (targetPage == null)
        {
            // The page may exist but only in the unpublished scope
            if (parsedPageUrl.PublicationScope == PublicationScope.Published)
            {
                using (new DataScope(PublicationScope.Unpublished, parsedPageUrl.LocalizationScope))
                {
                    if (PageManager.GetPageById(targetPageId) != null)
                    {
                        brokenLinkType = BrokenLinkType.PageNotPublished;
                        if (SaveLinkCheckResult(url, brokenLinkType))
                        {
                            return UrlPreprocessResult.Valid;
                        }

                        return UrlPreprocessResult.Broken;
                    }
                }
            }

            brokenLinkType = BrokenLinkType.Page;
            if (SaveLinkCheckResult(url, brokenLinkType))
            {
                return UrlPreprocessResult.Valid;
            }

            return UrlPreprocessResult.Broken;
        }

        // If no PathInfo - page link is already valid
        if (string.IsNullOrEmpty(parsedPageUrl.PathInfo))
        {
            brokenLinkType = BrokenLinkType.None;
            return UrlPreprocessResult.Valid;
        }

        // If there's pathInfo -> making a request to check whether the link is actually broken
        requestValidationInfo = GetRequestValidationInfo(url, pageUrl, serverUrl, BrokenLinkType.Page);
        if (requestValidationInfo != null)
        {
            brokenLinkType = BrokenLinkType.None;
            return UrlPreprocessResult.NeedToBeValidatedByRequest;
        }

        brokenLinkType = BrokenLinkType.Page;
        return UrlPreprocessResult.Broken;
    }

    MediaUrlData parsedMediaUrl = MediaUrls.ParseUrl(url);
    if (parsedMediaUrl != null)
    {
        Guid mediaId = parsedMediaUrl.MediaId;
        string storeId = parsedMediaUrl.MediaStore;

        bool mediaFileFound = DataFacade.GetData<IMediaFile>().Any(f => f.StoreId == storeId && f.Id == mediaId);

        if (mediaFileFound)
        {
            brokenLinkType = BrokenLinkType.None;
        }
        else
        {
            brokenLinkType = BrokenLinkType.MediaLibrary;
        }

        if (SaveLinkCheckResult(url, brokenLinkType))
        {
            return UrlPreprocessResult.Valid;
        }

        return UrlPreprocessResult.Broken;
    }

    // Neither a page nor a media url - it has to be checked by a request
    BrokenLinkType requestLinkType;
    if (UrlHelper.IsAbsoluteLink(url))
    {
        requestLinkType = BrokenLinkType.External;
    }
    else
    {
        requestLinkType = BrokenLinkType.Relative;
    }

    requestValidationInfo = GetRequestValidationInfo(url, pageUrl, serverUrl, requestLinkType);
    if (requestValidationInfo != null)
    {
        brokenLinkType = BrokenLinkType.None;
        return UrlPreprocessResult.NeedToBeValidatedByRequest;
    }

    brokenLinkType = requestLinkType;
    return UrlPreprocessResult.Broken;
}
/// <summary>
/// Converts a link to an absolute url and packages it, together with its host name and link type,
/// for validation by an http request.
/// </summary>
/// <param name="url">The link as found in the page.</param>
/// <param name="baseUrl">Base url passed to <c>UrlHelper.ToAbsoluteUrl</c>.</param>
/// <param name="serverUrl">Server url; when null, <c>_serverUrl</c> is used instead.</param>
/// <param name="brokenLinkType">The link type to report if the request-based validation fails.</param>
/// <returns>
/// The validation info, or <c>null</c> when the resulting url cannot be parsed as an absolute uri.
/// </returns>
private RequestValidationInfo GetRequestValidationInfo(string url, string baseUrl, string serverUrl, BrokenLinkType brokenLinkType)
{
    url = UrlHelper.ToAbsoluteUrl(url, baseUrl, serverUrl ?? _serverUrl);

    // TryCreate avoids using an exception for the expected "malformed url" case,
    // and also yields null (instead of throwing) should the url itself be null.
    Uri absoluteUri;
    if (!Uri.TryCreate(url, UriKind.Absolute, out absoluteUri))
    {
        return null;
    }

    return new RequestValidationInfo
    {
        Hostname = absoluteUri.Host,
        LinkType = brokenLinkType,
        Url = url
    };
}
//private object GetHostNameSyncObject(string hostname)
//{
//    hostname = hostname.ToLowerInvariant();
//    return _hostnameSync.GetOrAdd(hostname, h => new object());
//}

/// <summary>
/// Records the check result for a url in <c>_brokenLinks</c> (via <c>TryAdd</c>)
/// and reports whether the link turned out to be valid.
/// </summary>
/// <param name="url">The url that was checked.</param>
/// <param name="brokenLinkType">The outcome of the check.</param>
/// <returns><c>true</c> when <paramref name="brokenLinkType"/> is <c>BrokenLinkType.None</c>.</returns>
private bool SaveLinkCheckResult(string url, BrokenLinkType brokenLinkType)
{
    bool linkIsValid = brokenLinkType == BrokenLinkType.None;

    _brokenLinks.TryAdd(url, brokenLinkType);

    return linkIsValid;
}
/// <summary>
/// Maps a broken link type to its localized description text.
/// </summary>
/// <param name="brokenLinkType">The kind of broken link to describe.</param>
/// <returns>The matching <c>Localization.BrokenLink_*</c> string.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown for any value without a description (none of the five handled below).
/// </exception>
private string Describe(BrokenLinkType brokenLinkType)
{
    if (brokenLinkType == BrokenLinkType.External)
    {
        return Localization.BrokenLink_External;
    }

    if (brokenLinkType == BrokenLinkType.MediaLibrary)
    {
        return Localization.BrokenLink_MediaLibrary;
    }

    if (brokenLinkType == BrokenLinkType.Page)
    {
        return Localization.BrokenLink_Page;
    }

    if (brokenLinkType == BrokenLinkType.PageNotPublished)
    {
        return Localization.BrokenLink_PageNotPublished;
    }

    if (brokenLinkType == BrokenLinkType.Relative)
    {
        return Localization.BrokenLink_Relative;
    }

    throw new InvalidOperationException("not supported link type " + brokenLinkType);
}
/// <summary>
/// Parses the url and checks whether it can be validated without making an http request (for internal urls).
/// </summary>
/// <param name="url">The link to examine.</param>
/// <param name="pageUrl">Passed as the base url when request validation info is built.</param>
/// <param name="serverUrl">Passed on to <c>GetRequestValidationInfo</c>.</param>
/// <param name="brokenLinkType">
/// Receives the kind of breakage detected, or <c>BrokenLinkType.None</c> when nothing was found to be broken.
/// </param>
/// <param name="requestValidationInfo">
/// Receives the request details when the link has to be validated by a request; otherwise <c>null</c>.
/// </param>
/// <returns>Whether the link is valid, broken, or still needs to be validated by a request.</returns>
private UrlPreprocessResult PreprocessUrl(
    string url,
    string pageUrl,
    string serverUrl,
    out BrokenLinkType brokenLinkType,
    out RequestValidationInfo requestValidationInfo)
{
    BrokenLinkType knownResult;
    requestValidationInfo = null;

    // A result recorded earlier for the same url is reused as is
    if (_brokenLinks.TryGetValue(url, out knownResult))
    {
        brokenLinkType = knownResult;
        if (brokenLinkType == BrokenLinkType.None)
        {
            return UrlPreprocessResult.Valid;
        }

        return UrlPreprocessResult.Broken;
    }

    // Trying to parse as a page url first
    PageUrlData parsedPageUrl = null;
    try
    {
        if (IsKnownHostname(url)) // Workaround "if" for early versions of 4.0 beta
        {
            parsedPageUrl = PageUrls.ParseUrl(url);
        }
    }
    catch (UriFormatException)
    {
        // Deliberately ignored: the url is then handled by the media/request checks below
    }

    if (parsedPageUrl != null)
    {
        Guid targetPageId = parsedPageUrl.PageId;

        IPage targetPage;
        using (new DataScope(parsedPageUrl.PublicationScope, parsedPageUrl.LocalizationScope))
        {
            targetPage = PageManager.GetPageById(targetPageId);
        }

        if (targetPage == null)
        {
            // The page may exist but only in the unpublished scope
            if (parsedPageUrl.PublicationScope == PublicationScope.Published)
            {
                using (new DataScope(PublicationScope.Unpublished, parsedPageUrl.LocalizationScope))
                {
                    if (PageManager.GetPageById(targetPageId) != null)
                    {
                        brokenLinkType = BrokenLinkType.PageNotPublished;
                        if (SaveLinkCheckResult(url, brokenLinkType))
                        {
                            return UrlPreprocessResult.Valid;
                        }

                        return UrlPreprocessResult.Broken;
                    }
                }
            }

            brokenLinkType = BrokenLinkType.Page;
            if (SaveLinkCheckResult(url, brokenLinkType))
            {
                return UrlPreprocessResult.Valid;
            }

            return UrlPreprocessResult.Broken;
        }

        // If no PathInfo - page link is already valid
        if (string.IsNullOrEmpty(parsedPageUrl.PathInfo))
        {
            brokenLinkType = BrokenLinkType.None;
            return UrlPreprocessResult.Valid;
        }

        // If there's pathInfo -> making a request to check whether the link is actually broken
        requestValidationInfo = GetRequestValidationInfo(url, pageUrl, serverUrl, BrokenLinkType.Page);
        if (requestValidationInfo != null)
        {
            brokenLinkType = BrokenLinkType.None;
            return UrlPreprocessResult.NeedToBeValidatedByRequest;
        }

        brokenLinkType = BrokenLinkType.Page;
        return UrlPreprocessResult.Broken;
    }

    MediaUrlData parsedMediaUrl = MediaUrls.ParseUrl(url);
    if (parsedMediaUrl != null)
    {
        Guid mediaId = parsedMediaUrl.MediaId;
        string storeId = parsedMediaUrl.MediaStore;

        bool mediaFileFound = DataFacade.GetData<IMediaFile>().Any(f => f.StoreId == storeId && f.Id == mediaId);

        if (mediaFileFound)
        {
            brokenLinkType = BrokenLinkType.None;
        }
        else
        {
            brokenLinkType = BrokenLinkType.MediaLibrary;
        }

        if (SaveLinkCheckResult(url, brokenLinkType))
        {
            return UrlPreprocessResult.Valid;
        }

        return UrlPreprocessResult.Broken;
    }

    // Neither a page nor a media url - it has to be checked by a request
    BrokenLinkType requestLinkType;
    if (UrlHelper.IsAbsoluteLink(url))
    {
        requestLinkType = BrokenLinkType.External;
    }
    else
    {
        requestLinkType = BrokenLinkType.Relative;
    }

    requestValidationInfo = GetRequestValidationInfo(url, pageUrl, serverUrl, requestLinkType);
    if (requestValidationInfo != null)
    {
        brokenLinkType = BrokenLinkType.None;
        return UrlPreprocessResult.NeedToBeValidatedByRequest;
    }

    brokenLinkType = requestLinkType;
    return UrlPreprocessResult.Broken;
}
/// <summary>
/// Builds the "invalidContent" report element describing a single broken link,
/// including a short preview of the content surrounding the link.
/// </summary>
/// <param name="a">The link element; its value becomes the "originalText" attribute.</param>
/// <param name="link">The link target, written to the "originalLink" attribute.</param>
/// <param name="brokenLinkType">The kind of breakage; converted to text via <c>Describe</c>.</param>
/// <returns>A new "invalidContent" element carrying the preview and error attributes.</returns>
private XElement DescribeBrokenLink(XElement a, string link, BrokenLinkType brokenLinkType)
{
    string previousnode = ToPreviewString(a.PreviousNode);
    string nextnode = ToPreviewString(a.NextNode);

    // Text before the link: keep the tail, which is the part adjacent to the link.
    if (previousnode.Length > 50)
    {
        previousnode = "..." + previousnode.Substring(previousnode.Length - 40);
    }

    // Text after the link: keep the head, which is the part adjacent to the link.
    // (Previously the last 40 characters were kept, showing the far end of the text.)
    if (nextnode.Length > 50)
    {
        nextnode = nextnode.Substring(0, 40) + "...";
    }

    string errorText = Describe(brokenLinkType);

    return new XElement("invalidContent",
        new XAttribute("previousNode", previousnode),
        new XAttribute("originalText", a.Value),
        new XAttribute("originalLink", link),
        new XAttribute("nextNode", nextnode),
        new XAttribute("errorType", errorText));
}
/// <summary>
/// Creates a broken link entry from the link's url and its type.
/// </summary>
/// <param name="BrokenUrl">The url of the broken link; stored in <c>Url</c>.</param>
/// <param name="TypeOfLink">The type of the broken link; stored in <c>LinkType</c>.</param>
public BrokenLink(Uri BrokenUrl, BrokenLinkType TypeOfLink)
{
    this.Url = BrokenUrl;
    this.LinkType = TypeOfLink;
}
/// <summary>
/// Validates a link by checking the cache to see if the response has already been checked and recording broken links
/// </summary>
/// <param name="PageUri">The Uri of the page the link is on</param>
/// <param name="UriToValidate">The Uri of the link to check</param>
/// <param name="originalUrl">The link as originally written; may be relative or absolute</param>
/// <param name="LinkType">The type of the link being checked</param>
private void ValidateLink(Uri PageUri, Uri UriToValidate, string originalUrl, BrokenLinkType LinkType)
{
    // An absolute original url that points neither to the www site nor to the site
    // the page is on is treated as an external link.
    Uri originalUri = new Uri(originalUrl, UriKind.RelativeOrAbsolute);
    if (originalUri.IsAbsoluteUri)
    {
        string originalAuthority = originalUri.GetLeftPart(UriPartial.Authority);
        string pageAuthority = PageUri.GetLeftPart(UriPartial.Authority);

        // Ordinal, case-insensitive comparison: lower-casing with the current culture
        // gives wrong results for host names under some cultures (e.g. Turkish 'I').
        bool pointsToWwwSite = string.Equals(originalAuthority, WWWUrl, StringComparison.OrdinalIgnoreCase);
        bool pointsToPublishingSite = string.Equals(originalAuthority, pageAuthority, StringComparison.OrdinalIgnoreCase);

        if (!pointsToWwwSite && !pointsToPublishingSite)
        {
            LinkType = BrokenLinkType.ExternalLink;
        }
    }

    string linkKey = UriToValidate.ToString();

    // Each distinct link is checked only once
    if (CheckedLinks.Contains(linkKey))
    {
        return;
    }

    if (linkKey.IndexOf("fixupredirect.aspx", StringComparison.OrdinalIgnoreCase) >= 0)
    {
        // Links through fixupredirect.aspx are recorded as checked without being requested
        CheckedLinks.Add(linkKey, true);
    }
    else if (!LinkCheckerUtilities.LinkIsValid(UriToValidate))
    {
        Utility.LogInformation("Found broken link: " + linkKey);
        AddBrokenLink(PageUri, new BrokenLink(UriToValidate, LinkType));
        CheckedLinks.Add(linkKey, false);
    }
    else
    {
        CheckedLinks.Add(linkKey, LinkType != BrokenLinkType.ExternalLink);
    }
}