/**************************************************************************/

/// <summary>
/// Partitions the internal HTML documents of a collection into those whose
/// URL appears in the map returned by <c>BuildSitemapUrlList</c> and those
/// whose URL does not.
/// </summary>
/// <param name="DocCollection">Collection whose documents are examined.</param>
/// <returns>
/// A list holding exactly two document lists: index 0 contains the documents
/// that were not found in the map, index 1 contains the documents that were.
/// </returns>
/// <remarks>
/// External documents and non-HTML documents are skipped and appear in
/// neither list. Each found document gets a note set to the outer key of the
/// map entry that contained its URL (presumably the sitemap's URL; verify
/// against <c>BuildSitemapUrlList</c>). When several entries contain the URL,
/// the last one enumerated supplies the note.
/// </remarks>
public List<MacroscopeDocumentList> AnalyzeInSitemaps(MacroscopeDocumentCollection DocCollection)
{
    Dictionary<string, Dictionary<string, bool>> sitemapIndex =
        this.BuildSitemapUrlList(DocCollection: DocCollection);

    MacroscopeDocumentList listed = new MacroscopeDocumentList();
    MacroscopeDocumentList unlisted = new MacroscopeDocumentList();

    // Callers rely on the ordering: "not in sitemaps" first, "in sitemaps" second.
    List<MacroscopeDocumentList> result = new List<MacroscopeDocumentList>(2)
    {
        unlisted,
        listed
    };

    foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
    {
        string documentUrl = msDoc.GetUrl();

        if (msDoc.GetIsExternal()
            || !msDoc.IsDocumentType(Type: MacroscopeConstants.DocumentType.HTML))
        {
            continue;
        }

        // Deliberately no early exit: every entry is inspected, so the last
        // entry containing the URL is the one recorded as the note.
        string matchedKey = null;

        foreach (KeyValuePair<string, Dictionary<string, bool>> entry in sitemapIndex)
        {
            if (entry.Value.ContainsKey(documentUrl))
            {
                matchedKey = entry.Key;
            }
        }

        if (matchedKey == null)
        {
            unlisted.AddDocument(msDoc: msDoc);
            continue;
        }

        listed.AddDocument(msDoc: msDoc);
        listed.AddDocumentNote(msDoc: msDoc, Note: matchedKey);
    }

    return result;
}
/**************************************************************************/

/// <summary>
/// Collects every document in the collection whose document type is
/// <c>SITEMAPTEXT</c> or <c>SITEMAPXML</c>.
/// </summary>
/// <param name="DocCollection">Collection to scan.</param>
/// <returns>A new document list containing only the matching documents.</returns>
private MacroscopeDocumentList FindSitemaps(MacroscopeDocumentCollection DocCollection)
{
    MacroscopeDocumentList sitemaps = new MacroscopeDocumentList();

    foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
    {
        // The XML check only runs when the text check fails.
        bool isSitemap =
            msDoc.IsDocumentType(Type: MacroscopeConstants.DocumentType.SITEMAPTEXT)
            || msDoc.IsDocumentType(Type: MacroscopeConstants.DocumentType.SITEMAPXML);

        if (!isSitemap)
        {
            continue;
        }

        sitemaps.AddDocument(msDoc: msDoc);
    }

    return sitemaps;
}
/**************************************************************************/

/// <summary>
/// Finds documents that no other valid document in the collection links to,
/// and keeps the ORPHAN1 / ORPHAN2 remarks on every valid document in sync
/// with that result.
/// </summary>
/// <param name="DocCollection">Collection whose documents are examined.</param>
/// <returns>A new document list containing the documents judged to be orphans.</returns>
/// <remarks>
/// Documents rejected by <c>IsValidDocument</c> are neither candidates nor
/// counted as referrers. A candidate is treated as linked when any outgoing
/// hyperlink of another valid document matches its URL, either by the
/// hyperlink's target URL or by its raw target URL, according to
/// <c>MacroscopeHttpUrlUtils.CompareUrls</c>. Both remarks are added for an
/// orphan and both are removed otherwise.
/// </remarks>
public MacroscopeDocumentList AnalyzeOrphanedDocumentsInCollection(MacroscopeDocumentCollection DocCollection)
{
    MacroscopeDocumentList orphans = new MacroscopeDocumentList();

    foreach (MacroscopeDocument msDocLeft in DocCollection.IterateDocuments())
    {
        string candidateUrl = msDocLeft.GetUrl();

        if (!IsValidDocument(msDoc: msDocLeft))
        {
            continue;
        }

        bool linkedFromElsewhere = false;

        foreach (MacroscopeDocument msDocRight in DocCollection.IterateDocuments())
        {
            // A document's links to itself do not count as inbound links.
            if (MacroscopeHttpUrlUtils.CompareUrls(UrlLeft: candidateUrl, UrlRight: msDocRight.GetUrl()))
            {
                continue;
            }

            if (!this.IsValidDocument(msDoc: msDocRight))
            {
                continue;
            }

            foreach (MacroscopeHyperlinkOut outbound in msDocRight.IterateHyperlinksOut())
            {
                string resolvedTarget = outbound.GetTargetUrl();
                string rawTarget = outbound.GetRawTargetUrl();

                // The raw target is only compared when the first comparison fails.
                if (MacroscopeHttpUrlUtils.CompareUrls(UrlLeft: candidateUrl, UrlRight: resolvedTarget)
                    || MacroscopeHttpUrlUtils.CompareUrls(UrlLeft: candidateUrl, UrlRight: rawTarget))
                {
                    linkedFromElsewhere = true;
                    break;
                }
            }

            if (linkedFromElsewhere)
            {
                break;
            }
        }

        if (linkedFromElsewhere)
        {
            // Clear any remarks that may already be present on this document.
            msDocLeft.RemoveRemark("ORPHAN1");
            msDocLeft.RemoveRemark("ORPHAN2");
            continue;
        }

        orphans.AddDocument(msDoc: msDocLeft);
        msDocLeft.AddRemark("ORPHAN1", "This appears to be an orphaned page, not linked to from any other HTML page in this collection.");
        msDocLeft.AddRemark("ORPHAN2", "This page appears to only be referenced from one or more sitemaps.");
    }

    return orphans;
}