/**************************************************************************/

        /// <summary>
        /// Sorts the internal HTML documents of a collection into two lists, depending on
        /// whether each document's URL is present in any of the per-sitemap URL tables
        /// returned by <c>BuildSitemapUrlList</c>.
        /// </summary>
        /// <param name="DocCollection">Collection whose documents are examined.</param>
        /// <returns>
        /// A two-element list: index 0 holds the documents that were not found in any
        /// table, index 1 holds the documents that were found. Each found document is
        /// annotated with the key of the table that contained it.
        /// </returns>
        public List <MacroscopeDocumentList> AnalyzeInSitemaps(MacroscopeDocumentCollection DocCollection)
        {
            Dictionary <string, Dictionary <string, bool> > SitemapUrlTable = this.BuildSitemapUrlList(DocCollection: DocCollection);
            MacroscopeDocumentList ListedDocuments   = new MacroscopeDocumentList();
            MacroscopeDocumentList UnlistedDocuments = new MacroscopeDocumentList();

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                string DocumentUrl = msDoc.GetUrl();

                // Only internal HTML documents take part in the comparison.
                if (msDoc.GetIsExternal() || !msDoc.IsDocumentType(Type: MacroscopeConstants.DocumentType.HTML))
                {
                    continue;
                }

                // Dictionary keys are never null, so a null value here means "no match".
                // Every table is scanned without an early exit, so when several tables
                // contain the URL, the key enumerated last is the one recorded.
                string MatchingSitemapUrl = null;

                foreach (KeyValuePair <string, Dictionary <string, bool> > Entry in SitemapUrlTable)
                {
                    if (Entry.Value.ContainsKey(DocumentUrl))
                    {
                        MatchingSitemapUrl = Entry.Key;
                    }
                }

                if (MatchingSitemapUrl == null)
                {
                    UnlistedDocuments.AddDocument(msDoc: msDoc);
                    continue;
                }

                ListedDocuments.AddDocument(msDoc: msDoc);
                ListedDocuments.AddDocumentNote(msDoc: msDoc, Note: MatchingSitemapUrl);
            }

            // Order matters to callers: "not in sitemaps" first, "in sitemaps" second.
            return new List <MacroscopeDocumentList>(2) { UnlistedDocuments, ListedDocuments };
        }
        /**************************************************************************/

        /// <summary>
        /// Collects every document in the collection whose type is either
        /// <c>SITEMAPTEXT</c> or <c>SITEMAPXML</c>.
        /// </summary>
        /// <param name="DocCollection">Collection whose documents are examined.</param>
        /// <returns>A new list containing only the matching documents.</returns>
        private MacroscopeDocumentList FindSitemaps(MacroscopeDocumentCollection DocCollection)
        {
            MacroscopeDocumentList Sitemaps = new MacroscopeDocumentList();

            foreach (MacroscopeDocument Candidate in DocCollection.IterateDocuments())
            {
                bool IsTextSitemap = Candidate.IsDocumentType(Type: MacroscopeConstants.DocumentType.SITEMAPTEXT);

                // The XML check is only evaluated when the text check has failed.
                if (!IsTextSitemap && !Candidate.IsDocumentType(Type: MacroscopeConstants.DocumentType.SITEMAPXML))
                {
                    continue;
                }

                Sitemaps.AddDocument(msDoc: Candidate);
            }

            return Sitemaps;
        }
        // Example no. 3
        // 0
        /**************************************************************************/

        /// <summary>
        /// Finds the valid documents in a collection that no other valid document in the
        /// same collection links to, and keeps the "ORPHAN1"/"ORPHAN2" remarks on every
        /// valid document in step with that result.
        /// </summary>
        /// <param name="DocCollection">Collection whose documents are examined.</param>
        /// <returns>A new list containing the documents judged to be orphans.</returns>
        public MacroscopeDocumentList AnalyzeOrphanedDocumentsInCollection(MacroscopeDocumentCollection DocCollection)
        {
            MacroscopeDocumentList Orphans = new MacroscopeDocumentList();

            foreach (MacroscopeDocument Candidate in DocCollection.IterateDocuments())
            {
                string CandidateUrl = Candidate.GetUrl();

                if (!this.IsValidDocument(msDoc: Candidate))
                {
                    continue;
                }

                if (this.IsLinkedFromAnotherDocument(DocCollection, CandidateUrl))
                {
                    // Linked from somewhere: clear any orphan remarks left by an earlier pass.
                    Candidate.RemoveRemark("ORPHAN1");
                    Candidate.RemoveRemark("ORPHAN2");
                    continue;
                }

                Orphans.AddDocument(msDoc: Candidate);
                Candidate.AddRemark("ORPHAN1", "This appears to be an orphaned page, not linked to from any other HTML page in this collection.");
                Candidate.AddRemark("ORPHAN2", "This page appears to only be referenced from one or more sitemaps.");
            }

            return Orphans;
        }

        /// <summary>
        /// Reports whether any valid document in the collection, other than the one at
        /// <paramref name="TargetUrl"/>, has an outgoing hyperlink whose target URL or
        /// raw target URL compares equal to <paramref name="TargetUrl"/>.
        /// </summary>
        /// <param name="DocCollection">Collection whose documents are scanned for links.</param>
        /// <param name="TargetUrl">URL of the document being looked for.</param>
        /// <returns><c>true</c> as soon as the first matching hyperlink is found; otherwise <c>false</c>.</returns>
        private bool IsLinkedFromAnotherDocument(MacroscopeDocumentCollection DocCollection, string TargetUrl)
        {
            foreach (MacroscopeDocument Source in DocCollection.IterateDocuments())
            {
                // A document linking to itself does not count as an inbound link.
                if (MacroscopeHttpUrlUtils.CompareUrls(UrlLeft: TargetUrl, UrlRight: Source.GetUrl()))
                {
                    continue;
                }

                if (!this.IsValidDocument(msDoc: Source))
                {
                    continue;
                }

                foreach (MacroscopeHyperlinkOut Link in Source.IterateHyperlinksOut())
                {
                    string LinkUrl    = Link.GetTargetUrl();
                    string LinkUrlRaw = Link.GetRawTargetUrl();

                    // The raw URL is only compared when the first comparison fails.
                    if (MacroscopeHttpUrlUtils.CompareUrls(UrlLeft: TargetUrl, UrlRight: LinkUrl) ||
                        MacroscopeHttpUrlUtils.CompareUrls(UrlLeft: TargetUrl, UrlRight: LinkUrlRaw))
                    {
                        return true;
                    }
                }
            }

            return false;
        }