Ejemplo n.º 1
0
        /**************************************************************************/

        private void BuildWorksheetPageBrokenLinks(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            {
                ws.WriteField("Status Code");
                ws.WriteField("Status");
                ws.WriteField("Anchor Text");
                ws.WriteField("Alt Text");
                ws.WriteField("Origin URL");
                ws.WriteField("Destination URL");

                ws.NextRecord();
            }

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                MacroscopeHyperlinksIn HyperlinksIn = DocCollection.GetDocumentHyperlinksIn(msDoc.GetUrl());
                int    StatusCode = ( int )msDoc.GetStatusCode();
                string Status     = msDoc.GetStatusCode().ToString();

                if (
                    (StatusCode >= 400) &&
                    (StatusCode <= 599) &&
                    (HyperlinksIn != null))
                {
                    foreach (MacroscopeHyperlinkIn HyperlinkIn in HyperlinksIn.IterateLinks())
                    {
                        string OriginUrl  = HyperlinkIn.GetSourceUrl();
                        string AnchorText = HyperlinkIn.GetAnchorText();
                        string AltText    = HyperlinkIn.GetAltText();

                        if (
                            (OriginUrl != null) &&
                            (OriginUrl.Length > 0))
                        {
                            this.InsertAndFormatContentCell(ws, StatusCode.ToString());

                            this.InsertAndFormatContentCell(ws, Status);

                            this.InsertAndFormatContentCell(ws, AnchorText);

                            this.InsertAndFormatContentCell(ws, AltText);

                            this.InsertAndFormatUrlCell(ws, OriginUrl);

                            this.InsertAndFormatUrlCell(ws, msDoc);

                            ws.NextRecord();
                        }
                    }
                }
            }
        }
        /**************************************************************************/

        public void RenderListView(MacroscopeJobMaster JobMaster)
        {
            Dictionary <String, bool> Blocked = JobMaster.GetBlockedByRobotsList();

            if (Blocked.Count == 0)
            {
                return;
            }

            List <ListViewItem> ListViewItems = new List <ListViewItem>(1);

            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);
            decimal Count           = 0;
            decimal TotalDocs       = (decimal)Blocked.Count;
            decimal MajorPercentage = ((decimal)100 / TotalDocs) * Count;

            if (MacroscopePreferencesManager.GetShowProgressDialogues())
            {
                ProgressForm.UpdatePercentages(
                    Title: "Preparing Display",
                    Message: "Processing document collection for display:",
                    MajorPercentage: MajorPercentage,
                    ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                    );
            }

            foreach (string Url in Blocked.Keys)
            {
                bool IsInternal = JobMaster.GetAllowedHosts().IsInternalUrl(Url);

                this.RenderListView(
                    ListViewItems: ListViewItems,
                    Url: Url,
                    IsBlocked: Blocked[Url],
                    IsInternal: IsInternal
                    );

                Count++;
                MajorPercentage = ((decimal)100 / TotalDocs) * Count;

                ProgressForm.UpdatePercentages(
                    Title: null,
                    Message: null,
                    MajorPercentage: MajorPercentage,
                    ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                    );
            }

            this.DisplayListView.Items.AddRange(ListViewItems.ToArray());

            if (MacroscopePreferencesManager.GetShowProgressDialogues())
            {
                ProgressForm.DoClose();
            }

            ProgressForm.Dispose();
        }
Ejemplo n.º 3
0
        /**************************************************************************/

        private void BuildWorksheetBlockedByRobotsInternal(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            {
                ws.Cell(iRow, iCol).Value = "URL";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Status";
            }

            iColMax = iCol;

            iRow++;

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                if (msDoc.GetIsInternal() && (!msDoc.GetAllowedByRobots()))
                {
                    iCol = 1;

                    this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

                    if (AllowedHosts.IsInternalUrl(Url: msDoc.GetUrl()))
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                    }

                    iCol++;

                    this.InsertAndFormatRobotsCell(ws, iRow, iCol, msDoc);

                    iRow++;
                }
            }

            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax);
                var excelTable = rangeData.CreateTable();
            }
        }
        /**************************************************************************/

        private void BuildWorksheetSitemapErrors(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            {
                ws.WriteField("Sitemap URL");
                ws.WriteField("Status Code");
                ws.WriteField("Robots");
                ws.WriteField("URL");

                ws.NextRecord();
            }

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                if (msDoc.GetIsInternal() && msDoc.IsDocumentType(Type: MacroscopeConstants.DocumentType.SITEMAPXML))
                {
                    foreach (MacroscopeLink Outlink in msDoc.IterateOutlinks())
                    {
                        string             TargetUrl   = Outlink.GetTargetUrl();
                        MacroscopeDocument msDocLinked = DocCollection.GetDocumentByUrl(Url: TargetUrl);
                        bool InsertRow = false;

                        if (msDocLinked.GetIsInternal())
                        {
                            int StatusCode = (int)msDocLinked.GetStatusCode();
                            if ((StatusCode >= 400) && (StatusCode <= 599))
                            {
                                InsertRow = true;
                            }
                            if (!msDocLinked.GetAllowedByRobots())
                            {
                                InsertRow = true;
                            }
                        }

                        if (InsertRow)
                        {
                            this.InsertAndFormatUrlCell(ws, msDoc);

                            this.InsertAndFormatStatusCodeCell(ws, msDoc);

                            this.InsertAndFormatRobotsCell(ws, msDoc);

                            this.InsertAndFormatUrlCell(ws, TargetUrl);

                            ws.NextRecord();
                        }
                    }
                }
            }
        }
Ejemplo n.º 5
0
        /**************************************************************************/

        private void BuildWorksheetEmailAddresses(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            {
                ws.Cell(iRow, iCol).Value = "Email Address";
                iCol++;

                ws.Cell(iRow, iCol).Value = "URL";
            }

            iColMax = iCol;

            iRow++;

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                if (msDoc.GetIsHtml())
                {
                    Dictionary <string, string> EmailAddresses = msDoc.GetEmailAddresses();

                    foreach (string EmailAddress in EmailAddresses.Keys)
                    {
                        iCol = 1;

                        this.InsertAndFormatContentCell(ws, iRow, iCol, EmailAddress);

                        iCol++;

                        this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

                        iRow++;
                    }
                }
            }

            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax);
                var excelTable = rangeData.CreateTable();
            }
        }
Ejemplo n.º 6
0
        /**************************************************************************/

        private void BuildWorksheetPageRedirectsAudit(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            {
                ws.WriteField("Origin URL");
                ws.WriteField("Status Code");
                ws.WriteField("Status");
                ws.WriteField("Destination URL");

                ws.NextRecord();
            }

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc = DocCollection.GetDocument(Url: Url);

                if (!msDoc.GetIsRedirect())
                {
                    continue;
                }

                string OriginURL      = msDoc.GetUrlRedirectFrom();
                string StatusCode     = (( int )msDoc.GetStatusCode()).ToString();
                string Status         = msDoc.GetStatusCode().ToString();
                string DestinationURL = msDoc.GetUrlRedirectTo();

                if (string.IsNullOrEmpty(OriginURL))
                {
                    continue;
                }

                if (string.IsNullOrEmpty(DestinationURL))
                {
                    continue;
                }

                this.InsertAndFormatUrlCell(ws, OriginURL);

                this.InsertAndFormatContentCell(ws, StatusCode);

                this.InsertAndFormatContentCell(ws, Status);

                this.InsertAndFormatUrlCell(ws, DestinationURL);

                ws.NextRecord();
            }
        }
        /**************************************************************************/

        private void BuildWorksheetTelephoneNumbers(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            {
                ws.Cell(iRow, iCol).Value = "Telephone Number";
                iCol++;

                ws.Cell(iRow, iCol).Value = "URL";
            }

            iColMax = iCol;

            iRow++;

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                if (msDoc.IsDocumentType(Type: MacroscopeConstants.DocumentType.HTML))
                {
                    Dictionary <string, string> TelephoneNumbers = msDoc.GetTelephoneNumbers();

                    foreach (string TelephoneNumber in TelephoneNumbers.Keys)
                    {
                        iCol = 1;

                        this.InsertAndFormatContentCell(ws, iRow, iCol, TelephoneNumber);

                        iCol++;

                        this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

                        iRow++;
                    }
                }
            }

            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax);
                var excelTable = rangeData.CreateTable();
            }
        }
Ejemplo n.º 8
0
        /**************************************************************************/

        private void BuildWorksheetPageUriAnalysis(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            {
                ws.WriteField("URL");
                ws.WriteField("Status Code");
                ws.WriteField("Status");
                ws.WriteField("Occurrences");
                ws.WriteField("Checksum");

                ws.NextRecord();
            }

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc = DocCollection.GetDocument(Url: Url);

                string StatusCode = (( int )msDoc.GetStatusCode()).ToString();
                string Status     = msDoc.GetStatusCode().ToString();
                string Checksum   = msDoc.GetChecksum();
                int    Count      = DocCollection.GetStatsChecksumCount(Checksum: Checksum);

                this.InsertAndFormatUrlCell(ws, msDoc);

                this.InsertAndFormatContentCell(ws, StatusCode);

                this.InsertAndFormatContentCell(ws, Status);

                this.InsertAndFormatContentCell(ws, Count.ToString());

                this.InsertAndFormatContentCell(ws, Checksum);

                ws.NextRecord();
            }
        }
Ejemplo n.º 9
0
        /**************************************************************************/

        private void BuildWorksheetErrors(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            {
                ws.WriteField("Status Code");
                ws.WriteField("Status");
                ws.WriteField("URL");

                ws.NextRecord();
            }

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument     msDoc        = DocCollection.GetDocument(Url);
                MacroscopeHyperlinksIn HyperlinksIn = DocCollection.GetDocumentHyperlinksIn(Url);
                int    StatusCode = ( int )msDoc.GetStatusCode();
                string Status     = msDoc.GetStatusCode().ToString();

                if (
                    (StatusCode >= 400) &&
                    (StatusCode <= 599))
                {
                    this.InsertAndFormatContentCell(ws, StatusCode.ToString());

                    this.InsertAndFormatContentCell(ws, Status);

                    this.InsertAndFormatUrlCell(ws, Url);

                    ws.NextRecord();
                }
            }
        }
        /**************************************************************************/

        private void BuildWorksheetPageObservations(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            {
                ws.WriteField("URL");
                ws.WriteField("Status Code");
                ws.WriteField("Status");
                ws.WriteField("Observation");

                ws.NextRecord();
            }

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                string Url        = msDoc.GetUrl();
                string StatusCode = (( int )msDoc.GetStatusCode()).ToString();
                string Status     = msDoc.GetStatusCode().ToString();

                foreach (KeyValuePair <string, string> RemarkPair in msDoc.IterateRemarks())
                {
                    this.InsertAndFormatUrlCell(ws, msDoc);

                    this.InsertAndFormatContentCell(ws, StatusCode);

                    this.InsertAndFormatContentCell(ws, Status);

                    this.InsertAndFormatContentCell(ws, RemarkPair.Value);

                    ws.NextRecord();
                }
            }
        }
Ejemplo n.º 11
0
        /**************************************************************************/

        private void BuildWorksheetPageLinks(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            {
                ws.Cell(iRow, iCol).Value = "URL";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Link Type";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Source URL";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Target URL";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Follow";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Alt Text";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Raw Source URL";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Raw Target URL";
            }

            iColMax = iCol;

            iRow++;

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                foreach (MacroscopeLink Link in msDoc.IterateOutlinks())
                {
                    string LinkType = Link.GetLinkType().ToString();

                    string SourceUrl = Link.GetSourceUrl();
                    string TargetUrl = Link.GetTargetUrl();

                    string AltText = Link.GetAltText();

                    string RawSourceUrl = Link.GetRawSourceUrl();
                    string RawTargetUrl = Link.GetRawTargetUrl();

                    string DoFollow = "No Follow";

                    if (Link.GetDoFollow())
                    {
                        DoFollow = "Follow";
                    }

                    if (string.IsNullOrEmpty(AltText))
                    {
                        AltText = "";
                    }

                    if (string.IsNullOrEmpty(RawSourceUrl))
                    {
                        RawSourceUrl = "";
                    }

                    if (string.IsNullOrEmpty(RawTargetUrl))
                    {
                        RawTargetUrl = "";
                    }

                    iCol = 1;

                    this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

                    if (AllowedHosts.IsInternalUrl(Url: msDoc.GetUrl()))
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                    }

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(LinkType));

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(SourceUrl));

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(TargetUrl));

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(DoFollow));

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(AltText));

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(RawSourceUrl));

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(RawTargetUrl));

                    iRow++;
                }
            }

            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax);
                var excelTable = rangeData.CreateTable();
            }
        }
Ejemplo n.º 12
0
        /**************************************************************************/

        private void BuildWorksheetErrors(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            {
                ws.Cell(iRow, iCol).Value = "Status Code";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Status";
                iCol++;

                ws.Cell(iRow, iCol).Value = "URL";
            }

            iColMax = iCol;

            iRow++;

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                MacroscopeHyperlinksIn HyperlinksIn = DocCollection.GetDocumentHyperlinksIn(msDoc.GetUrl());
                int    StatusCode = ( int )msDoc.GetStatusCode();
                string Status     = msDoc.GetStatusCode().ToString();

                if (
                    (StatusCode >= 400) &&
                    (StatusCode <= 599))
                {
                    iCol = 1;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, StatusCode.ToString());

                    if ((StatusCode >= 400) && (StatusCode <= 599))
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Blue);
                    }

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, Status);

                    if ((StatusCode >= 400) && (StatusCode <= 599))
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Blue);
                    }

                    iCol++;

                    this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc.GetUrl());

                    if (AllowedHosts.IsInternalUrl(Url: msDoc.GetUrl()))
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                    }

                    iRow++;
                }
            }

            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax);
                var excelTable = rangeData.CreateTable();
            }
        }
Ejemplo n.º 13
0
        /**************************************************************************/

        private void BuildWorksheetPageRedirectsAudit(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            {
                ws.Cell(iRow, iCol).Value = "Origin URL";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Status Code";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Status";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Destination URL";
            }

            iColMax = iCol;

            iRow++;

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc = DocCollection.GetDocument(Url);

                if (!msDoc.GetIsRedirect())
                {
                    continue;
                }

                string OriginURL      = msDoc.GetUrlRedirectFrom();
                string StatusCode     = (( int )msDoc.GetStatusCode()).ToString();
                string Status         = msDoc.GetStatusCode().ToString();
                string DestinationURL = msDoc.GetUrlRedirectTo();

                if (OriginURL == null)
                {
                    continue;
                }

                if (DestinationURL == null)
                {
                    continue;
                }

                iCol = 1;

                this.InsertAndFormatUrlCell(ws, iRow, iCol, OriginURL);

                if (AllowedHosts.IsInternalUrl(Url: OriginURL))
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                }
                else
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                }

                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, StatusCode);

                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, Status);

                iCol++;

                this.InsertAndFormatUrlCell(ws, iRow, iCol, DestinationURL);

                if (AllowedHosts.IsInternalUrl(Url: DestinationURL))
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                }
                else
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                }

                iRow++;
            }

            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax);
                var excelTable = rangeData.CreateTable();
            }
        }
Ejemplo n.º 14
0
        /**************************************************************************/

        private void BuildWorksheetSitemapXmlErrors(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            {
                ws.Cell(iRow, iCol).Value = "Sitemap URL";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Status Code";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Robots";
                iCol++;

                ws.Cell(iRow, iCol).Value = "URL";
            }

            iColMax = iCol;

            iRow++;

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                if (msDoc.GetIsInternal() && msDoc.IsDocumentType(Type: MacroscopeConstants.DocumentType.SITEMAPXML))
                {
                    foreach (MacroscopeLink Outlink in msDoc.IterateOutlinks())
                    {
                        string             TargetUrl   = Outlink.GetTargetUrl();
                        MacroscopeDocument msDocLinked = DocCollection.GetDocumentByUrl(Url: TargetUrl);
                        bool InsertRow = false;

                        if (msDocLinked.GetIsInternal())
                        {
                            int StatusCode = (int)msDocLinked.GetStatusCode();
                            if ((StatusCode >= 400) && (StatusCode <= 599))
                            {
                                InsertRow = true;
                            }
                            if (!msDocLinked.GetAllowedByRobots())
                            {
                                InsertRow = true;
                            }
                        }

                        if (InsertRow)
                        {
                            iCol = 1;

                            this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

                            if (AllowedHosts.IsInternalUrl(Url: msDoc.GetUrl()))
                            {
                                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                            }
                            else
                            {
                                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                            }

                            iCol++;

                            this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, msDoc);

                            iCol++;

                            this.InsertAndFormatRobotsCell(ws, iRow, iCol, msDoc);

                            iCol++;

                            this.InsertAndFormatUrlCell(ws, iRow, iCol, TargetUrl);

                            if (AllowedHosts.IsInternalUrl(Url: TargetUrl))
                            {
                                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                            }
                            else
                            {
                                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                            }

                            iRow++;
                        }
                    }
                }
            }

            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax);
                var excelTable = rangeData.CreateTable();
            }
        }
        /**************************************************************************/

        private void BuildWorksheetRegularExpressions(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            {
                ws.Cell(iRow, iCol).Value = MacroscopeConstants.Url;
                iCol++;

                ws.Cell(iRow, iCol).Value = MacroscopeConstants.StatusCode;
                iCol++;

                ws.Cell(iRow, iCol).Value = MacroscopeConstants.Status;
                iCol++;

                ws.Cell(iRow, iCol).Value = MacroscopeConstants.ContentType;
                iCol++;

                ws.Cell(iRow, iCol).Value = "Extracted Label";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Extracted Value";
            }

            iColMax = iCol;

            iRow++;

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                string DocUrl     = msDoc.GetUrl();
                string StatusCode = (( int )msDoc.GetStatusCode()).ToString();
                string Status     = msDoc.GetStatusCode().ToString();
                string MimeType   = msDoc.GetMimeType();

                if (!this.DataExtractorRegexes.CanApplyDataExtractorsToDocument(msDoc: msDoc))
                {
                    continue;
                }

                foreach (KeyValuePair <string, string> DataExtractedPair in msDoc.IterateDataExtractedRegexes())
                {
                    string ExtractedLabel = DataExtractedPair.Key;
                    string ExtractedValue = DataExtractedPair.Value;

                    if (
                        string.IsNullOrEmpty(ExtractedLabel) ||
                        string.IsNullOrEmpty(ExtractedValue))
                    {
                        continue;
                    }

                    iCol = 1;

                    this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

                    if (msDoc.GetIsInternal())
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                    }

                    iCol++;

                    this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, msDoc);

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(Status));

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(MimeType));

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(ExtractedLabel));

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(ExtractedValue));

                    iRow++;
                }
            }

            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax);
                var excelTable = rangeData.CreateTable();
            }
        }
Ejemplo n.º 16
0
        /**************************************************************************/

        private void BuildWorksheetPageObservations(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            {
                ws.Cell(iRow, iCol).Value = "URL";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Status Code";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Status";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Observation";
            }

            iColMax = iCol;

            iRow++;

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                string Url = msDoc.GetUrl();

                string StatusCode = (( int )msDoc.GetStatusCode()).ToString();
                string Status     = msDoc.GetStatusCode().ToString();

                foreach (KeyValuePair <string, string> RemarkPair in msDoc.IterateRemarks())
                {
                    iCol = 1;

                    this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

                    if (AllowedHosts.IsInternalUrl(Url: Url))
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                    }

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, StatusCode);

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, Status);

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, RemarkPair.Value);

                    iRow++;
                }
            }

            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax);
                var excelTable = rangeData.CreateTable();
            }
        }
Ejemplo n.º 17
0
        /**************************************************************************/

        private void BuildWorksheetCustomFilter(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            Dictionary <string, int> FilterColsTable = new Dictionary <string, int>(CustomFilter.GetSize());

            const int FilterColOffset = 4;

            {
                ws.Cell(iRow, iCol).Value = MacroscopeConstants.Url;
                iCol++;

                ws.Cell(iRow, iCol).Value = MacroscopeConstants.StatusCode;
                iCol++;

                ws.Cell(iRow, iCol).Value = MacroscopeConstants.Status;
                iCol++;

                ws.Cell(iRow, iCol).Value = MacroscopeConstants.ContentType;

                for (int Slot = 0; Slot < CustomFilter.GetSize(); Slot++)
                {
                    string FilterPattern = CustomFilter.GetPattern(Slot).Key;

                    iCol++;

                    if (FilterColsTable.ContainsKey(FilterPattern) || string.IsNullOrEmpty(FilterPattern))
                    {
                        FilterColsTable.Add(string.Format("EMPTY{0}", Slot + 1), Slot + FilterColOffset);
                        ws.Cell(iRow, iCol).Value = string.Format("EMPTY{0}", Slot + 1);
                    }
                    else
                    {
                        FilterColsTable.Add(FilterPattern, Slot + FilterColOffset);
                        ws.Cell(iRow, iCol).Value = FilterPattern;
                    }
                }
            }

            iColMax = iCol;

            iRow++;

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                string DocUrl     = msDoc.GetUrl();
                string StatusCode = ((int)msDoc.GetStatusCode()).ToString();
                string Status     = msDoc.GetStatusCode().ToString();
                string MimeType   = msDoc.GetMimeType();

                if (!CustomFilter.CanApplyCustomFiltersToDocument(msDoc: msDoc))
                {
                    continue;
                }

                iCol = 1;

                this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

                if (msDoc.GetIsInternal())
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                }
                else
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                }

                iCol++;

                this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, msDoc);

                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(msDoc.GetStatusCode().ToString()));

                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(MimeType));

                iCol++;

                for (int Slot = 0; Slot < this.CustomFilter.GetSize(); Slot++)
                {
                    string FilterPattern = this.CustomFilter.GetPattern(Slot: Slot).Key;
                    KeyValuePair <string, MacroscopeConstants.TextPresence> Pair = msDoc.GetCustomFilteredItem(Text: FilterPattern);

                    if ((Pair.Key != null) && (Pair.Value != MacroscopeConstants.TextPresence.UNDEFINED))
                    {
                        string CustomFilterItemValue = MacroscopeConstants.TextPresenceLabels[Pair.Value];

                        this.InsertAndFormatContentCell(ws, iRow, iCol, CustomFilterItemValue);

                        switch (Pair.Value)
                        {
                        case MacroscopeConstants.TextPresence.CONTAINS_STRING:
                            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                            break;

                        case MacroscopeConstants.TextPresence.NOT_CONTAINS_STRING:
                            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                            break;

                        case MacroscopeConstants.TextPresence.MUST_CONTAIN_STRING:
                            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                            break;

                        case MacroscopeConstants.TextPresence.SHOULD_NOT_CONTAIN_STRING:
                            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                            break;

                        case MacroscopeConstants.TextPresence.CONTAINS_REGEX:
                            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                            break;

                        case MacroscopeConstants.TextPresence.NOT_CONTAINS_REGEX:
                            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                            break;

                        case MacroscopeConstants.TextPresence.MUST_CONTAIN_REGEX:
                            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                            break;

                        case MacroscopeConstants.TextPresence.SHOULD_NOT_CONTAIN_REGEX:
                            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                            break;

                        default:
                            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                            break;
                        }
                    }
                    else
                    {
                        this.InsertAndFormatContentCell(ws, iRow, iCol, "");
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                    }

                    iCol++;
                }

                iRow++;
            }

            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax);
                var excelTable = rangeData.CreateTable();
            }
        }
Ejemplo n.º 18
0
        /**************************************************************************/

        private void BuildWorksheetPageRedirectChains(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();
            List <List <MacroscopeRedirectChainDocStruct> > RedirectChains = DocCollection.GetMacroscopeRedirectChains();

            {
                ws.Cell(iRow, iCol).Value = "Hop";
                iCol++;
                ws.Cell(iRow, iCol).Value = "Status";
            }

            iRow++;

            foreach (List <MacroscopeRedirectChainDocStruct> DocList in RedirectChains)
            {
                int iHop = 1;

                iCol = 1;

                foreach (MacroscopeRedirectChainDocStruct RedirectChainDocStruct in DocList)
                {
                    string Url        = RedirectChainDocStruct.Url;
                    string StatusCode = RedirectChainDocStruct.StatusCode.ToString();

                    ws.Cell(1, iCol).Value = string.Format("Hop {0} URL", iHop);
                    this.InsertAndFormatUrlCell(ws, iRow, iCol, Url);
                    iCol++;

                    if (AllowedHosts.IsInternalUrl(Url: Url))
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                    }

                    ws.Cell(1, iCol).Value = string.Format("Hop {0} Status", iHop);
                    this.InsertAndFormatContentCell(ws, iRow, iCol, StatusCode);
                    iCol++;

                    iHop++;
                }

                if (iCol > iColMax)
                {
                    iColMax = iCol;
                }

                iRow++;
            }

            if ((iRow > 1) && (iColMax > 2))
            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax - 1);
                var excelTable = rangeData.CreateTable();
            }
        }
Ejemplo n.º 19
0
        /**************************************************************************/

        private void BuildWorksheetCustomFilter(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            Dictionary <string, int> FilterColsTable = new Dictionary <string, int> (CustomFilter.GetSize());

            const int FilterColOffset = 3;

            {
                ws.WriteField(MacroscopeConstants.Url);
                ws.WriteField(MacroscopeConstants.StatusCode);
                ws.WriteField(MacroscopeConstants.Status);
                ws.WriteField(MacroscopeConstants.ContentType);

                for (int Slot = 0; Slot < CustomFilter.GetSize(); Slot++)
                {
                    string FilterPattern = CustomFilter.GetPattern(Slot).Key;

                    if (FilterColsTable.ContainsKey(FilterPattern) || string.IsNullOrEmpty(FilterPattern))
                    {
                        FilterColsTable.Add(string.Format("EMPTY{0}", Slot + 1), Slot + FilterColOffset);

                        ws.WriteField(string.Format("EMPTY{0}", Slot + 1));
                    }
                    else
                    {
                        FilterColsTable.Add(FilterPattern, Slot + FilterColOffset);

                        ws.WriteField(FilterPattern);
                    }
                }

                ws.NextRecord();
            }

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                string DocUrl     = msDoc.GetUrl();
                string StatusCode = (( int )msDoc.GetStatusCode()).ToString();
                string Status     = msDoc.GetStatusCode().ToString();
                string MimeType   = msDoc.GetMimeType();

                if (!this.CustomFilter.CanApplyCustomFiltersToDocument(msDoc: msDoc))
                {
                    continue;
                }

                this.InsertAndFormatUrlCell(ws, msDoc);

                this.InsertAndFormatStatusCodeCell(ws, msDoc);

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetStatusCode().ToString()));

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(MimeType));

                for (int Slot = 0; Slot < this.CustomFilter.GetSize(); Slot++)
                {
                    string FilterPattern = this.CustomFilter.GetPattern(Slot: Slot).Key;
                    KeyValuePair <string,  MacroscopeConstants.TextPresence> Pair = msDoc.GetCustomFilteredItem(Text: FilterPattern);

                    if ((Pair.Key != null) && (Pair.Value != MacroscopeConstants.TextPresence.UNDEFINED))
                    {
                        string CustomFilterItemValue = MacroscopeConstants.TextPresenceLabels[Pair.Value];

                        this.InsertAndFormatContentCell(ws, CustomFilterItemValue);
                    }
                    else
                    {
                        this.InsertAndFormatContentCell(ws, "");
                    }

                    ws.NextRecord();
                }
            }
        }
Ejemplo n.º 20
0
        /**************************************************************************/

        private void BuildWorksheetKeywordTerms(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel,
            Dictionary <string, int> DicTerms
            )
        {
            var     ws        = wb.Worksheets.Add(WorksheetLabel);
            decimal TermTotal = DicTerms.Count;
            decimal TermCount = 0;

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            {
                ws.Cell(iRow, iCol).Value = "Occurrences";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Term";
                iCol++;

                ws.Cell(iRow, iCol).Value = "URL";
            }

            iColMax = iCol;

            iRow++;

            foreach (string Term in DicTerms.Keys)
            {
                MacroscopeDocumentList DocumentList = DocCollection.GetDeepKeywordAnalysDocumentList(Term);

                decimal DocTotal = ( decimal )DocumentList.CountDocuments();
                decimal DocCount = 0;
                TermCount++;

                if (TermTotal > 0)
                {
                    this.ProgressForm.UpdatePercentages(
                        Title: null,
                        Message: null,
                        MajorPercentage: -1,
                        ProgressLabelMajor: null,
                        MinorPercentage: (( decimal )100 / TermTotal) * TermCount,
                        ProgressLabelMinor: "Keywords Processed",
                        SubMinorPercentage: -1,
                        ProgressLabelSubMinor: null
                        );
                }

                foreach (MacroscopeDocument msDoc in DocumentList.IterateDocuments())
                {
                    DocCount++;

                    if (DocTotal > 0)
                    {
                        this.ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: -1,
                            ProgressLabelMajor: null,
                            MinorPercentage: -1,
                            ProgressLabelMinor: null,
                            SubMinorPercentage: (( decimal )100 / DocTotal) * DocCount,
                            ProgressLabelSubMinor: "Documents Processed"
                            );
                    }

                    iCol = 1;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(DicTerms[Term].ToString()));

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(Term));

                    iCol++;

                    this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc.GetUrl());

                    iRow++;
                }
            }

            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax);
                var excelTable = rangeData.CreateTable();
            }
        }
        /**************************************************************************/

        private void BuildWorksheetPageHyperlinks(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            {
                ws.Cell(iRow, iCol).Value = "Source URL";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Target URL";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Follow";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Target";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Anchor Text";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Title";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Alt Text";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Raw Target URL";
            }

            iColMax = iCol;

            iRow++;

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                MacroscopeHyperlinksOut HyperlinksOut = msDoc.GetHyperlinksOut();

                foreach (MacroscopeHyperlinkOut HyperlinkOut in HyperlinksOut.IterateLinks())
                {
                    string HyperlinkOutUrl = HyperlinkOut.GetTargetUrl();
                    string DoFollow        = "No Follow";
                    string LinkTarget      = HyperlinkOut.GetLinkTarget();
                    string AnchorText      = HyperlinkOut.GetAnchorText();
                    string Title           = HyperlinkOut.GetTitle();
                    string AltText         = HyperlinkOut.GetAltText();

                    string RawTargetUrl = HyperlinkOut.GetRawTargetUrl();

                    if (HyperlinkOutUrl == null)
                    {
                        HyperlinkOutUrl = "";
                    }

                    if (HyperlinkOut.GetDoFollow())
                    {
                        DoFollow = "Follow";
                    }

                    iCol = 1;

                    this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

                    if (AllowedHosts.IsInternalUrl(Url: msDoc.GetUrl()))
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                    }

                    iCol++;

                    this.InsertAndFormatUrlCell(ws, iRow, iCol, HyperlinkOutUrl);

                    if ((HyperlinkOutUrl.Length > 0) && (AllowedHosts.IsInternalUrl(Url: HyperlinkOutUrl)))
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                    }
                    else
                    if ((HyperlinkOutUrl.Length > 0) && (AllowedHosts.IsExternalUrl(Url: HyperlinkOutUrl)))
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                    }
                    else
                    {
                        this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(HyperlinkOutUrl));
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                    }

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, DoFollow);

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, LinkTarget);

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(AnchorText));

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(Title));

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(AltText));

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, RawTargetUrl);

                    iRow++;
                }
            }

            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax);
                var excelTable = rangeData.CreateTable();
            }
        }
Ejemplo n.º 22
0
        /**************************************************************************/

        private void BuildWorksheetHrefLangMatrix(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            Dictionary <string, string>  LocalesTable  = JobMaster.GetLocales();
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            Dictionary <string, int>     LocaleCols    = new Dictionary <string, int> ();

            {
                ws.Cell(iRow, iCol).Value = "URL";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Status Code";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Site Locale";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Title";
                iCol++;

                foreach (string LocaleKey in LocalesTable.Keys)
                {
                    DebugMsg(string.Format("EXCEL Locale: {0}", LocaleKey));

                    string LocaleLabel       = LocaleKey.ToUpper();
                    string DateServerLabel   = string.Format("{0} Date Server", LocaleKey.ToUpper());
                    string DateModifiedLabel = string.Format("{0} Date Modified", LocaleKey.ToUpper());

                    LocaleCols[LocaleKey] = iCol;

                    ws.Cell(iRow, iCol).Value = LocaleLabel;
                    iCol++;

                    ws.Cell(iRow, iCol).Value = DateServerLabel;
                    iCol++;

                    ws.Cell(iRow, iCol).Value = DateModifiedLabel;
                    iCol++;
                }

                for (int i = 1; i <= iCol; i++)
                {
                    ws.Cell(iRow, i).Style.Font.SetBold();
                }
            }

            iColMax = iCol;

            iRow++;

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                Dictionary <string, MacroscopeHrefLang> HrefLangsTable = msDoc.GetHrefLangs();

                string SiteLocale = this.FormatIfMissing(msDoc.GetLocale());
                string Title      = this.FormatIfMissing(msDoc.GetTitle());
                string LocaleCol  = msDoc.GetLocale();

                iCol = 1;

                this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);
                iCol++;

                this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, msDoc);
                iCol++;

                ws.Cell(iRow, iCol).Value = SiteLocale;
                if (SiteLocale == "MISSING")
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                }
                iCol++;

                ws.Cell(iRow, iCol).Value = Title;
                if (Title == "MISSING")
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                }
                iCol++;

                if (LocaleCol != null)
                {
                    this.InsertAndFormatUrlCell(ws, iRow, LocaleCols[LocaleCol], msDoc.GetUrl());
                }
                else
                {
                    ;
                }

                foreach (string LocaleKey in LocalesTable.Keys)
                {
                    if (!string.IsNullOrEmpty(LocaleKey))
                    {
                        if (HrefLangsTable.ContainsKey(LocaleKey))
                        {
                            MacroscopeHrefLang HrefLangAlternate = HrefLangsTable[LocaleKey];

                            string   HrefLangUrl          = HrefLangAlternate.GetUrl();
                            DateTime HrefLangDateServer   = HrefLangAlternate.GetDateServer();
                            DateTime HrefLangDateModified = HrefLangAlternate.GetDateModified();

                            this.InsertAndFormatUrlCell(ws, iRow, LocaleCols[LocaleKey], HrefLangUrl);

                            if (JobMaster.GetAllowedHosts().IsInternalUrl(HrefLangUrl))
                            {
                                ws.Cell(iRow, LocaleCols[LocaleKey]).Style.Font.SetFontColor(XLColor.Green);
                            }
                            else
                            {
                                ws.Cell(iRow, LocaleCols[LocaleKey]).Style.Font.SetFontColor(XLColor.Blue);
                            }

                            this.InsertAndFormatDateCell(ws, iRow, LocaleCols[LocaleKey] + 1, HrefLangDateServer.ToString());

                            this.InsertAndFormatDateCell(ws, iRow, LocaleCols[LocaleKey] + 2, HrefLangDateModified.ToString());
                        }
                        else
                        {
                            ws.Cell(iRow, LocaleCols[LocaleKey]).Style.Font.SetFontColor(XLColor.Red);
                            ws.Cell(iRow, LocaleCols[LocaleKey]).Value     = "NOT SPECIFIED";
                            ws.Cell(iRow, LocaleCols[LocaleKey] + 1).Value = "";
                            ws.Cell(iRow, LocaleCols[LocaleKey] + 2).Value = "";
                        }
                    }
                }

                iRow++;
            }

            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax - 1);
                var excelTable = rangeData.CreateTable();
            }
        }
        /**************************************************************************/

        private void BuildWorksheetPageDuplicateTitles(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            decimal Count    = 0;
            decimal DocCount = 0;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            DocCount = ( decimal )DocCollection.CountDocuments();

            {
                ws.Cell(iRow, iCol).Value = "URL";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Occurrences";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Title";
            }

            iColMax = iCol;

            iRow++;

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                bool Proceed = false;

                if (DocCount > 0)
                {
                    Count++;
                    this.ProgressForm.UpdatePercentages(
                        Title: null,
                        Message: null,
                        MajorPercentage: -1,
                        ProgressLabelMajor: string.Format("Documents Processed: {0}", Count),
                        MinorPercentage: (( decimal )100 / DocCount) * Count,
                        ProgressLabelMinor: msDoc.GetUrl(),
                        SubMinorPercentage: -1,
                        ProgressLabelSubMinor: null
                        );
                }

                if (AllowedHosts.IsInternalUrl(Url: msDoc.GetUrl()))
                {
                    switch (msDoc.GetDocumentType())
                    {
                    case MacroscopeConstants.DocumentType.HTML:
                        Proceed = true;
                        break;

                    case MacroscopeConstants.DocumentType.PDF:
                        Proceed = true;
                        break;

                    default:
                        Proceed = false;
                        break;
                    }
                }

                if (Proceed)
                {
                    string Title       = msDoc.GetTitle();
                    int    Occurrences = DocCollection.GetStatsTitleCount(msDoc: msDoc);

                    if (Occurrences > 1)
                    {
                        iCol = 1;

                        this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

                        if (msDoc.GetIsInternal())
                        {
                            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                        }
                        else
                        {
                            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                        }

                        iCol++;

                        this.InsertAndFormatContentCell(ws, iRow, iCol, Occurrences);

                        if (Occurrences > 1)
                        {
                            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Orange);
                        }
                        else
                        {
                            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                        }

                        iCol++;

                        this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(Title));

                        iRow++;
                    }
                }
            }

            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax);
                var excelTable = rangeData.CreateTable();
            }
        }
        /**************************************************************************/

        private void BuildWorksheetPageDuplicateTitles(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            decimal Count    = 0;
            decimal DocCount = 0;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            DocCount = ( decimal )DocCollection.CountDocuments();

            {
                ws.WriteField("URL");
                ws.WriteField("Occurrences");
                ws.WriteField("Title");

                ws.NextRecord();
            }

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                bool Proceed = false;

                if (DocCount > 0)
                {
                    Count++;
                    this.ProgressForm.UpdatePercentages(
                        Title: null,
                        Message: null,
                        MajorPercentage: -1,
                        ProgressLabelMajor: string.Format("Documents Processed: {0}", Count),
                        MinorPercentage: (( decimal )100 / DocCount) * Count,
                        ProgressLabelMinor: msDoc.GetUrl(),
                        SubMinorPercentage: -1,
                        ProgressLabelSubMinor: null
                        );
                }

                if (AllowedHosts.IsInternalUrl(Url: msDoc.GetUrl()))
                {
                    switch (msDoc.GetDocumentType())
                    {
                    case MacroscopeConstants.DocumentType.HTML:
                        Proceed = true;
                        break;

                    case MacroscopeConstants.DocumentType.PDF:
                        Proceed = true;
                        break;

                    default:
                        Proceed = false;
                        break;
                    }
                }

                if (Proceed)
                {
                    string Title       = msDoc.GetTitle();
                    int    Occurrences = DocCollection.GetStatsTitleCount(msDoc: msDoc);

                    if (Occurrences > 1)
                    {
                        this.InsertAndFormatUrlCell(ws, msDoc);

                        this.InsertAndFormatContentCell(ws, Occurrences);

                        this.InsertAndFormatContentCell(ws, this.FormatIfMissing(Title));

                        ws.NextRecord();
                    }
                }
            }
        }
Ejemplo n.º 25
0
        /**************************************************************************/

        private void BuildWorksheetPageRedirectedLinks(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            {
                ws.Cell(iRow, iCol).Value = "Status Code";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Status";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Origin URL";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Destination URL";
            }

            iColMax = iCol;

            iRow++;

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument     msDoc        = DocCollection.GetDocument(Url);
                MacroscopeHyperlinksIn HyperlinksIn = DocCollection.GetDocumentHyperlinksIn(Url);
                int    StatusCode = ( int )msDoc.GetStatusCode();
                string Status     = msDoc.GetStatusCode().ToString();

                if (
                    (StatusCode >= 300) &&
                    (StatusCode <= 399) &&
                    (HyperlinksIn != null))
                {
                    foreach (MacroscopeHyperlinkIn HyperlinkIn in HyperlinksIn.IterateLinks())
                    {
                        string OriginUrl = HyperlinkIn.GetSourceUrl();

                        if (
                            (OriginUrl != null) &&
                            (OriginUrl.Length > 0))
                        {
                            iCol = 1;

                            this.InsertAndFormatContentCell(ws, iRow, iCol, StatusCode.ToString());

                            if ((StatusCode >= 400) && (StatusCode <= 599))
                            {
                                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                            }
                            else
                            {
                                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Blue);
                            }

                            iCol++;

                            this.InsertAndFormatContentCell(ws, iRow, iCol, Status);

                            if ((StatusCode >= 400) && (StatusCode <= 599))
                            {
                                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                            }
                            else
                            {
                                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Blue);
                            }

                            iCol++;

                            this.InsertAndFormatUrlCell(ws, iRow, iCol, OriginUrl);

                            if (AllowedHosts.IsInternalUrl(Url: OriginUrl))
                            {
                                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                            }
                            else
                            {
                                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                            }

                            iCol++;

                            this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

                            if (AllowedHosts.IsInternalUrl(Url: Url))
                            {
                                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                            }
                            else
                            {
                                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                            }

                            iRow++;
                        }
                    }
                }
            }

            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax);
                var excelTable = rangeData.CreateTable();
            }
        }
        /**************************************************************************/

        private void BuildWorksheetHrefLangMatrixUnspecified(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            Dictionary <string, string>  LocalesTable  = JobMaster.GetLocales();
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            Dictionary <string, int>     LocaleCols    = new Dictionary <string, int> ();

            {
                ws.Cell(iRow, iCol).Value = "URL";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Status Code";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Site Locale";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Title";
                iCol++;

                foreach (string LocaleKey in LocalesTable.Keys)
                {
                    DebugMsg(string.Format("EXCEL Locale: {0}", LocaleKey));
                    LocaleCols[LocaleKey]     = iCol;
                    ws.Cell(iRow, iCol).Value = LocaleKey;
                    iCol++;
                }

                for (int i = 1; i <= iCol; i++)
                {
                    ws.Cell(iRow, i).Style.Font.SetBold();
                }
            }

            iColMax = iCol;

            iRow++;

            foreach (string Key in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc = DocCollection.GetDocument(Key);
                Dictionary <string, MacroscopeHrefLang> HrefLangsTable = msDoc.GetHrefLangs();
                Boolean Proceed = false;

                foreach (string LocaleKey in LocalesTable.Keys)
                {
                    if (!string.IsNullOrEmpty(LocaleKey))
                    {
                        if (!HrefLangsTable.ContainsKey(LocaleKey))
                        {
                            Proceed = true;
                        }
                    }
                }

                if (Proceed)
                {
                    if (!string.IsNullOrEmpty(msDoc.GetLocale()))
                    {
                        continue;
                    }

                    string SiteLocale = this.FormatIfMissing(msDoc.GetLocale());
                    string Title      = this.FormatIfMissing(msDoc.GetTitle());
                    string LocaleCol  = msDoc.GetLocale();

                    iCol = 1;

                    this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);
                    iCol++;

                    this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, msDoc);
                    iCol++;

                    ws.Cell(iRow, iCol).Value = SiteLocale;
                    if (SiteLocale == "MISSING")
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                    }
                    iCol++;

                    ws.Cell(iRow, iCol).Value = Title;
                    if (Title == "MISSING")
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                    }
                    iCol++;

                    if (LocaleCol != null)
                    {
                        ws.Cell(iRow, LocaleCols[LocaleCol]).Value = msDoc.GetUrl();
                    }
                    else
                    {
                        ;
                    }

                    foreach (string LocaleKey in LocalesTable.Keys)
                    {
                        if (!string.IsNullOrEmpty(LocaleKey))
                        {
                            if (HrefLangsTable.ContainsKey(LocaleKey))
                            {
                                MacroscopeHrefLang HrefLangAlternate = HrefLangsTable[LocaleKey];
                                string             Value             = HrefLangAlternate.GetUrl();

                                ws.Cell(iRow, LocaleCols[LocaleKey]).Value = Value;

                                if (JobMaster.GetAllowedHosts().IsInternalUrl(Value))
                                {
                                    ws.Cell(iRow, LocaleCols[LocaleKey]).Style.Font.SetFontColor(XLColor.Green);
                                }
                                else
                                {
                                    ws.Cell(iRow, LocaleCols[LocaleKey]).Style.Font.SetFontColor(XLColor.Blue);
                                }
                            }
                            else
                            {
                                ws.Cell(iRow, LocaleCols[LocaleKey]).Style.Font.SetFontColor(XLColor.Red);
                                ws.Cell(iRow, LocaleCols[LocaleKey]).Value = "NOT SPECIFIED";
                            }
                        }
                    }

                    iRow++;
                }
            }

            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax - 1);
                var excelTable = rangeData.CreateTable();
            }
        }
        /**************************************************************************/

        private void BuildWorksheetPageDuplicateEtags(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            decimal CountOuter = 0;
            decimal CountInner = 0;
            decimal DocCount   = 0;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            Dictionary <string, int> DuplicatesList = new Dictionary <string, int> (DocCollection.CountDocuments());
            Dictionary <string, MacroscopeDocument> DuplicatesDocList = new Dictionary <string, MacroscopeDocument> (DocCollection.CountDocuments());

            DocCount = ( decimal )DocCollection.CountDocuments();

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                string Etag = msDoc.GetEtag();

                if ((Etag != null) && (Etag.Length > 0))
                {
                    if (!DuplicatesDocList.ContainsKey(msDoc.GetUrl()))
                    {
                        DuplicatesDocList.Add(msDoc.GetUrl(), msDoc);
                    }

                    if (DuplicatesList.ContainsKey(Etag))
                    {
                        DuplicatesList[Etag] = DuplicatesList[Etag] + 1;
                    }
                    else
                    {
                        DuplicatesList.Add(Etag, 1);
                    }
                }
            }

            {
                ws.WriteField("Status Code");
                ws.WriteField("Status");
                ws.WriteField("Occurrences");
                ws.WriteField("ETag");
                ws.WriteField("URL");

                ws.NextRecord();
            }

            foreach (string Etag in DuplicatesList.Keys)
            {
                CountOuter++;
                CountInner = 0;

                if (DuplicatesList[Etag] > 1)
                {
                    foreach (MacroscopeDocument msDoc in  DuplicatesDocList.Values)
                    {
                        CountInner++;

                        if (DocCount > 0)
                        {
                            this.ProgressForm.UpdatePercentages(
                                Title: null,
                                Message: null,
                                MajorPercentage: -1,
                                ProgressLabelMajor: string.Format("Documents Processed: {0}", CountOuter),
                                MinorPercentage: (( decimal )100 / DocCount) * CountOuter,
                                ProgressLabelMinor: Etag,
                                SubMinorPercentage: (( decimal )100 / DocCount) * CountInner,
                                ProgressLabelSubMinor: msDoc.GetUrl()
                                );
                        }

                        if (msDoc.GetEtag() == Etag)
                        {
                            int            StatusCode  = ( int )msDoc.GetStatusCode();
                            HttpStatusCode Status      = msDoc.GetStatusCode();
                            int            Occurrences = DuplicatesList[Etag];

                            this.InsertAndFormatStatusCodeCell(ws, StatusCode);

                            this.InsertAndFormatStatusCodeCell(ws, Status);

                            this.InsertAndFormatContentCell(ws, Occurrences);

                            this.InsertAndFormatContentCell(ws, msDoc.GetEtag());

                            this.InsertAndFormatUrlCell(ws, msDoc);

                            ws.NextRecord();
                        }
                    }
                }
            }
        }
        /**************************************************************************/

        private void BuildWorksheetSitemapsAudit(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel,
            MacroscopeDocumentList DocumentList,
            bool InOut
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            {
                ws.Cell(iRow, iCol).Value = "URL";
                iCol++;

                ws.Cell(iRow, iCol).Value = "In Sitemap";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Status Code";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Is Redirect";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Robots";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Sitemap";
            }

            iColMax = iCol;

            iRow++;

            foreach (MacroscopeDocument msDoc in DocumentList.IterateDocuments())
            {
                string Url        = null;
                string Robots     = null;
                string SitemapUrl = null;
                int    StatusCode;

                if (!msDoc.IsDocumentType(Type: MacroscopeConstants.DocumentType.HTML))
                {
                    continue;
                }

                if (msDoc.GetIsExternal())
                {
                    continue;
                }

                Url        = msDoc.GetUrl();
                StatusCode = (int)msDoc.GetStatusCode();
                Robots     = msDoc.GetAllowedByRobotsAsString();
                SitemapUrl = DocumentList.GetDocumentNote(msDoc: msDoc);

                iCol = 1;

                this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

                if (msDoc.GetIsInternal())
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                }
                else
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                }

                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, InOut.ToString());

                if (InOut)
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                }
                else
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                }

                iCol++;

                this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, msDoc);

                iCol++;

                this.InsertAndFormatRedirectCell(ws, iRow, iCol, msDoc);

                iCol++;

                this.InsertAndFormatRobotsCell(ws, iRow, iCol, msDoc);

                iCol++;

                this.InsertAndFormatUrlCell(ws, iRow, iCol, SitemapUrl);

                if (AllowedHosts.IsInternalUrl(Url: SitemapUrl))
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                }
                else
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                }

                iRow++;
            }

            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax);
                var excelTable = rangeData.CreateTable();
            }
        }
        /**************************************************************************/

        private void BuildWorksheetPageDuplicateChecksums(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            decimal CountOuter = 0;
            decimal CountInner = 0;
            decimal DocCount   = 0;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            Dictionary <string, int> DuplicatesList = new Dictionary <string, int> (DocCollection.CountDocuments());
            Dictionary <string, MacroscopeDocument> DuplicatesDocList = new Dictionary <string, MacroscopeDocument> (DocCollection.CountDocuments());

            DocCount = ( decimal )DocCollection.CountDocuments();

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                string Checksum = msDoc.GetChecksum();

                if ((Checksum != null) && (Checksum.Length > 0))
                {
                    if (!DuplicatesDocList.ContainsKey(msDoc.GetUrl()))
                    {
                        DuplicatesDocList.Add(msDoc.GetUrl(), msDoc);
                    }

                    if (DuplicatesList.ContainsKey(Checksum))
                    {
                        DuplicatesList[Checksum] = DuplicatesList[Checksum] + 1;
                    }
                    else
                    {
                        DuplicatesList.Add(Checksum, 1);
                    }
                }
            }

            {
                ws.Cell(iRow, iCol).Value = "Status Code";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Status";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Occurrences";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Checksum";
                iCol++;

                ws.Cell(iRow, iCol).Value = "URL";
            }

            iColMax = iCol;

            iRow++;

            foreach (string Checksum in DuplicatesList.Keys)
            {
                CountOuter++;
                CountInner = 0;

                if (DuplicatesList[Checksum] > 1)
                {
                    foreach (MacroscopeDocument msDoc in  DuplicatesDocList.Values)
                    {
                        CountInner++;

                        if (DocCount > 0)
                        {
                            this.ProgressForm.UpdatePercentages(
                                Title: null,
                                Message: null,
                                MajorPercentage: -1,
                                ProgressLabelMajor: string.Format("Documents Processed: {0}", CountOuter),
                                MinorPercentage: (( decimal )100 / DocCount) * CountOuter,
                                ProgressLabelMinor: Checksum,
                                SubMinorPercentage: (( decimal )100 / DocCount) * CountInner,
                                ProgressLabelSubMinor: msDoc.GetUrl()
                                );
                        }

                        if (msDoc.GetChecksum() == Checksum)
                        {
                            iCol = 1;

                            int            StatusCode  = ( int )msDoc.GetStatusCode();
                            HttpStatusCode Status      = msDoc.GetStatusCode();
                            int            Occurrences = DuplicatesList[Checksum];

                            this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, StatusCode);
                            iCol++;

                            this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, Status);
                            iCol++;

                            this.InsertAndFormatContentCell(ws, iRow, iCol, Occurrences);
                            iCol++;

                            this.InsertAndFormatContentCell(ws, iRow, iCol, msDoc.GetChecksum());
                            iCol++;

                            this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

                            iRow++;
                        }
                    }
                }
            }

            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax);
                var excelTable = rangeData.CreateTable();
            }
        }
Ejemplo n.º 30
0
        /**************************************************************************/

        private void BuildWorksheetPageLinks(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            {
                ws.WriteField("URL");
                ws.WriteField("Link Type");
                ws.WriteField("Source URL");
                ws.WriteField("Target URL");
                ws.WriteField("Follow");
                ws.WriteField("Alt Text");
                ws.WriteField("Raw Source URL");
                ws.WriteField("Raw Target URL");

                ws.NextRecord();
            }

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc = DocCollection.GetDocument(Url: Url);

                foreach (MacroscopeLink Link in msDoc.IterateOutlinks())
                {
                    string LinkType = Link.GetLinkType().ToString();

                    string SourceUrl = Link.GetSourceUrl();
                    string TargetUrl = Link.GetTargetUrl();

                    string AltText = Link.GetAltText();

                    string RawSourceUrl = Link.GetRawSourceUrl();
                    string RawTargetUrl = Link.GetRawTargetUrl();

                    string DoFollow = "No Follow";

                    if (Link.GetDoFollow())
                    {
                        DoFollow = "Follow";
                    }

                    if (string.IsNullOrEmpty(AltText))
                    {
                        AltText = "";
                    }

                    if (string.IsNullOrEmpty(RawSourceUrl))
                    {
                        RawSourceUrl = "";
                    }

                    if (string.IsNullOrEmpty(RawTargetUrl))
                    {
                        RawTargetUrl = "";
                    }

                    this.InsertAndFormatUrlCell(ws, msDoc);

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(LinkType));

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(SourceUrl));

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(TargetUrl));

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(DoFollow));

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(AltText));

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(RawSourceUrl));

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(RawTargetUrl));

                    ws.NextRecord();
                }
            }
        }