/**************************************************************************/

        /// <summary>
        /// Creates a worker bound to <paramref name="JobMaster"/> and caches the job
        /// master's document collection, allowed hosts and include/exclude URL
        /// settings on this instance.
        /// </summary>
        /// <remarks>
        /// The crawl delay is taken from the preferences when that value is positive.
        /// When the preferences say the robots protocol is followed, a positive crawl
        /// delay reported by the job master replaces it.
        /// </remarks>
        /// <param name="JobMaster">Job master this worker reads its shared state from.</param>
        public MacroscopeJobWorker(MacroscopeJobMaster JobMaster)
        {
            this.SuppressDebugMsg = true;

            this.JobMaster          = JobMaster;
            this.DocCollection      = this.JobMaster.GetDocCollection();
            this.AllowedHosts       = this.JobMaster.GetAllowedHosts();
            this.IncludeExcludeUrls = this.JobMaster.GetIncludeExcludeUrls();

            // Preference-level delay; only applied when it is positive.
            var PreferredDelay = MacroscopePreferencesManager.GetCrawlDelay();

            if (PreferredDelay > 0)
            {
                this.CrawlDelay = PreferredDelay;
            }

            if (!MacroscopePreferencesManager.GetFollowRobotsProtocol())
            {
                return;
            }

            // A positive delay from the job master overrides the preference value.
            var MasterDelay = this.JobMaster.GetCrawlDelay();

            if (MasterDelay > 0)
            {
                this.CrawlDelay = MasterDelay;
            }
        }
示例#2
0
        /**************************************************************************/

        /// <summary>
        /// Writes the email address report to a CSV writer: a header record, then
        /// one record per email address key found on each document of type HTML in
        /// the job's document collection.
        /// </summary>
        /// <param name="JobMaster">Job master supplying the document collection.</param>
        /// <param name="ws">CSV writer that receives the records.</param>
        private void BuildWorksheetEmailAddresses(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

            // Header record.
            ws.WriteField("Email Address");
            ws.WriteField("URL");
            ws.NextRecord();

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                if (!msDoc.IsDocumentType(Type: MacroscopeConstants.DocumentType.HTML))
                {
                    continue;
                }

                Dictionary<string, string> EmailAddresses = msDoc.GetEmailAddresses();

                // Only the dictionary keys are reported; the values are not used here.
                foreach (string EmailAddress in EmailAddresses.Keys)
                {
                    this.InsertAndFormatContentCell(ws, EmailAddress);
                    this.InsertAndFormatUrlCell(ws, msDoc);

                    ws.NextRecord();
                }
            }
        }
        /**************************************************************************/

        /// <summary>
        /// Cleanses the URL list and, if any URLs remain, switches the job master
        /// to the LISTFILE run-time mode, passes every URL to the allowed hosts via
        /// AddFromUrl and adds it to the job master's URL queue.
        /// </summary>
        /// <returns>
        /// <c>true</c> when the cleansed list held at least one URL; otherwise <c>false</c>.
        /// </returns>
        public bool Execute()
        {
            MacroscopeAllowedHosts AllowedHosts = this.JobMaster.GetAllowedHosts();

            this.CleanseList();

            // Nothing to queue: leave the job master's run-time mode untouched.
            if (this.UrlList.Count <= 0)
            {
                return(false);
            }

            this.JobMaster.SetRunTimeMode(
                JobRunTimeMode: MacroscopeConstants.RunTimeMode.LISTFILE
                );

            int Index = 0;

            while (Index < this.UrlList.Count)
            {
                string ListedUrl = this.UrlList[Index];

                AllowedHosts.AddFromUrl(ListedUrl);
                this.JobMaster.AddUrlQueueItem(ListedUrl);

                Index++;
            }

            return(true);
        }
示例#4
0
        /**************************************************************************/

        /// <summary>
        /// Writes the XPath data-extraction report to a CSV writer: a header record,
        /// then one record per extracted label/value pair (both non-empty) for every
        /// document that the XPath data extractor reports it can be applied to.
        /// </summary>
        /// <param name="JobMaster">Job master supplying the document collection.</param>
        /// <param name="ws">CSV writer that receives the records.</param>
        private void BuildWorksheetXpaths(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

            // Header record.
            ws.WriteField(MacroscopeConstants.Url);
            ws.WriteField(MacroscopeConstants.StatusCode);
            ws.WriteField(MacroscopeConstants.Status);
            ws.WriteField(MacroscopeConstants.ContentType);
            ws.WriteField("Extracted Label");
            ws.WriteField("Extracted Value");
            ws.NextRecord();

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc = DocCollection.GetDocument(Url);

                if (!this.DataExtractorXpaths.CanApplyDataExtractorsToDocument(msDoc: msDoc))
                {
                    continue;
                }

                // Per-document values are only looked up for documents that passed
                // the filter above.
                string Status   = msDoc.GetStatusCode().ToString();
                string MimeType = msDoc.GetMimeType();

                foreach (KeyValuePair<string, string> DataExtractedPair in msDoc.IterateDataExtractedXpaths())
                {
                    string ExtractedLabel = DataExtractedPair.Key;
                    string ExtractedValue = DataExtractedPair.Value;

                    // Incomplete pairs are not reported.
                    if (string.IsNullOrEmpty(ExtractedLabel) || string.IsNullOrEmpty(ExtractedValue))
                    {
                        continue;
                    }

                    this.InsertAndFormatUrlCell(ws, msDoc);
                    this.InsertAndFormatStatusCodeCell(ws, msDoc);
                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(Status));
                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(MimeType));
                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(ExtractedLabel));
                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(ExtractedValue));

                    ws.NextRecord();
                }
            }
        }
        /// <summary>
        /// Adds three URLs on distinct hosts to a fresh allowed-hosts instance,
        /// asserts the count equals the number of URLs, removes the second URL and
        /// asserts the count has dropped by one.
        /// </summary>
        public void TestRemoveFromUrl()
        {
            MacroscopeAllowedHosts AllowedHosts = new MacroscopeAllowedHosts();

            List<string> TestUrls = new List<string>
            {
                "https://nazuke.github.io/SEOMacroscope/",
                "https://bogus.bogus.com/some/path/index.html",
                "https://www.google.com/"
            };

            foreach (string TestUrl in TestUrls)
            {
                AllowedHosts.AddFromUrl(Url: TestUrl);
            }

            // Every URL is expected to contribute one entry.
            int ExpectedBeforeRemoval = TestUrls.Count;

            Assert.AreEqual(
                ExpectedBeforeRemoval,
                AllowedHosts.Count(),
                string.Format("FAIL: {0} :: {1}", ExpectedBeforeRemoval, AllowedHosts.Count())
                );

            string UrlToRemove = TestUrls[1];

            this.DebugMsg(UrlToRemove);
            this.DebugMsg(AllowedHosts.Count().ToString());

            AllowedHosts.RemoveFromUrl(Url: UrlToRemove);

            this.DebugMsg(AllowedHosts.Count().ToString());

            // Removing one URL is expected to remove exactly one entry.
            int ExpectedAfterRemoval = TestUrls.Count - 1;

            Assert.AreEqual(
                ExpectedAfterRemoval,
                AllowedHosts.Count(),
                string.Format("FAIL: {0} :: {1}", ExpectedAfterRemoval, AllowedHosts.Count())
                );
        }
        /**************************************************************************/

        /// <summary>
        /// Writes the telephone number report to a CSV writer: a header record, then
        /// one record per telephone number key found on each document whose
        /// GetIsHtml() is true.
        /// </summary>
        /// <param name="JobMaster">Job master supplying the document collection.</param>
        /// <param name="ws">CSV writer that receives the records.</param>
        private void BuildWorksheetTelephoneNumbers(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

            // Header record.
            ws.WriteField("Telephone Number");
            ws.WriteField("URL");
            ws.NextRecord();

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                if (!msDoc.GetIsHtml())
                {
                    continue;
                }

                Dictionary<string, string> TelephoneNumbers = msDoc.GetTelephoneNumbers();

                // Only the dictionary keys are reported; the values are not used here.
                foreach (string TelephoneNumber in TelephoneNumbers.Keys)
                {
                    this.InsertAndFormatContentCell(ws, TelephoneNumber);
                    this.InsertAndFormatUrlCell(ws, msDoc);

                    ws.NextRecord();
                }
            }
        }
示例#7
0
        /**************************************************************************/

        /// <summary>
        /// Creates a link from this document's URL to <paramref name="AbsoluteUrl"/>
        /// and appends it to the outlinks list.
        /// </summary>
        /// <remarks>
        /// When the preferences say external links are not checked, the URL must be
        /// accepted by the document collection's allowed hosts (if that object is
        /// available); otherwise no link is created.
        /// </remarks>
        /// <param name="AbsoluteUrl">Target URL of the link.</param>
        /// <param name="LinkType">Link type stored on the new link.</param>
        /// <param name="Follow">Follow flag stored on the new link.</param>
        /// <returns>The link that was added, or <c>null</c> when the URL was rejected.</returns>
        private MacroscopeLink AddSitemapTextOutlink(
            string AbsoluteUrl,
            MacroscopeConstants.InOutLinkType LinkType,
            Boolean Follow
            )
        {
            bool RestrictToAllowedHosts = !MacroscopePreferencesManager.GetCheckExternalLinks();

            if (RestrictToAllowedHosts)
            {
                MacroscopeAllowedHosts Hosts = this.DocCollection.GetAllowedHosts();

                // A missing allowed-hosts object does not block the link.
                bool Rejected = (Hosts != null) && !Hosts.IsAllowedFromUrl(Url: AbsoluteUrl);

                if (Rejected)
                {
                    return(null);
                }
            }

            MacroscopeLink NewLink = new MacroscopeLink(
                SourceUrl: this.GetUrl(),
                TargetUrl: AbsoluteUrl,
                LinkType: LinkType,
                Follow: Follow
                );

            this.Outlinks.Add(NewLink);

            return(NewLink);
        }
示例#8
0
        /**************************************************************************/

        /// <summary>
        /// Writes the broken links report to a CSV writer: a header record, then one
        /// record per inbound hyperlink (with a non-empty source URL) of every
        /// document whose status code lies in the range 400-599.
        /// </summary>
        /// <param name="JobMaster">Job master supplying the document collection.</param>
        /// <param name="ws">CSV writer that receives the records.</param>
        private void BuildWorksheetPageBrokenLinks(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

            // Header record.
            ws.WriteField("Status Code");
            ws.WriteField("Status");
            ws.WriteField("Anchor Text");
            ws.WriteField("Alt Text");
            ws.WriteField("Origin URL");
            ws.WriteField("Destination URL");
            ws.NextRecord();

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                int StatusCode = (int)msDoc.GetStatusCode();

                if ((StatusCode < 400) || (StatusCode > 599))
                {
                    continue;
                }

                // Inbound links are only looked up for documents in the error range.
                MacroscopeHyperlinksIn HyperlinksIn = DocCollection.GetDocumentHyperlinksIn(msDoc.GetUrl());

                if (HyperlinksIn == null)
                {
                    continue;
                }

                string StatusCodeText = StatusCode.ToString();
                string Status         = msDoc.GetStatusCode().ToString();

                foreach (MacroscopeHyperlinkIn HyperlinkIn in HyperlinksIn.IterateLinks())
                {
                    string OriginUrl = HyperlinkIn.GetSourceUrl();

                    // Links without a source URL cannot be reported.
                    if (string.IsNullOrEmpty(OriginUrl))
                    {
                        continue;
                    }

                    this.InsertAndFormatContentCell(ws, StatusCodeText);
                    this.InsertAndFormatContentCell(ws, Status);
                    this.InsertAndFormatContentCell(ws, HyperlinkIn.GetAnchorText());
                    this.InsertAndFormatContentCell(ws, HyperlinkIn.GetAltText());
                    this.InsertAndFormatUrlCell(ws, OriginUrl);
                    this.InsertAndFormatUrlCell(ws, msDoc);

                    ws.NextRecord();
                }
            }
        }
示例#9
0
        /**************************************************************************/

        /// <summary>
        /// Adds a worksheet named <paramref name="WorksheetLabel"/> listing every
        /// document that is internal and not allowed by robots, then turns the
        /// written range (header plus data rows) into a table.
        /// </summary>
        /// <remarks>
        /// The URL cell is coloured green when the allowed hosts report the URL as
        /// internal and gray otherwise.
        /// </remarks>
        /// <param name="JobMaster">Job master supplying the documents and allowed hosts.</param>
        /// <param name="wb">Workbook the worksheet is added to.</param>
        /// <param name="WorksheetLabel">Name of the new worksheet.</param>
        private void BuildWorksheetBlockedByRobotsInternal(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            const int UrlColumn    = 1;
            const int StatusColumn = 2;

            var ws = wb.Worksheets.Add(WorksheetLabel);

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            // Header occupies row 1; data starts on row 2.
            ws.Cell(1, UrlColumn).Value    = "URL";
            ws.Cell(1, StatusColumn).Value = "Status";

            int NextRow = 2;

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                bool IsBlockedInternal = msDoc.GetIsInternal() && (!msDoc.GetAllowedByRobots());

                if (!IsBlockedInternal)
                {
                    continue;
                }

                this.InsertAndFormatUrlCell(ws, NextRow, UrlColumn, msDoc);

                var UrlColour = AllowedHosts.IsInternalUrl(Url: msDoc.GetUrl())
                    ? XLColor.Green
                    : XLColor.Gray;

                ws.Cell(NextRow, UrlColumn).Style.Font.SetFontColor(UrlColour);

                this.InsertAndFormatRobotsCell(ws, NextRow, StatusColumn, msDoc);

                NextRow++;
            }

            // NextRow points one past the last written row.
            ws.Range(1, 1, NextRow - 1, StatusColumn).CreateTable();
        }
        /**************************************************************************/

        /// <summary>
        /// Writes the sitemap errors report to a CSV writer: a header record, then
        /// one record for every outlink of an internal SITEMAPXML document whose
        /// linked document is internal and either has a status code in the range
        /// 400-599 or is not allowed by robots.
        /// </summary>
        /// <remarks>
        /// Outlinks whose target URL has no document in the collection are skipped
        /// instead of being dereferenced.
        /// </remarks>
        /// <param name="JobMaster">Job master supplying the document collection.</param>
        /// <param name="ws">CSV writer that receives the records.</param>
        private void BuildWorksheetSitemapErrors(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

            // Header record.
            ws.WriteField("Sitemap URL");
            ws.WriteField("Status Code");
            ws.WriteField("Robots");
            ws.WriteField("URL");
            ws.NextRecord();

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                bool IsInternalSitemap =
                    msDoc.GetIsInternal() &&
                    msDoc.IsDocumentType(Type: MacroscopeConstants.DocumentType.SITEMAPXML);

                if (!IsInternalSitemap)
                {
                    continue;
                }

                foreach (MacroscopeLink Outlink in msDoc.IterateOutlinks())
                {
                    string             TargetUrl   = Outlink.GetTargetUrl();
                    MacroscopeDocument msDocLinked = DocCollection.GetDocumentByUrl(Url: TargetUrl);

                    // The target may not be present in the collection; there is
                    // nothing to evaluate for it in that case.
                    if ((msDocLinked == null) || (!msDocLinked.GetIsInternal()))
                    {
                        continue;
                    }

                    int  StatusCode    = (int)msDocLinked.GetStatusCode();
                    bool IsErrorStatus = (StatusCode >= 400) && (StatusCode <= 599);

                    if ((!IsErrorStatus) && msDocLinked.GetAllowedByRobots())
                    {
                        continue;
                    }

                    // NOTE(review): the status code and robots cells are formatted
                    // from the sitemap document (msDoc), not the linked document -
                    // confirm that this is the intended content of those columns.
                    this.InsertAndFormatUrlCell(ws, msDoc);
                    this.InsertAndFormatStatusCodeCell(ws, msDoc);
                    this.InsertAndFormatRobotsCell(ws, msDoc);
                    this.InsertAndFormatUrlCell(ws, TargetUrl);

                    ws.NextRecord();
                }
            }
        }
示例#11
0
        /**************************************************************************/

        /// <summary>
        /// Writes the redirects audit report to a CSV writer: a header record, then
        /// one record per redirect document that has both a non-empty redirect-from
        /// URL and a non-empty redirect-to URL.
        /// </summary>
        /// <param name="JobMaster">Job master supplying the document collection.</param>
        /// <param name="ws">CSV writer that receives the records.</param>
        private void BuildWorksheetPageRedirectsAudit(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

            // Header record.
            ws.WriteField("Origin URL");
            ws.WriteField("Status Code");
            ws.WriteField("Status");
            ws.WriteField("Destination URL");
            ws.NextRecord();

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc = DocCollection.GetDocument(Url: Url);

                if (!msDoc.GetIsRedirect())
                {
                    continue;
                }

                string OriginURL      = msDoc.GetUrlRedirectFrom();
                string DestinationURL = msDoc.GetUrlRedirectTo();

                // A redirect is only reported when both ends are known.
                if (string.IsNullOrEmpty(OriginURL) || string.IsNullOrEmpty(DestinationURL))
                {
                    continue;
                }

                string StatusCode = ((int)msDoc.GetStatusCode()).ToString();
                string Status     = msDoc.GetStatusCode().ToString();

                this.InsertAndFormatUrlCell(ws, OriginURL);
                this.InsertAndFormatContentCell(ws, StatusCode);
                this.InsertAndFormatContentCell(ws, Status);
                this.InsertAndFormatUrlCell(ws, DestinationURL);

                ws.NextRecord();
            }
        }
        /**************************************************************************/

        /// <summary>
        /// Adds a worksheet named <paramref name="WorksheetLabel"/> with one row per
        /// email address key found on each document whose GetIsHtml() is true, then
        /// turns the written range (header plus data rows) into a table.
        /// </summary>
        /// <param name="JobMaster">Job master supplying the document collection.</param>
        /// <param name="wb">Workbook the worksheet is added to.</param>
        /// <param name="WorksheetLabel">Name of the new worksheet.</param>
        private void BuildWorksheetEmailAddresses(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            const int AddressColumn = 1;
            const int UrlColumn     = 2;

            var ws = wb.Worksheets.Add(WorksheetLabel);

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

            // Header occupies row 1; data starts on row 2.
            ws.Cell(1, AddressColumn).Value = "Email Address";
            ws.Cell(1, UrlColumn).Value     = "URL";

            int iRow = 2;

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                if (!msDoc.GetIsHtml())
                {
                    continue;
                }

                Dictionary<string, string> EmailAddresses = msDoc.GetEmailAddresses();

                // Only the dictionary keys are reported; the values are not used here.
                foreach (string EmailAddress in EmailAddresses.Keys)
                {
                    this.InsertAndFormatContentCell(ws, iRow, AddressColumn, EmailAddress);
                    this.InsertAndFormatUrlCell(ws, iRow, UrlColumn, msDoc);

                    iRow++;
                }
            }

            // iRow points one past the last written row.
            ws.Range(1, 1, iRow - 1, UrlColumn).CreateTable();
        }
        /**************************************************************************/

        /// <summary>
        /// Adds a worksheet named <paramref name="WorksheetLabel"/> with one row per
        /// telephone number key found on each document of type HTML, then turns the
        /// written range (header plus data rows) into a table.
        /// </summary>
        /// <param name="JobMaster">Job master supplying the document collection.</param>
        /// <param name="wb">Workbook the worksheet is added to.</param>
        /// <param name="WorksheetLabel">Name of the new worksheet.</param>
        private void BuildWorksheetTelephoneNumbers(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            const int NumberColumn = 1;
            const int UrlColumn    = 2;

            var ws = wb.Worksheets.Add(WorksheetLabel);

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

            // Header occupies row 1; data starts on row 2.
            ws.Cell(1, NumberColumn).Value = "Telephone Number";
            ws.Cell(1, UrlColumn).Value    = "URL";

            int iRow = 2;

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                if (!msDoc.IsDocumentType(Type: MacroscopeConstants.DocumentType.HTML))
                {
                    continue;
                }

                Dictionary<string, string> TelephoneNumbers = msDoc.GetTelephoneNumbers();

                // Only the dictionary keys are reported; the values are not used here.
                foreach (string TelephoneNumber in TelephoneNumbers.Keys)
                {
                    this.InsertAndFormatContentCell(ws, iRow, NumberColumn, TelephoneNumber);
                    this.InsertAndFormatUrlCell(ws, iRow, UrlColumn, msDoc);

                    iRow++;
                }
            }

            // iRow points one past the last written row.
            ws.Range(1, 1, iRow - 1, UrlColumn).CreateTable();
        }
        /**************************************************************************/

        /// <summary>
        /// Creates a link from this document's URL to <paramref name="AbsoluteUrl"/>
        /// and appends it to the outlinks list, unless the link is rejected.
        /// </summary>
        /// <remarks>
        /// A link is rejected when the preferences say external links are not
        /// checked and the collection's allowed hosts (if available) do not accept
        /// the URL, or when the link type is SITEMAPXML and the GetFetchXml()
        /// preference is false.
        /// </remarks>
        /// <param name="AbsoluteUrl">Target URL of the link.</param>
        /// <param name="LinkType">Link type stored on the new link.</param>
        /// <param name="Follow">Follow flag stored on the new link.</param>
        /// <returns>The link that was added, or <c>null</c> when it was rejected.</returns>
        private MacroscopeLink AddSitemapXmlOutlink(
            string AbsoluteUrl,
            MacroscopeConstants.InOutLinkType LinkType,
            Boolean Follow
            )
        {
            bool HostAccepted = true;

            if (!MacroscopePreferencesManager.GetCheckExternalLinks())
            {
                MacroscopeAllowedHosts Hosts = this.DocCollection.GetAllowedHosts();

                // A missing allowed-hosts object does not block the link.
                if ((Hosts != null) && (!Hosts.IsAllowedFromUrl(Url: AbsoluteUrl)))
                {
                    HostAccepted = false;
                }
            }

            // The XML fetch preference is only consulted for sitemap XML links.
            bool XmlFetchBlocked =
                (LinkType == MacroscopeConstants.InOutLinkType.SITEMAPXML) &&
                (!MacroscopePreferencesManager.GetFetchXml());

            if ((!HostAccepted) || XmlFetchBlocked)
            {
                return(null);
            }

            MacroscopeLink NewLink = new MacroscopeLink(
                SourceUrl: this.GetUrl(),
                TargetUrl: AbsoluteUrl,
                LinkType: LinkType,
                Follow: Follow
                );

            this.Outlinks.Add(NewLink);

            return(NewLink);
        }
示例#15
0
        /**************************************************************************/

        /// <summary>
        /// Writes the URI analysis report to a CSV writer: a header record, then one
        /// record per document key with the document's status, its checksum and the
        /// collection's checksum count for that checksum.
        /// </summary>
        /// <param name="JobMaster">Job master supplying the document collection.</param>
        /// <param name="ws">CSV writer that receives the records.</param>
        private void BuildWorksheetPageUriAnalysis(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

            // Header record.
            ws.WriteField("URL");
            ws.WriteField("Status Code");
            ws.WriteField("Status");
            ws.WriteField("Occurrences");
            ws.WriteField("Checksum");
            ws.NextRecord();

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc = DocCollection.GetDocument(Url: Url);

                string StatusCode = ((int)msDoc.GetStatusCode()).ToString();
                string Status     = msDoc.GetStatusCode().ToString();
                string Checksum   = msDoc.GetChecksum();

                // Count reported by the collection's checksum statistics.
                int Occurrences = DocCollection.GetStatsChecksumCount(Checksum: Checksum);

                this.InsertAndFormatUrlCell(ws, msDoc);
                this.InsertAndFormatContentCell(ws, StatusCode);
                this.InsertAndFormatContentCell(ws, Status);
                this.InsertAndFormatContentCell(ws, Occurrences.ToString());
                this.InsertAndFormatContentCell(ws, Checksum);

                ws.NextRecord();
            }
        }
        /**************************************************************************/

        /// <summary>
        /// Writes the observations report to a CSV writer: a header record, then one
        /// record per remark of every document, carrying the document's URL, status
        /// code, status and the remark's value.
        /// </summary>
        /// <param name="JobMaster">Job master supplying the document collection.</param>
        /// <param name="ws">CSV writer that receives the records.</param>
        private void BuildWorksheetPageObservations(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

            // Header record.
            ws.WriteField("URL");
            ws.WriteField("Status Code");
            ws.WriteField("Status");
            ws.WriteField("Observation");
            ws.NextRecord();

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                string StatusCode = ((int)msDoc.GetStatusCode()).ToString();
                string Status     = msDoc.GetStatusCode().ToString();

                // Only the remark value is reported; the key is not used here.
                foreach (KeyValuePair<string, string> RemarkPair in msDoc.IterateRemarks())
                {
                    this.InsertAndFormatUrlCell(ws, msDoc);
                    this.InsertAndFormatContentCell(ws, StatusCode);
                    this.InsertAndFormatContentCell(ws, Status);
                    this.InsertAndFormatContentCell(ws, RemarkPair.Value);

                    ws.NextRecord();
                }
            }
        }
示例#17
0
        /**************************************************************************/

        /// <summary>
        /// Writes the errors report to a CSV writer: a header record, then one
        /// record per document whose status code lies in the range 400-599.
        /// </summary>
        /// <param name="JobMaster">Job master supplying the document collection.</param>
        /// <param name="ws">CSV writer that receives the records.</param>
        private void BuildWorksheetErrors(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

            // Header record.
            ws.WriteField("Status Code");
            ws.WriteField("Status");
            ws.WriteField("URL");
            ws.NextRecord();

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc = DocCollection.GetDocument(Url);

                int StatusCode = (int)msDoc.GetStatusCode();

                if ((StatusCode < 400) || (StatusCode > 599))
                {
                    continue;
                }

                this.InsertAndFormatContentCell(ws, StatusCode.ToString());
                this.InsertAndFormatContentCell(ws, msDoc.GetStatusCode().ToString());
                this.InsertAndFormatUrlCell(ws, Url);

                ws.NextRecord();
            }
        }
示例#18
0
        /**************************************************************************/

        /// <summary>
        /// Adds a worksheet named <paramref name="WorksheetLabel"/> with one row per
        /// remark of every document (URL, status code, status, remark value), then
        /// turns the written range (header plus data rows) into a table.
        /// </summary>
        /// <remarks>
        /// The URL cell is coloured green when the allowed hosts report the URL as
        /// internal and gray otherwise.
        /// </remarks>
        /// <param name="JobMaster">Job master supplying the documents and allowed hosts.</param>
        /// <param name="wb">Workbook the worksheet is added to.</param>
        /// <param name="WorksheetLabel">Name of the new worksheet.</param>
        private void BuildWorksheetPageObservations(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            const int UrlColumn         = 1;
            const int StatusCodeColumn  = 2;
            const int StatusColumn      = 3;
            const int ObservationColumn = 4;

            var ws = wb.Worksheets.Add(WorksheetLabel);

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            // Header occupies row 1; data starts on row 2.
            ws.Cell(1, UrlColumn).Value         = "URL";
            ws.Cell(1, StatusCodeColumn).Value  = "Status Code";
            ws.Cell(1, StatusColumn).Value      = "Status";
            ws.Cell(1, ObservationColumn).Value = "Observation";

            int NextRow = 2;

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                string DocUrl         = msDoc.GetUrl();
                string StatusCodeText = ((int)msDoc.GetStatusCode()).ToString();
                string StatusText     = msDoc.GetStatusCode().ToString();

                foreach (KeyValuePair<string, string> Remark in msDoc.IterateRemarks())
                {
                    this.InsertAndFormatUrlCell(ws, NextRow, UrlColumn, msDoc);

                    var UrlColour = AllowedHosts.IsInternalUrl(Url: DocUrl)
                        ? XLColor.Green
                        : XLColor.Gray;

                    ws.Cell(NextRow, UrlColumn).Style.Font.SetFontColor(UrlColour);

                    this.InsertAndFormatContentCell(ws, NextRow, StatusCodeColumn, StatusCodeText);
                    this.InsertAndFormatContentCell(ws, NextRow, StatusColumn, StatusText);
                    this.InsertAndFormatContentCell(ws, NextRow, ObservationColumn, Remark.Value);

                    NextRow++;
                }
            }

            // NextRow points one past the last written row.
            ws.Range(1, 1, NextRow - 1, ObservationColumn).CreateTable();
        }
        /**************************************************************************/

        /// <summary>
        /// Adds a worksheet named <paramref name="WorksheetLabel"/> with one row per
        /// (term, document) pair: the term's value from <paramref name="DicTerms"/>,
        /// the term itself and the document URL. The written range (header plus
        /// data rows) is then turned into a table.
        /// </summary>
        /// <remarks>
        /// Progress is reported through the progress form: the minor percentage
        /// tracks terms processed and the sub-minor percentage tracks documents
        /// processed for the current term.
        /// </remarks>
        /// <param name="JobMaster">Job master supplying the document collection.</param>
        /// <param name="wb">Workbook the worksheet is added to.</param>
        /// <param name="WorksheetLabel">Name of the new worksheet.</param>
        /// <param name="DicTerms">Terms to report, mapped to the value written in the "Occurrences" column.</param>
        private void BuildWorksheetKeywordTerms(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel,
            Dictionary <string, int> DicTerms
            )
        {
            const int OccurrencesColumn = 1;
            const int TermColumn        = 2;
            const int UrlColumn         = 3;

            var     ws             = wb.Worksheets.Add(WorksheetLabel);
            decimal TermTotal      = DicTerms.Count;
            decimal TermsProcessed = 0;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            // Header occupies row 1; data starts on row 2.
            ws.Cell(1, OccurrencesColumn).Value = "Occurrences";
            ws.Cell(1, TermColumn).Value        = "Term";
            ws.Cell(1, UrlColumn).Value         = "URL";

            int NextRow = 2;

            foreach (KeyValuePair<string, int> TermEntry in DicTerms)
            {
                string Term = TermEntry.Key;

                MacroscopeDocumentList DocumentList = DocCollection.GetDeepKeywordAnalysDocumentList(Term);

                decimal DocTotal      = (decimal)DocumentList.CountDocuments();
                decimal DocsProcessed = 0;

                TermsProcessed++;

                // Guard against dividing by zero when there are no terms.
                if (TermTotal > 0)
                {
                    this.ProgressForm.UpdatePercentages(
                        Title: null,
                        Message: null,
                        MajorPercentage: -1,
                        ProgressLabelMajor: null,
                        MinorPercentage: (100m / TermTotal) * TermsProcessed,
                        ProgressLabelMinor: "Keywords Processed",
                        SubMinorPercentage: -1,
                        ProgressLabelSubMinor: null
                        );
                }

                foreach (MacroscopeDocument msDoc in DocumentList.IterateDocuments())
                {
                    DocsProcessed++;

                    // Guard against dividing by zero when the list reports no documents.
                    if (DocTotal > 0)
                    {
                        this.ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: -1,
                            ProgressLabelMajor: null,
                            MinorPercentage: -1,
                            ProgressLabelMinor: null,
                            SubMinorPercentage: (100m / DocTotal) * DocsProcessed,
                            ProgressLabelSubMinor: "Documents Processed"
                            );
                    }

                    this.InsertAndFormatContentCell(ws, NextRow, OccurrencesColumn, this.FormatIfMissing(TermEntry.Value.ToString()));
                    this.InsertAndFormatContentCell(ws, NextRow, TermColumn, this.FormatIfMissing(Term));
                    this.InsertAndFormatUrlCell(ws, NextRow, UrlColumn, msDoc.GetUrl());

                    NextRow++;
                }
            }

            // NextRow points one past the last written row.
            ws.Range(1, 1, NextRow - 1, UrlColumn).CreateTable();
        }
        /**************************************************************************/

        /// <summary>
        /// Writes the duplicate-ETag CSV sheet: a header record followed by one record
        /// for every document whose non-empty ETag value is carried by more than one document.
        /// </summary>
        /// <remarks>
        /// Progress is pushed to <c>this.ProgressForm</c> on every step of the inner scan.
        /// </remarks>
        /// <param name="JobMaster">Job that supplies the document collection to scan.</param>
        /// <param name="ws">CSV writer that receives the header and data records.</param>
        private void BuildWorksheetPageDuplicateEtags(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

            // Not referenced below; the lookup is retained from the original implementation.
            MacroscopeAllowedHosts AllowedHosts = JobMaster.GetAllowedHosts();

            int TotalDocuments = DocCollection.CountDocuments();

            // ETag value -> number of documents carrying it.
            Dictionary<string, int> EtagCounts = new Dictionary<string, int>(TotalDocuments);

            // URL -> document, restricted to documents that have a non-empty ETag.
            Dictionary<string, MacroscopeDocument> DocsByUrl = new Dictionary<string, MacroscopeDocument>(TotalDocuments);

            decimal DocCount   = (decimal)TotalDocuments;
            decimal CountOuter = 0;
            decimal CountInner = 0;

            // Pass 1: tally ETag occurrences and remember each document once per URL.
            foreach (MacroscopeDocument CrawledDoc in DocCollection.IterateDocuments())
            {
                string DocEtag = CrawledDoc.GetEtag();

                if (string.IsNullOrEmpty(DocEtag))
                {
                    continue;
                }

                string DocUrl = CrawledDoc.GetUrl();

                if (!DocsByUrl.ContainsKey(DocUrl))
                {
                    DocsByUrl.Add(DocUrl, CrawledDoc);
                }

                // Seen stays 0 when the ETag has not been encountered yet.
                int Seen;
                EtagCounts.TryGetValue(DocEtag, out Seen);
                EtagCounts[DocEtag] = Seen + 1;
            }

            // Header record.
            foreach (string Heading in new string[] { "Status Code", "Status", "Occurrences", "ETag", "URL" })
            {
                ws.WriteField(Heading);
            }

            ws.NextRecord();

            // Pass 2: for every ETag seen more than once, emit each document that carries it.
            foreach (KeyValuePair<string, int> EtagEntry in EtagCounts)
            {
                string Etag        = EtagEntry.Key;
                int    Occurrences = EtagEntry.Value;

                CountOuter++;
                CountInner = 0;

                if (Occurrences <= 1)
                {
                    continue;
                }

                foreach (MacroscopeDocument msDoc in DocsByUrl.Values)
                {
                    CountInner++;

                    if (DocCount > 0)
                    {
                        decimal PercentPerDoc = (decimal)100 / DocCount;

                        this.ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: -1,
                            ProgressLabelMajor: string.Format("Documents Processed: {0}", CountOuter),
                            MinorPercentage: PercentPerDoc * CountOuter,
                            ProgressLabelMinor: Etag,
                            SubMinorPercentage: PercentPerDoc * CountInner,
                            ProgressLabelSubMinor: msDoc.GetUrl()
                            );
                    }

                    if (msDoc.GetEtag() != Etag)
                    {
                        continue;
                    }

                    HttpStatusCode Status     = msDoc.GetStatusCode();
                    int            StatusCode = (int)Status;

                    this.InsertAndFormatStatusCodeCell(ws, StatusCode);
                    this.InsertAndFormatStatusCodeCell(ws, Status);
                    this.InsertAndFormatContentCell(ws, Occurrences);
                    this.InsertAndFormatContentCell(ws, Etag);
                    this.InsertAndFormatUrlCell(ws, msDoc);

                    ws.NextRecord();
                }
            }
        }
示例#21
0
        /**************************************************************************/


        /// <summary>
        /// Builds or refreshes the list view rows for those outbound hyperlinks of
        /// <paramref name="msDoc"/> whose target URL contains <paramref name="UrlFragment"/>.
        /// </summary>
        /// <remarks>
        /// A row that already exists in <c>DisplayListView</c> under the same key is updated in place.
        /// A newly created row is appended to <paramref name="ListViewItems"/>; it is not added
        /// to the control by this method.
        /// </remarks>
        /// <param name="ListViewItems">Receives newly created rows.</param>
        /// <param name="msDoc">Document whose outbound hyperlinks are examined.</param>
        /// <param name="Url">Source URL shown in the first column and used as half of the row key.</param>
        /// <param name="UrlFragment">
        /// Text a link's target URL must contain for the link to be listed
        /// (compared with <see cref="StringComparison.CurrentCulture"/>).
        /// </param>
        private void RenderListViewSearchTargetUrls(
            List <ListViewItem> ListViewItems,
            MacroscopeDocument msDoc,
            string Url,
            string UrlFragment
            )
        {
            MacroscopeAllowedHosts       AllowedHosts  = this.MainForm.GetJobMaster().GetAllowedHosts();
            MacroscopeHyperlinksOut      HyperlinksOut = msDoc.GetHyperlinksOut();
            MacroscopeDocumentCollection DocCollection = this.MainForm.GetJobMaster().GetDocCollection();

            foreach (MacroscopeHyperlinkOut HyperlinkOut in HyperlinksOut.IterateLinks())
            {
                string UrlTarget = HyperlinkOut.GetTargetUrl();

                // Filter before doing any other work: a null target must not reach the
                // digest/collection lookups, and non-matching links need no further processing.
                if (
                    (UrlTarget == null) ||
                    (UrlTarget.IndexOf(UrlFragment, StringComparison.CurrentCulture) < 0))
                {
                    continue;
                }

                string StatusCodeText = "Not crawled";
                string StatusText     = "Not crawled";
                string PairKey        = string.Join(":", UrlToDigest(Url: Url), UrlToDigest(Url: UrlTarget));
                string LinkTarget     = HyperlinkOut.GetLinkTarget();
                string LinkText       = HyperlinkOut.GetAnchorText();
                string LinkTitle      = HyperlinkOut.GetTitle();
                string AltText        = HyperlinkOut.GetAltText();

                // Treat null the same as empty so a link without text cannot raise a NullReferenceException.
                bool LinkTextMissing  = string.IsNullOrEmpty(LinkText);
                bool LinkTitleMissing = string.IsNullOrEmpty(LinkTitle);
                bool AltTextMissing   = string.IsNullOrEmpty(AltText);

                string LinkTextLabel  = LinkTextMissing ? "MISSING" : LinkText;
                string LinkTitleLabel = LinkTitleMissing ? "MISSING" : LinkTitle;
                string AltTextLabel   = AltTextMissing ? "MISSING" : AltText;

                bool   IsDoFollow = HyperlinkOut.GetDoFollow();
                string DoFollow   = IsDoFollow ? "Follow" : "No Follow";

                // The status columns keep "Not crawled" unless the target is in the collection.
                try
                {
                    if (DocCollection.ContainsDocument(Url: UrlTarget))
                    {
                        HttpStatusCode StatusCode = DocCollection.GetDocumentByUrl(Url: UrlTarget).GetStatusCode();

                        StatusCodeText = ((int)StatusCode).ToString();
                        StatusText     = StatusCode.ToString();
                    }
                    else
                    {
                        this.DebugMsg("Not in DocCollection");
                    }
                }
                catch (Exception ex)
                {
                    this.DebugMsg(ex.Message);
                }

                ListViewItem lvItem = null;

                if (this.DisplayListView.Items.ContainsKey(PairKey))
                {
                    // Row already displayed: refresh its cells.
                    try
                    {
                        lvItem = this.DisplayListView.Items[PairKey];

                        lvItem.SubItems[ColUrl].Text                 = Url;
                        lvItem.SubItems[ColUrlTarget].Text           = UrlTarget;
                        lvItem.SubItems[ColStatusCode].Text          = StatusCodeText;
                        lvItem.SubItems[ColStatus].Text              = StatusText;
                        lvItem.SubItems[ColDoFollow].Text            = DoFollow;
                        lvItem.SubItems[ColLinkTarget].Text          = LinkTarget;
                        lvItem.SubItems[ColLinkAnchorTextLabel].Text = LinkTextLabel;
                        lvItem.SubItems[ColLinkTitleLabel].Text      = LinkTitleLabel;
                        lvItem.SubItems[ColAltTextLabel].Text        = AltTextLabel;
                    }
                    catch (Exception ex)
                    {
                        this.DebugMsg(string.Format("MacroscopeDisplayLinks 1: {0}", ex.Message));
                    }
                }
                else
                {
                    // New row: sub-items are appended in column order.
                    try
                    {
                        lvItem = new ListViewItem(PairKey);
                        lvItem.UseItemStyleForSubItems = false;
                        lvItem.Name = PairKey;

                        lvItem.SubItems[ColUrl].Text = Url;
                        lvItem.SubItems.Add(UrlTarget);
                        lvItem.SubItems.Add(StatusCodeText);
                        lvItem.SubItems.Add(StatusText);
                        lvItem.SubItems.Add(DoFollow);
                        lvItem.SubItems.Add(LinkTarget);
                        lvItem.SubItems.Add(LinkTextLabel);
                        lvItem.SubItems.Add(LinkTitleLabel);
                        lvItem.SubItems.Add(AltTextLabel);

                        ListViewItems.Add(lvItem);
                    }
                    catch (Exception ex)
                    {
                        this.DebugMsg(string.Format("MacroscopeDisplayLinks 2: {0}", ex.Message));
                    }
                }

                if (lvItem == null)
                {
                    continue;
                }

                // Baseline colour for every cell; specific columns are overridden below.
                for (int i = 0; i < lvItem.SubItems.Count; i++)
                {
                    lvItem.SubItems[i].ForeColor = Color.Blue;
                }

                bool SourceAllowed = AllowedHosts.IsAllowedFromUrl(Url);

                lvItem.SubItems[ColUrl].ForeColor = SourceAllowed ? Color.Green : Color.Gray;

                lvItem.SubItems[ColUrlTarget].ForeColor =
                    AllowedHosts.IsAllowedFromUrl(UrlTarget) ? Color.Green : Color.Gray;

                if (SourceAllowed)
                {
                    lvItem.SubItems[ColDoFollow].ForeColor = IsDoFollow ? Color.Green : Color.Red;
                }
                else
                {
                    lvItem.SubItems[ColDoFollow].ForeColor = Color.Gray;
                }

                if (LinkTextMissing)
                {
                    lvItem.SubItems[ColLinkAnchorTextLabel].ForeColor = Color.Gray;
                }

                if (LinkTitleMissing)
                {
                    lvItem.SubItems[ColLinkTitleLabel].ForeColor = Color.Gray;
                }

                if (AltTextMissing)
                {
                    lvItem.SubItems[ColAltTextLabel].ForeColor = Color.Gray;
                }

                // A link with no anchor text, title or alt text at all is highlighted in red.
                if (LinkTextMissing && LinkTitleMissing && AltTextMissing)
                {
                    lvItem.SubItems[ColLinkAnchorTextLabel].ForeColor = Color.Red;
                    lvItem.SubItems[ColLinkTitleLabel].ForeColor      = Color.Red;
                    lvItem.SubItems[ColAltTextLabel].ForeColor        = Color.Red;
                }
            }
        }
示例#22
0
        /**************************************************************************/

        /// <summary>
        /// Adds a worksheet listing every redirect chain, one chain per row, with a
        /// URL column and a status column for each hop.
        /// </summary>
        /// <remarks>
        /// Row 1 holds the headings. The initial "Hop"/"Status" headings are replaced by
        /// "Hop N URL"/"Hop N Status" as soon as any chain is written. The data range is
        /// turned into a table only when at least one hop was written.
        /// </remarks>
        /// <param name="JobMaster">Job that supplies the document collection and allowed hosts.</param>
        /// <param name="wb">Workbook that receives the new worksheet.</param>
        /// <param name="WorksheetLabel">Name of the worksheet to add.</param>
        private void BuildWorksheetPageRedirectChains(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();
            List <List <MacroscopeRedirectChainDocStruct> > RedirectChains = DocCollection.GetMacroscopeRedirectChains();

            {
                ws.Cell(iRow, iCol).Value = "Hop";
                iCol++;
                ws.Cell(iRow, iCol).Value = "Status";
            }

            iRow++;

            foreach (List <MacroscopeRedirectChainDocStruct> DocList in RedirectChains)
            {
                int iHop = 1;

                iCol = 1;

                foreach (MacroscopeRedirectChainDocStruct RedirectChainDocStruct in DocList)
                {
                    string Url        = RedirectChainDocStruct.Url;
                    string StatusCode = RedirectChainDocStruct.StatusCode.ToString();

                    ws.Cell(1, iCol).Value = string.Format("Hop {0} URL", iHop);
                    this.InsertAndFormatUrlCell(ws, iRow, iCol, Url);

                    // Colour the URL cell itself, before moving on to the status column:
                    // green for internal URLs, gray for external ones.
                    if (AllowedHosts.IsInternalUrl(Url: Url))
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                    }

                    iCol++;

                    ws.Cell(1, iCol).Value = string.Format("Hop {0} Status", iHop);
                    this.InsertAndFormatContentCell(ws, iRow, iCol, StatusCode);
                    iCol++;

                    iHop++;
                }

                // iCol is now one past the last column written for this chain.
                if (iCol > iColMax)
                {
                    iColMax = iCol;
                }

                iRow++;
            }

            if ((iRow > 1) && (iColMax > 2))
            {
                var rangeData = ws.Range(1, 1, iRow - 1, iColMax - 1);

                rangeData.CreateTable();
            }
        }
示例#23
0
        /**************************************************************************/

        /// <summary>
        /// Adds or refreshes the list view row showing the URL, status and canonical
        /// URL of an HTML document. Redirects and non-HTML documents are ignored.
        /// </summary>
        /// <remarks>
        /// New rows are added directly to <c>DisplayListView</c>. The
        /// <paramref name="ListViewItems"/> and <paramref name="DocCollection"/> arguments
        /// are not used by this override.
        /// </remarks>
        /// <param name="ListViewItems">Not used by this override.</param>
        /// <param name="DocCollection">Not used by this override.</param>
        /// <param name="msDoc">Document to display.</param>
        /// <param name="Url">URL shown in the first column; its digest is the row key.</param>
        protected override void RenderListView(
            List <ListViewItem> ListViewItems,
            MacroscopeDocumentCollection DocCollection,
            MacroscopeDocument msDoc,
            string Url
            )
        {
            if (msDoc.GetIsRedirect())
            {
                return;
            }

            if (!msDoc.IsDocumentType(Type: MacroscopeConstants.DocumentType.HTML))
            {
                return;
            }

            MacroscopeAllowedHosts AllowedHosts = this.MainForm.GetJobMaster().GetAllowedHosts();
            string         Canonical  = msDoc.GetCanonical();
            HttpStatusCode StatusCode = msDoc.GetStatusCode();
            ListViewItem   lvItem     = null;
            string         PairKey    = UrlToDigest(Url: Url).ToString();

            // Null is treated like an empty canonical so it is reported as missing
            // instead of raising a NullReferenceException.
            bool   CanonicalMissing = string.IsNullOrEmpty(Canonical);
            string CanonicalLabel   = CanonicalMissing ? "MISSING" : Canonical;

            if (DisplayListView.Items.ContainsKey(PairKey))
            {
                try
                {
                    lvItem = DisplayListView.Items[PairKey];
                    lvItem.SubItems[0].Text = Url;
                    lvItem.SubItems[1].Text = StatusCode.ToString();
                    lvItem.SubItems[2].Text = CanonicalLabel;
                }
                catch (Exception ex)
                {
                    DebugMsg(string.Format("MacroscopeDisplayCanonical 1: {0}", ex.Message));
                }
            }
            else
            {
                try
                {
                    lvItem = new ListViewItem(PairKey);
                    lvItem.UseItemStyleForSubItems = false;
                    lvItem.Name = PairKey;

                    lvItem.SubItems[0].Text = Url;
                    lvItem.SubItems.Add(StatusCode.ToString());
                    lvItem.SubItems.Add(CanonicalLabel);

                    DisplayListView.Items.Add(lvItem);
                }
                catch (Exception ex)
                {
                    DebugMsg(string.Format("MacroscopeDisplayCanonical 2: {0}", ex.Message));
                }
            }

            if (lvItem == null)
            {
                return;
            }

            bool UrlIsInternal = AllowedHosts.IsInternalUrl(Url);
            int  StatusValue   = (int)StatusCode;

            lvItem.ForeColor = Color.Gray;

            // Column 0: URL.
            lvItem.SubItems[0].ForeColor = UrlIsInternal ? Color.Green : Color.Gray;

            // Column 1: status. The fallback must reset this same column; otherwise a
            // refreshed row would keep the colour of its previous status.
            if ((StatusValue >= 100) && (StatusValue <= 299))
            {
                lvItem.SubItems[1].ForeColor = Color.Green;
            }
            else if ((StatusValue >= 300) && (StatusValue <= 399))
            {
                lvItem.SubItems[1].ForeColor = Color.Orange;
            }
            else if ((StatusValue >= 400) && (StatusValue <= 599))
            {
                lvItem.SubItems[1].ForeColor = Color.Red;
            }
            else
            {
                lvItem.SubItems[1].ForeColor = Color.Gray;
            }

            // Column 2: canonical. A missing canonical is an error only on internal pages;
            // a present canonical is flagged when it points to a non-internal URL.
            if (CanonicalMissing)
            {
                lvItem.SubItems[2].ForeColor = UrlIsInternal ? Color.Red : Color.Gray;
            }
            else
            {
                lvItem.SubItems[2].ForeColor = AllowedHosts.IsInternalUrl(Canonical) ? Color.Green : Color.Red;
            }
        }
示例#24
0
        /**************************************************************************/

        /// <summary>
        /// Renders the custom-filter results for the given URLs into <c>DisplayListView</c>:
        /// one row per document, with URL, status code, status, MIME type and one column
        /// per custom filter slot.
        /// </summary>
        /// <remarks>
        /// Rows already present in the list view are updated in place; new rows are collected
        /// and added in one batch at the end. The progress form is always closed (when progress
        /// dialogues are enabled) and disposed, even if rendering fails part-way through.
        /// </remarks>
        /// <param name="DocCollection">Collection used to resolve each URL to a document.</param>
        /// <param name="UrlList">URLs to render; URLs with no document in the collection are skipped.</param>
        /// <param name="CustomFilter">Custom filter set supplying the patterns for the filter columns.</param>
        /// <exception cref="InvalidOperationException">
        /// Thrown when <c>FilterColOffset</c> is still -1.
        /// </exception>
        private void RenderListView(
            MacroscopeDocumentCollection DocCollection,
            List <string> UrlList,
            MacroscopeCustomFilters CustomFilter
            )
        {
            if (this.FilterColOffset == -1)
            {
                throw new InvalidOperationException("this.FilterColOffset invalid");
            }

            if (DocCollection.CountDocuments() == 0)
            {
                return;
            }

            // Filter pattern -> 1-based column slot (added to FilterColOffset below).
            Dictionary <string, int> FilterColsTable = new Dictionary <string, int>(CustomFilter.GetSize());
            List <ListViewItem>      ListViewItems   = new List <ListViewItem>();

            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

            try
            {
                decimal Count           = 0;
                decimal TotalDocs       = (decimal)DocCollection.CountDocuments();
                decimal MajorPercentage = ((decimal)100 / TotalDocs) * Count;

                if (MacroscopePreferencesManager.GetShowProgressDialogues())
                {
                    ProgressForm.UpdatePercentages(
                        Title: "Preparing Display",
                        Message: "Processing document collection for display:",
                        MajorPercentage: MajorPercentage,
                        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                        );
                }

                for (int Slot = 0; Slot < CustomFilter.GetSize(); Slot++)
                {
                    string FilterPattern = CustomFilter.GetPattern(Slot).Key;

                    // A pattern that repeats an earlier slot is stored under a placeholder key,
                    // so lookups by pattern keep resolving to the first slot that used it.
                    if (FilterColsTable.ContainsKey(FilterPattern))
                    {
                        FilterColsTable.Add(string.Format("EMPTY{0}", Slot + 1), Slot + 1);
                    }
                    else
                    {
                        FilterColsTable.Add(FilterPattern, Slot + 1);
                    }
                }

                foreach (string Url in UrlList)
                {
                    MacroscopeDocument msDoc = DocCollection.GetDocumentByUrl(Url: Url);

                    if (msDoc == null)
                    {
                        continue;
                    }

                    string DocUrl     = msDoc.GetUrl();
                    string PairKey    = DocUrl;
                    string StatusCode = ((int)msDoc.GetStatusCode()).ToString();
                    string Status     = msDoc.GetStatusCode().ToString();
                    string MimeType   = msDoc.GetMimeType();

                    if (!CustomFilter.CanApplyCustomFiltersToDocument(msDoc: msDoc))
                    {
                        continue;
                    }

                    ListViewItem lvItem;

                    if (this.DisplayListView.Items.ContainsKey(PairKey))
                    {
                        lvItem = this.DisplayListView.Items[PairKey];
                    }
                    else
                    {
                        lvItem = new ListViewItem(PairKey);
                        lvItem.UseItemStyleForSubItems = false;
                        lvItem.Name = PairKey;

                        // Four fixed placeholder cells, then one per filter slot.
                        lvItem.SubItems.Add("");
                        lvItem.SubItems.Add("");
                        lvItem.SubItems.Add("");
                        lvItem.SubItems.Add("");

                        for (int Slot = 0; Slot < CustomFilter.GetSize(); Slot++)
                        {
                            lvItem.SubItems.Add("");
                        }

                        ListViewItems.Add(lvItem);
                    }

                    try
                    {
                        lvItem.SubItems[ColUrl].Text        = DocUrl;
                        lvItem.SubItems[ColStatusCode].Text = StatusCode;
                        lvItem.SubItems[ColStatus].Text     = Status;
                        lvItem.SubItems[ColMimeType].Text   = MimeType;

                        for (int Slot = 0; Slot < CustomFilter.GetSize(); Slot++)
                        {
                            string FilterPattern = CustomFilter.GetPattern(Slot: Slot).Key;
                            KeyValuePair <string, MacroscopeConstants.TextPresence> Pair = msDoc.GetCustomFilteredItem(Text: FilterPattern);
                            int ColOffset = this.FilterColOffset + FilterColsTable[FilterPattern];

                            if ((Pair.Key == null) || (Pair.Value == MacroscopeConstants.TextPresence.UNDEFINED))
                            {
                                continue;
                            }

                            lvItem.SubItems[ColOffset].Text = MacroscopeConstants.TextPresenceLabels[Pair.Value];

                            switch (Pair.Value)
                            {
                            case MacroscopeConstants.TextPresence.CONTAINS_STRING:
                            case MacroscopeConstants.TextPresence.NOT_CONTAINS_STRING:
                            case MacroscopeConstants.TextPresence.CONTAINS_REGEX:
                            case MacroscopeConstants.TextPresence.NOT_CONTAINS_REGEX:
                                lvItem.SubItems[ColOffset].ForeColor = Color.Green;
                                break;

                            case MacroscopeConstants.TextPresence.MUST_CONTAIN_STRING:
                            case MacroscopeConstants.TextPresence.SHOULD_NOT_CONTAIN_STRING:
                            case MacroscopeConstants.TextPresence.MUST_CONTAIN_REGEX:
                            case MacroscopeConstants.TextPresence.SHOULD_NOT_CONTAIN_REGEX:
                                lvItem.SubItems[ColOffset].ForeColor = Color.Red;
                                break;

                            default:
                                lvItem.SubItems[ColOffset].ForeColor = Color.Gray;
                                break;
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        DebugMsg(string.Format("MacroscopeDisplayCustomFilters: {0}", ex.Message));
                        DebugMsg(string.Format("MacroscopeDisplayCustomFilters: {0}", ex.StackTrace));
                    }

                    lvItem.SubItems[ColUrl].ForeColor = msDoc.GetIsInternal() ? Color.Green : Color.Gray;

                    // Colour both status columns by the leading digit of the numeric status code.
                    Color StatusColor;

                    switch (StatusCode.Length > 0 ? StatusCode[0] : '\0')
                    {
                    case '2':
                        StatusColor = Color.Green;
                        break;

                    case '3':
                        StatusColor = Color.Goldenrod;
                        break;

                    case '4':
                    case '5':
                        StatusColor = Color.Red;
                        break;

                    default:
                        StatusColor = Color.Blue;
                        break;
                    }

                    lvItem.SubItems[ColStatusCode].ForeColor = StatusColor;
                    lvItem.SubItems[ColStatus].ForeColor     = StatusColor;

                    if (MacroscopePreferencesManager.GetShowProgressDialogues())
                    {
                        Count++;

                        MajorPercentage = ((decimal)100 / TotalDocs) * Count;

                        ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: MajorPercentage,
                            ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                            );
                    }
                }

                this.DisplayListView.Items.AddRange(ListViewItems.ToArray());

                this.DisplayListView.AutoResizeColumns(ColumnHeaderAutoResizeStyle.HeaderSize);

                this.DisplayListView.Columns[ColUrl].Width        = 300;
                this.DisplayListView.Columns[ColStatusCode].Width = 100;
                this.DisplayListView.Columns[ColStatus].Width     = 100;
                this.DisplayListView.Columns[ColMimeType].Width   = 100;
            }
            finally
            {
                // Release the progress form on every exit path; dispose even if closing fails.
                try
                {
                    if (MacroscopePreferencesManager.GetShowProgressDialogues())
                    {
                        ProgressForm.DoClose();
                    }
                }
                finally
                {
                    ProgressForm.Dispose();
                }
            }
        }
示例#25
0
        /**************************************************************************/

        /// <summary>
        /// Adds a worksheet with one row per outbound link of every document in the
        /// job's collection, then turns the written range into a table.
        /// </summary>
        /// <param name="JobMaster">Job that supplies the document collection and allowed hosts.</param>
        /// <param name="wb">Workbook that receives the new worksheet.</param>
        /// <param name="WorksheetLabel">Name of the worksheet to add.</param>
        private void BuildWorksheetPageLinks(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            string[] Headings = new string[]
            {
                "URL",
                "Link Type",
                "Source URL",
                "Target URL",
                "Follow",
                "Alt Text",
                "Raw Source URL",
                "Raw Target URL"
            };

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = Headings.Length;

            // Heading row.
            for (int Index = 0; Index < Headings.Length; Index++)
            {
                ws.Cell(iRow, Index + 1).Value = Headings[Index];
            }

            iRow++;

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                foreach (MacroscopeLink Link in msDoc.IterateOutlinks())
                {
                    string LinkType     = Link.GetLinkType().ToString();
                    string SourceUrl    = Link.GetSourceUrl();
                    string TargetUrl    = Link.GetTargetUrl();
                    string AltText      = Link.GetAltText() ?? "";
                    string RawSourceUrl = Link.GetRawSourceUrl() ?? "";
                    string RawTargetUrl = Link.GetRawTargetUrl() ?? "";
                    string DoFollow     = Link.GetDoFollow() ? "Follow" : "No Follow";

                    // First column: the document URL, green when internal and gray otherwise.
                    iCol = 1;

                    this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

                    var UrlColor = AllowedHosts.IsInternalUrl(Url: msDoc.GetUrl()) ? XLColor.Green : XLColor.Gray;

                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(UrlColor);

                    // Remaining columns, in heading order.
                    string[] RowValues = new string[]
                    {
                        LinkType,
                        SourceUrl,
                        TargetUrl,
                        DoFollow,
                        AltText,
                        RawSourceUrl,
                        RawTargetUrl
                    };

                    foreach (string CellValue in RowValues)
                    {
                        iCol++;

                        this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(CellValue));
                    }

                    iRow++;
                }
            }

            var rangeData  = ws.Range(1, 1, iRow - 1, iColMax);
            var excelTable = rangeData.CreateTable();
        }
示例#26
0
        /**************************************************************************/

        /// <summary>
        /// Adds a worksheet that lists, for every redirecting document, the pages that link to it.
        /// </summary>
        /// <remarks>
        /// A document is reported when its numeric status code lies in the range 300-399 and the
        /// document collection holds inbound hyperlinks for it. One row is written per inbound
        /// link that has a non-empty source URL, using the columns "Status Code", "Status",
        /// "Origin URL" and "Destination URL". The written range is converted into a table at
        /// the end.
        /// </remarks>
        /// <param name="JobMaster">Supplies the document collection and the allowed hosts.</param>
        /// <param name="wb">Workbook that receives the new worksheet.</param>
        /// <param name="WorksheetLabel">Name of the worksheet to add.</param>
        private void BuildWorksheetPageRedirectedLinks(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            // Header row.
            {
                ws.Cell(iRow, iCol).Value = "Status Code";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Status";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Origin URL";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Destination URL";
            }

            // The last header column is the right-hand edge of the table.
            iColMax = iCol;

            iRow++;

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument     msDoc        = DocCollection.GetDocument(Url);
                MacroscopeHyperlinksIn HyperlinksIn = DocCollection.GetDocumentHyperlinksIn(Url);
                int    StatusCode = ( int )msDoc.GetStatusCode();
                string Status     = msDoc.GetStatusCode().ToString();

                bool IsRedirect = (StatusCode >= 300) && (StatusCode <= 399);

                if (!IsRedirect || (HyperlinksIn == null))
                {
                    continue;
                }

                foreach (MacroscopeHyperlinkIn HyperlinkIn in HyperlinksIn.IterateLinks())
                {
                    string OriginUrl = HyperlinkIn.GetSourceUrl();

                    if (string.IsNullOrEmpty(OriginUrl))
                    {
                        continue;
                    }

                    iCol = 1;

                    // Every row written here belongs to a 3xx document, so the status cells
                    // always take the non-error colour. The former 4xx/5xx "red" branch could
                    // never be reached from inside the 300-399 filter and has been removed.
                    this.InsertAndFormatContentCell(ws, iRow, iCol, StatusCode.ToString());
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Blue);

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, Status);
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Blue);

                    iCol++;

                    // Origin: the page that carries the link to the redirecting document.
                    this.InsertAndFormatUrlCell(ws, iRow, iCol, OriginUrl);

                    if (AllowedHosts.IsInternalUrl(Url: OriginUrl))
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                    }

                    iCol++;

                    // Destination: the redirecting document itself.
                    this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

                    if (AllowedHosts.IsInternalUrl(Url: Url))
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                    }

                    iRow++;
                }
            }

            // iRow points one past the last written row.
            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax);
                var excelTable = rangeData.CreateTable();
            }
        }
        /**************************************************************************/

        /// <summary>
        /// Adds a worksheet listing the documents whose checksum was counted more than once.
        /// </summary>
        /// <remarks>
        /// A first pass over the document collection counts how often each non-empty checksum
        /// occurs and remembers one document per URL. A second pass walks the checksums and,
        /// for each one counted more than once, writes a row for every remembered document
        /// carrying that checksum. Progress is reported through <c>this.ProgressForm</c>
        /// while the remembered documents are scanned.
        /// </remarks>
        /// <param name="JobMaster">Supplies the document collection.</param>
        /// <param name="wb">Workbook that receives the new worksheet.</param>
        /// <param name="WorksheetLabel">Name of the worksheet to add.</param>
        private void BuildWorksheetPageDuplicateChecksums(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            Dictionary <string, int> ChecksumCounts = new Dictionary <string, int> (DocCollection.CountDocuments());
            Dictionary <string, MacroscopeDocument> DocsByUrl = new Dictionary <string, MacroscopeDocument> (DocCollection.CountDocuments());

            decimal DocCount = ( decimal )DocCollection.CountDocuments();

            // Pass 1: tally checksums and remember the first document seen for each URL.
            foreach (MacroscopeDocument Candidate in DocCollection.IterateDocuments())
            {
                string CandidateChecksum = Candidate.GetChecksum();

                if (string.IsNullOrEmpty(CandidateChecksum))
                {
                    continue;
                }

                string CandidateUrl = Candidate.GetUrl();

                if (!DocsByUrl.ContainsKey(CandidateUrl))
                {
                    DocsByUrl.Add(CandidateUrl, Candidate);
                }

                // TryGetValue leaves Seen at 0 for a checksum not encountered before.
                int Seen;
                ChecksumCounts.TryGetValue(CandidateChecksum, out Seen);
                ChecksumCounts[CandidateChecksum] = Seen + 1;
            }

            // Header row.
            string [] Headers = { "Status Code", "Status", "Occurrences", "Checksum", "URL" };

            for (int i = 0; i < Headers.Length; i++)
            {
                ws.Cell(1, i + 1).Value = Headers[i];
            }

            int iColMax = Headers.Length;
            int iRow    = 2;

            decimal CountOuter = 0;

            // Pass 2: emit one row per remembered document for every repeated checksum.
            foreach (KeyValuePair <string, int> Entry in ChecksumCounts)
            {
                string Checksum    = Entry.Key;
                int    Occurrences = Entry.Value;

                CountOuter++;

                if (Occurrences <= 1)
                {
                    continue;
                }

                decimal CountInner = 0;

                foreach (MacroscopeDocument Doc in DocsByUrl.Values)
                {
                    CountInner++;

                    if (DocCount > 0)
                    {
                        decimal PercentPerDoc = ( decimal )100 / DocCount;

                        this.ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: -1,
                            ProgressLabelMajor: string.Format("Documents Processed: {0}", CountOuter),
                            MinorPercentage: PercentPerDoc * CountOuter,
                            ProgressLabelMinor: Checksum,
                            SubMinorPercentage: PercentPerDoc * CountInner,
                            ProgressLabelSubMinor: Doc.GetUrl()
                            );
                    }

                    if (Doc.GetChecksum() != Checksum)
                    {
                        continue;
                    }

                    int            StatusCode = ( int )Doc.GetStatusCode();
                    HttpStatusCode Status     = Doc.GetStatusCode();

                    int iCol = 1;

                    this.InsertAndFormatStatusCodeCell(ws, iRow, iCol++, StatusCode);
                    this.InsertAndFormatStatusCodeCell(ws, iRow, iCol++, Status);
                    this.InsertAndFormatContentCell(ws, iRow, iCol++, Occurrences);
                    this.InsertAndFormatContentCell(ws, iRow, iCol++, Doc.GetChecksum());
                    this.InsertAndFormatUrlCell(ws, iRow, iCol, Doc);

                    iRow++;
                }
            }

            // iRow points one past the last written row.
            ws.Range(1, 1, iRow - 1, iColMax).CreateTable();
        }
        /**************************************************************************/

        /// <summary>
        /// Adds a worksheet listing internal HTML and PDF documents whose title occurs more than once.
        /// </summary>
        /// <remarks>
        /// Every document in the collection advances the progress display. A row with the
        /// columns "URL", "Occurrences" and "Title" is written only for documents that the
        /// allowed-hosts list treats as internal, that are of type HTML or PDF, and whose
        /// title count reported by the document collection is greater than one. The written
        /// range is converted into a table at the end.
        /// </remarks>
        /// <param name="JobMaster">Supplies the document collection and the allowed hosts.</param>
        /// <param name="wb">Workbook that receives the new worksheet.</param>
        /// <param name="WorksheetLabel">Name of the worksheet to add.</param>
        private void BuildWorksheetPageDuplicateTitles(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            decimal Count    = 0;
            decimal DocCount = 0;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            DocCount = ( decimal )DocCollection.CountDocuments();

            // Header row.
            {
                ws.Cell(iRow, iCol).Value = "URL";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Occurrences";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Title";
            }

            // The last header column is the right-hand edge of the table.
            iColMax = iCol;

            iRow++;

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                // Progress is reported for every document, including those filtered out below.
                if (DocCount > 0)
                {
                    Count++;
                    this.ProgressForm.UpdatePercentages(
                        Title: null,
                        Message: null,
                        MajorPercentage: -1,
                        ProgressLabelMajor: string.Format("Documents Processed: {0}", Count),
                        MinorPercentage: (( decimal )100 / DocCount) * Count,
                        ProgressLabelMinor: msDoc.GetUrl(),
                        SubMinorPercentage: -1,
                        ProgressLabelSubMinor: null
                        );
                }

                if (!AllowedHosts.IsInternalUrl(Url: msDoc.GetUrl()))
                {
                    continue;
                }

                // Only HTML and PDF documents are reported.
                switch (msDoc.GetDocumentType())
                {
                case MacroscopeConstants.DocumentType.HTML:
                case MacroscopeConstants.DocumentType.PDF:
                    break;

                default:
                    continue;
                }

                string Title       = msDoc.GetTitle();
                int    Occurrences = DocCollection.GetStatsTitleCount(msDoc: msDoc);

                if (Occurrences <= 1)
                {
                    continue;
                }

                iCol = 1;

                this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

                if (msDoc.GetIsInternal())
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                }
                else
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                }

                iCol++;

                // Rows are written only for Occurrences > 1, so the count is always shown in
                // the warning colour. The former "green" branch for a count of one or less
                // could never be reached here and has been removed.
                this.InsertAndFormatContentCell(ws, iRow, iCol, Occurrences);
                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Orange);

                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(Title));

                iRow++;
            }

            // iRow points one past the last written row.
            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax);
                var excelTable = rangeData.CreateTable();
            }
        }
示例#29
0
        /**************************************************************************/

        /// <summary>
        /// Adds or refreshes one list view row per CSS-selector value extracted from the
        /// documents named in <paramref name="UrlList"/>.
        /// </summary>
        /// <remarks>
        /// Each row is keyed by a string built from digests of the document URL, the selector
        /// label and the extracted value. A row already present in the list view is updated in
        /// place; otherwise a new row is queued and all queued rows are added in one call after
        /// the loop. Documents the extractor cannot be applied to, and pairs with an empty
        /// label or value, are skipped. Nothing is done when the document collection is empty.
        /// The progress form is closed and disposed even if rendering throws.
        /// </remarks>
        /// <param name="DocCollection">Collection the documents are looked up in.</param>
        /// <param name="UrlList">URLs of the documents to render.</param>
        /// <param name="DataExtractor">Decides whether extracted data applies to a document.</param>
        private void RenderListView(
            MacroscopeDocumentCollection DocCollection,
            List <string> UrlList,
            MacroscopeDataExtractorCssSelectors DataExtractor
            )
        {
            if (DocCollection.CountDocuments() == 0)
            {
                return;
            }

            List <ListViewItem> ListViewItems = new List <ListViewItem> ();

            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

            try
            {
                decimal Count           = 0;
                decimal TotalDocs       = ( decimal )DocCollection.CountDocuments();
                decimal MajorPercentage = (( decimal )100 / TotalDocs) * Count;

                if (MacroscopePreferencesManager.GetShowProgressDialogues())
                {
                    ProgressForm.UpdatePercentages(
                        Title: "Preparing Display",
                        Message: "Processing document collection for display:",
                        MajorPercentage: MajorPercentage,
                        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                        );
                }

                foreach (string Url in UrlList)
                {
                    MacroscopeDocument msDoc = DocCollection.GetDocumentByUrl(Url: Url);

                    // NOTE(review): presumably GetDocumentByUrl yields null for a URL that is
                    // not in the collection - confirm. Skip instead of dereferencing null.
                    if (msDoc == null)
                    {
                        DebugMsg(string.Format("MacroscopeDisplayDataExtractorCssSelectors MISSING DOCUMENT: {0}", Url));
                        continue;
                    }

                    string DocUrl     = msDoc.GetUrl();
                    string StatusCode = (( int )msDoc.GetStatusCode()).ToString();
                    string Status     = msDoc.GetStatusCode().ToString();
                    string MimeType   = msDoc.GetMimeType();

                    if (!DataExtractor.CanApplyDataExtractorsToDocument(msDoc: msDoc))
                    {
                        continue;
                    }

                    // Both colours depend only on the document, so they are worked out once
                    // here rather than once per extracted pair.
                    Color UrlColour = msDoc.GetIsInternal() ? Color.Green : Color.Gray;
                    Color StatusColour;

                    if (Regex.IsMatch(StatusCode, "^[2]"))
                    {
                        StatusColour = Color.Green;
                    }
                    else
                    if (Regex.IsMatch(StatusCode, "^[3]"))
                    {
                        StatusColour = Color.Goldenrod;
                    }
                    else
                    if (Regex.IsMatch(StatusCode, "^[45]"))
                    {
                        StatusColour = Color.Red;
                    }
                    else
                    {
                        StatusColour = Color.Blue;
                    }

                    foreach (KeyValuePair <string, string> DataExtractedPair in msDoc.IterateDataExtractedCssSelectors())
                    {
                        ListViewItem lvItem           = null;
                        string       CssSelectorLabel = DataExtractedPair.Key;
                        string       ExtractedValue   = DataExtractedPair.Value;
                        string       PairKey          = null;

                        if (
                            string.IsNullOrEmpty(CssSelectorLabel) ||
                            string.IsNullOrEmpty(ExtractedValue))
                        {
                            continue;
                        }

                        PairKey = string.Join(
                            ":",
                            UrlToDigest(DocUrl),
                            UrlToDigest(Macroscope.GetStringDigest(Text: CssSelectorLabel)),
                            UrlToDigest(Macroscope.GetStringDigest(Text: ExtractedValue))
                            );

                        if (this.DisplayListView.Items.ContainsKey(PairKey))
                        {
                            lvItem = this.DisplayListView.Items[PairKey];
                        }
                        else
                        {
                            lvItem = new ListViewItem(PairKey);
                            lvItem.UseItemStyleForSubItems = false;
                            lvItem.Name = PairKey;

                            // Create the sub-items that the Col* indexes below address.
                            for (int i = 0; i < 6; i++)
                            {
                                lvItem.SubItems.Add("");
                            }

                            ListViewItems.Add(lvItem);
                        }

                        // The original logged this case and then dereferenced lvItem anyway;
                        // move on to the next pair instead.
                        if (lvItem == null)
                        {
                            DebugMsg(string.Format("MacroscopeDisplayDataExtractorCssSelectors MISSING: {0}", PairKey));
                            continue;
                        }

                        try
                        {
                            lvItem.SubItems[ColUrl].Text              = DocUrl;
                            lvItem.SubItems[ColStatusCode].Text       = StatusCode;
                            lvItem.SubItems[ColStatus].Text           = Status;
                            lvItem.SubItems[ColMimeType].Text         = MimeType;
                            lvItem.SubItems[ColCssSelectorLabel].Text = CssSelectorLabel;
                            lvItem.SubItems[ColExtractedValue].Text   = ExtractedValue;
                        }
                        catch (Exception ex)
                        {
                            DebugMsg(string.Format("MacroscopeDisplayDataExtractorCssSelectors: {0}", ex.Message));
                            DebugMsg(string.Format("MacroscopeDisplayDataExtractorCssSelectors: {0}", ex.StackTrace));
                        }

                        lvItem.SubItems[ColUrl].ForeColor        = UrlColour;
                        lvItem.SubItems[ColStatusCode].ForeColor = StatusColour;
                        lvItem.SubItems[ColStatus].ForeColor     = StatusColour;
                    }

                    if (MacroscopePreferencesManager.GetShowProgressDialogues())
                    {
                        Count++;

                        MajorPercentage = (( decimal )100 / TotalDocs) * Count;

                        ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: MajorPercentage,
                            ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                            );
                    }
                }

                this.DisplayListView.Items.AddRange(ListViewItems.ToArray());

                this.DisplayListView.AutoResizeColumns(ColumnHeaderAutoResizeStyle.ColumnContent);

                // Fixed widths override the auto-sized result for these four columns.
                this.DisplayListView.Columns[ColUrl].Width        = 300;
                this.DisplayListView.Columns[ColStatusCode].Width = 100;
                this.DisplayListView.Columns[ColStatus].Width     = 100;
                this.DisplayListView.Columns[ColMimeType].Width   = 100;
            }
            finally
            {
                // Runs on success and on failure so the progress form is never left behind.
                if (MacroscopePreferencesManager.GetShowProgressDialogues())
                {
                    ProgressForm.DoClose();
                }

                ProgressForm.Dispose();
            }
        }
示例#30
0
        /**************************************************************************/

        /// <summary>
        /// Adds or refreshes one list view row per queued job item, showing its 1-based
        /// position in the queue and its URL.
        /// </summary>
        /// <remarks>
        /// A row already present in the list view (looked up by URL) has its position text
        /// updated; otherwise a new row is queued and all queued rows are added in one call
        /// after the loop. The URL sub-item is coloured green when the allowed-hosts list
        /// treats the URL as internal, and gray otherwise. A null or empty queue is a no-op.
        /// The progress form is closed and disposed even if rendering throws.
        /// </remarks>
        /// <param name="UriQueue">Queued job items to display; may be null or empty.</param>
        private void RenderListView(MacroscopeJobItem [] UriQueue)
        {
            if ((UriQueue == null) || (UriQueue.Length == 0))
            {
                return;
            }

            // At most one new row per queue entry, so the upper bound is known up front.
            List <ListViewItem> ListViewItems = new List <ListViewItem> (UriQueue.Length);

            MacroscopeAllowedHosts AllowedHosts = this.MainForm.GetJobMaster().GetAllowedHosts();
            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

            try
            {
                decimal Count           = 0;
                decimal TotalDocs       = ( decimal )UriQueue.Length;
                decimal MajorPercentage = (( decimal )100 / TotalDocs) * Count;

                if (MacroscopePreferencesManager.GetShowProgressDialogues())
                {
                    ProgressForm.UpdatePercentages(
                        Title: "Preparing Display",
                        Message: "Processing URI Queue for display:",
                        MajorPercentage: MajorPercentage,
                        ProgressLabelMajor: string.Format("URL {0} / {1}", Count, TotalDocs)
                        );
                }

                for (int i = 0; i < UriQueue.Length; i++)
                {
                    ListViewItem lvItem   = null;
                    string       Url      = UriQueue[i].GetItemUrl();
                    string       Position = (i + 1).ToString();

                    if (this.DisplayListView.Items.ContainsKey(Url))
                    {
                        try
                        {
                            lvItem = this.DisplayListView.Items[Url];
                            lvItem.SubItems[0].Text = Position;
                        }
                        catch (Exception ex)
                        {
                            DebugMsg(string.Format("RenderListView 1: {0}", ex.Message));
                        }
                    }
                    else
                    {
                        try
                        {
                            lvItem = new ListViewItem(Url);
                            lvItem.UseItemStyleForSubItems = false;
                            lvItem.Name             = Url;
                            lvItem.SubItems[0].Text = Position;
                            lvItem.SubItems.Add(Url);

                            ListViewItems.Add(lvItem);
                        }
                        catch (Exception ex)
                        {
                            DebugMsg(string.Format("RenderListView 2: {0}", ex.Message));
                        }
                    }

                    if (lvItem != null)
                    {
                        lvItem.ForeColor = Color.Blue;

                        lvItem.SubItems[0].ForeColor = Color.Blue;

                        if (AllowedHosts.IsInternalUrl(Url))
                        {
                            lvItem.SubItems[1].ForeColor = Color.Green;
                        }
                        else
                        {
                            lvItem.SubItems[1].ForeColor = Color.Gray;
                        }
                    }

                    if (MacroscopePreferencesManager.GetShowProgressDialogues())
                    {
                        Count++;

                        // TotalDocs is fixed for the whole loop; only the percentage changes.
                        MajorPercentage = (( decimal )100 / TotalDocs) * Count;

                        ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: MajorPercentage,
                            ProgressLabelMajor: string.Format("URL {0} / {1}", Count, TotalDocs)
                            );
                    }
                }

                this.DisplayListView.Items.AddRange(ListViewItems.ToArray());
            }
            finally
            {
                // Runs on success and on failure so the progress form is never left behind.
                if (MacroscopePreferencesManager.GetShowProgressDialogues())
                {
                    ProgressForm.DoClose();
                }

                ProgressForm.Dispose();
            }
        }