示例#1
0
        /**************************************************************************/

        private void BuildWorksheetXpaths(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            {
                ws.WriteField(MacroscopeConstants.Url);
                ws.WriteField(MacroscopeConstants.StatusCode);
                ws.WriteField(MacroscopeConstants.Status);
                ws.WriteField(MacroscopeConstants.ContentType);
                ws.WriteField("Extracted Label");
                ws.WriteField("Extracted Value");

                ws.NextRecord();
            }

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc      = DocCollection.GetDocument(Url);
                string             DocUrl     = msDoc.GetUrl();
                string             StatusCode = (( int )msDoc.GetStatusCode()).ToString();
                string             Status     = msDoc.GetStatusCode().ToString();
                string             MimeType   = msDoc.GetMimeType();

                if (!this.DataExtractorXpaths.CanApplyDataExtractorsToDocument(msDoc: msDoc))
                {
                    continue;
                }

                foreach (KeyValuePair <string, string> DataExtractedPair in msDoc.IterateDataExtractedXpaths())
                {
                    string ExtractedLabel = DataExtractedPair.Key;
                    string ExtractedValue = DataExtractedPair.Value;

                    if (
                        string.IsNullOrEmpty(ExtractedLabel) ||
                        string.IsNullOrEmpty(ExtractedValue))
                    {
                        continue;
                    }

                    this.InsertAndFormatUrlCell(ws, msDoc);

                    this.InsertAndFormatStatusCodeCell(ws, msDoc);

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(Status));

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(MimeType));

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(ExtractedLabel));

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(ExtractedValue));

                    ws.NextRecord();
                }
            }
        }
示例#2
0
        /**************************************************************************/

        private void BuildWorksheetPageRedirectsAudit(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            {
                ws.WriteField("Origin URL");
                ws.WriteField("Status Code");
                ws.WriteField("Status");
                ws.WriteField("Destination URL");

                ws.NextRecord();
            }

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc = DocCollection.GetDocument(Url: Url);

                if (!msDoc.GetIsRedirect())
                {
                    continue;
                }

                string OriginURL      = msDoc.GetUrlRedirectFrom();
                string StatusCode     = (( int )msDoc.GetStatusCode()).ToString();
                string Status         = msDoc.GetStatusCode().ToString();
                string DestinationURL = msDoc.GetUrlRedirectTo();

                if (string.IsNullOrEmpty(OriginURL))
                {
                    continue;
                }

                if (string.IsNullOrEmpty(DestinationURL))
                {
                    continue;
                }

                this.InsertAndFormatUrlCell(ws, OriginURL);

                this.InsertAndFormatContentCell(ws, StatusCode);

                this.InsertAndFormatContentCell(ws, Status);

                this.InsertAndFormatUrlCell(ws, DestinationURL);

                ws.NextRecord();
            }
        }
示例#3
0
        /**************************************************************************/

        private void BuildWorksheetPageGoodLinks(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            {
                ws.WriteField("Status Code");
                ws.WriteField("Status");
                ws.WriteField("Origin URL");
                ws.WriteField("Destination URL");

                ws.NextRecord();
            }

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument     msDoc        = DocCollection.GetDocument(Url);
                MacroscopeHyperlinksIn HyperlinksIn = DocCollection.GetDocumentHyperlinksIn(Url);
                int    StatusCode = ( int )msDoc.GetStatusCode();
                string Status     = msDoc.GetStatusCode().ToString();

                if (
                    (StatusCode >= 200) &&
                    (StatusCode <= 299) &&
                    (HyperlinksIn != null))
                {
                    foreach (MacroscopeHyperlinkIn HyperlinkIn in HyperlinksIn.IterateLinks())
                    {
                        string OriginUrl = HyperlinkIn.GetSourceUrl();

                        if (
                            (OriginUrl != null) &&
                            (OriginUrl.Length > 0))
                        {
                            this.InsertAndFormatContentCell(ws, StatusCode.ToString());

                            this.InsertAndFormatContentCell(ws, Status);

                            this.InsertAndFormatUrlCell(ws, OriginUrl);

                            this.InsertAndFormatUrlCell(ws, msDoc);

                            ws.NextRecord();
                        }
                    }
                }
            }
        }
        /**************************************************************************/

        private void BuildWorksheetPageUriAnalysis(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            {
                ws.WriteField("URL");
                ws.WriteField("Status Code");
                ws.WriteField("Status");
                ws.WriteField("Occurrences");
                ws.WriteField("Checksum");

                ws.NextRecord();
            }

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc = DocCollection.GetDocument(Url: Url);

                string StatusCode = (( int )msDoc.GetStatusCode()).ToString();
                string Status     = msDoc.GetStatusCode().ToString();
                string Checksum   = msDoc.GetChecksum();
                int    Count      = DocCollection.GetStatsChecksumCount(Checksum: Checksum);

                this.InsertAndFormatUrlCell(ws, msDoc);

                this.InsertAndFormatContentCell(ws, StatusCode);

                this.InsertAndFormatContentCell(ws, Status);

                this.InsertAndFormatContentCell(ws, Count.ToString());

                this.InsertAndFormatContentCell(ws, Checksum);

                ws.NextRecord();
            }
        }
示例#5
0
        /**************************************************************************/

        private void BuildWorksheetErrors(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            {
                ws.WriteField("Status Code");
                ws.WriteField("Status");
                ws.WriteField("URL");

                ws.NextRecord();
            }

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument     msDoc        = DocCollection.GetDocument(Url);
                MacroscopeHyperlinksIn HyperlinksIn = DocCollection.GetDocumentHyperlinksIn(Url);
                int    StatusCode = ( int )msDoc.GetStatusCode();
                string Status     = msDoc.GetStatusCode().ToString();

                if (
                    (StatusCode >= 400) &&
                    (StatusCode <= 599))
                {
                    this.InsertAndFormatContentCell(ws, StatusCode.ToString());

                    this.InsertAndFormatContentCell(ws, Status);

                    this.InsertAndFormatUrlCell(ws, Url);

                    ws.NextRecord();
                }
            }
        }
示例#6
0
        /**************************************************************************/

        private void BuildWorksheetPageText(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

            {
                ws.Cell(iRow, iCol).Value = "URL";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Page Locale";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Page Language";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Detected Language";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Word Count";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Readability Method";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Readability Grade";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Readability Grade Description";
            }

            iColMax = iCol;

            iRow++;

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc   = DocCollection.GetDocument(Url);
                Boolean            Proceed = false;

                if (msDoc.GetIsExternal())
                {
                    continue;
                }

                if (msDoc.GetIsRedirect())
                {
                    continue;
                }

                if (msDoc.GetIsHtml())
                {
                    Proceed = true;
                }
                else
                if (msDoc.GetIsPdf())
                {
                    Proceed = true;
                }

                if (Proceed)
                {
                    iCol = 1;

                    string PageLocale           = msDoc.GetLocale();
                    string PageLanguage         = msDoc.GetIsoLanguageCode();
                    string DetectedLanguage     = msDoc.GetDocumentTextLanguage();
                    int    WordCount            = msDoc.GetWordCount();
                    string ReadabilityGradeType = MacroscopeAnalyzeReadability.FormatAnalyzeReadabilityMethod(
                        ReadabilityMethod: msDoc.GetReadabilityGradeMethod()
                        );
                    string ReadabilityGrade            = msDoc.GetReadabilityGrade().ToString("00.00");
                    string ReadabilityGradeDescription = msDoc.GetReadabilityGradeDescription();

                    if (string.IsNullOrEmpty(PageLocale))
                    {
                        PageLocale = "";
                    }

                    if (string.IsNullOrEmpty(PageLanguage))
                    {
                        PageLanguage = "";
                    }

                    if (string.IsNullOrEmpty(DetectedLanguage))
                    {
                        DetectedLanguage = "";
                    }

                    this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

                    if (msDoc.GetIsInternal())
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                    }

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(PageLocale));

                    if (msDoc.GetIsInternal())
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                    }

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(PageLanguage));

                    if (PageLanguage != DetectedLanguage)
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                    }

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(DetectedLanguage));

                    if (PageLanguage != DetectedLanguage)
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                    }

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, WordCount);

                    if (msDoc.GetIsInternal())
                    {
                        if (WordCount > 0)
                        {
                            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                        }
                        else
                        {
                            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                        }
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                    }

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, ReadabilityGradeType);

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, ReadabilityGrade);

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, ReadabilityGradeDescription);

                    iRow++;
                }
            }

            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax);
                var excelTable = rangeData.CreateTable();
            }
        }
示例#7
0
        /**************************************************************************/

        private void BuildWorksheetPageKeywords(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

            {
                ws.WriteField("URL");
                ws.WriteField("Occurrences");
                ws.WriteField("Keywords");
                ws.WriteField("Keywords Length");
                ws.WriteField("Number of Keywords");
                ws.NextRecord();
            }

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc   = DocCollection.GetDocument(Url);
                Boolean            Proceed = false;

                if (msDoc.GetIsExternal())
                {
                    continue;
                }

                if (msDoc.GetIsRedirect())
                {
                    continue;
                }

                if (msDoc.GetIsHtml())
                {
                    Proceed = true;
                }
                else
                if (msDoc.GetIsPdf())
                {
                    Proceed = true;
                }

                if (Proceed)
                {
                    string Keywords       = msDoc.GetKeywords();
                    int    Occurrences    = 0;
                    int    KeywordsLength = msDoc.GetKeywordsLength();
                    int    KeywordsNumber = msDoc.GetKeywordsCount();

                    if (KeywordsLength > 0)
                    {
                        Occurrences = DocCollection.GetStatsKeywordsCount(msDoc);
                    }

                    this.InsertAndFormatUrlCell(ws, msDoc);

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(Occurrences.ToString()));

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(Keywords));

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(KeywordsLength.ToString()));

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(KeywordsNumber.ToString()));

                    ws.NextRecord();
                }
            }
        }
        /**************************************************************************/

        private void BuildWorksheetPageRedirectsAudit(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            {
                ws.Cell(iRow, iCol).Value = "Origin URL";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Status Code";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Status";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Destination URL";
            }

            iColMax = iCol;

            iRow++;

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc = DocCollection.GetDocument(Url);

                if (!msDoc.GetIsRedirect())
                {
                    continue;
                }

                string OriginURL      = msDoc.GetUrlRedirectFrom();
                string StatusCode     = (( int )msDoc.GetStatusCode()).ToString();
                string Status         = msDoc.GetStatusCode().ToString();
                string DestinationURL = msDoc.GetUrlRedirectTo();

                if (OriginURL == null)
                {
                    continue;
                }

                if (DestinationURL == null)
                {
                    continue;
                }

                iCol = 1;

                this.InsertAndFormatUrlCell(ws, iRow, iCol, OriginURL);

                if (AllowedHosts.IsInternalUrl(Url: OriginURL))
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                }
                else
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                }

                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, StatusCode);

                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, Status);

                iCol++;

                this.InsertAndFormatUrlCell(ws, iRow, iCol, DestinationURL);

                if (AllowedHosts.IsInternalUrl(Url: DestinationURL))
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                }
                else
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                }

                iRow++;
            }

            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax);
                var excelTable = rangeData.CreateTable();
            }
        }
        /**************************************************************************/

        private void RenderListView(
            MacroscopeDocumentCollection DocCollection,
            List <string> UrlList,
            MacroscopeCustomFilters CustomFilter
            )
        {
            if (this.FilterColOffset == -1)
            {
                throw(new Exception("this.FilterColOffset invalid"));
            }

            if (DocCollection.CountDocuments() == 0)
            {
                return;
            }

            MacroscopeAllowedHosts   AllowedHosts    = this.MainForm.GetJobMaster().GetAllowedHosts();
            Dictionary <string, int> FilterColsTable = new Dictionary <string, int> (CustomFilter.GetSize());
            List <ListViewItem>      ListViewItems   = new List <ListViewItem> ();

            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);
            decimal Count           = 0;
            decimal TotalDocs       = ( decimal )DocCollection.CountDocuments();
            decimal MajorPercentage = (( decimal )100 / TotalDocs) * Count;

            if (MacroscopePreferencesManager.GetShowProgressDialogues())
            {
                ProgressForm.UpdatePercentages(
                    Title: "Preparing Display",
                    Message: "Processing document collection for display:",
                    MajorPercentage: MajorPercentage,
                    ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                    );
            }

            for (int Slot = 0; Slot < CustomFilter.GetSize(); Slot++)
            {
                string FilterPattern = CustomFilter.GetPattern(Slot).Key;

                if (FilterColsTable.ContainsKey(FilterPattern))
                {
                    FilterColsTable.Add(string.Format("EMPTY{0}", Slot + 1), Slot + 1);
                }
                else
                {
                    FilterColsTable.Add(FilterPattern, Slot + 1);
                }
            }

            foreach (string Url in UrlList)
            {
                MacroscopeDocument msDoc  = DocCollection.GetDocument(Url: Url);
                ListViewItem       lvItem = null;
                string             DocUrl;
                string             PairKey;
                string             StatusCode;
                string             Status;
                string             MimeType;

                if (msDoc == null)
                {
                    continue;
                }
                else
                {
                    DocUrl     = msDoc.GetUrl();
                    PairKey    = DocUrl;
                    StatusCode = (( int )msDoc.GetStatusCode()).ToString();
                    Status     = msDoc.GetStatusCode().ToString();
                    MimeType   = msDoc.GetMimeType();
                }

                if (!CustomFilter.CanApplyCustomFiltersToDocument(msDoc: msDoc))
                {
                    continue;
                }

                if (this.DisplayListView.Items.ContainsKey(PairKey))
                {
                    lvItem = this.DisplayListView.Items[PairKey];
                }
                else
                {
                    lvItem = new ListViewItem(PairKey);
                    lvItem.UseItemStyleForSubItems = false;
                    lvItem.Name = PairKey;

                    lvItem.SubItems.Add("");
                    lvItem.SubItems.Add("");
                    lvItem.SubItems.Add("");
                    lvItem.SubItems.Add("");

                    for (int Slot = 0; Slot < CustomFilter.GetSize(); Slot++)
                    {
                        lvItem.SubItems.Add("");
                    }

                    ListViewItems.Add(lvItem);
                }

                if (lvItem != null)
                {
                    try
                    {
                        lvItem.SubItems[ColUrl].Text        = DocUrl;
                        lvItem.SubItems[ColStatusCode].Text = StatusCode;
                        lvItem.SubItems[ColStatus].Text     = Status;
                        lvItem.SubItems[ColMimeType].Text   = MimeType;

                        for (int Slot = 0; Slot < CustomFilter.GetSize(); Slot++)
                        {
                            string FilterPattern = CustomFilter.GetPattern(Slot: Slot).Key;
                            KeyValuePair <string,  MacroscopeConstants.TextPresence> Pair = msDoc.GetCustomFilteredItem(Text: FilterPattern);
                            int ColOffset = this.FilterColOffset + FilterColsTable[FilterPattern];

                            if ((Pair.Key != null) && (Pair.Value != MacroscopeConstants.TextPresence.UNDEFINED))
                            {
                                lvItem.SubItems[ColOffset].Text = MacroscopeConstants.TextPresenceLabels[Pair.Value];

                                switch (Pair.Value)
                                {
                                case MacroscopeConstants.TextPresence.CONTAINS:
                                    lvItem.SubItems[ColOffset].ForeColor = Color.Green;
                                    break;

                                case MacroscopeConstants.TextPresence.NOTCONTAINS:
                                    lvItem.SubItems[ColOffset].ForeColor = Color.Green;
                                    break;

                                case MacroscopeConstants.TextPresence.MUSTCONTAIN:
                                    lvItem.SubItems[ColOffset].ForeColor = Color.Red;
                                    break;

                                case MacroscopeConstants.TextPresence.SHOULDNOTCONTAIN:
                                    lvItem.SubItems[ColOffset].ForeColor = Color.Red;
                                    break;

                                default:
                                    lvItem.SubItems[ColOffset].ForeColor = Color.Gray;
                                    break;
                                }
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        DebugMsg(string.Format("MacroscopeDisplayCustomFilters: {0}", ex.Message));
                        DebugMsg(string.Format("MacroscopeDisplayCustomFilters: {0}", ex.StackTrace));
                    }
                }
                else
                {
                    DebugMsg(string.Format("MacroscopeDisplayCustomFilters MISSING: {0}", PairKey));
                }

                if (msDoc.GetIsInternal())
                {
                    lvItem.SubItems[ColUrl].ForeColor = Color.Green;
                }
                else
                {
                    lvItem.SubItems[ColUrl].ForeColor = Color.Gray;
                }

                if (Regex.IsMatch(StatusCode, "^[2]"))
                {
                    lvItem.SubItems[ColStatusCode].ForeColor = Color.Green;
                    lvItem.SubItems[ColStatus].ForeColor     = Color.Green;
                }
                else
                if (Regex.IsMatch(StatusCode, "^[3]"))
                {
                    lvItem.SubItems[ColStatusCode].ForeColor = Color.Goldenrod;
                    lvItem.SubItems[ColStatus].ForeColor     = Color.Goldenrod;
                }
                else
                if (Regex.IsMatch(StatusCode, "^[45]"))
                {
                    lvItem.SubItems[ColStatusCode].ForeColor = Color.Red;
                    lvItem.SubItems[ColStatus].ForeColor     = Color.Red;
                }
                else
                {
                    lvItem.SubItems[ColStatusCode].ForeColor = Color.Blue;
                    lvItem.SubItems[ColStatus].ForeColor     = Color.Blue;
                }

                if (MacroscopePreferencesManager.GetShowProgressDialogues())
                {
                    Count++;

                    MajorPercentage = (( decimal )100 / TotalDocs) * Count;

                    ProgressForm.UpdatePercentages(
                        Title: null,
                        Message: null,
                        MajorPercentage: MajorPercentage,
                        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                        );
                }
            }

            this.DisplayListView.Items.AddRange(ListViewItems.ToArray());

            this.DisplayListView.AutoResizeColumns(ColumnHeaderAutoResizeStyle.HeaderSize);

            this.DisplayListView.Columns[ColUrl].Width        = 300;
            this.DisplayListView.Columns[ColStatusCode].Width = 100;
            this.DisplayListView.Columns[ColStatus].Width     = 100;
            this.DisplayListView.Columns[ColMimeType].Width   = 100;

            if (MacroscopePreferencesManager.GetShowProgressDialogues())
            {
                ProgressForm.DoClose();
            }

            ProgressForm.Dispose();
        }
        /**************************************************************************/

        private void BuildWorksheetHrefLangMatrixUnspecified(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            Dictionary <string, string>  LocalesTable  = JobMaster.GetLocales();
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            Dictionary <string, int>     LocaleCols    = new Dictionary <string, int> ();

            {
                ws.Cell(iRow, iCol).Value = "URL";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Status Code";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Site Locale";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Title";
                iCol++;

                foreach (string LocaleKey in LocalesTable.Keys)
                {
                    DebugMsg(string.Format("EXCEL Locale: {0}", LocaleKey));
                    LocaleCols[LocaleKey]     = iCol;
                    ws.Cell(iRow, iCol).Value = LocaleKey;
                    iCol++;
                }

                for (int i = 1; i <= iCol; i++)
                {
                    ws.Cell(iRow, i).Style.Font.SetBold();
                }
            }

            iColMax = iCol;

            iRow++;

            foreach (string Key in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc = DocCollection.GetDocument(Key);
                Dictionary <string, MacroscopeHrefLang> HrefLangsTable = msDoc.GetHrefLangs();
                Boolean Proceed = false;

                foreach (string LocaleKey in LocalesTable.Keys)
                {
                    if (!string.IsNullOrEmpty(LocaleKey))
                    {
                        if (!HrefLangsTable.ContainsKey(LocaleKey))
                        {
                            Proceed = true;
                        }
                    }
                }

                if (Proceed)
                {
                    if (!string.IsNullOrEmpty(msDoc.GetLocale()))
                    {
                        continue;
                    }

                    string SiteLocale = this.FormatIfMissing(msDoc.GetLocale());
                    string Title      = this.FormatIfMissing(msDoc.GetTitle());
                    string LocaleCol  = msDoc.GetLocale();

                    iCol = 1;

                    this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);
                    iCol++;

                    this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, msDoc);
                    iCol++;

                    ws.Cell(iRow, iCol).Value = SiteLocale;
                    if (SiteLocale == "MISSING")
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                    }
                    iCol++;

                    ws.Cell(iRow, iCol).Value = Title;
                    if (Title == "MISSING")
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                    }
                    iCol++;

                    if (LocaleCol != null)
                    {
                        ws.Cell(iRow, LocaleCols[LocaleCol]).Value = msDoc.GetUrl();
                    }
                    else
                    {
                        ;
                    }

                    foreach (string LocaleKey in LocalesTable.Keys)
                    {
                        if (!string.IsNullOrEmpty(LocaleKey))
                        {
                            if (HrefLangsTable.ContainsKey(LocaleKey))
                            {
                                MacroscopeHrefLang HrefLangAlternate = HrefLangsTable[LocaleKey];
                                string             Value             = HrefLangAlternate.GetUrl();

                                ws.Cell(iRow, LocaleCols[LocaleKey]).Value = Value;

                                if (JobMaster.GetAllowedHosts().IsInternalUrl(Value))
                                {
                                    ws.Cell(iRow, LocaleCols[LocaleKey]).Style.Font.SetFontColor(XLColor.Green);
                                }
                                else
                                {
                                    ws.Cell(iRow, LocaleCols[LocaleKey]).Style.Font.SetFontColor(XLColor.Blue);
                                }
                            }
                            else
                            {
                                ws.Cell(iRow, LocaleCols[LocaleKey]).Style.Font.SetFontColor(XLColor.Red);
                                ws.Cell(iRow, LocaleCols[LocaleKey]).Value = "NOT SPECIFIED";
                            }
                        }
                    }

                    iRow++;
                }
            }

            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax - 1);
                var excelTable = rangeData.CreateTable();
            }
        }
示例#11
0
        /**************************************************************************/

        private void BuildWorksheetOverview(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

            {
                ws.WriteField("URL");
                ws.WriteField("Status Code");
                ws.WriteField("Status");
                ws.WriteField("Redirect");
                ws.WriteField("Duration");
                ws.WriteField("Crawled Date");
                ws.WriteField("Server Date");
                ws.WriteField("Modified Date");
                ws.WriteField("Expires Date");
                ws.WriteField("Content-Type");
                ws.WriteField("Locale");
                ws.WriteField("Language");
                ws.WriteField("Canonical");
                ws.WriteField("Page Depth");
                ws.WriteField("Links In");
                ws.WriteField("Links Out");
                ws.WriteField("Hyperlinks In");
                ws.WriteField("Hyperlinks Out");
                ws.WriteField("Title");
                ws.WriteField("Title Length");
                ws.WriteField("Description");
                ws.WriteField("Description Length");
                ws.WriteField("Error Condition");

                ws.NextRecord();
            }

            foreach (string Key in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc = DocCollection.GetDocument(Key);

                this.InsertAndFormatUrlCell(ws, msDoc);

                this.InsertAndFormatStatusCodeCell(ws, msDoc);

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetStatusCode().ToString()));

                this.InsertAndFormatRedirectCell(ws, msDoc);

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetDurationInSecondsFormatted()));

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetCrawledDate()));

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetDateServer()));

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetDateModified()));

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetDateExpires()));

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetMimeType()));

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetLocale()));

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetIsoLanguageCode()));

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetCanonical()));

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetDepth().ToString()));

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.CountInlinks().ToString()));

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.CountOutlinks().ToString()));

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.CountHyperlinksIn().ToString()));

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.CountHyperlinksOut().ToString()));

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetTitle()));

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetTitleLength().ToString()));

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetDescription()));

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetDescriptionLength().ToString()));

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetErrorCondition()));

                ws.NextRecord();
            }
        }
示例#12
0
        /**************************************************************************/

        private void BuildWorksheetPageText(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

            {
                ws.WriteField("URL");
                ws.WriteField("Page Locale");
                ws.WriteField("Page Language");
                ws.WriteField("Detected Language");
                ws.WriteField("Word Count");
                ws.WriteField("Readability Method");
                ws.WriteField("Readability Grade");
                ws.WriteField("Readability Grade Description");

                ws.NextRecord();
            }

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc   = DocCollection.GetDocument(Url);
                Boolean            Proceed = false;

                if (msDoc.GetIsExternal())
                {
                    continue;
                }

                if (msDoc.GetIsRedirect())
                {
                    continue;
                }

                if (msDoc.GetIsHtml())
                {
                    Proceed = true;
                }
                else
                if (msDoc.GetIsPdf())
                {
                    Proceed = true;
                }

                if (Proceed)
                {
                    string PageLocale           = msDoc.GetLocale();
                    string PageLanguage         = msDoc.GetIsoLanguageCode();
                    string DetectedLanguage     = msDoc.GetDocumentTextLanguage();
                    int    WordCount            = msDoc.GetWordCount();
                    string ReadabilityGradeType = MacroscopeAnalyzeReadability.FormatAnalyzeReadabilityMethod(
                        ReadabilityMethod: msDoc.GetReadabilityGradeMethod()
                        );
                    string ReadabilityGrade            = msDoc.GetReadabilityGrade().ToString("00.00");
                    string ReadabilityGradeDescription = msDoc.GetReadabilityGradeDescription();

                    if (string.IsNullOrEmpty(PageLocale))
                    {
                        PageLocale = "";
                    }

                    if (string.IsNullOrEmpty(PageLanguage))
                    {
                        PageLanguage = "";
                    }

                    if (string.IsNullOrEmpty(DetectedLanguage))
                    {
                        DetectedLanguage = "";
                    }

                    this.InsertAndFormatUrlCell(ws, msDoc);

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(PageLocale));

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(PageLanguage));

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(DetectedLanguage));

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(WordCount.ToString()));

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(ReadabilityGradeType));
                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(ReadabilityGrade));
                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(ReadabilityGradeDescription));

                    ws.NextRecord();
                }
            }
        }
示例#13
0
        /**************************************************************************/

        private void BuildWorksheetPageHeadings(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

            {
                ws.WriteField("URL");
                ws.WriteField("Occurences");
                ws.WriteField("Order");

                for (int i = 1; i <= 6; i++)
                {
                    ws.WriteField(string.Format("H{0}", i));
                }

                ws.NextRecord();
            }

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc   = DocCollection.GetDocument(Url);
                Boolean            Proceed = false;

                if (msDoc.GetIsExternal())
                {
                    continue;
                }

                if (msDoc.GetIsRedirect())
                {
                    continue;
                }

                if (msDoc.GetIsHtml())
                {
                    Proceed = true;
                }

                if (Proceed)
                {
                    for (ushort HeadingLevel = 1; HeadingLevel <= MacroscopePreferencesManager.GetMaxHeadingDepth(); HeadingLevel++)
                    {
                        List <string> HeadingsList = msDoc.GetHeadings(HeadingLevel);

                        for (int Order = 0; Order < HeadingsList.Count; Order++)
                        {
                            int Occurences = DocCollection.GetStatsHeadingsCount(HeadingLevel: HeadingLevel, Text: HeadingsList[Order]);

                            this.InsertAndFormatUrlCell(ws, msDoc);

                            this.InsertAndFormatContentCell(ws, Occurences.ToString());

                            this.InsertAndFormatContentCell(ws, this.FormatIfMissing((Order + 1).ToString()));

                            this.InsertAndFormatContentCell(ws, this.FormatIfMissing(HeadingsList[Order]));

                            ws.NextRecord();
                        }
                    }
                }
            }
        }
示例#14
0
        /**************************************************************************/

        private void BuildWorksheetPageDuplicateEtags(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            decimal CountOuter = 0;
            decimal CountInner = 0;
            decimal DocCount   = 0;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            Dictionary <string, int> DuplicatesList = new Dictionary <string, int> (DocCollection.CountDocuments());
            Dictionary <string, MacroscopeDocument> DuplicatesDocList = new Dictionary <string, MacroscopeDocument> (DocCollection.CountDocuments());

            DocCount = ( decimal )DocCollection.CountDocuments();

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc = DocCollection.GetDocument(Url);
                string             Etag  = msDoc.GetEtag();

                if ((Etag != null) && (Etag.Length > 0))
                {
                    if (!DuplicatesDocList.ContainsKey(Url))
                    {
                        DuplicatesDocList.Add(Url, msDoc);
                    }

                    if (DuplicatesList.ContainsKey(Etag))
                    {
                        DuplicatesList[Etag] = DuplicatesList[Etag] + 1;
                    }
                    else
                    {
                        DuplicatesList.Add(Etag, 1);
                    }
                }
            }

            {
                ws.Cell(iRow, iCol).Value = "Status Code";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Status";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Occurrences";
                iCol++;

                ws.Cell(iRow, iCol).Value = "ETag";
                iCol++;

                ws.Cell(iRow, iCol).Value = "URL";
            }

            iColMax = iCol;

            iRow++;

            foreach (string Etag in DuplicatesList.Keys)
            {
                CountOuter++;
                CountInner = 0;

                if (DuplicatesList[Etag] > 1)
                {
                    foreach (MacroscopeDocument msDoc in  DuplicatesDocList.Values)
                    {
                        CountInner++;

                        if (DocCount > 0)
                        {
                            this.ProgressForm.UpdatePercentages(
                                Title: null,
                                Message: null,
                                MajorPercentage: -1,
                                ProgressLabelMajor: string.Format("Documents Processed: {0}", CountOuter),
                                MinorPercentage: (( decimal )100 / DocCount) * CountOuter,
                                ProgressLabelMinor: Etag,
                                SubMinorPercentage: (( decimal )100 / DocCount) * CountInner,
                                ProgressLabelSubMinor: msDoc.GetUrl()
                                );
                        }

                        if (msDoc.GetEtag() == Etag)
                        {
                            iCol = 1;

                            int            StatusCode  = ( int )msDoc.GetStatusCode();
                            HttpStatusCode Status      = msDoc.GetStatusCode();
                            int            Occurrences = DuplicatesList[Etag];

                            this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, StatusCode);
                            iCol++;

                            this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, Status);
                            iCol++;

                            this.InsertAndFormatContentCell(ws, iRow, iCol, Occurrences);
                            iCol++;

                            this.InsertAndFormatContentCell(ws, iRow, iCol, msDoc.GetEtag());
                            iCol++;

                            this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

                            iRow++;
                        }
                    }
                }
            }

            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax);
                var excelTable = rangeData.CreateTable();
            }
        }
        /**************************************************************************/

        private void BuildWorksheetPageDuplicatePages(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            decimal DocCount     = 0;
            decimal DocListCount = 0;
            decimal CountOuter   = 0;
            decimal CountInner   = 0;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();
            Dictionary <string, Boolean> CrossCheckList;

            CrossCheckList = MacroscopeLevenshteinAnalysis.GetCrossCheckList(
                Capacity: DocCollection.CountDocuments()
                );

            DocCount = ( decimal )DocCollection.CountDocuments();

            {
                ws.Cell(iRow, iCol).Value = "Status Code";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Status";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Origin URL";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Distance";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Similar URL";
            }

            iColMax = iCol;

            iRow++;

            foreach (string UrlLeft in DocCollection.DocumentKeys())
            {
                MacroscopeDocument            msDocLeft           = DocCollection.GetDocument(UrlLeft);
                MacroscopeLevenshteinAnalysis LevenshteinAnalysis = null;

                CountOuter++;
                CountInner = 0;

                if (DocCount > 0)
                {
                    this.ProgressForm.UpdatePercentages(
                        Title: null,
                        Message: null,
                        MajorPercentage: -1,
                        ProgressLabelMajor: string.Format("Documents Processed: {0}", CountOuter),
                        MinorPercentage: (( decimal )100 / DocCount) * CountOuter,
                        ProgressLabelMinor: UrlLeft,
                        SubMinorPercentage: 0,
                        ProgressLabelSubMinor: ""
                        );
                }

                if (msDocLeft.GetIsExternal())
                {
                    continue;
                }

                if (!msDocLeft.GetIsHtml())
                {
                    continue;
                }

                LevenshteinAnalysis = new MacroscopeLevenshteinAnalysis(
                    msDoc: msDocLeft,
                    SizeDifference: MacroscopePreferencesManager.GetMaxLevenshteinSizeDifference(),
                    Threshold: MacroscopePreferencesManager.GetMaxLevenshteinDistance(),
                    CrossCheckList: CrossCheckList,
                    IPercentageDone: this
                    );

                Dictionary <MacroscopeDocument, int> DocList;

                DocList = LevenshteinAnalysis.AnalyzeDocCollection(
                    DocCollection: DocCollection
                    );

                DocListCount = ( decimal )DocList.Count;

                foreach (MacroscopeDocument msDocDuplicate in DocList.Keys)
                {
                    int            StatusCode   = ( int )msDocLeft.GetStatusCode();
                    HttpStatusCode Status       = msDocLeft.GetStatusCode();
                    string         UrlDuplicate = msDocDuplicate.GetUrl();
                    int            Distance     = DocList[msDocDuplicate];

                    CountInner++;
                    iCol = 1;

                    if (DocCount > 0)
                    {
                        this.ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: -1,
                            ProgressLabelMajor: string.Format("Documents Processed: {0}", CountOuter),
                            MinorPercentage: (( decimal )100 / DocCount) * CountOuter,
                            ProgressLabelMinor: UrlLeft,
                            SubMinorPercentage: (( decimal )100 / DocListCount) * CountInner,
                            ProgressLabelSubMinor: UrlDuplicate
                            );
                    }

                    this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, StatusCode);
                    iCol++;

                    this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, Status);
                    iCol++;

                    this.InsertAndFormatUrlCell(ws, iRow, iCol, UrlLeft);

                    if (AllowedHosts.IsInternalUrl(Url: UrlLeft))
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                    }

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, Distance.ToString());

                    if (Distance <= MacroscopePreferencesManager.GetMaxLevenshteinDistance())
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                    }

                    iCol++;

                    this.InsertAndFormatUrlCell(ws, iRow, iCol, UrlDuplicate);

                    if (AllowedHosts.IsInternalUrl(Url: UrlDuplicate))
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                    }

                    iRow++;

                    if (this.ProgressForm.Cancelled())
                    {
                        break;
                    }
                }

                if (this.ProgressForm.Cancelled())
                {
                    break;
                }

                Thread.Yield();
            }

            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax);
                var excelTable = rangeData.CreateTable();
            }
        }
        /**************************************************************************/

        private void BuildWorksheetPageDescriptions(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

            {
                ws.Cell(iRow, iCol).Value = "URL";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Page Language";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Detected Language";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Occurrences";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Description";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Description Length";
            }

            iColMax = iCol;

            iRow++;

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc   = DocCollection.GetDocument(Url);
                Boolean            Proceed = false;

                if (msDoc.GetIsExternal())
                {
                    continue;
                }

                if (msDoc.GetIsRedirect())
                {
                    continue;
                }

                if (msDoc.GetIsHtml())
                {
                    Proceed = true;
                }
                else
                if (msDoc.GetIsPdf())
                {
                    Proceed = true;
                }

                if (Proceed)
                {
                    iCol = 1;

                    string Description       = msDoc.GetDescription();
                    string PageLanguage      = msDoc.GetIsoLanguageCode();
                    string DetectedLanguage  = msDoc.GetTitleLanguage();
                    int    Occurrences       = 0;
                    int    DescriptionLength = msDoc.GetDescriptionLength();

                    if (DescriptionLength > 0)
                    {
                        Occurrences = DocCollection.GetStatsDescriptionCount(msDoc: msDoc);
                    }

                    this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

                    if (msDoc.GetIsInternal())
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                    }
                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(PageLanguage));

                    if (PageLanguage != DetectedLanguage)
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                    }

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(DetectedLanguage));

                    if (PageLanguage != DetectedLanguage)
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                    }

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, Occurrences);

                    if (Occurrences > 1)
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Orange);
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                    }
                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(Description));

                    if (DescriptionLength <= 0)
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                        ws.Cell(iRow, iCol).Value = "MISSING";
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                    }
                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, DescriptionLength);

                    if (DescriptionLength < MacroscopePreferencesManager.GetDescriptionMinLen())
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                    }
                    else
                    if (DescriptionLength > MacroscopePreferencesManager.GetDescriptionMaxLen())
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                    }

                    iRow++;
                }
            }

            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax);
                var excelTable = rangeData.CreateTable();
            }
        }
示例#17
0
        /**************************************************************************/

        private void BuildWorksheetOverview(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

            {
                ws.Cell(iRow, iCol).Value = "URL";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Status Code";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Status";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Redirect";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Duration";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Crawled Date";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Server Date";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Modified Date";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Expires Date";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Content-Type";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Locale";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Language";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Canonical";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Page Depth";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Links In";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Links Out";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Hyperlinks In";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Hyperlinks Out";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Title";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Title Length";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Description";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Description Length";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Error Condition";

                for (int i = 1; i <= iCol; i++)
                {
                    ws.Cell(iRow, i).Style.Font.SetBold();
                }
            }

            iColMax = iCol;

            iRow++;

            foreach (string sKey in DocCollection.DocumentKeys())
            {
                iCol = 1;

                MacroscopeDocument msDoc = DocCollection.GetDocument(sKey);

                this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);
                iCol++;

                this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, msDoc);
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(msDoc.GetStatusCode().ToString()));
                iCol++;

                this.InsertAndFormatRedirectCell(ws, iRow, iCol, msDoc);
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, msDoc.GetDurationInSecondsFormatted());
                iCol++;

                this.InsertAndFormatDateCell(ws, iRow, iCol, msDoc.GetCrawledDate());
                iCol++;

                this.InsertAndFormatDateCell(ws, iRow, iCol, msDoc.GetDateServer());
                iCol++;

                this.InsertAndFormatDateCell(ws, iRow, iCol, msDoc.GetDateModified());
                iCol++;

                this.InsertAndFormatDateCell(ws, iRow, iCol, msDoc.GetDateExpires());
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(msDoc.GetMimeType()));
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(msDoc.GetLocale()));
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(msDoc.GetIsoLanguageCode()));
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(msDoc.GetCanonical()));
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, msDoc.GetDepth().ToString());
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, msDoc.CountInlinks());
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, msDoc.CountOutlinks());
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, msDoc.CountHyperlinksIn());
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, msDoc.CountHyperlinksOut());
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(msDoc.GetTitle()));
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, msDoc.GetTitleLength().ToString());
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(msDoc.GetDescription()));
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, msDoc.GetDescriptionLength());
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(msDoc.GetErrorCondition()));

                iRow++;
            }

            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax);
                var excelTable = rangeData.CreateTable();
            }
        }
        /**************************************************************************/

        private void BuildWorksheetCustomFilter(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            Dictionary <string, int> FilterColsTable = new Dictionary <string, int> (CustomFilter.GetSize());

            const int FilterColOffset = 4;

            {
                ws.Cell(iRow, iCol).Value = MacroscopeConstants.Url;
                iCol++;

                ws.Cell(iRow, iCol).Value = MacroscopeConstants.StatusCode;
                iCol++;

                ws.Cell(iRow, iCol).Value = MacroscopeConstants.Status;
                iCol++;

                ws.Cell(iRow, iCol).Value = MacroscopeConstants.ContentType;

                for (int Slot = 0; Slot < CustomFilter.GetSize(); Slot++)
                {
                    string FilterPattern = CustomFilter.GetPattern(Slot).Key;

                    iCol++;

                    if (FilterColsTable.ContainsKey(FilterPattern) || string.IsNullOrEmpty(FilterPattern))
                    {
                        FilterColsTable.Add(string.Format("EMPTY{0}", Slot + 1), Slot + FilterColOffset);
                        ws.Cell(iRow, iCol).Value = string.Format("EMPTY{0}", Slot + 1);
                    }
                    else
                    {
                        FilterColsTable.Add(FilterPattern, Slot + FilterColOffset);
                        ws.Cell(iRow, iCol).Value = FilterPattern;
                    }
                }
            }

            iColMax = iCol;

            iRow++;

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc      = DocCollection.GetDocument(Url);
                string             DocUrl     = msDoc.GetUrl();
                string             StatusCode = (( int )msDoc.GetStatusCode()).ToString();
                string             Status     = msDoc.GetStatusCode().ToString();
                string             MimeType   = msDoc.GetMimeType();

                if (!CustomFilter.CanApplyCustomFiltersToDocument(msDoc: msDoc))
                {
                    continue;
                }

                iCol = 1;

                this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

                if (msDoc.GetIsInternal())
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                }
                else
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                }

                iCol++;

                this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, msDoc);

                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(msDoc.GetStatusCode().ToString()));

                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(MimeType));

                iCol++;

                for (int Slot = 0; Slot < this.CustomFilter.GetSize(); Slot++)
                {
                    string FilterPattern = this.CustomFilter.GetPattern(Slot: Slot).Key;
                    KeyValuePair <string,  MacroscopeConstants.TextPresence> Pair = msDoc.GetCustomFilteredItem(Text: FilterPattern);

                    if ((Pair.Key != null) && (Pair.Value != MacroscopeConstants.TextPresence.UNDEFINED))
                    {
                        string CustomFilterItemValue = MacroscopeConstants.TextPresenceLabels[Pair.Value];

                        this.InsertAndFormatContentCell(ws, iRow, iCol, CustomFilterItemValue);

                        switch (Pair.Value)
                        {
                        case MacroscopeConstants.TextPresence.CONTAINS:
                            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                            break;

                        case MacroscopeConstants.TextPresence.NOTCONTAINS:
                            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                            break;

                        case MacroscopeConstants.TextPresence.MUSTCONTAIN:
                            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                            break;

                        case MacroscopeConstants.TextPresence.SHOULDNOTCONTAIN:
                            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                            break;

                        default:
                            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                            break;
                        }
                    }
                    else
                    {
                        this.InsertAndFormatContentCell(ws, iRow, iCol, "");
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                    }

                    iCol++;
                }

                iRow++;
            }

            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax);
                var excelTable = rangeData.CreateTable();
            }
        }
示例#19
0
        /**************************************************************************/

        private void BuildWorksheetCssSelectors(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            {
                ws.Cell(iRow, iCol).Value = MacroscopeConstants.Url;
                iCol++;

                ws.Cell(iRow, iCol).Value = MacroscopeConstants.StatusCode;
                iCol++;

                ws.Cell(iRow, iCol).Value = MacroscopeConstants.Status;
                iCol++;

                ws.Cell(iRow, iCol).Value = MacroscopeConstants.ContentType;
                iCol++;

                ws.Cell(iRow, iCol).Value = "Extracted Label";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Extracted Value";
            }

            iColMax = iCol;

            iRow++;

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc      = DocCollection.GetDocument(Url);
                string             DocUrl     = msDoc.GetUrl();
                string             StatusCode = (( int )msDoc.GetStatusCode()).ToString();
                string             Status     = msDoc.GetStatusCode().ToString();
                string             MimeType   = msDoc.GetMimeType();

                if (!this.DataExtractorCssSelectors.CanApplyDataExtractorsToDocument(msDoc: msDoc))
                {
                    continue;
                }

                foreach (KeyValuePair <string, string> DataExtractedPair in msDoc.IterateDataExtractedCssSelectors())
                {
                    string ExtractedLabel = DataExtractedPair.Key;
                    string ExtractedValue = DataExtractedPair.Value;

                    if (
                        string.IsNullOrEmpty(ExtractedLabel) ||
                        string.IsNullOrEmpty(ExtractedValue))
                    {
                        continue;
                    }

                    iCol = 1;

                    this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

                    if (msDoc.GetIsInternal())
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                    }

                    iCol++;

                    this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, msDoc);

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(Status));

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(MimeType));

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(ExtractedLabel));

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(ExtractedValue));

                    iRow++;
                }
            }

            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax);
                var excelTable = rangeData.CreateTable();
            }
        }
        /**************************************************************************/

        private void RenderTreeView(
            MacroscopeDocumentCollection DocCollection,
            List <string> UrlList
            )
        {
            if (DocCollection.CountDocuments() == 0)
            {
                return;
            }

            if (UrlList.Count == 0)
            {
                return;
            }

            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);
            decimal Count           = 0;
            decimal TotalDocs       = ( decimal )UrlList.Count;
            decimal MajorPercentage = (( decimal )100 / TotalDocs) * Count;

            if (MacroscopePreferencesManager.GetShowProgressDialogues())
            {
                ProgressForm.UpdatePercentages(
                    Title: "Preparing Display",
                    Message: "Processing document collection for display:",
                    MajorPercentage: MajorPercentage,
                    ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                    );
            }

            this.tvTreeView.BeginUpdate();

            DebugMsg(string.Format("HIERARCHY: {0}", "BASE"));

            foreach (string Url in UrlList)
            {
                MacroscopeDocument msDoc = DocCollection.GetDocument(Url);

                if (msDoc != null)
                {
                    this.RenderTreeView(msDoc, Url);
                }

                Count++;
                MajorPercentage = (( decimal )100 / TotalDocs) * Count;

                ProgressForm.UpdatePercentages(
                    Title: null,
                    Message: null,
                    MajorPercentage: MajorPercentage,
                    ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                    );
            }

            this.tvTreeView.ExpandAll();

            this.tvTreeView.EndUpdate();

            if (MacroscopePreferencesManager.GetShowProgressDialogues())
            {
                ProgressForm.DoClose();
            }

            ProgressForm.Dispose();
        }
示例#21
0
        /**************************************************************************/

        private void RenderListView(
            MacroscopeDocumentCollection DocCollection,
            List <string> UrlList,
            MacroscopeDataExtractorCssSelectors DataExtractor
            )
        {
            MacroscopeAllowedHosts AllowedHosts = this.MainForm.GetJobMaster().GetAllowedHosts();

            if (DocCollection.CountDocuments() == 0)
            {
                return;
            }

            List <ListViewItem> ListViewItems = new List <ListViewItem> ();

            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);
            decimal Count           = 0;
            decimal TotalDocs       = ( decimal )DocCollection.CountDocuments();
            decimal MajorPercentage = (( decimal )100 / TotalDocs) * Count;

            if (MacroscopePreferencesManager.GetShowProgressDialogues())
            {
                ProgressForm.UpdatePercentages(
                    Title: "Preparing Display",
                    Message: "Processing document collection for display:",
                    MajorPercentage: MajorPercentage,
                    ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                    );
            }

            foreach (string Url in UrlList)
            {
                MacroscopeDocument msDoc      = DocCollection.GetDocument(Url: Url);
                string             DocUrl     = msDoc.GetUrl();
                string             StatusCode = (( int )msDoc.GetStatusCode()).ToString();
                string             Status     = msDoc.GetStatusCode().ToString();
                string             MimeType   = msDoc.GetMimeType();

                if (!DataExtractor.CanApplyDataExtractorsToDocument(msDoc: msDoc))
                {
                    continue;
                }

                foreach (KeyValuePair <string, string> DataExtractedPair in msDoc.IterateDataExtractedCssSelectors())
                {
                    ListViewItem lvItem           = null;
                    string       CssSelectorLabel = DataExtractedPair.Key;
                    string       ExtractedValue   = DataExtractedPair.Value;
                    string       PairKey          = null;

                    if (
                        string.IsNullOrEmpty(CssSelectorLabel) ||
                        string.IsNullOrEmpty(ExtractedValue))
                    {
                        continue;
                    }

                    PairKey = string.Join(
                        "::",
                        DocUrl,
                        Macroscope.GetStringDigest(Text: CssSelectorLabel),
                        Macroscope.GetStringDigest(Text: ExtractedValue)
                        );

                    if (this.DisplayListView.Items.ContainsKey(PairKey))
                    {
                        lvItem = this.DisplayListView.Items[PairKey];
                    }
                    else
                    {
                        lvItem = new ListViewItem(PairKey);
                        lvItem.UseItemStyleForSubItems = false;
                        lvItem.Name = PairKey;

                        for (int i = 0; i < 6; i++)
                        {
                            lvItem.SubItems.Add("");
                        }

                        ListViewItems.Add(lvItem);
                    }

                    if (lvItem != null)
                    {
                        try
                        {
                            lvItem.SubItems[ColUrl].Text              = DocUrl;
                            lvItem.SubItems[ColStatusCode].Text       = StatusCode;
                            lvItem.SubItems[ColStatus].Text           = Status;
                            lvItem.SubItems[ColMimeType].Text         = MimeType;
                            lvItem.SubItems[ColCssSelectorLabel].Text = CssSelectorLabel;
                            lvItem.SubItems[ColExtractedValue].Text   = ExtractedValue;
                        }
                        catch (Exception ex)
                        {
                            DebugMsg(string.Format("MacroscopeDisplayDataExtractorCssSelectors: {0}", ex.Message));
                            DebugMsg(string.Format("MacroscopeDisplayDataExtractorCssSelectors: {0}", ex.StackTrace));
                        }
                    }
                    else
                    {
                        DebugMsg(string.Format("MacroscopeDisplayDataExtractorCssSelectors MISSING: {0}", PairKey));
                    }

                    if (msDoc.GetIsInternal())
                    {
                        lvItem.SubItems[ColUrl].ForeColor = Color.Green;
                    }
                    else
                    {
                        lvItem.SubItems[ColUrl].ForeColor = Color.Gray;
                    }

                    if (Regex.IsMatch(StatusCode, "^[2]"))
                    {
                        lvItem.SubItems[ColStatusCode].ForeColor = Color.Green;
                        lvItem.SubItems[ColStatus].ForeColor     = Color.Green;
                    }
                    else
                    if (Regex.IsMatch(StatusCode, "^[3]"))
                    {
                        lvItem.SubItems[ColStatusCode].ForeColor = Color.Goldenrod;
                        lvItem.SubItems[ColStatus].ForeColor     = Color.Goldenrod;
                    }
                    else
                    if (Regex.IsMatch(StatusCode, "^[45]"))
                    {
                        lvItem.SubItems[ColStatusCode].ForeColor = Color.Red;
                        lvItem.SubItems[ColStatus].ForeColor     = Color.Red;
                    }
                    else
                    {
                        lvItem.SubItems[ColStatusCode].ForeColor = Color.Blue;
                        lvItem.SubItems[ColStatus].ForeColor     = Color.Blue;
                    }
                }

                if (MacroscopePreferencesManager.GetShowProgressDialogues())
                {
                    Count++;

                    MajorPercentage = (( decimal )100 / TotalDocs) * Count;

                    ProgressForm.UpdatePercentages(
                        Title: null,
                        Message: null,
                        MajorPercentage: MajorPercentage,
                        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                        );
                }
            }

            this.DisplayListView.Items.AddRange(ListViewItems.ToArray());

            this.DisplayListView.AutoResizeColumns(ColumnHeaderAutoResizeStyle.ColumnContent);

            this.DisplayListView.Columns[ColUrl].Width        = 300;
            this.DisplayListView.Columns[ColStatusCode].Width = 100;
            this.DisplayListView.Columns[ColStatus].Width     = 100;
            this.DisplayListView.Columns[ColMimeType].Width   = 100;

            if (MacroscopePreferencesManager.GetShowProgressDialogues())
            {
                ProgressForm.DoClose();
            }

            ProgressForm.Dispose();
        }
示例#22
0
        /**************************************************************************/

        private void BuildWorksheetPageUriAnalysis(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            {
                ws.Cell(iRow, iCol).Value = "URL";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Status Code";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Status";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Occurrences";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Checksum";
            }

            iColMax = iCol;

            iRow++;

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc = DocCollection.GetDocument(Url);

                string StatusCode = (( int )msDoc.GetStatusCode()).ToString();
                string Status     = msDoc.GetStatusCode().ToString();
                string Checksum   = msDoc.GetChecksum();
                int    Count      = DocCollection.GetStatsChecksumCount(Checksum: Checksum);

                iCol = 1;

                this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

                if (AllowedHosts.IsInternalUrl(Url: Url))
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                }
                else
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                }

                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, StatusCode);

                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, Status);

                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, Count);

                if (Count > 1)
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                }
                else
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Blue);
                }

                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, Checksum);

                if (Count > 1)
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                }
                else
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Blue);
                }

                iRow++;
            }

            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax);
                var excelTable = rangeData.CreateTable();
            }
        }
示例#23
0
        /**************************************************************************/

        private void BuildWorksheetPageLinks(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            {
                ws.Cell(iRow, iCol).Value = "URL";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Link Type";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Source URL";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Target URL";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Follow";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Alt Text";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Raw Source URL";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Raw Target URL";
            }

            iColMax = iCol;

            iRow++;

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc = DocCollection.GetDocument(Url);

                foreach (MacroscopeLink Link in msDoc.IterateOutlinks())
                {
                    string LinkType = Link.GetLinkType().ToString();

                    string SourceUrl = Link.GetSourceUrl();
                    string TargetUrl = Link.GetTargetUrl();

                    string AltText = Link.GetAltText();

                    string RawSourceUrl = Link.GetRawSourceUrl();
                    string RawTargetUrl = Link.GetRawTargetUrl();

                    string DoFollow = "No Follow";

                    if (Link.GetDoFollow())
                    {
                        DoFollow = "Follow";
                    }

                    if (string.IsNullOrEmpty(AltText))
                    {
                        AltText = "";
                    }

                    if (string.IsNullOrEmpty(RawSourceUrl))
                    {
                        RawSourceUrl = "";
                    }

                    if (string.IsNullOrEmpty(RawTargetUrl))
                    {
                        RawTargetUrl = "";
                    }

                    iCol = 1;

                    this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

                    if (AllowedHosts.IsInternalUrl(Url: Url))
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                    }

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(LinkType));

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(SourceUrl));

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(TargetUrl));

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(DoFollow));

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(AltText));

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(RawSourceUrl));

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(RawTargetUrl));

                    iRow++;
                }
            }

            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax);
                var excelTable = rangeData.CreateTable();
            }
        }
示例#24
0
        /**************************************************************************/

        private void BuildWorksheetPageKeywords(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

            {
                ws.Cell(iRow, iCol).Value = "URL";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Occurrences";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Keywords";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Keywords Length";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Number of Keywords";
            }

            iColMax = iCol;

            iRow++;

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc   = DocCollection.GetDocument(Url);
                Boolean            Proceed = false;

                if (msDoc.GetIsExternal())
                {
                    continue;
                }

                if (msDoc.GetIsRedirect())
                {
                    continue;
                }

                if (msDoc.GetIsHtml())
                {
                    Proceed = true;
                }
                else
                if (msDoc.GetIsPdf())
                {
                    Proceed = true;
                }

                if (Proceed)
                {
                    iCol = 1;

                    string Keywords       = msDoc.GetKeywords();
                    int    Occurrences    = 0;
                    int    KeywordsLength = msDoc.GetKeywordsLength();
                    int    KeywordsNumber = msDoc.GetKeywordsCount();

                    if (KeywordsLength > 0)
                    {
                        Occurrences = DocCollection.GetStatsKeywordsCount(msDoc);
                    }

                    this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

                    if (msDoc.GetIsInternal())
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                    }

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, Occurrences);

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(Keywords));

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, KeywordsLength);

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, KeywordsNumber);

                    iRow++;
                }
            }

            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax);
                var excelTable = rangeData.CreateTable();
            }
        }
示例#25
0
        /**************************************************************************/

        private void BuildWorksheetPageLinks(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            {
                ws.WriteField("URL");
                ws.WriteField("Link Type");
                ws.WriteField("Source URL");
                ws.WriteField("Target URL");
                ws.WriteField("Follow");
                ws.WriteField("Alt Text");
                ws.WriteField("Raw Source URL");
                ws.WriteField("Raw Target URL");

                ws.NextRecord();
            }

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc = DocCollection.GetDocument(Url: Url);

                foreach (MacroscopeLink Link in msDoc.IterateOutlinks())
                {
                    string LinkType = Link.GetLinkType().ToString();

                    string SourceUrl = Link.GetSourceUrl();
                    string TargetUrl = Link.GetTargetUrl();

                    string AltText = Link.GetAltText();

                    string RawSourceUrl = Link.GetRawSourceUrl();
                    string RawTargetUrl = Link.GetRawTargetUrl();

                    string DoFollow = "No Follow";

                    if (Link.GetDoFollow())
                    {
                        DoFollow = "Follow";
                    }

                    if (string.IsNullOrEmpty(AltText))
                    {
                        AltText = "";
                    }

                    if (string.IsNullOrEmpty(RawSourceUrl))
                    {
                        RawSourceUrl = "";
                    }

                    if (string.IsNullOrEmpty(RawTargetUrl))
                    {
                        RawTargetUrl = "";
                    }

                    this.InsertAndFormatUrlCell(ws, msDoc);

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(LinkType));

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(SourceUrl));

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(TargetUrl));

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(DoFollow));

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(AltText));

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(RawSourceUrl));

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(RawTargetUrl));

                    ws.NextRecord();
                }
            }
        }
示例#26
0
        /**************************************************************************/

        private void BuildWorksheetPageTitles(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

            {
                ws.WriteField("URL");
                ws.WriteField("Page Language");
                ws.WriteField("Detected Language");
                ws.WriteField("Occurrences");
                ws.WriteField("Title");
                ws.WriteField("Title Length");
                ws.WriteField("Pixel Width");
                ws.NextRecord();
            }

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc   = DocCollection.GetDocument(Url);
                Boolean            Proceed = false;

                if (msDoc.GetIsExternal())
                {
                    continue;
                }

                if (msDoc.GetIsRedirect())
                {
                    continue;
                }

                if (msDoc.GetIsHtml())
                {
                    Proceed = true;
                }
                else
                if (msDoc.GetIsPdf())
                {
                    Proceed = true;
                }

                if (Proceed)
                {
                    string Title            = msDoc.GetTitle();
                    string PageLanguage     = msDoc.GetIsoLanguageCode();
                    string DetectedLanguage = msDoc.GetTitleLanguage();
                    int    Occurrences      = 0;
                    int    TitleLength      = msDoc.GetTitleLength();
                    int    TitlePixelWidth  = msDoc.GetTitlePixelWidth();

                    if (TitleLength > 0)
                    {
                        Occurrences = DocCollection.GetStatsTitleCount(msDoc: msDoc);
                    }

                    this.InsertAndFormatUrlCell(ws, msDoc);

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(PageLanguage));

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(DetectedLanguage));

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(Occurrences.ToString()));

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(Title));

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(TitleLength.ToString()));

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(TitlePixelWidth.ToString()));

                    ws.NextRecord();
                }
            }
        }
        /**************************************************************************/

        private void BuildWorksheetPageDuplicateEtags(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            decimal CountOuter = 0;
            decimal CountInner = 0;
            decimal DocCount   = 0;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            Dictionary <string, int> DuplicatesList = new Dictionary <string, int> (DocCollection.CountDocuments());
            Dictionary <string, MacroscopeDocument> DuplicatesDocList = new Dictionary <string, MacroscopeDocument> (DocCollection.CountDocuments());

            DocCount = ( decimal )DocCollection.CountDocuments();

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc = DocCollection.GetDocument(Url);
                string             Etag  = msDoc.GetEtag();

                if ((Etag != null) && (Etag.Length > 0))
                {
                    if (!DuplicatesDocList.ContainsKey(Url))
                    {
                        DuplicatesDocList.Add(Url, msDoc);
                    }

                    if (DuplicatesList.ContainsKey(Etag))
                    {
                        DuplicatesList[Etag] = DuplicatesList[Etag] + 1;
                    }
                    else
                    {
                        DuplicatesList.Add(Etag, 1);
                    }
                }
            }

            {
                ws.WriteField("Status Code");
                ws.WriteField("Status");
                ws.WriteField("Occurrences");
                ws.WriteField("ETag");
                ws.WriteField("URL");

                ws.NextRecord();
            }

            foreach (string Etag in DuplicatesList.Keys)
            {
                CountOuter++;
                CountInner = 0;

                if (DuplicatesList[Etag] > 1)
                {
                    foreach (MacroscopeDocument msDoc in  DuplicatesDocList.Values)
                    {
                        CountInner++;

                        if (DocCount > 0)
                        {
                            this.ProgressForm.UpdatePercentages(
                                Title: null,
                                Message: null,
                                MajorPercentage: -1,
                                ProgressLabelMajor: string.Format("Documents Processed: {0}", CountOuter),
                                MinorPercentage: (( decimal )100 / DocCount) * CountOuter,
                                ProgressLabelMinor: Etag,
                                SubMinorPercentage: (( decimal )100 / DocCount) * CountInner,
                                ProgressLabelSubMinor: msDoc.GetUrl()
                                );
                        }

                        if (msDoc.GetEtag() == Etag)
                        {
                            int            StatusCode  = ( int )msDoc.GetStatusCode();
                            HttpStatusCode Status      = msDoc.GetStatusCode();
                            int            Occurrences = DuplicatesList[Etag];

                            this.InsertAndFormatStatusCodeCell(ws, StatusCode);

                            this.InsertAndFormatStatusCodeCell(ws, Status);

                            this.InsertAndFormatContentCell(ws, Occurrences);

                            this.InsertAndFormatContentCell(ws, msDoc.GetEtag());

                            this.InsertAndFormatUrlCell(ws, msDoc);

                            ws.NextRecord();
                        }
                    }
                }
            }
        }
        /**************************************************************************/

        private void BuildWorksheetPageDuplicateTitles(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            decimal Count    = 0;
            decimal DocCount = 0;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            DocCount = ( decimal )DocCollection.CountDocuments();

            {
                ws.WriteField("URL");
                ws.WriteField("Occurrences");
                ws.WriteField("Title");

                ws.NextRecord();
            }

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc   = DocCollection.GetDocument(Url);
                Boolean            Proceed = false;

                if (DocCount > 0)
                {
                    Count++;
                    this.ProgressForm.UpdatePercentages(
                        Title: null,
                        Message: null,
                        MajorPercentage: -1,
                        ProgressLabelMajor: string.Format("Documents Processed: {0}", Count),
                        MinorPercentage: (( decimal )100 / DocCount) * Count,
                        ProgressLabelMinor: Url,
                        SubMinorPercentage: -1,
                        ProgressLabelSubMinor: null
                        );
                }

                if (AllowedHosts.IsInternalUrl(Url: Url))
                {
                    if (msDoc.GetIsHtml())
                    {
                        Proceed = true;
                    }
                    else
                    if (msDoc.GetIsPdf())
                    {
                        Proceed = true;
                    }
                    else
                    {
                        Proceed = false;
                    }
                }

                if (Proceed)
                {
                    string Title       = msDoc.GetTitle();
                    int    Occurrences = DocCollection.GetStatsTitleCount(msDoc: msDoc);

                    if (Occurrences > 1)
                    {
                        this.InsertAndFormatUrlCell(ws, msDoc);

                        this.InsertAndFormatContentCell(ws, Occurrences);

                        this.InsertAndFormatContentCell(ws, this.FormatIfMissing(Title));

                        ws.NextRecord();
                    }
                }
            }
        }
示例#29
0
        /**************************************************************************/

        private void BuildWorksheetPageRedirectedLinks(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            {
                ws.Cell(iRow, iCol).Value = "Status Code";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Status";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Origin URL";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Destination URL";
            }

            iColMax = iCol;

            iRow++;

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument     msDoc        = DocCollection.GetDocument(Url);
                MacroscopeHyperlinksIn HyperlinksIn = DocCollection.GetDocumentHyperlinksIn(Url);
                int    StatusCode = ( int )msDoc.GetStatusCode();
                string Status     = msDoc.GetStatusCode().ToString();

                if (
                    (StatusCode >= 300) &&
                    (StatusCode <= 399) &&
                    (HyperlinksIn != null))
                {
                    foreach (MacroscopeHyperlinkIn HyperlinkIn in HyperlinksIn.IterateLinks())
                    {
                        string OriginUrl = HyperlinkIn.GetSourceUrl();

                        if (
                            (OriginUrl != null) &&
                            (OriginUrl.Length > 0))
                        {
                            iCol = 1;

                            this.InsertAndFormatContentCell(ws, iRow, iCol, StatusCode.ToString());

                            if ((StatusCode >= 400) && (StatusCode <= 599))
                            {
                                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                            }
                            else
                            {
                                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Blue);
                            }

                            iCol++;

                            this.InsertAndFormatContentCell(ws, iRow, iCol, Status);

                            if ((StatusCode >= 400) && (StatusCode <= 599))
                            {
                                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                            }
                            else
                            {
                                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Blue);
                            }

                            iCol++;

                            this.InsertAndFormatUrlCell(ws, iRow, iCol, OriginUrl);

                            if (AllowedHosts.IsInternalUrl(Url: OriginUrl))
                            {
                                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                            }
                            else
                            {
                                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                            }

                            iCol++;

                            this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

                            if (AllowedHosts.IsInternalUrl(Url: Url))
                            {
                                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                            }
                            else
                            {
                                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                            }

                            iRow++;
                        }
                    }
                }
            }

            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax);
                var excelTable = rangeData.CreateTable();
            }
        }
        /**************************************************************************/

        private void BuildWorksheetPageDuplicatePages(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            decimal DocCount     = 0;
            decimal DocListCount = 0;
            decimal CountOuter   = 0;
            decimal CountInner   = 0;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();
            Dictionary <string, Boolean> CrossCheckList;

            CrossCheckList = MacroscopeLevenshteinAnalysis.GetCrossCheckList(
                Capacity: DocCollection.CountDocuments()
                );

            DocCount = ( decimal )DocCollection.CountDocuments();

            {
                ws.WriteField("Status Code");
                ws.WriteField("Status");
                ws.WriteField("Origin URL");
                ws.WriteField("Distance");
                ws.WriteField("Similar URL");

                ws.NextRecord();
            }

            foreach (string UrlLeft in DocCollection.DocumentKeys())
            {
                MacroscopeDocument            msDocLeft           = DocCollection.GetDocument(UrlLeft);
                MacroscopeLevenshteinAnalysis LevenshteinAnalysis = null;

                CountOuter++;
                CountInner = 0;

                if (DocCount > 0)
                {
                    this.ProgressForm.UpdatePercentages(
                        Title: null,
                        Message: null,
                        MajorPercentage: -1,
                        ProgressLabelMajor: string.Format("Documents Processed: {0}", CountOuter),
                        MinorPercentage: (( decimal )100 / DocCount) * CountOuter,
                        ProgressLabelMinor: UrlLeft,
                        SubMinorPercentage: 0,
                        ProgressLabelSubMinor: ""
                        );
                }

                if (msDocLeft.GetIsExternal())
                {
                    continue;
                }

                if (!msDocLeft.GetIsHtml())
                {
                    continue;
                }

                LevenshteinAnalysis = new MacroscopeLevenshteinAnalysis(
                    msDoc: msDocLeft,
                    SizeDifference: MacroscopePreferencesManager.GetMaxLevenshteinSizeDifference(),
                    Threshold: MacroscopePreferencesManager.GetMaxLevenshteinDistance(),
                    CrossCheckList: CrossCheckList,
                    IPercentageDone: this
                    );

                Dictionary <MacroscopeDocument, int> DocList;

                DocList = LevenshteinAnalysis.AnalyzeDocCollection(
                    DocCollection: DocCollection
                    );

                DocListCount = ( decimal )DocList.Count;

                foreach (MacroscopeDocument msDocDuplicate in DocList.Keys)
                {
                    int            StatusCode   = ( int )msDocLeft.GetStatusCode();
                    HttpStatusCode Status       = msDocLeft.GetStatusCode();
                    string         UrlDuplicate = msDocDuplicate.GetUrl();
                    int            Distance     = DocList[msDocDuplicate];

                    CountInner++;

                    if (DocCount > 0)
                    {
                        this.ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: -1,
                            ProgressLabelMajor: string.Format("Documents Processed: {0}", CountOuter),
                            MinorPercentage: (( decimal )100 / DocCount) * CountOuter,
                            ProgressLabelMinor: UrlLeft,
                            SubMinorPercentage: (( decimal )100 / DocListCount) * CountInner,
                            ProgressLabelSubMinor: UrlDuplicate
                            );
                    }

                    this.InsertAndFormatStatusCodeCell(ws, StatusCode);

                    this.InsertAndFormatStatusCodeCell(ws, Status);

                    this.InsertAndFormatUrlCell(ws, UrlLeft);

                    this.InsertAndFormatContentCell(ws, Distance.ToString());

                    this.InsertAndFormatUrlCell(ws, UrlDuplicate);

                    ws.NextRecord();

                    if (this.ProgressForm.Cancelled())
                    {
                        break;
                    }
                }

                if (this.ProgressForm.Cancelled())
                {
                    break;
                }

                Thread.Yield();
            }
        }