/**************************************************************************/
    /// <summary>
    /// Writes the broken-links CSV sheet: a header record followed by one
    /// record per inbound hyperlink that points at a document whose status
    /// code lies in the 400-599 range.
    /// </summary>
    /// <param name="JobMaster">Job that supplies the document collection.</param>
    /// <param name="ws">CSV writer receiving the header and data records.</param>
    private void BuildWorksheetPageBrokenLinks(
        MacroscopeJobMaster JobMaster,
        CsvWriter ws
    )
    {
        MacroscopeDocumentCollection Documents = JobMaster.GetDocCollection();
        MacroscopeAllowedHosts Hosts = JobMaster.GetAllowedHosts();

        // Header record.
        ws.WriteField("Status Code");
        ws.WriteField("Status");
        ws.WriteField("Anchor Text");
        ws.WriteField("Alt Text");
        ws.WriteField("Origin URL");
        ws.WriteField("Destination URL");
        ws.NextRecord();

        foreach (MacroscopeDocument TargetDoc in Documents.IterateDocuments())
        {
            MacroscopeHyperlinksIn InboundLinks = Documents.GetDocumentHyperlinksIn(TargetDoc.GetUrl());
            int Code = (int)TargetDoc.GetStatusCode();
            string StatusText = TargetDoc.GetStatusCode().ToString();

            // Only error responses that actually have inbound links are reported.
            bool IsErrorStatus = (Code >= 400) && (Code <= 599);

            if (!IsErrorStatus || (InboundLinks == null))
            {
                continue;
            }

            foreach (MacroscopeHyperlinkIn Inbound in InboundLinks.IterateLinks())
            {
                string SourceUrl = Inbound.GetSourceUrl();
                string Anchor = Inbound.GetAnchorText();
                string Alt = Inbound.GetAltText();

                // A link without an origin URL cannot be reported.
                if (string.IsNullOrEmpty(SourceUrl))
                {
                    continue;
                }

                this.InsertAndFormatContentCell(ws, Code.ToString());
                this.InsertAndFormatContentCell(ws, StatusText);
                this.InsertAndFormatContentCell(ws, Anchor);
                this.InsertAndFormatContentCell(ws, Alt);
                this.InsertAndFormatUrlCell(ws, SourceUrl);
                this.InsertAndFormatUrlCell(ws, TargetDoc);
                ws.NextRecord();
            }
        }
    }
/**************************************************************************/
    /// <summary>
    /// Populates the display list view with one entry per URL in the job's
    /// blocked-by-robots list, optionally reporting progress through a
    /// percentage dialogue.
    /// </summary>
    /// <param name="JobMaster">Job supplying the blocked-by-robots list and the allowed hosts.</param>
    public void RenderListView(MacroscopeJobMaster JobMaster)
    {
        Dictionary<String, bool> Blocked = JobMaster.GetBlockedByRobotsList();

        if (Blocked.Count == 0)
        {
            return;
        }

        // Read the preference once so the dialogue is opened, updated and
        // closed under the same decision.
        bool ShowProgress = MacroscopePreferencesManager.GetShowProgressDialogues();

        List<ListViewItem> ListViewItems = new List<ListViewItem>(Blocked.Count);
        MacroscopeAllowedHosts AllowedHosts = JobMaster.GetAllowedHosts();
        MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

        try
        {
            decimal Count = 0;
            decimal TotalDocs = (decimal)Blocked.Count;

            if (ShowProgress)
            {
                ProgressForm.UpdatePercentages(
                    Title: "Preparing Display",
                    Message: "Processing document collection for display:",
                    MajorPercentage: ((decimal)100 / TotalDocs) * Count,
                    ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                );
            }

            // Iterate the pairs directly to avoid a second dictionary lookup per URL.
            foreach (KeyValuePair<String, bool> Entry in Blocked)
            {
                string Url = Entry.Key;
                bool IsInternal = AllowedHosts.IsInternalUrl(Url);

                this.RenderListView(
                    ListViewItems: ListViewItems,
                    Url: Url,
                    IsBlocked: Entry.Value,
                    IsInternal: IsInternal
                );

                Count++;

                if (ShowProgress)
                {
                    ProgressForm.UpdatePercentages(
                        Title: null,
                        Message: null,
                        MajorPercentage: ((decimal)100 / TotalDocs) * Count,
                        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                    );
                }
            }

            this.DisplayListView.Items.AddRange(ListViewItems.ToArray());
        }
        finally
        {
            // Always release the dialogue, even when rendering throws.
            if (ShowProgress)
            {
                ProgressForm.DoClose();
            }

            ProgressForm.Dispose();
        }
    }
/**************************************************************************/
    /// <summary>
    /// Adds a worksheet listing every document for which GetIsInternal() is
    /// true and GetAllowedByRobots() is false, then turns the written range
    /// into a table.
    /// </summary>
    /// <param name="JobMaster">Job supplying the document collection and allowed hosts.</param>
    /// <param name="wb">Workbook that receives the new worksheet.</param>
    /// <param name="WorksheetLabel">Name of the worksheet to add.</param>
    private void BuildWorksheetBlockedByRobotsInternal(
        MacroscopeJobMaster JobMaster,
        XLWorkbook wb,
        string WorksheetLabel
    )
    {
        const int UrlColumn = 1;
        const int RobotsColumn = 2;

        var ws = wb.Worksheets.Add(WorksheetLabel);
        MacroscopeDocumentCollection Documents = JobMaster.GetDocCollection();
        MacroscopeAllowedHosts Hosts = JobMaster.GetAllowedHosts();

        // Header row.
        ws.Cell(1, UrlColumn).Value = "URL";
        ws.Cell(1, RobotsColumn).Value = "Status";

        int Row = 2;

        foreach (MacroscopeDocument Doc in Documents.IterateDocuments())
        {
            if (!Doc.GetIsInternal() || Doc.GetAllowedByRobots())
            {
                continue;
            }

            this.InsertAndFormatUrlCell(ws, Row, UrlColumn, Doc);

            // Green when the allowed-hosts list reports the URL as internal, gray otherwise.
            XLColor UrlColour = Hosts.IsInternalUrl(Url: Doc.GetUrl()) ? XLColor.Green : XLColor.Gray;
            ws.Cell(Row, UrlColumn).Style.Font.SetFontColor(UrlColour);

            this.InsertAndFormatRobotsCell(ws, Row, RobotsColumn, Doc);

            Row++;
        }

        // Header plus every data row written above.
        ws.Range(1, 1, Row - 1, RobotsColumn).CreateTable();
    }
/**************************************************************************/
    /// <summary>
    /// Writes the sitemap-errors CSV sheet. For every internal sitemap XML
    /// document, each outlink whose target document is internal and either
    /// has a 400-599 status code or is not allowed by robots produces one
    /// record.
    /// </summary>
    /// <param name="JobMaster">Job that supplies the document collection.</param>
    /// <param name="ws">CSV writer receiving the header and data records.</param>
    private void BuildWorksheetSitemapErrors(
        MacroscopeJobMaster JobMaster,
        CsvWriter ws
    )
    {
        MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

        // Header record.
        ws.WriteField("Sitemap URL");
        ws.WriteField("Status Code");
        ws.WriteField("Robots");
        ws.WriteField("URL");
        ws.NextRecord();

        foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
        {
            if (!(msDoc.GetIsInternal() && msDoc.IsDocumentType(Type: MacroscopeConstants.DocumentType.SITEMAPXML)))
            {
                continue;
            }

            foreach (MacroscopeLink Outlink in msDoc.IterateOutlinks())
            {
                string TargetUrl = Outlink.GetTargetUrl();
                MacroscopeDocument msDocLinked = DocCollection.GetDocumentByUrl(Url: TargetUrl);

                // The lookup result is dereferenced below; a target that yields
                // no document cannot be classified, so skip it rather than throw.
                if (msDocLinked == null)
                {
                    continue;
                }

                if (!msDocLinked.GetIsInternal())
                {
                    continue;
                }

                int StatusCode = (int)msDocLinked.GetStatusCode();
                bool IsErrorStatus = (StatusCode >= 400) && (StatusCode <= 599);
                bool IsBlockedByRobots = !msDocLinked.GetAllowedByRobots();

                if (!IsErrorStatus && !IsBlockedByRobots)
                {
                    continue;
                }

                // The first three cells describe the sitemap document itself;
                // the last cell is the offending target URL.
                this.InsertAndFormatUrlCell(ws, msDoc);
                this.InsertAndFormatStatusCodeCell(ws, msDoc);
                this.InsertAndFormatRobotsCell(ws, msDoc);
                this.InsertAndFormatUrlCell(ws, TargetUrl);
                ws.NextRecord();
            }
        }
    }
/**************************************************************************/
    /// <summary>
    /// Adds a worksheet with one row per email address key found on each
    /// document for which GetIsHtml() is true, paired with the document URL,
    /// then turns the written range into a table.
    /// </summary>
    /// <param name="JobMaster">Job supplying the document collection.</param>
    /// <param name="wb">Workbook that receives the new worksheet.</param>
    /// <param name="WorksheetLabel">Name of the worksheet to add.</param>
    private void BuildWorksheetEmailAddresses(
        MacroscopeJobMaster JobMaster,
        XLWorkbook wb,
        string WorksheetLabel
    )
    {
        const int AddressColumn = 1;
        const int UrlColumn = 2;

        var ws = wb.Worksheets.Add(WorksheetLabel);
        MacroscopeDocumentCollection Documents = JobMaster.GetDocCollection();
        MacroscopeAllowedHosts Hosts = JobMaster.GetAllowedHosts();

        // Header row.
        ws.Cell(1, AddressColumn).Value = "Email Address";
        ws.Cell(1, UrlColumn).Value = "URL";

        int Row = 2;

        foreach (MacroscopeDocument Doc in Documents.IterateDocuments())
        {
            if (!Doc.GetIsHtml())
            {
                continue;
            }

            Dictionary<string, string> Addresses = Doc.GetEmailAddresses();

            foreach (string Address in Addresses.Keys)
            {
                this.InsertAndFormatContentCell(ws, Row, AddressColumn, Address);
                this.InsertAndFormatUrlCell(ws, Row, UrlColumn, Doc);
                Row++;
            }
        }

        // Header plus every data row written above.
        ws.Range(1, 1, Row - 1, UrlColumn).CreateTable();
    }
/**************************************************************************/
    /// <summary>
    /// Writes the redirects-audit CSV sheet: one record per redirecting
    /// document that has both a non-empty origin URL and a non-empty
    /// destination URL.
    /// </summary>
    /// <param name="JobMaster">Job that supplies the document collection.</param>
    /// <param name="ws">CSV writer receiving the header and data records.</param>
    private void BuildWorksheetPageRedirectsAudit(
        MacroscopeJobMaster JobMaster,
        CsvWriter ws
    )
    {
        MacroscopeDocumentCollection Documents = JobMaster.GetDocCollection();
        MacroscopeAllowedHosts Hosts = JobMaster.GetAllowedHosts();

        // Header record.
        ws.WriteField("Origin URL");
        ws.WriteField("Status Code");
        ws.WriteField("Status");
        ws.WriteField("Destination URL");
        ws.NextRecord();

        foreach (string DocKey in Documents.DocumentKeys())
        {
            MacroscopeDocument Doc = Documents.GetDocument(Url: DocKey);

            if (Doc.GetIsRedirect())
            {
                string FromUrl = Doc.GetUrlRedirectFrom();
                string CodeText = ((int)Doc.GetStatusCode()).ToString();
                string StatusText = Doc.GetStatusCode().ToString();
                string ToUrl = Doc.GetUrlRedirectTo();

                // Both ends of the redirect must be known for the row to be useful.
                bool HasBothEnds = !string.IsNullOrEmpty(FromUrl) && !string.IsNullOrEmpty(ToUrl);

                if (HasBothEnds)
                {
                    this.InsertAndFormatUrlCell(ws, FromUrl);
                    this.InsertAndFormatContentCell(ws, CodeText);
                    this.InsertAndFormatContentCell(ws, StatusText);
                    this.InsertAndFormatUrlCell(ws, ToUrl);
                    ws.NextRecord();
                }
            }
        }
    }
/**************************************************************************/
    /// <summary>
    /// Adds a worksheet with one row per telephone number key found on each
    /// HTML-typed document, paired with the document URL, then turns the
    /// written range into a table.
    /// </summary>
    /// <param name="JobMaster">Job supplying the document collection.</param>
    /// <param name="wb">Workbook that receives the new worksheet.</param>
    /// <param name="WorksheetLabel">Name of the worksheet to add.</param>
    private void BuildWorksheetTelephoneNumbers(
        MacroscopeJobMaster JobMaster,
        XLWorkbook wb,
        string WorksheetLabel
    )
    {
        const int NumberColumn = 1;
        const int UrlColumn = 2;

        var ws = wb.Worksheets.Add(WorksheetLabel);
        MacroscopeDocumentCollection Documents = JobMaster.GetDocCollection();
        MacroscopeAllowedHosts Hosts = JobMaster.GetAllowedHosts();

        // Header row.
        ws.Cell(1, NumberColumn).Value = "Telephone Number";
        ws.Cell(1, UrlColumn).Value = "URL";

        int Row = 2;

        foreach (MacroscopeDocument Doc in Documents.IterateDocuments())
        {
            if (!Doc.IsDocumentType(Type: MacroscopeConstants.DocumentType.HTML))
            {
                continue;
            }

            Dictionary<string, string> Numbers = Doc.GetTelephoneNumbers();

            foreach (string Number in Numbers.Keys)
            {
                this.InsertAndFormatContentCell(ws, Row, NumberColumn, Number);
                this.InsertAndFormatUrlCell(ws, Row, UrlColumn, Doc);
                Row++;
            }
        }

        // Header plus every data row written above.
        ws.Range(1, 1, Row - 1, UrlColumn).CreateTable();
    }
/**************************************************************************/
    /// <summary>
    /// Writes the URI-analysis CSV sheet: one record per document key with
    /// its status, its checksum and the collection's count for that checksum.
    /// </summary>
    /// <param name="JobMaster">Job that supplies the document collection.</param>
    /// <param name="ws">CSV writer receiving the header and data records.</param>
    private void BuildWorksheetPageUriAnalysis(
        MacroscopeJobMaster JobMaster,
        CsvWriter ws
    )
    {
        MacroscopeDocumentCollection Documents = JobMaster.GetDocCollection();
        MacroscopeAllowedHosts Hosts = JobMaster.GetAllowedHosts();

        // Header record.
        ws.WriteField("URL");
        ws.WriteField("Status Code");
        ws.WriteField("Status");
        ws.WriteField("Occurrences");
        ws.WriteField("Checksum");
        ws.NextRecord();

        foreach (string DocKey in Documents.DocumentKeys())
        {
            MacroscopeDocument Doc = Documents.GetDocument(Url: DocKey);

            string CodeText = ((int)Doc.GetStatusCode()).ToString();
            string StatusText = Doc.GetStatusCode().ToString();
            string DocChecksum = Doc.GetChecksum();
            int Occurrences = Documents.GetStatsChecksumCount(Checksum: DocChecksum);

            this.InsertAndFormatUrlCell(ws, Doc);
            this.InsertAndFormatContentCell(ws, CodeText);
            this.InsertAndFormatContentCell(ws, StatusText);
            this.InsertAndFormatContentCell(ws, Occurrences.ToString());
            this.InsertAndFormatContentCell(ws, DocChecksum);
            ws.NextRecord();
        }
    }
/**************************************************************************/
    /// <summary>
    /// Writes the errors CSV sheet: one record per document key whose status
    /// code lies in the 400-599 range.
    /// </summary>
    /// <remarks>
    /// The inbound-hyperlinks lookup and the allowed-hosts local that were
    /// previously fetched here were never read, so they have been dropped.
    /// </remarks>
    /// <param name="JobMaster">Job that supplies the document collection.</param>
    /// <param name="ws">CSV writer receiving the header and data records.</param>
    private void BuildWorksheetErrors(
        MacroscopeJobMaster JobMaster,
        CsvWriter ws
    )
    {
        MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

        // Header record.
        ws.WriteField("Status Code");
        ws.WriteField("Status");
        ws.WriteField("URL");
        ws.NextRecord();

        foreach (string Url in DocCollection.DocumentKeys())
        {
            MacroscopeDocument msDoc = DocCollection.GetDocument(Url);
            int StatusCode = (int)msDoc.GetStatusCode();

            // Only client/server error responses are reported.
            if ((StatusCode < 400) || (StatusCode > 599))
            {
                continue;
            }

            string Status = msDoc.GetStatusCode().ToString();

            this.InsertAndFormatContentCell(ws, StatusCode.ToString());
            this.InsertAndFormatContentCell(ws, Status);
            this.InsertAndFormatUrlCell(ws, Url);
            ws.NextRecord();
        }
    }
/**************************************************************************/
    /// <summary>
    /// Writes the observations CSV sheet: one record per remark attached to
    /// each document, alongside the document's URL and status.
    /// </summary>
    /// <param name="JobMaster">Job that supplies the document collection.</param>
    /// <param name="ws">CSV writer receiving the header and data records.</param>
    private void BuildWorksheetPageObservations(
        MacroscopeJobMaster JobMaster,
        CsvWriter ws
    )
    {
        MacroscopeDocumentCollection Documents = JobMaster.GetDocCollection();
        MacroscopeAllowedHosts Hosts = JobMaster.GetAllowedHosts();

        // Header record.
        ws.WriteField("URL");
        ws.WriteField("Status Code");
        ws.WriteField("Status");
        ws.WriteField("Observation");
        ws.NextRecord();

        foreach (MacroscopeDocument Doc in Documents.IterateDocuments())
        {
            string DocUrl = Doc.GetUrl();
            string CodeText = ((int)Doc.GetStatusCode()).ToString();
            string StatusText = Doc.GetStatusCode().ToString();

            foreach (KeyValuePair<string, string> Remark in Doc.IterateRemarks())
            {
                // Only the remark's value is exported; its key is not written.
                string Observation = Remark.Value;

                this.InsertAndFormatUrlCell(ws, Doc);
                this.InsertAndFormatContentCell(ws, CodeText);
                this.InsertAndFormatContentCell(ws, StatusText);
                this.InsertAndFormatContentCell(ws, Observation);
                ws.NextRecord();
            }
        }
    }
/**************************************************************************/
    /// <summary>
    /// Adds a worksheet with one row per outlink of every document, giving
    /// the link type, source/target URLs, follow state, alt text and raw
    /// URLs, then turns the written range into a table.
    /// </summary>
    /// <param name="JobMaster">Job supplying the document collection and allowed hosts.</param>
    /// <param name="wb">Workbook that receives the new worksheet.</param>
    /// <param name="WorksheetLabel">Name of the worksheet to add.</param>
    private void BuildWorksheetPageLinks(
        MacroscopeJobMaster JobMaster,
        XLWorkbook wb,
        string WorksheetLabel
    )
    {
        var ws = wb.Worksheets.Add(WorksheetLabel);
        MacroscopeDocumentCollection Documents = JobMaster.GetDocCollection();
        MacroscopeAllowedHosts Hosts = JobMaster.GetAllowedHosts();

        // Header row; the array order defines the column order.
        string[] Headers =
        {
            "URL",
            "Link Type",
            "Source URL",
            "Target URL",
            "Follow",
            "Alt Text",
            "Raw Source URL",
            "Raw Target URL"
        };

        for (int Index = 0; Index < Headers.Length; Index++)
        {
            ws.Cell(1, Index + 1).Value = Headers[Index];
        }

        int LastColumn = Headers.Length;
        int Row = 2;

        foreach (MacroscopeDocument Doc in Documents.IterateDocuments())
        {
            foreach (MacroscopeLink Link in Doc.IterateOutlinks())
            {
                string LinkType = Link.GetLinkType().ToString();
                string SourceUrl = Link.GetSourceUrl();
                string TargetUrl = Link.GetTargetUrl();
                string AltText = Link.GetAltText();
                string RawSourceUrl = Link.GetRawSourceUrl();
                string RawTargetUrl = Link.GetRawTargetUrl();
                string FollowLabel = Link.GetDoFollow() ? "Follow" : "No Follow";

                // Normalise missing optional values to empty strings.
                AltText = string.IsNullOrEmpty(AltText) ? "" : AltText;
                RawSourceUrl = string.IsNullOrEmpty(RawSourceUrl) ? "" : RawSourceUrl;
                RawTargetUrl = string.IsNullOrEmpty(RawTargetUrl) ? "" : RawTargetUrl;

                int Col = 1;

                this.InsertAndFormatUrlCell(ws, Row, Col, Doc);

                // Green when the allowed-hosts list reports the URL as internal, gray otherwise.
                XLColor UrlColour = Hosts.IsInternalUrl(Url: Doc.GetUrl()) ? XLColor.Green : XLColor.Gray;
                ws.Cell(Row, Col).Style.Font.SetFontColor(UrlColour);
                Col++;

                this.InsertAndFormatContentCell(ws, Row, Col++, this.FormatIfMissing(LinkType));
                this.InsertAndFormatContentCell(ws, Row, Col++, this.FormatIfMissing(SourceUrl));
                this.InsertAndFormatContentCell(ws, Row, Col++, this.FormatIfMissing(TargetUrl));
                this.InsertAndFormatContentCell(ws, Row, Col++, this.FormatIfMissing(FollowLabel));
                this.InsertAndFormatContentCell(ws, Row, Col++, this.FormatIfMissing(AltText));
                this.InsertAndFormatContentCell(ws, Row, Col++, this.FormatIfMissing(RawSourceUrl));
                this.InsertAndFormatContentCell(ws, Row, Col, this.FormatIfMissing(RawTargetUrl));

                Row++;
            }
        }

        // Header plus every data row written above.
        ws.Range(1, 1, Row - 1, LastColumn).CreateTable();
    }
/**************************************************************************/
    /// <summary>
    /// Adds a worksheet with one row per document whose status code lies in
    /// the 400-599 range, then turns the written range into a table.
    /// </summary>
    /// <remarks>
    /// Rows are only written for 400-599 status codes, so the status cells
    /// are always coloured red; the former blue fallback branches could
    /// never execute and have been removed, together with an inbound
    /// hyperlinks lookup whose result was never read.
    /// </remarks>
    /// <param name="JobMaster">Job supplying the document collection and allowed hosts.</param>
    /// <param name="wb">Workbook that receives the new worksheet.</param>
    /// <param name="WorksheetLabel">Name of the worksheet to add.</param>
    private void BuildWorksheetErrors(
        MacroscopeJobMaster JobMaster,
        XLWorkbook wb,
        string WorksheetLabel
    )
    {
        var ws = wb.Worksheets.Add(WorksheetLabel);

        int iRow = 1;
        int iCol = 1;
        int iColMax = 1;

        MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
        MacroscopeAllowedHosts AllowedHosts = JobMaster.GetAllowedHosts();

        // Header row.
        ws.Cell(iRow, iCol).Value = "Status Code";
        iCol++;
        ws.Cell(iRow, iCol).Value = "Status";
        iCol++;
        ws.Cell(iRow, iCol).Value = "URL";

        iColMax = iCol;
        iRow++;

        foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
        {
            int StatusCode = (int)msDoc.GetStatusCode();

            // Only client/server error responses are reported.
            if ((StatusCode < 400) || (StatusCode > 599))
            {
                continue;
            }

            string Status = msDoc.GetStatusCode().ToString();
            string Url = msDoc.GetUrl();

            iCol = 1;

            this.InsertAndFormatContentCell(ws, iRow, iCol, StatusCode.ToString());
            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);

            iCol++;

            this.InsertAndFormatContentCell(ws, iRow, iCol, Status);
            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);

            iCol++;

            this.InsertAndFormatUrlCell(ws, iRow, iCol, Url);

            // Green when the allowed-hosts list reports the URL as internal, gray otherwise.
            if (AllowedHosts.IsInternalUrl(Url: Url))
            {
                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
            }
            else
            {
                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
            }

            iRow++;
        }

        // Header plus every data row written above.
        ws.Range(1, 1, iRow - 1, iColMax).CreateTable();
    }
/**************************************************************************/
    /// <summary>
    /// Adds a worksheet with one row per redirecting document that has both
    /// an origin URL and a destination URL, then turns the written range
    /// into a table.
    /// </summary>
    /// <remarks>
    /// Rows with a null or empty origin/destination URL are skipped, matching
    /// the CSV overload of this report; previously only null was skipped and
    /// empty URLs were passed on to the allowed-hosts check.
    /// </remarks>
    /// <param name="JobMaster">Job supplying the document collection and allowed hosts.</param>
    /// <param name="wb">Workbook that receives the new worksheet.</param>
    /// <param name="WorksheetLabel">Name of the worksheet to add.</param>
    private void BuildWorksheetPageRedirectsAudit(
        MacroscopeJobMaster JobMaster,
        XLWorkbook wb,
        string WorksheetLabel
    )
    {
        var ws = wb.Worksheets.Add(WorksheetLabel);

        int iRow = 1;
        int iCol = 1;
        int iColMax = 1;

        MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
        MacroscopeAllowedHosts AllowedHosts = JobMaster.GetAllowedHosts();

        // Header row.
        ws.Cell(iRow, iCol).Value = "Origin URL";
        iCol++;
        ws.Cell(iRow, iCol).Value = "Status Code";
        iCol++;
        ws.Cell(iRow, iCol).Value = "Status";
        iCol++;
        ws.Cell(iRow, iCol).Value = "Destination URL";

        iColMax = iCol;
        iRow++;

        foreach (string Url in DocCollection.DocumentKeys())
        {
            MacroscopeDocument msDoc = DocCollection.GetDocument(Url);

            if (!msDoc.GetIsRedirect())
            {
                continue;
            }

            string OriginURL = msDoc.GetUrlRedirectFrom();
            string StatusCode = ((int)msDoc.GetStatusCode()).ToString();
            string Status = msDoc.GetStatusCode().ToString();
            string DestinationURL = msDoc.GetUrlRedirectTo();

            // Both ends of the redirect must be known for the row to be useful.
            if (string.IsNullOrEmpty(OriginURL) || string.IsNullOrEmpty(DestinationURL))
            {
                continue;
            }

            iCol = 1;

            this.InsertAndFormatUrlCell(ws, iRow, iCol, OriginURL);

            if (AllowedHosts.IsInternalUrl(Url: OriginURL))
            {
                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
            }
            else
            {
                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
            }

            iCol++;

            this.InsertAndFormatContentCell(ws, iRow, iCol, StatusCode);

            iCol++;

            this.InsertAndFormatContentCell(ws, iRow, iCol, Status);

            iCol++;

            this.InsertAndFormatUrlCell(ws, iRow, iCol, DestinationURL);

            if (AllowedHosts.IsInternalUrl(Url: DestinationURL))
            {
                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
            }
            else
            {
                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
            }

            iRow++;
        }

        // Header plus every data row written above.
        ws.Range(1, 1, iRow - 1, iColMax).CreateTable();
    }
/**************************************************************************/
    /// <summary>
    /// Adds a worksheet listing sitemap problems. For every internal sitemap
    /// XML document, each outlink whose target document is internal and
    /// either has a 400-599 status code or is not allowed by robots produces
    /// one row. The written range is then turned into a table.
    /// </summary>
    /// <param name="JobMaster">Job supplying the document collection and allowed hosts.</param>
    /// <param name="wb">Workbook that receives the new worksheet.</param>
    /// <param name="WorksheetLabel">Name of the worksheet to add.</param>
    private void BuildWorksheetSitemapXmlErrors(
        MacroscopeJobMaster JobMaster,
        XLWorkbook wb,
        string WorksheetLabel
    )
    {
        var ws = wb.Worksheets.Add(WorksheetLabel);

        int iRow = 1;
        int iCol = 1;
        int iColMax = 1;

        MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
        MacroscopeAllowedHosts AllowedHosts = JobMaster.GetAllowedHosts();

        // Header row.
        ws.Cell(iRow, iCol).Value = "Sitemap URL";
        iCol++;
        ws.Cell(iRow, iCol).Value = "Status Code";
        iCol++;
        ws.Cell(iRow, iCol).Value = "Robots";
        iCol++;
        ws.Cell(iRow, iCol).Value = "URL";

        iColMax = iCol;
        iRow++;

        foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
        {
            if (!(msDoc.GetIsInternal() && msDoc.IsDocumentType(Type: MacroscopeConstants.DocumentType.SITEMAPXML)))
            {
                continue;
            }

            foreach (MacroscopeLink Outlink in msDoc.IterateOutlinks())
            {
                string TargetUrl = Outlink.GetTargetUrl();
                MacroscopeDocument msDocLinked = DocCollection.GetDocumentByUrl(Url: TargetUrl);

                // The lookup result is dereferenced below; a target that yields
                // no document cannot be classified, so skip it rather than throw.
                if (msDocLinked == null)
                {
                    continue;
                }

                if (!msDocLinked.GetIsInternal())
                {
                    continue;
                }

                int StatusCode = (int)msDocLinked.GetStatusCode();
                bool IsErrorStatus = (StatusCode >= 400) && (StatusCode <= 599);
                bool IsBlockedByRobots = !msDocLinked.GetAllowedByRobots();

                if (!IsErrorStatus && !IsBlockedByRobots)
                {
                    continue;
                }

                iCol = 1;

                // The first three cells describe the sitemap document itself.
                this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

                if (AllowedHosts.IsInternalUrl(Url: msDoc.GetUrl()))
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                }
                else
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                }

                iCol++;

                this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, msDoc);

                iCol++;

                this.InsertAndFormatRobotsCell(ws, iRow, iCol, msDoc);

                iCol++;

                // The last cell is the offending target URL.
                this.InsertAndFormatUrlCell(ws, iRow, iCol, TargetUrl);

                if (AllowedHosts.IsInternalUrl(Url: TargetUrl))
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                }
                else
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                }

                iRow++;
            }
        }

        // Header plus every data row written above.
        ws.Range(1, 1, iRow - 1, iColMax).CreateTable();
    }
/**************************************************************************/
    /// <summary>
    /// Adds a worksheet with one row per non-empty label/value pair produced
    /// by the regex data extractors, for every document the extractors can
    /// be applied to, then turns the written range into a table.
    /// </summary>
    /// <param name="JobMaster">Job supplying the document collection.</param>
    /// <param name="wb">Workbook that receives the new worksheet.</param>
    /// <param name="WorksheetLabel">Name of the worksheet to add.</param>
    private void BuildWorksheetRegularExpressions(
        MacroscopeJobMaster JobMaster,
        XLWorkbook wb,
        string WorksheetLabel
    )
    {
        const int LastColumn = 6;

        var ws = wb.Worksheets.Add(WorksheetLabel);
        MacroscopeDocumentCollection Documents = JobMaster.GetDocCollection();
        MacroscopeAllowedHosts Hosts = JobMaster.GetAllowedHosts();

        // Header row.
        ws.Cell(1, 1).Value = MacroscopeConstants.Url;
        ws.Cell(1, 2).Value = MacroscopeConstants.StatusCode;
        ws.Cell(1, 3).Value = MacroscopeConstants.Status;
        ws.Cell(1, 4).Value = MacroscopeConstants.ContentType;
        ws.Cell(1, 5).Value = "Extracted Label";
        ws.Cell(1, LastColumn).Value = "Extracted Value";

        int Row = 2;

        foreach (MacroscopeDocument Doc in Documents.IterateDocuments())
        {
            string DocUrl = Doc.GetUrl();
            string CodeText = ((int)Doc.GetStatusCode()).ToString();
            string StatusText = Doc.GetStatusCode().ToString();
            string ContentType = Doc.GetMimeType();

            if (this.DataExtractorRegexes.CanApplyDataExtractorsToDocument(msDoc: Doc))
            {
                foreach (KeyValuePair<string, string> Extracted in Doc.IterateDataExtractedRegexes())
                {
                    string Label = Extracted.Key;
                    string Value = Extracted.Value;

                    // Pairs lacking either a label or a value are not exported.
                    bool IsComplete = !string.IsNullOrEmpty(Label) && !string.IsNullOrEmpty(Value);

                    if (!IsComplete)
                    {
                        continue;
                    }

                    int Col = 1;

                    this.InsertAndFormatUrlCell(ws, Row, Col, Doc);

                    // Green when the document reports itself as internal, gray otherwise.
                    XLColor UrlColour = Doc.GetIsInternal() ? XLColor.Green : XLColor.Gray;
                    ws.Cell(Row, Col).Style.Font.SetFontColor(UrlColour);
                    Col++;

                    this.InsertAndFormatStatusCodeCell(ws, Row, Col++, Doc);
                    this.InsertAndFormatContentCell(ws, Row, Col++, this.FormatIfMissing(StatusText));
                    this.InsertAndFormatContentCell(ws, Row, Col++, this.FormatIfMissing(ContentType));
                    this.InsertAndFormatContentCell(ws, Row, Col++, this.FormatIfMissing(Label));
                    this.InsertAndFormatContentCell(ws, Row, Col, this.FormatIfMissing(Value));

                    Row++;
                }
            }
        }

        // Header plus every data row written above.
        ws.Range(1, 1, Row - 1, LastColumn).CreateTable();
    }
/**************************************************************************/
    /// <summary>
    /// Adds a worksheet with one row per remark attached to each document,
    /// alongside the document's URL and status, then turns the written range
    /// into a table.
    /// </summary>
    /// <param name="JobMaster">Job supplying the document collection and allowed hosts.</param>
    /// <param name="wb">Workbook that receives the new worksheet.</param>
    /// <param name="WorksheetLabel">Name of the worksheet to add.</param>
    private void BuildWorksheetPageObservations(
        MacroscopeJobMaster JobMaster,
        XLWorkbook wb,
        string WorksheetLabel
    )
    {
        const int UrlColumn = 1;
        const int CodeColumn = 2;
        const int StatusColumn = 3;
        const int ObservationColumn = 4;

        var ws = wb.Worksheets.Add(WorksheetLabel);
        MacroscopeDocumentCollection Documents = JobMaster.GetDocCollection();
        MacroscopeAllowedHosts Hosts = JobMaster.GetAllowedHosts();

        // Header row.
        ws.Cell(1, UrlColumn).Value = "URL";
        ws.Cell(1, CodeColumn).Value = "Status Code";
        ws.Cell(1, StatusColumn).Value = "Status";
        ws.Cell(1, ObservationColumn).Value = "Observation";

        int Row = 2;

        foreach (MacroscopeDocument Doc in Documents.IterateDocuments())
        {
            string DocUrl = Doc.GetUrl();
            string CodeText = ((int)Doc.GetStatusCode()).ToString();
            string StatusText = Doc.GetStatusCode().ToString();

            foreach (KeyValuePair<string, string> Remark in Doc.IterateRemarks())
            {
                this.InsertAndFormatUrlCell(ws, Row, UrlColumn, Doc);

                // Green when the allowed-hosts list reports the URL as internal, gray otherwise.
                XLColor UrlColour = Hosts.IsInternalUrl(Url: DocUrl) ? XLColor.Green : XLColor.Gray;
                ws.Cell(Row, UrlColumn).Style.Font.SetFontColor(UrlColour);

                this.InsertAndFormatContentCell(ws, Row, CodeColumn, CodeText);
                this.InsertAndFormatContentCell(ws, Row, StatusColumn, StatusText);

                // Only the remark's value is exported; its key is not written.
                this.InsertAndFormatContentCell(ws, Row, ObservationColumn, Remark.Value);

                Row++;
            }
        }

        // Header plus every data row written above.
        ws.Range(1, 1, Row - 1, ObservationColumn).CreateTable();
    }
/**************************************************************************/
    /// <summary>
    /// Adds a worksheet with one row per document the custom filters can be
    /// applied to. The first four columns hold URL, status code, status and
    /// content type; one further column per custom filter slot holds the
    /// text-presence label found for that slot's pattern. The written range
    /// is then turned into a table.
    /// </summary>
    /// <remarks>
    /// A slot whose pattern is null, empty, or duplicates an earlier slot's
    /// pattern gets the header "EMPTYn" (n = slot number, 1-based). The
    /// null/empty test runs before the dictionary lookup because
    /// Dictionary.ContainsKey throws on a null key.
    /// </remarks>
    /// <param name="JobMaster">Job supplying the document collection.</param>
    /// <param name="wb">Workbook that receives the new worksheet.</param>
    /// <param name="WorksheetLabel">Name of the worksheet to add.</param>
    private void BuildWorksheetCustomFilter(
        MacroscopeJobMaster JobMaster,
        XLWorkbook wb,
        string WorksheetLabel
    )
    {
        var ws = wb.Worksheets.Add(WorksheetLabel);

        int iRow = 1;
        int iCol = 1;
        int iColMax = 1;

        MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

        // Tracks header labels already used, so duplicate patterns can be detected.
        Dictionary<string, int> FilterColsTable = new Dictionary<string, int>(this.CustomFilter.GetSize());
        const int FilterColOffset = 4;

        // Header row: fixed columns first, then one column per filter slot.
        ws.Cell(iRow, iCol).Value = MacroscopeConstants.Url;
        iCol++;
        ws.Cell(iRow, iCol).Value = MacroscopeConstants.StatusCode;
        iCol++;
        ws.Cell(iRow, iCol).Value = MacroscopeConstants.Status;
        iCol++;
        ws.Cell(iRow, iCol).Value = MacroscopeConstants.ContentType;

        for (int Slot = 0; Slot < this.CustomFilter.GetSize(); Slot++)
        {
            string FilterPattern = this.CustomFilter.GetPattern(Slot).Key;
            string HeaderLabel = FilterPattern;

            iCol++;

            // Null/empty check must come first: ContainsKey(null) throws.
            if (string.IsNullOrEmpty(FilterPattern) || FilterColsTable.ContainsKey(FilterPattern))
            {
                HeaderLabel = string.Format("EMPTY{0}", Slot + 1);
            }

            // Indexer assignment cannot throw on an already-present label.
            FilterColsTable[HeaderLabel] = Slot + FilterColOffset;
            ws.Cell(iRow, iCol).Value = HeaderLabel;
        }

        iColMax = iCol;
        iRow++;

        foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
        {
            if (!this.CustomFilter.CanApplyCustomFiltersToDocument(msDoc: msDoc))
            {
                continue;
            }

            string Status = msDoc.GetStatusCode().ToString();
            string MimeType = msDoc.GetMimeType();

            iCol = 1;

            this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

            // Green when the document reports itself as internal, gray otherwise.
            if (msDoc.GetIsInternal())
            {
                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
            }
            else
            {
                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
            }

            iCol++;

            this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, msDoc);

            iCol++;

            this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(Status));

            iCol++;

            this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(MimeType));

            iCol++;

            for (int Slot = 0; Slot < this.CustomFilter.GetSize(); Slot++)
            {
                string FilterPattern = this.CustomFilter.GetPattern(Slot: Slot).Key;
                KeyValuePair<string, MacroscopeConstants.TextPresence> Pair = msDoc.GetCustomFilteredItem(Text: FilterPattern);

                if ((Pair.Key != null) && (Pair.Value != MacroscopeConstants.TextPresence.UNDEFINED))
                {
                    string CustomFilterItemValue = MacroscopeConstants.TextPresenceLabels[Pair.Value];

                    this.InsertAndFormatContentCell(ws, iRow, iCol, CustomFilterItemValue);

                    switch (Pair.Value)
                    {
                        case MacroscopeConstants.TextPresence.CONTAINS_STRING:
                        case MacroscopeConstants.TextPresence.NOT_CONTAINS_STRING:
                        case MacroscopeConstants.TextPresence.CONTAINS_REGEX:
                        case MacroscopeConstants.TextPresence.NOT_CONTAINS_REGEX:
                            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                            break;

                        case MacroscopeConstants.TextPresence.MUST_CONTAIN_STRING:
                        case MacroscopeConstants.TextPresence.SHOULD_NOT_CONTAIN_STRING:
                        case MacroscopeConstants.TextPresence.MUST_CONTAIN_REGEX:
                        case MacroscopeConstants.TextPresence.SHOULD_NOT_CONTAIN_REGEX:
                            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                            break;

                        default:
                            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                            break;
                    }
                }
                else
                {
                    // No result for this slot: leave the cell blank.
                    this.InsertAndFormatContentCell(ws, iRow, iCol, "");
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                }

                iCol++;
            }

            iRow++;
        }

        // Header plus every data row written above.
        ws.Range(1, 1, iRow - 1, iColMax).CreateTable();
    }
/**************************************************************************/
    /// <summary>
    /// Adds a worksheet with one row per redirect chain. Each hop in a chain
    /// occupies two columns: the hop URL and the hop status code. Header
    /// cells "Hop n URL" / "Hop n Status" are written as hops are met. A
    /// table is created only when at least one hop was written.
    /// </summary>
    /// <remarks>
    /// The internal/external font colour is applied to the hop's URL cell
    /// before the column index advances; previously it was applied after the
    /// increment and therefore landed on the status cell instead.
    /// </remarks>
    /// <param name="JobMaster">Job supplying the document collection and allowed hosts.</param>
    /// <param name="wb">Workbook that receives the new worksheet.</param>
    /// <param name="WorksheetLabel">Name of the worksheet to add.</param>
    private void BuildWorksheetPageRedirectChains(
        MacroscopeJobMaster JobMaster,
        XLWorkbook wb,
        string WorksheetLabel
    )
    {
        var ws = wb.Worksheets.Add(WorksheetLabel);

        int iRow = 1;
        int iCol = 1;
        int iColMax = 1;

        MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
        MacroscopeAllowedHosts AllowedHosts = JobMaster.GetAllowedHosts();
        List<List<MacroscopeRedirectChainDocStruct>> RedirectChains = DocCollection.GetMacroscopeRedirectChains();

        // Placeholder headers; the per-hop headers written below overwrite
        // these two cells as soon as any chain contains a hop.
        ws.Cell(iRow, iCol).Value = "Hop";
        iCol++;
        ws.Cell(iRow, iCol).Value = "Status";

        iRow++;

        foreach (List<MacroscopeRedirectChainDocStruct> DocList in RedirectChains)
        {
            int iHop = 1;

            iCol = 1;

            foreach (MacroscopeRedirectChainDocStruct RedirectChainDocStruct in DocList)
            {
                string Url = RedirectChainDocStruct.Url;
                string StatusCode = RedirectChainDocStruct.StatusCode.ToString();

                ws.Cell(1, iCol).Value = string.Format("Hop {0} URL", iHop);

                this.InsertAndFormatUrlCell(ws, iRow, iCol, Url);

                // Colour the URL cell itself, before moving to the status column.
                if (AllowedHosts.IsInternalUrl(Url: Url))
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                }
                else
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                }

                iCol++;

                ws.Cell(1, iCol).Value = string.Format("Hop {0} Status", iHop);

                this.InsertAndFormatContentCell(ws, iRow, iCol, StatusCode);

                iCol++;
                iHop++;
            }

            // iCol is one past the last written column of this chain.
            if (iCol > iColMax)
            {
                iColMax = iCol;
            }

            iRow++;
        }

        if ((iRow > 1) && (iColMax > 2))
        {
            // iColMax is one past the widest chain, hence the "- 1".
            ws.Range(1, 1, iRow - 1, iColMax - 1).CreateTable();
        }
    }
/**************************************************************************/
    /// <summary>
    /// Writes the custom-filter CSV sheet: one record per document the
    /// custom filters can be applied to. Each record holds URL, status code,
    /// status and content type, followed by one field per custom filter slot
    /// with the text-presence label found for that slot's pattern (or an
    /// empty field when there is none).
    /// </summary>
    /// <remarks>
    /// The record is terminated once per document, after all slot fields
    /// have been written; previously NextRecord() sat inside the slot loop,
    /// which split each document across several records and left the record
    /// unterminated when there were no slots. A slot whose pattern is null,
    /// empty, or duplicates an earlier slot's pattern gets the header
    /// "EMPTYn"; the null/empty test runs before the dictionary lookup
    /// because Dictionary.ContainsKey throws on a null key.
    /// </remarks>
    /// <param name="JobMaster">Job that supplies the document collection.</param>
    /// <param name="ws">CSV writer receiving the header and data records.</param>
    private void BuildWorksheetCustomFilter(
        MacroscopeJobMaster JobMaster,
        CsvWriter ws
    )
    {
        MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

        // Tracks header labels already used, so duplicate patterns can be detected.
        Dictionary<string, int> FilterColsTable = new Dictionary<string, int>(this.CustomFilter.GetSize());
        const int FilterColOffset = 3;

        // Header record: fixed fields first, then one field per filter slot.
        ws.WriteField(MacroscopeConstants.Url);
        ws.WriteField(MacroscopeConstants.StatusCode);
        ws.WriteField(MacroscopeConstants.Status);
        ws.WriteField(MacroscopeConstants.ContentType);

        for (int Slot = 0; Slot < this.CustomFilter.GetSize(); Slot++)
        {
            string FilterPattern = this.CustomFilter.GetPattern(Slot).Key;
            string HeaderLabel = FilterPattern;

            // Null/empty check must come first: ContainsKey(null) throws.
            if (string.IsNullOrEmpty(FilterPattern) || FilterColsTable.ContainsKey(FilterPattern))
            {
                HeaderLabel = string.Format("EMPTY{0}", Slot + 1);
            }

            // Indexer assignment cannot throw on an already-present label.
            FilterColsTable[HeaderLabel] = Slot + FilterColOffset;
            ws.WriteField(HeaderLabel);
        }

        ws.NextRecord();

        foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
        {
            if (!this.CustomFilter.CanApplyCustomFiltersToDocument(msDoc: msDoc))
            {
                continue;
            }

            string Status = msDoc.GetStatusCode().ToString();
            string MimeType = msDoc.GetMimeType();

            this.InsertAndFormatUrlCell(ws, msDoc);
            this.InsertAndFormatStatusCodeCell(ws, msDoc);
            this.InsertAndFormatContentCell(ws, this.FormatIfMissing(Status));
            this.InsertAndFormatContentCell(ws, this.FormatIfMissing(MimeType));

            for (int Slot = 0; Slot < this.CustomFilter.GetSize(); Slot++)
            {
                string FilterPattern = this.CustomFilter.GetPattern(Slot: Slot).Key;
                KeyValuePair<string, MacroscopeConstants.TextPresence> Pair = msDoc.GetCustomFilteredItem(Text: FilterPattern);

                if ((Pair.Key != null) && (Pair.Value != MacroscopeConstants.TextPresence.UNDEFINED))
                {
                    string CustomFilterItemValue = MacroscopeConstants.TextPresenceLabels[Pair.Value];

                    this.InsertAndFormatContentCell(ws, CustomFilterItemValue);
                }
                else
                {
                    // No result for this slot: keep the column aligned with an empty field.
                    this.InsertAndFormatContentCell(ws, "");
                }
            }

            // One record per document, terminated after every slot field.
            ws.NextRecord();
        }
    }
/**************************************************************************/
    /// <summary>
    /// Adds a worksheet with one row per (term, document) pair: for each
    /// term in <paramref name="DicTerms"/> every document returned by the
    /// collection's deep keyword analysis list for that term gets a row with
    /// the term's count, the term and the document URL. Progress is reported
    /// through the progress form, and the written range is turned into a
    /// table.
    /// </summary>
    /// <param name="JobMaster">Job supplying the document collection.</param>
    /// <param name="wb">Workbook that receives the new worksheet.</param>
    /// <param name="WorksheetLabel">Name of the worksheet to add.</param>
    /// <param name="DicTerms">Terms to export, mapped to the count written in the first column.</param>
    private void BuildWorksheetKeywordTerms(
        MacroscopeJobMaster JobMaster,
        XLWorkbook wb,
        string WorksheetLabel,
        Dictionary<string, int> DicTerms
    )
    {
        const int CountColumn = 1;
        const int TermColumn = 2;
        const int UrlColumn = 3;

        var ws = wb.Worksheets.Add(WorksheetLabel);

        decimal TermTotal = DicTerms.Count;
        decimal TermsDone = 0;

        MacroscopeDocumentCollection Documents = JobMaster.GetDocCollection();
        MacroscopeAllowedHosts Hosts = JobMaster.GetAllowedHosts();

        // Header row.
        ws.Cell(1, CountColumn).Value = "Occurrences";
        ws.Cell(1, TermColumn).Value = "Term";
        ws.Cell(1, UrlColumn).Value = "URL";

        int Row = 2;

        foreach (KeyValuePair<string, int> TermEntry in DicTerms)
        {
            string Term = TermEntry.Key;
            MacroscopeDocumentList TermDocs = Documents.GetDeepKeywordAnalysDocumentList(Term);
            decimal DocTotal = (decimal)TermDocs.CountDocuments();
            decimal DocsDone = 0;

            TermsDone++;

            // Minor bar tracks terms; -1/null are passed for the other bars.
            if (TermTotal > 0)
            {
                this.ProgressForm.UpdatePercentages(
                    Title: null,
                    Message: null,
                    MajorPercentage: -1,
                    ProgressLabelMajor: null,
                    MinorPercentage: ((decimal)100 / TermTotal) * TermsDone,
                    ProgressLabelMinor: "Keywords Processed",
                    SubMinorPercentage: -1,
                    ProgressLabelSubMinor: null
                );
            }

            foreach (MacroscopeDocument Doc in TermDocs.IterateDocuments())
            {
                DocsDone++;

                // Sub-minor bar tracks documents within the current term.
                if (DocTotal > 0)
                {
                    this.ProgressForm.UpdatePercentages(
                        Title: null,
                        Message: null,
                        MajorPercentage: -1,
                        ProgressLabelMajor: null,
                        MinorPercentage: -1,
                        ProgressLabelMinor: null,
                        SubMinorPercentage: ((decimal)100 / DocTotal) * DocsDone,
                        ProgressLabelSubMinor: "Documents Processed"
                    );
                }

                this.InsertAndFormatContentCell(ws, Row, CountColumn, this.FormatIfMissing(TermEntry.Value.ToString()));
                this.InsertAndFormatContentCell(ws, Row, TermColumn, this.FormatIfMissing(Term));
                this.InsertAndFormatUrlCell(ws, Row, UrlColumn, Doc.GetUrl());

                Row++;
            }
        }

        // Header plus every data row written above.
        ws.Range(1, 1, Row - 1, UrlColumn).CreateTable();
    }
/**************************************************************************/

/// <summary>
/// Adds a worksheet named <paramref name="WorksheetLabel"/> with one row for
/// every outbound hyperlink of every document in the collection, then turns
/// the written range into a table.
/// </summary>
/// <remarks>
/// URL cells are coloured green when the allowed-hosts object reports the URL
/// as internal and gray otherwise. A target URL that is empty, or that is
/// reported as neither internal nor external, is rewritten as a content cell
/// and coloured red.
/// </remarks>
/// <param name="JobMaster">Source of the document collection and allowed hosts.</param>
/// <param name="wb">Workbook that receives the new worksheet.</param>
/// <param name="WorksheetLabel">Name given to the new worksheet.</param>
private void BuildWorksheetPageHyperlinks(
  MacroscopeJobMaster JobMaster,
  XLWorkbook wb,
  string WorksheetLabel
)
{
  const int SourceUrlColumn = 1;
  const int TargetUrlColumn = 2;
  const int FollowColumn = 3;
  const int LinkTargetColumn = 4;
  const int AnchorTextColumn = 5;
  const int TitleColumn = 6;
  const int AltTextColumn = 7;
  const int RawTargetUrlColumn = 8;

  var sheet = wb.Worksheets.Add(WorksheetLabel);
  MacroscopeDocumentCollection documents = JobMaster.GetDocCollection();
  MacroscopeAllowedHosts hosts = JobMaster.GetAllowedHosts();

  string[] headings =
  {
    "Source URL", "Target URL", "Follow", "Target",
    "Anchor Text", "Title", "Alt Text", "Raw Target URL"
  };

  for (int index = 0; index < headings.Length; index++)
  {
    sheet.Cell(1, index + 1).Value = headings[index];
  }

  int row = 2;

  foreach (MacroscopeDocument msDoc in documents.IterateDocuments())
  {
    MacroscopeHyperlinksOut outLinks = msDoc.GetHyperlinksOut();

    foreach (MacroscopeHyperlinkOut link in outLinks.IterateLinks())
    {
      // A null target is treated as an empty string from here on.
      string targetUrl = link.GetTargetUrl() ?? "";
      string linkTarget = link.GetLinkTarget();
      string anchorText = link.GetAnchorText();
      string title = link.GetTitle();
      string altText = link.GetAltText();
      string rawTargetUrl = link.GetRawTargetUrl();
      string followLabel = link.GetDoFollow() ? "Follow" : "No Follow";

      // Source document URL.
      this.InsertAndFormatUrlCell(sheet, row, SourceUrlColumn, msDoc);
      XLColor sourceColour = hosts.IsInternalUrl(Url: msDoc.GetUrl()) ? XLColor.Green : XLColor.Gray;
      sheet.Cell(row, SourceUrlColumn).Style.Font.SetFontColor(sourceColour);

      // Link target URL.
      this.InsertAndFormatUrlCell(sheet, row, TargetUrlColumn, targetUrl);
      bool hasTarget = targetUrl.Length > 0;

      if (hasTarget && hosts.IsInternalUrl(Url: targetUrl))
      {
        sheet.Cell(row, TargetUrlColumn).Style.Font.SetFontColor(XLColor.Green);
      }
      else if (hasTarget && hosts.IsExternalUrl(Url: targetUrl))
      {
        sheet.Cell(row, TargetUrlColumn).Style.Font.SetFontColor(XLColor.Gray);
      }
      else
      {
        // Empty or unclassified target: overwrite the cell as plain content, in red.
        this.InsertAndFormatContentCell(sheet, row, TargetUrlColumn, this.FormatIfMissing(targetUrl));
        sheet.Cell(row, TargetUrlColumn).Style.Font.SetFontColor(XLColor.Red);
      }

      this.InsertAndFormatContentCell(sheet, row, FollowColumn, followLabel);
      this.InsertAndFormatContentCell(sheet, row, LinkTargetColumn, linkTarget);
      this.InsertAndFormatContentCell(sheet, row, AnchorTextColumn, this.FormatIfMissing(anchorText));
      this.InsertAndFormatContentCell(sheet, row, TitleColumn, this.FormatIfMissing(title));
      this.InsertAndFormatContentCell(sheet, row, AltTextColumn, this.FormatIfMissing(altText));
      this.InsertAndFormatContentCell(sheet, row, RawTargetUrlColumn, rawTargetUrl);

      row++;
    }
  }

  // The table spans the header row and every link row written above.
  sheet.Range(1, 1, row - 1, RawTargetUrlColumn).CreateTable();
}
/**************************************************************************/

/// <summary>
/// Adds a worksheet named <paramref name="WorksheetLabel"/> containing one row
/// per document and, for every key of <c>JobMaster.GetLocales()</c>, three
/// columns: the hreflang URL for that locale, its server date and its
/// modified date. The written range is converted into a table.
/// </summary>
/// <remarks>
/// Locales for which a document has no hreflang entry are written as a red
/// "NOT SPECIFIED" with empty date cells. Hreflang URLs are green when the
/// allowed-hosts object reports them as internal, blue otherwise.
/// </remarks>
/// <param name="JobMaster">Source of the locales, document collection and allowed hosts.</param>
/// <param name="wb">Workbook that receives the new worksheet.</param>
/// <param name="WorksheetLabel">Name given to the new worksheet.</param>
private void BuildWorksheetHrefLangMatrix(
  MacroscopeJobMaster JobMaster,
  XLWorkbook wb,
  string WorksheetLabel
)
{
  var ws = wb.Worksheets.Add(WorksheetLabel);
  int iRow = 1;
  int iCol = 1;
  int iColMax = 1;

  Dictionary<string, string> LocalesTable = JobMaster.GetLocales();
  MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
  // Fetched once instead of once per hreflang cell.
  MacroscopeAllowedHosts AllowedHosts = JobMaster.GetAllowedHosts();

  // Maps a locale key to the first of its three columns.
  Dictionary<string, int> LocaleCols = new Dictionary<string, int>();

  {
    ws.Cell(iRow, iCol).Value = "URL";
    iCol++;
    ws.Cell(iRow, iCol).Value = "Status Code";
    iCol++;
    ws.Cell(iRow, iCol).Value = "Site Locale";
    iCol++;
    ws.Cell(iRow, iCol).Value = "Title";
    iCol++;

    foreach (string LocaleKey in LocalesTable.Keys)
    {
      DebugMsg(string.Format("EXCEL Locale: {0}", LocaleKey));

      string LocaleLabel = LocaleKey.ToUpper();
      string DateServerLabel = string.Format("{0} Date Server", LocaleLabel);
      string DateModifiedLabel = string.Format("{0} Date Modified", LocaleLabel);

      LocaleCols[LocaleKey] = iCol;

      ws.Cell(iRow, iCol).Value = LocaleLabel;
      iCol++;
      ws.Cell(iRow, iCol).Value = DateServerLabel;
      iCol++;
      ws.Cell(iRow, iCol).Value = DateModifiedLabel;
      iCol++;
    }

    // iCol is now one past the last header cell, so the loop must stop before
    // it; otherwise an empty cell outside the table is created and styled.
    for (int i = 1; i < iCol; i++)
    {
      ws.Cell(iRow, i).Style.Font.SetBold();
    }
  }

  iColMax = iCol;
  iRow++;

  foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
  {
    Dictionary<string, MacroscopeHrefLang> HrefLangsTable = msDoc.GetHrefLangs();
    string DocLocale = msDoc.GetLocale();
    string SiteLocale = this.FormatIfMissing(DocLocale);
    string Title = this.FormatIfMissing(msDoc.GetTitle());

    iCol = 1;
    this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

    iCol++;
    this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, msDoc);

    iCol++;
    ws.Cell(iRow, iCol).Value = SiteLocale;
    if (SiteLocale == "MISSING")
    {
      ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
    }

    iCol++;
    ws.Cell(iRow, iCol).Value = Title;
    if (Title == "MISSING")
    {
      ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
    }

    // Put the document's own URL in the column of its own locale. A locale
    // that has no column (not a key of LocalesTable) is skipped instead of
    // throwing KeyNotFoundException. Note that the locale loop below runs
    // afterwards and overwrites this cell when the hreflang table has no
    // entry for that locale.
    int OwnLocaleCol;
    if ((DocLocale != null) && LocaleCols.TryGetValue(DocLocale, out OwnLocaleCol))
    {
      this.InsertAndFormatUrlCell(ws, iRow, OwnLocaleCol, msDoc.GetUrl());
    }

    foreach (string LocaleKey in LocalesTable.Keys)
    {
      if (string.IsNullOrEmpty(LocaleKey))
      {
        continue;
      }

      int LocaleCol = LocaleCols[LocaleKey];
      MacroscopeHrefLang HrefLangAlternate;

      if (HrefLangsTable.TryGetValue(LocaleKey, out HrefLangAlternate))
      {
        string HrefLangUrl = HrefLangAlternate.GetUrl();
        DateTime HrefLangDateServer = HrefLangAlternate.GetDateServer();
        DateTime HrefLangDateModified = HrefLangAlternate.GetDateModified();

        this.InsertAndFormatUrlCell(ws, iRow, LocaleCol, HrefLangUrl);

        if (AllowedHosts.IsInternalUrl(HrefLangUrl))
        {
          ws.Cell(iRow, LocaleCol).Style.Font.SetFontColor(XLColor.Green);
        }
        else
        {
          ws.Cell(iRow, LocaleCol).Style.Font.SetFontColor(XLColor.Blue);
        }

        this.InsertAndFormatDateCell(ws, iRow, LocaleCol + 1, HrefLangDateServer.ToString());
        this.InsertAndFormatDateCell(ws, iRow, LocaleCol + 2, HrefLangDateModified.ToString());
      }
      else
      {
        ws.Cell(iRow, LocaleCol).Style.Font.SetFontColor(XLColor.Red);
        ws.Cell(iRow, LocaleCol).Value = "NOT SPECIFIED";
        ws.Cell(iRow, LocaleCol + 1).Value = "";
        ws.Cell(iRow, LocaleCol + 2).Value = "";
      }
    }

    iRow++;
  }

  {
    // iColMax is one past the last used column, hence the - 1.
    var rangeData = ws.Range(1, 1, iRow - 1, iColMax - 1);
    rangeData.CreateTable();
  }
}
/**************************************************************************/

/// <summary>
/// Adds a worksheet named <paramref name="WorksheetLabel"/> listing every
/// document whose title count, as reported by the document collection, is
/// greater than one. Only documents whose URL the allowed-hosts object
/// reports as internal and whose type is HTML or PDF are considered.
/// The written range is converted into a table.
/// </summary>
/// <param name="JobMaster">Source of the document collection and allowed hosts.</param>
/// <param name="wb">Workbook that receives the new worksheet.</param>
/// <param name="WorksheetLabel">Name given to the new worksheet.</param>
private void BuildWorksheetPageDuplicateTitles(
  MacroscopeJobMaster JobMaster,
  XLWorkbook wb,
  string WorksheetLabel
)
{
  var ws = wb.Worksheets.Add(WorksheetLabel);
  int iRow = 1;
  int iCol = 1;
  int iColMax = 1;
  decimal Count = 0;

  MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
  MacroscopeAllowedHosts AllowedHosts = JobMaster.GetAllowedHosts();
  decimal DocCount = (decimal)DocCollection.CountDocuments();

  {
    ws.Cell(iRow, iCol).Value = "URL";
    iCol++;
    ws.Cell(iRow, iCol).Value = "Occurrences";
    iCol++;
    ws.Cell(iRow, iCol).Value = "Title";
  }

  iColMax = iCol;
  iRow++;

  foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
  {
    bool Proceed = false;

    if (DocCount > 0)
    {
      Count++;

      this.ProgressForm.UpdatePercentages(
        Title: null,
        Message: null,
        MajorPercentage: -1,
        ProgressLabelMajor: string.Format("Documents Processed: {0}", Count),
        MinorPercentage: ((decimal)100 / DocCount) * Count,
        ProgressLabelMinor: msDoc.GetUrl(),
        SubMinorPercentage: -1,
        ProgressLabelSubMinor: null
      );
    }

    if (AllowedHosts.IsInternalUrl(Url: msDoc.GetUrl()))
    {
      switch (msDoc.GetDocumentType())
      {
        case MacroscopeConstants.DocumentType.HTML:
        case MacroscopeConstants.DocumentType.PDF:
          Proceed = true;
          break;
        default:
          Proceed = false;
          break;
      }
    }

    if (!Proceed)
    {
      continue;
    }

    string Title = msDoc.GetTitle();
    int Occurrences = DocCollection.GetStatsTitleCount(msDoc: msDoc);

    if (Occurrences <= 1)
    {
      continue;
    }

    iCol = 1;
    this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

    if (msDoc.GetIsInternal())
    {
      ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
    }
    else
    {
      ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
    }

    iCol++;
    this.InsertAndFormatContentCell(ws, iRow, iCol, Occurrences);

    // Every row that reaches this point has Occurrences > 1, so the count is
    // always orange; the former green alternative could never execute.
    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Orange);

    iCol++;
    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(Title));

    iRow++;
  }

  {
    var rangeData = ws.Range(1, 1, iRow - 1, iColMax);
    rangeData.CreateTable();
  }
}
/**************************************************************************/

/// <summary>
/// Writes a CSV report of every document whose title count, as reported by
/// the document collection, is greater than one. Only documents whose URL
/// the allowed-hosts object reports as internal and whose type is HTML or
/// PDF are considered.
/// </summary>
/// <param name="JobMaster">Source of the document collection and allowed hosts.</param>
/// <param name="ws">CSV writer that receives the header record and one record per duplicate.</param>
private void BuildWorksheetPageDuplicateTitles(
  MacroscopeJobMaster JobMaster,
  CsvWriter ws
)
{
  decimal processed = 0;

  MacroscopeDocumentCollection documents = JobMaster.GetDocCollection();
  MacroscopeAllowedHosts hosts = JobMaster.GetAllowedHosts();
  decimal total = (decimal)documents.CountDocuments();

  foreach (string heading in new string[] { "URL", "Occurrences", "Title" })
  {
    ws.WriteField(heading);
  }
  ws.NextRecord();

  foreach (MacroscopeDocument msDoc in documents.IterateDocuments())
  {
    // Progress is reported for every document, including those filtered out below.
    if (total > 0)
    {
      processed++;

      string majorLabel = string.Format("Documents Processed: {0}", processed);
      decimal minorPercentage = ((decimal)100 / total) * processed;

      this.ProgressForm.UpdatePercentages(
        Title: null,
        Message: null,
        MajorPercentage: -1,
        ProgressLabelMajor: majorLabel,
        MinorPercentage: minorPercentage,
        ProgressLabelMinor: msDoc.GetUrl(),
        SubMinorPercentage: -1,
        ProgressLabelSubMinor: null
      );
    }

    if (!hosts.IsInternalUrl(Url: msDoc.GetUrl()))
    {
      continue;
    }

    MacroscopeConstants.DocumentType documentType = msDoc.GetDocumentType();
    bool isReportableType =
      (documentType == MacroscopeConstants.DocumentType.HTML)
      || (documentType == MacroscopeConstants.DocumentType.PDF);

    if (!isReportableType)
    {
      continue;
    }

    string title = msDoc.GetTitle();
    int occurrences = documents.GetStatsTitleCount(msDoc: msDoc);

    if (occurrences <= 1)
    {
      continue;
    }

    this.InsertAndFormatUrlCell(ws, msDoc);
    this.InsertAndFormatContentCell(ws, occurrences);
    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(title));
    ws.NextRecord();
  }
}
/**************************************************************************/

/// <summary>
/// Adds a worksheet named <paramref name="WorksheetLabel"/> listing, for every
/// document whose status code is in the range 300-399 and that has inbound
/// hyperlinks, one row per inbound link with a non-empty source URL.
/// The written range is converted into a table.
/// </summary>
/// <remarks>
/// Status cells are always blue. Origin and destination URLs are green when
/// the allowed-hosts object reports them as internal, gray otherwise.
/// </remarks>
/// <param name="JobMaster">Source of the document collection and allowed hosts.</param>
/// <param name="wb">Workbook that receives the new worksheet.</param>
/// <param name="WorksheetLabel">Name given to the new worksheet.</param>
private void BuildWorksheetPageRedirectedLinks(
  MacroscopeJobMaster JobMaster,
  XLWorkbook wb,
  string WorksheetLabel
)
{
  var ws = wb.Worksheets.Add(WorksheetLabel);
  int iRow = 1;
  int iCol = 1;
  int iColMax = 1;

  MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
  MacroscopeAllowedHosts AllowedHosts = JobMaster.GetAllowedHosts();

  {
    ws.Cell(iRow, iCol).Value = "Status Code";
    iCol++;
    ws.Cell(iRow, iCol).Value = "Status";
    iCol++;
    ws.Cell(iRow, iCol).Value = "Origin URL";
    iCol++;
    ws.Cell(iRow, iCol).Value = "Destination URL";
  }

  iColMax = iCol;
  iRow++;

  foreach (string Url in DocCollection.DocumentKeys())
  {
    MacroscopeDocument msDoc = DocCollection.GetDocument(Url);
    MacroscopeHyperlinksIn HyperlinksIn = DocCollection.GetDocumentHyperlinksIn(Url);
    int StatusCode = (int)msDoc.GetStatusCode();
    string Status = msDoc.GetStatusCode().ToString();

    bool IsRedirect = (StatusCode >= 300) && (StatusCode <= 399);

    if (!IsRedirect || (HyperlinksIn == null))
    {
      continue;
    }

    // The destination is the same for every inbound link of this document,
    // so its colour is decided once rather than per row.
    XLColor DestinationColor = AllowedHosts.IsInternalUrl(Url: Url) ? XLColor.Green : XLColor.Gray;

    foreach (MacroscopeHyperlinkIn HyperlinkIn in HyperlinksIn.IterateLinks())
    {
      string OriginUrl = HyperlinkIn.GetSourceUrl();

      if (string.IsNullOrEmpty(OriginUrl))
      {
        continue;
      }

      // Only 3xx codes reach this point, so the status cells are always blue;
      // the former 4xx/5xx red branches could never execute here.
      iCol = 1;
      this.InsertAndFormatContentCell(ws, iRow, iCol, StatusCode.ToString());
      ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Blue);

      iCol++;
      this.InsertAndFormatContentCell(ws, iRow, iCol, Status);
      ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Blue);

      iCol++;
      this.InsertAndFormatUrlCell(ws, iRow, iCol, OriginUrl);

      if (AllowedHosts.IsInternalUrl(Url: OriginUrl))
      {
        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
      }
      else
      {
        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
      }

      iCol++;
      this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);
      ws.Cell(iRow, iCol).Style.Font.SetFontColor(DestinationColor);

      iRow++;
    }
  }

  {
    var rangeData = ws.Range(1, 1, iRow - 1, iColMax);
    rangeData.CreateTable();
  }
}
/**************************************************************************/

/// <summary>
/// Adds a worksheet named <paramref name="WorksheetLabel"/> with one column
/// per key of <c>JobMaster.GetLocales()</c>. A document gets a row only when
/// its own locale is null or empty and its hreflang table lacks an entry for
/// at least one non-empty locale key. The written range is converted into a
/// table.
/// </summary>
/// <remarks>
/// Locales without an hreflang entry are written as a red "NOT SPECIFIED".
/// Hreflang URLs are green when the allowed-hosts object reports them as
/// internal, blue otherwise.
/// </remarks>
/// <param name="JobMaster">Source of the locales, document collection and allowed hosts.</param>
/// <param name="wb">Workbook that receives the new worksheet.</param>
/// <param name="WorksheetLabel">Name given to the new worksheet.</param>
private void BuildWorksheetHrefLangMatrixUnspecified(
  MacroscopeJobMaster JobMaster,
  XLWorkbook wb,
  string WorksheetLabel
)
{
  var ws = wb.Worksheets.Add(WorksheetLabel);
  int iRow = 1;
  int iCol = 1;
  int iColMax = 1;

  Dictionary<string, string> LocalesTable = JobMaster.GetLocales();
  MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
  // Fetched once instead of once per hreflang cell.
  MacroscopeAllowedHosts AllowedHosts = JobMaster.GetAllowedHosts();

  // Maps a locale key to its column.
  Dictionary<string, int> LocaleCols = new Dictionary<string, int>();

  {
    ws.Cell(iRow, iCol).Value = "URL";
    iCol++;
    ws.Cell(iRow, iCol).Value = "Status Code";
    iCol++;
    ws.Cell(iRow, iCol).Value = "Site Locale";
    iCol++;
    ws.Cell(iRow, iCol).Value = "Title";
    iCol++;

    foreach (string LocaleKey in LocalesTable.Keys)
    {
      DebugMsg(string.Format("EXCEL Locale: {0}", LocaleKey));

      LocaleCols[LocaleKey] = iCol;
      ws.Cell(iRow, iCol).Value = LocaleKey;
      iCol++;
    }

    // iCol is now one past the last header cell, so the loop must stop before
    // it; otherwise an empty cell outside the table is created and styled.
    for (int i = 1; i < iCol; i++)
    {
      ws.Cell(iRow, i).Style.Font.SetBold();
    }
  }

  iColMax = iCol;
  iRow++;

  foreach (string Key in DocCollection.DocumentKeys())
  {
    MacroscopeDocument msDoc = DocCollection.GetDocument(Key);
    string DocLocale = msDoc.GetLocale();

    // Documents that declare a locale are never listed on this sheet.
    if (!string.IsNullOrEmpty(DocLocale))
    {
      continue;
    }

    Dictionary<string, MacroscopeHrefLang> HrefLangsTable = msDoc.GetHrefLangs();

    // One missing locale is enough to list the document, so stop at the first.
    bool Proceed = false;
    foreach (string LocaleKey in LocalesTable.Keys)
    {
      if (!string.IsNullOrEmpty(LocaleKey) && !HrefLangsTable.ContainsKey(LocaleKey))
      {
        Proceed = true;
        break;
      }
    }

    if (!Proceed)
    {
      continue;
    }

    string SiteLocale = this.FormatIfMissing(DocLocale);
    string Title = this.FormatIfMissing(msDoc.GetTitle());

    iCol = 1;
    this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

    iCol++;
    this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, msDoc);

    iCol++;
    ws.Cell(iRow, iCol).Value = SiteLocale;
    if (SiteLocale == "MISSING")
    {
      ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
    }

    iCol++;
    ws.Cell(iRow, iCol).Value = Title;
    if (Title == "MISSING")
    {
      ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
    }

    // DocLocale is null or "" here. An empty string is non-null, so an
    // unguarded LocaleCols[""] would throw KeyNotFoundException whenever ""
    // is not a locale key; the lookup is therefore done with TryGetValue.
    int OwnLocaleCol;
    if ((DocLocale != null) && LocaleCols.TryGetValue(DocLocale, out OwnLocaleCol))
    {
      ws.Cell(iRow, OwnLocaleCol).Value = msDoc.GetUrl();
    }

    foreach (string LocaleKey in LocalesTable.Keys)
    {
      if (string.IsNullOrEmpty(LocaleKey))
      {
        continue;
      }

      int LocaleCol = LocaleCols[LocaleKey];
      MacroscopeHrefLang HrefLangAlternate;

      if (HrefLangsTable.TryGetValue(LocaleKey, out HrefLangAlternate))
      {
        string Value = HrefLangAlternate.GetUrl();
        ws.Cell(iRow, LocaleCol).Value = Value;

        if (AllowedHosts.IsInternalUrl(Value))
        {
          ws.Cell(iRow, LocaleCol).Style.Font.SetFontColor(XLColor.Green);
        }
        else
        {
          ws.Cell(iRow, LocaleCol).Style.Font.SetFontColor(XLColor.Blue);
        }
      }
      else
      {
        ws.Cell(iRow, LocaleCol).Style.Font.SetFontColor(XLColor.Red);
        ws.Cell(iRow, LocaleCol).Value = "NOT SPECIFIED";
      }
    }

    iRow++;
  }

  {
    // iColMax is one past the last used column, hence the - 1.
    var rangeData = ws.Range(1, 1, iRow - 1, iColMax - 1);
    rangeData.CreateTable();
  }
}
/**************************************************************************/

/// <summary>
/// Writes a CSV report of documents that share an ETag. A first pass counts
/// how often each non-empty ETag occurs and indexes the documents by URL; a
/// second pass writes one record for every document whose ETag occurs more
/// than once.
/// </summary>
/// <param name="JobMaster">Source of the document collection.</param>
/// <param name="ws">CSV writer that receives the header record and the duplicate records.</param>
private void BuildWorksheetPageDuplicateEtags(
  MacroscopeJobMaster JobMaster,
  CsvWriter ws
)
{
  decimal outerCount = 0;
  decimal innerCount = 0;

  MacroscopeDocumentCollection documents = JobMaster.GetDocCollection();

  // Not read below; fetched only so the calls made on JobMaster stay as they were.
  MacroscopeAllowedHosts allowedHosts = JobMaster.GetAllowedHosts();

  Dictionary<string, int> etagTallies = new Dictionary<string, int>(documents.CountDocuments());
  Dictionary<string, MacroscopeDocument> docsByUrl = new Dictionary<string, MacroscopeDocument>(documents.CountDocuments());
  decimal docTotal = (decimal)documents.CountDocuments();

  // Pass 1: tally ETags and remember each document that carries one.
  foreach (MacroscopeDocument msDoc in documents.IterateDocuments())
  {
    string etag = msDoc.GetEtag();

    if (string.IsNullOrEmpty(etag))
    {
      continue;
    }

    string url = msDoc.GetUrl();
    if (!docsByUrl.ContainsKey(url))
    {
      docsByUrl.Add(url, msDoc);
    }

    int seenSoFar;
    if (etagTallies.TryGetValue(etag, out seenSoFar))
    {
      etagTallies[etag] = seenSoFar + 1;
    }
    else
    {
      etagTallies.Add(etag, 1);
    }
  }

  foreach (string heading in new string[] { "Status Code", "Status", "Occurrences", "ETag", "URL" })
  {
    ws.WriteField(heading);
  }
  ws.NextRecord();

  // Pass 2: for every ETag seen more than once, scan the indexed documents
  // and emit those carrying that ETag.
  foreach (KeyValuePair<string, int> tally in etagTallies)
  {
    string etag = tally.Key;

    outerCount++;
    innerCount = 0;

    if (tally.Value <= 1)
    {
      continue;
    }

    foreach (MacroscopeDocument msDoc in docsByUrl.Values)
    {
      innerCount++;

      if (docTotal > 0)
      {
        this.ProgressForm.UpdatePercentages(
          Title: null,
          Message: null,
          MajorPercentage: -1,
          ProgressLabelMajor: string.Format("Documents Processed: {0}", outerCount),
          MinorPercentage: ((decimal)100 / docTotal) * outerCount,
          ProgressLabelMinor: etag,
          SubMinorPercentage: ((decimal)100 / docTotal) * innerCount,
          ProgressLabelSubMinor: msDoc.GetUrl()
        );
      }

      if (msDoc.GetEtag() != etag)
      {
        continue;
      }

      // The numeric and the enum form of the status go to different overloads.
      int statusCode = (int)msDoc.GetStatusCode();
      HttpStatusCode status = msDoc.GetStatusCode();
      int occurrences = tally.Value;

      this.InsertAndFormatStatusCodeCell(ws, statusCode);
      this.InsertAndFormatStatusCodeCell(ws, status);
      this.InsertAndFormatContentCell(ws, occurrences);
      this.InsertAndFormatContentCell(ws, msDoc.GetEtag());
      this.InsertAndFormatUrlCell(ws, msDoc);
      ws.NextRecord();
    }
  }
}
/**************************************************************************/

/// <summary>
/// Adds a worksheet named <paramref name="WorksheetLabel"/> with one row for
/// every document in <paramref name="DocumentList"/> that is of type HTML and
/// is not reported as external. The written range is converted into a table.
/// </summary>
/// <remarks>
/// The per-row locals that were assigned but never read (URL, status code,
/// robots string) and the unused document collection have been removed; the
/// cells are filled by the formatting helpers directly from the document.
/// </remarks>
/// <param name="JobMaster">Source of the allowed hosts.</param>
/// <param name="wb">Workbook that receives the new worksheet.</param>
/// <param name="WorksheetLabel">Name given to the new worksheet.</param>
/// <param name="DocumentList">Documents to list; its per-document note is written in the "Sitemap" column.</param>
/// <param name="InOut">Written as text in the "In Sitemap" column; green when true, red when false.</param>
private void BuildWorksheetSitemapsAudit(
  MacroscopeJobMaster JobMaster,
  XLWorkbook wb,
  string WorksheetLabel,
  MacroscopeDocumentList DocumentList,
  bool InOut
)
{
  var ws = wb.Worksheets.Add(WorksheetLabel);
  int iRow = 1;
  int iCol = 1;
  int iColMax = 1;

  MacroscopeAllowedHosts AllowedHosts = JobMaster.GetAllowedHosts();

  {
    ws.Cell(iRow, iCol).Value = "URL";
    iCol++;
    ws.Cell(iRow, iCol).Value = "In Sitemap";
    iCol++;
    ws.Cell(iRow, iCol).Value = "Status Code";
    iCol++;
    ws.Cell(iRow, iCol).Value = "Is Redirect";
    iCol++;
    ws.Cell(iRow, iCol).Value = "Robots";
    iCol++;
    ws.Cell(iRow, iCol).Value = "Sitemap";
  }

  iColMax = iCol;
  iRow++;

  foreach (MacroscopeDocument msDoc in DocumentList.IterateDocuments())
  {
    if (!msDoc.IsDocumentType(Type: MacroscopeConstants.DocumentType.HTML))
    {
      continue;
    }

    if (msDoc.GetIsExternal())
    {
      continue;
    }

    string SitemapUrl = DocumentList.GetDocumentNote(msDoc: msDoc);

    iCol = 1;
    this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

    if (msDoc.GetIsInternal())
    {
      ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
    }
    else
    {
      ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
    }

    iCol++;
    this.InsertAndFormatContentCell(ws, iRow, iCol, InOut.ToString());

    if (InOut)
    {
      ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
    }
    else
    {
      ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
    }

    iCol++;
    this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, msDoc);

    iCol++;
    this.InsertAndFormatRedirectCell(ws, iRow, iCol, msDoc);

    iCol++;
    this.InsertAndFormatRobotsCell(ws, iRow, iCol, msDoc);

    iCol++;
    this.InsertAndFormatUrlCell(ws, iRow, iCol, SitemapUrl);

    if (AllowedHosts.IsInternalUrl(Url: SitemapUrl))
    {
      ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
    }
    else
    {
      ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
    }

    iRow++;
  }

  {
    var rangeData = ws.Range(1, 1, iRow - 1, iColMax);
    rangeData.CreateTable();
  }
}
/**************************************************************************/

/// <summary>
/// Adds a worksheet named <paramref name="WorksheetLabel"/> listing documents
/// that share a checksum. A first pass counts how often each non-empty
/// checksum occurs and indexes the documents by URL; a second pass writes one
/// row for every document whose checksum occurs more than once. The written
/// range is converted into a table.
/// </summary>
/// <param name="JobMaster">Source of the document collection.</param>
/// <param name="wb">Workbook that receives the new worksheet.</param>
/// <param name="WorksheetLabel">Name given to the new worksheet.</param>
private void BuildWorksheetPageDuplicateChecksums(
  MacroscopeJobMaster JobMaster,
  XLWorkbook wb,
  string WorksheetLabel
)
{
  const int StatusCodeColumn = 1;
  const int StatusColumn = 2;
  const int OccurrencesColumn = 3;
  const int ChecksumColumn = 4;
  const int UrlColumn = 5;

  var sheet = wb.Worksheets.Add(WorksheetLabel);
  decimal outerCount = 0;
  decimal innerCount = 0;

  MacroscopeDocumentCollection documents = JobMaster.GetDocCollection();

  // Not read below; fetched only so the calls made on JobMaster stay as they were.
  MacroscopeAllowedHosts allowedHosts = JobMaster.GetAllowedHosts();

  Dictionary<string, int> checksumTallies = new Dictionary<string, int>(documents.CountDocuments());
  Dictionary<string, MacroscopeDocument> docsByUrl = new Dictionary<string, MacroscopeDocument>(documents.CountDocuments());
  decimal docTotal = (decimal)documents.CountDocuments();

  // Pass 1: tally checksums and remember each document that carries one.
  foreach (MacroscopeDocument msDoc in documents.IterateDocuments())
  {
    string checksum = msDoc.GetChecksum();

    if (string.IsNullOrEmpty(checksum))
    {
      continue;
    }

    string url = msDoc.GetUrl();
    if (!docsByUrl.ContainsKey(url))
    {
      docsByUrl.Add(url, msDoc);
    }

    int seenSoFar;
    if (checksumTallies.TryGetValue(checksum, out seenSoFar))
    {
      checksumTallies[checksum] = seenSoFar + 1;
    }
    else
    {
      checksumTallies.Add(checksum, 1);
    }
  }

  sheet.Cell(1, StatusCodeColumn).Value = "Status Code";
  sheet.Cell(1, StatusColumn).Value = "Status";
  sheet.Cell(1, OccurrencesColumn).Value = "Occurrences";
  sheet.Cell(1, ChecksumColumn).Value = "Checksum";
  sheet.Cell(1, UrlColumn).Value = "URL";

  int row = 2;

  // Pass 2: for every checksum seen more than once, scan the indexed
  // documents and emit those carrying that checksum.
  foreach (KeyValuePair<string, int> tally in checksumTallies)
  {
    string checksum = tally.Key;

    outerCount++;
    innerCount = 0;

    if (tally.Value <= 1)
    {
      continue;
    }

    foreach (MacroscopeDocument msDoc in docsByUrl.Values)
    {
      innerCount++;

      if (docTotal > 0)
      {
        this.ProgressForm.UpdatePercentages(
          Title: null,
          Message: null,
          MajorPercentage: -1,
          ProgressLabelMajor: string.Format("Documents Processed: {0}", outerCount),
          MinorPercentage: ((decimal)100 / docTotal) * outerCount,
          ProgressLabelMinor: checksum,
          SubMinorPercentage: ((decimal)100 / docTotal) * innerCount,
          ProgressLabelSubMinor: msDoc.GetUrl()
        );
      }

      if (msDoc.GetChecksum() != checksum)
      {
        continue;
      }

      // The numeric and the enum form of the status go to different overloads.
      int statusCode = (int)msDoc.GetStatusCode();
      HttpStatusCode status = msDoc.GetStatusCode();
      int occurrences = tally.Value;

      this.InsertAndFormatStatusCodeCell(sheet, row, StatusCodeColumn, statusCode);
      this.InsertAndFormatStatusCodeCell(sheet, row, StatusColumn, status);
      this.InsertAndFormatContentCell(sheet, row, OccurrencesColumn, occurrences);
      this.InsertAndFormatContentCell(sheet, row, ChecksumColumn, msDoc.GetChecksum());
      this.InsertAndFormatUrlCell(sheet, row, UrlColumn, msDoc);

      row++;
    }
  }

  // The table spans the header row and every duplicate row written above.
  sheet.Range(1, 1, row - 1, UrlColumn).CreateTable();
}
/**************************************************************************/

/// <summary>
/// Writes a CSV report with one record for every outlink of every document
/// in the collection: the document URL followed by the link's type, source
/// URL, target URL, follow flag, alt text and raw source/target URLs.
/// </summary>
/// <param name="JobMaster">Source of the document collection.</param>
/// <param name="ws">CSV writer that receives the header record and one record per link.</param>
private void BuildWorksheetPageLinks(
  MacroscopeJobMaster JobMaster,
  CsvWriter ws
)
{
  MacroscopeDocumentCollection documents = JobMaster.GetDocCollection();

  // Not read below; fetched only so the calls made on JobMaster stay as they were.
  MacroscopeAllowedHosts allowedHosts = JobMaster.GetAllowedHosts();

  string[] headings =
  {
    "URL", "Link Type", "Source URL", "Target URL",
    "Follow", "Alt Text", "Raw Source URL", "Raw Target URL"
  };

  foreach (string heading in headings)
  {
    ws.WriteField(heading);
  }
  ws.NextRecord();

  foreach (string Url in documents.DocumentKeys())
  {
    MacroscopeDocument msDoc = documents.GetDocument(Url: Url);

    foreach (MacroscopeLink link in msDoc.IterateOutlinks())
    {
      string linkType = link.GetLinkType().ToString();
      string sourceUrl = link.GetSourceUrl();
      string targetUrl = link.GetTargetUrl();
      string altText = link.GetAltText();
      string rawSourceUrl = link.GetRawSourceUrl();
      string rawTargetUrl = link.GetRawTargetUrl();
      string followLabel = link.GetDoFollow() ? "Follow" : "No Follow";

      // Null is normalised to an empty string for these three fields only.
      altText = string.IsNullOrEmpty(altText) ? "" : altText;
      rawSourceUrl = string.IsNullOrEmpty(rawSourceUrl) ? "" : rawSourceUrl;
      rawTargetUrl = string.IsNullOrEmpty(rawTargetUrl) ? "" : rawTargetUrl;

      this.InsertAndFormatUrlCell(ws, msDoc);

      string[] fields =
      {
        linkType, sourceUrl, targetUrl, followLabel,
        altText, rawSourceUrl, rawTargetUrl
      };

      foreach (string field in fields)
      {
        this.InsertAndFormatContentCell(ws, this.FormatIfMissing(field));
      }

      ws.NextRecord();
    }
  }
}