/**************************************************************************/

// Writes the XPath data-extractor CSV report.
//
// A header record is written first. Then, for every document that
// DataExtractorXpaths.CanApplyDataExtractorsToDocument() accepts, one record
// is written per extracted (label, value) pair in which both the label and
// the value are non-empty.
private void BuildWorksheetXpaths(
  MacroscopeJobMaster JobMaster,
  CsvWriter ws
)
{

  MacroscopeDocumentCollection documents = JobMaster.GetDocCollection();

  // Fetched here as in the sibling report builders; not consulted below.
  MacroscopeAllowedHosts allowedHosts = JobMaster.GetAllowedHosts();

  // Header record.
  ws.WriteField(MacroscopeConstants.Url);
  ws.WriteField(MacroscopeConstants.StatusCode);
  ws.WriteField(MacroscopeConstants.Status);
  ws.WriteField(MacroscopeConstants.ContentType);
  ws.WriteField("Extracted Label");
  ws.WriteField("Extracted Value");
  ws.NextRecord();

  foreach (string docKey in documents.DocumentKeys())
  {

    MacroscopeDocument msDoc = documents.GetDocument(docKey);

    // docUrl and numericStatus are read but not written to the report; the
    // URL and status-code cells are produced from msDoc directly below.
    string docUrl = msDoc.GetUrl();
    string numericStatus = ((int) msDoc.GetStatusCode()).ToString();
    string statusName = msDoc.GetStatusCode().ToString();
    string contentType = msDoc.GetMimeType();

    if (this.DataExtractorXpaths.CanApplyDataExtractorsToDocument(msDoc: msDoc))
    {

      foreach (KeyValuePair<string, string> extracted in msDoc.IterateDataExtractedXpaths())
      {

        string label = extracted.Key;
        string value = extracted.Value;

        bool hasLabelAndValue = !string.IsNullOrEmpty(label) && !string.IsNullOrEmpty(value);

        if (hasLabelAndValue)
        {
          this.InsertAndFormatUrlCell(ws, msDoc);
          this.InsertAndFormatStatusCodeCell(ws, msDoc);
          this.InsertAndFormatContentCell(ws, this.FormatIfMissing(statusName));
          this.InsertAndFormatContentCell(ws, this.FormatIfMissing(contentType));
          this.InsertAndFormatContentCell(ws, this.FormatIfMissing(label));
          this.InsertAndFormatContentCell(ws, this.FormatIfMissing(value));
          ws.NextRecord();
        }

      }

    }

  }

}
/**************************************************************************/

// Writes the redirects-audit CSV report.
//
// One record is written per document for which GetIsRedirect() is true and
// whose redirect origin and destination URLs are both non-empty.
private void BuildWorksheetPageRedirectsAudit(
  MacroscopeJobMaster JobMaster,
  CsvWriter ws
)
{

  MacroscopeDocumentCollection documents = JobMaster.GetDocCollection();

  // Fetched here as in the sibling report builders; not consulted below.
  MacroscopeAllowedHosts allowedHosts = JobMaster.GetAllowedHosts();

  // Header record.
  string[] headings = { "Origin URL", "Status Code", "Status", "Destination URL" };

  foreach (string heading in headings)
  {
    ws.WriteField(heading);
  }

  ws.NextRecord();

  foreach (string docKey in documents.DocumentKeys())
  {

    MacroscopeDocument msDoc = documents.GetDocument(Url: docKey);

    if (msDoc.GetIsRedirect())
    {

      string redirectFrom = msDoc.GetUrlRedirectFrom();
      string numericStatus = ((int) msDoc.GetStatusCode()).ToString();
      string statusName = msDoc.GetStatusCode().ToString();
      string redirectTo = msDoc.GetUrlRedirectTo();

      // Both ends of the redirect must be known for the row to be useful.
      bool bothEndsKnown = !string.IsNullOrEmpty(redirectFrom) && !string.IsNullOrEmpty(redirectTo);

      if (bothEndsKnown)
      {
        this.InsertAndFormatUrlCell(ws, redirectFrom);
        this.InsertAndFormatContentCell(ws, numericStatus);
        this.InsertAndFormatContentCell(ws, statusName);
        this.InsertAndFormatUrlCell(ws, redirectTo);
        ws.NextRecord();
      }

    }

  }

}
/**************************************************************************/

// Writes the "good links" CSV report.
//
// For every document whose numeric status code is in the range 200..299 and
// that has an inbound-hyperlinks collection, one record is written per
// inbound link that has a non-empty source URL.
private void BuildWorksheetPageGoodLinks(
  MacroscopeJobMaster JobMaster,
  CsvWriter ws
)
{

  MacroscopeDocumentCollection documents = JobMaster.GetDocCollection();

  // Fetched here as in the sibling report builders; not consulted below.
  MacroscopeAllowedHosts allowedHosts = JobMaster.GetAllowedHosts();

  // Header record.
  ws.WriteField("Status Code");
  ws.WriteField("Status");
  ws.WriteField("Origin URL");
  ws.WriteField("Destination URL");
  ws.NextRecord();

  foreach (string docKey in documents.DocumentKeys())
  {

    MacroscopeDocument msDoc = documents.GetDocument(docKey);
    MacroscopeHyperlinksIn inboundLinks = documents.GetDocumentHyperlinksIn(docKey);
    int numericStatus = (int) msDoc.GetStatusCode();
    string statusName = msDoc.GetStatusCode().ToString();

    if (numericStatus < 200 || numericStatus > 299)
    {
      continue;
    }

    if (inboundLinks == null)
    {
      continue;
    }

    foreach (MacroscopeHyperlinkIn inboundLink in inboundLinks.IterateLinks())
    {

      string sourceUrl = inboundLink.GetSourceUrl();

      if (string.IsNullOrEmpty(sourceUrl))
      {
        continue;
      }

      // The linking page is the origin; this document is the destination.
      this.InsertAndFormatContentCell(ws, numericStatus.ToString());
      this.InsertAndFormatContentCell(ws, statusName);
      this.InsertAndFormatUrlCell(ws, sourceUrl);
      this.InsertAndFormatUrlCell(ws, msDoc);
      ws.NextRecord();

    }

  }

}
/**************************************************************************/

// Writes the URI-analysis CSV report.
//
// One record is written for every document in the collection: its URL,
// status code, status name, the number of documents sharing its checksum
// (as reported by GetStatsChecksumCount), and the checksum itself.
private void BuildWorksheetPageUriAnalysis(
  MacroscopeJobMaster JobMaster,
  CsvWriter ws
)
{

  MacroscopeDocumentCollection documents = JobMaster.GetDocCollection();

  // Fetched here as in the sibling report builders; not consulted below.
  MacroscopeAllowedHosts allowedHosts = JobMaster.GetAllowedHosts();

  // Header record.
  string[] headings = { "URL", "Status Code", "Status", "Occurrences", "Checksum" };

  foreach (string heading in headings)
  {
    ws.WriteField(heading);
  }

  ws.NextRecord();

  foreach (string docKey in documents.DocumentKeys())
  {

    MacroscopeDocument msDoc = documents.GetDocument(Url: docKey);

    string numericStatus = ((int) msDoc.GetStatusCode()).ToString();
    string statusName = msDoc.GetStatusCode().ToString();
    string checksum = msDoc.GetChecksum();
    int checksumOccurrences = documents.GetStatsChecksumCount(Checksum: checksum);

    this.InsertAndFormatUrlCell(ws, msDoc);
    this.InsertAndFormatContentCell(ws, numericStatus);
    this.InsertAndFormatContentCell(ws, statusName);
    this.InsertAndFormatContentCell(ws, checksumOccurrences.ToString());
    this.InsertAndFormatContentCell(ws, checksum);
    ws.NextRecord();

  }

}
/**************************************************************************/

// Writes the errors CSV report.
//
// One record is written for every document whose numeric status code lies
// in the range 400..599.
private void BuildWorksheetErrors(
  MacroscopeJobMaster JobMaster,
  CsvWriter ws
)
{

  MacroscopeDocumentCollection documents = JobMaster.GetDocCollection();

  // Fetched here as in the sibling report builders; not consulted below.
  MacroscopeAllowedHosts allowedHosts = JobMaster.GetAllowedHosts();

  // Header record.
  ws.WriteField("Status Code");
  ws.WriteField("Status");
  ws.WriteField("URL");
  ws.NextRecord();

  foreach (string docKey in documents.DocumentKeys())
  {

    MacroscopeDocument msDoc = documents.GetDocument(docKey);

    // Looked up but not used by this report.
    MacroscopeHyperlinksIn inboundLinks = documents.GetDocumentHyperlinksIn(docKey);

    int numericStatus = (int) msDoc.GetStatusCode();
    string statusName = msDoc.GetStatusCode().ToString();

    bool isErrorStatus = !(numericStatus < 400 || numericStatus > 599);

    if (!isErrorStatus)
    {
      continue;
    }

    // The URL cell is written from the collection key, not from msDoc.
    this.InsertAndFormatContentCell(ws, numericStatus.ToString());
    this.InsertAndFormatContentCell(ws, statusName);
    this.InsertAndFormatUrlCell(ws, docKey);
    ws.NextRecord();

  }

}
/**************************************************************************/

// Adds the redirects-audit worksheet to the workbook.
//
// One row is written per document for which GetIsRedirect() is true and
// whose redirect origin and destination URLs are both non-null. URL cells
// are coloured green when AllowedHosts.IsInternalUrl() accepts the URL and
// gray otherwise. The header row plus all data rows are then turned into a
// table.
private void BuildWorksheetPageRedirectsAudit(
  MacroscopeJobMaster JobMaster,
  XLWorkbook wb,
  string WorksheetLabel
)
{

  var sheet = wb.Worksheets.Add(WorksheetLabel);

  MacroscopeDocumentCollection documents = JobMaster.GetDocCollection();
  MacroscopeAllowedHosts hosts = JobMaster.GetAllowedHosts();

  // Header row; the number of headings fixes the width of the table.
  string[] headings = { "Origin URL", "Status Code", "Status", "Destination URL" };
  int lastColumn = headings.Length;

  for (int column = 1; column <= lastColumn; column++)
  {
    sheet.Cell(1, column).Value = headings[column - 1];
  }

  int row = 2;

  foreach (string docKey in documents.DocumentKeys())
  {

    MacroscopeDocument msDoc = documents.GetDocument(docKey);

    if (!msDoc.GetIsRedirect())
    {
      continue;
    }

    string redirectFrom = msDoc.GetUrlRedirectFrom();
    string numericStatus = ((int) msDoc.GetStatusCode()).ToString();
    string statusName = msDoc.GetStatusCode().ToString();
    string redirectTo = msDoc.GetUrlRedirectTo();

    if (redirectFrom == null || redirectTo == null)
    {
      continue;
    }

    int column2 = 1;

    // Origin URL, coloured by whether it is internal.
    this.InsertAndFormatUrlCell(sheet, row, column2, redirectFrom);
    XLColor originColor = hosts.IsInternalUrl(Url: redirectFrom) ? XLColor.Green : XLColor.Gray;
    sheet.Cell(row, column2).Style.Font.SetFontColor(originColor);
    column2++;

    this.InsertAndFormatContentCell(sheet, row, column2, numericStatus);
    column2++;

    this.InsertAndFormatContentCell(sheet, row, column2, statusName);
    column2++;

    // Destination URL, coloured by whether it is internal.
    this.InsertAndFormatUrlCell(sheet, row, column2, redirectTo);
    XLColor destinationColor = hosts.IsInternalUrl(Url: redirectTo) ? XLColor.Green : XLColor.Gray;
    sheet.Cell(row, column2).Style.Font.SetFontColor(destinationColor);

    row++;

  }

  // row now points one past the last written row.
  var tableRange = sheet.Range(1, 1, row - 1, lastColumn);
  var table = tableRange.CreateTable();

}
/**************************************************************************/

// Adds the "hreflang matrix" worksheet for documents that declare no locale.
//
// The header row holds four fixed columns (URL, Status Code, Site Locale,
// Title) followed by one column per key in JobMaster.GetLocales(). A document
// gets a row when at least one non-empty locale key is absent from its
// hreflang table AND its own locale is null or empty. Each locale column then
// holds the hreflang alternate URL (green when internal, blue otherwise) or a
// red "NOT SPECIFIED" marker.
//
// Changes from the previous revision:
//  - The document's own locale column is looked up with TryGetValue. The
//    locale is null or empty at that point, and indexing LocaleCols with an
//    empty string threw KeyNotFoundException unless "" was a configured key.
//  - The header bold loop stops at the last header cell instead of also
//    formatting the empty cell one column to its right.
private void BuildWorksheetHrefLangMatrixUnspecified(
  MacroscopeJobMaster JobMaster,
  XLWorkbook wb,
  string WorksheetLabel
)
{

  var ws = wb.Worksheets.Add(WorksheetLabel);

  int iRow = 1;
  int iCol = 1;
  int iColMax = 1;

  Dictionary<string, string> LocalesTable = JobMaster.GetLocales();
  MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
  MacroscopeAllowedHosts AllowedHosts = JobMaster.GetAllowedHosts();

  // Maps each locale key to the worksheet column that carries it.
  Dictionary<string, int> LocaleCols = new Dictionary<string, int>();

  {

    ws.Cell(iRow, iCol).Value = "URL";
    iCol++;

    ws.Cell(iRow, iCol).Value = "Status Code";
    iCol++;

    ws.Cell(iRow, iCol).Value = "Site Locale";
    iCol++;

    ws.Cell(iRow, iCol).Value = "Title";
    iCol++;

    foreach (string LocaleKey in LocalesTable.Keys)
    {
      DebugMsg(string.Format("EXCEL Locale: {0}", LocaleKey));
      LocaleCols[LocaleKey] = iCol;
      ws.Cell(iRow, iCol).Value = LocaleKey;
      iCol++;
    }

    // iCol is now one past the last header column, hence the strict bound.
    for (int i = 1; i < iCol; i++)
    {
      ws.Cell(iRow, i).Style.Font.SetBold();
    }

  }

  // iColMax is one past the last header column; the table range below
  // compensates with "iColMax - 1".
  iColMax = iCol;

  iRow++;

  foreach (string Key in DocCollection.DocumentKeys())
  {

    MacroscopeDocument msDoc = DocCollection.GetDocument(Key);
    Dictionary<string, MacroscopeHrefLang> HrefLangsTable = msDoc.GetHrefLangs();

    // A document is of interest when at least one configured locale has no
    // hreflang alternate on it.
    Boolean Proceed = false;

    foreach (string LocaleKey in LocalesTable.Keys)
    {
      if (!string.IsNullOrEmpty(LocaleKey) && !HrefLangsTable.ContainsKey(LocaleKey))
      {
        Proceed = true;
        break;
      }
    }

    if (!Proceed)
    {
      continue;
    }

    // Only documents WITHOUT a declared locale belong on this sheet.
    if (!string.IsNullOrEmpty(msDoc.GetLocale()))
    {
      continue;
    }

    string SiteLocale = this.FormatIfMissing(msDoc.GetLocale());
    string Title = this.FormatIfMissing(msDoc.GetTitle());
    string LocaleCol = msDoc.GetLocale();

    iCol = 1;

    this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);
    iCol++;

    this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, msDoc);
    iCol++;

    ws.Cell(iRow, iCol).Value = SiteLocale;

    if (SiteLocale == "MISSING")
    {
      ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
    }

    iCol++;

    ws.Cell(iRow, iCol).Value = Title;

    if (Title == "MISSING")
    {
      ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
    }

    iCol++;

    // LocaleCol is null or empty here; only write the document's own URL
    // when that value actually has a column in the header.
    int OwnLocaleColumn;

    if ((LocaleCol != null) && LocaleCols.TryGetValue(LocaleCol, out OwnLocaleColumn))
    {
      ws.Cell(iRow, OwnLocaleColumn).Value = msDoc.GetUrl();
    }

    foreach (string LocaleKey in LocalesTable.Keys)
    {

      if (string.IsNullOrEmpty(LocaleKey))
      {
        continue;
      }

      int LocaleColumn = LocaleCols[LocaleKey];
      MacroscopeHrefLang HrefLangAlternate;

      if (HrefLangsTable.TryGetValue(LocaleKey, out HrefLangAlternate))
      {

        string Value = HrefLangAlternate.GetUrl();

        ws.Cell(iRow, LocaleColumn).Value = Value;

        if (AllowedHosts.IsInternalUrl(Value))
        {
          ws.Cell(iRow, LocaleColumn).Style.Font.SetFontColor(XLColor.Green);
        }
        else
        {
          ws.Cell(iRow, LocaleColumn).Style.Font.SetFontColor(XLColor.Blue);
        }

      }
      else
      {
        ws.Cell(iRow, LocaleColumn).Style.Font.SetFontColor(XLColor.Red);
        ws.Cell(iRow, LocaleColumn).Value = "NOT SPECIFIED";
      }

    }

    iRow++;

  }

  {
    var rangeData = ws.Range(1, 1, iRow - 1, iColMax - 1);
    var excelTable = rangeData.CreateTable();
  }

}
/**************************************************************************/

// Writes the page-keywords CSV report.
//
// External documents and redirects are skipped; of the rest, only HTML and
// PDF documents produce a record. The occurrence count is looked up via
// GetStatsKeywordsCount only when the keywords length is greater than zero,
// and is otherwise reported as 0.
private void BuildWorksheetPageKeywords(
  MacroscopeJobMaster JobMaster,
  CsvWriter ws
)
{

  MacroscopeDocumentCollection documents = JobMaster.GetDocCollection();

  // Header record.
  string[] headings = { "URL", "Occurrences", "Keywords", "Keywords Length", "Number of Keywords" };

  foreach (string heading in headings)
  {
    ws.WriteField(heading);
  }

  ws.NextRecord();

  foreach (string docKey in documents.DocumentKeys())
  {

    MacroscopeDocument msDoc = documents.GetDocument(docKey);

    if (msDoc.GetIsExternal())
    {
      continue;
    }

    if (msDoc.GetIsRedirect())
    {
      continue;
    }

    bool isHtmlOrPdf = msDoc.GetIsHtml() || msDoc.GetIsPdf();

    if (!isHtmlOrPdf)
    {
      continue;
    }

    string keywords = msDoc.GetKeywords();
    int keywordsLength = msDoc.GetKeywordsLength();
    int keywordsCount = msDoc.GetKeywordsCount();
    int occurrences = (keywordsLength > 0) ? documents.GetStatsKeywordsCount(msDoc) : 0;

    this.InsertAndFormatUrlCell(ws, msDoc);
    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(occurrences.ToString()));
    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(keywords));
    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(keywordsLength.ToString()));
    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(keywordsCount.ToString()));
    ws.NextRecord();

  }

}
/**************************************************************************/

// Writes the duplicate-titles CSV report and reports progress as it goes.
//
// Every document advances the progress form's minor percentage. A record is
// written only for internal HTML or PDF documents whose title count, as
// reported by GetStatsTitleCount, is greater than one.
private void BuildWorksheetPageDuplicateTitles(
  MacroscopeJobMaster JobMaster,
  CsvWriter ws
)
{

  MacroscopeDocumentCollection documents = JobMaster.GetDocCollection();
  MacroscopeAllowedHosts hosts = JobMaster.GetAllowedHosts();

  // Kept as decimals so the percentage arithmetic is done in decimal.
  decimal processed = 0;
  decimal total = (decimal) documents.CountDocuments();

  // Header record.
  ws.WriteField("URL");
  ws.WriteField("Occurrences");
  ws.WriteField("Title");
  ws.NextRecord();

  foreach (string Url in documents.DocumentKeys())
  {

    MacroscopeDocument msDoc = documents.GetDocument(Url);

    if (total > 0)
    {

      processed++;

      this.ProgressForm.UpdatePercentages(
        Title: null,
        Message: null,
        MajorPercentage: -1,
        ProgressLabelMajor: string.Format("Documents Processed: {0}", processed),
        MinorPercentage: ((decimal) 100 / total) * processed,
        ProgressLabelMinor: Url,
        SubMinorPercentage: -1,
        ProgressLabelSubMinor: null
      );

    }

    // Only internal HTML or PDF documents are candidates.
    bool isCandidate = hosts.IsInternalUrl(Url: Url) && (msDoc.GetIsHtml() || msDoc.GetIsPdf());

    if (!isCandidate)
    {
      continue;
    }

    string title = msDoc.GetTitle();
    int occurrences = documents.GetStatsTitleCount(msDoc: msDoc);

    if (occurrences <= 1)
    {
      continue;
    }

    this.InsertAndFormatUrlCell(ws, msDoc);
    this.InsertAndFormatContentCell(ws, occurrences);
    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(title));
    ws.NextRecord();

  }

}
/**************************************************************************/

// Writes the duplicate-pages CSV report.
//
// Each internal HTML document is run through a MacroscopeLevenshteinAnalysis
// against the whole collection; one record is written per document returned
// by AnalyzeDocCollection, together with the distance value it is mapped to.
// Progress is reported per outer document and per match, and the work stops
// as soon as ProgressForm.Cancelled() returns true.
private void BuildWorksheetPageDuplicatePages(
  MacroscopeJobMaster JobMaster,
  CsvWriter ws
)
{

  // Counters are decimals so the percentage arithmetic is done in decimal.
  decimal totalDocs = 0;
  decimal matchTotal = 0;
  decimal outerDone = 0;
  decimal innerDone = 0;

  MacroscopeDocumentCollection documents = JobMaster.GetDocCollection();

  // Fetched here as in the sibling report builders; not consulted below.
  MacroscopeAllowedHosts allowedHosts = JobMaster.GetAllowedHosts();

  // Shared across all per-document analyses.
  Dictionary<string, Boolean> crossCheckList = MacroscopeLevenshteinAnalysis.GetCrossCheckList(
    Capacity: documents.CountDocuments()
  );

  totalDocs = (decimal) documents.CountDocuments();

  // Header record.
  string[] headings = { "Status Code", "Status", "Origin URL", "Distance", "Similar URL" };

  foreach (string heading in headings)
  {
    ws.WriteField(heading);
  }

  ws.NextRecord();

  foreach (string leftUrl in documents.DocumentKeys())
  {

    MacroscopeDocument leftDoc = documents.GetDocument(leftUrl);
    MacroscopeLevenshteinAnalysis analysis = null;

    outerDone++;
    innerDone = 0;

    if (totalDocs > 0)
    {
      this.ProgressForm.UpdatePercentages(
        Title: null,
        Message: null,
        MajorPercentage: -1,
        ProgressLabelMajor: string.Format("Documents Processed: {0}", outerDone),
        MinorPercentage: ((decimal) 100 / totalDocs) * outerDone,
        ProgressLabelMinor: leftUrl,
        SubMinorPercentage: 0,
        ProgressLabelSubMinor: ""
      );
    }

    // Only internal HTML documents are analysed.
    if (leftDoc.GetIsExternal())
    {
      continue;
    }

    if (!leftDoc.GetIsHtml())
    {
      continue;
    }

    analysis = new MacroscopeLevenshteinAnalysis(
      msDoc: leftDoc,
      SizeDifference: MacroscopePreferencesManager.GetMaxLevenshteinSizeDifference(),
      Threshold: MacroscopePreferencesManager.GetMaxLevenshteinDistance(),
      CrossCheckList: crossCheckList,
      IPercentageDone: this
    );

    Dictionary<MacroscopeDocument, int> matches = analysis.AnalyzeDocCollection(
      DocCollection: documents
    );

    matchTotal = (decimal) matches.Count;

    foreach (MacroscopeDocument matchDoc in matches.Keys)
    {

      // The status columns describe the left-hand document, not the match.
      int numericStatus = (int) leftDoc.GetStatusCode();
      HttpStatusCode status = leftDoc.GetStatusCode();
      string matchUrl = matchDoc.GetUrl();
      int distance = matches[matchDoc];

      innerDone++;

      if (totalDocs > 0)
      {
        this.ProgressForm.UpdatePercentages(
          Title: null,
          Message: null,
          MajorPercentage: -1,
          ProgressLabelMajor: string.Format("Documents Processed: {0}", outerDone),
          MinorPercentage: ((decimal) 100 / totalDocs) * outerDone,
          ProgressLabelMinor: leftUrl,
          SubMinorPercentage: ((decimal) 100 / matchTotal) * innerDone,
          ProgressLabelSubMinor: matchUrl
        );
      }

      this.InsertAndFormatStatusCodeCell(ws, numericStatus);
      this.InsertAndFormatStatusCodeCell(ws, status);
      this.InsertAndFormatUrlCell(ws, leftUrl);
      this.InsertAndFormatContentCell(ws, distance.ToString());
      this.InsertAndFormatUrlCell(ws, matchUrl);
      ws.NextRecord();

      if (this.ProgressForm.Cancelled())
      {
        break;
      }

    }

    // Checked again so a cancel inside the inner loop also ends the outer one.
    if (this.ProgressForm.Cancelled())
    {
      break;
    }

    Thread.Yield();

  }

}
/**************************************************************************/

// Writes the page-headings CSV report.
//
// The header record declares URL, Occurences, Order and one column per
// heading level H1..H6. External documents, redirects and non-HTML documents
// are skipped. For every remaining document, one record is written per
// heading, for heading levels 1..MacroscopePreferencesManager.GetMaxHeadingDepth().
//
// Change from the previous revision: each data record used to carry only
// four fields, with the heading text always in the fourth field. Records
// were therefore shorter than the header, and H2..H6 text appeared under the
// "H1" column. The heading text is now written in the column matching its
// level, and the other heading columns are padded with empty fields.
private void BuildWorksheetPageHeadings(
  MacroscopeJobMaster JobMaster,
  CsvWriter ws
)
{

  // Number of "H<n>" columns declared in the header record.
  const int HeadingColumnCount = 6;

  MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

  {

    ws.WriteField("URL");
    ws.WriteField("Occurences");
    ws.WriteField("Order");

    for (int i = 1; i <= HeadingColumnCount; i++)
    {
      ws.WriteField(string.Format("H{0}", i));
    }

    ws.NextRecord();

  }

  foreach (string Url in DocCollection.DocumentKeys())
  {

    MacroscopeDocument msDoc = DocCollection.GetDocument(Url);

    if (msDoc.GetIsExternal())
    {
      continue;
    }

    if (msDoc.GetIsRedirect())
    {
      continue;
    }

    if (!msDoc.GetIsHtml())
    {
      continue;
    }

    for (ushort HeadingLevel = 1; HeadingLevel <= MacroscopePreferencesManager.GetMaxHeadingDepth(); HeadingLevel++)
    {

      List<string> HeadingsList = msDoc.GetHeadings(HeadingLevel);

      // Never drop the text if a level beyond the declared columns is
      // configured; extend the record instead.
      int LastHeadingColumn = Math.Max(HeadingColumnCount, (int) HeadingLevel);

      for (int Order = 0; Order < HeadingsList.Count; Order++)
      {

        string HeadingText = HeadingsList[Order];

        int Occurences = DocCollection.GetStatsHeadingsCount(
          HeadingLevel: HeadingLevel,
          Text: HeadingText
        );

        this.InsertAndFormatUrlCell(ws, msDoc);
        this.InsertAndFormatContentCell(ws, Occurences.ToString());

        // Order is reported 1-based.
        this.InsertAndFormatContentCell(ws, this.FormatIfMissing((Order + 1).ToString()));

        for (int Column = 1; Column <= LastHeadingColumn; Column++)
        {
          if (Column == HeadingLevel)
          {
            this.InsertAndFormatContentCell(ws, this.FormatIfMissing(HeadingText));
          }
          else
          {
            this.InsertAndFormatContentCell(ws, "");
          }
        }

        ws.NextRecord();

      }

    }

  }

}
/**************************************************************************/

// Writes the overview CSV report: one record per document in the
// collection, with 23 columns matching the header record below.
private void BuildWorksheetOverview(
  MacroscopeJobMaster JobMaster,
  CsvWriter ws
)
{

  MacroscopeDocumentCollection documents = JobMaster.GetDocCollection();

  // Header record; the data cells below are written in this same order.
  string[] headings =
  {
    "URL",
    "Status Code",
    "Status",
    "Redirect",
    "Duration",
    "Crawled Date",
    "Server Date",
    "Modified Date",
    "Expires Date",
    "Content-Type",
    "Locale",
    "Language",
    "Canonical",
    "Page Depth",
    "Links In",
    "Links Out",
    "Hyperlinks In",
    "Hyperlinks Out",
    "Title",
    "Title Length",
    "Description",
    "Description Length",
    "Error Condition"
  };

  foreach (string heading in headings)
  {
    ws.WriteField(heading);
  }

  ws.NextRecord();

  foreach (string docKey in documents.DocumentKeys())
  {

    MacroscopeDocument msDoc = documents.GetDocument(docKey);

    // Identity and HTTP status.
    this.InsertAndFormatUrlCell(ws, msDoc);
    this.InsertAndFormatStatusCodeCell(ws, msDoc);
    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetStatusCode().ToString()));
    this.InsertAndFormatRedirectCell(ws, msDoc);

    // Timing and dates.
    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetDurationInSecondsFormatted()));
    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetCrawledDate()));
    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetDateServer()));
    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetDateModified()));
    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetDateExpires()));

    // Content type, locale and canonical.
    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetMimeType()));
    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetLocale()));
    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetIsoLanguageCode()));
    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetCanonical()));

    // Depth and link counts.
    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetDepth().ToString()));
    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.CountInlinks().ToString()));
    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.CountOutlinks().ToString()));
    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.CountHyperlinksIn().ToString()));
    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.CountHyperlinksOut().ToString()));

    // Title, description and error condition.
    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetTitle()));
    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetTitleLength().ToString()));
    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetDescription()));
    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetDescriptionLength().ToString()));
    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetErrorCondition()));

    ws.NextRecord();

  }

}
/**************************************************************************/

// Adds the page-keywords worksheet to the workbook.
//
// External documents and redirects are skipped; of the rest, only HTML and
// PDF documents produce a row. The URL cell is coloured green when
// GetIsInternal() is true and gray otherwise. The header row plus all data
// rows are then turned into a table.
private void BuildWorksheetPageKeywords(
  MacroscopeJobMaster JobMaster,
  XLWorkbook wb,
  string WorksheetLabel
)
{

  var sheet = wb.Worksheets.Add(WorksheetLabel);

  MacroscopeDocumentCollection documents = JobMaster.GetDocCollection();

  // Header row; the number of headings fixes the width of the table.
  string[] headings = { "URL", "Occurrences", "Keywords", "Keywords Length", "Number of Keywords" };
  int lastColumn = headings.Length;

  for (int column = 1; column <= lastColumn; column++)
  {
    sheet.Cell(1, column).Value = headings[column - 1];
  }

  int row = 2;

  foreach (string docKey in documents.DocumentKeys())
  {

    MacroscopeDocument msDoc = documents.GetDocument(docKey);

    if (msDoc.GetIsExternal())
    {
      continue;
    }

    if (msDoc.GetIsRedirect())
    {
      continue;
    }

    bool isHtmlOrPdf = msDoc.GetIsHtml() || msDoc.GetIsPdf();

    if (!isHtmlOrPdf)
    {
      continue;
    }

    string keywords = msDoc.GetKeywords();
    int keywordsLength = msDoc.GetKeywordsLength();
    int keywordsCount = msDoc.GetKeywordsCount();

    // The occurrence count is only looked up when there are keywords.
    int occurrences = (keywordsLength > 0) ? documents.GetStatsKeywordsCount(msDoc) : 0;

    int cellColumn = 1;

    this.InsertAndFormatUrlCell(sheet, row, cellColumn, msDoc);
    XLColor urlColor = msDoc.GetIsInternal() ? XLColor.Green : XLColor.Gray;
    sheet.Cell(row, cellColumn).Style.Font.SetFontColor(urlColor);
    cellColumn++;

    this.InsertAndFormatContentCell(sheet, row, cellColumn, occurrences);
    cellColumn++;

    this.InsertAndFormatContentCell(sheet, row, cellColumn, this.FormatIfMissing(keywords));
    cellColumn++;

    this.InsertAndFormatContentCell(sheet, row, cellColumn, keywordsLength);
    cellColumn++;

    this.InsertAndFormatContentCell(sheet, row, cellColumn, keywordsCount);

    row++;

  }

  // row now points one past the last written row.
  var tableRange = sheet.Range(1, 1, row - 1, lastColumn);
  var table = tableRange.CreateTable();

}
/**************************************************************************/

// Writes the hyperlinks CSV report: one record per outbound hyperlink of
// every document in the collection.
private void BuildWorksheetPageHyperlinks(
  MacroscopeJobMaster JobMaster,
  CsvWriter ws
)
{

  MacroscopeDocumentCollection documents = JobMaster.GetDocCollection();

  // Fetched here as in the sibling report builders; not consulted below.
  MacroscopeAllowedHosts allowedHosts = JobMaster.GetAllowedHosts();

  // Header record.
  string[] headings =
  {
    "Source URL",
    "Target URL",
    "Follow",
    "Target",
    "Link Text",
    "Title Text",
    "Alt Text",
    "Raw Target URL"
  };

  foreach (string heading in headings)
  {
    ws.WriteField(heading);
  }

  ws.NextRecord();

  foreach (string docKey in documents.DocumentKeys())
  {

    MacroscopeDocument msDoc = documents.GetDocument(Url: docKey);
    MacroscopeHyperlinksOut outboundLinks = msDoc.GetHyperlinksOut();

    foreach (MacroscopeHyperlinkOut link in outboundLinks.IterateLinks())
    {

      string targetUrl = link.GetTargetUrl();
      string linkTarget = link.GetLinkTarget();
      string linkText = link.GetLinkText();
      string linkTitle = link.GetLinkTitle();
      string altText = link.GetAltText();
      string rawTargetUrl = link.GetRawTargetUrl();

      // A missing target URL is written as an empty string.
      if (string.IsNullOrEmpty(targetUrl))
      {
        targetUrl = "";
      }

      string followLabel = link.GetDoFollow() ? "Follow" : "No Follow";

      this.InsertAndFormatUrlCell(ws, msDoc);
      this.InsertAndFormatUrlCell(ws, targetUrl);
      this.InsertAndFormatContentCell(ws, followLabel);
      this.InsertAndFormatContentCell(ws, linkTarget);
      this.InsertAndFormatContentCell(ws, this.FormatIfMissing(linkText));
      this.InsertAndFormatContentCell(ws, this.FormatIfMissing(linkTitle));
      this.InsertAndFormatContentCell(ws, this.FormatIfMissing(altText));
      this.InsertAndFormatContentCell(ws, rawTargetUrl);
      ws.NextRecord();

    }

  }

}
/**************************************************************************/

// Writes the custom-filter CSV report.
//
// The header record holds URL, status code, status and content type, then
// one column per custom filter slot. A slot whose pattern is null, empty, or
// a repeat of an earlier pattern is labelled "EMPTY<n>". One record is
// written per document accepted by CustomFilter.CanApplyCustomFiltersToDocument(),
// with one cell per filter slot: the TextPresenceLabels entry for that
// filter's result, or an empty cell when there is no defined result.
//
// Changes from the previous revision:
//  - ws.NextRecord() was inside the per-slot loop, so the record was ended
//    after every filter cell (and never ended when there were no filters).
//    It is now called once per document, after all filter cells.
//  - The header tested ContainsKey(FilterPattern) before the null/empty
//    test; Dictionary.ContainsKey throws ArgumentNullException for a null
//    key. The null/empty test now runs first.
//  - Unused locals were removed.
private void BuildWorksheetCustomFilter(
  MacroscopeJobMaster JobMaster,
  CsvWriter ws
)
{

  MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

  // Used only to detect repeated patterns while building the header.
  Dictionary<string, int> FilterColsTable = new Dictionary<string, int>(CustomFilter.GetSize());

  const int FilterColOffset = 3;

  {

    ws.WriteField(MacroscopeConstants.Url);
    ws.WriteField(MacroscopeConstants.StatusCode);
    ws.WriteField(MacroscopeConstants.Status);
    ws.WriteField(MacroscopeConstants.ContentType);

    for (int Slot = 0; Slot < CustomFilter.GetSize(); Slot++)
    {

      string FilterPattern = CustomFilter.GetPattern(Slot).Key;

      if (string.IsNullOrEmpty(FilterPattern) || FilterColsTable.ContainsKey(FilterPattern))
      {
        string PlaceholderLabel = string.Format("EMPTY{0}", Slot + 1);

        // Indexer rather than Add, so a pattern that happens to equal a
        // placeholder label cannot raise a duplicate-key exception.
        FilterColsTable[PlaceholderLabel] = Slot + FilterColOffset;
        ws.WriteField(PlaceholderLabel);
      }
      else
      {
        FilterColsTable.Add(FilterPattern, Slot + FilterColOffset);
        ws.WriteField(FilterPattern);
      }

    }

    ws.NextRecord();

  }

  foreach (string Url in DocCollection.DocumentKeys())
  {

    MacroscopeDocument msDoc = DocCollection.GetDocument(Url);
    string MimeType = msDoc.GetMimeType();

    if (!this.CustomFilter.CanApplyCustomFiltersToDocument(msDoc: msDoc))
    {
      continue;
    }

    this.InsertAndFormatUrlCell(ws, msDoc);
    this.InsertAndFormatStatusCodeCell(ws, msDoc);
    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetStatusCode().ToString()));
    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(MimeType));

    for (int Slot = 0; Slot < this.CustomFilter.GetSize(); Slot++)
    {

      string FilterPattern = this.CustomFilter.GetPattern(Slot: Slot).Key;

      KeyValuePair<string, MacroscopeConstants.TextPresence> Pair = msDoc.GetCustomFilteredItem(Text: FilterPattern);

      if ((Pair.Key != null) && (Pair.Value != MacroscopeConstants.TextPresence.UNDEFINED))
      {
        string CustomFilterItemValue = MacroscopeConstants.TextPresenceLabels[Pair.Value];
        this.InsertAndFormatContentCell(ws, CustomFilterItemValue);
      }
      else
      {
        // Keep the column count constant when there is no result.
        this.InsertAndFormatContentCell(ws, "");
      }

    }

    // One record per document, after every filter cell has been written.
    ws.NextRecord();

  }

}
/**************************************************************************/

// Adds the custom-filter worksheet to the workbook.
//
// The header row holds URL, status code, status and content type, then one
// column per custom filter slot. A slot whose pattern is null, empty, or a
// repeat of an earlier pattern is labelled "EMPTY<n>". One row is written
// per document accepted by CustomFilter.CanApplyCustomFiltersToDocument().
// Filter cells are coloured by result: green for CONTAINS / NOTCONTAINS, red
// for MUSTCONTAIN / SHOULDNOTCONTAIN, gray otherwise or when there is no
// defined result. The header row plus all data rows are then turned into a
// table.
//
// Changes from the previous revision:
//  - The header tested ContainsKey(FilterPattern) before the null/empty
//    test; Dictionary.ContainsKey throws ArgumentNullException for a null
//    key. The null/empty test now runs first.
//  - Unused locals were removed.
private void BuildWorksheetCustomFilter(
  MacroscopeJobMaster JobMaster,
  XLWorkbook wb,
  string WorksheetLabel
)
{

  var ws = wb.Worksheets.Add(WorksheetLabel);

  int iRow = 1;
  int iCol = 1;
  int iColMax = 1;

  MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

  // Used only to detect repeated patterns while building the header.
  Dictionary<string, int> FilterColsTable = new Dictionary<string, int>(CustomFilter.GetSize());

  const int FilterColOffset = 4;

  {

    ws.Cell(iRow, iCol).Value = MacroscopeConstants.Url;
    iCol++;

    ws.Cell(iRow, iCol).Value = MacroscopeConstants.StatusCode;
    iCol++;

    ws.Cell(iRow, iCol).Value = MacroscopeConstants.Status;
    iCol++;

    ws.Cell(iRow, iCol).Value = MacroscopeConstants.ContentType;

    for (int Slot = 0; Slot < CustomFilter.GetSize(); Slot++)
    {

      string FilterPattern = CustomFilter.GetPattern(Slot).Key;

      iCol++;

      if (string.IsNullOrEmpty(FilterPattern) || FilterColsTable.ContainsKey(FilterPattern))
      {
        string PlaceholderLabel = string.Format("EMPTY{0}", Slot + 1);

        // Indexer rather than Add, so a pattern that happens to equal a
        // placeholder label cannot raise a duplicate-key exception.
        FilterColsTable[PlaceholderLabel] = Slot + FilterColOffset;
        ws.Cell(iRow, iCol).Value = PlaceholderLabel;
      }
      else
      {
        FilterColsTable.Add(FilterPattern, Slot + FilterColOffset);
        ws.Cell(iRow, iCol).Value = FilterPattern;
      }

    }

  }

  // iCol is the last header column here (it is incremented before each
  // filter header is written, not after).
  iColMax = iCol;

  iRow++;

  foreach (string Url in DocCollection.DocumentKeys())
  {

    MacroscopeDocument msDoc = DocCollection.GetDocument(Url);
    string MimeType = msDoc.GetMimeType();

    if (!CustomFilter.CanApplyCustomFiltersToDocument(msDoc: msDoc))
    {
      continue;
    }

    iCol = 1;

    this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

    if (msDoc.GetIsInternal())
    {
      ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
    }
    else
    {
      ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
    }

    iCol++;

    this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, msDoc);
    iCol++;

    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(msDoc.GetStatusCode().ToString()));
    iCol++;

    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(MimeType));
    iCol++;

    for (int Slot = 0; Slot < this.CustomFilter.GetSize(); Slot++)
    {

      string FilterPattern = this.CustomFilter.GetPattern(Slot: Slot).Key;

      KeyValuePair<string, MacroscopeConstants.TextPresence> Pair = msDoc.GetCustomFilteredItem(Text: FilterPattern);

      if ((Pair.Key != null) && (Pair.Value != MacroscopeConstants.TextPresence.UNDEFINED))
      {

        string CustomFilterItemValue = MacroscopeConstants.TextPresenceLabels[Pair.Value];

        this.InsertAndFormatContentCell(ws, iRow, iCol, CustomFilterItemValue);

        switch (Pair.Value)
        {
          case MacroscopeConstants.TextPresence.CONTAINS:
          case MacroscopeConstants.TextPresence.NOTCONTAINS:
            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
            break;
          case MacroscopeConstants.TextPresence.MUSTCONTAIN:
          case MacroscopeConstants.TextPresence.SHOULDNOTCONTAIN:
            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
            break;
          default:
            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
            break;
        }

      }
      else
      {
        this.InsertAndFormatContentCell(ws, iRow, iCol, "");
        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
      }

      iCol++;

    }

    iRow++;

  }

  {
    var rangeData = ws.Range(1, 1, iRow - 1, iColMax);
    var excelTable = rangeData.CreateTable();
  }

}
/**************************************************************************/

// Writes the page-titles CSV report.
//
// External documents and redirects are skipped; of the rest, only HTML and
// PDF documents produce a record. The occurrence count is looked up via
// GetStatsTitleCount only when the title length is greater than zero, and is
// otherwise reported as 0.
private void BuildWorksheetPageTitles(
  MacroscopeJobMaster JobMaster,
  CsvWriter ws
)
{

  MacroscopeDocumentCollection documents = JobMaster.GetDocCollection();

  // Header record.
  string[] headings =
  {
    "URL",
    "Page Language",
    "Detected Language",
    "Occurrences",
    "Title",
    "Title Length",
    "Pixel Width"
  };

  foreach (string heading in headings)
  {
    ws.WriteField(heading);
  }

  ws.NextRecord();

  foreach (string docKey in documents.DocumentKeys())
  {

    MacroscopeDocument msDoc = documents.GetDocument(docKey);

    if (msDoc.GetIsExternal())
    {
      continue;
    }

    if (msDoc.GetIsRedirect())
    {
      continue;
    }

    bool isHtmlOrPdf = msDoc.GetIsHtml() || msDoc.GetIsPdf();

    if (!isHtmlOrPdf)
    {
      continue;
    }

    string title = msDoc.GetTitle();
    string pageLanguage = msDoc.GetIsoLanguageCode();
    string detectedLanguage = msDoc.GetTitleLanguage();
    int titleLength = msDoc.GetTitleLength();
    int titlePixelWidth = msDoc.GetTitlePixelWidth();
    int occurrences = (titleLength > 0) ? documents.GetStatsTitleCount(msDoc: msDoc) : 0;

    this.InsertAndFormatUrlCell(ws, msDoc);
    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(pageLanguage));
    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(detectedLanguage));
    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(occurrences.ToString()));
    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(title));
    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(titleLength.ToString()));
    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(titlePixelWidth.ToString()));
    ws.NextRecord();

  }

}
/**************************************************************************/

// Adds the overview worksheet to the workbook: a bold header row followed
// by one row per document in the collection, 23 columns wide. The header
// row plus all data rows are then turned into a table.
private void BuildWorksheetOverview(
  MacroscopeJobMaster JobMaster,
  XLWorkbook wb,
  string WorksheetLabel
)
{

  var sheet = wb.Worksheets.Add(WorksheetLabel);

  MacroscopeDocumentCollection documents = JobMaster.GetDocCollection();

  // Header row; the data cells below are written in this same order.
  string[] headings =
  {
    "URL",
    "Status Code",
    "Status",
    "Redirect",
    "Duration",
    "Crawled Date",
    "Server Date",
    "Modified Date",
    "Expires Date",
    "Content-Type",
    "Locale",
    "Language",
    "Canonical",
    "Page Depth",
    "Links In",
    "Links Out",
    "Hyperlinks In",
    "Hyperlinks Out",
    "Title",
    "Title Length",
    "Description",
    "Description Length",
    "Error Condition"
  };

  int lastColumn = headings.Length;

  for (int column = 1; column <= lastColumn; column++)
  {
    sheet.Cell(1, column).Value = headings[column - 1];
  }

  for (int column = 1; column <= lastColumn; column++)
  {
    sheet.Cell(1, column).Style.Font.SetBold();
  }

  int row = 2;

  foreach (string docKey in documents.DocumentKeys())
  {

    MacroscopeDocument msDoc = documents.GetDocument(docKey);

    // col is advanced after each cell so the calls read in column order.
    int col = 1;

    // Identity and HTTP status.
    this.InsertAndFormatUrlCell(sheet, row, col++, msDoc);
    this.InsertAndFormatStatusCodeCell(sheet, row, col++, msDoc);
    this.InsertAndFormatContentCell(sheet, row, col++, this.FormatIfMissing(msDoc.GetStatusCode().ToString()));
    this.InsertAndFormatRedirectCell(sheet, row, col++, msDoc);

    // Timing and dates.
    this.InsertAndFormatContentCell(sheet, row, col++, msDoc.GetDurationInSecondsFormatted());
    this.InsertAndFormatDateCell(sheet, row, col++, msDoc.GetCrawledDate());
    this.InsertAndFormatDateCell(sheet, row, col++, msDoc.GetDateServer());
    this.InsertAndFormatDateCell(sheet, row, col++, msDoc.GetDateModified());
    this.InsertAndFormatDateCell(sheet, row, col++, msDoc.GetDateExpires());

    // Content type, locale and canonical.
    this.InsertAndFormatContentCell(sheet, row, col++, this.FormatIfMissing(msDoc.GetMimeType()));
    this.InsertAndFormatContentCell(sheet, row, col++, this.FormatIfMissing(msDoc.GetLocale()));
    this.InsertAndFormatContentCell(sheet, row, col++, this.FormatIfMissing(msDoc.GetIsoLanguageCode()));
    this.InsertAndFormatContentCell(sheet, row, col++, this.FormatIfMissing(msDoc.GetCanonical()));

    // Depth and link counts.
    this.InsertAndFormatContentCell(sheet, row, col++, msDoc.GetDepth().ToString());
    this.InsertAndFormatContentCell(sheet, row, col++, msDoc.CountInlinks());
    this.InsertAndFormatContentCell(sheet, row, col++, msDoc.CountOutlinks());
    this.InsertAndFormatContentCell(sheet, row, col++, msDoc.CountHyperlinksIn());
    this.InsertAndFormatContentCell(sheet, row, col++, msDoc.CountHyperlinksOut());

    // Title, description and error condition.
    this.InsertAndFormatContentCell(sheet, row, col++, this.FormatIfMissing(msDoc.GetTitle()));
    this.InsertAndFormatContentCell(sheet, row, col++, msDoc.GetTitleLength().ToString());
    this.InsertAndFormatContentCell(sheet, row, col++, this.FormatIfMissing(msDoc.GetDescription()));
    this.InsertAndFormatContentCell(sheet, row, col++, msDoc.GetDescriptionLength());
    this.InsertAndFormatContentCell(sheet, row, col, this.FormatIfMissing(msDoc.GetErrorCondition()));

    row++;

  }

  // row now points one past the last written row.
  var tableRange = sheet.Range(1, 1, row - 1, lastColumn);
  var table = tableRange.CreateTable();

}
/**************************************************************************/

    /// <summary>
    /// Adds the page text worksheet to <paramref name="wb"/>: a header row, one
    /// colour-coded row for each HTML or PDF document that is neither external
    /// nor a redirect, and a table over the populated range.
    /// </summary>
    /// <param name="JobMaster">Job whose document collection is reported on.</param>
    /// <param name="wb">Workbook that receives the new worksheet.</param>
    /// <param name="WorksheetLabel">Name given to the new worksheet.</param>
    private void BuildWorksheetPageText (
      MacroscopeJobMaster JobMaster,
      XLWorkbook wb,
      string WorksheetLabel
    )
    {

      const int HeaderRow = 1;

      var ws = wb.Worksheets.Add(WorksheetLabel);

      MacroscopeDocumentCollection documents = JobMaster.GetDocCollection();

      string[] headings = {
        "URL",
        "Page Locale",
        "Page Language",
        "Detected Language",
        "Word Count",
        "Readability Method",
        "Readability Grade",
        "Readability Grade Description"
      };

      for (int headingCol = 1; headingCol <= headings.Length; headingCol++)
      {
        ws.Cell(HeaderRow, headingCol).Value = headings[headingCol - 1];
      }

      int row = HeaderRow + 1;

      foreach (string documentKey in documents.DocumentKeys())
      {

        MacroscopeDocument msDoc = documents.GetDocument(documentKey);

        // Skip external documents, redirects, and anything that is neither HTML nor PDF.
        if (msDoc.GetIsExternal() || msDoc.GetIsRedirect())
        {
          continue;
        }

        if (!msDoc.GetIsHtml() && !msDoc.GetIsPdf())
        {
          continue;
        }

        string pageLocale = msDoc.GetLocale();
        string pageLanguage = msDoc.GetIsoLanguageCode();
        string detectedLanguage = msDoc.GetDocumentTextLanguage();
        int wordCount = msDoc.GetWordCount();
        string readabilityMethod = MacroscopeAnalyzeReadability.FormatAnalyzeReadabilityMethod(
          ReadabilityMethod: msDoc.GetReadabilityGradeMethod()
        );
        string readabilityGrade = msDoc.GetReadabilityGrade().ToString("00.00");
        string readabilityDescription = msDoc.GetReadabilityGradeDescription();

        // Normalise nulls to empty strings so the language comparison below
        // treats a null and an empty language as the same value.
        pageLocale = pageLocale ?? "";
        pageLanguage = pageLanguage ?? "";
        detectedLanguage = detectedLanguage ?? "";

        // Pre-incremented before every cell, so the first cell lands in column 1.
        int col = 0;

        this.InsertAndFormatUrlCell(ws, row, ++col, msDoc);
        ws.Cell(row, col).Style.Font.SetFontColor(msDoc.GetIsInternal() ? XLColor.Green : XLColor.Gray);

        this.InsertAndFormatContentCell(ws, row, ++col, this.FormatIfMissing(pageLocale));
        ws.Cell(row, col).Style.Font.SetFontColor(msDoc.GetIsInternal() ? XLColor.Green : XLColor.Gray);

        // Declared and detected language are both red when they disagree.
        this.InsertAndFormatContentCell(ws, row, ++col, this.FormatIfMissing(pageLanguage));
        ws.Cell(row, col).Style.Font.SetFontColor((pageLanguage != detectedLanguage) ? XLColor.Red : XLColor.Green);

        this.InsertAndFormatContentCell(ws, row, ++col, this.FormatIfMissing(detectedLanguage));
        ws.Cell(row, col).Style.Font.SetFontColor((pageLanguage != detectedLanguage) ? XLColor.Red : XLColor.Green);

        this.InsertAndFormatContentCell(ws, row, ++col, wordCount);

        XLColor wordCountColor;

        if (!msDoc.GetIsInternal())
        {
          wordCountColor = XLColor.Gray;
        }
        else if (wordCount > 0)
        {
          wordCountColor = XLColor.Green;
        }
        else
        {
          wordCountColor = XLColor.Red;
        }

        ws.Cell(row, col).Style.Font.SetFontColor(wordCountColor);

        this.InsertAndFormatContentCell(ws, row, ++col, readabilityMethod);
        this.InsertAndFormatContentCell(ws, row, ++col, readabilityGrade);
        this.InsertAndFormatContentCell(ws, row, ++col, readabilityDescription);

        row++;

      }

      // The table spans the header row plus every data row written above.
      ws.Range(1, 1, row - 1, headings.Length).CreateTable();

    }
/**************************************************************************/

    /// <summary>
    /// Adds the page descriptions worksheet to <paramref name="wb"/>: a header
    /// row, one colour-coded row for each HTML or PDF document that is neither
    /// external nor a redirect, and a table over the populated range.
    /// </summary>
    /// <param name="JobMaster">Job whose document collection is reported on.</param>
    /// <param name="wb">Workbook that receives the new worksheet.</param>
    /// <param name="WorksheetLabel">Name given to the new worksheet.</param>
    private void BuildWorksheetPageDescriptions (
      MacroscopeJobMaster JobMaster,
      XLWorkbook wb,
      string WorksheetLabel
    )
    {

      const int HeaderRow = 1;

      var ws = wb.Worksheets.Add(WorksheetLabel);

      MacroscopeDocumentCollection documents = JobMaster.GetDocCollection();

      string[] headings = {
        "URL",
        "Page Language",
        "Detected Language",
        "Occurrences",
        "Description",
        "Description Length"
      };

      for (int headingCol = 1; headingCol <= headings.Length; headingCol++)
      {
        ws.Cell(HeaderRow, headingCol).Value = headings[headingCol - 1];
      }

      int row = HeaderRow + 1;

      foreach (string documentKey in documents.DocumentKeys())
      {

        MacroscopeDocument msDoc = documents.GetDocument(documentKey);

        // Skip external documents, redirects, and anything that is neither HTML nor PDF.
        if (msDoc.GetIsExternal() || msDoc.GetIsRedirect())
        {
          continue;
        }

        if (!msDoc.GetIsHtml() && !msDoc.GetIsPdf())
        {
          continue;
        }

        string description = msDoc.GetDescription();
        string pageLanguage = msDoc.GetIsoLanguageCode();

        // NOTE(review): the detected language is read from GetTitleLanguage()
        // even though this sheet reports descriptions; confirm whether a
        // description-specific language getter should be used here instead.
        string detectedLanguage = msDoc.GetTitleLanguage();

        int descriptionLength = msDoc.GetDescriptionLength();

        // The occurrence count is only looked up when a description is present.
        int occurrences = (descriptionLength > 0)
          ? documents.GetStatsDescriptionCount(msDoc: msDoc)
          : 0;

        // Pre-incremented before every cell, so the first cell lands in column 1.
        int col = 0;

        this.InsertAndFormatUrlCell(ws, row, ++col, msDoc);
        ws.Cell(row, col).Style.Font.SetFontColor(msDoc.GetIsInternal() ? XLColor.Green : XLColor.Gray);

        // Declared and detected language are both red when they disagree.
        this.InsertAndFormatContentCell(ws, row, ++col, this.FormatIfMissing(pageLanguage));
        ws.Cell(row, col).Style.Font.SetFontColor((pageLanguage != detectedLanguage) ? XLColor.Red : XLColor.Green);

        this.InsertAndFormatContentCell(ws, row, ++col, this.FormatIfMissing(detectedLanguage));
        ws.Cell(row, col).Style.Font.SetFontColor((pageLanguage != detectedLanguage) ? XLColor.Red : XLColor.Green);

        // More than one occurrence is highlighted in orange.
        this.InsertAndFormatContentCell(ws, row, ++col, occurrences);
        ws.Cell(row, col).Style.Font.SetFontColor((occurrences > 1) ? XLColor.Orange : XLColor.Green);

        this.InsertAndFormatContentCell(ws, row, ++col, this.FormatIfMissing(description));

        if (descriptionLength > 0)
        {
          ws.Cell(row, col).Style.Font.SetFontColor(XLColor.Green);
        }
        else
        {
          ws.Cell(row, col).Style.Font.SetFontColor(XLColor.Red);
          ws.Cell(row, col).Value = "MISSING";
        }

        this.InsertAndFormatContentCell(ws, row, ++col, descriptionLength);

        // Lengths outside the configured minimum/maximum are shown in red.
        bool lengthOutOfRange =
          (descriptionLength < MacroscopePreferencesManager.GetDescriptionMinLen())
          || (descriptionLength > MacroscopePreferencesManager.GetDescriptionMaxLen());

        ws.Cell(row, col).Style.Font.SetFontColor(lengthOutOfRange ? XLColor.Red : XLColor.Green);

        row++;

      }

      // The table spans the header row plus every data row written above.
      ws.Range(1, 1, row - 1, headings.Length).CreateTable();

    }
/**************************************************************************/

    /// <summary>
    /// Adds the CSS selector extraction worksheet to <paramref name="wb"/>: a
    /// header row, one row per non-empty label/value pair extracted from each
    /// document the CSS selector data extractor can be applied to, and a table
    /// over the populated range.
    /// </summary>
    /// <param name="JobMaster">Job whose document collection is reported on.</param>
    /// <param name="wb">Workbook that receives the new worksheet.</param>
    /// <param name="WorksheetLabel">Name given to the new worksheet.</param>
    private void BuildWorksheetCssSelectors (
      MacroscopeJobMaster JobMaster,
      XLWorkbook wb,
      string WorksheetLabel
    )
    {

      var ws = wb.Worksheets.Add(WorksheetLabel);

      int iRow = 1;
      int iCol = 1;
      int iColMax = 1;

      MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

      {

        ws.Cell(iRow, iCol).Value = MacroscopeConstants.Url;
        iCol++;

        ws.Cell(iRow, iCol).Value = MacroscopeConstants.StatusCode;
        iCol++;

        ws.Cell(iRow, iCol).Value = MacroscopeConstants.Status;
        iCol++;

        ws.Cell(iRow, iCol).Value = MacroscopeConstants.ContentType;
        iCol++;

        ws.Cell(iRow, iCol).Value = "Extracted Label";
        iCol++;

        ws.Cell(iRow, iCol).Value = "Extracted Value";

      }

      iColMax = iCol;

      iRow++;

      foreach (string Url in DocCollection.DocumentKeys())
      {

        MacroscopeDocument msDoc = DocCollection.GetDocument(Url);

        // Check applicability first so per-document values are only read for
        // documents that can actually contribute rows.
        if (!this.DataExtractorCssSelectors.CanApplyDataExtractorsToDocument(msDoc: msDoc))
        {
          continue;
        }

        string Status = msDoc.GetStatusCode().ToString();
        string MimeType = msDoc.GetMimeType();

        foreach (KeyValuePair<string, string> DataExtractedPair in msDoc.IterateDataExtractedCssSelectors())
        {

          string ExtractedLabel = DataExtractedPair.Key;
          string ExtractedValue = DataExtractedPair.Value;

          // Pairs with a missing label or value produce no row.
          if (string.IsNullOrEmpty(ExtractedLabel) || string.IsNullOrEmpty(ExtractedValue))
          {
            continue;
          }

          iCol = 1;

          this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

          if (msDoc.GetIsInternal())
          {
            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
          }
          else
          {
            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
          }

          iCol++;

          this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, msDoc);

          iCol++;

          this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(Status));

          iCol++;

          this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(MimeType));

          iCol++;

          this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(ExtractedLabel));

          iCol++;

          this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(ExtractedValue));

          iRow++;

        }

      }

      // The table spans the header row plus every data row written above.
      ws.Range(1, 1, iRow - 1, iColMax).CreateTable();

    }
/**************************************************************************/

    /// <summary>
    /// Adds the duplicate ETags worksheet to <paramref name="wb"/>. A first pass
    /// counts how often each non-empty ETag occurs; a second pass writes one row
    /// per document whose ETag occurs more than once, reporting progress to the
    /// progress form as it goes, and finally wraps the populated range in a table.
    /// </summary>
    /// <param name="JobMaster">Job whose document collection is reported on.</param>
    /// <param name="wb">Workbook that receives the new worksheet.</param>
    /// <param name="WorksheetLabel">Name given to the new worksheet.</param>
    private void BuildWorksheetPageDuplicateEtags (
      MacroscopeJobMaster JobMaster,
      XLWorkbook wb,
      string WorksheetLabel
    )
    {

      var ws = wb.Worksheets.Add(WorksheetLabel);

      int iRow = 1;
      int iCol = 1;
      int iColMax = 1;

      decimal CountOuter = 0;
      decimal CountInner = 0;
      decimal DocCount = 0;

      MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

      // ETag -> number of documents carrying that ETag.
      Dictionary<string, int> DuplicatesList = new Dictionary<string, int>(DocCollection.CountDocuments());

      // Document key -> document, restricted to documents that have an ETag.
      Dictionary<string, MacroscopeDocument> DuplicatesDocList = new Dictionary<string, MacroscopeDocument>(DocCollection.CountDocuments());

      DocCount = (decimal) DocCollection.CountDocuments();

      foreach (string Url in DocCollection.DocumentKeys())
      {

        MacroscopeDocument msDoc = DocCollection.GetDocument(Url);
        string Etag = msDoc.GetEtag();

        if (string.IsNullOrEmpty(Etag))
        {
          continue;
        }

        if (!DuplicatesDocList.ContainsKey(Url))
        {
          DuplicatesDocList.Add(Url, msDoc);
        }

        // TryGetValue leaves SeenSoFar at 0 for an ETag not seen before, so a
        // single lookup covers both the "first" and the "repeat" case.
        int SeenSoFar;
        DuplicatesList.TryGetValue(Etag, out SeenSoFar);
        DuplicatesList[Etag] = SeenSoFar + 1;

      }

      {

        ws.Cell(iRow, iCol).Value = "Status Code";
        iCol++;

        ws.Cell(iRow, iCol).Value = "Status";
        iCol++;

        ws.Cell(iRow, iCol).Value = "Occurrences";
        iCol++;

        ws.Cell(iRow, iCol).Value = "ETag";
        iCol++;

        ws.Cell(iRow, iCol).Value = "URL";

      }

      iColMax = iCol;

      iRow++;

      foreach (KeyValuePair<string, int> EtagEntry in DuplicatesList)
      {

        string Etag = EtagEntry.Key;
        int Occurrences = EtagEntry.Value;

        CountOuter++;
        CountInner = 0;

        // ETags seen only once are not duplicates.
        if (Occurrences <= 1)
        {
          continue;
        }

        foreach (MacroscopeDocument msDoc in DuplicatesDocList.Values)
        {

          CountInner++;

          if (DocCount > 0)
          {
            this.ProgressForm.UpdatePercentages(
              Title: null,
              Message: null,
              MajorPercentage: -1,
              ProgressLabelMajor: string.Format("Documents Processed: {0}", CountOuter),
              MinorPercentage: ((decimal) 100 / DocCount) * CountOuter,
              ProgressLabelMinor: Etag,
              SubMinorPercentage: ((decimal) 100 / DocCount) * CountInner,
              ProgressLabelSubMinor: msDoc.GetUrl()
            );
          }

          if (msDoc.GetEtag() != Etag)
          {
            continue;
          }

          iCol = 1;

          int StatusCode = (int) msDoc.GetStatusCode();
          HttpStatusCode Status = msDoc.GetStatusCode();

          this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, StatusCode);

          iCol++;

          this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, Status);

          iCol++;

          this.InsertAndFormatContentCell(ws, iRow, iCol, Occurrences);

          iCol++;

          this.InsertAndFormatContentCell(ws, iRow, iCol, msDoc.GetEtag());

          iCol++;

          this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

          iRow++;

        }

      }

      // The table spans the header row plus every data row written above.
      ws.Range(1, 1, iRow - 1, iColMax).CreateTable();

    }
/**************************************************************************/

    /// <summary>
    /// Adds the page links worksheet to <paramref name="wb"/>: a header row, one
    /// row per outlink of every document in the collection, and a table over the
    /// populated range.
    /// </summary>
    /// <param name="JobMaster">Job supplying the document collection and allowed hosts.</param>
    /// <param name="wb">Workbook that receives the new worksheet.</param>
    /// <param name="WorksheetLabel">Name given to the new worksheet.</param>
    private void BuildWorksheetPageLinks (
      MacroscopeJobMaster JobMaster,
      XLWorkbook wb,
      string WorksheetLabel
    )
    {

      const int HeaderRow = 1;

      var ws = wb.Worksheets.Add(WorksheetLabel);

      MacroscopeDocumentCollection documents = JobMaster.GetDocCollection();
      MacroscopeAllowedHosts allowedHosts = JobMaster.GetAllowedHosts();

      string[] headings = {
        "URL",
        "Link Type",
        "Source URL",
        "Target URL",
        "Follow",
        "Alt Text",
        "Raw Source URL",
        "Raw Target URL"
      };

      for (int headingCol = 1; headingCol <= headings.Length; headingCol++)
      {
        ws.Cell(HeaderRow, headingCol).Value = headings[headingCol - 1];
      }

      int row = HeaderRow + 1;

      foreach (string documentKey in documents.DocumentKeys())
      {

        MacroscopeDocument msDoc = documents.GetDocument(documentKey);

        foreach (MacroscopeLink outlink in msDoc.IterateOutlinks())
        {

          string linkType = outlink.GetLinkType().ToString();
          string sourceUrl = outlink.GetSourceUrl();
          string targetUrl = outlink.GetTargetUrl();

          // Null alt text and raw URLs are normalised to empty strings
          // before they are handed to FormatIfMissing.
          string altText = outlink.GetAltText() ?? "";
          string rawSourceUrl = outlink.GetRawSourceUrl() ?? "";
          string rawTargetUrl = outlink.GetRawTargetUrl() ?? "";

          string followLabel = outlink.GetDoFollow() ? "Follow" : "No Follow";

          // Pre-incremented before every cell, so the first cell lands in column 1.
          int col = 0;

          this.InsertAndFormatUrlCell(ws, row, ++col, msDoc);
          ws.Cell(row, col).Style.Font.SetFontColor(
            allowedHosts.IsInternalUrl(Url: documentKey) ? XLColor.Green : XLColor.Gray
          );

          this.InsertAndFormatContentCell(ws, row, ++col, this.FormatIfMissing(linkType));
          this.InsertAndFormatContentCell(ws, row, ++col, this.FormatIfMissing(sourceUrl));
          this.InsertAndFormatContentCell(ws, row, ++col, this.FormatIfMissing(targetUrl));
          this.InsertAndFormatContentCell(ws, row, ++col, this.FormatIfMissing(followLabel));
          this.InsertAndFormatContentCell(ws, row, ++col, this.FormatIfMissing(altText));
          this.InsertAndFormatContentCell(ws, row, ++col, this.FormatIfMissing(rawSourceUrl));
          this.InsertAndFormatContentCell(ws, row, ++col, this.FormatIfMissing(rawTargetUrl));

          row++;

        }

      }

      // The table spans the header row plus every data row written above.
      ws.Range(1, 1, row - 1, headings.Length).CreateTable();

    }
/**************************************************************************/

    /// <summary>
    /// Adds the duplicate pages worksheet to <paramref name="wb"/>. Every
    /// non-external HTML document is run through a Levenshtein analysis against
    /// the collection, and one row is written per similar document returned.
    /// Progress is reported to the progress form throughout, and the loops stop
    /// early once the progress form reports cancellation. The rows written so
    /// far are always wrapped in a table.
    /// </summary>
    /// <param name="JobMaster">Job supplying the document collection and allowed hosts.</param>
    /// <param name="wb">Workbook that receives the new worksheet.</param>
    /// <param name="WorksheetLabel">Name given to the new worksheet.</param>
    private void BuildWorksheetPageDuplicatePages (
      MacroscopeJobMaster JobMaster,
      XLWorkbook wb,
      string WorksheetLabel
    )
    {

      const int HeaderRow = 1;

      var ws = wb.Worksheets.Add(WorksheetLabel);

      MacroscopeDocumentCollection documents = JobMaster.GetDocCollection();
      MacroscopeAllowedHosts allowedHosts = JobMaster.GetAllowedHosts();

      // One cross-check list instance is handed to every per-document analysis.
      Dictionary<string, Boolean> crossCheckList = MacroscopeLevenshteinAnalysis.GetCrossCheckList(
        Capacity: documents.CountDocuments()
      );

      decimal totalDocuments = (decimal) documents.CountDocuments();
      decimal outerCount = 0;

      string[] headings = {
        "Status Code",
        "Status",
        "Origin URL",
        "Distance",
        "Similar URL"
      };

      for (int headingCol = 1; headingCol <= headings.Length; headingCol++)
      {
        ws.Cell(HeaderRow, headingCol).Value = headings[headingCol - 1];
      }

      int row = HeaderRow + 1;

      foreach (string UrlLeft in documents.DocumentKeys())
      {

        MacroscopeDocument msDocLeft = documents.GetDocument(UrlLeft);

        decimal innerCount = 0;

        outerCount++;

        // Progress is reported for every document, including those skipped below.
        if (totalDocuments > 0)
        {
          this.ProgressForm.UpdatePercentages(
            Title: null,
            Message: null,
            MajorPercentage: -1,
            ProgressLabelMajor: string.Format("Documents Processed: {0}", outerCount),
            MinorPercentage: ((decimal) 100 / totalDocuments) * outerCount,
            ProgressLabelMinor: UrlLeft,
            SubMinorPercentage: 0,
            ProgressLabelSubMinor: ""
          );
        }

        if (msDocLeft.GetIsExternal() || !msDocLeft.GetIsHtml())
        {
          continue;
        }

        MacroscopeLevenshteinAnalysis analysis = new MacroscopeLevenshteinAnalysis(
          msDoc: msDocLeft,
          SizeDifference: MacroscopePreferencesManager.GetMaxLevenshteinSizeDifference(),
          Threshold: MacroscopePreferencesManager.GetMaxLevenshteinDistance(),
          CrossCheckList: crossCheckList,
          IPercentageDone: this
        );

        Dictionary<MacroscopeDocument, int> similarDocuments = analysis.AnalyzeDocCollection(
          DocCollection: documents
        );

        decimal similarCount = (decimal) similarDocuments.Count;

        foreach (KeyValuePair<MacroscopeDocument, int> similar in similarDocuments)
        {

          int StatusCode = (int) msDocLeft.GetStatusCode();
          HttpStatusCode Status = msDocLeft.GetStatusCode();
          string UrlDuplicate = similar.Key.GetUrl();
          int Distance = similar.Value;

          innerCount++;

          if (totalDocuments > 0)
          {
            this.ProgressForm.UpdatePercentages(
              Title: null,
              Message: null,
              MajorPercentage: -1,
              ProgressLabelMajor: string.Format("Documents Processed: {0}", outerCount),
              MinorPercentage: ((decimal) 100 / totalDocuments) * outerCount,
              ProgressLabelMinor: UrlLeft,
              SubMinorPercentage: ((decimal) 100 / similarCount) * innerCount,
              ProgressLabelSubMinor: UrlDuplicate
            );
          }

          // Pre-incremented before every cell, so the first cell lands in column 1.
          int col = 0;

          this.InsertAndFormatStatusCodeCell(ws, row, ++col, StatusCode);
          this.InsertAndFormatStatusCodeCell(ws, row, ++col, Status);

          this.InsertAndFormatUrlCell(ws, row, ++col, UrlLeft);
          ws.Cell(row, col).Style.Font.SetFontColor(
            allowedHosts.IsInternalUrl(Url: UrlLeft) ? XLColor.Green : XLColor.Gray
          );

          // Distances at or below the configured maximum are shown in red.
          this.InsertAndFormatContentCell(ws, row, ++col, Distance.ToString());
          ws.Cell(row, col).Style.Font.SetFontColor(
            (Distance <= MacroscopePreferencesManager.GetMaxLevenshteinDistance()) ? XLColor.Red : XLColor.Green
          );

          this.InsertAndFormatUrlCell(ws, row, ++col, UrlDuplicate);
          ws.Cell(row, col).Style.Font.SetFontColor(
            allowedHosts.IsInternalUrl(Url: UrlDuplicate) ? XLColor.Green : XLColor.Gray
          );

          row++;

          if (this.ProgressForm.Cancelled())
          {
            break;
          }

        }

        // Checked again here so a cancel inside the inner loop also ends the outer one.
        if (this.ProgressForm.Cancelled())
        {
          break;
        }

        Thread.Yield();

      }

      // The table spans the header row plus every data row written above.
      ws.Range(1, 1, row - 1, headings.Length).CreateTable();

    }
/**************************************************************************/

    /// <summary>
    /// Writes the page links report as CSV: a header record, followed by one
    /// record per outlink of every document in the collection.
    /// </summary>
    /// <param name="JobMaster">Job whose document collection is reported on.</param>
    /// <param name="ws">CSV writer that receives the header and data records.</param>
    private void BuildWorksheetPageLinks (
      MacroscopeJobMaster JobMaster,
      CsvWriter ws
    )
    {

      // The CSV output is not colour-coded, so no allowed-hosts lookup is needed here.
      MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

      {

        ws.WriteField("URL");
        ws.WriteField("Link Type");
        ws.WriteField("Source URL");
        ws.WriteField("Target URL");
        ws.WriteField("Follow");
        ws.WriteField("Alt Text");
        ws.WriteField("Raw Source URL");
        ws.WriteField("Raw Target URL");

        ws.NextRecord();

      }

      foreach (string Url in DocCollection.DocumentKeys())
      {

        MacroscopeDocument msDoc = DocCollection.GetDocument(Url: Url);

        foreach (MacroscopeLink Link in msDoc.IterateOutlinks())
        {

          string LinkType = Link.GetLinkType().ToString();
          string SourceUrl = Link.GetSourceUrl();
          string TargetUrl = Link.GetTargetUrl();
          string AltText = Link.GetAltText();
          string RawSourceUrl = Link.GetRawSourceUrl();
          string RawTargetUrl = Link.GetRawTargetUrl();
          string DoFollow = "No Follow";

          if (Link.GetDoFollow())
          {
            DoFollow = "Follow";
          }

          // Null alt text and raw URLs are normalised to empty strings
          // before they are handed to FormatIfMissing.
          if (string.IsNullOrEmpty(AltText))
          {
            AltText = "";
          }

          if (string.IsNullOrEmpty(RawSourceUrl))
          {
            RawSourceUrl = "";
          }

          if (string.IsNullOrEmpty(RawTargetUrl))
          {
            RawTargetUrl = "";
          }

          this.InsertAndFormatUrlCell(ws, msDoc);
          this.InsertAndFormatContentCell(ws, this.FormatIfMissing(LinkType));
          this.InsertAndFormatContentCell(ws, this.FormatIfMissing(SourceUrl));
          this.InsertAndFormatContentCell(ws, this.FormatIfMissing(TargetUrl));
          this.InsertAndFormatContentCell(ws, this.FormatIfMissing(DoFollow));
          this.InsertAndFormatContentCell(ws, this.FormatIfMissing(AltText));
          this.InsertAndFormatContentCell(ws, this.FormatIfMissing(RawSourceUrl));
          this.InsertAndFormatContentCell(ws, this.FormatIfMissing(RawTargetUrl));

          ws.NextRecord();

        }

      }

    }
/**************************************************************************/

    /// <summary>
    /// Writes the page text report as CSV: a header record, followed by one
    /// record for each HTML or PDF document that is neither external nor a redirect.
    /// </summary>
    /// <param name="JobMaster">Job whose document collection is reported on.</param>
    /// <param name="ws">CSV writer that receives the header and data records.</param>
    private void BuildWorksheetPageText (
      MacroscopeJobMaster JobMaster,
      CsvWriter ws
    )
    {

      MacroscopeDocumentCollection documents = JobMaster.GetDocCollection();

      string[] headings = {
        "URL",
        "Page Locale",
        "Page Language",
        "Detected Language",
        "Word Count",
        "Readability Method",
        "Readability Grade",
        "Readability Grade Description"
      };

      foreach (string heading in headings)
      {
        ws.WriteField(heading);
      }

      ws.NextRecord();

      foreach (string documentKey in documents.DocumentKeys())
      {

        MacroscopeDocument msDoc = documents.GetDocument(documentKey);

        // Skip external documents, redirects, and anything that is neither HTML nor PDF.
        if (msDoc.GetIsExternal() || msDoc.GetIsRedirect())
        {
          continue;
        }

        if (!msDoc.GetIsHtml() && !msDoc.GetIsPdf())
        {
          continue;
        }

        string pageLocale = msDoc.GetLocale();
        string pageLanguage = msDoc.GetIsoLanguageCode();
        string detectedLanguage = msDoc.GetDocumentTextLanguage();
        int wordCount = msDoc.GetWordCount();
        string readabilityMethod = MacroscopeAnalyzeReadability.FormatAnalyzeReadabilityMethod(
          ReadabilityMethod: msDoc.GetReadabilityGradeMethod()
        );
        string readabilityGrade = msDoc.GetReadabilityGrade().ToString("00.00");
        string readabilityDescription = msDoc.GetReadabilityGradeDescription();

        // Null locale/language values are normalised to empty strings
        // before they are handed to FormatIfMissing.
        pageLocale = pageLocale ?? "";
        pageLanguage = pageLanguage ?? "";
        detectedLanguage = detectedLanguage ?? "";

        this.InsertAndFormatUrlCell(ws, msDoc);

        string[] textCells = {
          this.FormatIfMissing(pageLocale),
          this.FormatIfMissing(pageLanguage),
          this.FormatIfMissing(detectedLanguage),
          this.FormatIfMissing(wordCount.ToString()),
          this.FormatIfMissing(readabilityMethod),
          this.FormatIfMissing(readabilityGrade),
          this.FormatIfMissing(readabilityDescription)
        };

        foreach (string cellText in textCells)
        {
          this.InsertAndFormatContentCell(ws, cellText);
        }

        ws.NextRecord();

      }

    }
/**************************************************************************/

    /// <summary>
    /// Writes the duplicate ETags report as CSV. A first pass counts how often
    /// each non-empty ETag occurs; a second pass writes one record per document
    /// whose ETag occurs more than once, reporting progress to the progress form
    /// as it goes.
    /// </summary>
    /// <param name="JobMaster">Job whose document collection is reported on.</param>
    /// <param name="ws">CSV writer that receives the header and data records.</param>
    private void BuildWorksheetPageDuplicateEtags (
      MacroscopeJobMaster JobMaster,
      CsvWriter ws
    )
    {

      decimal CountOuter = 0;
      decimal CountInner = 0;
      decimal DocCount = 0;

      MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

      // ETag -> number of documents carrying that ETag.
      Dictionary<string, int> DuplicatesList = new Dictionary<string, int>(DocCollection.CountDocuments());

      // Document key -> document, restricted to documents that have an ETag.
      Dictionary<string, MacroscopeDocument> DuplicatesDocList = new Dictionary<string, MacroscopeDocument>(DocCollection.CountDocuments());

      DocCount = (decimal) DocCollection.CountDocuments();

      foreach (string Url in DocCollection.DocumentKeys())
      {

        MacroscopeDocument msDoc = DocCollection.GetDocument(Url);
        string Etag = msDoc.GetEtag();

        if (string.IsNullOrEmpty(Etag))
        {
          continue;
        }

        if (!DuplicatesDocList.ContainsKey(Url))
        {
          DuplicatesDocList.Add(Url, msDoc);
        }

        // TryGetValue leaves SeenSoFar at 0 for an ETag not seen before, so a
        // single lookup covers both the "first" and the "repeat" case.
        int SeenSoFar;
        DuplicatesList.TryGetValue(Etag, out SeenSoFar);
        DuplicatesList[Etag] = SeenSoFar + 1;

      }

      {

        ws.WriteField("Status Code");
        ws.WriteField("Status");
        ws.WriteField("Occurrences");
        ws.WriteField("ETag");
        ws.WriteField("URL");

        ws.NextRecord();

      }

      foreach (KeyValuePair<string, int> EtagEntry in DuplicatesList)
      {

        string Etag = EtagEntry.Key;
        int Occurrences = EtagEntry.Value;

        CountOuter++;
        CountInner = 0;

        // ETags seen only once are not duplicates.
        if (Occurrences <= 1)
        {
          continue;
        }

        foreach (MacroscopeDocument msDoc in DuplicatesDocList.Values)
        {

          CountInner++;

          if (DocCount > 0)
          {
            this.ProgressForm.UpdatePercentages(
              Title: null,
              Message: null,
              MajorPercentage: -1,
              ProgressLabelMajor: string.Format("Documents Processed: {0}", CountOuter),
              MinorPercentage: ((decimal) 100 / DocCount) * CountOuter,
              ProgressLabelMinor: Etag,
              SubMinorPercentage: ((decimal) 100 / DocCount) * CountInner,
              ProgressLabelSubMinor: msDoc.GetUrl()
            );
          }

          if (msDoc.GetEtag() != Etag)
          {
            continue;
          }

          int StatusCode = (int) msDoc.GetStatusCode();
          HttpStatusCode Status = msDoc.GetStatusCode();

          this.InsertAndFormatStatusCodeCell(ws, StatusCode);
          this.InsertAndFormatStatusCodeCell(ws, Status);
          this.InsertAndFormatContentCell(ws, Occurrences);
          this.InsertAndFormatContentCell(ws, msDoc.GetEtag());
          this.InsertAndFormatUrlCell(ws, msDoc);

          ws.NextRecord();

        }

      }

    }
/**************************************************************************/

    /// <summary>
    /// Adds the URI analysis worksheet to <paramref name="wb"/>: a header row,
    /// one row per document showing its status, its checksum and how many times
    /// that checksum occurs in the collection, and a table over the populated range.
    /// </summary>
    /// <param name="JobMaster">Job supplying the document collection and allowed hosts.</param>
    /// <param name="wb">Workbook that receives the new worksheet.</param>
    /// <param name="WorksheetLabel">Name given to the new worksheet.</param>
    private void BuildWorksheetPageUriAnalysis (
      MacroscopeJobMaster JobMaster,
      XLWorkbook wb,
      string WorksheetLabel
    )
    {

      const int HeaderRow = 1;

      var ws = wb.Worksheets.Add(WorksheetLabel);

      MacroscopeDocumentCollection documents = JobMaster.GetDocCollection();
      MacroscopeAllowedHosts allowedHosts = JobMaster.GetAllowedHosts();

      string[] headings = {
        "URL",
        "Status Code",
        "Status",
        "Occurrences",
        "Checksum"
      };

      for (int headingCol = 1; headingCol <= headings.Length; headingCol++)
      {
        ws.Cell(HeaderRow, headingCol).Value = headings[headingCol - 1];
      }

      int row = HeaderRow + 1;

      foreach (string documentKey in documents.DocumentKeys())
      {

        MacroscopeDocument msDoc = documents.GetDocument(documentKey);

        string statusCodeText = ((int) msDoc.GetStatusCode()).ToString();
        string statusText = msDoc.GetStatusCode().ToString();
        string checksum = msDoc.GetChecksum();
        int checksumCount = documents.GetStatsChecksumCount(Checksum: checksum);

        // Pre-incremented before every cell, so the first cell lands in column 1.
        int col = 0;

        this.InsertAndFormatUrlCell(ws, row, ++col, msDoc);
        ws.Cell(row, col).Style.Font.SetFontColor(
          allowedHosts.IsInternalUrl(Url: documentKey) ? XLColor.Green : XLColor.Gray
        );

        this.InsertAndFormatContentCell(ws, row, ++col, statusCodeText);
        this.InsertAndFormatContentCell(ws, row, ++col, statusText);

        // A checksum that occurs more than once is shown in red in both columns.
        this.InsertAndFormatContentCell(ws, row, ++col, checksumCount);
        ws.Cell(row, col).Style.Font.SetFontColor((checksumCount > 1) ? XLColor.Red : XLColor.Blue);

        this.InsertAndFormatContentCell(ws, row, ++col, checksum);
        ws.Cell(row, col).Style.Font.SetFontColor((checksumCount > 1) ? XLColor.Red : XLColor.Blue);

        row++;

      }

      // The table spans the header row plus every data row written above.
      ws.Range(1, 1, row - 1, headings.Length).CreateTable();

    }
/**************************************************************************/

    /// <summary>
    /// Adds the redirected links worksheet to <paramref name="wb"/>: a header
    /// row, one row per inbound hyperlink (with a non-empty source URL) of every
    /// document whose status code is in the 300-399 range, and a table over the
    /// populated range.
    /// </summary>
    /// <param name="JobMaster">Job supplying the document collection and allowed hosts.</param>
    /// <param name="wb">Workbook that receives the new worksheet.</param>
    /// <param name="WorksheetLabel">Name given to the new worksheet.</param>
    private void BuildWorksheetPageRedirectedLinks (
      MacroscopeJobMaster JobMaster,
      XLWorkbook wb,
      string WorksheetLabel
    )
    {

      var ws = wb.Worksheets.Add(WorksheetLabel);

      int iRow = 1;
      int iCol = 1;
      int iColMax = 1;

      MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
      MacroscopeAllowedHosts AllowedHosts = JobMaster.GetAllowedHosts();

      {

        ws.Cell(iRow, iCol).Value = "Status Code";
        iCol++;

        ws.Cell(iRow, iCol).Value = "Status";
        iCol++;

        ws.Cell(iRow, iCol).Value = "Origin URL";
        iCol++;

        ws.Cell(iRow, iCol).Value = "Destination URL";

      }

      iColMax = iCol;

      iRow++;

      foreach (string Url in DocCollection.DocumentKeys())
      {

        MacroscopeDocument msDoc = DocCollection.GetDocument(Url);
        MacroscopeHyperlinksIn HyperlinksIn = DocCollection.GetDocumentHyperlinksIn(Url);

        int StatusCode = (int) msDoc.GetStatusCode();
        string Status = msDoc.GetStatusCode().ToString();

        bool IsRedirectStatus = (StatusCode >= 300) && (StatusCode <= 399);

        if (!IsRedirectStatus || (HyperlinksIn == null))
        {
          continue;
        }

        foreach (MacroscopeHyperlinkIn HyperlinkIn in HyperlinksIn.IterateLinks())
        {

          string OriginUrl = HyperlinkIn.GetSourceUrl();

          if (string.IsNullOrEmpty(OriginUrl))
          {
            continue;
          }

          iCol = 1;

          // Only 3xx documents reach this point, so the status cells are always
          // blue; the former 4xx/5xx red branch could never be taken here.
          this.InsertAndFormatContentCell(ws, iRow, iCol, StatusCode.ToString());
          ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Blue);

          iCol++;

          this.InsertAndFormatContentCell(ws, iRow, iCol, Status);
          ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Blue);

          iCol++;

          this.InsertAndFormatUrlCell(ws, iRow, iCol, OriginUrl);

          if (AllowedHosts.IsInternalUrl(Url: OriginUrl))
          {
            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
          }
          else
          {
            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
          }

          iCol++;

          this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

          if (AllowedHosts.IsInternalUrl(Url: Url))
          {
            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
          }
          else
          {
            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
          }

          iRow++;

        }

      }

      // The table spans the header row plus every data row written above.
      ws.Range(1, 1, iRow - 1, iColMax).CreateTable();

    }
/**************************************************************************/

    /// <summary>
    /// Adds the missing language specifier worksheet to <paramref name="wb"/>: a
    /// bold header row, one row for each document whose URL is allowed by the
    /// job's allowed hosts and whose locale is null or empty, and a table over
    /// the populated range.
    /// </summary>
    /// <param name="JobMaster">Job supplying the document collection and allowed hosts.</param>
    /// <param name="wb">Workbook that receives the new worksheet.</param>
    /// <param name="WorksheetLabel">Name given to the new worksheet.</param>
    private void BuildWorksheetMissingLanguageSpecifier (
      MacroscopeJobMaster JobMaster,
      XLWorkbook wb,
      string WorksheetLabel
    )
    {

      const int HeaderRow = 1;

      var ws = wb.Worksheets.Add(WorksheetLabel);

      MacroscopeDocumentCollection documents = JobMaster.GetDocCollection();
      MacroscopeAllowedHosts allowedHosts = JobMaster.GetAllowedHosts();

      string[] headings = {
        "URL",
        "Site Locale",
        "Title"
      };

      for (int headingCol = 1; headingCol <= headings.Length; headingCol++)
      {
        ws.Cell(HeaderRow, headingCol).Value = headings[headingCol - 1];
        ws.Cell(HeaderRow, headingCol).Style.Font.SetBold();
      }

      int row = HeaderRow + 1;

      foreach (string documentKey in documents.DocumentKeys())
      {

        MacroscopeDocument msDoc = documents.GetDocument(documentKey);
        string siteLocale = msDoc.GetLocale();

        // Only allowed documents without a locale are listed.
        if (!allowedHosts.IsAllowedFromUrl(msDoc.GetUrl()) || !string.IsNullOrEmpty(siteLocale))
        {
          continue;
        }

        string localeText = this.FormatIfMissing(siteLocale);
        string titleText = this.FormatIfMissing(msDoc.GetTitle());

        // Pre-incremented before every cell, so the first cell lands in column 1.
        int col = 0;

        this.InsertAndFormatUrlCell(ws, row, ++col, msDoc);

        // Cells whose formatted text equals "MISSING" are shown in red.
        this.InsertAndFormatContentCell(ws, row, ++col, localeText);

        if (localeText == "MISSING")
        {
          ws.Cell(row, col).Style.Font.SetFontColor(XLColor.Red);
        }

        this.InsertAndFormatContentCell(ws, row, ++col, titleText);

        if (titleText == "MISSING")
        {
          ws.Cell(row, col).Style.Font.SetFontColor(XLColor.Red);
        }

        row++;

      }

      // The table spans the header row plus every data row written above.
      ws.Range(1, 1, row - 1, headings.Length).CreateTable();

    }