/**************************************************************************/

/// <summary>
/// Creates or refreshes the list view row that reports the meta description
/// of a single document.
/// </summary>
/// <remarks>
/// External documents, redirects, and documents that are neither HTML nor PDF
/// produce no row. Rows are keyed by the digests of the URL and of the
/// description text as returned by the document (i.e. before an empty
/// description is replaced by the "MISSING" placeholder). A row whose key is
/// already present in <c>DisplayListView</c> is updated in place; otherwise a
/// new row is built and appended to <paramref name="ListViewItems"/>.
/// </remarks>
/// <param name="ListViewItems">Receives newly created rows.</param>
/// <param name="DocCollection">Queried for the description occurrence count.</param>
/// <param name="msDoc">The document being reported.</param>
/// <param name="Url">The URL shown in, and used to key, the row.</param>
protected override void RenderListView(
    List<ListViewItem> ListViewItems,
    MacroscopeDocumentCollection DocCollection,
    MacroscopeDocument msDoc,
    string Url
)
{
    if (msDoc.GetIsExternal() || msDoc.GetIsRedirect())
    {
        return;
    }

    var DocType = msDoc.GetDocumentType();

    if ((DocType != MacroscopeConstants.DocumentType.HTML)
        && (DocType != MacroscopeConstants.DocumentType.PDF))
    {
        return;
    }

    // Null or empty language codes are both shown as an empty string.
    string IsoLanguage = msDoc.GetIsoLanguageCode() ?? "";
    string DescriptionLanguage = msDoc.GetDescriptionLanguage() ?? "";
    string DescriptionText = msDoc.GetDescription();
    int TextLength = msDoc.GetDescriptionLength();

    // The key is derived from the description exactly as the document returned it.
    string ItemKey = string.Join(":", UrlToDigest(Url), UrlToDigest(DescriptionText));

    int DuplicateCount = 0;

    if (TextLength <= 0)
    {
        DescriptionText = "MISSING";
    }
    else
    {
        DuplicateCount = DocCollection.GetStatsDescriptionCount(msDoc: msDoc);
    }

    string CountText = DuplicateCount.ToString();
    string LengthText = TextLength.ToString();

    ListViewItem Row = null;

    if (!this.DisplayListView.Items.ContainsKey(ItemKey))
    {
        // No row yet: build one and hand it back through ListViewItems.
        try
        {
            Row = new ListViewItem(ItemKey);
            Row.UseItemStyleForSubItems = false;
            Row.Name = ItemKey;

            Row.SubItems[ColUrl].Text = Url;
            Row.SubItems.Add(IsoLanguage);
            Row.SubItems.Add(DescriptionLanguage);
            Row.SubItems.Add(CountText);
            Row.SubItems.Add(DescriptionText);
            Row.SubItems.Add(LengthText);

            ListViewItems.Add(Row);
        }
        catch (Exception ex)
        {
            DebugMsg(string.Format("MacroscopeDisplayDescriptions 2: {0}", ex.Message));
        }
    }
    else
    {
        // Row already displayed: overwrite its cells.
        try
        {
            Row = this.DisplayListView.Items[ItemKey];

            Row.SubItems[ColUrl].Text = Url;
            Row.SubItems[ColPageLanguage].Text = IsoLanguage;
            Row.SubItems[ColDetectedLanguage].Text = DescriptionLanguage;
            Row.SubItems[ColOccurences].Text = CountText;
            Row.SubItems[ColDescriptionText].Text = DescriptionText;
            Row.SubItems[ColLength].Text = LengthText;
        }
        catch (Exception ex)
        {
            DebugMsg(string.Format("MacroscopeDisplayDescriptions 1: {0}", ex.Message));
        }
    }

    if (Row == null)
    {
        return;
    }

    Row.ForeColor = Color.Blue;

    // Decide the colours first, then apply them in a fixed order below.
    Color UrlColour;
    Color LanguageColour;
    Color LengthColour;
    bool FlagUrlForLength = false;

    if (msDoc.GetIsInternal())
    {
        UrlColour = Color.Green;

        LanguageColour = (DescriptionLanguage != IsoLanguage) ? Color.Red : Color.Green;

        bool LengthOutOfRange =
            (TextLength < MacroscopePreferencesManager.GetDescriptionMinLen())
            || (TextLength > MacroscopePreferencesManager.GetDescriptionMaxLen());

        LengthColour = LengthOutOfRange ? Color.Red : Color.Green;
        FlagUrlForLength = LengthOutOfRange;
    }
    else
    {
        UrlColour = Color.Gray;
        LanguageColour = Color.Gray;
        LengthColour = Color.Gray;
    }

    // URL -------------------------------------------------------------//

    Row.SubItems[ColUrl].ForeColor = UrlColour;

    // Description Language --------------------------------------------//

    Row.SubItems[ColPageLanguage].ForeColor = LanguageColour;
    Row.SubItems[ColDetectedLanguage].ForeColor = LanguageColour;

    // Check Description Length ----------------------------------------//

    if (FlagUrlForLength)
    {
        Row.SubItems[ColUrl].ForeColor = Color.Red;
    }

    // Applied last, so the detected-language cell ends up carrying the
    // length colour rather than the language-comparison colour.
    Row.SubItems[ColOccurences].ForeColor = LengthColour;
    Row.SubItems[ColDetectedLanguage].ForeColor = LengthColour;
    Row.SubItems[ColDescriptionText].ForeColor = LengthColour;
    Row.SubItems[ColLength].ForeColor = LengthColour;
}
/**************************************************************************/

/// <summary>
/// Adds a worksheet to the workbook listing the meta description of every
/// non-external, non-redirect HTML or PDF document in the job's collection.
/// </summary>
/// <remarks>
/// Columns: URL, Page Language, Detected Language, Occurrences, Description,
/// Description Length. Cells are coloured to flag language mismatches,
/// descriptions that occur more than once, missing descriptions, and lengths
/// outside the configured minimum/maximum. The filled range, including the
/// header row, is finally turned into a table.
/// </remarks>
/// <param name="JobMaster">Supplies the document collection to report on.</param>
/// <param name="wb">Workbook that receives the new worksheet.</param>
/// <param name="WorksheetLabel">Name of the worksheet to add.</param>
private void BuildWorksheetPageDescriptions(
    MacroscopeJobMaster JobMaster,
    XLWorkbook wb,
    string WorksheetLabel
)
{
    var ws = wb.Worksheets.Add(WorksheetLabel);

    int iRow = 1;
    int iCol = 1;
    int iColMax = 1;

    MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

    {
        ws.Cell(iRow, iCol).Value = "URL";
        iCol++;

        ws.Cell(iRow, iCol).Value = "Page Language";
        iCol++;

        ws.Cell(iRow, iCol).Value = "Detected Language";
        iCol++;

        ws.Cell(iRow, iCol).Value = "Occurrences";
        iCol++;

        ws.Cell(iRow, iCol).Value = "Description";
        iCol++;

        ws.Cell(iRow, iCol).Value = "Description Length";
    }

    // Remember the last header column; it bounds the table range below.
    iColMax = iCol;

    iRow++;

    foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
    {
        if (msDoc.GetIsExternal())
        {
            continue;
        }

        if (msDoc.GetIsRedirect())
        {
            continue;
        }

        var DocType = msDoc.GetDocumentType();
        bool IsHtml = (DocType == MacroscopeConstants.DocumentType.HTML);
        bool IsPdf = (DocType == MacroscopeConstants.DocumentType.PDF);

        if (!IsHtml && !IsPdf)
        {
            continue;
        }

        iCol = 1;

        string PageLanguage = msDoc.GetIsoLanguageCode();

        // This sheet reports descriptions, so compare against the language
        // detected for the description (as RenderListView does), not the
        // language detected for the title.
        string DetectedLanguage = msDoc.GetDescriptionLanguage();

        string Description = msDoc.GetDescription();
        int DescriptionLength = msDoc.GetDescriptionLength();
        int Occurrences = 0;

        // Occurrences are only looked up when there is a description to count.
        if (DescriptionLength > 0)
        {
            Occurrences = DocCollection.GetStatsDescriptionCount(msDoc: msDoc);
        }

        XLColor LanguageColour =
            (PageLanguage != DetectedLanguage) ? XLColor.Red : XLColor.Green;

        // URL
        this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

        if (msDoc.GetIsInternal())
        {
            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
        }
        else
        {
            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
        }

        iCol++;

        // Page Language: only HTML documents get the FormatIfMissing treatment;
        // PDF documents have the raw value written.
        if (IsHtml)
        {
            this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(PageLanguage));
        }
        else
        {
            this.InsertAndFormatContentCell(ws, iRow, iCol, PageLanguage);
        }

        ws.Cell(iRow, iCol).Style.Font.SetFontColor(LanguageColour);

        iCol++;

        // Detected Language
        this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(DetectedLanguage));
        ws.Cell(iRow, iCol).Style.Font.SetFontColor(LanguageColour);

        iCol++;

        // Occurrences: a description used more than once is highlighted.
        this.InsertAndFormatContentCell(ws, iRow, iCol, Occurrences);

        if (Occurrences > 1)
        {
            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Orange);
        }
        else
        {
            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
        }

        iCol++;

        // Description
        this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(Description));

        if (DescriptionLength <= 0)
        {
            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
            ws.Cell(iRow, iCol).Value = "MISSING";
        }
        else
        {
            ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
        }

        iCol++;

        // Description Length
        this.InsertAndFormatContentCell(ws, iRow, iCol, DescriptionLength);

        bool LengthOutOfRange =
            (DescriptionLength < MacroscopePreferencesManager.GetDescriptionMinLen())
            || (DescriptionLength > MacroscopePreferencesManager.GetDescriptionMaxLen());

        ws.Cell(iRow, iCol).Style.Font.SetFontColor(LengthOutOfRange ? XLColor.Red : XLColor.Green);

        iRow++;
    }

    // iRow points one past the last written row.
    ws.Range(1, 1, iRow - 1, iColMax).CreateTable();
}
/**************************************************************************/

/// <summary>
/// Writes the page-descriptions report as CSV: a header record followed by
/// one record for every non-external, non-redirect HTML or PDF document in
/// the job's collection.
/// </summary>
/// <remarks>
/// Fields per record: URL, Page Language, Detected Language, Occurrences,
/// Description, Description Length.
/// </remarks>
/// <param name="JobMaster">Supplies the document collection to report on.</param>
/// <param name="ws">CSV writer that receives the records.</param>
private void BuildWorksheetPageDescriptions(
    MacroscopeJobMaster JobMaster,
    CsvWriter ws
)
{
    MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

    {
        ws.WriteField("URL");
        ws.WriteField("Page Language");
        ws.WriteField("Detected Language");
        ws.WriteField("Occurrences");
        ws.WriteField("Description");
        ws.WriteField("Description Length");

        ws.NextRecord();
    }

    foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
    {
        if (msDoc.GetIsExternal())
        {
            continue;
        }

        if (msDoc.GetIsRedirect())
        {
            continue;
        }

        var DocType = msDoc.GetDocumentType();

        if ((DocType != MacroscopeConstants.DocumentType.HTML)
            && (DocType != MacroscopeConstants.DocumentType.PDF))
        {
            continue;
        }

        string Description = msDoc.GetDescription();
        string PageLanguage = msDoc.GetIsoLanguageCode();

        // This report is about descriptions, so use the language detected for
        // the description (as RenderListView does), not the title's language.
        string DetectedLanguage = msDoc.GetDescriptionLanguage();

        int DescriptionLength = msDoc.GetDescriptionLength();
        int Occurrences = 0;

        // Occurrences are only looked up when there is a description to count.
        if (DescriptionLength > 0)
        {
            Occurrences = DocCollection.GetStatsDescriptionCount(msDoc: msDoc);
        }

        this.InsertAndFormatUrlCell(ws, msDoc);

        this.InsertAndFormatContentCell(ws, this.FormatIfMissing(PageLanguage));
        this.InsertAndFormatContentCell(ws, this.FormatIfMissing(DetectedLanguage));
        this.InsertAndFormatContentCell(ws, this.FormatIfMissing(Occurrences.ToString()));
        this.InsertAndFormatContentCell(ws, this.FormatIfMissing(Description));
        this.InsertAndFormatContentCell(ws, this.FormatIfMissing(DescriptionLength.ToString()));

        ws.NextRecord();
    }
}
/**************************************************************************/

/// <summary>
/// Writes the page-descriptions report as CSV: a header record followed by
/// one record for every non-external, non-redirect HTML or PDF document,
/// looked up by each key of the job's document collection.
/// </summary>
/// <remarks>
/// Fields per record: URL, Page Language, Detected Language, Occurrences,
/// Description, Description Length.
/// </remarks>
/// <param name="JobMaster">Supplies the document collection to report on.</param>
/// <param name="ws">CSV writer that receives the records.</param>
private void BuildWorksheetPageDescriptions(
    MacroscopeJobMaster JobMaster,
    CsvWriter ws
)
{
    MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

    {
        ws.WriteField("URL");
        ws.WriteField("Page Language");
        ws.WriteField("Detected Language");
        ws.WriteField("Occurrences");
        ws.WriteField("Description");
        ws.WriteField("Description Length");

        ws.NextRecord();
    }

    foreach (string Url in DocCollection.DocumentKeys())
    {
        MacroscopeDocument msDoc = DocCollection.GetDocument(Url);

        // NOTE(review): GetDocument is assumed to be able to return null for a
        // key that is no longer present; skip rather than dereference - confirm.
        if (msDoc == null)
        {
            continue;
        }

        if (msDoc.GetIsExternal())
        {
            continue;
        }

        if (msDoc.GetIsRedirect())
        {
            continue;
        }

        if (!msDoc.GetIsHtml() && !msDoc.GetIsPdf())
        {
            continue;
        }

        string Description = msDoc.GetDescription();
        string PageLanguage = msDoc.GetIsoLanguageCode();

        // This report is about descriptions, so use the language detected for
        // the description (as RenderListView does), not the title's language.
        string DetectedLanguage = msDoc.GetDescriptionLanguage();

        int DescriptionLength = msDoc.GetDescriptionLength();
        int Occurrences = 0;

        // Occurrences are only looked up when there is a description to count.
        if (DescriptionLength > 0)
        {
            Occurrences = DocCollection.GetStatsDescriptionCount(msDoc: msDoc);
        }

        this.InsertAndFormatUrlCell(ws, msDoc);

        this.InsertAndFormatContentCell(ws, this.FormatIfMissing(PageLanguage));
        this.InsertAndFormatContentCell(ws, this.FormatIfMissing(DetectedLanguage));
        this.InsertAndFormatContentCell(ws, this.FormatIfMissing(Occurrences.ToString()));
        this.InsertAndFormatContentCell(ws, this.FormatIfMissing(Description));
        this.InsertAndFormatContentCell(ws, this.FormatIfMissing(DescriptionLength.ToString()));

        ws.NextRecord();
    }
}