/**************************************************************************/
/// <summary>
/// Writes the sitemap errors sheet to a CSV writer. A record is emitted for
/// each outlink of an internal sitemap XML document when the linked document
/// is internal and either has a status code in the 400-599 range or is not
/// allowed by robots.
/// </summary>
/// <param name="JobMaster">Supplies the document collection to scan.</param>
/// <param name="ws">CSV writer that receives the header and data records.</param>
private void BuildWorksheetSitemapErrors(MacroscopeJobMaster JobMaster, CsvWriter ws)
{
    MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

    // Header record.
    ws.WriteField("Sitemap URL");
    ws.WriteField("Status Code");
    ws.WriteField("Robots");
    ws.WriteField("URL");
    ws.NextRecord();

    foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
    {
        if (!msDoc.GetIsInternal() || !msDoc.IsDocumentType(Type: MacroscopeConstants.DocumentType.SITEMAPXML))
        {
            continue;
        }

        foreach (MacroscopeLink Outlink in msDoc.IterateOutlinks())
        {
            string TargetUrl = Outlink.GetTargetUrl();
            MacroscopeDocument msDocLinked = DocCollection.GetDocumentByUrl(Url: TargetUrl);

            // Skip targets for which the collection yields no document,
            // instead of dereferencing a null reference.
            if ((msDocLinked == null) || !msDocLinked.GetIsInternal())
            {
                continue;
            }

            int StatusCode = (int)msDocLinked.GetStatusCode();
            bool IsErrorStatus = (StatusCode >= 400) && (StatusCode <= 599);

            if (!IsErrorStatus && msDocLinked.GetAllowedByRobots())
            {
                continue;
            }

            // NOTE(review): the row is selected on the linked document's status
            // and robots state, but the status-code and robots cells below are
            // written from the sitemap document (msDoc) - confirm this is intended.
            this.InsertAndFormatUrlCell(ws, msDoc);
            this.InsertAndFormatStatusCodeCell(ws, msDoc);
            this.InsertAndFormatRobotsCell(ws, msDoc);
            this.InsertAndFormatUrlCell(ws, TargetUrl);
            ws.NextRecord();
        }
    }
}
/**************************************************************************/
/// <summary>
/// Populates the display list view with one row per (document, CSS selector
/// label, extracted value) triple for the documents named in
/// <paramref name="UrlList"/>. Existing rows (matched by key) are updated in
/// place; new rows are collected and added in a single AddRange call.
/// </summary>
/// <param name="DocCollection">Collection the URLs are resolved against.</param>
/// <param name="UrlList">URLs of the documents to render.</param>
/// <param name="DataExtractor">Decides whether extractors apply to a document.</param>
private void RenderListView(
    MacroscopeDocumentCollection DocCollection,
    List<string> UrlList,
    MacroscopeDataExtractorCssSelectors DataExtractor
)
{
    if (DocCollection.CountDocuments() == 0)
    {
        return;
    }

    List<ListViewItem> ListViewItems = new List<ListViewItem>();
    MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

    // try/finally so the progress form is closed and disposed even when
    // rendering throws part-way through.
    try
    {
        decimal Count = 0;
        decimal TotalDocs = (decimal)DocCollection.CountDocuments();
        decimal MajorPercentage = ((decimal)100 / TotalDocs) * Count;

        if (MacroscopePreferencesManager.GetShowProgressDialogues())
        {
            ProgressForm.UpdatePercentages(
                Title: "Preparing Display",
                Message: "Processing document collection for display:",
                MajorPercentage: MajorPercentage,
                ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
            );
        }

        foreach (string Url in UrlList)
        {
            MacroscopeDocument msDoc = DocCollection.GetDocumentByUrl(Url: Url);

            // A listed URL may have no document in the collection.
            if (msDoc == null)
            {
                continue;
            }

            string DocUrl = msDoc.GetUrl();
            string StatusCode = ((int)msDoc.GetStatusCode()).ToString();
            string Status = msDoc.GetStatusCode().ToString();
            string MimeType = msDoc.GetMimeType();

            if (!DataExtractor.CanApplyDataExtractorsToDocument(msDoc: msDoc))
            {
                continue;
            }

            // Colours depend only on the document, so resolve them once per
            // document rather than once per extracted pair.
            Color UrlColor = msDoc.GetIsInternal() ? Color.Green : Color.Gray;
            Color StatusColor;

            if (Regex.IsMatch(StatusCode, "^[2]"))
            {
                StatusColor = Color.Green;
            }
            else if (Regex.IsMatch(StatusCode, "^[3]"))
            {
                StatusColor = Color.Goldenrod;
            }
            else if (Regex.IsMatch(StatusCode, "^[45]"))
            {
                StatusColor = Color.Red;
            }
            else
            {
                StatusColor = Color.Blue;
            }

            foreach (KeyValuePair<string, string> DataExtractedPair in msDoc.IterateDataExtractedCssSelectors())
            {
                string CssSelectorLabel = DataExtractedPair.Key;
                string ExtractedValue = DataExtractedPair.Value;

                if (string.IsNullOrEmpty(CssSelectorLabel) || string.IsNullOrEmpty(ExtractedValue))
                {
                    continue;
                }

                string PairKey = string.Join(
                    ":",
                    UrlToDigest(DocUrl),
                    UrlToDigest(Macroscope.GetStringDigest(Text: CssSelectorLabel)),
                    UrlToDigest(Macroscope.GetStringDigest(Text: ExtractedValue))
                );

                ListViewItem lvItem;

                if (this.DisplayListView.Items.ContainsKey(PairKey))
                {
                    lvItem = this.DisplayListView.Items[PairKey];
                }
                else
                {
                    lvItem = new ListViewItem(PairKey);
                    lvItem.UseItemStyleForSubItems = false;
                    lvItem.Name = PairKey;

                    for (int i = 0; i < 6; i++)
                    {
                        lvItem.SubItems.Add("");
                    }

                    ListViewItems.Add(lvItem);
                }

                if (lvItem == null)
                {
                    // Log and skip instead of dereferencing the missing item below.
                    DebugMsg(string.Format("MacroscopeDisplayDataExtractorCssSelectors MISSING: {0}", PairKey));
                    continue;
                }

                try
                {
                    lvItem.SubItems[ColUrl].Text = DocUrl;
                    lvItem.SubItems[ColStatusCode].Text = StatusCode;
                    lvItem.SubItems[ColStatus].Text = Status;
                    lvItem.SubItems[ColMimeType].Text = MimeType;
                    lvItem.SubItems[ColCssSelectorLabel].Text = CssSelectorLabel;
                    lvItem.SubItems[ColExtractedValue].Text = ExtractedValue;
                }
                catch (Exception ex)
                {
                    DebugMsg(string.Format("MacroscopeDisplayDataExtractorCssSelectors: {0}", ex.Message));
                    DebugMsg(string.Format("MacroscopeDisplayDataExtractorCssSelectors: {0}", ex.StackTrace));
                }

                lvItem.SubItems[ColUrl].ForeColor = UrlColor;
                lvItem.SubItems[ColStatusCode].ForeColor = StatusColor;
                lvItem.SubItems[ColStatus].ForeColor = StatusColor;
            }

            if (MacroscopePreferencesManager.GetShowProgressDialogues())
            {
                Count++;
                MajorPercentage = ((decimal)100 / TotalDocs) * Count;

                ProgressForm.UpdatePercentages(
                    Title: null,
                    Message: null,
                    MajorPercentage: MajorPercentage,
                    ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                );
            }
        }

        this.DisplayListView.Items.AddRange(ListViewItems.ToArray());

        this.DisplayListView.AutoResizeColumns(ColumnHeaderAutoResizeStyle.ColumnContent);

        // Fixed widths override the auto-sized result for the leading columns.
        this.DisplayListView.Columns[ColUrl].Width = 300;
        this.DisplayListView.Columns[ColStatusCode].Width = 100;
        this.DisplayListView.Columns[ColStatus].Width = 100;
        this.DisplayListView.Columns[ColMimeType].Width = 100;
    }
    finally
    {
        if (MacroscopePreferencesManager.GetShowProgressDialogues())
        {
            ProgressForm.DoClose();
        }

        ProgressForm.Dispose();
    }
}
/**************************************************************************/
/// <summary>
/// Adds or updates one list view row per outgoing hyperlink of
/// <paramref name="msDoc"/> whose target URL contains
/// <paramref name="UrlFragment"/>. New rows are appended to
/// <paramref name="ListViewItems"/>; rows already present in the display list
/// view (matched by key) are updated in place.
/// </summary>
/// <param name="ListViewItems">Receives newly created rows.</param>
/// <param name="msDoc">Document whose outgoing hyperlinks are examined.</param>
/// <param name="Url">URL of the source document, shown in the URL column.</param>
/// <param name="UrlFragment">Substring that a target URL must contain to be listed.</param>
private void RenderListViewSearchTargetUrls(
    List<ListViewItem> ListViewItems,
    MacroscopeDocument msDoc,
    string Url,
    string UrlFragment
)
{
    MacroscopeAllowedHosts AllowedHosts = this.MainForm.GetJobMaster().GetAllowedHosts();
    MacroscopeHyperlinksOut HyperlinksOut = msDoc.GetHyperlinksOut();
    MacroscopeDocumentCollection DocCollection = this.MainForm.GetJobMaster().GetDocCollection();

    foreach (MacroscopeHyperlinkOut HyperlinkOut in HyperlinksOut.IterateLinks())
    {
        string UrlTarget = HyperlinkOut.GetTargetUrl();
        HttpStatusCode StatusCode = HttpStatusCode.NotFound;
        string StatusCodeText = "Not crawled";
        string StatusText = "Not crawled";
        string PairKey = string.Join(":", UrlToDigest(Url: Url), UrlToDigest(Url: UrlTarget));
        string LinkTarget = HyperlinkOut.GetLinkTarget();
        string LinkText = HyperlinkOut.GetAnchorText();
        string LinkTitle = HyperlinkOut.GetTitle();
        string AltText = HyperlinkOut.GetAltText();

        // Null-safe emptiness checks: a link with no anchor text, title or
        // alt text must not throw when its text is null.
        bool LinkTextMissing = string.IsNullOrEmpty(LinkText);
        bool LinkTitleMissing = string.IsNullOrEmpty(LinkTitle);
        bool AltTextMissing = string.IsNullOrEmpty(AltText);

        string LinkTextLabel = LinkTextMissing ? "MISSING" : LinkText;
        string LinkTitleLabel = LinkTitleMissing ? "MISSING" : LinkTitle;
        string AltTextLabel = AltTextMissing ? "MISSING" : AltText;
        string DoFollow = "No Follow";

        try
        {
            if (DocCollection.ContainsDocument(Url: HyperlinkOut.GetTargetUrl()))
            {
                StatusCode = DocCollection.GetDocumentByUrl(Url: HyperlinkOut.GetTargetUrl()).GetStatusCode();
                StatusCodeText = ((int)StatusCode).ToString();
                StatusText = StatusCode.ToString();
            }
            else
            {
                DebugMsg("Not in DocCollection");
            }
        }
        catch (Exception ex)
        {
            this.DebugMsg(ex.Message);
        }

        if (HyperlinkOut.GetDoFollow())
        {
            DoFollow = "Follow";
        }

        // Only links whose target contains the searched fragment are listed.
        if ((UrlTarget == null) || (UrlTarget.IndexOf(UrlFragment, StringComparison.CurrentCulture) < 0))
        {
            continue;
        }

        ListViewItem lvItem = null;

        if (this.DisplayListView.Items.ContainsKey(PairKey))
        {
            try
            {
                lvItem = this.DisplayListView.Items[PairKey];
                lvItem.SubItems[ColUrl].Text = Url;
                lvItem.SubItems[ColUrlTarget].Text = UrlTarget;
                lvItem.SubItems[ColStatusCode].Text = StatusCodeText;
                lvItem.SubItems[ColStatus].Text = StatusText;
                lvItem.SubItems[ColDoFollow].Text = DoFollow;
                lvItem.SubItems[ColLinkTarget].Text = LinkTarget;
                lvItem.SubItems[ColLinkAnchorTextLabel].Text = LinkTextLabel;
                lvItem.SubItems[ColLinkTitleLabel].Text = LinkTitleLabel;
                lvItem.SubItems[ColAltTextLabel].Text = AltTextLabel;
            }
            catch (Exception ex)
            {
                this.DebugMsg(string.Format("MacroscopeDisplayLinks 1: {0}", ex.Message));
            }
        }
        else
        {
            try
            {
                lvItem = new ListViewItem(PairKey);
                lvItem.UseItemStyleForSubItems = false;
                lvItem.Name = PairKey;

                // The first sub-item already exists; the rest are appended in column order.
                lvItem.SubItems[ColUrl].Text = Url;
                lvItem.SubItems.Add(UrlTarget);
                lvItem.SubItems.Add(StatusCodeText);
                lvItem.SubItems.Add(StatusText);
                lvItem.SubItems.Add(DoFollow);
                lvItem.SubItems.Add(LinkTarget);
                lvItem.SubItems.Add(LinkTextLabel);
                lvItem.SubItems.Add(LinkTitleLabel);
                lvItem.SubItems.Add(AltTextLabel);

                ListViewItems.Add(lvItem);
            }
            catch (Exception ex)
            {
                this.DebugMsg(string.Format("MacroscopeDisplayLinks 2: {0}", ex.Message));
            }
        }

        if (lvItem == null)
        {
            continue;
        }

        // Default colour for every column; specific columns are overridden below.
        for (int i = 0; i < lvItem.SubItems.Count; i++)
        {
            lvItem.SubItems[i].ForeColor = Color.Blue;
        }

        bool SourceAllowed = AllowedHosts.IsAllowedFromUrl(Url);

        lvItem.SubItems[ColUrl].ForeColor = SourceAllowed ? Color.Green : Color.Gray;

        if (AllowedHosts.IsAllowedFromUrl(UrlTarget))
        {
            lvItem.SubItems[ColUrlTarget].ForeColor = Color.Green;
        }
        else
        {
            lvItem.SubItems[ColUrlTarget].ForeColor = Color.Gray;
        }

        if (SourceAllowed)
        {
            if (HyperlinkOut.GetDoFollow())
            {
                lvItem.SubItems[ColDoFollow].ForeColor = Color.Green;
            }
            else
            {
                lvItem.SubItems[ColDoFollow].ForeColor = Color.Red;
            }
        }
        else
        {
            lvItem.SubItems[ColDoFollow].ForeColor = Color.Gray;
        }

        if (LinkTextMissing)
        {
            lvItem.SubItems[ColLinkAnchorTextLabel].ForeColor = Color.Gray;
        }

        if (LinkTitleMissing)
        {
            lvItem.SubItems[ColLinkTitleLabel].ForeColor = Color.Gray;
        }

        if (AltTextMissing)
        {
            lvItem.SubItems[ColAltTextLabel].ForeColor = Color.Gray;
        }

        // A link with no descriptive text at all is highlighted in red.
        if (LinkTextMissing && LinkTitleMissing && AltTextMissing)
        {
            lvItem.SubItems[ColLinkAnchorTextLabel].ForeColor = Color.Red;
            lvItem.SubItems[ColLinkTitleLabel].ForeColor = Color.Red;
            lvItem.SubItems[ColAltTextLabel].ForeColor = Color.Red;
        }
    }
}
/**************************************************************************/
/// <summary>
/// Renders every document named in <paramref name="UrlList"/> into the tree
/// view, reporting progress through a progress form when progress dialogues
/// are enabled. Does nothing when the collection or the URL list is empty.
/// </summary>
/// <param name="DocCollection">Collection the URLs are resolved against.</param>
/// <param name="UrlList">URLs of the documents to render.</param>
private void RenderTreeView(
    MacroscopeDocumentCollection DocCollection,
    List<string> UrlList
)
{
    if (DocCollection.CountDocuments() == 0)
    {
        return;
    }

    if (UrlList.Count == 0)
    {
        return;
    }

    MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

    // try/finally so the progress form is always closed and disposed.
    try
    {
        decimal Count = 0;
        decimal TotalDocs = (decimal)UrlList.Count;
        decimal MajorPercentage = ((decimal)100 / TotalDocs) * Count;

        if (MacroscopePreferencesManager.GetShowProgressDialogues())
        {
            ProgressForm.UpdatePercentages(
                Title: "Preparing Display",
                Message: "Processing document collection for display:",
                MajorPercentage: MajorPercentage,
                ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
            );
        }

        this.tvTreeView.BeginUpdate();

        // EndUpdate must run even if rendering a node throws.
        try
        {
            DebugMsg(string.Format("HIERARCHY: {0}", "BASE"));

            foreach (string Url in UrlList)
            {
                MacroscopeDocument msDoc = DocCollection.GetDocumentByUrl(Url: Url);

                if (msDoc != null)
                {
                    this.RenderTreeView(msDoc, Url);
                }

                Count++;

                // Only touch the progress form when progress dialogues are
                // enabled, matching the initial update and the close below.
                if (MacroscopePreferencesManager.GetShowProgressDialogues())
                {
                    MajorPercentage = ((decimal)100 / TotalDocs) * Count;

                    ProgressForm.UpdatePercentages(
                        Title: null,
                        Message: null,
                        MajorPercentage: MajorPercentage,
                        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                    );
                }
            }

            this.tvTreeView.ExpandAll();
        }
        finally
        {
            this.tvTreeView.EndUpdate();
        }
    }
    finally
    {
        if (MacroscopePreferencesManager.GetShowProgressDialogues())
        {
            ProgressForm.DoClose();
        }

        ProgressForm.Dispose();
    }
}
/**************************************************************************/
/// <summary>
/// Populates the display list view with one row per document named in
/// <paramref name="UrlList"/>, showing URL, status and MIME type followed by
/// one column per custom filter slot describing the filter result.
/// </summary>
/// <param name="DocCollection">Collection the URLs are resolved against.</param>
/// <param name="UrlList">URLs of the documents to render.</param>
/// <param name="CustomFilter">Custom filter set supplying the filter patterns.</param>
/// <exception cref="InvalidOperationException">
/// Thrown when <c>FilterColOffset</c> is still -1.
/// </exception>
private void RenderListView(
    MacroscopeDocumentCollection DocCollection,
    List<string> UrlList,
    MacroscopeCustomFilters CustomFilter
)
{
    if (this.FilterColOffset == -1)
    {
        throw new InvalidOperationException("this.FilterColOffset invalid");
    }

    if (DocCollection.CountDocuments() == 0)
    {
        return;
    }

    int FilterCount = CustomFilter.GetSize();
    Dictionary<string, int> FilterColsTable = new Dictionary<string, int>(FilterCount);
    List<ListViewItem> ListViewItems = new List<ListViewItem>();
    MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

    // try/finally so the progress form is closed and disposed even when
    // rendering throws part-way through.
    try
    {
        decimal Count = 0;
        decimal TotalDocs = (decimal)DocCollection.CountDocuments();
        decimal MajorPercentage = ((decimal)100 / TotalDocs) * Count;

        if (MacroscopePreferencesManager.GetShowProgressDialogues())
        {
            ProgressForm.UpdatePercentages(
                Title: "Preparing Display",
                Message: "Processing document collection for display:",
                MajorPercentage: MajorPercentage,
                ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
            );
        }

        // Map each filter pattern to its 1-based slot. A pattern that repeats
        // an earlier one is registered under a placeholder "EMPTYn" key.
        for (int Slot = 0; Slot < FilterCount; Slot++)
        {
            string FilterPattern = CustomFilter.GetPattern(Slot).Key;

            if (FilterColsTable.ContainsKey(FilterPattern))
            {
                FilterColsTable.Add(string.Format("EMPTY{0}", Slot + 1), Slot + 1);
            }
            else
            {
                FilterColsTable.Add(FilterPattern, Slot + 1);
            }
        }

        foreach (string Url in UrlList)
        {
            MacroscopeDocument msDoc = DocCollection.GetDocumentByUrl(Url: Url);

            if (msDoc == null)
            {
                continue;
            }

            string DocUrl = msDoc.GetUrl();
            string PairKey = DocUrl;
            string StatusCode = ((int)msDoc.GetStatusCode()).ToString();
            string Status = msDoc.GetStatusCode().ToString();
            string MimeType = msDoc.GetMimeType();

            if (!CustomFilter.CanApplyCustomFiltersToDocument(msDoc: msDoc))
            {
                continue;
            }

            ListViewItem lvItem;

            if (this.DisplayListView.Items.ContainsKey(PairKey))
            {
                lvItem = this.DisplayListView.Items[PairKey];
            }
            else
            {
                lvItem = new ListViewItem(PairKey);
                lvItem.UseItemStyleForSubItems = false;
                lvItem.Name = PairKey;

                // Four fixed columns, then one column per filter slot.
                lvItem.SubItems.Add("");
                lvItem.SubItems.Add("");
                lvItem.SubItems.Add("");
                lvItem.SubItems.Add("");

                for (int Slot = 0; Slot < FilterCount; Slot++)
                {
                    lvItem.SubItems.Add("");
                }

                ListViewItems.Add(lvItem);
            }

            if (lvItem == null)
            {
                // Log and skip instead of dereferencing the missing item below.
                DebugMsg(string.Format("MacroscopeDisplayCustomFilters MISSING: {0}", PairKey));
                continue;
            }

            try
            {
                lvItem.SubItems[ColUrl].Text = DocUrl;
                lvItem.SubItems[ColStatusCode].Text = StatusCode;
                lvItem.SubItems[ColStatus].Text = Status;
                lvItem.SubItems[ColMimeType].Text = MimeType;

                for (int Slot = 0; Slot < FilterCount; Slot++)
                {
                    string FilterPattern = CustomFilter.GetPattern(Slot: Slot).Key;
                    KeyValuePair<string, MacroscopeConstants.TextPresence> Pair = msDoc.GetCustomFilteredItem(Text: FilterPattern);
                    int ColOffset = this.FilterColOffset + FilterColsTable[FilterPattern];

                    if ((Pair.Key == null) || (Pair.Value == MacroscopeConstants.TextPresence.UNDEFINED))
                    {
                        continue;
                    }

                    lvItem.SubItems[ColOffset].Text = MacroscopeConstants.TextPresenceLabels[Pair.Value];

                    switch (Pair.Value)
                    {
                        case MacroscopeConstants.TextPresence.CONTAINS_STRING:
                        case MacroscopeConstants.TextPresence.NOT_CONTAINS_STRING:
                        case MacroscopeConstants.TextPresence.CONTAINS_REGEX:
                        case MacroscopeConstants.TextPresence.NOT_CONTAINS_REGEX:
                            lvItem.SubItems[ColOffset].ForeColor = Color.Green;
                            break;

                        case MacroscopeConstants.TextPresence.MUST_CONTAIN_STRING:
                        case MacroscopeConstants.TextPresence.SHOULD_NOT_CONTAIN_STRING:
                        case MacroscopeConstants.TextPresence.MUST_CONTAIN_REGEX:
                        case MacroscopeConstants.TextPresence.SHOULD_NOT_CONTAIN_REGEX:
                            lvItem.SubItems[ColOffset].ForeColor = Color.Red;
                            break;

                        default:
                            lvItem.SubItems[ColOffset].ForeColor = Color.Gray;
                            break;
                    }
                }
            }
            catch (Exception ex)
            {
                DebugMsg(string.Format("MacroscopeDisplayCustomFilters: {0}", ex.Message));
                DebugMsg(string.Format("MacroscopeDisplayCustomFilters: {0}", ex.StackTrace));
            }

            lvItem.SubItems[ColUrl].ForeColor = msDoc.GetIsInternal() ? Color.Green : Color.Gray;

            // Status colour is chosen from the first digit of the status code.
            Color StatusColor;

            if (Regex.IsMatch(StatusCode, "^[2]"))
            {
                StatusColor = Color.Green;
            }
            else if (Regex.IsMatch(StatusCode, "^[3]"))
            {
                StatusColor = Color.Goldenrod;
            }
            else if (Regex.IsMatch(StatusCode, "^[45]"))
            {
                StatusColor = Color.Red;
            }
            else
            {
                StatusColor = Color.Blue;
            }

            lvItem.SubItems[ColStatusCode].ForeColor = StatusColor;
            lvItem.SubItems[ColStatus].ForeColor = StatusColor;

            if (MacroscopePreferencesManager.GetShowProgressDialogues())
            {
                Count++;
                MajorPercentage = ((decimal)100 / TotalDocs) * Count;

                ProgressForm.UpdatePercentages(
                    Title: null,
                    Message: null,
                    MajorPercentage: MajorPercentage,
                    ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                );
            }
        }

        this.DisplayListView.Items.AddRange(ListViewItems.ToArray());

        this.DisplayListView.AutoResizeColumns(ColumnHeaderAutoResizeStyle.HeaderSize);

        // Fixed widths override the auto-sized result for the leading columns.
        this.DisplayListView.Columns[ColUrl].Width = 300;
        this.DisplayListView.Columns[ColStatusCode].Width = 100;
        this.DisplayListView.Columns[ColStatus].Width = 100;
        this.DisplayListView.Columns[ColMimeType].Width = 100;
    }
    finally
    {
        if (MacroscopePreferencesManager.GetShowProgressDialogues())
        {
            ProgressForm.DoClose();
        }

        ProgressForm.Dispose();
    }
}
/** Render List ***********************************************************/
/// <summary>
/// Renders every document named in <paramref name="UrlList"/> by delegating to
/// the per-document RenderListView overload, then adds the collected rows to
/// the display list view in one AddRange call. Does nothing when the
/// collection or the URL list is empty.
/// </summary>
/// <param name="DocCollection">Collection the URLs are resolved against.</param>
/// <param name="UrlList">URLs of the documents to render.</param>
public void RenderListView(
    MacroscopeDocumentCollection DocCollection,
    List<string> UrlList
)
{
    if (DocCollection.CountDocuments() == 0)
    {
        return;
    }

    if (UrlList.Count == 0)
    {
        return;
    }

    List<ListViewItem> ListViewItems = new List<ListViewItem>(UrlList.Count);
    MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

    // try/finally so the progress form is closed and disposed even when a
    // per-document render throws.
    try
    {
        decimal Count = 0;
        decimal TotalDocs = (decimal)UrlList.Count;
        decimal MajorPercentage = ((decimal)100 / TotalDocs) * Count;

        if (MacroscopePreferencesManager.GetShowProgressDialogues())
        {
            ProgressForm.ControlBox = false;

            ProgressForm.UpdatePercentages(
                Title: "Preparing Display",
                Message: "Processing document collection for display:",
                MajorPercentage: MajorPercentage,
                ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
            );
        }

        lock (this.DisplayListViewLock)
        {
            foreach (string Url in UrlList)
            {
                // NOTE(review): DoEvents pumps the message loop while the lock
                // is held - confirm that re-entrant handlers cannot reach this
                // method again.
                Application.DoEvents();

                MacroscopeDocument msDoc = DocCollection.GetDocumentByUrl(Url: Url);

                if (msDoc != null)
                {
                    this.RenderListView(
                        ListViewItems: ListViewItems,
                        DocCollection: DocCollection,
                        msDoc: msDoc,
                        Url: Url
                    );
                }

                if (MacroscopePreferencesManager.GetShowProgressDialogues())
                {
                    Count++;
                    MajorPercentage = ((decimal)100 / TotalDocs) * Count;

                    ProgressForm.UpdatePercentages(
                        Title: null,
                        Message: null,
                        MajorPercentage: MajorPercentage,
                        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                    );
                }
            }

            this.DisplayListView.Items.AddRange(ListViewItems.ToArray());
        }
    }
    finally
    {
        if (MacroscopePreferencesManager.GetShowProgressDialogues())
        {
            ProgressForm.DoClose();
        }

        ProgressForm.Dispose();
    }
}
/**************************************************************************/
/// <summary>
/// Adds a worksheet listing sitemap errors. A row is written for each outlink
/// of an internal sitemap XML document when the linked document is internal
/// and either has a status code in the 400-599 range or is not allowed by
/// robots. The populated range is then turned into a table.
/// </summary>
/// <param name="JobMaster">Supplies the document collection and allowed hosts.</param>
/// <param name="wb">Workbook that receives the new worksheet.</param>
/// <param name="WorksheetLabel">Name of the worksheet to add.</param>
private void BuildWorksheetSitemapXmlErrors(
    MacroscopeJobMaster JobMaster,
    XLWorkbook wb,
    string WorksheetLabel
)
{
    var ws = wb.Worksheets.Add(WorksheetLabel);

    int iRow = 1;
    int iCol = 1;
    int iColMax = 1;

    MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
    MacroscopeAllowedHosts AllowedHosts = JobMaster.GetAllowedHosts();

    // Header row.
    ws.Cell(iRow, iCol).Value = "Sitemap URL";
    iCol++;
    ws.Cell(iRow, iCol).Value = "Status Code";
    iCol++;
    ws.Cell(iRow, iCol).Value = "Robots";
    iCol++;
    ws.Cell(iRow, iCol).Value = "URL";

    iColMax = iCol;
    iRow++;

    foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
    {
        if (!msDoc.GetIsInternal() || !msDoc.IsDocumentType(Type: MacroscopeConstants.DocumentType.SITEMAPXML))
        {
            continue;
        }

        foreach (MacroscopeLink Outlink in msDoc.IterateOutlinks())
        {
            string TargetUrl = Outlink.GetTargetUrl();
            MacroscopeDocument msDocLinked = DocCollection.GetDocumentByUrl(Url: TargetUrl);

            // Skip targets for which the collection yields no document,
            // instead of dereferencing a null reference.
            if ((msDocLinked == null) || !msDocLinked.GetIsInternal())
            {
                continue;
            }

            int StatusCode = (int)msDocLinked.GetStatusCode();
            bool IsErrorStatus = (StatusCode >= 400) && (StatusCode <= 599);

            if (!IsErrorStatus && msDocLinked.GetAllowedByRobots())
            {
                continue;
            }

            iCol = 1;

            this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

            if (AllowedHosts.IsInternalUrl(Url: msDoc.GetUrl()))
            {
                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
            }
            else
            {
                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
            }

            iCol++;

            // NOTE(review): the row is selected on the linked document's status
            // and robots state, but these two cells are written from the
            // sitemap document (msDoc) - confirm this is intended.
            this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, msDoc);

            iCol++;

            this.InsertAndFormatRobotsCell(ws, iRow, iCol, msDoc);

            iCol++;

            this.InsertAndFormatUrlCell(ws, iRow, iCol, TargetUrl);

            if (AllowedHosts.IsInternalUrl(Url: TargetUrl))
            {
                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
            }
            else
            {
                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
            }

            iRow++;
        }
    }

    // iRow points one past the last written row.
    ws.Range(1, 1, iRow - 1, iColMax).CreateTable();
}
/**************************************************************************/
/// <summary>
/// Adds a worksheet of similar pages. For each internal HTML document a
/// Levenshtein analysis is run against the collection, and every document it
/// returns is written as a row: status code, status, origin URL, distance and
/// similar URL. Progress is reported throughout and the loops stop once the
/// progress form reports cancellation.
/// </summary>
/// <param name="JobMaster">Supplies the document collection and allowed hosts.</param>
/// <param name="wb">Workbook that receives the new worksheet.</param>
/// <param name="WorksheetLabel">Name of the worksheet to add.</param>
private void BuildWorksheetPageDuplicatePages(
    MacroscopeJobMaster JobMaster,
    XLWorkbook wb,
    string WorksheetLabel
)
{
    var Sheet = wb.Worksheets.Add(WorksheetLabel);

    string[] Headings = { "Status Code", "Status", "Origin URL", "Distance", "Similar URL" };
    int LastColumn = Headings.Length;
    int RowIndex = 1;

    MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
    MacroscopeAllowedHosts AllowedHosts = JobMaster.GetAllowedHosts();

    Dictionary<string, bool> CrossCheckList = MacroscopeLevenshteinAnalysis.GetCrossCheckList(
        Capacity: DocCollection.CountDocuments()
    );

    decimal DocCount = (decimal)DocCollection.CountDocuments();
    decimal OuterCount = 0;

    // Header row occupies row 1; data starts on row 2.
    for (int Column = 1; Column <= LastColumn; Column++)
    {
        Sheet.Cell(RowIndex, Column).Value = Headings[Column - 1];
    }

    RowIndex++;

    foreach (string UrlLeft in DocCollection.DocumentUrls())
    {
        MacroscopeDocument msDocLeft = DocCollection.GetDocumentByUrl(Url: UrlLeft);

        OuterCount++;
        decimal InnerCount = 0;

        if (DocCount > 0)
        {
            this.ProgressForm.UpdatePercentages(
                Title: null,
                Message: null,
                MajorPercentage: -1,
                ProgressLabelMajor: string.Format("Documents Processed: {0}", OuterCount),
                MinorPercentage: ((decimal)100 / DocCount) * OuterCount,
                ProgressLabelMinor: UrlLeft,
                SubMinorPercentage: 0,
                ProgressLabelSubMinor: ""
            );
        }

        // Only internal HTML documents are analysed.
        if (msDocLeft.GetIsExternal() || !msDocLeft.IsDocumentType(Type: MacroscopeConstants.DocumentType.HTML))
        {
            continue;
        }

        MacroscopeLevenshteinAnalysis Analysis = new MacroscopeLevenshteinAnalysis(
            msDoc: msDocLeft,
            SizeDifference: MacroscopePreferencesManager.GetMaxLevenshteinSizeDifference(),
            Threshold: MacroscopePreferencesManager.GetMaxLevenshteinDistance(),
            CrossCheckList: CrossCheckList,
            IPercentageDone: this
        );

        Dictionary<MacroscopeDocument, int> Matches = Analysis.AnalyzeDocCollection(
            DocCollection: DocCollection
        );

        decimal MatchCount = (decimal)Matches.Count;

        foreach (KeyValuePair<MacroscopeDocument, int> Match in Matches)
        {
            int StatusCode = (int)msDocLeft.GetStatusCode();
            HttpStatusCode Status = msDocLeft.GetStatusCode();
            string UrlDuplicate = Match.Key.GetUrl();
            int Distance = Match.Value;

            InnerCount++;

            if (DocCount > 0)
            {
                this.ProgressForm.UpdatePercentages(
                    Title: null,
                    Message: null,
                    MajorPercentage: -1,
                    ProgressLabelMajor: string.Format("Documents Processed: {0}", OuterCount),
                    MinorPercentage: ((decimal)100 / DocCount) * OuterCount,
                    ProgressLabelMinor: UrlLeft,
                    SubMinorPercentage: ((decimal)100 / MatchCount) * InnerCount,
                    ProgressLabelSubMinor: UrlDuplicate
                );
            }

            // Columns 1-2: status code and status of the origin document.
            this.InsertAndFormatStatusCodeCell(Sheet, RowIndex, 1, StatusCode);
            this.InsertAndFormatStatusCodeCell(Sheet, RowIndex, 2, Status);

            // Column 3: origin URL, green when internal.
            this.InsertAndFormatUrlCell(Sheet, RowIndex, 3, UrlLeft);
            Sheet.Cell(RowIndex, 3).Style.Font.SetFontColor(
                AllowedHosts.IsInternalUrl(Url: UrlLeft) ? XLColor.Green : XLColor.Gray
            );

            // Column 4: distance, red when within the configured maximum.
            this.InsertAndFormatContentCell(Sheet, RowIndex, 4, Distance.ToString());
            Sheet.Cell(RowIndex, 4).Style.Font.SetFontColor(
                (Distance <= MacroscopePreferencesManager.GetMaxLevenshteinDistance()) ? XLColor.Red : XLColor.Green
            );

            // Column 5: similar URL, green when internal.
            this.InsertAndFormatUrlCell(Sheet, RowIndex, 5, UrlDuplicate);
            Sheet.Cell(RowIndex, 5).Style.Font.SetFontColor(
                AllowedHosts.IsInternalUrl(Url: UrlDuplicate) ? XLColor.Green : XLColor.Gray
            );

            RowIndex++;

            if (this.ProgressForm.Cancelled())
            {
                break;
            }
        }

        if (this.ProgressForm.Cancelled())
        {
            break;
        }

        //Thread.Yield();
    }

    // RowIndex points one past the last written row.
    Sheet.Range(1, 1, RowIndex - 1, LastColumn).CreateTable();
}
/**************************************************************************/
/// <summary>
/// Adds or updates one list view row per outlink of <paramref name="msDoc"/>.
/// Rows already present in the display list view (matched by key) are updated
/// in place; new rows are appended to <paramref name="ListViewItems"/>.
/// </summary>
/// <param name="ListViewItems">Receives newly created rows.</param>
/// <param name="DocCollection">Used to look up the status of link targets.</param>
/// <param name="msDoc">Document whose outlinks are rendered.</param>
/// <param name="Url">URL of the source document, shown in the URL column.</param>
protected override void RenderListView(
    List<ListViewItem> ListViewItems,
    MacroscopeDocumentCollection DocCollection,
    MacroscopeDocument msDoc,
    string Url
)
{
    MacroscopeAllowedHosts AllowedHosts = this.MainForm.GetJobMaster().GetAllowedHosts();

    foreach (MacroscopeLink Link in msDoc.IterateOutlinks())
    {
        string LinkType = Link.GetLinkType().ToString();
        string UrlTarget = Link.GetTargetUrl();
        string PairKey = string.Join(":", UrlToDigest(Url: Url), UrlToDigest(Url: UrlTarget));
        string AltTextLabel = Link.GetAltText() ?? "";
        string RawSourceUrl = Link.GetRawSourceUrl() ?? "";
        string RawTargetUrl = Link.GetRawTargetUrl() ?? "";

        // Shown when the target is not in the collection or the lookup fails.
        string StatusCodeText = "Not crawled";
        string StatusText = "Not crawled";

        try
        {
            if (DocCollection.ContainsDocument(Url: Link.GetTargetUrl()))
            {
                HttpStatusCode TargetStatus = DocCollection.GetDocumentByUrl(Url: Link.GetTargetUrl()).GetStatusCode();
                StatusCodeText = ((int)TargetStatus).ToString();
                StatusText = TargetStatus.ToString();
            }
        }
        catch (Exception ex)
        {
            this.DebugMsg(ex.Message);
        }

        string DoFollow = Link.GetDoFollow() ? "Follow" : "No Follow";

        ListViewItem lvItem = null;
        bool AlreadyListed = this.DisplayListView.Items.ContainsKey(PairKey);

        try
        {
            if (AlreadyListed)
            {
                lvItem = this.DisplayListView.Items[PairKey];
                lvItem.SubItems[ColType].Text = LinkType;
                lvItem.SubItems[ColUrl].Text = Url;
                lvItem.SubItems[ColUrlTarget].Text = UrlTarget;
                lvItem.SubItems[ColStatusCode].Text = StatusCodeText;
                lvItem.SubItems[ColStatus].Text = StatusText;
                lvItem.SubItems[ColDoFollow].Text = DoFollow;
                lvItem.SubItems[ColAltTextLabel].Text = AltTextLabel;
                lvItem.SubItems[ColRawSourceUrl].Text = RawSourceUrl;
                lvItem.SubItems[ColRawTargetUrl].Text = RawTargetUrl;
            }
            else
            {
                lvItem = new ListViewItem(PairKey);
                lvItem.UseItemStyleForSubItems = false;
                lvItem.Name = PairKey;

                // The first sub-item already exists; the rest are appended in column order.
                lvItem.SubItems[ColType].Text = LinkType;
                lvItem.SubItems.Add(Url);
                lvItem.SubItems.Add(UrlTarget);
                lvItem.SubItems.Add(StatusCodeText);
                lvItem.SubItems.Add(StatusText);
                lvItem.SubItems.Add(DoFollow);
                lvItem.SubItems.Add(AltTextLabel);
                lvItem.SubItems.Add(RawSourceUrl);
                lvItem.SubItems.Add(RawTargetUrl);

                ListViewItems.Add(lvItem);
            }
        }
        catch (Exception ex)
        {
            if (AlreadyListed)
            {
                this.DebugMsg(string.Format("MacroscopeDisplayLinks 1: {0}", ex.Message));
            }
            else
            {
                this.DebugMsg(string.Format("MacroscopeDisplayLinks 2: {0}", ex.Message));
            }
        }

        if (lvItem == null)
        {
            continue;
        }

        // Default colour for every column; specific columns are overridden below.
        foreach (ListViewItem.ListViewSubItem SubItem in lvItem.SubItems)
        {
            SubItem.ForeColor = Color.Blue;
        }

        lvItem.SubItems[ColUrl].ForeColor = AllowedHosts.IsAllowedFromUrl(Url) ? Color.Green : Color.Gray;
        lvItem.SubItems[ColUrlTarget].ForeColor = AllowedHosts.IsAllowedFromUrl(UrlTarget) ? Color.Green : Color.Gray;

        if (!AllowedHosts.IsAllowedFromUrl(UrlTarget))
        {
            lvItem.SubItems[ColDoFollow].ForeColor = Color.Gray;
        }
        else
        {
            lvItem.SubItems[ColDoFollow].ForeColor = Link.GetDoFollow() ? Color.Green : Color.Red;
        }
    }
}
/**************************************************************************/
/// <summary>
/// Writes the similar pages sheet to a CSV writer. For each internal HTML
/// document a Levenshtein analysis is run against the collection, and every
/// document it returns is written as a record: status code, status, origin
/// URL, distance and similar URL. Progress is reported throughout and the
/// loops stop once the progress form reports cancellation.
/// </summary>
/// <param name="JobMaster">Supplies the document collection.</param>
/// <param name="ws">CSV writer that receives the header and data records.</param>
private void BuildWorksheetPageDuplicatePages(
    MacroscopeJobMaster JobMaster,
    CsvWriter ws
)
{
    MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
    MacroscopeAllowedHosts AllowedHosts = JobMaster.GetAllowedHosts();

    Dictionary<string, bool> CrossCheckList = MacroscopeLevenshteinAnalysis.GetCrossCheckList(
        Capacity: DocCollection.CountDocuments()
    );

    decimal DocCount = (decimal)DocCollection.CountDocuments();
    decimal OuterCount = 0;

    // Header record.
    foreach (string Heading in new string[] { "Status Code", "Status", "Origin URL", "Distance", "Similar URL" })
    {
        ws.WriteField(Heading);
    }

    ws.NextRecord();

    foreach (string UrlLeft in DocCollection.DocumentUrls())
    {
        MacroscopeDocument msDocLeft = DocCollection.GetDocumentByUrl(Url: UrlLeft);

        OuterCount++;
        decimal InnerCount = 0;

        if (DocCount > 0)
        {
            this.ProgressForm.UpdatePercentages(
                Title: null,
                Message: null,
                MajorPercentage: -1,
                ProgressLabelMajor: string.Format("Documents Processed: {0}", OuterCount),
                MinorPercentage: ((decimal)100 / DocCount) * OuterCount,
                ProgressLabelMinor: UrlLeft,
                SubMinorPercentage: 0,
                ProgressLabelSubMinor: ""
            );
        }

        // Only internal HTML documents are analysed.
        if (msDocLeft.GetIsExternal() || !msDocLeft.IsDocumentType(Type: MacroscopeConstants.DocumentType.HTML))
        {
            continue;
        }

        MacroscopeLevenshteinAnalysis Analysis = new MacroscopeLevenshteinAnalysis(
            msDoc: msDocLeft,
            SizeDifference: MacroscopePreferencesManager.GetMaxLevenshteinSizeDifference(),
            Threshold: MacroscopePreferencesManager.GetMaxLevenshteinDistance(),
            CrossCheckList: CrossCheckList,
            IPercentageDone: this
        );

        Dictionary<MacroscopeDocument, int> Matches = Analysis.AnalyzeDocCollection(
            DocCollection: DocCollection
        );

        decimal MatchCount = (decimal)Matches.Count;

        foreach (KeyValuePair<MacroscopeDocument, int> Match in Matches)
        {
            int StatusCode = (int)msDocLeft.GetStatusCode();
            HttpStatusCode Status = msDocLeft.GetStatusCode();
            string UrlDuplicate = Match.Key.GetUrl();
            int Distance = Match.Value;

            InnerCount++;

            if (DocCount > 0)
            {
                this.ProgressForm.UpdatePercentages(
                    Title: null,
                    Message: null,
                    MajorPercentage: -1,
                    ProgressLabelMajor: string.Format("Documents Processed: {0}", OuterCount),
                    MinorPercentage: ((decimal)100 / DocCount) * OuterCount,
                    ProgressLabelMinor: UrlLeft,
                    SubMinorPercentage: ((decimal)100 / MatchCount) * InnerCount,
                    ProgressLabelSubMinor: UrlDuplicate
                );
            }

            this.InsertAndFormatStatusCodeCell(ws, StatusCode);
            this.InsertAndFormatStatusCodeCell(ws, Status);
            this.InsertAndFormatUrlCell(ws, UrlLeft);
            this.InsertAndFormatContentCell(ws, Distance.ToString());
            this.InsertAndFormatUrlCell(ws, UrlDuplicate);
            ws.NextRecord();

            if (this.ProgressForm.Cancelled())
            {
                break;
            }
        }

        if (this.ProgressForm.Cancelled())
        {
            break;
        }

        //Thread.Yield();
    }
}
/**************************************************************************/
/// <summary>
/// Renders the sitemap errors table of <paramref name="DocCollection"/> into
/// the display list view: one row per (sitemap URL, target URL) entry. Rows
/// already present (matched by key) are updated in place; new rows are added
/// in a single AddRange call.
/// </summary>
/// <param name="DocCollection">
/// Collection that supplies the sitemap errors table and resolves the sitemap
/// and target documents used for colouring.
/// </param>
private void RenderListViewSitemapErrors(MacroscopeDocumentCollection DocCollection)
{
    List<Dictionary<string, string>> CompiledTable = DocCollection.GetSitemapErrorsAsTable();
    List<ListViewItem> ListViewItems = new List<ListViewItem>(CompiledTable.Count);

    foreach (Dictionary<string, string> Entry in CompiledTable)
    {
        string SitemapUrl = Entry["sitemap_url"];
        string StatusCode = Entry["status_code"];
        string Robots = Entry["robots"];
        string TargetUrl = Entry["target_url"];
        string PairKey = string.Join("::::::::", SitemapUrl, TargetUrl);

        MacroscopeDocument msDoc = DocCollection.GetDocumentByUrl(Url: SitemapUrl);
        MacroscopeDocument msDocLinked = DocCollection.GetDocumentByUrl(Url: TargetUrl);

        ListViewItem lvItem = null;

        if (this.DisplayListView.Items.ContainsKey(PairKey))
        {
            try
            {
                lvItem = this.DisplayListView.Items[PairKey];
                lvItem.SubItems[0].Text = SitemapUrl;
                lvItem.SubItems[1].Text = StatusCode;
                lvItem.SubItems[2].Text = Robots;
                lvItem.SubItems[3].Text = TargetUrl;
            }
            catch (Exception ex)
            {
                DebugMsg(string.Format("RenderListViewSitemapErrors 1: {0}", ex.Message));
            }
        }
        else
        {
            try
            {
                lvItem = new ListViewItem(PairKey);
                lvItem.UseItemStyleForSubItems = false;
                lvItem.Name = PairKey;

                // The first sub-item already exists; the rest are appended in column order.
                lvItem.SubItems[0].Text = SitemapUrl;
                lvItem.SubItems.Add(StatusCode);
                lvItem.SubItems.Add(Robots);
                lvItem.SubItems.Add(TargetUrl);

                ListViewItems.Add(lvItem);
            }
            catch (Exception ex)
            {
                DebugMsg(string.Format("RenderListViewSitemapErrors 2: {0}", ex.Message));
            }
        }

        // Nothing to colour when the row could not be obtained or created.
        if (lvItem == null)
        {
            continue;
        }

        lvItem.ForeColor = Color.Blue;

        // A document missing from the collection is treated as not internal.
        bool SitemapIsInternal = (msDoc != null) && msDoc.GetIsInternal();
        bool TargetIsInternal = (msDocLinked != null) && msDocLinked.GetIsInternal();

        lvItem.SubItems[0].ForeColor = SitemapIsInternal ? Color.Green : Color.Gray;

        if (msDocLinked != null)
        {
            lvItem.SubItems[2].ForeColor = msDocLinked.GetAllowedByRobots() ? Color.Green : Color.Red;
        }

        // The gray branch pairs with the internal check on the target
        // document, not with the null check on the row.
        lvItem.SubItems[3].ForeColor = TargetIsInternal ? Color.Green : Color.Gray;
    }

    this.DisplayListView.Items.AddRange(ListViewItems.ToArray());
}