/**************************************************************************/

/// <summary>
/// Renders one list view row for every outgoing hyperlink of
/// <paramref name="msDoc"/> whose target URL contains
/// <paramref name="UrlFragment"/>.
/// </summary>
/// <remarks>
/// Each row is keyed by the UrlToDigest values of the source and target URLs,
/// joined with ":". A row whose key is already present in DisplayListView is
/// updated in place; otherwise a new row is created and appended to
/// <paramref name="ListViewItems"/>. Status columns read "Not crawled" unless
/// the target URL is found in the job's document collection.
/// </remarks>
/// <param name="ListViewItems">Receives the rows newly created by this call.</param>
/// <param name="msDoc">Document whose outgoing hyperlinks are listed.</param>
/// <param name="Url">Source URL shown in the first column of every row.</param>
/// <param name="UrlFragment">Text that a target URL must contain for its link to be listed.</param>
private void RenderListViewSearchTargetUrls(
  List<ListViewItem> ListViewItems,
  MacroscopeDocument msDoc,
  string Url,
  string UrlFragment
)
{

  MacroscopeAllowedHosts AllowedHosts = this.MainForm.GetJobMaster().GetAllowedHosts();
  MacroscopeHyperlinksOut HyperlinksOut = msDoc.GetHyperlinksOut();
  MacroscopeDocumentCollection DocCollection = this.MainForm.GetJobMaster().GetDocCollection();

  foreach (MacroscopeHyperlinkOut HyperlinkOut in HyperlinksOut.IterateLinks())
  {

    string UrlTarget = HyperlinkOut.GetTargetUrl();

    // Filter first: links without a target, or whose target does not contain
    // the fragment, never produce a row, so skip them before the target URL is
    // digested or looked up in the document collection.
    if ((UrlTarget == null) || (UrlTarget.IndexOf(UrlFragment, StringComparison.CurrentCulture) < 0))
    {
      continue;
    }

    string StatusCodeText = "Not crawled";
    string StatusText = "Not crawled";
    string PairKey = string.Join(":", UrlToDigest(Url: Url), UrlToDigest(Url: UrlTarget));
    string LinkTarget = HyperlinkOut.GetLinkTarget();
    string LinkText = HyperlinkOut.GetAnchorText();
    string LinkTitle = HyperlinkOut.GetTitle();
    string AltText = HyperlinkOut.GetAltText();
    bool IsDoFollow = HyperlinkOut.GetDoFollow();

    // A null value is treated the same as an empty one, so that a link with an
    // absent attribute is reported as MISSING instead of raising an exception.
    bool LinkTextMissing = string.IsNullOrEmpty(LinkText);
    bool LinkTitleMissing = string.IsNullOrEmpty(LinkTitle);
    bool AltTextMissing = string.IsNullOrEmpty(AltText);

    string LinkTextLabel = LinkTextMissing ? "MISSING" : LinkText;
    string LinkTitleLabel = LinkTitleMissing ? "MISSING" : LinkTitle;
    string AltTextLabel = AltTextMissing ? "MISSING" : AltText;
    string DoFollow = IsDoFollow ? "Follow" : "No Follow";

    // Best effort: a failed status lookup is logged and the row keeps the
    // "Not crawled" placeholders.
    try
    {
      if (DocCollection.ContainsDocument(Url: UrlTarget))
      {
        HttpStatusCode StatusCode = DocCollection.GetDocumentByUrl(Url: UrlTarget).GetStatusCode();
        StatusCodeText = ((int)StatusCode).ToString();
        StatusText = StatusCode.ToString();
      }
      else
      {
        DebugMsg("Not in DocCollection");
      }
    }
    catch (Exception ex)
    {
      this.DebugMsg(ex.Message);
    }

    ListViewItem lvItem = null;

    if (this.DisplayListView.Items.ContainsKey(PairKey))
    {
      // The row is already displayed: refresh its cells in place.
      try
      {
        lvItem = this.DisplayListView.Items[PairKey];
        lvItem.SubItems[ColUrl].Text = Url;
        lvItem.SubItems[ColUrlTarget].Text = UrlTarget;
        lvItem.SubItems[ColStatusCode].Text = StatusCodeText;
        lvItem.SubItems[ColStatus].Text = StatusText;
        lvItem.SubItems[ColDoFollow].Text = DoFollow;
        lvItem.SubItems[ColLinkTarget].Text = LinkTarget;
        lvItem.SubItems[ColLinkAnchorTextLabel].Text = LinkTextLabel;
        lvItem.SubItems[ColLinkTitleLabel].Text = LinkTitleLabel;
        lvItem.SubItems[ColAltTextLabel].Text = AltTextLabel;
      }
      catch (Exception ex)
      {
        this.DebugMsg(string.Format("MacroscopeDisplayLinks 1: {0}", ex.Message));
      }
    }
    else
    {
      // New row: sub-items are appended in column order.
      try
      {
        lvItem = new ListViewItem(PairKey);
        lvItem.UseItemStyleForSubItems = false;
        lvItem.Name = PairKey;
        lvItem.SubItems[ColUrl].Text = Url;
        lvItem.SubItems.Add(UrlTarget);
        lvItem.SubItems.Add(StatusCodeText);
        lvItem.SubItems.Add(StatusText);
        lvItem.SubItems.Add(DoFollow);
        lvItem.SubItems.Add(LinkTarget);
        lvItem.SubItems.Add(LinkTextLabel);
        lvItem.SubItems.Add(LinkTitleLabel);
        lvItem.SubItems.Add(AltTextLabel);
        ListViewItems.Add(lvItem);
      }
      catch (Exception ex)
      {
        this.DebugMsg(string.Format("MacroscopeDisplayLinks 2: {0}", ex.Message));
      }
    }

    if (lvItem == null)
    {
      continue;
    }

    // Default colour for every cell; specific cells are overridden below.
    for (int i = 0; i < lvItem.SubItems.Count; i++)
    {
      lvItem.SubItems[i].ForeColor = Color.Blue;
    }

    bool SourceAllowed = AllowedHosts.IsAllowedFromUrl(Url);

    lvItem.SubItems[ColUrl].ForeColor = SourceAllowed ? Color.Green : Color.Gray;

    lvItem.SubItems[ColUrlTarget].ForeColor =
      AllowedHosts.IsAllowedFromUrl(UrlTarget) ? Color.Green : Color.Gray;

    // Follow state is only highlighted when the source URL is an allowed one.
    if (SourceAllowed)
    {
      lvItem.SubItems[ColDoFollow].ForeColor = IsDoFollow ? Color.Green : Color.Red;
    }
    else
    {
      lvItem.SubItems[ColDoFollow].ForeColor = Color.Gray;
    }

    // A missing descriptive text is gray, unless all three are missing, in
    // which case all three are flagged red.
    Color MissingColour =
      (LinkTextMissing && LinkTitleMissing && AltTextMissing) ? Color.Red : Color.Gray;

    if (LinkTextMissing)
    {
      lvItem.SubItems[ColLinkAnchorTextLabel].ForeColor = MissingColour;
    }

    if (LinkTitleMissing)
    {
      lvItem.SubItems[ColLinkTitleLabel].ForeColor = MissingColour;
    }

    if (AltTextMissing)
    {
      lvItem.SubItems[ColAltTextLabel].ForeColor = MissingColour;
    }

  }

}
/**************************************************************************/

/// <summary>
/// Renders one list view row for every outgoing hyperlink of
/// <paramref name="msDoc"/>.
/// </summary>
/// <remarks>
/// Rows are keyed by the UrlToDigest values of the source and target URLs,
/// joined with ":". A row whose key already exists in DisplayListView is
/// refreshed in place; otherwise a new row is built and appended to
/// <paramref name="ListViewItems"/>.
/// </remarks>
/// <param name="ListViewItems">Receives the rows newly created by this call.</param>
/// <param name="DocCollection">Not read by this method.</param>
/// <param name="msDoc">Document whose outgoing hyperlinks are listed.</param>
/// <param name="Url">Source URL shown in the first column of every row.</param>
protected override void RenderListView(
  List<ListViewItem> ListViewItems,
  MacroscopeDocumentCollection DocCollection,
  MacroscopeDocument msDoc,
  string Url
)
{

  MacroscopeAllowedHosts Hosts = this.MainForm.GetJobMaster().GetAllowedHosts();
  MacroscopeHyperlinksOut OutLinks = msDoc.GetHyperlinksOut();

  foreach (MacroscopeHyperlinkOut Link in OutLinks.IterateLinks())
  {

    ListViewItem Row = null;
    string TargetUrl = Link.GetTargetUrl();
    string RowKey = string.Join(":", UrlToDigest(Url), UrlToDigest(TargetUrl));
    string TargetAttribute = Link.GetLinkTarget();
    string AnchorText = Link.GetLinkText();
    string TitleText = Link.GetLinkTitle();
    string AltText = Link.GetAltText();
    string RawTargetUrl = Link.GetRawTargetUrl();
    string FollowLabel = Link.GetDoFollow() ? "Follow" : "No Follow";

    bool AnchorMissing = AnchorText.Length == 0;
    bool TitleMissing = TitleText.Length == 0;
    bool AltMissing = AltText.Length == 0;

    // Empty descriptive texts are displayed as a MISSING marker.
    string AnchorLabel = AnchorMissing ? "MISSING" : AnchorText;
    string TitleLabel = TitleMissing ? "MISSING" : TitleText;
    string AltLabel = AltMissing ? "MISSING" : AltText;

    if (!this.DisplayListView.Items.ContainsKey(RowKey))
    {
      // No row for this source/target pair yet: build one, column by column.
      try
      {
        Row = new ListViewItem(RowKey);
        Row.UseItemStyleForSubItems = false;
        Row.Name = RowKey;
        Row.SubItems[ColUrl].Text = Url;

        string[] TrailingCells =
        {
          TargetUrl,
          FollowLabel,
          TargetAttribute,
          AnchorLabel,
          TitleLabel,
          AltLabel,
          RawTargetUrl
        };

        foreach (string CellText in TrailingCells)
        {
          Row.SubItems.Add(CellText);
        }

        ListViewItems.Add(Row);
      }
      catch (Exception ex)
      {
        this.DebugMsg(string.Format("MacroscopeDisplayLinks 2: {0}", ex.Message));
      }
    }
    else
    {
      // Row already displayed: overwrite its cell texts.
      try
      {
        Row = this.DisplayListView.Items[RowKey];
        Row.SubItems[ColUrl].Text = Url;
        Row.SubItems[ColUrlTarget].Text = TargetUrl;
        Row.SubItems[ColDoFollow].Text = FollowLabel;
        Row.SubItems[ColLinkTarget].Text = TargetAttribute;
        Row.SubItems[ColLinkTextLabel].Text = AnchorLabel;
        Row.SubItems[ColLinkTitleLabel].Text = TitleLabel;
        Row.SubItems[ColAltTextLabel].Text = AltLabel;
        Row.SubItems[ColRawTargetUrl].Text = RawTargetUrl;
      }
      catch (Exception ex)
      {
        this.DebugMsg(string.Format("MacroscopeDisplayLinks 1: {0}", ex.Message));
      }
    }

    if (Row == null)
    {
      continue;
    }

    // Every cell starts blue; individual cells are then recoloured.
    foreach (ListViewItem.ListViewSubItem Cell in Row.SubItems)
    {
      Cell.ForeColor = Color.Blue;
    }

    Color SourceColour = Hosts.IsAllowedFromUrl(Url) ? Color.Green : Color.Gray;
    Row.SubItems[ColUrl].ForeColor = SourceColour;

    Color TargetColour = Hosts.IsAllowedFromUrl(TargetUrl) ? Color.Green : Color.Gray;
    Row.SubItems[ColUrlTarget].ForeColor = TargetColour;

    // The follow cell is gray unless the source URL is allowed, in which case
    // it is green for a followed link and red otherwise.
    Color FollowColour = Color.Gray;
    if (Hosts.IsAllowedFromUrl(Url))
    {
      FollowColour = Link.GetDoFollow() ? Color.Green : Color.Red;
    }
    Row.SubItems[ColDoFollow].ForeColor = FollowColour;

    // Missing texts end up gray, or red when all three are missing together.
    Color MissingColour = (AnchorMissing && TitleMissing && AltMissing) ? Color.Red : Color.Gray;

    if (AnchorMissing)
    {
      Row.SubItems[ColLinkTextLabel].ForeColor = MissingColour;
    }

    if (TitleMissing)
    {
      Row.SubItems[ColLinkTitleLabel].ForeColor = MissingColour;
    }

    if (AltMissing)
    {
      Row.SubItems[ColAltTextLabel].ForeColor = MissingColour;
    }

  }

}
/**************************************************************************/

/// <summary>
/// Adds a worksheet named <paramref name="WorksheetLabel"/> to
/// <paramref name="wb"/> and fills it with one row per outgoing hyperlink of
/// every document in the job's document collection, then turns the written
/// range into a table.
/// </summary>
/// <param name="JobMaster">Supplies the document collection and the allowed hosts.</param>
/// <param name="wb">Workbook that receives the new worksheet.</param>
/// <param name="WorksheetLabel">Name given to the new worksheet.</param>
private void BuildWorksheetPageHyperlinks(
  MacroscopeJobMaster JobMaster,
  XLWorkbook wb,
  string WorksheetLabel
)
{

  // Column positions, in the same order as the headings below.
  const int SourceColumn = 1;
  const int TargetColumn = 2;
  const int FollowColumn = 3;
  const int LinkTargetColumn = 4;
  const int AnchorTextColumn = 5;
  const int TitleColumn = 6;
  const int AltTextColumn = 7;
  const int RawTargetColumn = 8;

  var Sheet = wb.Worksheets.Add(WorksheetLabel);
  MacroscopeDocumentCollection Documents = JobMaster.GetDocCollection();
  MacroscopeAllowedHosts Hosts = JobMaster.GetAllowedHosts();

  string[] Headings =
  {
    "Source URL",
    "Target URL",
    "Follow",
    "Target",
    "Anchor Text",
    "Title",
    "Alt Text",
    "Raw Target URL"
  };

  // Header occupies row 1.
  for (int Index = 0; Index < Headings.Length; Index++)
  {
    Sheet.Cell(1, Index + 1).Value = Headings[Index];
  }

  int LastColumn = Headings.Length;
  int RowNumber = 2;

  foreach (MacroscopeDocument msDoc in Documents.IterateDocuments())
  {

    MacroscopeHyperlinksOut OutLinks = msDoc.GetHyperlinksOut();

    foreach (MacroscopeHyperlinkOut Link in OutLinks.IterateLinks())
    {

      // A null target URL is handled as an empty string from here on.
      string TargetUrl = Link.GetTargetUrl() ?? "";
      string LinkTarget = Link.GetLinkTarget();
      string AnchorText = Link.GetAnchorText();
      string Title = Link.GetTitle();
      string AltText = Link.GetAltText();
      string RawTargetUrl = Link.GetRawTargetUrl();
      string FollowLabel = Link.GetDoFollow() ? "Follow" : "No Follow";

      // Source URL: green when internal, gray otherwise.
      this.InsertAndFormatUrlCell(Sheet, RowNumber, SourceColumn, msDoc);
      XLColor SourceColour = Hosts.IsInternalUrl(Url: msDoc.GetUrl()) ? XLColor.Green : XLColor.Gray;
      Sheet.Cell(RowNumber, SourceColumn).Style.Font.SetFontColor(SourceColour);

      // Target URL: green when internal, gray when external; anything else
      // (including an empty URL) is rewritten through FormatIfMissing in red.
      this.InsertAndFormatUrlCell(Sheet, RowNumber, TargetColumn, TargetUrl);
      bool HasTarget = TargetUrl.Length > 0;

      if (HasTarget && Hosts.IsInternalUrl(Url: TargetUrl))
      {
        Sheet.Cell(RowNumber, TargetColumn).Style.Font.SetFontColor(XLColor.Green);
      }
      else if (HasTarget && Hosts.IsExternalUrl(Url: TargetUrl))
      {
        Sheet.Cell(RowNumber, TargetColumn).Style.Font.SetFontColor(XLColor.Gray);
      }
      else
      {
        this.InsertAndFormatContentCell(Sheet, RowNumber, TargetColumn, this.FormatIfMissing(TargetUrl));
        Sheet.Cell(RowNumber, TargetColumn).Style.Font.SetFontColor(XLColor.Red);
      }

      this.InsertAndFormatContentCell(Sheet, RowNumber, FollowColumn, FollowLabel);
      this.InsertAndFormatContentCell(Sheet, RowNumber, LinkTargetColumn, LinkTarget);
      this.InsertAndFormatContentCell(Sheet, RowNumber, AnchorTextColumn, this.FormatIfMissing(AnchorText));
      this.InsertAndFormatContentCell(Sheet, RowNumber, TitleColumn, this.FormatIfMissing(Title));
      this.InsertAndFormatContentCell(Sheet, RowNumber, AltTextColumn, this.FormatIfMissing(AltText));
      this.InsertAndFormatContentCell(Sheet, RowNumber, RawTargetColumn, RawTargetUrl);

      RowNumber++;

    }

  }

  // The table spans the header plus every row written above.
  Sheet.Range(1, 1, RowNumber - 1, LastColumn).CreateTable();

}
/**************************************************************************/

/// <summary>
/// Writes a header record followed by one record per outgoing hyperlink of
/// every document in the job's document collection.
/// </summary>
/// <param name="JobMaster">Supplies the document collection to export.</param>
/// <param name="ws">CSV writer that receives the records.</param>
private void BuildWorksheetPageHyperlinks(
  MacroscopeJobMaster JobMaster,
  CsvWriter ws
)
{

  MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

  string[] ColumnHeadings =
  {
    "Source URL",
    "Target URL",
    "Follow",
    "Target",
    "Anchor Text",
    "Title",
    "Alt Text",
    "Raw Target URL"
  };

  foreach (string Heading in ColumnHeadings)
  {
    ws.WriteField(Heading);
  }

  ws.NextRecord();

  foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
  {

    MacroscopeHyperlinksOut HyperlinksOut = msDoc.GetHyperlinksOut();

    foreach (MacroscopeHyperlinkOut HyperlinkOut in HyperlinksOut.IterateLinks())
    {

      // A null target URL is written as an empty string.
      string HyperlinkOutUrl = HyperlinkOut.GetTargetUrl() ?? "";
      string LinkTarget = HyperlinkOut.GetLinkTarget();
      string AnchorText = HyperlinkOut.GetAnchorText();
      string Title = HyperlinkOut.GetTitle();
      string AltText = HyperlinkOut.GetAltText();
      string RawTargetUrl = HyperlinkOut.GetRawTargetUrl();
      string DoFollow = HyperlinkOut.GetDoFollow() ? "Follow" : "No Follow";

      // Fields are written in the same order as the column headings.
      this.InsertAndFormatUrlCell(ws, msDoc);
      this.InsertAndFormatUrlCell(ws, HyperlinkOutUrl);
      this.InsertAndFormatContentCell(ws, DoFollow);
      this.InsertAndFormatContentCell(ws, LinkTarget);
      this.InsertAndFormatContentCell(ws, this.FormatIfMissing(AnchorText));
      this.InsertAndFormatContentCell(ws, this.FormatIfMissing(Title));
      this.InsertAndFormatContentCell(ws, this.FormatIfMissing(AltText));
      this.InsertAndFormatContentCell(ws, RawTargetUrl);

      ws.NextRecord();

    }

  }

}