/**************************************************************************/

/// <summary>
/// Renders one list view row for every outbound hyperlink of <paramref name="msDoc"/>.
/// Rows are keyed by the pair "digest(Url):digest(target URL)". A row whose key
/// already exists in <c>DisplayListView</c> is updated in place; otherwise a new
/// row is created and appended to <paramref name="ListViewItems"/>.
/// </summary>
/// <param name="ListViewItems">Receives the newly created rows (existing rows are not re-added).</param>
/// <param name="DocCollection">Looked up by target URL to obtain the status code of the link target.</param>
/// <param name="msDoc">Document whose outbound hyperlinks are rendered.</param>
/// <param name="Url">Source URL written to the first column and used in the row key.</param>
protected override void RenderListView(
    List<ListViewItem> ListViewItems,
    MacroscopeDocumentCollection DocCollection,
    MacroscopeDocument msDoc,
    string Url
)
{
    MacroscopeAllowedHosts AllowedHosts = this.MainForm.GetJobMaster().GetAllowedHosts();
    MacroscopeHyperlinksOut HyperlinksOut = msDoc.GetHyperlinksOut();

    foreach (MacroscopeHyperlinkOut HyperlinkOut in HyperlinksOut.IterateLinks())
    {
        ListViewItem lvItem = null;
        string UrlTarget = HyperlinkOut.GetTargetUrl();
        string PairKey = string.Join(":", UrlToDigest(Url), UrlToDigest(UrlTarget));

        string LinkTarget = HyperlinkOut.GetLinkTarget();
        string LinkText = HyperlinkOut.GetAnchorText();
        string LinkTitle = HyperlinkOut.GetTitle();
        string AltText = HyperlinkOut.GetAltText();
        string RawTargetUrl = HyperlinkOut.GetRawTargetUrl();

        // Shown when the target is not present in the document collection.
        string StatusCodeText = "Not crawled";
        string StatusText = "Not crawled";

        try
        {
            // Reuse UrlTarget instead of asking the link for its target URL again.
            if (DocCollection.ContainsDocument(Url: UrlTarget))
            {
                HttpStatusCode StatusCode = DocCollection.GetDocumentByUrl(Url: UrlTarget).GetStatusCode();
                StatusCodeText = ((int)StatusCode).ToString();
                StatusText = StatusCode.ToString();
            }
            else
            {
                DebugMsg("Not in DocCollection");
            }
        }
        catch (Exception ex)
        {
            // Best effort: a failed lookup leaves the "Not crawled" defaults in place.
            this.DebugMsg(ex.Message);
        }

        // Read the follow flag once; it drives both the label and the colour.
        bool IsDoFollow = HyperlinkOut.GetDoFollow();
        string DoFollow = IsDoFollow ? "Follow" : "No Follow";

        // Null-safe: a link text that is null is treated the same as an empty
        // one instead of raising a NullReferenceException on ".Length".
        bool LinkTextMissing = string.IsNullOrEmpty(LinkText);
        bool LinkTitleMissing = string.IsNullOrEmpty(LinkTitle);
        bool AltTextMissing = string.IsNullOrEmpty(AltText);

        string LinkTextLabel = LinkTextMissing ? "MISSING" : LinkText;
        string LinkTitleLabel = LinkTitleMissing ? "MISSING" : LinkTitle;
        string AltTextLabel = AltTextMissing ? "MISSING" : AltText;

        if (this.DisplayListView.Items.ContainsKey(PairKey))
        {
            try
            {
                lvItem = this.DisplayListView.Items[PairKey];
                lvItem.SubItems[ColUrl].Text = Url;
                lvItem.SubItems[ColUrlTarget].Text = UrlTarget;
                lvItem.SubItems[ColStatusCode].Text = StatusCodeText;
                lvItem.SubItems[ColStatus].Text = StatusText;
                lvItem.SubItems[ColDoFollow].Text = DoFollow;
                lvItem.SubItems[ColLinkTarget].Text = LinkTarget;
                lvItem.SubItems[ColLinkAnchorTextLabel].Text = LinkTextLabel;
                lvItem.SubItems[ColLinkTitleLabel].Text = LinkTitleLabel;
                lvItem.SubItems[ColAltTextLabel].Text = AltTextLabel;
                lvItem.SubItems[ColRawTargetUrl].Text = RawTargetUrl;
            }
            catch (Exception ex)
            {
                this.DebugMsg(string.Format("MacroscopeDisplayLinks 1: {0}", ex.Message));
            }
        }
        else
        {
            try
            {
                lvItem = new ListViewItem(PairKey);
                // Per-cell colours below only take effect when the item style is not inherited.
                lvItem.UseItemStyleForSubItems = false;
                lvItem.Name = PairKey;
                lvItem.SubItems[ColUrl].Text = Url;
                lvItem.SubItems.Add(UrlTarget);
                lvItem.SubItems.Add(StatusCodeText);
                lvItem.SubItems.Add(StatusText);
                lvItem.SubItems.Add(DoFollow);
                lvItem.SubItems.Add(LinkTarget);
                lvItem.SubItems.Add(LinkTextLabel);
                lvItem.SubItems.Add(LinkTitleLabel);
                lvItem.SubItems.Add(AltTextLabel);
                lvItem.SubItems.Add(RawTargetUrl);
                ListViewItems.Add(lvItem);
            }
            catch (Exception ex)
            {
                this.DebugMsg(string.Format("MacroscopeDisplayLinks 2: {0}", ex.Message));
            }
        }

        if (lvItem == null)
        {
            continue;
        }

        // Default colour for every cell; specific cells are overridden below.
        for (int i = 0; i < lvItem.SubItems.Count; i++)
        {
            lvItem.SubItems[i].ForeColor = Color.Blue;
        }

        // Evaluated once per row; used for both the URL cell and the follow cell.
        bool IsSourceAllowed = AllowedHosts.IsAllowedFromUrl(Url);

        lvItem.SubItems[ColUrl].ForeColor = IsSourceAllowed ? Color.Green : Color.Gray;

        lvItem.SubItems[ColUrlTarget].ForeColor =
            AllowedHosts.IsAllowedFromUrl(UrlTarget) ? Color.Green : Color.Gray;

        if (IsSourceAllowed)
        {
            lvItem.SubItems[ColDoFollow].ForeColor = IsDoFollow ? Color.Green : Color.Red;
        }
        else
        {
            lvItem.SubItems[ColDoFollow].ForeColor = Color.Gray;
        }

        if (LinkTextMissing)
        {
            lvItem.SubItems[ColLinkAnchorTextLabel].ForeColor = Color.Gray;
        }

        if (LinkTitleMissing)
        {
            lvItem.SubItems[ColLinkTitleLabel].ForeColor = Color.Gray;
        }

        if (AltTextMissing)
        {
            lvItem.SubItems[ColAltTextLabel].ForeColor = Color.Gray;
        }

        // A link with no anchor text, no title and no alt text is flagged in red.
        if (LinkTextMissing && LinkTitleMissing && AltTextMissing)
        {
            lvItem.SubItems[ColLinkAnchorTextLabel].ForeColor = Color.Red;
            lvItem.SubItems[ColLinkTitleLabel].ForeColor = Color.Red;
            lvItem.SubItems[ColAltTextLabel].ForeColor = Color.Red;
        }
    }
}
/**************************************************************************/

/// <summary>
/// Adds a worksheet named <paramref name="WorksheetLabel"/> to <paramref name="wb"/>
/// and writes one row per outbound hyperlink of every document in the job's
/// document collection, then turns the written range into a table.
/// </summary>
/// <param name="JobMaster">Supplies the document collection and the allowed hosts.</param>
/// <param name="wb">Workbook that receives the new worksheet.</param>
/// <param name="WorksheetLabel">Name of the worksheet to add.</param>
private void BuildWorksheetPageHyperlinks(
    MacroscopeJobMaster JobMaster,
    XLWorkbook wb,
    string WorksheetLabel
)
{
    var ws = wb.Worksheets.Add(WorksheetLabel);

    MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
    MacroscopeAllowedHosts AllowedHosts = JobMaster.GetAllowedHosts();

    // Header row.
    string[] columnTitles =
    {
        "Source URL",
        "Target URL",
        "Follow",
        "Target",
        "Anchor Text",
        "Title",
        "Alt Text",
        "Raw Target URL"
    };

    for (int headerCol = 1; headerCol <= columnTitles.Length; headerCol++)
    {
        ws.Cell(1, headerCol).Value = columnTitles[headerCol - 1];
    }

    int lastColumn = columnTitles.Length;
    int rowIndex = 2;

    foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
    {
        MacroscopeHyperlinksOut outboundLinks = msDoc.GetHyperlinksOut();

        foreach (MacroscopeHyperlinkOut link in outboundLinks.IterateLinks())
        {
            // A null target URL is written as an empty string.
            string targetUrl = link.GetTargetUrl() ?? "";
            string linkTarget = link.GetLinkTarget();
            string anchorText = link.GetAnchorText();
            string title = link.GetTitle();
            string altText = link.GetAltText();
            string rawTargetUrl = link.GetRawTargetUrl();
            string followLabel = link.GetDoFollow() ? "Follow" : "No Follow";

            int col = 1;

            // Source URL: green when internal, gray otherwise.
            this.InsertAndFormatUrlCell(ws, rowIndex, col, msDoc);

            XLColor sourceColor = AllowedHosts.IsInternalUrl(Url: msDoc.GetUrl())
                ? XLColor.Green
                : XLColor.Gray;

            ws.Cell(rowIndex, col).Style.Font.SetFontColor(sourceColor);

            col++;

            // Target URL: green when internal, gray when external; an empty or
            // unclassified target is rewritten as a content cell in red.
            this.InsertAndFormatUrlCell(ws, rowIndex, col, targetUrl);

            bool hasTargetUrl = targetUrl.Length > 0;

            if (hasTargetUrl && AllowedHosts.IsInternalUrl(Url: targetUrl))
            {
                ws.Cell(rowIndex, col).Style.Font.SetFontColor(XLColor.Green);
            }
            else if (hasTargetUrl && AllowedHosts.IsExternalUrl(Url: targetUrl))
            {
                ws.Cell(rowIndex, col).Style.Font.SetFontColor(XLColor.Gray);
            }
            else
            {
                this.InsertAndFormatContentCell(ws, rowIndex, col, this.FormatIfMissing(targetUrl));
                ws.Cell(rowIndex, col).Style.Font.SetFontColor(XLColor.Red);
            }

            col++;
            this.InsertAndFormatContentCell(ws, rowIndex, col, followLabel);

            col++;
            this.InsertAndFormatContentCell(ws, rowIndex, col, linkTarget);

            col++;
            this.InsertAndFormatContentCell(ws, rowIndex, col, this.FormatIfMissing(anchorText));

            col++;
            this.InsertAndFormatContentCell(ws, rowIndex, col, this.FormatIfMissing(title));

            col++;
            this.InsertAndFormatContentCell(ws, rowIndex, col, this.FormatIfMissing(altText));

            col++;
            this.InsertAndFormatContentCell(ws, rowIndex, col, rawTargetUrl);

            rowIndex++;
        }
    }

    // Header row plus all data rows written above become a table.
    ws.Range(1, 1, rowIndex - 1, lastColumn).CreateTable();
}
/**************************************************************************/

/// <summary>
/// Collects the documents in <paramref name="DocCollection"/> that no other valid
/// document links to. A document counts as linked when the target URL or the raw
/// target URL of any outbound hyperlink of another valid document compares equal
/// to its URL via <c>MacroscopeHttpUrlUtils.CompareUrls</c>.
/// Orphans receive the "ORPHAN1" and "ORPHAN2" remarks; linked documents have
/// those remarks removed. Documents rejected by <c>IsValidDocument</c> are skipped
/// and left untouched.
/// </summary>
/// <param name="DocCollection">Collection to scan.</param>
/// <returns>List of the documents classified as orphans.</returns>
public MacroscopeDocumentList AnalyzeOrphanedDocumentsInCollection(MacroscopeDocumentCollection DocCollection)
{
    MacroscopeDocumentList orphans = new MacroscopeDocumentList();

    foreach (MacroscopeDocument candidate in DocCollection.IterateDocuments())
    {
        string candidateUrl = candidate.GetUrl();

        if (!IsValidDocument(msDoc: candidate))
        {
            continue;
        }

        bool linkedFromElsewhere = false;

        foreach (MacroscopeDocument referrer in DocCollection.IterateDocuments())
        {
            // Skip the candidate itself and any referrer that is not a valid document.
            if (
                MacroscopeHttpUrlUtils.CompareUrls(UrlLeft: candidateUrl, UrlRight: referrer.GetUrl())
                || !this.IsValidDocument(msDoc: referrer)
            )
            {
                continue;
            }

            foreach (MacroscopeHyperlinkOut link in referrer.IterateHyperlinksOut())
            {
                string targetUrl = link.GetTargetUrl();
                string rawTargetUrl = link.GetRawTargetUrl();

                if (
                    MacroscopeHttpUrlUtils.CompareUrls(UrlLeft: candidateUrl, UrlRight: targetUrl)
                    || MacroscopeHttpUrlUtils.CompareUrls(UrlLeft: candidateUrl, UrlRight: rawTargetUrl)
                )
                {
                    linkedFromElsewhere = true;
                    break;
                }
            }

            // One inbound link is enough; stop scanning further referrers.
            if (linkedFromElsewhere)
            {
                break;
            }
        }

        if (linkedFromElsewhere)
        {
            msDocRemarksCleared:
            candidate.RemoveRemark("ORPHAN1");
            candidate.RemoveRemark("ORPHAN2");
        }
        else
        {
            orphans.AddDocument(msDoc: candidate);
            candidate.AddRemark("ORPHAN1", "This appears to be an orphaned page, not linked to from any other HTML page in this collection.");
            candidate.AddRemark("ORPHAN2", "This page appears to only be referenced from one or more sitemaps.");
        }
    }

    return orphans;
}
/**************************************************************************/

/// <summary>
/// Writes a header record followed by one CSV record per outbound hyperlink of
/// every document in the job's document collection.
/// </summary>
/// <param name="JobMaster">Supplies the document collection.</param>
/// <param name="ws">CSV writer that receives the records.</param>
private void BuildWorksheetPageHyperlinks(
    MacroscopeJobMaster JobMaster,
    CsvWriter ws
)
{
    MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

    // Not read below; the call is kept so the sequence of JobMaster calls is unchanged.
    MacroscopeAllowedHosts AllowedHosts = JobMaster.GetAllowedHosts();

    // Header record.
    string[] columnTitles =
    {
        "Source URL",
        "Target URL",
        "Follow",
        "Target",
        "Anchor Text",
        "Title",
        "Alt Text",
        "Raw Target URL"
    };

    foreach (string columnTitle in columnTitles)
    {
        ws.WriteField(columnTitle);
    }

    ws.NextRecord();

    foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
    {
        MacroscopeHyperlinksOut outboundLinks = msDoc.GetHyperlinksOut();

        foreach (MacroscopeHyperlinkOut link in outboundLinks.IterateLinks())
        {
            // A null target URL is written as an empty string.
            string targetUrl = link.GetTargetUrl() ?? "";
            string linkTarget = link.GetLinkTarget();
            string anchorText = link.GetAnchorText();
            string title = link.GetTitle();
            string altText = link.GetAltText();
            string rawTargetUrl = link.GetRawTargetUrl();
            string followLabel = link.GetDoFollow() ? "Follow" : "No Follow";

            this.InsertAndFormatUrlCell(ws, msDoc);
            this.InsertAndFormatUrlCell(ws, targetUrl);
            this.InsertAndFormatContentCell(ws, followLabel);
            this.InsertAndFormatContentCell(ws, linkTarget);
            this.InsertAndFormatContentCell(ws, this.FormatIfMissing(anchorText));
            this.InsertAndFormatContentCell(ws, this.FormatIfMissing(title));
            this.InsertAndFormatContentCell(ws, this.FormatIfMissing(altText));
            this.InsertAndFormatContentCell(ws, rawTargetUrl);

            ws.NextRecord();
        }
    }
}