/**************************************************************************/

        /// <summary>
        /// Adds a worksheet named <paramref name="WorksheetLabel"/> to <paramref name="wb"/>
        /// and fills it with one row per outbound hyperlink of every document in the job's
        /// document collection. The populated range is finally converted into a table.
        /// </summary>
        /// <param name="JobMaster">Supplies the document collection and the allowed-hosts list.</param>
        /// <param name="wb">Workbook that receives the new worksheet.</param>
        /// <param name="WorksheetLabel">Name given to the new worksheet.</param>
        private void BuildWorksheetPageHyperlinks(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            // Column headings, in left-to-right order
            string[] columnHeadings =
            {
                "Source URL",
                "Target URL",
                "Follow",
                "Target",
                "Anchor Text",
                "Title",
                "Alt Text",
                "Raw Target URL"
            };

            var sheet = wb.Worksheets.Add(WorksheetLabel);

            MacroscopeDocumentCollection documents = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       hosts     = JobMaster.GetAllowedHosts();

            // Row 1 holds the headings
            for (int headingIndex = 0; headingIndex < columnHeadings.Length; headingIndex++)
            {
                sheet.Cell(1, headingIndex + 1).Value = columnHeadings[headingIndex];
            }

            int lastColumn = columnHeadings.Length;
            int row        = 2;

            foreach (MacroscopeDocument msDoc in documents.IterateDocuments())
            {
                foreach (MacroscopeHyperlinkOut link in msDoc.GetHyperlinksOut().IterateLinks())
                {
                    // A null target URL is treated as an empty string
                    string targetUrl    = link.GetTargetUrl() ?? "";
                    string linkTarget   = link.GetLinkTarget();
                    string anchorText   = link.GetAnchorText();
                    string title        = link.GetTitle();
                    string altText      = link.GetAltText();
                    string rawTargetUrl = link.GetRawTargetUrl();
                    string followLabel  = link.GetDoFollow() ? "Follow" : "No Follow";

                    int col = 1;

                    // Source URL: green when internal, gray otherwise
                    this.InsertAndFormatUrlCell(sheet, row, col, msDoc);

                    XLColor sourceColor = hosts.IsInternalUrl(Url: msDoc.GetUrl())
                        ? XLColor.Green
                        : XLColor.Gray;

                    sheet.Cell(row, col).Style.Font.SetFontColor(sourceColor);
                    col++;

                    // Target URL: green when internal, gray when external; anything else
                    // (including an empty URL) is rewritten as a content cell and shown in red
                    this.InsertAndFormatUrlCell(sheet, row, col, targetUrl);

                    bool hasTargetUrl = targetUrl.Length > 0;

                    if (hasTargetUrl && hosts.IsInternalUrl(Url: targetUrl))
                    {
                        sheet.Cell(row, col).Style.Font.SetFontColor(XLColor.Green);
                    }
                    else if (hasTargetUrl && hosts.IsExternalUrl(Url: targetUrl))
                    {
                        sheet.Cell(row, col).Style.Font.SetFontColor(XLColor.Gray);
                    }
                    else
                    {
                        this.InsertAndFormatContentCell(sheet, row, col, this.FormatIfMissing(targetUrl));
                        sheet.Cell(row, col).Style.Font.SetFontColor(XLColor.Red);
                    }

                    col++;

                    // Remaining plain content columns, written left to right
                    this.InsertAndFormatContentCell(sheet, row, col++, followLabel);
                    this.InsertAndFormatContentCell(sheet, row, col++, linkTarget);
                    this.InsertAndFormatContentCell(sheet, row, col++, this.FormatIfMissing(anchorText));
                    this.InsertAndFormatContentCell(sheet, row, col++, this.FormatIfMissing(title));
                    this.InsertAndFormatContentCell(sheet, row, col++, this.FormatIfMissing(altText));
                    this.InsertAndFormatContentCell(sheet, row, col, rawTargetUrl);

                    row++;
                }
            }

            // Turn everything written so far (headings plus data rows) into a table
            sheet.Range(1, 1, row - 1, lastColumn).CreateTable();
        }
        // Example no. 2
        // 0
        /**************************************************************************/


        /// <summary>
        /// Adds or refreshes a list view row for every outbound hyperlink of
        /// <paramref name="msDoc"/> whose target URL contains <paramref name="UrlFragment"/>.
        /// </summary>
        /// <remarks>
        /// Rows are keyed by the digest of <paramref name="Url"/> and the digest of the target
        /// URL, joined with ":". A row already present in <c>DisplayListView</c> is updated in
        /// place; otherwise a new item is appended to <paramref name="ListViewItems"/> (it is
        /// not added to the list view by this method).
        /// Links whose target URL is null, or does not contain the fragment, are skipped before
        /// any digest, status lookup or logging work is done for them.
        /// Anchor text, title and alt text that are null or empty are shown as "MISSING".
        /// </remarks>
        /// <param name="ListViewItems">Receives the newly created list view items.</param>
        /// <param name="msDoc">Document whose outbound hyperlinks are rendered.</param>
        /// <param name="Url">Source URL shown in the first column and used in the row key.</param>
        /// <param name="UrlFragment">Text that a target URL must contain for the link to be shown.</param>
        private void RenderListViewSearchTargetUrls(
            List <ListViewItem> ListViewItems,
            MacroscopeDocument msDoc,
            string Url,
            string UrlFragment
            )
        {
            MacroscopeAllowedHosts       AllowedHosts  = this.MainForm.GetJobMaster().GetAllowedHosts();
            MacroscopeHyperlinksOut      HyperlinksOut = msDoc.GetHyperlinksOut();
            MacroscopeDocumentCollection DocCollection = this.MainForm.GetJobMaster().GetDocCollection();

            foreach (MacroscopeHyperlinkOut HyperlinkOut in HyperlinksOut.IterateLinks())
            {
                string UrlTarget = HyperlinkOut.GetTargetUrl();

                // Filter first: a link that will not be displayed needs no further work, and
                // a null target must never reach UrlToDigest or the document lookup.
                // NOTE(review): the match is culture-sensitive; URLs are usually compared
                // ordinally - confirm whether CurrentCulture is intended here.
                if (
                    (UrlTarget == null) ||
                    (UrlTarget.IndexOf(UrlFragment, StringComparison.CurrentCulture) < 0))
                {
                    continue;
                }

                string StatusCodeText = "Not crawled";
                string StatusText     = "Not crawled";
                string PairKey        = string.Join(":", UrlToDigest(Url: Url), UrlToDigest(Url: UrlTarget));
                string LinkTarget     = HyperlinkOut.GetLinkTarget();
                string LinkText       = HyperlinkOut.GetAnchorText();
                string LinkTitle      = HyperlinkOut.GetTitle();
                string AltText        = HyperlinkOut.GetAltText();

                // Null is treated the same as empty, so a missing value cannot throw
                bool IsLinkTextMissing  = string.IsNullOrEmpty(LinkText);
                bool IsLinkTitleMissing = string.IsNullOrEmpty(LinkTitle);
                bool IsAltTextMissing   = string.IsNullOrEmpty(AltText);

                string LinkTextLabel  = IsLinkTextMissing ? "MISSING" : LinkText;
                string LinkTitleLabel = IsLinkTitleMissing ? "MISSING" : LinkTitle;
                string AltTextLabel   = IsAltTextMissing ? "MISSING" : AltText;

                bool   IsDoFollow = HyperlinkOut.GetDoFollow();
                string DoFollow   = IsDoFollow ? "Follow" : "No Follow";

                // Status columns keep "Not crawled" unless the target is a known document
                try
                {
                    if (DocCollection.ContainsDocument(Url: UrlTarget))
                    {
                        HttpStatusCode StatusCode = DocCollection.GetDocumentByUrl(Url: UrlTarget).GetStatusCode();

                        StatusCodeText = ((int)StatusCode).ToString();
                        StatusText     = StatusCode.ToString();
                    }
                    else
                    {
                        this.DebugMsg("Not in DocCollection");
                    }
                }
                catch (Exception ex)
                {
                    // Best effort: a failed lookup leaves the "Not crawled" defaults in place
                    this.DebugMsg(ex.Message);
                }

                ListViewItem lvItem = null;

                if (this.DisplayListView.Items.ContainsKey(PairKey))
                {
                    // Row already displayed: refresh its cells in place
                    try
                    {
                        lvItem = this.DisplayListView.Items[PairKey];

                        lvItem.SubItems[ColUrl].Text                 = Url;
                        lvItem.SubItems[ColUrlTarget].Text           = UrlTarget;
                        lvItem.SubItems[ColStatusCode].Text          = StatusCodeText;
                        lvItem.SubItems[ColStatus].Text              = StatusText;
                        lvItem.SubItems[ColDoFollow].Text            = DoFollow;
                        lvItem.SubItems[ColLinkTarget].Text          = LinkTarget;
                        lvItem.SubItems[ColLinkAnchorTextLabel].Text = LinkTextLabel;
                        lvItem.SubItems[ColLinkTitleLabel].Text      = LinkTitleLabel;
                        lvItem.SubItems[ColAltTextLabel].Text        = AltTextLabel;
                    }
                    catch (Exception ex)
                    {
                        this.DebugMsg(string.Format("MacroscopeDisplayLinks 1: {0}", ex.Message));
                    }
                }
                else
                {
                    // New row: build it and hand it back through ListViewItems
                    try
                    {
                        lvItem = new ListViewItem(PairKey);
                        lvItem.UseItemStyleForSubItems = false;
                        lvItem.Name = PairKey;

                        lvItem.SubItems[ColUrl].Text = Url;
                        lvItem.SubItems.Add(UrlTarget);
                        lvItem.SubItems.Add(StatusCodeText);
                        lvItem.SubItems.Add(StatusText);
                        lvItem.SubItems.Add(DoFollow);
                        lvItem.SubItems.Add(LinkTarget);
                        lvItem.SubItems.Add(LinkTextLabel);
                        lvItem.SubItems.Add(LinkTitleLabel);
                        lvItem.SubItems.Add(AltTextLabel);

                        ListViewItems.Add(lvItem);
                    }
                    catch (Exception ex)
                    {
                        this.DebugMsg(string.Format("MacroscopeDisplayLinks 2: {0}", ex.Message));
                    }
                }

                if (lvItem == null)
                {
                    continue;
                }

                // Default colour for every cell; specific cells are overridden below
                for (int i = 0; i < lvItem.SubItems.Count; i++)
                {
                    lvItem.SubItems[i].ForeColor = Color.Blue;
                }

                bool IsSourceAllowed = AllowedHosts.IsAllowedFromUrl(Url);
                bool IsTargetAllowed = AllowedHosts.IsAllowedFromUrl(UrlTarget);

                lvItem.SubItems[ColUrl].ForeColor       = IsSourceAllowed ? Color.Green : Color.Gray;
                lvItem.SubItems[ColUrlTarget].ForeColor = IsTargetAllowed ? Color.Green : Color.Gray;

                // Follow state is only highlighted for links that originate from an allowed URL
                if (IsSourceAllowed)
                {
                    lvItem.SubItems[ColDoFollow].ForeColor = IsDoFollow ? Color.Green : Color.Red;
                }
                else
                {
                    lvItem.SubItems[ColDoFollow].ForeColor = Color.Gray;
                }

                if (IsLinkTextMissing)
                {
                    lvItem.SubItems[ColLinkAnchorTextLabel].ForeColor = Color.Gray;
                }

                if (IsLinkTitleMissing)
                {
                    lvItem.SubItems[ColLinkTitleLabel].ForeColor = Color.Gray;
                }

                if (IsAltTextMissing)
                {
                    lvItem.SubItems[ColAltTextLabel].ForeColor = Color.Gray;
                }

                // A link with no descriptive text at all is flagged in red
                if (IsLinkTextMissing && IsLinkTitleMissing && IsAltTextMissing)
                {
                    lvItem.SubItems[ColLinkAnchorTextLabel].ForeColor = Color.Red;
                    lvItem.SubItems[ColLinkTitleLabel].ForeColor      = Color.Red;
                    lvItem.SubItems[ColAltTextLabel].ForeColor        = Color.Red;
                }
            }
        }
        /**************************************************************************/

        /// <summary>
        /// Writes a heading record followed by one CSV record per outbound hyperlink of
        /// every document in the job's document collection.
        /// </summary>
        /// <param name="JobMaster">Supplies the document collection to report on.</param>
        /// <param name="ws">CSV writer that receives the fields and records.</param>
        private void BuildWorksheetPageHyperlinks(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            // Column headings, in output order
            string[] columnHeadings =
            {
                "Source URL",
                "Target URL",
                "Follow",
                "Target",
                "Anchor Text",
                "Title",
                "Alt Text",
                "Raw Target URL"
            };

            MacroscopeDocumentCollection documents = JobMaster.GetDocCollection();

            // Retrieved here but not consulted anywhere below
            MacroscopeAllowedHosts hosts = JobMaster.GetAllowedHosts();

            foreach (string heading in columnHeadings)
            {
                ws.WriteField(heading);
            }

            ws.NextRecord();

            foreach (MacroscopeDocument msDoc in documents.IterateDocuments())
            {
                foreach (MacroscopeHyperlinkOut link in msDoc.GetHyperlinksOut().IterateLinks())
                {
                    // Null and empty target URLs both end up as the empty string
                    string targetUrl    = link.GetTargetUrl() ?? "";
                    string linkTarget   = link.GetLinkTarget();
                    string anchorText   = link.GetAnchorText();
                    string title        = link.GetTitle();
                    string altText      = link.GetAltText();
                    string rawTargetUrl = link.GetRawTargetUrl();
                    string followLabel  = link.GetDoFollow() ? "Follow" : "No Follow";

                    // URL columns
                    this.InsertAndFormatUrlCell(ws, msDoc);
                    this.InsertAndFormatUrlCell(ws, targetUrl);

                    // Plain content columns
                    this.InsertAndFormatContentCell(ws, followLabel);
                    this.InsertAndFormatContentCell(ws, linkTarget);
                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(anchorText));
                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(title));
                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(altText));
                    this.InsertAndFormatContentCell(ws, rawTargetUrl);

                    ws.NextRecord();
                }
            }
        }