コード例 #1
0
        /**************************************************************************/

        /// <summary>
        /// Writes the sitemap errors worksheet to <paramref name="ws"/>: a header record
        /// followed by one record for each outlink of an internal XML sitemap document whose
        /// linked document is internal and either has a status code in the 400-599 range or
        /// reports that it is not allowed by robots.
        /// </summary>
        /// <param name="JobMaster">Job that supplies the document collection to scan.</param>
        /// <param name="ws">CSV writer that receives the header and data records.</param>
        private void BuildWorksheetSitemapErrors(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

            {
                ws.WriteField("Sitemap URL");
                ws.WriteField("Status Code");
                ws.WriteField("Robots");
                ws.WriteField("URL");

                ws.NextRecord();
            }

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                // Only internal XML sitemap documents are reported on.
                if (!(msDoc.GetIsInternal() && msDoc.IsDocumentType(Type: MacroscopeConstants.DocumentType.SITEMAPXML)))
                {
                    continue;
                }

                foreach (MacroscopeLink Outlink in msDoc.IterateOutlinks())
                {
                    string             TargetUrl   = Outlink.GetTargetUrl();
                    MacroscopeDocument msDocLinked = DocCollection.GetDocumentByUrl(Url: TargetUrl);

                    // Presumably GetDocumentByUrl yields null for a URL that has no document
                    // in the collection; skip such links instead of dereferencing null.
                    if (msDocLinked == null)
                    {
                        continue;
                    }

                    if (!msDocLinked.GetIsInternal())
                    {
                        continue;
                    }

                    int  StatusCode    = (int)msDocLinked.GetStatusCode();
                    bool IsErrorStatus = (StatusCode >= 400) && (StatusCode <= 599);

                    // Evaluated unconditionally, as before, so the getter is always called.
                    bool IsBlockedByRobots = !msDocLinked.GetAllowedByRobots();

                    if (!(IsErrorStatus || IsBlockedByRobots))
                    {
                        continue;
                    }

                    // NOTE(review): the status code and robots cells are written from the
                    // sitemap document (msDoc), not from the linked document that triggered
                    // the row -- confirm that this is the intended report layout.
                    this.InsertAndFormatUrlCell(ws, msDoc);

                    this.InsertAndFormatStatusCodeCell(ws, msDoc);

                    this.InsertAndFormatRobotsCell(ws, msDoc);

                    this.InsertAndFormatUrlCell(ws, TargetUrl);

                    ws.NextRecord();
                }
            }
        }
コード例 #2
0
        /**************************************************************************/

        /// <summary>
        /// Populates the display list view with one row per non-empty (CSS selector label,
        /// extracted value) pair of every document in <paramref name="UrlList"/> to which the
        /// data extractor can be applied, colouring the URL and status cells.
        /// </summary>
        /// <param name="DocCollection">Collection the documents are looked up in.</param>
        /// <param name="UrlList">URLs of the documents to render.</param>
        /// <param name="DataExtractor">Extractor used to decide whether a document is rendered.</param>
        private void RenderListView(
            MacroscopeDocumentCollection DocCollection,
            List <string> UrlList,
            MacroscopeDataExtractorCssSelectors DataExtractor
            )
        {
            if (DocCollection.CountDocuments() == 0)
            {
                return;
            }

            List <ListViewItem> ListViewItems = new List <ListViewItem> ();

            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

            // try/finally guarantees that the progress form is closed and disposed even
            // when rendering throws part-way through.
            try
            {
                decimal Count           = 0;
                decimal TotalDocs       = ( decimal )DocCollection.CountDocuments();
                decimal MajorPercentage = (( decimal )100 / TotalDocs) * Count;

                if (MacroscopePreferencesManager.GetShowProgressDialogues())
                {
                    ProgressForm.UpdatePercentages(
                        Title: "Preparing Display",
                        Message: "Processing document collection for display:",
                        MajorPercentage: MajorPercentage,
                        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                        );
                }

                foreach (string Url in UrlList)
                {
                    MacroscopeDocument msDoc = DocCollection.GetDocumentByUrl(Url: Url);

                    // A listed URL may have no document in the collection; skip it rather
                    // than dereference null.
                    if (msDoc == null)
                    {
                        continue;
                    }

                    string DocUrl     = msDoc.GetUrl();
                    string StatusCode = (( int )msDoc.GetStatusCode()).ToString();
                    string Status     = msDoc.GetStatusCode().ToString();
                    string MimeType   = msDoc.GetMimeType();

                    if (!DataExtractor.CanApplyDataExtractorsToDocument(msDoc: msDoc))
                    {
                        continue;
                    }

                    foreach (KeyValuePair <string, string> DataExtractedPair in msDoc.IterateDataExtractedCssSelectors())
                    {
                        ListViewItem lvItem           = null;
                        string       CssSelectorLabel = DataExtractedPair.Key;
                        string       ExtractedValue   = DataExtractedPair.Value;
                        string       PairKey          = null;

                        if (
                            string.IsNullOrEmpty(CssSelectorLabel) ||
                            string.IsNullOrEmpty(ExtractedValue))
                        {
                            continue;
                        }

                        // The row key combines digests of the URL, the label and the value.
                        PairKey = string.Join(
                            ":",
                            UrlToDigest(DocUrl),
                            UrlToDigest(Macroscope.GetStringDigest(Text: CssSelectorLabel)),
                            UrlToDigest(Macroscope.GetStringDigest(Text: ExtractedValue))
                            );

                        if (this.DisplayListView.Items.ContainsKey(PairKey))
                        {
                            // Reuse the row that is already displayed for this key.
                            lvItem = this.DisplayListView.Items[PairKey];
                        }
                        else
                        {
                            lvItem = new ListViewItem(PairKey);
                            lvItem.UseItemStyleForSubItems = false;
                            lvItem.Name = PairKey;

                            for (int i = 0; i < 6; i++)
                            {
                                lvItem.SubItems.Add("");
                            }

                            // New rows are batched and added to the control after the loop.
                            ListViewItems.Add(lvItem);
                        }

                        if (lvItem == null)
                        {
                            DebugMsg(string.Format("MacroscopeDisplayDataExtractorCssSelectors MISSING: {0}", PairKey));

                            // Nothing to colour; previously execution fell through and
                            // dereferenced the null item.
                            continue;
                        }

                        try
                        {
                            lvItem.SubItems[ColUrl].Text              = DocUrl;
                            lvItem.SubItems[ColStatusCode].Text       = StatusCode;
                            lvItem.SubItems[ColStatus].Text           = Status;
                            lvItem.SubItems[ColMimeType].Text         = MimeType;
                            lvItem.SubItems[ColCssSelectorLabel].Text = CssSelectorLabel;
                            lvItem.SubItems[ColExtractedValue].Text   = ExtractedValue;
                        }
                        catch (Exception ex)
                        {
                            DebugMsg(string.Format("MacroscopeDisplayDataExtractorCssSelectors: {0}", ex.Message));
                            DebugMsg(string.Format("MacroscopeDisplayDataExtractorCssSelectors: {0}", ex.StackTrace));
                        }

                        if (msDoc.GetIsInternal())
                        {
                            lvItem.SubItems[ColUrl].ForeColor = Color.Green;
                        }
                        else
                        {
                            lvItem.SubItems[ColUrl].ForeColor = Color.Gray;
                        }

                        // Colour the status cells by the leading digit of the numeric status code.
                        if (Regex.IsMatch(StatusCode, "^[2]"))
                        {
                            lvItem.SubItems[ColStatusCode].ForeColor = Color.Green;
                            lvItem.SubItems[ColStatus].ForeColor     = Color.Green;
                        }
                        else
                        if (Regex.IsMatch(StatusCode, "^[3]"))
                        {
                            lvItem.SubItems[ColStatusCode].ForeColor = Color.Goldenrod;
                            lvItem.SubItems[ColStatus].ForeColor     = Color.Goldenrod;
                        }
                        else
                        if (Regex.IsMatch(StatusCode, "^[45]"))
                        {
                            lvItem.SubItems[ColStatusCode].ForeColor = Color.Red;
                            lvItem.SubItems[ColStatus].ForeColor     = Color.Red;
                        }
                        else
                        {
                            lvItem.SubItems[ColStatusCode].ForeColor = Color.Blue;
                            lvItem.SubItems[ColStatus].ForeColor     = Color.Blue;
                        }
                    }

                    if (MacroscopePreferencesManager.GetShowProgressDialogues())
                    {
                        Count++;

                        MajorPercentage = (( decimal )100 / TotalDocs) * Count;

                        ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: MajorPercentage,
                            ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                            );
                    }
                }

                this.DisplayListView.Items.AddRange(ListViewItems.ToArray());

                this.DisplayListView.AutoResizeColumns(ColumnHeaderAutoResizeStyle.ColumnContent);

                this.DisplayListView.Columns[ColUrl].Width        = 300;
                this.DisplayListView.Columns[ColStatusCode].Width = 100;
                this.DisplayListView.Columns[ColStatus].Width     = 100;
                this.DisplayListView.Columns[ColMimeType].Width   = 100;
            }
            finally
            {
                if (MacroscopePreferencesManager.GetShowProgressDialogues())
                {
                    ProgressForm.DoClose();
                }

                ProgressForm.Dispose();
            }
        }
コード例 #3
0
        /**************************************************************************/


        /// <summary>
        /// For every outgoing hyperlink of <paramref name="msDoc"/> whose target URL contains
        /// <paramref name="UrlFragment"/>, updates the matching row already shown in the
        /// display list view or appends a new row to <paramref name="ListViewItems"/>, then
        /// colours the row's cells.
        /// </summary>
        /// <param name="ListViewItems">Receives rows that are not yet in the list view.</param>
        /// <param name="msDoc">Document whose outgoing hyperlinks are examined.</param>
        /// <param name="Url">URL shown in the source URL column and used in the row key.</param>
        /// <param name="UrlFragment">Text that a target URL must contain to be listed.</param>
        private void RenderListViewSearchTargetUrls(
            List <ListViewItem> ListViewItems,
            MacroscopeDocument msDoc,
            string Url,
            string UrlFragment
            )
        {
            MacroscopeAllowedHosts       AllowedHosts  = this.MainForm.GetJobMaster().GetAllowedHosts();
            MacroscopeHyperlinksOut      HyperlinksOut = msDoc.GetHyperlinksOut();
            MacroscopeDocumentCollection DocCollection = this.MainForm.GetJobMaster().GetDocCollection();

            foreach (MacroscopeHyperlinkOut HyperlinkOut in HyperlinksOut.IterateLinks())
            {
                string UrlTarget  = HyperlinkOut.GetTargetUrl();
                string PairKey    = string.Join(":", UrlToDigest(Url: Url), UrlToDigest(Url: UrlTarget));
                string LinkTarget = HyperlinkOut.GetLinkTarget();
                string LinkText   = HyperlinkOut.GetAnchorText();
                string LinkTitle  = HyperlinkOut.GetTitle();
                string AltText    = HyperlinkOut.GetAltText();

                // Placeholders that remain in place when the target is not in the collection
                // or the lookup fails.
                string StatusCodeText = "Not crawled";
                string StatusText     = "Not crawled";

                try
                {
                    if (!DocCollection.ContainsDocument(Url: HyperlinkOut.GetTargetUrl()))
                    {
                        DebugMsg("Not in DocCollection");
                    }
                    else
                    {
                        HttpStatusCode TargetStatus = DocCollection.GetDocumentByUrl(Url: HyperlinkOut.GetTargetUrl()).GetStatusCode();

                        StatusCodeText = ((int)TargetStatus).ToString();
                        StatusText     = TargetStatus.ToString();
                    }
                }
                catch (Exception ex)
                {
                    this.DebugMsg(ex.Message);
                }

                string DoFollow = HyperlinkOut.GetDoFollow() ? "Follow" : "No Follow";

                bool NoLinkText  = (LinkText.Length == 0);
                bool NoLinkTitle = (LinkTitle.Length == 0);
                bool NoAltText   = (AltText.Length == 0);

                string LinkTextLabel  = NoLinkText ? "MISSING" : LinkText;
                string LinkTitleLabel = NoLinkTitle ? "MISSING" : LinkTitle;
                string AltTextLabel   = NoAltText ? "MISSING" : AltText;

                // Only links whose target URL contains the requested fragment are listed.
                if (
                    (UrlTarget == null) ||
                    (UrlTarget.IndexOf(UrlFragment, StringComparison.CurrentCulture) < 0))
                {
                    continue;
                }

                ListViewItem lvItem = null;

                if (!this.DisplayListView.Items.ContainsKey(PairKey))
                {
                    // No row for this key yet: build one and queue it for the caller.
                    try
                    {
                        lvItem = new ListViewItem(PairKey);
                        lvItem.UseItemStyleForSubItems = false;
                        lvItem.Name = PairKey;

                        lvItem.SubItems[ColUrl].Text = Url;
                        lvItem.SubItems.Add(UrlTarget);
                        lvItem.SubItems.Add(StatusCodeText);
                        lvItem.SubItems.Add(StatusText);
                        lvItem.SubItems.Add(DoFollow);
                        lvItem.SubItems.Add(LinkTarget);
                        lvItem.SubItems.Add(LinkTextLabel);
                        lvItem.SubItems.Add(LinkTitleLabel);
                        lvItem.SubItems.Add(AltTextLabel);

                        ListViewItems.Add(lvItem);
                    }
                    catch (Exception ex)
                    {
                        this.DebugMsg(string.Format("MacroscopeDisplayLinks 2: {0}", ex.Message));
                    }
                }
                else
                {
                    // The row is already displayed: refresh its cell texts in place.
                    try
                    {
                        lvItem = this.DisplayListView.Items[PairKey];

                        lvItem.SubItems[ColUrl].Text                 = Url;
                        lvItem.SubItems[ColUrlTarget].Text           = UrlTarget;
                        lvItem.SubItems[ColStatusCode].Text          = StatusCodeText;
                        lvItem.SubItems[ColStatus].Text              = StatusText;
                        lvItem.SubItems[ColDoFollow].Text            = DoFollow;
                        lvItem.SubItems[ColLinkTarget].Text          = LinkTarget;
                        lvItem.SubItems[ColLinkAnchorTextLabel].Text = LinkTextLabel;
                        lvItem.SubItems[ColLinkTitleLabel].Text      = LinkTitleLabel;
                        lvItem.SubItems[ColAltTextLabel].Text        = AltTextLabel;
                    }
                    catch (Exception ex)
                    {
                        this.DebugMsg(string.Format("MacroscopeDisplayLinks 1: {0}", ex.Message));
                    }
                }

                if (lvItem == null)
                {
                    continue;
                }

                // Blue is the base colour; specific cells are overridden below.
                foreach (ListViewItem.ListViewSubItem SubItem in lvItem.SubItems)
                {
                    SubItem.ForeColor = Color.Blue;
                }

                Color SourceColour = AllowedHosts.IsAllowedFromUrl(Url) ? Color.Green : Color.Gray;
                lvItem.SubItems[ColUrl].ForeColor = SourceColour;

                Color TargetColour = AllowedHosts.IsAllowedFromUrl(UrlTarget) ? Color.Green : Color.Gray;
                lvItem.SubItems[ColUrlTarget].ForeColor = TargetColour;

                // The follow state is only judged when the source URL is on an allowed host.
                Color FollowColour = Color.Gray;

                if (AllowedHosts.IsAllowedFromUrl(Url))
                {
                    FollowColour = HyperlinkOut.GetDoFollow() ? Color.Green : Color.Red;
                }

                lvItem.SubItems[ColDoFollow].ForeColor = FollowColour;

                if (NoLinkText)
                {
                    lvItem.SubItems[ColLinkAnchorTextLabel].ForeColor = Color.Gray;
                }

                if (NoLinkTitle)
                {
                    lvItem.SubItems[ColLinkTitleLabel].ForeColor = Color.Gray;
                }

                if (NoAltText)
                {
                    lvItem.SubItems[ColAltTextLabel].ForeColor = Color.Gray;
                }

                // A link with no anchor text, no title and no alt text is flagged in red.
                if (NoLinkText && NoLinkTitle && NoAltText)
                {
                    lvItem.SubItems[ColLinkAnchorTextLabel].ForeColor = Color.Red;
                    lvItem.SubItems[ColLinkTitleLabel].ForeColor      = Color.Red;
                    lvItem.SubItems[ColAltTextLabel].ForeColor        = Color.Red;
                }
            }
        }
コード例 #4
0
        /**************************************************************************/

        /// <summary>
        /// Renders every document named in <paramref name="UrlList"/> into the tree view,
        /// reporting progress through a progress form when progress dialogues are enabled,
        /// and finally expands all nodes.
        /// </summary>
        /// <param name="DocCollection">Collection the documents are looked up in.</param>
        /// <param name="UrlList">URLs of the documents to render; URLs without a document are skipped.</param>
        private void RenderTreeView(
            MacroscopeDocumentCollection DocCollection,
            List <string> UrlList
            )
        {
            if (DocCollection.CountDocuments() == 0)
            {
                return;
            }

            if (UrlList.Count == 0)
            {
                return;
            }

            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

            // The outer try/finally guarantees the progress form is closed and disposed
            // even when rendering throws.
            try
            {
                decimal Count           = 0;
                decimal TotalDocs       = ( decimal )UrlList.Count;
                decimal MajorPercentage = (( decimal )100 / TotalDocs) * Count;

                if (MacroscopePreferencesManager.GetShowProgressDialogues())
                {
                    ProgressForm.UpdatePercentages(
                        Title: "Preparing Display",
                        Message: "Processing document collection for display:",
                        MajorPercentage: MajorPercentage,
                        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                        );
                }

                this.tvTreeView.BeginUpdate();

                // The inner try/finally keeps BeginUpdate/EndUpdate balanced on failure.
                try
                {
                    DebugMsg(string.Format("HIERARCHY: {0}", "BASE"));

                    foreach (string Url in UrlList)
                    {
                        MacroscopeDocument msDoc = DocCollection.GetDocumentByUrl(Url: Url);

                        if (msDoc != null)
                        {
                            this.RenderTreeView(msDoc, Url);
                        }

                        Count++;
                        MajorPercentage = (( decimal )100 / TotalDocs) * Count;

                        // Honour the progress dialogue preference here as well, matching the
                        // initial update above and the close call below.
                        if (MacroscopePreferencesManager.GetShowProgressDialogues())
                        {
                            ProgressForm.UpdatePercentages(
                                Title: null,
                                Message: null,
                                MajorPercentage: MajorPercentage,
                                ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                                );
                        }
                    }

                    this.tvTreeView.ExpandAll();
                }
                finally
                {
                    this.tvTreeView.EndUpdate();
                }
            }
            finally
            {
                if (MacroscopePreferencesManager.GetShowProgressDialogues())
                {
                    ProgressForm.DoClose();
                }

                ProgressForm.Dispose();
            }
        }
コード例 #5
0
        /**************************************************************************/

        /// <summary>
        /// Populates the display list view with one row per document in
        /// <paramref name="UrlList"/> to which the custom filters can be applied, filling one
        /// column per filter slot with the text presence label for that filter's pattern.
        /// </summary>
        /// <param name="DocCollection">Collection the documents are looked up in.</param>
        /// <param name="UrlList">URLs of the documents to render; URLs without a document are skipped.</param>
        /// <param name="CustomFilter">Filter set that supplies the patterns and the applicability check.</param>
        /// <exception cref="InvalidOperationException">
        /// Thrown when <c>FilterColOffset</c> is still -1 when this method is called.
        /// </exception>
        private void RenderListView(
            MacroscopeDocumentCollection DocCollection,
            List <string> UrlList,
            MacroscopeCustomFilters CustomFilter
            )
        {
            if (this.FilterColOffset == -1)
            {
                // A specific exception type; handlers that catch Exception still catch it.
                throw (new InvalidOperationException("this.FilterColOffset invalid"));
            }

            if (DocCollection.CountDocuments() == 0)
            {
                return;
            }

            Dictionary <string, int> FilterColsTable = new Dictionary <string, int>(CustomFilter.GetSize());
            List <ListViewItem>      ListViewItems   = new List <ListViewItem>();

            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

            // try/finally guarantees that the progress form is closed and disposed even
            // when rendering throws part-way through.
            try
            {
                decimal Count           = 0;
                decimal TotalDocs       = (decimal)DocCollection.CountDocuments();
                decimal MajorPercentage = ((decimal)100 / TotalDocs) * Count;

                if (MacroscopePreferencesManager.GetShowProgressDialogues())
                {
                    ProgressForm.UpdatePercentages(
                        Title: "Preparing Display",
                        Message: "Processing document collection for display:",
                        MajorPercentage: MajorPercentage,
                        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                        );
                }

                // Map each filter pattern to its 1-based slot number. A pattern that is
                // already present is registered under a synthetic "EMPTYn" key instead, so
                // lookups by that pattern resolve to the first slot that used it.
                for (int Slot = 0; Slot < CustomFilter.GetSize(); Slot++)
                {
                    string FilterPattern = CustomFilter.GetPattern(Slot).Key;

                    if (FilterColsTable.ContainsKey(FilterPattern))
                    {
                        FilterColsTable.Add(string.Format("EMPTY{0}", Slot + 1), Slot + 1);
                    }
                    else
                    {
                        FilterColsTable.Add(FilterPattern, Slot + 1);
                    }
                }

                foreach (string Url in UrlList)
                {
                    MacroscopeDocument msDoc  = DocCollection.GetDocumentByUrl(Url: Url);
                    ListViewItem       lvItem = null;

                    if (msDoc == null)
                    {
                        continue;
                    }

                    string DocUrl     = msDoc.GetUrl();
                    string PairKey    = DocUrl;
                    string StatusCode = ((int)msDoc.GetStatusCode()).ToString();
                    string Status     = msDoc.GetStatusCode().ToString();
                    string MimeType   = msDoc.GetMimeType();

                    if (!CustomFilter.CanApplyCustomFiltersToDocument(msDoc: msDoc))
                    {
                        continue;
                    }

                    if (this.DisplayListView.Items.ContainsKey(PairKey))
                    {
                        // Reuse the row that is already displayed for this URL.
                        lvItem = this.DisplayListView.Items[PairKey];
                    }
                    else
                    {
                        lvItem = new ListViewItem(PairKey);
                        lvItem.UseItemStyleForSubItems = false;
                        lvItem.Name = PairKey;

                        // Four fixed cells first, then one cell per filter slot.
                        lvItem.SubItems.Add("");
                        lvItem.SubItems.Add("");
                        lvItem.SubItems.Add("");
                        lvItem.SubItems.Add("");

                        for (int Slot = 0; Slot < CustomFilter.GetSize(); Slot++)
                        {
                            lvItem.SubItems.Add("");
                        }

                        // New rows are batched and added to the control after the loop.
                        ListViewItems.Add(lvItem);
                    }

                    if (lvItem == null)
                    {
                        DebugMsg(string.Format("MacroscopeDisplayCustomFilters MISSING: {0}", PairKey));

                        // Nothing to colour; previously execution fell through and
                        // dereferenced the null item.
                        continue;
                    }

                    try
                    {
                        lvItem.SubItems[ColUrl].Text        = DocUrl;
                        lvItem.SubItems[ColStatusCode].Text = StatusCode;
                        lvItem.SubItems[ColStatus].Text     = Status;
                        lvItem.SubItems[ColMimeType].Text   = MimeType;

                        for (int Slot = 0; Slot < CustomFilter.GetSize(); Slot++)
                        {
                            string FilterPattern = CustomFilter.GetPattern(Slot: Slot).Key;
                            KeyValuePair <string, MacroscopeConstants.TextPresence> Pair = msDoc.GetCustomFilteredItem(Text: FilterPattern);
                            int ColOffset = this.FilterColOffset + FilterColsTable[FilterPattern];

                            if ((Pair.Key == null) || (Pair.Value == MacroscopeConstants.TextPresence.UNDEFINED))
                            {
                                continue;
                            }

                            lvItem.SubItems[ColOffset].Text = MacroscopeConstants.TextPresenceLabels[Pair.Value];

                            switch (Pair.Value)
                            {
                            case MacroscopeConstants.TextPresence.CONTAINS_STRING:
                            case MacroscopeConstants.TextPresence.NOT_CONTAINS_STRING:
                            case MacroscopeConstants.TextPresence.CONTAINS_REGEX:
                            case MacroscopeConstants.TextPresence.NOT_CONTAINS_REGEX:
                                lvItem.SubItems[ColOffset].ForeColor = Color.Green;
                                break;

                            case MacroscopeConstants.TextPresence.MUST_CONTAIN_STRING:
                            case MacroscopeConstants.TextPresence.SHOULD_NOT_CONTAIN_STRING:
                            case MacroscopeConstants.TextPresence.MUST_CONTAIN_REGEX:
                            case MacroscopeConstants.TextPresence.SHOULD_NOT_CONTAIN_REGEX:
                                lvItem.SubItems[ColOffset].ForeColor = Color.Red;
                                break;

                            default:
                                lvItem.SubItems[ColOffset].ForeColor = Color.Gray;
                                break;
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        DebugMsg(string.Format("MacroscopeDisplayCustomFilters: {0}", ex.Message));
                        DebugMsg(string.Format("MacroscopeDisplayCustomFilters: {0}", ex.StackTrace));
                    }

                    if (msDoc.GetIsInternal())
                    {
                        lvItem.SubItems[ColUrl].ForeColor = Color.Green;
                    }
                    else
                    {
                        lvItem.SubItems[ColUrl].ForeColor = Color.Gray;
                    }

                    // Colour the status cells by the leading digit of the numeric status code.
                    if (Regex.IsMatch(StatusCode, "^[2]"))
                    {
                        lvItem.SubItems[ColStatusCode].ForeColor = Color.Green;
                        lvItem.SubItems[ColStatus].ForeColor     = Color.Green;
                    }
                    else
                    if (Regex.IsMatch(StatusCode, "^[3]"))
                    {
                        lvItem.SubItems[ColStatusCode].ForeColor = Color.Goldenrod;
                        lvItem.SubItems[ColStatus].ForeColor     = Color.Goldenrod;
                    }
                    else
                    if (Regex.IsMatch(StatusCode, "^[45]"))
                    {
                        lvItem.SubItems[ColStatusCode].ForeColor = Color.Red;
                        lvItem.SubItems[ColStatus].ForeColor     = Color.Red;
                    }
                    else
                    {
                        lvItem.SubItems[ColStatusCode].ForeColor = Color.Blue;
                        lvItem.SubItems[ColStatus].ForeColor     = Color.Blue;
                    }

                    if (MacroscopePreferencesManager.GetShowProgressDialogues())
                    {
                        Count++;

                        MajorPercentage = ((decimal)100 / TotalDocs) * Count;

                        ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: MajorPercentage,
                            ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                            );
                    }
                }

                this.DisplayListView.Items.AddRange(ListViewItems.ToArray());

                this.DisplayListView.AutoResizeColumns(ColumnHeaderAutoResizeStyle.HeaderSize);

                this.DisplayListView.Columns[ColUrl].Width        = 300;
                this.DisplayListView.Columns[ColStatusCode].Width = 100;
                this.DisplayListView.Columns[ColStatus].Width     = 100;
                this.DisplayListView.Columns[ColMimeType].Width   = 100;
            }
            finally
            {
                if (MacroscopePreferencesManager.GetShowProgressDialogues())
                {
                    ProgressForm.DoClose();
                }

                ProgressForm.Dispose();
            }
        }
コード例 #6
0
        /** Render List ***********************************************************/

        /// <summary>
        /// Renders every document named in <paramref name="UrlList"/> into list view rows and
        /// adds the collected rows to the display list view in one batch, reporting progress
        /// through a progress form when progress dialogues are enabled.
        /// </summary>
        /// <param name="DocCollection">Collection the documents are looked up in.</param>
        /// <param name="UrlList">URLs of the documents to render; URLs without a document are skipped.</param>
        public void RenderListView(
            MacroscopeDocumentCollection DocCollection,
            List <string> UrlList
            )
        {
            if (DocCollection.CountDocuments() == 0)
            {
                return;
            }

            if (UrlList.Count == 0)
            {
                return;
            }

            List <ListViewItem> ListViewItems = new List <ListViewItem>(UrlList.Count);

            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

            // The form's control box is switched off below, so it must be closed from code:
            // try/finally guarantees that happens even when rendering throws.
            try
            {
                decimal Count           = 0;
                decimal TotalDocs       = (decimal)UrlList.Count;
                decimal MajorPercentage = ((decimal)100 / TotalDocs) * Count;

                if (MacroscopePreferencesManager.GetShowProgressDialogues())
                {
                    ProgressForm.ControlBox = false;

                    ProgressForm.UpdatePercentages(
                        Title: "Preparing Display",
                        Message: "Processing document collection for display:",
                        MajorPercentage: MajorPercentage,
                        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                        );
                }

                lock (this.DisplayListViewLock)
                {
                    foreach (string Url in UrlList)
                    {
                        // Pump pending window messages between documents.
                        Application.DoEvents();

                        MacroscopeDocument msDoc = DocCollection.GetDocumentByUrl(Url: Url);

                        if (msDoc != null)
                        {
                            this.RenderListView(
                                ListViewItems: ListViewItems,
                                DocCollection: DocCollection,
                                msDoc: msDoc,
                                Url: Url
                                );
                        }

                        if (MacroscopePreferencesManager.GetShowProgressDialogues())
                        {
                            Count++;
                            MajorPercentage = ((decimal)100 / TotalDocs) * Count;

                            ProgressForm.UpdatePercentages(
                                Title: null,
                                Message: null,
                                MajorPercentage: MajorPercentage,
                                ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                                );
                        }
                    }

                    // Add the collected rows in one batch while still holding the lock.
                    this.DisplayListView.Items.AddRange(ListViewItems.ToArray());
                }
            }
            finally
            {
                if (MacroscopePreferencesManager.GetShowProgressDialogues())
                {
                    ProgressForm.DoClose();
                }

                ProgressForm.Dispose();
            }
        }
コード例 #7
0
        /**************************************************************************/

        /// <summary>
        /// Adds a worksheet that lists sitemap entries whose target document is in error.
        /// </summary>
        /// <remarks>
        /// A row is written for each outlink of an internal sitemap XML document when the
        /// linked document is internal and either has a status code in the 400-599 range or
        /// reports <c>GetAllowedByRobots()</c> as false.
        /// </remarks>
        /// <param name="JobMaster">Source of the document collection and allowed hosts.</param>
        /// <param name="wb">Workbook that receives the new worksheet.</param>
        /// <param name="WorksheetLabel">Name of the worksheet to add.</param>
        private void BuildWorksheetSitemapXmlErrors(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            // Header row.
            ws.Cell(iRow, iCol).Value = "Sitemap URL";
            iCol++;

            ws.Cell(iRow, iCol).Value = "Status Code";
            iCol++;

            ws.Cell(iRow, iCol).Value = "Robots";
            iCol++;

            ws.Cell(iRow, iCol).Value = "URL";

            iColMax = iCol;

            iRow++;

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                if (!(msDoc.GetIsInternal() && msDoc.IsDocumentType(Type: MacroscopeConstants.DocumentType.SITEMAPXML)))
                {
                    continue;
                }

                foreach (MacroscopeLink Outlink in msDoc.IterateOutlinks())
                {
                    string             TargetUrl   = Outlink.GetTargetUrl();
                    MacroscopeDocument msDocLinked = DocCollection.GetDocumentByUrl(Url: TargetUrl);

                    // GetDocumentByUrl can return null when the target has no document in
                    // the collection; there is nothing to report for such a link.
                    if (msDocLinked == null)
                    {
                        continue;
                    }

                    if (!msDocLinked.GetIsInternal())
                    {
                        continue;
                    }

                    int  StatusCode        = (int)msDocLinked.GetStatusCode();
                    bool IsErrorStatus     = (StatusCode >= 400) && (StatusCode <= 599);
                    bool IsBlockedByRobots = !msDocLinked.GetAllowedByRobots();

                    if (!(IsErrorStatus || IsBlockedByRobots))
                    {
                        continue;
                    }

                    iCol = 1;

                    this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

                    if (AllowedHosts.IsInternalUrl(Url: msDoc.GetUrl()))
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                    }

                    iCol++;

                    // NOTE(review): the status code and robots cells are filled from the
                    // sitemap document (msDoc), although the row is triggered by the state
                    // of the linked document (msDocLinked) - confirm which one is intended.
                    this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, msDoc);

                    iCol++;

                    this.InsertAndFormatRobotsCell(ws, iRow, iCol, msDoc);

                    iCol++;

                    this.InsertAndFormatUrlCell(ws, iRow, iCol, TargetUrl);

                    if (AllowedHosts.IsInternalUrl(Url: TargetUrl))
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                    }

                    iRow++;
                }
            }

            // Turn the header plus all written rows into a table.
            ws.Range(1, 1, iRow - 1, iColMax).CreateTable();
        }
コード例 #8
0
        /**************************************************************************/

        /// <summary>
        /// Adds a worksheet that lists, for each internal HTML document, the documents
        /// returned by <c>MacroscopeLevenshteinAnalysis.AnalyzeDocCollection</c> together
        /// with the distance value reported for each of them.
        /// </summary>
        /// <remarks>
        /// Progress is reported through <c>this.ProgressForm</c>; processing stops early
        /// once the form reports that it was cancelled. Rows written before cancellation
        /// are still turned into a table.
        /// </remarks>
        /// <param name="JobMaster">Source of the document collection and allowed hosts.</param>
        /// <param name="wb">Workbook that receives the new worksheet.</param>
        /// <param name="WorksheetLabel">Name of the worksheet to add.</param>
        private void BuildWorksheetPageDuplicatePages(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            decimal DocCount     = 0;
            decimal DocListCount = 0;
            decimal CountOuter   = 0;
            decimal CountInner   = 0;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();
            Dictionary <string, bool>    CrossCheckList;

            CrossCheckList = MacroscopeLevenshteinAnalysis.GetCrossCheckList(
                Capacity: DocCollection.CountDocuments()
                );

            DocCount = ( decimal )DocCollection.CountDocuments();

            // Header row.
            ws.Cell(iRow, iCol).Value = "Status Code";
            iCol++;

            ws.Cell(iRow, iCol).Value = "Status";
            iCol++;

            ws.Cell(iRow, iCol).Value = "Origin URL";
            iCol++;

            ws.Cell(iRow, iCol).Value = "Distance";
            iCol++;

            ws.Cell(iRow, iCol).Value = "Similar URL";

            iColMax = iCol;

            iRow++;

            foreach (string UrlLeft in DocCollection.DocumentUrls())
            {
                MacroscopeDocument msDocLeft = DocCollection.GetDocumentByUrl(Url: UrlLeft);

                CountOuter++;
                CountInner = 0;

                if (DocCount > 0)
                {
                    this.ProgressForm.UpdatePercentages(
                        Title: null,
                        Message: null,
                        MajorPercentage: -1,
                        ProgressLabelMajor: string.Format("Documents Processed: {0}", CountOuter),
                        MinorPercentage: (( decimal )100 / DocCount) * CountOuter,
                        ProgressLabelMinor: UrlLeft,
                        SubMinorPercentage: 0,
                        ProgressLabelSubMinor: ""
                        );
                }

                // GetDocumentByUrl can return null; skip such URLs instead of failing
                // the whole report with a NullReferenceException.
                if (msDocLeft == null)
                {
                    continue;
                }

                if (msDocLeft.GetIsExternal())
                {
                    continue;
                }

                if (!msDocLeft.IsDocumentType(Type: MacroscopeConstants.DocumentType.HTML))
                {
                    continue;
                }

                MacroscopeLevenshteinAnalysis LevenshteinAnalysis = new MacroscopeLevenshteinAnalysis(
                    msDoc: msDocLeft,
                    SizeDifference: MacroscopePreferencesManager.GetMaxLevenshteinSizeDifference(),
                    Threshold: MacroscopePreferencesManager.GetMaxLevenshteinDistance(),
                    CrossCheckList: CrossCheckList,
                    IPercentageDone: this
                    );

                Dictionary <MacroscopeDocument, int> DocList = LevenshteinAnalysis.AnalyzeDocCollection(
                    DocCollection: DocCollection
                    );

                DocListCount = ( decimal )DocList.Count;

                // The origin document's status is the same for every row it produces.
                HttpStatusCode Status     = msDocLeft.GetStatusCode();
                int            StatusCode = ( int )Status;

                // Enumerate pairs directly rather than Keys plus an indexer lookup.
                foreach (KeyValuePair <MacroscopeDocument, int> Match in DocList)
                {
                    string UrlDuplicate = Match.Key.GetUrl();
                    int    Distance     = Match.Value;

                    CountInner++;
                    iCol = 1;

                    if (DocCount > 0)
                    {
                        // DocListCount is at least 1 here because the loop body is running.
                        this.ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: -1,
                            ProgressLabelMajor: string.Format("Documents Processed: {0}", CountOuter),
                            MinorPercentage: (( decimal )100 / DocCount) * CountOuter,
                            ProgressLabelMinor: UrlLeft,
                            SubMinorPercentage: (( decimal )100 / DocListCount) * CountInner,
                            ProgressLabelSubMinor: UrlDuplicate
                            );
                    }

                    this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, StatusCode);
                    iCol++;

                    this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, Status);
                    iCol++;

                    this.InsertAndFormatUrlCell(ws, iRow, iCol, UrlLeft);

                    if (AllowedHosts.IsInternalUrl(Url: UrlLeft))
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                    }

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, Distance.ToString());

                    // Distances at or below the configured maximum are highlighted in red.
                    if (Distance <= MacroscopePreferencesManager.GetMaxLevenshteinDistance())
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                    }

                    iCol++;

                    this.InsertAndFormatUrlCell(ws, iRow, iCol, UrlDuplicate);

                    if (AllowedHosts.IsInternalUrl(Url: UrlDuplicate))
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                    }

                    iRow++;

                    if (this.ProgressForm.Cancelled())
                    {
                        break;
                    }
                }

                if (this.ProgressForm.Cancelled())
                {
                    break;
                }
            }

            // Turn the header plus all written rows into a table.
            ws.Range(1, 1, iRow - 1, iColMax).CreateTable();
        }
コード例 #9
0
        /**************************************************************************/

        /// <summary>
        /// Adds or refreshes one list view row for every outlink of <paramref name="msDoc"/>.
        /// </summary>
        /// <remarks>
        /// Rows are keyed by the digests of the source and target URLs. A row that already
        /// exists in the list view is updated in place; a new row is appended to
        /// <paramref name="ListViewItems"/> for the caller to add.
        /// </remarks>
        /// <param name="ListViewItems">Receives newly created rows.</param>
        /// <param name="DocCollection">Used to look up the status code of each link target.</param>
        /// <param name="msDoc">Document whose outlinks are rendered.</param>
        /// <param name="Url">URL shown in the source URL column and used in the row key.</param>
        protected override void RenderListView(
            List <ListViewItem> ListViewItems,
            MacroscopeDocumentCollection DocCollection,
            MacroscopeDocument msDoc,
            string Url
            )
        {
            MacroscopeAllowedHosts Hosts = this.MainForm.GetJobMaster().GetAllowedHosts();

            foreach (MacroscopeLink Outlink in msDoc.IterateOutlinks())
            {
                string TypeLabel   = Outlink.GetLinkType().ToString();
                string TargetUrl   = Outlink.GetTargetUrl();
                string CodeLabel   = "Not crawled";
                string StatusLabel = "Not crawled";
                string RowKey      = string.Join(":", UrlToDigest(Url: Url), UrlToDigest(Url: TargetUrl));
                string AltLabel    = Outlink.GetAltText();
                string SourceRaw   = Outlink.GetRawSourceUrl();
                string TargetRaw   = Outlink.GetRawTargetUrl();

                // Replace the "Not crawled" placeholders when the target is in the collection.
                try
                {
                    if (DocCollection.ContainsDocument(Url: Outlink.GetTargetUrl()))
                    {
                        MacroscopeDocument Linked = DocCollection.GetDocumentByUrl(Url: Outlink.GetTargetUrl());
                        HttpStatusCode     Code   = Linked.GetStatusCode();

                        CodeLabel   = ((int)Code).ToString();
                        StatusLabel = Code.ToString();
                    }
                }
                catch (Exception ex)
                {
                    this.DebugMsg(ex.Message);
                }

                string FollowLabel = Outlink.GetDoFollow() ? "Follow" : "No Follow";

                // Null values are shown as empty strings.
                AltLabel  = string.IsNullOrEmpty(AltLabel) ? "" : AltLabel;
                SourceRaw = string.IsNullOrEmpty(SourceRaw) ? "" : SourceRaw;
                TargetRaw = string.IsNullOrEmpty(TargetRaw) ? "" : TargetRaw;

                ListViewItem Row = null;

                if (!this.DisplayListView.Items.ContainsKey(RowKey))
                {
                    // No row yet for this source/target pair: build a new one.
                    try
                    {
                        Row = new ListViewItem(RowKey);
                        Row.UseItemStyleForSubItems = false;
                        Row.Name = RowKey;

                        Row.SubItems[ColType].Text = TypeLabel;

                        foreach (string CellText in new string[] { Url, TargetUrl, CodeLabel, StatusLabel, FollowLabel, AltLabel, SourceRaw, TargetRaw })
                        {
                            Row.SubItems.Add(CellText);
                        }

                        ListViewItems.Add(Row);
                    }
                    catch (Exception ex)
                    {
                        this.DebugMsg(string.Format("MacroscopeDisplayLinks 2: {0}", ex.Message));
                    }
                }
                else
                {
                    // Row already displayed: refresh its cells.
                    try
                    {
                        Row = this.DisplayListView.Items[RowKey];

                        Row.SubItems[ColType].Text         = TypeLabel;
                        Row.SubItems[ColUrl].Text          = Url;
                        Row.SubItems[ColUrlTarget].Text    = TargetUrl;
                        Row.SubItems[ColStatusCode].Text   = CodeLabel;
                        Row.SubItems[ColStatus].Text       = StatusLabel;
                        Row.SubItems[ColDoFollow].Text     = FollowLabel;
                        Row.SubItems[ColAltTextLabel].Text = AltLabel;
                        Row.SubItems[ColRawSourceUrl].Text = SourceRaw;
                        Row.SubItems[ColRawTargetUrl].Text = TargetRaw;
                    }
                    catch (Exception ex)
                    {
                        this.DebugMsg(string.Format("MacroscopeDisplayLinks 1: {0}", ex.Message));
                    }
                }

                if (Row == null)
                {
                    continue;
                }

                // Default colour for every cell, then override selected columns.
                foreach (ListViewItem.ListViewSubItem Cell in Row.SubItems)
                {
                    Cell.ForeColor = Color.Blue;
                }

                Color SourceColor = Hosts.IsAllowedFromUrl(Url) ? Color.Green : Color.Gray;
                Row.SubItems[ColUrl].ForeColor = SourceColor;

                Color TargetColor = Hosts.IsAllowedFromUrl(TargetUrl) ? Color.Green : Color.Gray;
                Row.SubItems[ColUrlTarget].ForeColor = TargetColor;

                // Follow column: gray for non-allowed targets, otherwise green/red by follow flag.
                if (!Hosts.IsAllowedFromUrl(TargetUrl))
                {
                    Row.SubItems[ColDoFollow].ForeColor = Color.Gray;
                }
                else if (Outlink.GetDoFollow())
                {
                    Row.SubItems[ColDoFollow].ForeColor = Color.Green;
                }
                else
                {
                    Row.SubItems[ColDoFollow].ForeColor = Color.Red;
                }
            }
        }
コード例 #10
0
        /**************************************************************************/

        /// <summary>
        /// Writes CSV records that list, for each internal HTML document, the documents
        /// returned by <c>MacroscopeLevenshteinAnalysis.AnalyzeDocCollection</c> together
        /// with the distance value reported for each of them.
        /// </summary>
        /// <remarks>
        /// Progress is reported through <c>this.ProgressForm</c>; processing stops early
        /// once the form reports that it was cancelled.
        /// </remarks>
        /// <param name="JobMaster">Source of the document collection.</param>
        /// <param name="ws">CSV writer that receives the header and data records.</param>
        private void BuildWorksheetPageDuplicatePages(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            decimal DocCount     = 0;
            decimal DocListCount = 0;
            decimal CountOuter   = 0;
            decimal CountInner   = 0;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            Dictionary <string, bool>    CrossCheckList;

            CrossCheckList = MacroscopeLevenshteinAnalysis.GetCrossCheckList(
                Capacity: DocCollection.CountDocuments()
                );

            DocCount = ( decimal )DocCollection.CountDocuments();

            // Header record.
            ws.WriteField("Status Code");
            ws.WriteField("Status");
            ws.WriteField("Origin URL");
            ws.WriteField("Distance");
            ws.WriteField("Similar URL");

            ws.NextRecord();

            foreach (string UrlLeft in DocCollection.DocumentUrls())
            {
                MacroscopeDocument msDocLeft = DocCollection.GetDocumentByUrl(Url: UrlLeft);

                CountOuter++;
                CountInner = 0;

                if (DocCount > 0)
                {
                    this.ProgressForm.UpdatePercentages(
                        Title: null,
                        Message: null,
                        MajorPercentage: -1,
                        ProgressLabelMajor: string.Format("Documents Processed: {0}", CountOuter),
                        MinorPercentage: (( decimal )100 / DocCount) * CountOuter,
                        ProgressLabelMinor: UrlLeft,
                        SubMinorPercentage: 0,
                        ProgressLabelSubMinor: ""
                        );
                }

                // GetDocumentByUrl can return null; skip such URLs instead of failing
                // the whole report with a NullReferenceException.
                if (msDocLeft == null)
                {
                    continue;
                }

                if (msDocLeft.GetIsExternal())
                {
                    continue;
                }

                if (!msDocLeft.IsDocumentType(Type: MacroscopeConstants.DocumentType.HTML))
                {
                    continue;
                }

                MacroscopeLevenshteinAnalysis LevenshteinAnalysis = new MacroscopeLevenshteinAnalysis(
                    msDoc: msDocLeft,
                    SizeDifference: MacroscopePreferencesManager.GetMaxLevenshteinSizeDifference(),
                    Threshold: MacroscopePreferencesManager.GetMaxLevenshteinDistance(),
                    CrossCheckList: CrossCheckList,
                    IPercentageDone: this
                    );

                Dictionary <MacroscopeDocument, int> DocList = LevenshteinAnalysis.AnalyzeDocCollection(
                    DocCollection: DocCollection
                    );

                DocListCount = ( decimal )DocList.Count;

                // The origin document's status is the same for every record it produces.
                HttpStatusCode Status     = msDocLeft.GetStatusCode();
                int            StatusCode = ( int )Status;

                // Enumerate pairs directly rather than Keys plus an indexer lookup.
                foreach (KeyValuePair <MacroscopeDocument, int> Match in DocList)
                {
                    string UrlDuplicate = Match.Key.GetUrl();
                    int    Distance     = Match.Value;

                    CountInner++;

                    if (DocCount > 0)
                    {
                        // DocListCount is at least 1 here because the loop body is running.
                        this.ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: -1,
                            ProgressLabelMajor: string.Format("Documents Processed: {0}", CountOuter),
                            MinorPercentage: (( decimal )100 / DocCount) * CountOuter,
                            ProgressLabelMinor: UrlLeft,
                            SubMinorPercentage: (( decimal )100 / DocListCount) * CountInner,
                            ProgressLabelSubMinor: UrlDuplicate
                            );
                    }

                    this.InsertAndFormatStatusCodeCell(ws, StatusCode);

                    this.InsertAndFormatStatusCodeCell(ws, Status);

                    this.InsertAndFormatUrlCell(ws, UrlLeft);

                    this.InsertAndFormatContentCell(ws, Distance.ToString());

                    this.InsertAndFormatUrlCell(ws, UrlDuplicate);

                    ws.NextRecord();

                    if (this.ProgressForm.Cancelled())
                    {
                        break;
                    }
                }

                if (this.ProgressForm.Cancelled())
                {
                    break;
                }
            }
        }
コード例 #11
0
        /**************************************************************************/

        /// <summary>
        /// Renders the sitemap error table of <paramref name="DocCollection"/> into the list view.
        /// </summary>
        /// <remarks>
        /// Each entry of <c>GetSitemapErrorsAsTable()</c> supplies the "sitemap_url",
        /// "status_code", "robots" and "target_url" values for one row. Rows are keyed by
        /// the sitemap URL and target URL pair; existing rows are updated in place and new
        /// rows are added to the list view in a single call at the end.
        /// </remarks>
        /// <param name="DocCollection">Collection that supplies the table and the documents used for colouring.</param>
        private void RenderListViewSitemapErrors(MacroscopeDocumentCollection DocCollection)
        {
            List <Dictionary <string, string> > CompiledTable = DocCollection.GetSitemapErrorsAsTable();
            List <ListViewItem> ListViewItems = new List <ListViewItem>(CompiledTable.Count);

            foreach (Dictionary <string, string> Entry in CompiledTable)
            {
                string SitemapUrl = Entry["sitemap_url"];
                string StatusCode = Entry["status_code"];
                string Robots     = Entry["robots"];
                string TargetUrl  = Entry["target_url"];

                string PairKey = string.Join("::::::::", SitemapUrl, TargetUrl);

                MacroscopeDocument msDoc       = DocCollection.GetDocumentByUrl(Url: SitemapUrl);
                MacroscopeDocument msDocLinked = DocCollection.GetDocumentByUrl(Url: TargetUrl);

                ListViewItem lvItem = null;

                if (this.DisplayListView.Items.ContainsKey(PairKey))
                {
                    try
                    {
                        lvItem = this.DisplayListView.Items[PairKey];
                        lvItem.SubItems[0].Text = SitemapUrl;
                        lvItem.SubItems[1].Text = StatusCode;
                        lvItem.SubItems[2].Text = Robots;
                        lvItem.SubItems[3].Text = TargetUrl;
                    }
                    catch (Exception ex)
                    {
                        DebugMsg(string.Format("RenderListViewSitemapErrors 1: {0}", ex.Message));
                    }
                }
                else
                {
                    try
                    {
                        lvItem = new ListViewItem(PairKey);
                        lvItem.UseItemStyleForSubItems = false;
                        lvItem.Name = PairKey;

                        lvItem.SubItems[0].Text = SitemapUrl;
                        lvItem.SubItems.Add(StatusCode);
                        lvItem.SubItems.Add(Robots);
                        lvItem.SubItems.Add(TargetUrl);

                        ListViewItems.Add(lvItem);
                    }
                    catch (Exception ex)
                    {
                        DebugMsg(string.Format("RenderListViewSitemapErrors 2: {0}", ex.Message));
                    }
                }

                // lvItem stays null only when one of the try blocks above failed; the
                // failure has already been logged, so there is no row to colour.
                if (lvItem == null)
                {
                    continue;
                }

                lvItem.ForeColor = Color.Blue;

                // GetDocumentByUrl can return null; a missing document is treated as not internal.
                if ((msDoc != null) && msDoc.GetIsInternal())
                {
                    lvItem.SubItems[0].ForeColor = Color.Green;
                }
                else
                {
                    lvItem.SubItems[0].ForeColor = Color.Gray;
                }

                if (msDocLinked != null)
                {
                    if (!msDocLinked.GetAllowedByRobots())
                    {
                        lvItem.SubItems[2].ForeColor = Color.Red;
                    }
                    else
                    {
                        lvItem.SubItems[2].ForeColor = Color.Green;
                    }
                }

                // The gray fallback belongs to the internal/external test of the target.
                // It was previously attached to the lvItem null check, where it could only
                // ever dereference a null item.
                if ((msDocLinked != null) && msDocLinked.GetIsInternal())
                {
                    lvItem.SubItems[3].ForeColor = Color.Green;
                }
                else
                {
                    lvItem.SubItems[3].ForeColor = Color.Gray;
                }
            }

            this.DisplayListView.Items.AddRange(ListViewItems.ToArray());
        }