示例#1
0
        /**************************************************************************/

        /// <summary>
        /// Passes every non-null document in <paramref name="DocCollection"/> to the per-document
        /// RenderListViewSearchTargetUrls overload, collecting rows into a local list, and then adds
        /// the collected rows to <c>DisplayListView</c> in a single batch.
        /// </summary>
        /// <param name="DocCollection">Collection whose documents are iterated.</param>
        /// <param name="UrlFragment">Forwarded unchanged to the per-document overload.</param>
        public void RenderListViewSearchTargetUrls(
            MacroscopeDocumentCollection DocCollection,
            string UrlFragment
            )
        {
            int DocumentTotal = DocCollection.CountDocuments();
            List<ListViewItem> ListViewItems = new List<ListViewItem>(DocumentTotal);

            // Read the preference once so that the dialogue is closed if and only if it was opened.
            bool ShowProgress = MacroscopePreferencesManager.GetShowProgressDialogues();

            decimal Count     = 0;
            decimal TotalDocs = (decimal)DocumentTotal;

            // Percentage contributed by one document. Zero for an empty collection, because
            // decimal division by zero throws DivideByZeroException.
            decimal PercentPerDoc = (TotalDocs > 0) ? ((decimal)100 / TotalDocs) : 0;

            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

            try
            {
                if (ShowProgress)
                {
                    ProgressForm.UpdatePercentages(
                        Title: "Displaying Links",
                        Message: "Processing links in document collection for display:",
                        MajorPercentage: PercentPerDoc * Count,
                        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                        );
                }

                foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
                {
                    // The null check has to come before any member access on msDoc.
                    if (msDoc != null)
                    {
                        this.RenderListViewSearchTargetUrls(
                            ListViewItems: ListViewItems,
                            msDoc: msDoc,
                            Url: msDoc.GetUrl(),
                            UrlFragment: UrlFragment
                            );
                    }

                    if (ShowProgress)
                    {
                        Count++;

                        ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: PercentPerDoc * Count,
                            ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                            );
                    }
                }

                this.DisplayListView.Items.AddRange(ListViewItems.ToArray());
            }
            finally
            {
                // Always release the progress form, including when rendering throws.
                if (ShowProgress)
                {
                    ProgressForm.DoClose();
                }

                ProgressForm.Dispose();
            }
        }
        /** Render Entire DocCollection *******************************************/

        /// <summary>
        /// Passes each non-null document in <paramref name="DocCollection"/>, together with its URL,
        /// to the per-document RenderTreeView overload. Returns immediately for an empty collection.
        /// </summary>
        /// <param name="DocCollection">Collection whose documents are iterated.</param>
        public void RenderTreeView(MacroscopeDocumentCollection DocCollection)
        {
            // Single read: the same value is used for the guard and as the divisor below.
            int DocumentTotal = DocCollection.CountDocuments();

            if (DocumentTotal == 0)
            {
                return;
            }

            // Application.DoEvents() below pumps UI messages, so the preference is captured
            // up front to keep "dialogue opened" and "dialogue closed" in step.
            bool ShowProgress = MacroscopePreferencesManager.GetShowProgressDialogues();

            decimal Count           = 0;
            decimal TotalDocs       = (decimal)DocumentTotal;
            decimal MajorPercentage = ((decimal)100 / TotalDocs) * Count;

            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

            try
            {
                if (ShowProgress)
                {
                    ProgressForm.UpdatePercentages(
                        Title: "Preparing Display",
                        Message: "Processing document collection for display:",
                        MajorPercentage: MajorPercentage,
                        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                        );
                }

                foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
                {
                    Application.DoEvents();

                    // Null entries are skipped entirely; they do not advance the progress counter.
                    if (msDoc == null)
                    {
                        continue;
                    }

                    this.RenderTreeView(msDoc, msDoc.GetUrl());

                    if (ShowProgress)
                    {
                        Count++;
                        MajorPercentage = ((decimal)100 / TotalDocs) * Count;

                        ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: MajorPercentage,
                            ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                            );
                    }
                }
            }
            finally
            {
                // Always release the progress form, including when rendering throws.
                if (ShowProgress)
                {
                    ProgressForm.DoClose();
                }

                ProgressForm.Dispose();
            }
        }
示例#3
0
        /**************************************************************************/

        /// <summary>
        /// For every document whose URL contains <paramref name="UrlFragment"/> (culture-sensitive
        /// <c>IndexOf</c>), calls the per-document RenderListView overload to collect rows, then adds
        /// the collected rows to <c>DisplayListView</c> in a single batch.
        /// </summary>
        /// <param name="DocCollection">Collection whose documents are iterated.</param>
        /// <param name="UrlFragment">Substring looked for in each document URL.</param>
        public void RenderListViewSearchSourceUrls(
            MacroscopeDocumentCollection DocCollection,
            string UrlFragment
            )
        {
            int DocumentTotal = DocCollection.CountDocuments();
            List<ListViewItem> ListViewItems = new List<ListViewItem>(DocumentTotal);

            // Read the preference once so that the dialogue is closed if and only if it was opened.
            bool ShowProgress = MacroscopePreferencesManager.GetShowProgressDialogues();

            decimal Count     = 0;
            decimal TotalDocs = (decimal)DocumentTotal;

            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

            try
            {
                if (ShowProgress)
                {
                    ProgressForm.UpdatePercentages(
                        Title: "Displaying Links",
                        Message: "Processing links in document collection for display:",
                        // Guarded: decimal division by zero throws for an empty collection.
                        MajorPercentage: (TotalDocs > 0) ? ((decimal)100 / TotalDocs) * Count : 0,
                        ProgressLabelMajor: "Documents Processed"
                        );
                }

                foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
                {
                    string Url = msDoc.GetUrl();

                    if (Url.IndexOf(UrlFragment, StringComparison.CurrentCulture) >= 0)
                    {
                        this.RenderListView(
                            ListViewItems: ListViewItems,
                            DocCollection: DocCollection,
                            msDoc: msDoc,
                            Url: Url
                            );
                    }

                    if (ShowProgress)
                    {
                        Count++;

                        // The total is re-read on every iteration, as in the original code.
                        TotalDocs = (decimal)DocCollection.CountDocuments();

                        ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: (TotalDocs > 0) ? ((decimal)100 / TotalDocs) * Count : 0,
                            ProgressLabelMajor: null
                            );
                    }
                }

                this.DisplayListView.Items.AddRange(ListViewItems.ToArray());
            }
            finally
            {
                // Always release the progress form, including when rendering throws.
                if (ShowProgress)
                {
                    ProgressForm.DoClose();
                }

                ProgressForm.Dispose();
            }
        }
        /// <summary>
        /// Creates one document per entry in <c>this.Urls</c>, awaits a HEAD request on each, and
        /// asserts that the collection ends up holding <c>this.MaxUrls</c> documents.
        /// </summary>
        public async Task TestNHeadRequests()
        {
            MacroscopeJobMaster Master = new MacroscopeJobMaster(
                JobRunTimeMode: MacroscopeConstants.RunTimeMode.LIVE,
                TaskController: this
                );

            MacroscopeDocumentCollection Documents = new MacroscopeDocumentCollection(JobMaster: Master);

            // The collection is expected to start out empty.
            Assert.AreEqual(0, Documents.CountDocuments());

            foreach (string TargetUrl in this.Urls)
            {
                MacroscopeDocument Doc = Documents.CreateDocument(Url: TargetUrl);

                await Doc.ExecuteHeadRequest();
            }

            Assert.AreEqual(this.MaxUrls, Documents.CountDocuments());
        }
示例#5
0
        /**************************************************************************/

        /// <summary>
        /// Re-renders the redirect-chain list view and the URL count for <paramref name="DocCollection"/>.
        /// Does nothing when the collection reports no documents. The work is run through
        /// <c>MainForm.Invoke</c> when <c>MainForm.InvokeRequired</c> is true, otherwise it is called directly.
        /// </summary>
        /// <param name="DocCollection">Collection to render.</param>
        public void RefreshDataRedirectChains(MacroscopeDocumentCollection DocCollection)
        {
            if (DocCollection.CountDocuments() <= 0)
            {
                return;
            }

            // One shared body for both paths; only the way it is invoked differs.
            MethodInvoker RenderAction = delegate
            {
                Cursor.Current = Cursors.WaitCursor;
                this.DisplayListView.BeginUpdate();

                try
                {
                    this.RenderListViewRedirectChains(DocCollection);
                    this.RenderUrlCount();
                }
                finally
                {
                    // BeginUpdate() must always be balanced, otherwise the list view stays
                    // frozen after a rendering failure.
                    this.DisplayListView.EndUpdate();
                    Cursor.Current = Cursors.Default;
                }
            };

            if (this.MainForm.InvokeRequired)
            {
                this.MainForm.Invoke(RenderAction);
            }
            else
            {
                RenderAction();
            }
        }
示例#6
0
        /**************************************************************************/

        /// <summary>
        /// Runs <c>RefreshKeywordAnalysisDataProgress</c> for <paramref name="DocCollection"/> with the
        /// wait cursor set for the duration. Does nothing when the collection is empty. The work is
        /// run through <c>MainForm.Invoke</c> when <c>MainForm.InvokeRequired</c> is true, otherwise
        /// it is called directly.
        /// </summary>
        /// <param name="DocCollection">Collection to analyse.</param>
        public void RefreshKeywordAnalysisData(MacroscopeDocumentCollection DocCollection)
        {
            // Nothing to do for an empty collection.
            if (DocCollection.CountDocuments() == 0)
            {
                return;
            }

            // The same body serves both the invoked and the direct path.
            MethodInvoker RunAnalysis = delegate
            {
                Cursor.Current = Cursors.WaitCursor;
                this.RefreshKeywordAnalysisDataProgress(DocCollection: DocCollection);
                Cursor.Current = Cursors.Default;
            };

            if (!this.MainForm.InvokeRequired)
            {
                RunAnalysis();
                return;
            }

            this.MainForm.Invoke(RunAnalysis);
        }
        /**************************************************************************/

        /// <summary>
        /// Re-renders the list view for <paramref name="DocCollection"/> filtered by
        /// <paramref name="UrlFragment"/>, and refreshes the URL count. Does nothing when the
        /// collection reports no documents. Any exception is reported through <c>DebugMsg</c> and
        /// not rethrown.
        /// </summary>
        /// <param name="DocCollection">Collection to render.</param>
        /// <param name="UrlFragment">Forwarded unchanged to <c>RenderListView</c>.</param>
        public void RefreshData(
            MacroscopeDocumentCollection DocCollection,
            string UrlFragment
            )
        {
            try
            {
                if (DocCollection.CountDocuments() <= 0)
                {
                    return;
                }

                // One shared body for both paths; only the way it is invoked differs.
                MethodInvoker RenderAction = delegate
                {
                    Cursor.Current = Cursors.WaitCursor;
                    this.DisplayListView.BeginUpdate();

                    try
                    {
                        this.RenderListView(
                            DocCollection: DocCollection,
                            UrlFragment: UrlFragment
                            );
                        this.RenderUrlCount();
                    }
                    finally
                    {
                        // The outer catch swallows exceptions, so without this finally a render
                        // failure would leave BeginUpdate() unbalanced and the list view frozen.
                        this.DisplayListView.EndUpdate();
                        Cursor.Current = Cursors.Default;
                    }
                };

                if (this.MainForm.InvokeRequired)
                {
                    this.MainForm.Invoke(RenderAction);
                }
                else
                {
                    RenderAction();
                }
            }
            catch (Exception ex)
            {
                this.DebugMsg(string.Format("RefreshData: {0}", ex.Message));
            }
        }
示例#8
0
        /**************************************************************************/

        /// <summary>
        /// Calls <c>RenderListViewSearchTargetUrls</c> and <c>RenderUrlCount</c> for
        /// <paramref name="DocCollection"/> with the wait cursor set for the duration. Does nothing
        /// when the collection reports no documents. The work is run through <c>MainForm.Invoke</c>
        /// when <c>MainForm.InvokeRequired</c> is true, otherwise it is called directly.
        /// </summary>
        /// <param name="DocCollection">Collection to render.</param>
        /// <param name="UrlFragment">Forwarded unchanged to <c>RenderListViewSearchTargetUrls</c>.</param>
        public void RefreshDataSearchTargetUrls(
            MacroscopeDocumentCollection DocCollection,
            string UrlFragment
            )
        {
            // Nothing to do for an empty collection.
            if (DocCollection.CountDocuments() <= 0)
            {
                return;
            }

            // The same body serves both the invoked and the direct path.
            MethodInvoker RenderTargets = delegate
            {
                Cursor.Current = Cursors.WaitCursor;
                this.RenderListViewSearchTargetUrls(
                    DocCollection: DocCollection,
                    UrlFragment: UrlFragment
                    );
                this.RenderUrlCount();
                Cursor.Current = Cursors.Default;
            };

            if (!this.MainForm.InvokeRequired)
            {
                RenderTargets();
                return;
            }

            this.MainForm.Invoke(RenderTargets);
        }
示例#9
0
        /**************************************************************************/

        /// <summary>
        /// For every document whose URL contains <paramref name="UrlFragment"/> (culture-sensitive
        /// <c>IndexOf</c>), calls the per-document RenderListView overload to collect rows, then adds
        /// the collected rows to <c>DisplayListView</c> in a single batch.
        /// </summary>
        /// <param name="DocCollection">Collection whose documents are iterated.</param>
        /// <param name="UrlFragment">Substring looked for in each document URL.</param>
        public void RenderListViewSearchSourceUrls(
            MacroscopeDocumentCollection DocCollection,
            string UrlFragment
            )
        {
            int InitialCapacity = DocCollection.CountDocuments();
            List<ListViewItem> PendingRows = new List<ListViewItem>(InitialCapacity);

            foreach (MacroscopeDocument Candidate in DocCollection.IterateDocuments())
            {
                string CandidateUrl = Candidate.GetUrl();

                // Skip documents whose URL does not contain the fragment.
                if (CandidateUrl.IndexOf(UrlFragment, StringComparison.CurrentCulture) < 0)
                {
                    continue;
                }

                this.RenderListView(
                    ListViewItems: PendingRows,
                    msDoc: Candidate,
                    Url: CandidateUrl
                    );
            }

            ListViewItem[] RowArray = PendingRows.ToArray();
            this.DisplayListView.Items.AddRange(RowArray);
        }
示例#10
0
        /**************************************************************************/

        /// <summary>
        /// Passes every non-null document in <paramref name="DocCollection"/> to the per-document
        /// RenderListViewSearchTargetUrls overload, collecting rows into a local list, and then adds
        /// the collected rows to <c>DisplayListView</c> in a single batch.
        /// </summary>
        /// <param name="DocCollection">Collection whose documents are iterated.</param>
        /// <param name="UrlFragment">Forwarded unchanged to the per-document overload.</param>
        public void RenderListViewSearchTargetUrls(
            MacroscopeDocumentCollection DocCollection,
            string UrlFragment
            )
        {
            List<ListViewItem> ListViewItems = new List<ListViewItem>(DocCollection.CountDocuments());

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                // The null check has to come before any member access on msDoc; previously
                // GetUrl() was called first, which made the check ineffective.
                if (msDoc == null)
                {
                    continue;
                }

                this.RenderListViewSearchTargetUrls(
                    ListViewItems: ListViewItems,
                    msDoc: msDoc,
                    Url: msDoc.GetUrl(),
                    UrlFragment: UrlFragment
                    );
            }

            this.DisplayListView.Items.AddRange(ListViewItems.ToArray());
        }
        /**************************************************************************/

        /// <summary>
        /// Renders documents whose status code is in the 400-599 range. Status 410 and 451 are
        /// excluded when the corresponding "ignore" preference is set. Documents that do not
        /// qualify are passed to <c>RemoveFromListView</c>. Collected rows are added to
        /// <c>DisplayListView</c> in a single batch. Returns immediately for an empty collection.
        /// </summary>
        /// <param name="DocCollection">Collection whose documents are iterated.</param>
        public override void RenderListView(MacroscopeDocumentCollection DocCollection)
        {
            // Single read: the same value is used for the guard, the capacity, and the divisor.
            int DocumentTotal = DocCollection.CountDocuments();

            if (DocumentTotal == 0)
            {
                return;
            }

            List<ListViewItem> ListViewItems = new List<ListViewItem>(DocumentTotal);

            // Read the preference once so that the dialogue is closed if and only if it was opened.
            bool ShowProgress = MacroscopePreferencesManager.GetShowProgressDialogues();

            decimal Count           = 0;
            decimal TotalDocs       = (decimal)DocumentTotal;
            decimal MajorPercentage = ((decimal)100 / TotalDocs) * Count;

            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

            try
            {
                if (ShowProgress)
                {
                    ProgressForm.UpdatePercentages(
                        Title: "Preparing Display",
                        Message: "Processing document collection for display:",
                        MajorPercentage: MajorPercentage,
                        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                        );
                }

                foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
                {
                    int    StatusCode = (int)msDoc.GetStatusCode();
                    string Url        = msDoc.GetUrl();

                    // 4xx and 5xx responses are the ones rendered by this view.
                    bool Proceed = (StatusCode >= 400) && (StatusCode <= 599);

                    if (MacroscopePreferencesManager.GetIgnoreErrors410() && (StatusCode == 410))
                    {
                        Proceed = false;
                    }

                    if (MacroscopePreferencesManager.GetIgnoreErrors451() && (StatusCode == 451))
                    {
                        Proceed = false;
                    }

                    if (Proceed)
                    {
                        this.RenderListView(
                            ListViewItems: ListViewItems,
                            DocCollection: DocCollection,
                            msDoc: msDoc,
                            Url: Url
                            );
                    }
                    else
                    {
                        this.RemoveFromListView(Url: Url);
                    }

                    if (ShowProgress)
                    {
                        Count++;

                        // The total is re-read on every iteration, as in the original code;
                        // the division is guarded in case the re-read returns zero.
                        TotalDocs       = (decimal)DocCollection.CountDocuments();
                        MajorPercentage = (TotalDocs > 0) ? ((decimal)100 / TotalDocs) * Count : 0;

                        ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: MajorPercentage,
                            ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                            );
                    }
                }

                this.DisplayListView.Items.AddRange(ListViewItems.ToArray());
            }
            finally
            {
                // Always release the progress form, including when rendering throws.
                if (ShowProgress)
                {
                    ProgressForm.DoClose();
                }

                ProgressForm.Dispose();
            }
        }
        /**************************************************************************/

        /// <summary>
        /// Adds a worksheet named <paramref name="WorksheetLabel"/> to <paramref name="wb"/> with the
        /// columns URL / Occurrences / Title. A row is written for each document that is an internal
        /// URL according to the job's allowed hosts, is of type HTML or PDF, and for which
        /// <c>GetStatsTitleCount</c> reports more than one occurrence. The header and data rows are
        /// finally turned into a table.
        /// </summary>
        /// <param name="JobMaster">Supplies the document collection and the allowed hosts.</param>
        /// <param name="wb">Workbook that receives the new worksheet.</param>
        /// <param name="WorksheetLabel">Name given to the new worksheet.</param>
        private void BuildWorksheetPageDuplicateTitles(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow = 1;
            int iCol = 1;
            int iColMax;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            decimal Count    = 0;
            decimal DocCount = (decimal)DocCollection.CountDocuments();

            // Header row.
            ws.Cell(iRow, iCol).Value = "URL";
            iCol++;

            ws.Cell(iRow, iCol).Value = "Occurrences";
            iCol++;

            ws.Cell(iRow, iCol).Value = "Title";

            iColMax = iCol;

            iRow++;

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                // Progress is reported for every document, including those filtered out below.
                if (DocCount > 0)
                {
                    Count++;
                    this.ProgressForm.UpdatePercentages(
                        Title: null,
                        Message: null,
                        MajorPercentage: -1,
                        ProgressLabelMajor: string.Format("Documents Processed: {0}", Count),
                        MinorPercentage: ((decimal)100 / DocCount) * Count,
                        ProgressLabelMinor: msDoc.GetUrl(),
                        SubMinorPercentage: -1,
                        ProgressLabelSubMinor: null
                        );
                }

                bool Proceed = false;

                if (AllowedHosts.IsInternalUrl(Url: msDoc.GetUrl()))
                {
                    switch (msDoc.GetDocumentType())
                    {
                    case MacroscopeConstants.DocumentType.HTML:
                    case MacroscopeConstants.DocumentType.PDF:
                        Proceed = true;
                        break;
                    }
                }

                if (!Proceed)
                {
                    continue;
                }

                string Title       = msDoc.GetTitle();
                int    Occurrences = DocCollection.GetStatsTitleCount(msDoc: msDoc);

                // Only titles reported more than once are written out.
                if (Occurrences <= 1)
                {
                    continue;
                }

                iCol = 1;

                this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

                if (msDoc.GetIsInternal())
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                }
                else
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                }

                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, Occurrences);

                // Occurrences is always > 1 here, so the cell is always orange; the former
                // "else green" branch could never run and has been removed.
                ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Orange);

                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(Title));

                iRow++;
            }

            // Turn the header plus the data rows into a table.
            ws.Range(1, 1, iRow - 1, iColMax).CreateTable();
        }
        /**************************************************************************/

        /// <summary>
        /// For each URL in <paramref name="UrlList"/>, looks the document up in
        /// <paramref name="DocCollection"/> and, when found, passes it to the per-document
        /// RenderTreeView overload. The tree view is then fully expanded. Returns immediately when
        /// either the collection or the URL list is empty.
        /// </summary>
        /// <param name="DocCollection">Collection used to resolve URLs to documents.</param>
        /// <param name="UrlList">URLs to render, in order.</param>
        private void RenderTreeView(
            MacroscopeDocumentCollection DocCollection,
            List<string> UrlList
            )
        {
            if (DocCollection.CountDocuments() == 0)
            {
                return;
            }

            if (UrlList.Count == 0)
            {
                return;
            }

            // Read the preference once so that the dialogue is closed if and only if it was opened.
            bool ShowProgress = MacroscopePreferencesManager.GetShowProgressDialogues();

            decimal Count           = 0;
            decimal TotalDocs       = (decimal)UrlList.Count;   // non-zero: guarded above
            decimal MajorPercentage = ((decimal)100 / TotalDocs) * Count;

            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

            try
            {
                if (ShowProgress)
                {
                    ProgressForm.UpdatePercentages(
                        Title: "Preparing Display",
                        Message: "Processing document collection for display:",
                        MajorPercentage: MajorPercentage,
                        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                        );
                }

                this.tvTreeView.BeginUpdate();

                try
                {
                    DebugMsg(string.Format("HIERARCHY: {0}", "BASE"));

                    foreach (string Url in UrlList)
                    {
                        MacroscopeDocument msDoc = DocCollection.GetDocumentByUrl(Url: Url);

                        if (msDoc != null)
                        {
                            this.RenderTreeView(msDoc, Url);
                        }

                        // Progress updates are gated by the same preference as the initial
                        // update and the final DoClose(); previously this update was unconditional.
                        if (ShowProgress)
                        {
                            Count++;
                            MajorPercentage = ((decimal)100 / TotalDocs) * Count;

                            ProgressForm.UpdatePercentages(
                                Title: null,
                                Message: null,
                                MajorPercentage: MajorPercentage,
                                ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                                );
                        }
                    }

                    this.tvTreeView.ExpandAll();
                }
                finally
                {
                    // BeginUpdate() must always be balanced, including when rendering throws.
                    this.tvTreeView.EndUpdate();
                }
            }
            finally
            {
                // Always release the progress form, including when rendering throws.
                if (ShowProgress)
                {
                    ProgressForm.DoClose();
                }

                ProgressForm.Dispose();
            }
        }
示例#14
0
        /**************************************************************************/

        private void RenderListView(
            MacroscopeDocumentCollection DocCollection,
            List <string> UrlList,
            MacroscopeCustomFilters CustomFilter
            )
        {
            if (this.FilterColOffset == -1)
            {
                throw (new Exception("this.FilterColOffset invalid"));
            }

            if (DocCollection.CountDocuments() == 0)
            {
                return;
            }

            MacroscopeAllowedHosts   AllowedHosts    = this.MainForm.GetJobMaster().GetAllowedHosts();
            Dictionary <string, int> FilterColsTable = new Dictionary <string, int>(CustomFilter.GetSize());
            List <ListViewItem>      ListViewItems   = new List <ListViewItem>();

            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);
            decimal Count           = 0;
            decimal TotalDocs       = (decimal)DocCollection.CountDocuments();
            decimal MajorPercentage = ((decimal)100 / TotalDocs) * Count;

            if (MacroscopePreferencesManager.GetShowProgressDialogues())
            {
                ProgressForm.UpdatePercentages(
                    Title: "Preparing Display",
                    Message: "Processing document collection for display:",
                    MajorPercentage: MajorPercentage,
                    ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                    );
            }

            for (int Slot = 0; Slot < CustomFilter.GetSize(); Slot++)
            {
                string FilterPattern = CustomFilter.GetPattern(Slot).Key;

                if (FilterColsTable.ContainsKey(FilterPattern))
                {
                    FilterColsTable.Add(string.Format("EMPTY{0}", Slot + 1), Slot + 1);
                }
                else
                {
                    FilterColsTable.Add(FilterPattern, Slot + 1);
                }
            }

            foreach (string Url in UrlList)
            {
                MacroscopeDocument msDoc  = DocCollection.GetDocumentByUrl(Url: Url);
                ListViewItem       lvItem = null;
                string             DocUrl;
                string             PairKey;
                string             StatusCode;
                string             Status;
                string             MimeType;

                if (msDoc == null)
                {
                    continue;
                }
                else
                {
                    DocUrl     = msDoc.GetUrl();
                    PairKey    = DocUrl;
                    StatusCode = ((int)msDoc.GetStatusCode()).ToString();
                    Status     = msDoc.GetStatusCode().ToString();
                    MimeType   = msDoc.GetMimeType();
                }

                if (!CustomFilter.CanApplyCustomFiltersToDocument(msDoc: msDoc))
                {
                    continue;
                }

                if (this.DisplayListView.Items.ContainsKey(PairKey))
                {
                    lvItem = this.DisplayListView.Items[PairKey];
                }
                else
                {
                    lvItem = new ListViewItem(PairKey);
                    lvItem.UseItemStyleForSubItems = false;
                    lvItem.Name = PairKey;

                    lvItem.SubItems.Add("");
                    lvItem.SubItems.Add("");
                    lvItem.SubItems.Add("");
                    lvItem.SubItems.Add("");

                    for (int Slot = 0; Slot < CustomFilter.GetSize(); Slot++)
                    {
                        lvItem.SubItems.Add("");
                    }

                    ListViewItems.Add(lvItem);
                }

                if (lvItem != null)
                {
                    try
                    {
                        lvItem.SubItems[ColUrl].Text        = DocUrl;
                        lvItem.SubItems[ColStatusCode].Text = StatusCode;
                        lvItem.SubItems[ColStatus].Text     = Status;
                        lvItem.SubItems[ColMimeType].Text   = MimeType;

                        for (int Slot = 0; Slot < CustomFilter.GetSize(); Slot++)
                        {
                            string FilterPattern = CustomFilter.GetPattern(Slot: Slot).Key;
                            KeyValuePair <string, MacroscopeConstants.TextPresence> Pair = msDoc.GetCustomFilteredItem(Text: FilterPattern);
                            int ColOffset = this.FilterColOffset + FilterColsTable[FilterPattern];

                            if ((Pair.Key != null) && (Pair.Value != MacroscopeConstants.TextPresence.UNDEFINED))
                            {
                                lvItem.SubItems[ColOffset].Text = MacroscopeConstants.TextPresenceLabels[Pair.Value];

                                switch (Pair.Value)
                                {
                                case MacroscopeConstants.TextPresence.CONTAINS_STRING:
                                    lvItem.SubItems[ColOffset].ForeColor = Color.Green;
                                    break;

                                case MacroscopeConstants.TextPresence.NOT_CONTAINS_STRING:
                                    lvItem.SubItems[ColOffset].ForeColor = Color.Green;
                                    break;

                                case MacroscopeConstants.TextPresence.MUST_CONTAIN_STRING:
                                    lvItem.SubItems[ColOffset].ForeColor = Color.Red;
                                    break;

                                case MacroscopeConstants.TextPresence.SHOULD_NOT_CONTAIN_STRING:
                                    lvItem.SubItems[ColOffset].ForeColor = Color.Red;
                                    break;

                                case MacroscopeConstants.TextPresence.CONTAINS_REGEX:
                                    lvItem.SubItems[ColOffset].ForeColor = Color.Green;
                                    break;

                                case MacroscopeConstants.TextPresence.NOT_CONTAINS_REGEX:
                                    lvItem.SubItems[ColOffset].ForeColor = Color.Green;
                                    break;

                                case MacroscopeConstants.TextPresence.MUST_CONTAIN_REGEX:
                                    lvItem.SubItems[ColOffset].ForeColor = Color.Red;
                                    break;

                                case MacroscopeConstants.TextPresence.SHOULD_NOT_CONTAIN_REGEX:
                                    lvItem.SubItems[ColOffset].ForeColor = Color.Red;
                                    break;

                                default:
                                    lvItem.SubItems[ColOffset].ForeColor = Color.Gray;
                                    break;
                                }
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        DebugMsg(string.Format("MacroscopeDisplayCustomFilters: {0}", ex.Message));
                        DebugMsg(string.Format("MacroscopeDisplayCustomFilters: {0}", ex.StackTrace));
                    }
                }
                else
                {
                    DebugMsg(string.Format("MacroscopeDisplayCustomFilters MISSING: {0}", PairKey));
                }

                if (msDoc.GetIsInternal())
                {
                    lvItem.SubItems[ColUrl].ForeColor = Color.Green;
                }
                else
                {
                    lvItem.SubItems[ColUrl].ForeColor = Color.Gray;
                }

                if (Regex.IsMatch(StatusCode, "^[2]"))
                {
                    lvItem.SubItems[ColStatusCode].ForeColor = Color.Green;
                    lvItem.SubItems[ColStatus].ForeColor     = Color.Green;
                }
                else
                if (Regex.IsMatch(StatusCode, "^[3]"))
                {
                    lvItem.SubItems[ColStatusCode].ForeColor = Color.Goldenrod;
                    lvItem.SubItems[ColStatus].ForeColor     = Color.Goldenrod;
                }
                else
                if (Regex.IsMatch(StatusCode, "^[45]"))
                {
                    lvItem.SubItems[ColStatusCode].ForeColor = Color.Red;
                    lvItem.SubItems[ColStatus].ForeColor     = Color.Red;
                }
                else
                {
                    lvItem.SubItems[ColStatusCode].ForeColor = Color.Blue;
                    lvItem.SubItems[ColStatus].ForeColor     = Color.Blue;
                }

                if (MacroscopePreferencesManager.GetShowProgressDialogues())
                {
                    Count++;

                    MajorPercentage = ((decimal)100 / TotalDocs) * Count;

                    ProgressForm.UpdatePercentages(
                        Title: null,
                        Message: null,
                        MajorPercentage: MajorPercentage,
                        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                        );
                }
            }

            this.DisplayListView.Items.AddRange(ListViewItems.ToArray());

            this.DisplayListView.AutoResizeColumns(ColumnHeaderAutoResizeStyle.HeaderSize);

            this.DisplayListView.Columns[ColUrl].Width        = 300;
            this.DisplayListView.Columns[ColStatusCode].Width = 100;
            this.DisplayListView.Columns[ColStatus].Width     = 100;
            this.DisplayListView.Columns[ColMimeType].Width   = 100;

            if (MacroscopePreferencesManager.GetShowProgressDialogues())
            {
                ProgressForm.DoClose();
            }

            if (ProgressForm != null)
            {
                ProgressForm.Dispose();
            }
        }
        /**************************************************************************/

        /// <summary>
        /// Refreshes the summary tree view (and the associated summary charts) from the
        /// current state of the job master and the given document collection.
        /// </summary>
        /// <param name="DocCollection">Document collection whose statistics are displayed.</param>
        /// <remarks>
        /// Any exception raised while refreshing is logged via DebugMsg and swallowed;
        /// the tree view update is always ended, even on failure.
        /// </remarks>
        private void RenderTreeViewSummary(MacroscopeDocumentCollection DocCollection)
        {
            MacroscopeJobMaster JobMaster = this.MainForm.GetJobMaster();

            this.tvTreeView.BeginUpdate();

            try
            {
                // URL totals: the chart data points are recorded whether or not the leaf exists.
                {
                    SortedDictionary <string, double> DataPoints = new SortedDictionary <string, double>();

                    int UrlsFound = JobMaster.GetPagesFound();
                    this.SetSummaryLeafText("UrlsFound", string.Format("Total URLs Found: {0}", UrlsFound));
                    DataPoints.Add("URLs Found", (double)UrlsFound);

                    int UrlsCrawled = DocCollection.CountDocuments();
                    this.SetSummaryLeafText("UrlsCrawled", string.Format("Total URLs Crawled: {0}", UrlsCrawled));
                    DataPoints.Add("URLs Crawled", (double)UrlsCrawled);

                    ulong UrlsInternal = DocCollection.CountUrlsInternal();
                    this.SetSummaryLeafText("UrlsInternal", string.Format("Total Internal URLs: {0}", UrlsInternal));
                    DataPoints.Add("Internal URLs", (double)UrlsInternal);

                    ulong UrlsExternal = DocCollection.CountUrlsExternal();
                    this.SetSummaryLeafText("UrlsExternal", string.Format("Total External URLs: {0}", UrlsExternal));
                    DataPoints.Add("External URLs", (double)UrlsExternal);

                    this.SiteStructurePanelCharts.UpdateSiteSummary(DataPoints: DataPoints);
                }

                // Response times: a data point falls back to 0 when its leaf is missing.
                {
                    SortedDictionary <string, double> DataPoints = new SortedDictionary <string, double>();
                    decimal Fastest = DocCollection.GetStatsDurationsFastest();
                    decimal Slowest = DocCollection.GetStatsDurationsSlowest();
                    decimal Average = DocCollection.GetStatsDurationAverage();

                    bool FastestShown = this.SetSummaryLeafText(
                        "FASTESTPAGERESPONSE",
                        string.Format("Fastest Page Response: {0:0.00} secs", Fastest)
                        );
                    DataPoints.Add("Fastest Page Response", FastestShown ? (double)Fastest : 0d);

                    bool SlowestShown = this.SetSummaryLeafText(
                        "SLOWESTPAGERESPONSE",
                        string.Format("Slowest Page Response: {0:0.00} secs", Slowest)
                        );
                    DataPoints.Add("Slowest Page Response", SlowestShown ? (double)Slowest : 0d);

                    bool AverageShown = this.SetSummaryLeafText(
                        "AVERAGEPAGEDURATION",
                        string.Format("Average Page Duration: {0:0.00} secs", Average)
                        );
                    DataPoints.Add("Average Page Duration", AverageShown ? (double)Average : 0d);

                    this.SiteStructurePanelCharts.UpdateResponseTimes(DataPoints: DataPoints);
                }

                {
                    int RobotsBlocked = JobMaster.GetBlockedByRobotsList().Count;
                    this.SetSummaryLeafText("UrlsRobotsBlocked", string.Format("URLs Blocked by Robots.txt: {0}", RobotsBlocked));
                }

                {
                    ulong SitemapsFound = DocCollection.CountUrlsSitemaps();
                    this.SetSummaryLeafText("SitemapsFound", string.Format("Sitemaps Found: {0}", SitemapsFound));
                }

                this.FillSummaryLeafChildren("FETCH_WARNINGS", () => DocCollection.GetStatsWarningsCount(), null, null);

                this.FillSummaryLeafChildren("FETCH_ERRORS", () => DocCollection.GetStatsErrorsCount(), null, null);

                // Canonicals: a missing dictionary key is shown as 0. A missing leaf is simply
                // skipped; indexing into the empty Find() result here used to throw and abort
                // every section below.
                {
                    Dictionary <bool, int> Canonicals = DocCollection.GetStatsCanonicalsCount();

                    int SpecifiedCount;
                    if (!Canonicals.TryGetValue(true, out SpecifiedCount))
                    {
                        SpecifiedCount = 0;
                    }
                    this.SetSummaryLeafText("CANONICALS_SPECIFIED_SPECIFIED", string.Format("Specified: {0}", SpecifiedCount));

                    int NotSpecifiedCount;
                    if (!Canonicals.TryGetValue(false, out NotSpecifiedCount))
                    {
                        NotSpecifiedCount = 0;
                    }
                    this.SetSummaryLeafText("CANONICALS_SPECIFIED_NOT_SPECIFIED", string.Format("Not Specified: {0}", NotSpecifiedCount));
                }

                // The child tags below are used for graph selection clicks.
                this.FillSummaryLeafChildren(
                    "DOCUMENT_TYPES_FOUND_INTERNAL",
                    () => DocCollection.GetStatsDocumentTypesInternalCount(),
                    "DOCUMENT_TYPES_FOUND_INTERNAL_TYPE",
                    null
                    );

                this.FillSummaryLeafChildren(
                    "DOCUMENT_TYPES_FOUND_EXTERNAL",
                    () => DocCollection.GetStatsDocumentTypesExternalCount(),
                    "DOCUMENT_TYPES_FOUND_EXTERNAL_TYPE",
                    null
                    );

                {
                    SortedDictionary <string, double> DataPoints = new SortedDictionary <string, double>();

                    bool LeafFound = this.FillSummaryLeafChildren(
                        "LANGUAGES_SPECIFIED_PAGES",
                        () => DocCollection.GetStatsLanguagesPagesCount(),
                        "LANGUAGES_SPECIFIED_PAGES_LANG",
                        DataPoints
                        );

                    // The chart is only refreshed when the tree has the corresponding leaf.
                    if (LeafFound)
                    {
                        this.SiteStructurePanelCharts.UpdateLanguagesSpecified(DataPoints: DataPoints);
                    }
                }

                this.FillSummaryLeafChildren("LANGUAGES_DETECTED_TITLES", () => DocCollection.GetStatsLanguagesTitlesCount(), null, null);

                this.FillSummaryLeafChildren("LANGUAGES_DETECTED_DESCRIPTIONS", () => DocCollection.GetStatsLanguagesDescriptionsCount(), null, null);

                this.FillSummaryLeafChildren("LANGUAGES_DETECTED_BODYTEXTS", () => DocCollection.GetStatsLanguagesBodyTextsCount(), null, null);

                {
                    SortedDictionary <string, double> DataPoints = new SortedDictionary <string, double>();

                    bool LeafFound = this.FillSummaryLeafChildren(
                        "TEXT_READABILITY",
                        () => DocCollection.GetStatsReadabilityGradeStringsCount(),
                        "TEXT_READABILITY_NODE",
                        DataPoints
                        );

                    if (LeafFound)
                    {
                        this.SiteStructurePanelCharts.UpdateReadability(DataPoints: DataPoints);
                    }
                }
            }
            catch (Exception ex)
            {
                this.DebugMsg(ex.Message);
                this.DebugMsg(ex.Source);
            }
            finally
            {
                // Always pair BeginUpdate() with EndUpdate() so the tree view repaints again.
                this.tvTreeView.EndUpdate();
            }
        }

        /// <summary>
        /// Sets the text of the first tree node found (searching all children) under the given key.
        /// </summary>
        /// <param name="NodeKey">Key passed to the tree view's node search.</param>
        /// <param name="Text">Text to assign to the node.</param>
        /// <returns>true if a node was found and updated; false if no node has that key.</returns>
        private bool SetSummaryLeafText(string NodeKey, string Text)
        {
            TreeNode[] Found = this.tvTreeView.Nodes.Find(NodeKey, true);

            if (Found.Length == 0)
            {
                return false;
            }

            Found[0].Text = Text;

            return true;
        }

        /// <summary>
        /// Replaces the children of the first tree node found under the given key with one
        /// "key: count" child per statistics entry.
        /// </summary>
        /// <param name="NodeKey">Key passed to the tree view's node search.</param>
        /// <param name="StatsSource">Supplies the statistics; only invoked when the node exists.</param>
        /// <param name="ChildTag">Tag assigned to each created child, or null to leave the tag unset.</param>
        /// <param name="DataPoints">Optional collection that receives each entry as a chart data point; may be null.</param>
        /// <returns>true if a node with the given key was found; otherwise false.</returns>
        private bool FillSummaryLeafChildren(
            string NodeKey,
            Func <IDictionary <string, int> > StatsSource,
            string ChildTag,
            SortedDictionary <string, double> DataPoints
            )
        {
            TreeNode[] Found = this.tvTreeView.Nodes.Find(NodeKey, true);

            if (Found.Length == 0)
            {
                return false;
            }

            TreeNode Parent = Found[0];

            if (Parent != null)
            {
                IDictionary <string, int> Stats = StatsSource();

                Parent.Nodes.Clear();

                foreach (KeyValuePair <string, int> Entry in Stats)
                {
                    TreeNode Child = Parent.Nodes.Add(string.Format("{0}: {1}", Entry.Key, Entry.Value));

                    if (ChildTag != null)
                    {
                        Child.Tag = ChildTag;
                    }

                    if (DataPoints != null)
                    {
                        DataPoints.Add(Entry.Key, (double)Entry.Value);
                    }
                }
            }

            return true;
        }
示例#16
0
        /**************************************************************************/

        /// <summary>
        /// Adds a worksheet listing, for every internal HTML document, the documents that the
        /// Levenshtein analysis reports for it, one row per (origin, similar) pair, and
        /// finally turns the written range into a table.
        /// </summary>
        /// <param name="JobMaster">Job master supplying the document collection and allowed hosts.</param>
        /// <param name="wb">Workbook that receives the new worksheet.</param>
        /// <param name="WorksheetLabel">Name of the worksheet to add.</param>
        private void BuildWorksheetPageDuplicatePages(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            // Fixed, 1-based column layout of the sheet.
            const int HeaderRow        = 1;
            const int StatusCodeColumn = 1;
            const int StatusColumn     = 2;
            const int OriginUrlColumn  = 3;
            const int DistanceColumn   = 4;
            const int SimilarUrlColumn = 5;

            var ws = wb.Worksheets.Add(WorksheetLabel);

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            Dictionary <string, bool> CrossCheckList = MacroscopeLevenshteinAnalysis.GetCrossCheckList(
                Capacity: DocCollection.CountDocuments()
                );

            decimal TotalDocuments = (decimal)DocCollection.CountDocuments();

            ws.Cell(HeaderRow, StatusCodeColumn).Value = "Status Code";
            ws.Cell(HeaderRow, StatusColumn).Value     = "Status";
            ws.Cell(HeaderRow, OriginUrlColumn).Value  = "Origin URL";
            ws.Cell(HeaderRow, DistanceColumn).Value   = "Distance";
            ws.Cell(HeaderRow, SimilarUrlColumn).Value = "Similar URL";

            int     NextRow        = HeaderRow + 1;
            decimal OuterProcessed = 0;

            foreach (string UrlLeft in DocCollection.DocumentUrls())
            {
                MacroscopeDocument msDocLeft = DocCollection.GetDocumentByUrl(Url: UrlLeft);

                OuterProcessed++;

                // Both values stay constant for the whole of this outer iteration.
                string  MajorLabel   = string.Format("Documents Processed: {0}", OuterProcessed);
                decimal MinorPercent = 0;

                if (TotalDocuments > 0)
                {
                    MinorPercent = (100m / TotalDocuments) * OuterProcessed;

                    this.ProgressForm.UpdatePercentages(
                        Title: null,
                        Message: null,
                        MajorPercentage: -1,
                        ProgressLabelMajor: MajorLabel,
                        MinorPercentage: MinorPercent,
                        ProgressLabelMinor: UrlLeft,
                        SubMinorPercentage: 0,
                        ProgressLabelSubMinor: ""
                        );
                }

                // Only internal HTML documents are analysed.
                if (msDocLeft.GetIsExternal() || !msDocLeft.IsDocumentType(Type: MacroscopeConstants.DocumentType.HTML))
                {
                    continue;
                }

                MacroscopeLevenshteinAnalysis LevenshteinAnalysis = new MacroscopeLevenshteinAnalysis(
                    msDoc: msDocLeft,
                    SizeDifference: MacroscopePreferencesManager.GetMaxLevenshteinSizeDifference(),
                    Threshold: MacroscopePreferencesManager.GetMaxLevenshteinDistance(),
                    CrossCheckList: CrossCheckList,
                    IPercentageDone: this
                    );

                Dictionary <MacroscopeDocument, int> DocList = LevenshteinAnalysis.AnalyzeDocCollection(
                    DocCollection: DocCollection
                    );

                decimal MatchTotal     = (decimal)DocList.Count;
                decimal InnerProcessed = 0;

                foreach (KeyValuePair <MacroscopeDocument, int> Match in DocList)
                {
                    // The status columns describe the origin document, not the similar one.
                    int            StatusCode   = (int)msDocLeft.GetStatusCode();
                    HttpStatusCode Status       = msDocLeft.GetStatusCode();
                    string         UrlDuplicate = Match.Key.GetUrl();
                    int            Distance     = Match.Value;

                    InnerProcessed++;

                    if (TotalDocuments > 0)
                    {
                        this.ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: -1,
                            ProgressLabelMajor: MajorLabel,
                            MinorPercentage: MinorPercent,
                            ProgressLabelMinor: UrlLeft,
                            SubMinorPercentage: (100m / MatchTotal) * InnerProcessed,
                            ProgressLabelSubMinor: UrlDuplicate
                            );
                    }

                    this.InsertAndFormatStatusCodeCell(ws, NextRow, StatusCodeColumn, StatusCode);

                    this.InsertAndFormatStatusCodeCell(ws, NextRow, StatusColumn, Status);

                    this.InsertAndFormatUrlCell(ws, NextRow, OriginUrlColumn, UrlLeft);
                    ws.Cell(NextRow, OriginUrlColumn).Style.Font.SetFontColor(
                        AllowedHosts.IsInternalUrl(Url: UrlLeft) ? XLColor.Green : XLColor.Gray
                        );

                    // Distances within the configured maximum are highlighted in red.
                    this.InsertAndFormatContentCell(ws, NextRow, DistanceColumn, Distance.ToString());
                    ws.Cell(NextRow, DistanceColumn).Style.Font.SetFontColor(
                        (Distance <= MacroscopePreferencesManager.GetMaxLevenshteinDistance()) ? XLColor.Red : XLColor.Green
                        );

                    this.InsertAndFormatUrlCell(ws, NextRow, SimilarUrlColumn, UrlDuplicate);
                    ws.Cell(NextRow, SimilarUrlColumn).Style.Font.SetFontColor(
                        AllowedHosts.IsInternalUrl(Url: UrlDuplicate) ? XLColor.Green : XLColor.Gray
                        );

                    NextRow++;

                    if (this.ProgressForm.Cancelled())
                    {
                        break;
                    }
                }

                // Re-check so a cancellation seen by the inner loop also stops the outer loop.
                if (this.ProgressForm.Cancelled())
                {
                    break;
                }
            }

            // Convert everything written (header row included) into a table.
            ws.Range(HeaderRow, StatusCodeColumn, NextRow - 1, SimilarUrlColumn).CreateTable();
        }
        /// <summary>
        /// Fetches a start page and three further pages of the same site, then repeatedly runs
        /// the Levenshtein analysis of the start page against the collection and asserts that
        /// no reported document has a distance of zero.
        /// </summary>
        public void TestDifferent()
        {
            const string StartUrl = "https://nazuke.github.io/SEOMacroscope/";

            List <string> OtherUrls = new List <string> ()
            {
                "https://nazuke.github.io/SEOMacroscope/blog/",
                "https://nazuke.github.io/SEOMacroscope/downloads/",
                "https://nazuke.github.io/SEOMacroscope/manual/"
            };

            MacroscopeJobMaster Master = new MacroscopeJobMaster(
                JobRunTimeMode: MacroscopeConstants.RunTimeMode.LIVE,
                TaskController: this
                );

            MacroscopeDocumentCollection Collection = new MacroscopeDocumentCollection(JobMaster: Master);

            Dictionary <string, bool> CrossChecks = MacroscopeLevenshteinAnalysis.GetCrossCheckList(
                Capacity: Collection.CountDocuments()
                );

            // Fetch the reference document and register it with the collection.
            MacroscopeDocument StartDoc = Collection.CreateDocument(StartUrl);
            StartDoc.Execute();
            Collection.AddDocument(StartDoc);

            DebugMsg(string.Format("msDoc: {0}", StartDoc.GetStatusCode()));

            MacroscopeLevenshteinAnalysis Analysis = new MacroscopeLevenshteinAnalysis(
                msDoc: StartDoc,
                SizeDifference: 64,
                Threshold: 16,
                CrossCheckList: CrossChecks
                );

            // Fetch the comparison documents.
            foreach (string OtherUrl in OtherUrls)
            {
                MacroscopeDocument OtherDoc = Collection.CreateDocument(OtherUrl);
                OtherDoc.Execute();
                Collection.AddDocument(OtherDoc);
                DebugMsg(string.Format("msDocTarget: {0}", OtherDoc.GetStatusCode()));
            }

            // The analysis is run ten times over the same collection.
            for (int Pass = 0; Pass < 10; Pass++)
            {
                Dictionary <MacroscopeDocument, int> Results = Analysis.AnalyzeDocCollection(
                    DocCollection: Collection
                    );

                DebugMsg(string.Format("DocList: {0}", Results.Count));

                foreach (KeyValuePair <MacroscopeDocument, int> Result in Results)
                {
                    DebugMsg(string.Format("msDocAnalyzed: {0} => {1}", Result.Value, Result.Key.GetUrl()));

                    Assert.AreNotEqual(
                        Result.Value,
                        0,
                        string.Format("FAIL: {0} => {1}", Result.Value, Result.Key.GetUrl())
                        );
                }
            }
        }
示例#18
0
        /**************************************************************************/

        /// <summary>
        /// Rebuilds the keyword analysis list views (one per keyword phrase length, 1 to 4
        /// words) from the document collection, reporting progress through a progress form
        /// when progress dialogues are enabled in the preferences.
        /// </summary>
        /// <param name="DocCollection">Document collection supplying the keyword analysis.</param>
        /// <remarks>
        /// The progress form is closed and disposed, and each list view's update is ended,
        /// even if rendering throws; the exception itself still propagates to the caller.
        /// </remarks>
        public void RefreshKeywordAnalysisDataProgress(MacroscopeDocumentCollection DocCollection)
        {
            // Number of keyword phrase lengths rendered; list view i shows (i + 1)-word terms.
            const int MaxWords = 4;

            MacroscopeDoublePercentageProgressForm ProgressForm = new MacroscopeDoublePercentageProgressForm(this.MainForm);

            try
            {
                decimal MajorPercentage = 0;

                if (MacroscopePreferencesManager.GetShowProgressDialogues())
                {
                    ProgressForm.UpdatePercentages(
                        Title: "Preparing Display",
                        Message: "Processing keyword terms collection for display:",
                        MajorPercentage: MajorPercentage,
                        ProgressLabelMajor: "",
                        MinorPercentage: 0,
                        ProgressLabelMinor: ""
                        );
                }

                // Best effort only: failing to raise the form must not abort the refresh.
                try
                {
                    ProgressForm.TopMost = true;
                }
                catch (Exception ex)
                {
                    DebugMsg(string.Format("ProgressForm.Show(): {0}", ex.Message));
                }

                for (int i = 0; i < MaxWords; i++)
                {
                    List <ListViewItem> ListViewItems = new List <ListViewItem> (DocCollection.CountDocuments());

                    Application.DoEvents();

                    if (!ProgressForm.Cancelled())
                    {
                        Dictionary <string, int> DicTerms = DocCollection.GetDeepKeywordAnalysisAsDictonary(Words: i + 1);

                        if (MacroscopePreferencesManager.GetShowProgressDialogues())
                        {
                            MajorPercentage = (( decimal )100 / ( decimal )MaxWords) * ( decimal )(i + 1);

                            ProgressForm.UpdatePercentages(
                                Title: null,
                                Message: null,
                                MajorPercentage: MajorPercentage,
                                ProgressLabelMajor: string.Format("{0} Word Keywords", i + 1),
                                MinorPercentage: 0,
                                ProgressLabelMinor: ""
                                );
                        }

                        this.TargetListViews[i].BeginUpdate();

                        try
                        {
                            this.RenderKeywordAnalysisListView(
                                ListViewItems: ListViewItems,
                                TargetListView: this.TargetListViews[i],
                                DicTerms: DicTerms,
                                ProgressForm: ProgressForm
                                );

                            this.TargetListViews[i].Items.AddRange(ListViewItems.ToArray());
                        }
                        finally
                        {
                            // Never leave the list view stuck between BeginUpdate and EndUpdate.
                            this.TargetListViews[i].EndUpdate();
                        }
                    }
                }
            }
            finally
            {
                try
                {
                    if (MacroscopePreferencesManager.GetShowProgressDialogues())
                    {
                        ProgressForm.DoClose();
                    }
                }
                finally
                {
                    ProgressForm.Dispose();
                }
            }
        }
示例#19
0
        /**************************************************************************/

        /// <summary>
        /// Collects the download durations of all internal, downloaded documents, then renders
        /// up to 20 of the fastest and 20 of the slowest into their list views and updates the
        /// average label. Rendering is marshalled through MainForm when an invoke is required.
        /// </summary>
        /// <param name="DocCollection">Document collection to measure.</param>
        public void RefreshSiteSpeedData(MacroscopeDocumentCollection DocCollection)
        {
            // Nothing can be rendered once either target list view has been disposed.
            if (this.lvListViewFastest.IsDisposed || this.lvListViewSlowest.IsDisposed)
            {
                return;
            }

            if (DocCollection.CountDocuments() <= 0)
            {
                return;
            }

            const int MeasurePages = 20;

            decimal DurationTotal = 0;
            int     PagesMeasured = 0;

            SortedList <decimal, string> AllByDuration = new SortedList <decimal, string> (DocCollection.CountDocuments(), this.DecimalSorterAscending);
            SortedList <decimal, string> SlowestPages  = new SortedList <decimal, string> (MeasurePages, this.DecimalSorterDescending);
            SortedList <decimal, string> FastestPages  = new SortedList <decimal, string> (MeasurePages, this.DecimalSorterAscending);

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                string  Url      = msDoc.GetUrl();
                decimal Duration = msDoc.GetDurationInSeconds();

                if (!(msDoc.GetIsInternal() && msDoc.GetWasDownloaded()))
                {
                    continue;
                }

                PagesMeasured++;
                DurationTotal += Duration;

                // Keyed by duration: a later document with the same duration replaces the earlier URL.
                AllByDuration[Duration] = Url;
            }

            int Available = AllByDuration.Count;

            // Leading entries of the duration-keyed list.
            for (int Index = 0; (Index < Available) && (Index < MeasurePages); Index++)
            {
                decimal Key = AllByDuration.Keys[Index];
                FastestPages.Add(Key, AllByDuration[Key]);
            }

            // Trailing entries, walked from the end backwards.
            for (int Index = Available - 1; (Index >= 0) && (Index >= Available - MeasurePages); Index--)
            {
                decimal Key = AllByDuration.Keys[Index];
                SlowestPages.Add(Key, AllByDuration[Key]);
            }

            decimal Average = (PagesMeasured > 0) ? (DurationTotal / PagesMeasured) : 0;

            MethodInvoker RenderLists = delegate
            {
                Cursor.Current = Cursors.WaitCursor;
                this.RenderSiteSpeedListView(this.lvListViewSlowest, SlowestPages);
                this.RenderSiteSpeedListView(this.lvListViewFastest, FastestPages);
                this.UpdateAverageLabel(Average);
                Cursor.Current = Cursors.Default;
            };

            if (this.MainForm.InvokeRequired)
            {
                this.MainForm.Invoke(RenderLists);
            }
            else
            {
                RenderLists();
            }
        }
        /** Render DocCollection Filtered by URL Fragment *************************/

        /// <summary>
        /// Renders every document in <paramref name="DocCollection"/> whose URL contains
        /// <paramref name="UrlFragment"/> into the display list view, optionally showing
        /// a progress dialogue while the items are prepared.
        /// </summary>
        /// <remarks>
        /// The URL match is a culture-sensitive (CurrentCulture) substring search.
        /// Returns immediately when the collection is empty.
        /// </remarks>
        /// <param name="DocCollection">Documents to filter and render.</param>
        /// <param name="UrlFragment">
        /// Substring that a document URL must contain. A null value is treated as the
        /// empty string, which matches every URL.
        /// </param>
        public void RenderListView(
            MacroscopeDocumentCollection DocCollection,
            string UrlFragment
            )
        {
            if (DocCollection.CountDocuments() == 0)
            {
                return;
            }

            string Fragment = UrlFragment ?? string.Empty;
            List<ListViewItem> ListViewItems = new List<ListViewItem>(DocCollection.CountDocuments());

            // Read the preference once: Application.DoEvents() below lets other UI code
            // run, and the dialogue must be closed if and only if it was opened.
            bool ShowProgress = MacroscopePreferencesManager.GetShowProgressDialogues();

            decimal Count = 0;
            decimal TotalDocs = (decimal)DocCollection.CountDocuments();
            decimal PercentPerDoc = (decimal)100 / TotalDocs; // TotalDocs > 0, see the guard above

            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

            try
            {
                if (ShowProgress)
                {
                    ProgressForm.ControlBox = false;

                    ProgressForm.UpdatePercentages(
                        Title: "Preparing Display",
                        Message: "Processing document collection for display:",
                        MajorPercentage: PercentPerDoc * Count,
                        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                        );
                }

                foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
                {
                    Application.DoEvents();

                    if (msDoc != null)
                    {
                        string Url = msDoc.GetUrl();

                        // A document without a URL cannot match any fragment.
                        if ((Url != null) && (Url.IndexOf(Fragment, StringComparison.CurrentCulture) >= 0))
                        {
                            this.RenderListView(
                                ListViewItems: ListViewItems,
                                DocCollection: DocCollection,
                                msDoc: msDoc,
                                Url: Url
                                );
                        }
                    }

                    if (ShowProgress)
                    {
                        Count++;

                        ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: PercentPerDoc * Count,
                            ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                            );
                    }
                }

                this.DisplayListView.Items.AddRange(ListViewItems.ToArray());
            }
            finally
            {
                // Close and dispose the dialogue even if rendering threw part-way.
                try
                {
                    if (ShowProgress)
                    {
                        ProgressForm.DoClose();
                    }
                }
                finally
                {
                    ProgressForm.Dispose();
                }
            }
        }
        /** Render Filtered DocCollection *******************************************/

        /// <summary>
        /// Renders the documents in <paramref name="DocCollection"/> that match
        /// <paramref name="DocumentType"/> into the display list view, optionally
        /// showing a progress dialogue while the items are prepared.
        /// </summary>
        /// <remarks>
        /// INTERNALURL selects documents whose GetIsInternal() is true, EXTERNALURL
        /// those whose GetIsExternal() is true, ALL selects everything, and any other
        /// value selects documents whose GetDocumentType() equals it.
        /// Returns immediately when the collection is empty.
        /// </remarks>
        /// <param name="DocCollection">Documents to filter and render.</param>
        /// <param name="DocumentType">Filter to apply.</param>
        public void RenderListView(
            MacroscopeDocumentCollection DocCollection,
            MacroscopeConstants.DocumentType DocumentType
            )
        {
            if (DocCollection.CountDocuments() == 0)
            {
                return;
            }

            List<ListViewItem> ListViewItems = new List<ListViewItem>(DocCollection.CountDocuments());

            // Read the preference once: Application.DoEvents() below lets other UI code
            // run, and the dialogue must be closed if and only if it was opened.
            bool ShowProgress = MacroscopePreferencesManager.GetShowProgressDialogues();

            decimal Count = 0;
            decimal TotalDocs = (decimal)DocCollection.CountDocuments();
            decimal PercentPerDoc = (decimal)100 / TotalDocs; // TotalDocs > 0, see the guard above

            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

            try
            {
                if (ShowProgress)
                {
                    ProgressForm.ControlBox = false;

                    ProgressForm.UpdatePercentages(
                        Title: "Preparing Display",
                        Message: "Processing document collection for display:",
                        MajorPercentage: PercentPerDoc * Count,
                        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                        );
                }

                foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
                {
                    Application.DoEvents();

                    if (msDoc != null)
                    {
                        bool Include;

                        switch (DocumentType)
                        {
                        case MacroscopeConstants.DocumentType.INTERNALURL:
                            Include = msDoc.GetIsInternal();
                            break;

                        case MacroscopeConstants.DocumentType.EXTERNALURL:
                            Include = msDoc.GetIsExternal();
                            break;

                        default:
                            Include =
                                (msDoc.GetDocumentType() == DocumentType) ||
                                (DocumentType == MacroscopeConstants.DocumentType.ALL);
                            break;
                        }

                        if (Include)
                        {
                            this.RenderListView(
                                ListViewItems: ListViewItems,
                                DocCollection: DocCollection,
                                msDoc: msDoc,
                                Url: msDoc.GetUrl()
                                );
                        }
                    }

                    if (ShowProgress)
                    {
                        Count++;

                        ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: PercentPerDoc * Count,
                            ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                            );
                    }
                }

                this.DisplayListView.Items.AddRange(ListViewItems.ToArray());
            }
            finally
            {
                // Close and dispose the dialogue even if rendering threw part-way.
                try
                {
                    if (ShowProgress)
                    {
                        ProgressForm.DoClose();
                    }
                }
                finally
                {
                    ProgressForm.Dispose();
                }
            }
        }
示例#22
0
        /**************************************************************************/

        /// <summary>
        /// Renders every non-empty redirect chain reported by
        /// <paramref name="DocCollection"/> into the display list view and then
        /// auto-sizes the columns, optionally showing a progress dialogue.
        /// </summary>
        /// <remarks>
        /// A chain that fails to render is logged through DebugMsg and skipped; the
        /// remaining chains are still processed. Progress is counted per chain but is
        /// reported against the total number of documents in the collection.
        /// </remarks>
        /// <param name="DocCollection">Collection supplying the redirect chains.</param>
        private void RenderListViewRedirectChains(MacroscopeDocumentCollection DocCollection)
        {
            List<ListViewItem> ListViewItems = new List<ListViewItem>(DocCollection.CountDocuments());
            List<List<MacroscopeRedirectChainDocStruct>> RedirectChains = DocCollection.GetMacroscopeRedirectChains();

            // Read the preference once: Application.DoEvents() below lets other UI code
            // run, and the dialogue must be closed if and only if it was opened.
            bool ShowProgress = MacroscopePreferencesManager.GetShowProgressDialogues();

            decimal Count = 0;
            decimal TotalDocs = (decimal)DocCollection.CountDocuments();

            // Decimal division by zero throws, so an empty collection must not reach it.
            decimal PercentPerItem = (TotalDocs > 0) ? ((decimal)100 / TotalDocs) : 0;

            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

            try
            {
                if (ShowProgress)
                {
                    ProgressForm.ControlBox = false;

                    ProgressForm.UpdatePercentages(
                        Title: "Preparing Display",
                        Message: "Processing document collection for display:",
                        MajorPercentage: PercentPerItem * Count,
                        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                        );
                }

                foreach (List<MacroscopeRedirectChainDocStruct> DocList in RedirectChains)
                {
                    Application.DoEvents();

                    if (DocList.Count > 0)
                    {
                        try
                        {
                            this.RenderListViewRedirectChains(
                                ListViewItems: ListViewItems,
                                DocCollection: DocCollection,
                                DocList: DocList
                                );
                        }
                        catch (Exception ex)
                        {
                            // Best effort: one broken chain must not abort the whole view.
                            this.DebugMsg(string.Format("RenderListViewRedirectChains 1: {0}", ex.Message));
                        }
                    }

                    if (ShowProgress)
                    {
                        Count++;

                        ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: PercentPerItem * Count,
                            ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                            );
                    }
                }

                this.DisplayListView.Items.AddRange(ListViewItems.ToArray());
                this.DisplayListView.AutoResizeColumns(ColumnHeaderAutoResizeStyle.ColumnContent);
                this.DisplayListView.AutoResizeColumns(ColumnHeaderAutoResizeStyle.HeaderSize);
            }
            finally
            {
                // Close and dispose the dialogue even if rendering threw part-way.
                try
                {
                    if (ShowProgress)
                    {
                        ProgressForm.DoClose();
                    }
                }
                finally
                {
                    ProgressForm.Dispose();
                }
            }
        }
        /**************************************************************************/

        /// <summary>
        /// Writes the "duplicate ETags" CSV sheet: a header row followed by one row
        /// (status code, status, occurrences, ETag, URL) for every document whose
        /// ETag is shared by more than one document in the job's collection.
        /// </summary>
        /// <remarks>
        /// Documents with a null or empty ETag are ignored. Progress is reported via
        /// this.ProgressForm: the minor bar tracks the distinct ETags visited and the
        /// sub-minor bar tracks the documents scanned for the current ETag.
        /// </remarks>
        /// <param name="JobMaster">Job whose document collection is analysed.</param>
        /// <param name="ws">CSV writer that receives the rows.</param>
        private void BuildWorksheetPageDuplicateEtags(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            int Capacity = DocCollection.CountDocuments();

            // ETag => number of documents carrying it.
            Dictionary<string, int> DuplicatesList = new Dictionary<string, int>(Capacity);

            // URL => document, restricted to documents that have an ETag.
            Dictionary<string, MacroscopeDocument> DuplicatesDocList = new Dictionary<string, MacroscopeDocument>(Capacity);

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                string Etag = msDoc.GetEtag();

                if (string.IsNullOrEmpty(Etag))
                {
                    continue;
                }

                string Url = msDoc.GetUrl();

                if (!DuplicatesDocList.ContainsKey(Url))
                {
                    DuplicatesDocList.Add(Url, msDoc);
                }

                // TryGetValue leaves Seen at 0 for an ETag not encountered before.
                int Seen;
                DuplicatesList.TryGetValue(Etag, out Seen);
                DuplicatesList[Etag] = Seen + 1;
            }

            ws.WriteField("Status Code");
            ws.WriteField("Status");
            ws.WriteField("Occurrences");
            ws.WriteField("ETag");
            ws.WriteField("URL");
            ws.NextRecord();

            // Progress steps are based on the collections actually iterated, so that
            // both bars can reach 100% even when some documents carry no ETag.
            decimal OuterStep = (DuplicatesList.Count > 0) ? ((decimal)100 / DuplicatesList.Count) : 0;
            decimal InnerStep = (DuplicatesDocList.Count > 0) ? ((decimal)100 / DuplicatesDocList.Count) : 0;
            decimal CountOuter = 0;

            foreach (KeyValuePair<string, int> Entry in DuplicatesList)
            {
                string Etag = Entry.Key;
                int Occurrences = Entry.Value;

                CountOuter++;

                if (Occurrences <= 1)
                {
                    continue;
                }

                decimal CountInner = 0;

                foreach (MacroscopeDocument msDoc in DuplicatesDocList.Values)
                {
                    CountInner++;

                    this.ProgressForm.UpdatePercentages(
                        Title: null,
                        Message: null,
                        MajorPercentage: -1,
                        ProgressLabelMajor: string.Format("Documents Processed: {0}", CountOuter),
                        MinorPercentage: OuterStep * CountOuter,
                        ProgressLabelMinor: Etag,
                        SubMinorPercentage: InnerStep * CountInner,
                        ProgressLabelSubMinor: msDoc.GetUrl()
                        );

                    if (msDoc.GetEtag() != Etag)
                    {
                        continue;
                    }

                    HttpStatusCode Status = msDoc.GetStatusCode();
                    int StatusCode = (int)Status;

                    this.InsertAndFormatStatusCodeCell(ws, StatusCode);
                    this.InsertAndFormatStatusCodeCell(ws, Status);
                    this.InsertAndFormatContentCell(ws, Occurrences);
                    this.InsertAndFormatContentCell(ws, msDoc.GetEtag());
                    this.InsertAndFormatUrlCell(ws, msDoc);

                    ws.NextRecord();
                }
            }
        }
        /// <summary>
        /// Fetches the project start page through two URLs (with and without
        /// "index.html") and asserts, over 100 repeated analyses, that every document
        /// returned by the Levenshtein analysis has a distance of 0 from the start page.
        /// </summary>
        /// <remarks>
        /// Runs in LIVE mode and executes both documents, so it presumably needs
        /// network access to nazuke.github.io — TODO confirm. The same CrossCheckList
        /// instance is shared across all 100 iterations.
        /// </remarks>
        public async Task TestDuplicate()
        {
            const string                 StartUrl = "https://nazuke.github.io/SEOMacroscope/";
            const string                 DupeUrl  = "https://nazuke.github.io/SEOMacroscope/index.html";
            MacroscopeJobMaster          JobMaster;
            MacroscopeDocumentCollection DocCollection;
            Dictionary <string, bool>    CrossCheckList;
            MacroscopeDocument           msDoc;
            MacroscopeDocument           msDocDifferent; // despite the name, created from DupeUrl

            JobMaster = new MacroscopeJobMaster(
                JobRunTimeMode: MacroscopeConstants.RunTimeMode.LIVE,
                TaskController: this
                );

            DocCollection = new MacroscopeDocumentCollection(JobMaster: JobMaster);

            CrossCheckList = MacroscopeLevenshteinAnalysis.GetCrossCheckList(Capacity: DocCollection.CountDocuments());

            msDoc          = DocCollection.CreateDocument(StartUrl);
            msDocDifferent = DocCollection.CreateDocument(DupeUrl);

            await msDoc.Execute();

            await msDocDifferent.Execute();

            DebugMsg(string.Format("msDoc: {0}", msDoc.GetStatusCode()));

            DebugMsg(string.Format("msDocDifferent: {0}", msDocDifferent.GetStatusCode()));

            for (int i = 1; i <= 100; i++)
            {
                MacroscopeLevenshteinAnalysis LevenshteinAnalysis = new MacroscopeLevenshteinAnalysis(
                    msDoc: msDoc,
                    SizeDifference: 64,
                    Threshold: 16,
                    CrossCheckList: CrossCheckList
                    );

                Dictionary <MacroscopeDocument, int> DocList = LevenshteinAnalysis.AnalyzeDocCollection(DocCollection: DocCollection);

                DebugMsg(string.Format("DocList: {0}", DocList.Count));

                foreach (KeyValuePair <MacroscopeDocument, int> Analyzed in DocList)
                {
                    int    Distance    = Analyzed.Value;
                    string AnalyzedUrl = Analyzed.Key.GetUrl();

                    DebugMsg(string.Format("msDocAnalyzed: {0} => {1}", Distance, AnalyzedUrl));

                    // Expected value first, actual second, so a failure reports
                    // "expected 0 but was <distance>" rather than the reverse.
                    Assert.AreEqual(
                        0,
                        Distance,
                        string.Format("FAIL: {0} => {1}", Distance, AnalyzedUrl)
                        );
                }
            }
        }
        /**************************************************************************/

        /// <summary>
        /// Writes the "duplicate titles" CSV sheet: a header row followed by one row
        /// (URL, occurrences, title) for every internal HTML or PDF document whose
        /// title count, as reported by GetStatsTitleCount(), is greater than one.
        /// </summary>
        /// <param name="JobMaster">Job supplying the document collection and allowed hosts.</param>
        /// <param name="ws">CSV writer that receives the rows.</param>
        private void BuildWorksheetPageDuplicateTitles(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            decimal Total     = ( decimal )DocCollection.CountDocuments();
            decimal Processed = 0;

            // Header row.
            ws.WriteField("URL");
            ws.WriteField("Occurrences");
            ws.WriteField("Title");
            ws.NextRecord();

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                // Progress is reported for every document, including filtered-out ones.
                if (Total > 0)
                {
                    Processed++;
                    this.ProgressForm.UpdatePercentages(
                        Title: null,
                        Message: null,
                        MajorPercentage: -1,
                        ProgressLabelMajor: string.Format("Documents Processed: {0}", Processed),
                        MinorPercentage: (( decimal )100 / Total) * Processed,
                        ProgressLabelMinor: msDoc.GetUrl(),
                        SubMinorPercentage: -1,
                        ProgressLabelSubMinor: null
                        );
                }

                // Only internal URLs are eligible.
                if (!AllowedHosts.IsInternalUrl(Url: msDoc.GetUrl()))
                {
                    continue;
                }

                // Only HTML and PDF documents are eligible.
                MacroscopeConstants.DocumentType Kind = msDoc.GetDocumentType();

                if (
                    (Kind != MacroscopeConstants.DocumentType.HTML) &&
                    (Kind != MacroscopeConstants.DocumentType.PDF))
                {
                    continue;
                }

                string PageTitle  = msDoc.GetTitle();
                int    TitleCount = DocCollection.GetStatsTitleCount(msDoc: msDoc);

                if (TitleCount <= 1)
                {
                    continue;
                }

                this.InsertAndFormatUrlCell(ws, msDoc);
                this.InsertAndFormatContentCell(ws, TitleCount);
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(PageTitle));

                ws.NextRecord();
            }
        }
示例#26
0
        /**************************************************************************/

        /// <summary>
        /// Adds a worksheet named <paramref name="WorksheetLabel"/> to
        /// <paramref name="wb"/> listing, for every document, each keyword together
        /// with its presence status and the document URL, then turns the filled range
        /// into an Excel table.
        /// </summary>
        /// <remarks>
        /// The status cell is coloured green for the PRESENT_* statuses, orange for
        /// MISSING_IN_DESCRIPTION and red for the remaining statuses handled in the
        /// switch below. Documents whose keyword analysis is null contribute no rows.
        /// </remarks>
        /// <param name="JobMaster">Not used by this method.</param>
        /// <param name="wb">Workbook that receives the new worksheet.</param>
        /// <param name="WorksheetLabel">Name of the worksheet to add.</param>
        /// <param name="DocCollection">Documents to report on.</param>
        private void BuildWorksheetKeywordsPresence(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel,
            MacroscopeDocumentCollection DocCollection
            )
        {
            var     ws       = wb.Worksheets.Add(WorksheetLabel);
            int     iRow     = 1;
            int     iCol     = 1;
            int     iColMax  = 1;
            decimal DocCount = 0;
            decimal DocTotal = (decimal)DocCollection.CountDocuments();

            {
                ws.Cell(iRow, iCol).Value = "Presence";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Keyword";
                iCol++;

                ws.Cell(iRow, iCol).Value = "URL";
            }

            iColMax = iCol;

            iRow++;

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                List <KeyValuePair <string, MacroscopeKeywordPresenceAnalysis.KEYWORD_STATUS> > KeywordPresence;

                KeywordPresence = DocCollection.GetKeywordPresenceAnalysis(msDoc: msDoc);

                // Count this document before reporting so that the first document is
                // reported and the final one brings the bar to 100%.
                DocCount++;

                // Guard on the total (the divisor), not on the running counter.
                if (DocTotal > 0)
                {
                    this.ProgressForm.UpdatePercentages(
                        Title: null,
                        Message: null,
                        MajorPercentage: -1,
                        ProgressLabelMajor: null,
                        MinorPercentage: ((decimal)100 / DocTotal) * DocCount,
                        ProgressLabelMinor: "Documents Processed"
                        );
                }

                if (KeywordPresence == null)
                {
                    continue;
                }

                foreach (KeyValuePair <string, MacroscopeKeywordPresenceAnalysis.KEYWORD_STATUS> Pair in KeywordPresence)
                {
                    MacroscopeKeywordPresenceAnalysis.KEYWORD_STATUS Status = Pair.Value;
                    string Keyword = Pair.Key;

                    iCol = 1;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, Status.ToString());

                    switch (Status)
                    {
                    case MacroscopeKeywordPresenceAnalysis.KEYWORD_STATUS.KEYWORDS_METATAG_EMPTY:
                    case MacroscopeKeywordPresenceAnalysis.KEYWORD_STATUS.MALFORMED_KEYWORDS_METATAG:
                    case MacroscopeKeywordPresenceAnalysis.KEYWORD_STATUS.MISSING_IN_TITLE:
                    case MacroscopeKeywordPresenceAnalysis.KEYWORD_STATUS.MISSING_IN_BODY:
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                        break;

                    case MacroscopeKeywordPresenceAnalysis.KEYWORD_STATUS.PRESENT_IN_TITLE:
                    case MacroscopeKeywordPresenceAnalysis.KEYWORD_STATUS.PRESENT_IN_DESCRIPTION:
                    case MacroscopeKeywordPresenceAnalysis.KEYWORD_STATUS.PRESENT_IN_BODY:
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                        break;

                    case MacroscopeKeywordPresenceAnalysis.KEYWORD_STATUS.MISSING_IN_DESCRIPTION:
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Orange);
                        break;

                    default:
                        // Any other status keeps the default font colour.
                        break;
                    }

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, Keyword);

                    iCol++;

                    this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc.GetUrl());

                    iRow++;
                }
            }

            // iRow points one past the last written row.
            ws.Range(1, 1, iRow - 1, iColMax).CreateTable();
        }
        /**************************************************************************/

        /// <summary>
        /// Adds a worksheet named <paramref name="WorksheetLabel"/> to
        /// <paramref name="wb"/> with one row (status code, status, occurrences,
        /// checksum, URL) for every document whose checksum is shared by more than one
        /// document in the job's collection, then turns the filled range into an
        /// Excel table.
        /// </summary>
        /// <remarks>
        /// Documents with a null or empty checksum are ignored. Progress is reported
        /// via this.ProgressForm: the minor bar tracks the distinct checksums visited
        /// and the sub-minor bar tracks the documents scanned for the current checksum.
        /// </remarks>
        /// <param name="JobMaster">Job whose document collection is analysed.</param>
        /// <param name="wb">Workbook that receives the new worksheet.</param>
        /// <param name="WorksheetLabel">Name of the worksheet to add.</param>
        private void BuildWorksheetPageDuplicateChecksums(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            int Capacity = DocCollection.CountDocuments();

            // Checksum => number of documents carrying it.
            Dictionary <string, int> DuplicatesList = new Dictionary <string, int> (Capacity);

            // URL => document, restricted to documents that have a checksum.
            Dictionary <string, MacroscopeDocument> DuplicatesDocList = new Dictionary <string, MacroscopeDocument> (Capacity);

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                string Checksum = msDoc.GetChecksum();

                if (string.IsNullOrEmpty(Checksum))
                {
                    continue;
                }

                string Url = msDoc.GetUrl();

                if (!DuplicatesDocList.ContainsKey(Url))
                {
                    DuplicatesDocList.Add(Url, msDoc);
                }

                // TryGetValue leaves Seen at 0 for a checksum not encountered before.
                int Seen;
                DuplicatesList.TryGetValue(Checksum, out Seen);
                DuplicatesList[Checksum] = Seen + 1;
            }

            {
                ws.Cell(iRow, iCol).Value = "Status Code";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Status";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Occurrences";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Checksum";
                iCol++;

                ws.Cell(iRow, iCol).Value = "URL";
            }

            iColMax = iCol;

            iRow++;

            // Progress steps are based on the collections actually iterated, so that
            // both bars can reach 100% even when some documents carry no checksum.
            decimal OuterStep = (DuplicatesList.Count > 0) ? ((decimal)100 / DuplicatesList.Count) : 0;
            decimal InnerStep = (DuplicatesDocList.Count > 0) ? ((decimal)100 / DuplicatesDocList.Count) : 0;
            decimal CountOuter = 0;

            foreach (KeyValuePair <string, int> Entry in DuplicatesList)
            {
                string Checksum    = Entry.Key;
                int    Occurrences = Entry.Value;

                CountOuter++;

                if (Occurrences <= 1)
                {
                    continue;
                }

                decimal CountInner = 0;

                foreach (MacroscopeDocument msDoc in DuplicatesDocList.Values)
                {
                    CountInner++;

                    this.ProgressForm.UpdatePercentages(
                        Title: null,
                        Message: null,
                        MajorPercentage: -1,
                        ProgressLabelMajor: string.Format("Documents Processed: {0}", CountOuter),
                        MinorPercentage: OuterStep * CountOuter,
                        ProgressLabelMinor: Checksum,
                        SubMinorPercentage: InnerStep * CountInner,
                        ProgressLabelSubMinor: msDoc.GetUrl()
                        );

                    if (msDoc.GetChecksum() != Checksum)
                    {
                        continue;
                    }

                    HttpStatusCode Status     = msDoc.GetStatusCode();
                    int            StatusCode = ( int )Status;

                    iCol = 1;

                    this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, StatusCode);
                    iCol++;

                    this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, Status);
                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, Occurrences);
                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, msDoc.GetChecksum());
                    iCol++;

                    this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

                    iRow++;
                }
            }

            // iRow points one past the last written row.
            ws.Range(1, 1, iRow - 1, iColMax).CreateTable();
        }
        /** XML Sitemap Generators ************************************************/

        /// <summary>
        /// Builds an XML sitemap document with one &lt;url&gt; entry (loc, lastmod,
        /// changefreq "daily", priority "1.0") for every internal, non-redirect HTML
        /// or PDF document in this.DocCollection that returned HTTP 200 OK.
        /// </summary>
        /// <remarks>
        /// When the SitemapIncludeLinkedPdfs preference is on, each included HTML
        /// document is also passed to GenerateXmlSitemapPdfEntries together with a
        /// shared de-duplication dictionary.
        /// </remarks>
        /// <param name="Host">
        /// When non-empty, only documents whose GetHostAndPort() equals this value
        /// are included; when null or empty, no host filtering is applied.
        /// </param>
        /// <returns>The generated sitemap document.</returns>
        public XmlDocument GenerateXmlSitemap(string Host)
        {
            Dictionary <string, bool> Dedupe     = new Dictionary <string, bool>(this.DocCollection.CountDocuments());
            XmlDocument    SitemapXml            = new XmlDocument();
            XmlDeclaration SitemapXmlDeclaration = SitemapXml.CreateXmlDeclaration("1.0", "UTF-8", null);
            XmlElement     UrlSetNode            = SitemapXml.CreateElement(string.Empty, "urlset", MacroscopeSitemapGenerator.XmlNamespace);
            bool           FilterByHost          = !string.IsNullOrEmpty(Host);

            // The document is still empty here, so DocumentElement is null and the
            // declaration is inserted as the first node.
            SitemapXml.InsertBefore(SitemapXmlDeclaration, SitemapXml.DocumentElement);
            SitemapXml.AppendChild(UrlSetNode);

            foreach (MacroscopeDocument msDoc in this.DocCollection.IterateDocuments())
            {
                if (!msDoc.GetStatusCode().Equals(HttpStatusCode.OK))
                {
                    continue;
                }

                if ((!msDoc.GetIsInternal()) || msDoc.GetIsRedirect())
                {
                    continue;
                }

                MacroscopeConstants.DocumentType DocType = msDoc.GetDocumentType();

                if (
                    (DocType != MacroscopeConstants.DocumentType.HTML) &&
                    (DocType != MacroscopeConstants.DocumentType.PDF))
                {
                    continue;
                }

                // The host filter may only narrow the selection. Previously a matching
                // host re-enabled documents of every type, bypassing the HTML/PDF check.
                // NOTE(review): confirm per-host sitemaps are meant to be HTML/PDF only.
                if (FilterByHost && !msDoc.GetHostAndPort().Equals(Host))
                {
                    continue;
                }

                XmlElement UrlNode = SitemapXml.CreateElement(string.Empty, "url", MacroscopeSitemapGenerator.XmlNamespace);

                UrlSetNode.AppendChild(UrlNode);

                AppendSitemapTextElement(SitemapXml, UrlNode, "loc", msDoc.GetUrl());
                AppendSitemapTextElement(SitemapXml, UrlNode, "lastmod", msDoc.GetDateModifiedForSitemapXml());
                AppendSitemapTextElement(SitemapXml, UrlNode, "changefreq", "daily");
                AppendSitemapTextElement(SitemapXml, UrlNode, "priority", "1.0");

                if (
                    MacroscopePreferencesManager.GetSitemapIncludeLinkedPdfs() &&
                    msDoc.IsDocumentType(Type: MacroscopeConstants.DocumentType.HTML))
                {
                    this.GenerateXmlSitemapPdfEntries(
                        msDoc: msDoc,
                        SitemapXml: SitemapXml,
                        UrlSetNode: UrlSetNode,
                        Dedupe: Dedupe
                        );
                }
            }

            return(SitemapXml);
        }

        /// <summary>
        /// Appends a child element called <paramref name="Name"/> in the sitemap
        /// namespace to <paramref name="ParentNode"/>, containing
        /// <paramref name="Text"/> as its only text node.
        /// </summary>
        private static void AppendSitemapTextElement(
            XmlDocument SitemapXml,
            XmlElement ParentNode,
            string Name,
            string Text
            )
        {
            XmlElement EntryNode = SitemapXml.CreateElement(string.Empty, Name, MacroscopeSitemapGenerator.XmlNamespace);

            ParentNode.AppendChild(EntryNode);
            EntryNode.AppendChild(SitemapXml.CreateTextNode(Text));
        }
示例#29
0
        /**************************************************************************/

        /// <summary>
        /// Renders the CSS-selector extraction results of the documents named in
        /// <paramref name="UrlList"/> into the display list view, one row per
        /// (URL, selector label, extracted value) combination.
        /// </summary>
        /// <param name="DocCollection">Collection the URLs are looked up in; nothing is rendered when it is empty.</param>
        /// <param name="UrlList">URLs of the documents to render.</param>
        /// <param name="DataExtractor">Extractor asked whether each document is eligible for rendering.</param>
        private void RenderListView(
            MacroscopeDocumentCollection DocCollection,
            List <string> UrlList,
            MacroscopeDataExtractorCssSelectors DataExtractor
            )
        {
            if (DocCollection.CountDocuments() == 0)
            {
                return;
            }

            bool ShowProgress = MacroscopePreferencesManager.GetShowProgressDialogues();
            List <ListViewItem> ListViewItems = new List <ListViewItem> ();
            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

            try
            {
                decimal Count = 0;

                // NOTE(review): the total is the collection size while the loop walks UrlList;
                // confirm the two always have the same length, otherwise the label is skewed.
                decimal TotalDocs = ( decimal )DocCollection.CountDocuments();

                if (ShowProgress)
                {
                    ProgressForm.UpdatePercentages(
                        Title: "Preparing Display",
                        Message: "Processing document collection for display:",
                        MajorPercentage: (( decimal )100 / TotalDocs) * Count,
                        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                        );
                }

                foreach (string Url in UrlList)
                {
                    MacroscopeDocument msDoc = DocCollection.GetDocumentByUrl(Url: Url);

                    // GetDocumentByUrl is not visible from here; guard in case it yields
                    // null for a URL that is not (or no longer) in the collection.
                    if (
                        (msDoc != null) &&
                        DataExtractor.CanApplyDataExtractorsToDocument(msDoc: msDoc))
                    {
                        this.RenderExtractedCssSelectorRows(
                            msDoc: msDoc,
                            ListViewItems: ListViewItems
                            );
                    }

                    // Advance the progress for every URL, including skipped ones,
                    // so the counter keeps pace with the loop.
                    if (ShowProgress)
                    {
                        Count++;

                        ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: (( decimal )100 / TotalDocs) * Count,
                            ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                            );
                    }
                }

                this.DisplayListView.Items.AddRange(ListViewItems.ToArray());

                this.DisplayListView.AutoResizeColumns(ColumnHeaderAutoResizeStyle.ColumnContent);

                this.DisplayListView.Columns[ColUrl].Width        = 300;
                this.DisplayListView.Columns[ColStatusCode].Width = 100;
                this.DisplayListView.Columns[ColStatus].Width     = 100;
                this.DisplayListView.Columns[ColMimeType].Width   = 100;
            }
            finally
            {
                // Always release the progress form, even when rendering throws.
                try
                {
                    if (ShowProgress)
                    {
                        ProgressForm.DoClose();
                    }
                }
                finally
                {
                    ProgressForm.Dispose();
                }
            }
        }

        /// <summary>
        /// Adds or refreshes one list view row for every extracted label/value pair
        /// of <paramref name="msDoc"/> whose label and value are both non-empty.
        /// Rows that are not yet displayed are appended to <paramref name="ListViewItems"/>.
        /// </summary>
        private void RenderExtractedCssSelectorRows(
            MacroscopeDocument msDoc,
            List <ListViewItem> ListViewItems
            )
        {
            string DocUrl     = msDoc.GetUrl();
            string StatusCode = (( int )msDoc.GetStatusCode()).ToString();
            string Status     = msDoc.GetStatusCode().ToString();
            string MimeType   = msDoc.GetMimeType();
            Color  UrlColour  = msDoc.GetIsInternal() ? Color.Green : Color.Gray;
            Color  StatusColour;

            // The colour depends only on the leading digit of the numeric status code,
            // so it is resolved once per document rather than once per row.
            switch (StatusCode[0])
            {
            case '2':
                StatusColour = Color.Green;
                break;

            case '3':
                StatusColour = Color.Goldenrod;
                break;

            case '4':
            case '5':
                StatusColour = Color.Red;
                break;

            default:
                StatusColour = Color.Blue;
                break;
            }

            foreach (KeyValuePair <string, string> DataExtractedPair in msDoc.IterateDataExtractedCssSelectors())
            {
                string CssSelectorLabel = DataExtractedPair.Key;
                string ExtractedValue   = DataExtractedPair.Value;

                if (
                    string.IsNullOrEmpty(CssSelectorLabel) ||
                    string.IsNullOrEmpty(ExtractedValue))
                {
                    continue;
                }

                string PairKey = string.Join(
                    ":",
                    UrlToDigest(DocUrl),
                    UrlToDigest(Macroscope.GetStringDigest(Text: CssSelectorLabel)),
                    UrlToDigest(Macroscope.GetStringDigest(Text: ExtractedValue))
                    );

                // The keyed indexer returns null when no displayed item carries this key.
                ListViewItem lvItem = this.DisplayListView.Items[PairKey];

                if (lvItem == null)
                {
                    lvItem = new ListViewItem(PairKey);
                    lvItem.UseItemStyleForSubItems = false;
                    lvItem.Name = PairKey;

                    for (int i = 0; i < 6; i++)
                    {
                        lvItem.SubItems.Add("");
                    }

                    ListViewItems.Add(lvItem);
                }

                try
                {
                    lvItem.SubItems[ColUrl].Text              = DocUrl;
                    lvItem.SubItems[ColStatusCode].Text       = StatusCode;
                    lvItem.SubItems[ColStatus].Text           = Status;
                    lvItem.SubItems[ColMimeType].Text         = MimeType;
                    lvItem.SubItems[ColCssSelectorLabel].Text = CssSelectorLabel;
                    lvItem.SubItems[ColExtractedValue].Text   = ExtractedValue;

                    lvItem.SubItems[ColUrl].ForeColor        = UrlColour;
                    lvItem.SubItems[ColStatusCode].ForeColor = StatusColour;
                    lvItem.SubItems[ColStatus].ForeColor     = StatusColour;
                }
                catch (Exception ex)
                {
                    DebugMsg(string.Format("MacroscopeDisplayDataExtractorCssSelectors: {0}", ex.Message));
                    DebugMsg(string.Format("MacroscopeDisplayDataExtractorCssSelectors: {0}", ex.StackTrace));
                }
            }
        }
        /**************************************************************************/

        /// <summary>
        /// Rebuilds the display list view as an hreflang overview: five fixed columns
        /// (URL, status code, site locale, hreflang presence, title) followed by three
        /// columns per locale in <paramref name="LocalesList"/> (alternate URL, server
        /// date, modified date). Only internal, non-redirect HTML documents get a row.
        /// </summary>
        /// <param name="DocCollection">Documents to render; nothing is changed when it is empty.</param>
        /// <param name="LocalesList">Locales whose keys define the per-locale column groups.</param>
        private void RenderListView(
            MacroscopeDocumentCollection DocCollection,
            Dictionary <string, string> LocalesList
            )
        {
            MacroscopeAllowedHosts AllowedHosts = this.MainForm.GetJobMaster().GetAllowedHosts();

            if (DocCollection.CountDocuments() == 0)
            {
                return;
            }

            bool ShowProgress = MacroscopePreferencesManager.GetShowProgressDialogues();
            List <ListViewItem> ListViewItems = new List <ListViewItem> ();

            // Maps each locale to the index of the first of its three columns.
            Dictionary <string, int> LocaleColsTable = new Dictionary <string, int> ();

            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

            try
            {
                decimal Count     = 0;
                decimal TotalDocs = ( decimal )DocCollection.CountDocuments();

                if (ShowProgress)
                {
                    ProgressForm.UpdatePercentages(
                        Title: "Preparing Display",
                        Message: "Processing document collection for display:",
                        MajorPercentage: (( decimal )100 / TotalDocs) * Count,
                        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                        );
                }

                this.DisplayListView.Items.Clear();
                this.DisplayListView.Columns.Clear();

                this.DisplayListView.Columns.Add("URL", "URL");
                this.DisplayListView.Columns.Add("Status Code", "Status Code");
                this.DisplayListView.Columns.Add("Site Locale", "Site Locale");
                this.DisplayListView.Columns.Add("HrefLang Present", "HrefLang Present");
                this.DisplayListView.Columns.Add("Title", "Title");

                int NextLocaleCol = 5;

                foreach (string Locale in LocalesList.Keys)
                {
                    string LocaleLabel       = Locale.ToUpper();
                    string DateServerLabel   = string.Format("{0} Date Server", Locale.ToUpper());
                    string DateModifiedLabel = string.Format("{0} Date Modified", Locale.ToUpper());

                    this.DisplayListView.Columns.Add(LocaleLabel, LocaleLabel);
                    this.DisplayListView.Columns.Add(DateServerLabel, DateServerLabel);
                    this.DisplayListView.Columns.Add(DateModifiedLabel, DateModifiedLabel);

                    LocaleColsTable[Locale] = NextLocaleCol;
                    NextLocaleCol += 3;
                }

                foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
                {
                    if (
                        msDoc.GetIsInternal() &&
                        (!msDoc.GetIsRedirect()) &&
                        msDoc.IsDocumentType(Type: MacroscopeConstants.DocumentType.HTML))
                    {
                        this.RenderHrefLangRow(
                            msDoc: msDoc,
                            LocalesList: LocalesList,
                            LocaleColsTable: LocaleColsTable,
                            AllowedHosts: AllowedHosts,
                            ListViewItems: ListViewItems
                            );
                    }

                    if (ShowProgress)
                    {
                        Count++;

                        ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: (( decimal )100 / TotalDocs) * Count,
                            ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                            );
                    }
                }

                this.DisplayListView.Items.AddRange(ListViewItems.ToArray());

                this.DisplayListView.AutoResizeColumns(ColumnHeaderAutoResizeStyle.ColumnContent);

                this.DisplayListView.Columns[ColUrl].Width        = 300;
                this.DisplayListView.Columns[ColStatusCode].Width = 80;
                this.DisplayListView.Columns[ColSiteLocale].Width = 100;
                this.DisplayListView.Columns[ColTitle].Width      = 100;
            }
            finally
            {
                // Always release the progress form, even when rendering throws.
                try
                {
                    if (ShowProgress)
                    {
                        ProgressForm.DoClose();
                    }
                }
                finally
                {
                    ProgressForm.Dispose();
                }
            }
        }

        /// <summary>
        /// Builds (or refreshes) the list view row for a single document and fills in
        /// its fixed columns plus the three columns of every non-empty locale.
        /// A row that is not yet displayed is appended to <paramref name="ListViewItems"/>.
        /// </summary>
        private void RenderHrefLangRow(
            MacroscopeDocument msDoc,
            Dictionary <string, string> LocalesList,
            Dictionary <string, int> LocaleColsTable,
            MacroscopeAllowedHosts AllowedHosts,
            List <ListViewItem> ListViewItems
            )
        {
            Dictionary <string, MacroscopeHrefLang> HrefLangsTable = msDoc.GetHrefLangs();
            string         DocUrl        = msDoc.GetUrl();
            string         PairKey       = UrlToDigest(DocUrl).ToString();
            HttpStatusCode StatusCode    = msDoc.GetStatusCode();
            int            StatusCodeNum = ( int )StatusCode;
            string         DocLocale     = msDoc.GetLocale();
            string         DocTitle      = msDoc.GetTitle();

            // More than one entry is required before hreflang counts as specified.
            bool HrefLangSpecified = (HrefLangsTable != null) && (HrefLangsTable.Count > 1);

            // The keyed indexer returns null when no displayed item carries this key.
            ListViewItem lvItem = this.DisplayListView.Items[PairKey];

            if (lvItem == null)
            {
                int BlankSubItems = 5 + (3 * LocalesList.Keys.Count);

                lvItem = new ListViewItem(PairKey);
                lvItem.UseItemStyleForSubItems = false;
                lvItem.Name = PairKey;

                for (int i = 0; i < BlankSubItems; i++)
                {
                    lvItem.SubItems.Add("");
                }

                ListViewItems.Add(lvItem);
            }

            try
            {
                lvItem.SubItems[ColUrl].Text        = DocUrl;
                lvItem.SubItems[ColStatusCode].Text = StatusCode.ToString();
                lvItem.SubItems[ColSiteLocale].Text = DocLocale;
                lvItem.SubItems[ColTitle].Text      = DocTitle;

                if (HrefLangSpecified)
                {
                    lvItem.SubItems[ColHrefLangPresent].ForeColor = Color.Green;
                    lvItem.SubItems[ColHrefLangPresent].Text      = "SPECIFIED";
                }
                else
                {
                    lvItem.SubItems[ColHrefLangPresent].ForeColor = Color.Red;
                    lvItem.SubItems[ColHrefLangPresent].Text      = "UNSPECIFIED";
                }

                lvItem.SubItems[ColUrl].ForeColor = AllowedHosts.IsInternalUrl(DocUrl) ? Color.Green : Color.Gray;

                if ((StatusCodeNum >= 100) && (StatusCodeNum <= 299))
                {
                    lvItem.SubItems[ColStatusCode].ForeColor = Color.Green;
                }
                else if ((StatusCodeNum >= 300) && (StatusCodeNum <= 399))
                {
                    lvItem.SubItems[ColStatusCode].ForeColor = Color.Orange;
                }
                else if ((StatusCodeNum >= 400) && (StatusCodeNum <= 599))
                {
                    lvItem.SubItems[ColStatusCode].ForeColor = Color.Red;
                }
                else
                {
                    // Out-of-range codes grey out the status cell itself; this branch
                    // previously coloured the site-locale cell instead.
                    lvItem.SubItems[ColStatusCode].ForeColor = Color.Gray;
                }

                foreach (string Locale in LocalesList.Keys)
                {
                    if (string.IsNullOrEmpty(Locale))
                    {
                        continue;
                    }

                    int                LocaleCol         = LocaleColsTable[Locale];
                    MacroscopeHrefLang HrefLangAlternate = null;

                    if (HrefLangsTable != null)
                    {
                        HrefLangsTable.TryGetValue(Locale, out HrefLangAlternate);
                    }

                    string HrefLangUrl = (HrefLangAlternate != null) ? HrefLangAlternate.GetUrl() : null;

                    if (!string.IsNullOrEmpty(HrefLangUrl))
                    {
                        lvItem.SubItems[LocaleCol].ForeColor = Color.Blue;

                        lvItem.SubItems[LocaleCol].Text     = HrefLangUrl;
                        lvItem.SubItems[LocaleCol + 1].Text = HrefLangAlternate.GetDateServer().ToString();
                        lvItem.SubItems[LocaleCol + 2].Text = HrefLangAlternate.GetDateModified().ToString();
                    }
                    else
                    {
                        lvItem.SubItems[LocaleCol].ForeColor = Color.Red;

                        lvItem.SubItems[LocaleCol].Text     = "NOT SPECIFIED";
                        lvItem.SubItems[LocaleCol + 1].Text = "NOT SPECIFIED";
                        lvItem.SubItems[LocaleCol + 2].Text = "NOT SPECIFIED";
                    }
                }
            }
            catch (Exception ex)
            {
                DebugMsg(string.Format("MacroscopeDisplayHrefLang: {0}", ex.Message));
                DebugMsg(string.Format("MacroscopeDisplayHrefLang: {0}", ex.StackTrace));
            }
        }