示例#1
0
        /** Render Document List **************************************************/

        /// <summary>
        /// Builds list view rows for every document in <paramref name="DocList"/> and
        /// appends them to the display list view in one batch. When the user preference
        /// for progress dialogues is enabled, a progress form is updated after each
        /// document is processed.
        /// </summary>
        /// <param name="DocList">
        /// Documents to render. A null or empty list is a no-op; null entries inside the
        /// list are skipped (but still counted towards progress).
        /// </param>
        public void RenderListView(List <MacroscopeDocument> DocList)
        {
            // Guard first: an empty list would otherwise cause a divide-by-zero in the
            // percentage calculation, and a null list a NullReferenceException.
            if ((DocList == null) || (DocList.Count == 0))
            {
                return;
            }

            MacroscopeDocumentCollection DocCollection = this.MainForm.GetJobMaster().GetDocCollection();

            List <ListViewItem> ListViewItems = new List <ListViewItem>(DocList.Count);

            // Read the preference once so that the dialogue is opened, updated and closed
            // consistently even if the preference is changed while rendering.
            bool ShowProgress = MacroscopePreferencesManager.GetShowProgressDialogues();

            decimal Count           = 0;
            decimal TotalDocs       = (decimal)DocList.Count;
            decimal MajorPercentage = ((decimal)100 / TotalDocs) * Count;

            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

            try
            {
                if (ShowProgress)
                {
                    ProgressForm.ControlBox = false;

                    ProgressForm.UpdatePercentages(
                        Title: "Preparing Display",
                        Message: "Processing document collection for display:",
                        MajorPercentage: MajorPercentage,
                        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                        );
                }

                foreach (MacroscopeDocument msDoc in DocList)
                {
                    if (msDoc != null)
                    {
                        this.RenderListView(
                            ListViewItems: ListViewItems,
                            DocCollection: DocCollection,
                            msDoc: msDoc,
                            Url: msDoc.GetUrl()
                            );
                    }

                    if (ShowProgress)
                    {
                        Count++;
                        MajorPercentage = ((decimal)100 / TotalDocs) * Count;

                        ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: MajorPercentage,
                            ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                            );
                    }
                }

                // Add all new rows in a single call rather than one at a time.
                this.DisplayListView.Items.AddRange(ListViewItems.ToArray());

                if (ShowProgress)
                {
                    ProgressForm.DoClose();
                }
            }
            finally
            {
                // Always release the form, including when a renderer throws part-way through.
                ProgressForm.Dispose();
            }
        }
示例#2
0
        /**************************************************************************/

        /// <summary>
        /// Adds or refreshes the list view row for a single JavaScript document.
        /// Redirects and documents that are not of type JAVASCRIPT are ignored.
        /// </summary>
        /// <param name="ListViewItems">Receives the row when it does not yet exist in the list view.</param>
        /// <param name="DocCollection">Not used by this implementation.</param>
        /// <param name="msDoc">Document whose status, MIME type and content length are shown.</param>
        /// <param name="Url">URL shown in the first column and used as the row key.</param>
        protected override void RenderListView(
            List <ListViewItem> ListViewItems,
            MacroscopeDocumentCollection DocCollection,
            MacroscopeDocument msDoc,
            string Url
            )
        {
            if (msDoc.GetIsRedirect() || !msDoc.IsDocumentType(Type: MacroscopeConstants.DocumentType.JAVASCRIPT))
            {
                return;
            }

            string StatusLabel = msDoc.GetStatusCode().ToString();
            string ContentType = msDoc.GetMimeType();
            string ByteCount   = msDoc.GetContentLength().ToString();
            string RowKey      = string.Join("", Url);

            ListViewItem Row = null;

            if (!this.DisplayListView.Items.ContainsKey(RowKey))
            {
                // No row for this URL yet: build one and queue it for insertion.
                try
                {
                    Row = new ListViewItem(RowKey);
                    Row.UseItemStyleForSubItems = false;
                    Row.Name = RowKey;

                    Row.SubItems[0].Text = Url;
                    Row.SubItems.Add(StatusLabel);
                    Row.SubItems.Add(ContentType);
                    Row.SubItems.Add(ByteCount);

                    ListViewItems.Add(Row);
                }
                catch (Exception ex)
                {
                    DebugMsg(string.Format("MacroscopeDisplayJavascripts 2: {0}", ex.Message));
                }
            }
            else
            {
                // Row already displayed: overwrite its cell text in place.
                try
                {
                    Row = this.DisplayListView.Items[RowKey];
                    Row.SubItems[0].Text = Url;
                    Row.SubItems[1].Text = StatusLabel;
                    Row.SubItems[2].Text = ContentType;
                    Row.SubItems[3].Text = ByteCount;
                }
                catch (Exception ex)
                {
                    DebugMsg(string.Format("MacroscopeDisplayJavascripts 1: {0}", ex.Message));
                }
            }

            if (Row == null)
            {
                return;
            }

            Row.ForeColor = Color.Blue;

            // URL column: green for internal documents, gray otherwise.
            Row.SubItems[0].ForeColor = msDoc.GetIsInternal() ? Color.Green : Color.Gray;

            // Status column: red for anything other than 200 OK.
            Row.SubItems[1].ForeColor = (msDoc.GetStatusCode() != HttpStatusCode.OK) ? Color.Red : Color.Green;
        }
示例#3
0
        /**************************************************************************/

        /// <summary>
        /// Adds or refreshes one list view row for each outlink of <paramref name="msDoc"/>
        /// whose target URL contains <paramref name="UrlFragment"/>.
        /// </summary>
        /// <param name="ListViewItems">Receives rows that do not yet exist in the list view.</param>
        /// <param name="msDoc">Document whose outlinks are examined.</param>
        /// <param name="Url">Source URL shown in the URL column; also part of the row key.</param>
        /// <param name="UrlFragment">Substring to look for in each link's target URL (culture-sensitive match).</param>
        private void RenderListViewSearchTargetUrls(
            List <ListViewItem> ListViewItems,
            MacroscopeDocument msDoc,
            string Url,
            string UrlFragment
            )
        {
            MacroscopeAllowedHosts       AllowedHosts  = this.MainForm.GetJobMaster().GetAllowedHosts();
            MacroscopeDocumentCollection DocCollection = this.MainForm.GetJobMaster().GetDocCollection();

            foreach (MacroscopeLink Link in msDoc.IterateOutlinks())
            {
                string UrlTarget = Link.GetTargetUrl();

                // Apply the filter before anything else so that digests and collection
                // lookups are only computed for links that will actually be displayed.
                if (
                    (UrlTarget == null) ||
                    (UrlTarget.IndexOf(UrlFragment, StringComparison.CurrentCulture) < 0))
                {
                    continue;
                }

                string         LinkType       = Link.GetLinkType().ToString();
                HttpStatusCode StatusCode     = HttpStatusCode.NotFound;
                string         StatusCodeText = "Not crawled";
                string         StatusText     = "Not crawled";
                string         PairKey        = string.Join(":", UrlToDigest(Url: Url), UrlToDigest(Url: UrlTarget));
                bool           IsDoFollow     = Link.GetDoFollow();
                string         DoFollow       = IsDoFollow ? "Follow" : "No Follow";
                string         AltText        = Link.GetAltText();
                string         AltTextLabel   = string.IsNullOrEmpty(AltText) ? "" : AltText;
                string         RawSourceUrl   = Link.GetRawSourceUrl();
                string         RawTargetUrl   = Link.GetRawTargetUrl();

                if (string.IsNullOrEmpty(RawSourceUrl))
                {
                    RawSourceUrl = "";
                }

                if (string.IsNullOrEmpty(RawTargetUrl))
                {
                    RawTargetUrl = "";
                }

                // Replace the "Not crawled" placeholders when the target is in the collection.
                try
                {
                    if (DocCollection.ContainsDocument(Url: UrlTarget))
                    {
                        StatusCode     = DocCollection.GetDocumentByUrl(Url: UrlTarget).GetStatusCode();
                        StatusCodeText = ((int)StatusCode).ToString();
                        StatusText     = StatusCode.ToString();
                    }
                }
                catch (Exception ex)
                {
                    this.DebugMsg(ex.Message);
                }

                ListViewItem lvItem = null;

                if (this.DisplayListView.Items.ContainsKey(PairKey))
                {
                    try
                    {
                        lvItem = this.DisplayListView.Items[PairKey];

                        lvItem.SubItems[ColType].Text         = LinkType;
                        lvItem.SubItems[ColUrl].Text          = Url;
                        lvItem.SubItems[ColUrlTarget].Text    = UrlTarget;
                        lvItem.SubItems[ColStatusCode].Text   = StatusCodeText;
                        lvItem.SubItems[ColStatus].Text       = StatusText;
                        lvItem.SubItems[ColDoFollow].Text     = DoFollow;
                        lvItem.SubItems[ColAltTextLabel].Text = AltTextLabel;
                        lvItem.SubItems[ColRawSourceUrl].Text = RawSourceUrl;
                        lvItem.SubItems[ColRawTargetUrl].Text = RawTargetUrl;
                    }
                    catch (Exception ex)
                    {
                        this.DebugMsg(string.Format("MacroscopeDisplayLinks 1: {0}", ex.Message));
                    }
                }
                else
                {
                    try
                    {
                        lvItem = new ListViewItem(PairKey);
                        lvItem.UseItemStyleForSubItems = false;
                        lvItem.Name = PairKey;

                        // Sub-items are appended in column order after the first column.
                        lvItem.SubItems[ColType].Text = LinkType;
                        lvItem.SubItems.Add(Url);
                        lvItem.SubItems.Add(UrlTarget);
                        lvItem.SubItems.Add(StatusCodeText);
                        lvItem.SubItems.Add(StatusText);
                        lvItem.SubItems.Add(DoFollow);
                        lvItem.SubItems.Add(AltTextLabel);
                        lvItem.SubItems.Add(RawSourceUrl);
                        lvItem.SubItems.Add(RawTargetUrl);

                        ListViewItems.Add(lvItem);
                    }
                    catch (Exception ex)
                    {
                        this.DebugMsg(string.Format("MacroscopeDisplayLinks 2: {0}", ex.Message));
                    }
                }

                if (lvItem == null)
                {
                    continue;
                }

                // Default colour for every cell; specific columns are overridden below.
                for (int i = 0; i < lvItem.SubItems.Count; i++)
                {
                    lvItem.SubItems[i].ForeColor = Color.Blue;
                }

                lvItem.SubItems[ColUrl].ForeColor = AllowedHosts.IsAllowedFromUrl(Url) ? Color.Green : Color.Gray;

                // Looked up once; it drives both the target URL and the follow column colours.
                bool TargetIsAllowed = AllowedHosts.IsAllowedFromUrl(UrlTarget);

                if (TargetIsAllowed)
                {
                    lvItem.SubItems[ColUrlTarget].ForeColor = Color.Green;
                    lvItem.SubItems[ColDoFollow].ForeColor  = IsDoFollow ? Color.Green : Color.Red;
                }
                else
                {
                    lvItem.SubItems[ColUrlTarget].ForeColor = Color.Gray;
                    lvItem.SubItems[ColDoFollow].ForeColor  = Color.Gray;
                }
            }
        }
示例#4
0
        /**************************************************************************/

        /// <summary>
        /// Adds or refreshes the list view row for a sitemap document (XML or text),
        /// showing its URL and its outlink count. Other document types are ignored.
        /// </summary>
        /// <param name="ListViewItems">Receives the row when it does not yet exist in the list view.</param>
        /// <param name="DocCollection">Not used by this implementation.</param>
        /// <param name="msDoc">Document to display.</param>
        /// <param name="Url">URL shown in the first column and used as the row key.</param>
        protected override void RenderListView(
            List <ListViewItem> ListViewItems,
            MacroscopeDocumentCollection DocCollection,
            MacroscopeDocument msDoc,
            string Url
            )
        {
            switch (msDoc.GetDocumentType())
            {
            case MacroscopeConstants.DocumentType.SITEMAPXML:
            case MacroscopeConstants.DocumentType.SITEMAPTEXT:
                break;

            default:
                return;
            }

            string       RowKey       = string.Join("", Url);
            ListViewItem Row          = null;
            int          OutlinkCount = msDoc.CountOutlinks();

            if (!this.DisplayListView.Items.ContainsKey(RowKey))
            {
                // New row: queue it for insertion by the caller.
                try
                {
                    Row = new ListViewItem(RowKey);
                    Row.UseItemStyleForSubItems = false;
                    Row.Name = RowKey;

                    Row.SubItems[0].Text = Url;
                    Row.SubItems.Add(OutlinkCount.ToString());

                    ListViewItems.Add(Row);
                }
                catch (Exception ex)
                {
                    DebugMsg(string.Format("MacroscopeDisplaySitemaps 2: {0}", ex.Message));
                }
            }
            else
            {
                // Existing row: update the text in place.
                try
                {
                    Row = this.DisplayListView.Items[RowKey];
                    Row.SubItems[0].Text = Url;
                    Row.SubItems[1].Text = OutlinkCount.ToString();
                }
                catch (Exception ex)
                {
                    DebugMsg(string.Format("MacroscopeDisplaySitemaps 1: {0}", ex.Message));
                }
            }

            if (Row == null)
            {
                return;
            }

            Row.ForeColor = Color.Blue;

            if (!msDoc.GetIsInternal())
            {
                Row.SubItems[0].ForeColor = Color.Gray;
            }
            else if (OutlinkCount <= 0)
            {
                // An internal sitemap with no outlinks is highlighted in both columns.
                Row.SubItems[0].ForeColor = Color.Red;
                Row.SubItems[1].ForeColor = Color.Red;
            }
            else
            {
                Row.SubItems[0].ForeColor = Color.Green;
            }
        }
        /**************************************************************************/

        /// <summary>
        /// Adds or refreshes the list view row for a redirecting document, showing its
        /// URL, numeric status code, status name and redirect destination. Documents
        /// that are not redirects, or that have no destination URL, are ignored.
        /// </summary>
        /// <param name="ListViewItems">Receives the row when it does not yet exist in the list view.</param>
        /// <param name="DocCollection">Not used by this implementation.</param>
        /// <param name="msDoc">Document to display.</param>
        /// <param name="Url">URL shown in the URL column and used as the row key.</param>
        protected override void RenderListView(
            List <ListViewItem> ListViewItems,
            MacroscopeDocumentCollection DocCollection,
            MacroscopeDocument msDoc,
            string Url
            )
        {
            if (!msDoc.GetIsRedirect())
            {
                return;
            }

            MacroscopeAllowedHosts AllowedHosts = this.MainForm.GetJobMaster().GetAllowedHosts();
            int    NumericStatus = (int)msDoc.GetStatusCode();
            string StatusName    = msDoc.GetStatusCode().ToString();
            string RedirectsTo   = msDoc.GetUrlRedirectTo();
            string RowKey        = string.Join("", Url);

            if (string.IsNullOrEmpty(StatusName) || string.IsNullOrEmpty(RedirectsTo))
            {
                return;
            }

            ListViewItem Row = null;

            if (!this.DisplayListView.Items.ContainsKey(RowKey))
            {
                // New row: queue it for insertion by the caller.
                try
                {
                    Row = new ListViewItem(RowKey);
                    Row.UseItemStyleForSubItems = false;
                    Row.Name = RowKey;

                    Row.SubItems[COLURL].Text = Url;
                    Row.SubItems.Add(NumericStatus.ToString());
                    Row.SubItems.Add(StatusName);
                    Row.SubItems.Add(RedirectsTo);

                    ListViewItems.Add(Row);
                }
                catch (Exception ex)
                {
                    this.DebugMsg(string.Format("MacroscopeDisplayRedirectsAudit 2: {0}", ex.Message));
                }
            }
            else
            {
                // Existing row: update the text in place.
                try
                {
                    Row = this.DisplayListView.Items[RowKey];
                    Row.SubItems[COLURL].Text            = Url;
                    Row.SubItems[COLSTATUSCODE].Text     = NumericStatus.ToString();
                    Row.SubItems[COLSTATUS].Text         = StatusName;
                    Row.SubItems[COLDESTINATIONURL].Text = RedirectsTo;
                }
                catch (Exception ex)
                {
                    this.DebugMsg(string.Format("MacroscopeDisplayRedirectsAudit 1: {0}", ex.Message));
                }
            }

            if (Row == null)
            {
                return;
            }

            // Pick one colour for the first four cells: gray for external documents,
            // otherwise keyed on the status class, with blue as the fallback.
            Color RowColor;

            if (!msDoc.GetIsInternal())
            {
                RowColor = Color.Gray;
            }
            else if ((NumericStatus >= 200) && (NumericStatus <= 299))
            {
                RowColor = Color.Green;
            }
            else if ((NumericStatus >= 300) && (NumericStatus <= 399))
            {
                RowColor = Color.Goldenrod;
            }
            else if ((NumericStatus >= 400) && (NumericStatus <= 599))
            {
                RowColor = Color.Red;
            }
            else
            {
                RowColor = Color.Blue;
            }

            for (int Cell = 0; Cell <= 3; Cell++)
            {
                Row.SubItems[Cell].ForeColor = RowColor;
            }

            // The destination column is then coloured on its own terms.
            Row.SubItems[COLDESTINATIONURL].ForeColor =
                AllowedHosts.IsInternalUrl(RedirectsTo) ? Color.Green : Color.Gray;
        }
示例#6
0
        /**************************************************************************/

        /// <summary>
        /// Writes the custom filter report as CSV: a header row with four fixed columns
        /// followed by one column per custom filter slot, then one row per document to
        /// which the custom filters can be applied.
        /// </summary>
        /// <param name="JobMaster">Source of the document collection to report on.</param>
        /// <param name="ws">CSV writer that receives the header and data rows.</param>
        private void BuildWorksheetCustomFilter(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

            // Tracks the header labels already emitted so that duplicate or blank
            // patterns get a placeholder label instead. The stored column numbers are
            // not read anywhere in this method.
            Dictionary <string, int> FilterColsTable = new Dictionary <string, int> (this.CustomFilter.GetSize());

            const int FilterColOffset = 3;

            ws.WriteField(MacroscopeConstants.Url);
            ws.WriteField(MacroscopeConstants.StatusCode);
            ws.WriteField(MacroscopeConstants.Status);
            ws.WriteField(MacroscopeConstants.ContentType);

            for (int Slot = 0; Slot < this.CustomFilter.GetSize(); Slot++)
            {
                string FilterPattern = this.CustomFilter.GetPattern(Slot: Slot).Key;
                string ColumnLabel   = FilterPattern;

                // Test for null/empty BEFORE ContainsKey: Dictionary.ContainsKey throws
                // ArgumentNullException when given a null key.
                if (string.IsNullOrEmpty(FilterPattern) || FilterColsTable.ContainsKey(FilterPattern))
                {
                    ColumnLabel = string.Format("EMPTY{0}", Slot + 1);
                }

                FilterColsTable.Add(ColumnLabel, Slot + FilterColOffset);

                ws.WriteField(ColumnLabel);
            }

            ws.NextRecord();

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                if (!this.CustomFilter.CanApplyCustomFiltersToDocument(msDoc: msDoc))
                {
                    continue;
                }

                string MimeType = msDoc.GetMimeType();

                this.InsertAndFormatUrlCell(ws, msDoc);

                this.InsertAndFormatStatusCodeCell(ws, msDoc);

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetStatusCode().ToString()));

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(MimeType));

                for (int Slot = 0; Slot < this.CustomFilter.GetSize(); Slot++)
                {
                    string FilterPattern = this.CustomFilter.GetPattern(Slot: Slot).Key;
                    KeyValuePair <string,  MacroscopeConstants.TextPresence> Pair = msDoc.GetCustomFilteredItem(Text: FilterPattern);

                    if ((Pair.Key != null) && (Pair.Value != MacroscopeConstants.TextPresence.UNDEFINED))
                    {
                        string CustomFilterItemValue = MacroscopeConstants.TextPresenceLabels[Pair.Value];

                        this.InsertAndFormatContentCell(ws, CustomFilterItemValue);
                    }
                    else
                    {
                        // Keep the column position even when there is no result for this slot.
                        this.InsertAndFormatContentCell(ws, "");
                    }
                }

                // End the record once per document, after all filter cells have been
                // written, so that every data row lines up with the header row.
                ws.NextRecord();
            }
        }
示例#7
0
        /**************************************************************************/

        /// <summary>
        /// Adds or refreshes the list view row for a document, showing its URL, numeric
        /// status code and status name, coloured by the leading digit of the status code.
        /// </summary>
        /// <param name="ListViewItems">Receives the row when it does not yet exist in the list view.</param>
        /// <param name="DocCollection">Not used by this implementation.</param>
        /// <param name="msDoc">Document to display.</param>
        /// <param name="Url">URL shown in the URL column and used as the row key.</param>
        protected override void RenderListView(
            List <ListViewItem> ListViewItems,
            MacroscopeDocumentCollection DocCollection,
            MacroscopeDocument msDoc,
            string Url
            )
        {
            string       CodeText   = ((int)msDoc.GetStatusCode()).ToString();
            string       StatusName = msDoc.GetStatusCode().ToString();
            string       RowKey     = string.Join("", Url);
            ListViewItem Row        = null;

            if (!this.DisplayListView.Items.ContainsKey(RowKey))
            {
                // New row: queue it for insertion by the caller.
                try
                {
                    Row = new ListViewItem(RowKey);
                    Row.UseItemStyleForSubItems = false;
                    Row.Name = RowKey;

                    Row.SubItems[COL_URL].Text = Url;
                    Row.SubItems.Add(CodeText);
                    Row.SubItems.Add(StatusName);

                    ListViewItems.Add(Row);
                }
                catch (Exception ex)
                {
                    DebugMsg(string.Format("MacroscopeDisplayOrphanedPages 2: {0}", ex.Message));
                }
            }
            else
            {
                // Existing row: update the text in place.
                try
                {
                    Row = this.DisplayListView.Items[RowKey];
                    Row.SubItems[COL_URL].Text         = Url;
                    Row.SubItems[COL_STATUS_CODE].Text = CodeText;
                    Row.SubItems[COL_STATUS].Text      = StatusName;
                }
                catch (Exception ex)
                {
                    DebugMsg(string.Format("MacroscopeDisplayOrphanedPages 1: {0}", ex.Message));
                }
            }

            if (Row == null)
            {
                return;
            }

            Row.ForeColor = Color.Blue;

            Row.SubItems[COL_URL].ForeColor = msDoc.GetIsInternal() ? Color.Green : Color.Gray;

            // Classify by the first character of the numeric code: 2xx, 3xx, 4xx/5xx, other.
            Color StatusColor;

            switch (CodeText[0])
            {
            case '2':
                StatusColor = Color.Green;
                break;

            case '3':
                StatusColor = Color.Goldenrod;
                break;

            case '4':
            case '5':
                StatusColor = Color.Red;
                break;

            default:
                StatusColor = Color.Blue;
                break;
            }

            Row.SubItems[COL_STATUS_CODE].ForeColor = StatusColor;
            Row.SubItems[COL_STATUS].ForeColor      = StatusColor;
        }
        /**************************************************************************/

        /// <summary>
        /// Adds a worksheet listing every document in the collection with its status
        /// code, status name, the number of documents sharing its checksum, and the
        /// checksum itself, then turns the populated range into a table.
        /// </summary>
        /// <param name="JobMaster">Source of the document collection and allowed hosts.</param>
        /// <param name="wb">Workbook that receives the new worksheet.</param>
        /// <param name="WorksheetLabel">Name of the worksheet to add.</param>
        private void BuildWorksheetPageUriAnalysis(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            // Header row; the array order is the column order.
            string[] Headings = { "URL", "Status Code", "Status", "Occurrences", "Checksum" };

            const int UrlColumn        = 1;
            const int StatusCodeColumn = 2;
            const int StatusColumn     = 3;
            const int CountColumn      = 4;
            const int ChecksumColumn   = 5;

            for (int Index = 0; Index < Headings.Length; Index++)
            {
                ws.Cell(1, Index + 1).Value = Headings[Index];
            }

            int LastColumn = Headings.Length;
            int Row        = 2;

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc = DocCollection.GetDocument(Url);

                string CodeText    = (( int )msDoc.GetStatusCode()).ToString();
                string StatusName  = msDoc.GetStatusCode().ToString();
                string Checksum    = msDoc.GetChecksum();
                int    Occurrences = DocCollection.GetStatsChecksumCount(Checksum: Checksum);

                this.InsertAndFormatUrlCell(ws, Row, UrlColumn, msDoc);

                // Applied after the helper call, so this font colour is the one that remains.
                ws.Cell(Row, UrlColumn).Style.Font.SetFontColor(
                    AllowedHosts.IsInternalUrl(Url: Url) ? XLColor.Green : XLColor.Gray
                    );

                this.InsertAndFormatContentCell(ws, Row, StatusCodeColumn, CodeText);

                this.InsertAndFormatContentCell(ws, Row, StatusColumn, StatusName);

                // A checksum shared by more than one document is flagged in red in both
                // the occurrences and the checksum columns.
                XLColor ShareColor = (Occurrences > 1) ? XLColor.Red : XLColor.Blue;

                this.InsertAndFormatContentCell(ws, Row, CountColumn, Occurrences);
                ws.Cell(Row, CountColumn).Style.Font.SetFontColor(ShareColor);

                this.InsertAndFormatContentCell(ws, Row, ChecksumColumn, Checksum);
                ws.Cell(Row, ChecksumColumn).Style.Font.SetFontColor(ShareColor);

                Row++;
            }

            // Row has already advanced past the last data row.
            ws.Range(1, 1, Row - 1, LastColumn).CreateTable();
        }
示例#9
0
        /**************************************************************************/

        /// <summary>
        /// Writes the duplicate pages report as CSV. For each internal HTML document a
        /// Levenshtein analysis is run against the collection, and one row is written
        /// per document it returns, with the reported distance. Progress is pushed to
        /// the progress form, and the loops stop when the form reports cancellation.
        /// </summary>
        /// <param name="JobMaster">Source of the document collection.</param>
        /// <param name="ws">CSV writer that receives the header and data rows.</param>
        private void BuildWorksheetPageDuplicatePages(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            Dictionary <string, bool> CrossCheckList = MacroscopeLevenshteinAnalysis.GetCrossCheckList(
                Capacity: DocCollection.CountDocuments()
                );

            decimal TotalDocs      = ( decimal )DocCollection.CountDocuments();
            decimal OuterProcessed = 0;

            foreach (string Heading in new string[] { "Status Code", "Status", "Origin URL", "Distance", "Similar URL" })
            {
                ws.WriteField(Heading);
            }

            ws.NextRecord();

            foreach (string UrlLeft in DocCollection.DocumentUrls())
            {
                MacroscopeDocument msDocLeft = DocCollection.GetDocumentByUrl(Url: UrlLeft);

                OuterProcessed++;

                decimal InnerProcessed = 0;

                // Progress is reported for every document, including those skipped below.
                if (TotalDocs > 0)
                {
                    this.ProgressForm.UpdatePercentages(
                        Title: null,
                        Message: null,
                        MajorPercentage: -1,
                        ProgressLabelMajor: string.Format("Documents Processed: {0}", OuterProcessed),
                        MinorPercentage: (( decimal )100 / TotalDocs) * OuterProcessed,
                        ProgressLabelMinor: UrlLeft,
                        SubMinorPercentage: 0,
                        ProgressLabelSubMinor: ""
                        );
                }

                // Only internal HTML documents are analysed.
                if (msDocLeft.GetIsExternal() || !msDocLeft.IsDocumentType(Type: MacroscopeConstants.DocumentType.HTML))
                {
                    continue;
                }

                MacroscopeLevenshteinAnalysis Analysis = new MacroscopeLevenshteinAnalysis(
                    msDoc: msDocLeft,
                    SizeDifference: MacroscopePreferencesManager.GetMaxLevenshteinSizeDifference(),
                    Threshold: MacroscopePreferencesManager.GetMaxLevenshteinDistance(),
                    CrossCheckList: CrossCheckList,
                    IPercentageDone: this
                    );

                Dictionary <MacroscopeDocument, int> Matches = Analysis.AnalyzeDocCollection(
                    DocCollection: DocCollection
                    );

                decimal MatchTotal = ( decimal )Matches.Count;

                foreach (KeyValuePair <MacroscopeDocument, int> Match in Matches)
                {
                    int            StatusCode   = ( int )msDocLeft.GetStatusCode();
                    HttpStatusCode Status       = msDocLeft.GetStatusCode();
                    string         UrlDuplicate = Match.Key.GetUrl();
                    int            Distance     = Match.Value;

                    InnerProcessed++;

                    if (TotalDocs > 0)
                    {
                        this.ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: -1,
                            ProgressLabelMajor: string.Format("Documents Processed: {0}", OuterProcessed),
                            MinorPercentage: (( decimal )100 / TotalDocs) * OuterProcessed,
                            ProgressLabelMinor: UrlLeft,
                            SubMinorPercentage: (( decimal )100 / MatchTotal) * InnerProcessed,
                            ProgressLabelSubMinor: UrlDuplicate
                            );
                    }

                    // Both status cells describe the origin document, not the match.
                    this.InsertAndFormatStatusCodeCell(ws, StatusCode);

                    this.InsertAndFormatStatusCodeCell(ws, Status);

                    this.InsertAndFormatUrlCell(ws, UrlLeft);

                    this.InsertAndFormatContentCell(ws, Distance.ToString());

                    this.InsertAndFormatUrlCell(ws, UrlDuplicate);

                    ws.NextRecord();

                    if (this.ProgressForm.Cancelled())
                    {
                        break;
                    }
                }

                // Checked again here so that a cancellation also ends the outer loop.
                if (this.ProgressForm.Cancelled())
                {
                    break;
                }
            }
        }
        /**************************************************************************/

        /// <summary>
        /// Writes the duplicate-titles CSV page: a header record followed by one
        /// record (URL, occurrences, title) for every internal HTML or PDF document
        /// whose title count, as reported by the document collection, exceeds one.
        /// </summary>
        /// <param name="JobMaster">Supplies the document collection and the allowed hosts.</param>
        /// <param name="ws">CSV writer that receives the records.</param>
        private void BuildWorksheetPageDuplicateTitles(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection Documents = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       Hosts     = JobMaster.GetAllowedHosts();
            decimal                      Total     = ( decimal )Documents.CountDocuments();
            decimal                      Processed = 0;

            // Header record.
            ws.WriteField("URL");
            ws.WriteField("Occurrences");
            ws.WriteField("Title");
            ws.NextRecord();

            foreach (MacroscopeDocument Doc in Documents.IterateDocuments())
            {
                // Progress is only reported for a non-zero total, which also keeps
                // the percentage division safe.
                if (Total > 0)
                {
                    Processed++;

                    this.ProgressForm.UpdatePercentages(
                        Title: null,
                        Message: null,
                        MajorPercentage: -1,
                        ProgressLabelMajor: string.Format("Documents Processed: {0}", Processed),
                        MinorPercentage: (( decimal )100 / Total) * Processed,
                        ProgressLabelMinor: Doc.GetUrl(),
                        SubMinorPercentage: -1,
                        ProgressLabelSubMinor: null
                        );
                }

                // Only internal documents are considered.
                if (!Hosts.IsInternalUrl(Url: Doc.GetUrl()))
                {
                    continue;
                }

                // Only HTML and PDF documents are considered.
                switch (Doc.GetDocumentType())
                {
                case MacroscopeConstants.DocumentType.HTML:
                case MacroscopeConstants.DocumentType.PDF:
                    break;

                default:
                    continue;
                }

                string PageTitle  = Doc.GetTitle();
                int    TitleCount = Documents.GetStatsTitleCount(msDoc: Doc);

                // A count of one means the title is unique; nothing to report.
                if (TitleCount <= 1)
                {
                    continue;
                }

                this.InsertAndFormatUrlCell(ws, Doc);
                this.InsertAndFormatContentCell(ws, TitleCount);
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(PageTitle));

                ws.NextRecord();
            }
        }
        /**************************************************************************/

        /// <summary>
        /// Produces one list view row per remark yielded by <paramref name="msDoc"/>.
        /// A row already present in the display list view (keyed by URL plus remark
        /// text) is updated in place; otherwise a new row is appended to
        /// <paramref name="ListViewItems"/> for the caller to add to the control.
        /// </summary>
        /// <param name="ListViewItems">Receives newly created rows.</param>
        /// <param name="DocCollection">Not used by this override.</param>
        /// <param name="msDoc">Document whose remarks are rendered.</param>
        /// <param name="Url">URL shown in the URL column and used in the row key.</param>
        protected override void RenderListView(
            List <ListViewItem> ListViewItems,
            MacroscopeDocumentCollection DocCollection,
            MacroscopeDocument msDoc,
            string Url
            )
        {
            foreach (KeyValuePair <string, string> Remark in msDoc.IterateRemarks())
            {
                string       RowKey         = string.Join(@"::::", Url, Remark.Value);
                string       StatusCodeText = (( int )msDoc.GetStatusCode()).ToString();
                string       StatusText     = msDoc.GetStatusCode().ToString();
                ListViewItem Row            = null;

                if (!this.DisplayListView.Items.ContainsKey(RowKey))
                {
                    // No row for this key yet: build one and queue it for the caller.
                    try
                    {
                        Row = new ListViewItem(RowKey);
                        Row.UseItemStyleForSubItems = false;
                        Row.Name = RowKey;

                        Row.SubItems[ColUrl].Text = Url;
                        Row.SubItems.Add(StatusCodeText);
                        Row.SubItems.Add(StatusText);
                        Row.SubItems.Add(Remark.Value);

                        ListViewItems.Add(Row);
                    }
                    catch (Exception ex)
                    {
                        DebugMsg(string.Format("MacroscopeDisplayRemarks 2: {0}", ex.Message));
                    }
                }
                else
                {
                    // Row already displayed: refresh its cells.
                    try
                    {
                        Row = this.DisplayListView.Items[RowKey];
                        Row.SubItems[ColUrl].Text         = Url;
                        Row.SubItems[ColStatusCode].Text  = StatusCodeText;
                        Row.SubItems[ColStatus].Text      = StatusText;
                        Row.SubItems[ColObservation].Text = Remark.Value;
                    }
                    catch (Exception ex)
                    {
                        DebugMsg(string.Format("MacroscopeDisplayRemarks 1: {0}", ex.Message));
                    }
                }

                if (Row == null)
                {
                    continue;
                }

                Row.ForeColor = Color.Blue;

                // URL cell: green for internal documents, grey otherwise.
                Color UrlColour = msDoc.GetIsInternal() ? Color.Green : Color.Gray;

                Row.SubItems[ColUrl].ForeColor = UrlColour;
            }
        }
示例#12
0
        /** Override Render One ***************************************************/

        /// <summary>
        /// Implemented by derived classes to render a single document as list view rows.
        /// </summary>
        /// <param name="ListViewItems">List that receives the rows created for the document.</param>
        /// <param name="DocCollection">Collection the document belongs to.</param>
        /// <param name="msDoc">Document to render.</param>
        /// <param name="Url">URL associated with the document.</param>
        protected abstract void RenderListView(
            List <ListViewItem> ListViewItems,
            MacroscopeDocumentCollection DocCollection,
            MacroscopeDocument msDoc,
            string Url
            );
示例#13
0
        /** Render DocCollection Filtered by URL Fragment *************************/

        /// <summary>
        /// Renders every document in <paramref name="DocCollection"/> whose URL contains
        /// <paramref name="UrlFragment"/>, then adds the resulting rows to the display
        /// list view. Does nothing when the collection reports zero documents.
        /// </summary>
        /// <param name="DocCollection">Documents to filter and render.</param>
        /// <param name="UrlFragment">Substring a document URL must contain to be rendered.</param>
        public void RenderListView(
            MacroscopeDocumentCollection DocCollection,
            string UrlFragment
            )
        {
            if (DocCollection.CountDocuments() == 0)
            {
                return;
            }

            List <ListViewItem> ListViewItems = new List <ListViewItem>(DocCollection.CountDocuments());

            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

            // The progress form has its control box removed below, so it must be
            // closed and disposed even when rendering throws.
            try
            {
                decimal Count           = 0;
                decimal TotalDocs       = (decimal)DocCollection.CountDocuments();
                decimal MajorPercentage = ((decimal)100 / TotalDocs) * Count;

                if (MacroscopePreferencesManager.GetShowProgressDialogues())
                {
                    ProgressForm.ControlBox = false;

                    ProgressForm.UpdatePercentages(
                        Title: "Preparing Display",
                        Message: "Processing document collection for display:",
                        MajorPercentage: MajorPercentage,
                        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                        );
                }

                foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
                {
                    // Keep the UI responsive while the list is being built.
                    Application.DoEvents();

                    if (msDoc != null)
                    {
                        string Url = msDoc.GetUrl();

                        // NOTE(review): culture-sensitive match on a URL; an ordinal
                        // comparison may be what is intended - confirm before changing.
                        if (Url.IndexOf(UrlFragment, StringComparison.CurrentCulture) >= 0)
                        {
                            this.RenderListView(
                                ListViewItems: ListViewItems,
                                DocCollection: DocCollection,
                                msDoc: msDoc,
                                Url: Url
                                );
                        }
                    }

                    if (MacroscopePreferencesManager.GetShowProgressDialogues())
                    {
                        Count++;
                        MajorPercentage = ((decimal)100 / TotalDocs) * Count;

                        ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: MajorPercentage,
                            ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                            );
                    }
                }

                this.DisplayListView.Items.AddRange(ListViewItems.ToArray());
            }
            finally
            {
                if (MacroscopePreferencesManager.GetShowProgressDialogues())
                {
                    ProgressForm.DoClose();
                }

                ProgressForm.Dispose();
            }
        }
示例#14
0
        /** Render Filtered DocCollection *******************************************/

        /// <summary>
        /// Renders the documents in <paramref name="DocCollection"/> selected by
        /// <paramref name="DocumentType"/>, then adds the resulting rows to the display
        /// list view. Does nothing when the collection reports zero documents.
        /// </summary>
        /// <param name="DocCollection">Documents to filter and render.</param>
        /// <param name="DocumentType">
        /// INTERNALURL selects internal documents, EXTERNALURL selects external ones,
        /// ALL selects everything; any other value selects documents of exactly that type.
        /// </param>
        public void RenderListView(
            MacroscopeDocumentCollection DocCollection,
            MacroscopeConstants.DocumentType DocumentType
            )
        {
            if (DocCollection.CountDocuments() == 0)
            {
                return;
            }

            List <ListViewItem> ListViewItems = new List <ListViewItem>(DocCollection.CountDocuments());

            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

            // The progress form has its control box removed below, so it must be
            // closed and disposed even when rendering throws.
            try
            {
                decimal Count           = 0;
                decimal TotalDocs       = (decimal)DocCollection.CountDocuments();
                decimal MajorPercentage = ((decimal)100 / TotalDocs) * Count;

                if (MacroscopePreferencesManager.GetShowProgressDialogues())
                {
                    ProgressForm.ControlBox = false;

                    ProgressForm.UpdatePercentages(
                        Title: "Preparing Display",
                        Message: "Processing document collection for display:",
                        MajorPercentage: MajorPercentage,
                        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                        );
                }

                foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
                {
                    // Keep the UI responsive while the list is being built.
                    Application.DoEvents();

                    if (msDoc != null)
                    {
                        bool Selected;

                        switch (DocumentType)
                        {
                        case MacroscopeConstants.DocumentType.INTERNALURL:
                            Selected = msDoc.GetIsInternal();
                            break;

                        case MacroscopeConstants.DocumentType.EXTERNALURL:
                            Selected = msDoc.GetIsExternal();
                            break;

                        default:
                            Selected =
                                (msDoc.GetDocumentType() == DocumentType) ||
                                (DocumentType == MacroscopeConstants.DocumentType.ALL);
                            break;
                        }

                        if (Selected)
                        {
                            this.RenderListView(
                                ListViewItems: ListViewItems,
                                DocCollection: DocCollection,
                                msDoc: msDoc,
                                Url: msDoc.GetUrl()
                                );
                        }
                    }

                    if (MacroscopePreferencesManager.GetShowProgressDialogues())
                    {
                        Count++;
                        MajorPercentage = ((decimal)100 / TotalDocs) * Count;

                        ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: MajorPercentage,
                            ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                            );
                    }
                }

                this.DisplayListView.Items.AddRange(ListViewItems.ToArray());
            }
            finally
            {
                if (MacroscopePreferencesManager.GetShowProgressDialogues())
                {
                    ProgressForm.DoClose();
                }

                ProgressForm.Dispose();
            }
        }
        /**************************************************************************/

        /// <summary>
        /// Renders the regex data-extractor results of the documents named in
        /// <paramref name="UrlList"/> into the display list view, one row per
        /// (document, label, extracted value) triple, then de-duplicates the view and
        /// resizes its columns. Does nothing when the collection reports zero documents.
        /// </summary>
        /// <param name="DocCollection">Collection the URLs are looked up in.</param>
        /// <param name="UrlList">URLs of the documents to render.</param>
        /// <param name="DataExtractor">Decides whether a document is eligible for rendering.</param>
        private void RenderListView(
            MacroscopeDocumentCollection DocCollection,
            List <string> UrlList,
            MacroscopeDataExtractorRegexes DataExtractor
            )
        {
            if (DocCollection.CountDocuments() == 0)
            {
                return;
            }

            List <ListViewItem> ListViewItems = new List <ListViewItem> ();

            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

            // Close and dispose the progress form even when rendering throws.
            try
            {
                // NOTE(review): the percentage is computed against the collection
                // size although the loop walks UrlList - confirm this is intended.
                decimal Count           = 0;
                decimal TotalDocs       = ( decimal )DocCollection.CountDocuments();
                decimal MajorPercentage = (( decimal )100 / TotalDocs) * Count;

                if (MacroscopePreferencesManager.GetShowProgressDialogues())
                {
                    ProgressForm.UpdatePercentages(
                        Title: "Preparing Display",
                        Message: "Processing document collection for display:",
                        MajorPercentage: MajorPercentage,
                        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                        );
                }

                foreach (string Url in UrlList)
                {
                    MacroscopeDocument msDoc = DocCollection.GetDocument(Url: Url);

                    // A URL that is not in the collection, or a document the extractor
                    // does not apply to, produces no rows but still counts as progress.
                    if (
                        (msDoc != null) &&
                        DataExtractor.CanApplyDataExtractorsToDocument(msDoc: msDoc))
                    {
                        this.RenderDataExtractedRegexRows(
                            ListViewItems: ListViewItems,
                            msDoc: msDoc
                            );
                    }

                    if (MacroscopePreferencesManager.GetShowProgressDialogues())
                    {
                        Count++;

                        MajorPercentage = (( decimal )100 / TotalDocs) * Count;

                        ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: MajorPercentage,
                            ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                            );
                    }
                }

                this.DisplayListView.Items.AddRange(ListViewItems.ToArray());

                this.DeduplicateListView(DuplicatedListView: this.DisplayListView);

                this.DisplayListView.AutoResizeColumns(ColumnHeaderAutoResizeStyle.ColumnContent);

                this.DisplayListView.Columns[ColUrl].Width        = 300;
                this.DisplayListView.Columns[ColStatusCode].Width = 100;
                this.DisplayListView.Columns[ColStatus].Width     = 100;
                this.DisplayListView.Columns[ColMimeType].Width   = 100;
            }
            finally
            {
                if (MacroscopePreferencesManager.GetShowProgressDialogues())
                {
                    ProgressForm.DoClose();
                }

                ProgressForm.Dispose();
            }
        }

        /// <summary>
        /// Creates or refreshes one list view row for each non-empty
        /// (label, extracted value) pair yielded by <paramref name="msDoc"/>.
        /// Rows not yet present in the display list view are appended to
        /// <paramref name="ListViewItems"/>.
        /// </summary>
        /// <param name="ListViewItems">Receives newly created rows.</param>
        /// <param name="msDoc">Document whose extracted values are rendered; must not be null.</param>
        private void RenderDataExtractedRegexRows(
            List <ListViewItem> ListViewItems,
            MacroscopeDocument msDoc
            )
        {
            string DocUrl     = msDoc.GetUrl();
            string StatusCode = (( int )msDoc.GetStatusCode()).ToString();
            string Status     = msDoc.GetStatusCode().ToString();
            string MimeType   = msDoc.GetMimeType();

            // Colours depend only on the document, so work them out once per document.
            Color UrlColour = msDoc.GetIsInternal() ? Color.Green : Color.Gray;
            Color StatusColour;

            if (Regex.IsMatch(StatusCode, "^[2]"))
            {
                StatusColour = Color.Green;
            }
            else
            if (Regex.IsMatch(StatusCode, "^[3]"))
            {
                StatusColour = Color.Goldenrod;
            }
            else
            if (Regex.IsMatch(StatusCode, "^[45]"))
            {
                StatusColour = Color.Red;
            }
            else
            {
                StatusColour = Color.Blue;
            }

            foreach (KeyValuePair <string, string> DataExtractedPair in msDoc.IterateDataExtractedRegexes())
            {
                string RegexLabel     = DataExtractedPair.Key;
                string ExtractedValue = DataExtractedPair.Value;

                if (
                    string.IsNullOrEmpty(RegexLabel) ||
                    string.IsNullOrEmpty(ExtractedValue))
                {
                    continue;
                }

                // Row key: document URL plus digests of the label and the value.
                string PairKey = string.Join(
                    "::",
                    DocUrl,
                    Macroscope.GetStringDigest(Text: RegexLabel),
                    Macroscope.GetStringDigest(Text: ExtractedValue)
                    );

                ListViewItem lvItem;

                if (this.DisplayListView.Items.ContainsKey(PairKey))
                {
                    lvItem = this.DisplayListView.Items[PairKey];
                }
                else
                {
                    lvItem = new ListViewItem(PairKey);
                    lvItem.UseItemStyleForSubItems = false;
                    lvItem.Name = PairKey;

                    // Pre-create the sub-item cells so they can be addressed by column index.
                    for (int i = 0; i < 6; i++)
                    {
                        lvItem.SubItems.Add("");
                    }

                    ListViewItems.Add(lvItem);
                }

                if (lvItem == null)
                {
                    // Log and skip the row rather than dereference a missing item.
                    DebugMsg(string.Format("MacroscopeDisplayDataExtractorRegexes MISSING: {0}", PairKey));
                    continue;
                }

                // Text and colour use the same sub-item indexes, so both sit under
                // the same handler: a malformed row is logged, not fatal.
                try
                {
                    lvItem.SubItems[ColUrl].Text            = DocUrl;
                    lvItem.SubItems[ColStatusCode].Text     = StatusCode;
                    lvItem.SubItems[ColStatus].Text         = Status;
                    lvItem.SubItems[ColMimeType].Text       = MimeType;
                    lvItem.SubItems[ColRegexLabel].Text     = RegexLabel;
                    lvItem.SubItems[ColExtractedValue].Text = ExtractedValue;

                    lvItem.SubItems[ColUrl].ForeColor        = UrlColour;
                    lvItem.SubItems[ColStatusCode].ForeColor = StatusColour;
                    lvItem.SubItems[ColStatus].ForeColor     = StatusColour;
                }
                catch (Exception ex)
                {
                    DebugMsg(string.Format("MacroscopeDisplayDataExtractorRegexes: {0}", ex.Message));
                    DebugMsg(string.Format("MacroscopeDisplayDataExtractorRegexes: {0}", ex.StackTrace));
                }
            }
        }
示例#16
0
        /**************************************************************************/

        /// <summary>
        /// Adds a worksheet listing, for every document in <paramref name="DocCollection"/>,
        /// each keyword returned by the collection's intense keyword analysis together
        /// with its presence status and the document URL, then turns the written range
        /// into a table.
        /// </summary>
        /// <param name="JobMaster">Not used by this method.</param>
        /// <param name="wb">Workbook the worksheet is added to.</param>
        /// <param name="WorksheetLabel">Name of the new worksheet.</param>
        /// <param name="DocCollection">Documents to report on.</param>
        private void BuildWorksheetKeywordsPresence(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel,
            MacroscopeDocumentCollection DocCollection
            )
        {
            var     ws       = wb.Worksheets.Add(WorksheetLabel);
            int     iRow     = 1;
            int     iCol     = 1;
            int     iColMax  = 1;
            decimal DocCount = 0;
            decimal DocTotal = (decimal)DocCollection.CountDocuments();

            // Header row.
            {
                ws.Cell(iRow, iCol).Value = "Presence";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Keyword";
                iCol++;

                ws.Cell(iRow, iCol).Value = "URL";
            }

            iColMax = iCol;

            iRow++;

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                List <KeyValuePair <string, MacroscopeIntenseKeywordAnalysis.KEYWORD_STATUS> > KeywordPresence;

                KeywordPresence = DocCollection.GetIntenseKeywordAnalysis(msDoc: msDoc);

                DocCount++;

                // Guard on the divisor (the total), not on the running counter, so the
                // first document is reported and the last one reaches 100%.
                if (DocTotal > 0)
                {
                    this.ProgressForm.UpdatePercentages(
                        Title: null,
                        Message: null,
                        MajorPercentage: -1,
                        ProgressLabelMajor: null,
                        MinorPercentage: ((decimal)100 / DocTotal) * DocCount,
                        ProgressLabelMinor: "Documents Processed"
                        );
                }

                if (KeywordPresence == null)
                {
                    continue;
                }

                foreach (KeyValuePair <string, MacroscopeIntenseKeywordAnalysis.KEYWORD_STATUS> Pair in KeywordPresence)
                {
                    MacroscopeIntenseKeywordAnalysis.KEYWORD_STATUS Present = Pair.Value;
                    string Keyword = Pair.Key;

                    iCol = 1;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, Present.ToString());

                    // Red for an empty keywords metatag or a keyword missing from the
                    // body text, green when present; other statuses keep the default colour.
                    switch (Present)
                    {
                    case MacroscopeIntenseKeywordAnalysis.KEYWORD_STATUS.KEYWORDS_METATAG_EMPTY:
                    case MacroscopeIntenseKeywordAnalysis.KEYWORD_STATUS.MISSING_IN_BODY_TEXT:
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
                        break;

                    case MacroscopeIntenseKeywordAnalysis.KEYWORD_STATUS.PRESENT_IN_BODY_TEXT:
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                        break;

                    default:
                        break;
                    }

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, Keyword);

                    iCol++;

                    this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc.GetUrl());

                    iRow++;
                }
            }

            // Turn everything written (header plus data rows) into a table.
            {
                var rangeData = ws.Range(1, 1, iRow - 1, iColMax);

                rangeData.CreateTable();
            }
        }
        /**************************************************************************/

        /// <summary>
        /// Adds a worksheet with one row per inbound hyperlink to every document whose
        /// status code is in the 400-599 range: status code, status, origin URL and
        /// destination URL. The written range is then turned into a table.
        /// </summary>
        /// <param name="JobMaster">Supplies the document collection and the allowed hosts.</param>
        /// <param name="wb">Workbook the worksheet is added to.</param>
        /// <param name="WorksheetLabel">Name of the new worksheet.</param>
        private void BuildWorksheetPageBrokenLinks(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
            MacroscopeAllowedHosts       AllowedHosts  = JobMaster.GetAllowedHosts();

            // Header row.
            {
                ws.Cell(iRow, iCol).Value = "Status Code";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Status";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Origin URL";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Destination URL";
            }

            iColMax = iCol;

            iRow++;

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                int StatusCode = ( int )msDoc.GetStatusCode();

                // Only 4xx/5xx documents are reported; skip the rest before doing
                // the inbound-link lookup.
                if ((StatusCode < 400) || (StatusCode > 599))
                {
                    continue;
                }

                MacroscopeHyperlinksIn HyperlinksIn = DocCollection.GetDocumentHyperlinksIn(msDoc.GetUrl());

                if (HyperlinksIn == null)
                {
                    continue;
                }

                // These values are the same for every row of this document.
                string  StatusCodeText    = StatusCode.ToString();
                string  Status            = msDoc.GetStatusCode().ToString();
                XLColor DestinationColour = AllowedHosts.IsInternalUrl(Url: msDoc.GetUrl()) ? XLColor.Green : XLColor.Gray;

                foreach (MacroscopeHyperlinkIn HyperlinkIn in HyperlinksIn.IterateLinks())
                {
                    string OriginUrl = HyperlinkIn.GetSourceUrl();

                    if (string.IsNullOrEmpty(OriginUrl))
                    {
                        continue;
                    }

                    iCol = 1;

                    // Status cells are always red here: every row belongs to a
                    // document already known to be in the 400-599 range.
                    this.InsertAndFormatContentCell(ws, iRow, iCol, StatusCodeText);
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, Status);
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);

                    iCol++;

                    // Origin URL: green when internal, grey otherwise.
                    this.InsertAndFormatUrlCell(ws, iRow, iCol, OriginUrl);

                    if (AllowedHosts.IsInternalUrl(Url: OriginUrl))
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                    }

                    iCol++;

                    // Destination URL: the broken document itself.
                    this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(DestinationColour);

                    iRow++;
                }
            }

            // Turn everything written (header plus data rows) into a table.
            {
                var rangeData = ws.Range(1, 1, iRow - 1, iColMax);

                rangeData.CreateTable();
            }
        }
        /**************************************************************************/

        private void BuildWorksheetOverview(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

            {
                ws.Cell(iRow, iCol).Value = "URL";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Status Code";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Status";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Redirect";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Robots";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Duration";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Crawled Date";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Server Date";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Modified Date";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Expires Date";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Content-Type";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Charset";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Locale";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Language";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Canonical";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Page Depth";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Links In";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Links Out";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Hyperlinks In";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Hyperlinks Out";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Ratio In";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Ratio Out";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Author";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Title";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Title Length";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Description";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Description Length";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Keywords";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Keywords Length";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Keywords Count";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Error Condition";

                for (int i = 1; i <= iCol; i++)
                {
                    ws.Cell(iRow, i).Style.Font.SetBold();
                }
            }

            iColMax = iCol;

            iRow++;

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                List <decimal> HyperlinkRatio = DocCollection.GetDocumentHyperlinksRatio(Url: msDoc.GetUrl());

                iCol = 1;

                this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);
                iCol++;

                this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, msDoc);
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(msDoc.GetStatusCode().ToString()));
                iCol++;

                this.InsertAndFormatRedirectCell(ws, iRow, iCol, msDoc);
                iCol++;

                this.InsertAndFormatRobotsCell(ws, iRow, iCol, msDoc);
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, msDoc.GetDurationInSecondsFormatted());
                iCol++;

                this.InsertAndFormatDateCell(ws, iRow, iCol, msDoc.GetCrawledDate());
                iCol++;

                this.InsertAndFormatDateCell(ws, iRow, iCol, msDoc.GetDateServer());
                iCol++;

                this.InsertAndFormatDateCell(ws, iRow, iCol, msDoc.GetDateModified());
                iCol++;

                this.InsertAndFormatDateCell(ws, iRow, iCol, msDoc.GetDateExpires());
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(msDoc.GetMimeType()));
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(msDoc.GetCharacterSet()));
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(msDoc.GetLocale()));
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(msDoc.GetIsoLanguageCode()));
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(msDoc.GetCanonical()));
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, msDoc.GetDepth().ToString());
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, msDoc.CountInlinks());
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, msDoc.CountOutlinks());
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, msDoc.CountHyperlinksIn());
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, msDoc.CountHyperlinksOut());
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, string.Format("{0:0.00}%", HyperlinkRatio[0]));
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, string.Format("{0:0.00}%", HyperlinkRatio[1]));
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, msDoc.GetAuthor());
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(msDoc.GetTitle()));
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, msDoc.GetTitleLength().ToString());
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(msDoc.GetDescription()));
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, msDoc.GetDescriptionLength());
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(msDoc.GetKeywords()));
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, msDoc.GetKeywordsLength().ToString());
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, msDoc.GetKeywordsCount().ToString());
                iCol++;

                this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(msDoc.GetErrorCondition()));

                iRow++;
            }

            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax);
                var excelTable = rangeData.CreateTable();
            }
        }
示例#19
0
        /**************************************************************************/

        /// <summary>
        /// Writes the page-description report to a CSV writer: a header record,
        /// then one record for each document that survives the filters below.
        /// </summary>
        /// <param name="JobMaster">Source of the document collection to report on.</param>
        /// <param name="ws">CSV writer receiving the fields and records.</param>
        private void BuildWorksheetPageDescriptions(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection Documents = JobMaster.GetDocCollection();

            string[] ColumnTitles =
            {
                "URL",
                "Page Language",
                "Detected Language",
                "Occurrences",
                "Description",
                "Description Length"
            };

            foreach (string ColumnTitle in ColumnTitles)
            {
                ws.WriteField(ColumnTitle);
            }

            ws.NextRecord();

            foreach (MacroscopeDocument msDoc in Documents.IterateDocuments())
            {
                // External documents and redirects are left out of the report.
                if (msDoc.GetIsExternal() || msDoc.GetIsRedirect())
                {
                    continue;
                }

                // Only HTML and PDF documents get a record.
                switch (msDoc.GetDocumentType())
                {
                case MacroscopeConstants.DocumentType.HTML:
                case MacroscopeConstants.DocumentType.PDF:
                    break;

                default:
                    continue;
                }

                string DescriptionText = msDoc.GetDescription();
                string DeclaredLanguage = msDoc.GetIsoLanguageCode();

                // NOTE(review): the detected language is read from GetTitleLanguage()
                // although this report covers descriptions - confirm this is intended.
                string DetectedLanguage = msDoc.GetTitleLanguage();

                int DescriptionLength = msDoc.GetDescriptionLength();

                // The occurrence count is only looked up when the length is positive.
                int Occurrences = 0;

                if (DescriptionLength > 0)
                {
                    Occurrences = Documents.GetStatsDescriptionCount(msDoc: msDoc);
                }

                this.InsertAndFormatUrlCell(ws, msDoc);
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(DeclaredLanguage));
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(DetectedLanguage));
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(Occurrences.ToString()));
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(DescriptionText));
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(DescriptionLength.ToString()));

                ws.NextRecord();
            }
        }
示例#20
0
        /**************************************************************************/

        /// <summary>
        /// Adds a worksheet with one row per non-empty XPath extraction result,
        /// then turns the populated range into an Excel table.
        /// </summary>
        /// <param name="JobMaster">Source of the document collection to report on.</param>
        /// <param name="wb">Workbook that receives the new worksheet.</param>
        /// <param name="WorksheetLabel">Name given to the new worksheet.</param>
        private void BuildWorksheetXpaths(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

            // Header row.
            {
                ws.Cell(iRow, iCol).Value = MacroscopeConstants.Url;
                iCol++;

                ws.Cell(iRow, iCol).Value = MacroscopeConstants.StatusCode;
                iCol++;

                ws.Cell(iRow, iCol).Value = MacroscopeConstants.Status;
                iCol++;

                ws.Cell(iRow, iCol).Value = MacroscopeConstants.ContentType;
                iCol++;

                ws.Cell(iRow, iCol).Value = "Extracted Label";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Extracted Value";
            }

            // The last header column fixes the width of the table range.
            iColMax = iCol;

            iRow++;

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                // Filter first, so nothing is fetched for documents that are skipped.
                if (!this.DataExtractorRegexes.CanApplyDataExtractorsToDocument(msDoc: msDoc))
                {
                    continue;
                }

                string Status   = msDoc.GetStatusCode().ToString();
                string MimeType = msDoc.GetMimeType();

                foreach (KeyValuePair <string, string> DataExtractedPair in msDoc.IterateDataExtractedXpaths())
                {
                    string ExtractedLabel = DataExtractedPair.Key;
                    string ExtractedValue = DataExtractedPair.Value;

                    // Pairs with a missing label or value produce no row.
                    if (
                        string.IsNullOrEmpty(ExtractedLabel) ||
                        string.IsNullOrEmpty(ExtractedValue))
                    {
                        continue;
                    }

                    iCol = 1;

                    this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

                    if (msDoc.GetIsInternal())
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                    }
                    else
                    {
                        ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                    }

                    iCol++;

                    this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, msDoc);

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(Status));

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(MimeType));

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(ExtractedLabel));

                    iCol++;

                    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(ExtractedValue));

                    iRow++;
                }
            }

            // iRow points one past the last written row.
            {
                var rangeData  = ws.Range(1, 1, iRow - 1, iColMax);
                var excelTable = rangeData.CreateTable();
            }
        }
示例#21
0
        /**************************************************************************/

        /// <summary>
        /// Adds a worksheet reporting page titles, with one colour-coded row per
        /// reported document, then turns the populated range into an Excel table.
        /// </summary>
        /// <param name="JobMaster">Source of the document collection to report on.</param>
        /// <param name="wb">Workbook that receives the new worksheet.</param>
        /// <param name="WorksheetLabel">Name given to the new worksheet.</param>
        private void BuildWorksheetPageTitles(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var Sheet = wb.Worksheets.Add(WorksheetLabel);

            MacroscopeDocumentCollection Documents = JobMaster.GetDocCollection();

            string[] ColumnTitles =
            {
                "URL",
                "Page Language",
                "Detected Language",
                "Occurrences",
                "Title",
                "Title Length",
                "Pixel Width"
            };

            int Row = 1;
            int Col = 1;

            // Header row; columns are 1-based.
            for (int Index = 0; Index < ColumnTitles.Length; Index++)
            {
                Sheet.Cell(Row, Index + 1).Value = ColumnTitles[Index];
            }

            int LastColumn = ColumnTitles.Length;

            Row++;

            foreach (MacroscopeDocument msDoc in Documents.IterateDocuments())
            {
                // External documents and redirects are left out of the report.
                if (msDoc.GetIsExternal() || msDoc.GetIsRedirect())
                {
                    continue;
                }

                // Only HTML and PDF documents get a row.
                switch (msDoc.GetDocumentType())
                {
                case MacroscopeConstants.DocumentType.HTML:
                case MacroscopeConstants.DocumentType.PDF:
                    break;

                default:
                    continue;
                }

                Col = 1;

                string DeclaredLanguage = msDoc.GetIsoLanguageCode();
                string DetectedLanguage = msDoc.GetTitleLanguage();
                string TitleText        = msDoc.GetTitle();
                int    TitleLength      = msDoc.GetTitleLength();
                int    TitlePixelWidth  = msDoc.GetTitlePixelWidth();

                // The occurrence count is only looked up when the length is positive.
                int Occurrences = 0;

                if (TitleLength > 0)
                {
                    Occurrences = Documents.GetStatsTitleCount(msDoc: msDoc);
                }

                // Both language columns share the same mismatch colouring.
                bool LanguageMismatch = DeclaredLanguage != DetectedLanguage;

                // URL -----------------------------------------------------------//

                this.InsertAndFormatUrlCell(Sheet, Row, Col, msDoc);

                XLColor UrlColor = msDoc.GetIsInternal() ? XLColor.Green : XLColor.Gray;
                Sheet.Cell(Row, Col).Style.Font.SetFontColor(UrlColor);

                Col++;

                // Page Language -------------------------------------------------//

                // HTML passes the language through FormatIfMissing; PDF writes it as-is.
                var TypeOfDoc = msDoc.GetDocumentType();

                if (TypeOfDoc == MacroscopeConstants.DocumentType.HTML)
                {
                    this.InsertAndFormatContentCell(Sheet, Row, Col, this.FormatIfMissing(DeclaredLanguage));
                }
                else if (TypeOfDoc == MacroscopeConstants.DocumentType.PDF)
                {
                    this.InsertAndFormatContentCell(Sheet, Row, Col, DeclaredLanguage);
                }

                Sheet.Cell(Row, Col).Style.Font.SetFontColor(LanguageMismatch ? XLColor.Red : XLColor.Green);

                Col++;

                // Detected Language ---------------------------------------------//

                this.InsertAndFormatContentCell(Sheet, Row, Col, this.FormatIfMissing(DetectedLanguage));

                Sheet.Cell(Row, Col).Style.Font.SetFontColor(LanguageMismatch ? XLColor.Red : XLColor.Green);

                Col++;

                // Occurrences ---------------------------------------------------//

                this.InsertAndFormatContentCell(Sheet, Row, Col, this.FormatIfMissing(Occurrences.ToString()));

                Sheet.Cell(Row, Col).Style.Font.SetFontColor(Occurrences > 1 ? XLColor.Orange : XLColor.Green);

                Col++;

                // Title ---------------------------------------------------------//

                this.InsertAndFormatContentCell(Sheet, Row, Col, this.FormatIfMissing(TitleText));

                if (TitleLength > 0)
                {
                    Sheet.Cell(Row, Col).Style.Font.SetFontColor(XLColor.Green);
                }
                else
                {
                    // A non-positive length overwrites the cell with a fixed marker.
                    Sheet.Cell(Row, Col).Style.Font.SetFontColor(XLColor.Red);
                    Sheet.Cell(Row, Col).Value = "MISSING";
                }

                Col++;

                // Title Length --------------------------------------------------//

                this.InsertAndFormatContentCell(Sheet, Row, Col, TitleLength);

                bool LengthOutOfBounds =
                    TitleLength < MacroscopePreferencesManager.GetTitleMinLen() ||
                    TitleLength > MacroscopePreferencesManager.GetTitleMaxLen();

                Sheet.Cell(Row, Col).Style.Font.SetFontColor(LengthOutOfBounds ? XLColor.Red : XLColor.Green);

                Col++;

                // Pixel Width ---------------------------------------------------//

                this.InsertAndFormatContentCell(Sheet, Row, Col, TitlePixelWidth);

                XLColor WidthColor;

                if (
                    TitlePixelWidth > MacroscopePreferencesManager.GetTitleMaxPixelWidth() ||
                    TitlePixelWidth >= (MacroscopePreferencesManager.GetTitleMaxPixelWidth() - 20))
                {
                    // Over the maximum, or within 20 of it.
                    WidthColor = XLColor.Red;
                }
                else if (TitlePixelWidth <= 0)
                {
                    WidthColor = XLColor.Orange;
                }
                else
                {
                    WidthColor = XLColor.Green;
                }

                Sheet.Cell(Row, Col).Style.Font.SetFontColor(WidthColor);

                Row++;
            }

            // Row points one past the last written row.
            Sheet.Range(1, 1, Row - 1, LastColumn).CreateTable();
        }
        /**************************************************************************/

        /// <summary>
        /// Creates or refreshes the keywords list-view row for one document and
        /// colours its sub-items. New rows are appended to
        /// <paramref name="ListViewItems"/>; rows already in the display list view
        /// are updated in place.
        /// </summary>
        /// <param name="ListViewItems">Receives any newly created row.</param>
        /// <param name="DocCollection">Collection queried for the keywords occurrence count.</param>
        /// <param name="msDoc">Document whose keywords are shown.</param>
        /// <param name="Url">URL shown in the first column and used as part of the row key.</param>
        protected override void RenderListView(
            List <ListViewItem> ListViewItems,
            MacroscopeDocumentCollection DocCollection,
            MacroscopeDocument msDoc,
            string Url
            )
        {
            // External documents and redirects are not displayed.
            if (msDoc.GetIsExternal() || msDoc.GetIsRedirect())
            {
                return;
            }

            // Only HTML and PDF documents are displayed.
            switch (msDoc.GetDocumentType())
            {
            case MacroscopeConstants.DocumentType.HTML:
            case MacroscopeConstants.DocumentType.PDF:
                break;

            default:
                return;
            }

            string KeywordsText   = msDoc.GetKeywords();
            int    KeywordsLength = msDoc.GetKeywordsLength();
            int    KeywordsCount  = msDoc.GetKeywordsCount();

            // Rows are keyed by the URL followed by the keywords text.
            string ItemKey = string.Concat(Url, KeywordsText);

            // The occurrence count is only looked up when the length is positive.
            int Occurrences = 0;

            if (KeywordsLength > 0)
            {
                Occurrences = DocCollection.GetStatsKeywordsCount(msDoc);
            }

            ListViewItem lvRow = null;

            if (this.DisplayListView.Items.ContainsKey(ItemKey))
            {
                // Refresh the row that is already displayed.
                try
                {
                    lvRow = this.DisplayListView.Items[ItemKey];
                    lvRow.SubItems[0].Text = Url;
                    lvRow.SubItems[1].Text = Occurrences.ToString();
                    lvRow.SubItems[2].Text = KeywordsText;
                    lvRow.SubItems[3].Text = KeywordsLength.ToString();
                    lvRow.SubItems[4].Text = KeywordsCount.ToString();
                }
                catch (Exception ex)
                {
                    DebugMsg(string.Format("MacroscopeDisplayKeywords 1: {0}", ex.Message));
                }
            }
            else
            {
                // Build a new row and hand it to the caller's list.
                try
                {
                    lvRow = new ListViewItem(ItemKey);
                    lvRow.UseItemStyleForSubItems = false;
                    lvRow.Name = ItemKey;

                    lvRow.SubItems[0].Text = Url;
                    lvRow.SubItems.Add(Occurrences.ToString());
                    lvRow.SubItems.Add(KeywordsText);
                    lvRow.SubItems.Add(KeywordsLength.ToString());
                    lvRow.SubItems.Add(KeywordsCount.ToString());

                    ListViewItems.Add(lvRow);
                }
                catch (Exception ex)
                {
                    DebugMsg(string.Format("MacroscopeDisplayKeywords 2: {0}", ex.Message));
                }
            }

            if (lvRow == null)
            {
                return;
            }

            lvRow.ForeColor = Color.Blue;

            // URL column ------------------------------------------------------//

            lvRow.SubItems[0].ForeColor = msDoc.GetIsInternal() ? Color.Green : Color.Gray;

            // Remaining columns: one colour for all four ------------------------//

            Color DetailColor;

            if (!msDoc.GetIsInternal())
            {
                DetailColor = Color.Gray;
            }
            else if (KeywordsLength <= 0)
            {
                // Internal document with a non-positive keywords length.
                DetailColor = Color.Red;
            }
            else
            {
                DetailColor = Color.Green;
            }

            for (int Index = 1; Index <= 4; Index++)
            {
                lvRow.SubItems[Index].ForeColor = DetailColor;
            }
        }
示例#23
0
        /**************************************************************************/

        /// <summary>
        /// Writes the page-title report to a CSV writer: a header record, then one
        /// record for each document that survives the filters below.
        /// </summary>
        /// <param name="JobMaster">Source of the document collection to report on.</param>
        /// <param name="ws">CSV writer receiving the fields and records.</param>
        private void BuildWorksheetPageTitles(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection Documents = JobMaster.GetDocCollection();

            string[] ColumnTitles =
            {
                "URL",
                "Page Language",
                "Detected Language",
                "Occurrences",
                "Title",
                "Title Length",
                "Pixel Width"
            };

            foreach (string ColumnTitle in ColumnTitles)
            {
                ws.WriteField(ColumnTitle);
            }

            ws.NextRecord();

            foreach (string Url in Documents.DocumentKeys())
            {
                MacroscopeDocument msDoc = Documents.GetDocument(Url);

                // External documents and redirects are left out of the report.
                if (msDoc.GetIsExternal() || msDoc.GetIsRedirect())
                {
                    continue;
                }

                // Only HTML and PDF documents get a record.
                if (!(msDoc.GetIsHtml() || msDoc.GetIsPdf()))
                {
                    continue;
                }

                string TitleText        = msDoc.GetTitle();
                string DeclaredLanguage = msDoc.GetIsoLanguageCode();
                string DetectedLanguage = msDoc.GetTitleLanguage();
                int    TitleLength      = msDoc.GetTitleLength();
                int    TitlePixelWidth  = msDoc.GetTitlePixelWidth();

                // The occurrence count is only looked up when the length is positive.
                int Occurrences = 0;

                if (TitleLength > 0)
                {
                    Occurrences = Documents.GetStatsTitleCount(msDoc: msDoc);
                }

                this.InsertAndFormatUrlCell(ws, msDoc);
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(DeclaredLanguage));
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(DetectedLanguage));
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(Occurrences.ToString()));
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(TitleText));
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(TitleLength.ToString()));
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(TitlePixelWidth.ToString()));

                ws.NextRecord();
            }
        }
示例#24
0
        /**************************************************************************/

        /// <summary>
        /// Writes the overview report to a CSV writer: a header record, then one
        /// record per document in the collection, with fields in header order.
        /// </summary>
        /// <param name="JobMaster">Source of the document collection to report on.</param>
        /// <param name="ws">CSV writer receiving the fields and records.</param>
        private void BuildWorksheetOverview(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

            // Header record.
            {
                ws.WriteField("URL");
                ws.WriteField("Status Code");
                ws.WriteField("Status");
                ws.WriteField("Redirect");
                ws.WriteField("Robots");
                ws.WriteField("Duration");
                ws.WriteField("Crawled Date");
                ws.WriteField("Server Date");
                ws.WriteField("Modified Date");
                ws.WriteField("Expires Date");
                ws.WriteField("Content-Type");
                ws.WriteField("Charset");
                ws.WriteField("Locale");
                ws.WriteField("Language");
                ws.WriteField("Canonical");
                ws.WriteField("Page Depth");
                ws.WriteField("Links In");
                ws.WriteField("Links Out");
                ws.WriteField("Hyperlinks In");
                ws.WriteField("Hyperlinks Out");
                // Was misspelled "Ration In"; now matches "Ratio Out" below.
                ws.WriteField("Ratio In");
                ws.WriteField("Ratio Out");
                ws.WriteField("Author");
                ws.WriteField("Title");
                ws.WriteField("Title Length");
                ws.WriteField("Description");
                ws.WriteField("Description Length");
                ws.WriteField("Keywords");
                ws.WriteField("Keywords Length");
                ws.WriteField("Keywords Count");
                ws.WriteField("Error Condition");

                ws.NextRecord();
            }

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                // Element 0 fills the "Ratio In" column, element 1 "Ratio Out".
                List <decimal> HyperlinkRatio = DocCollection.GetDocumentHyperlinksRatio(Url: msDoc.GetUrl());

                this.InsertAndFormatUrlCell(ws, msDoc);

                this.InsertAndFormatStatusCodeCell(ws, msDoc);

                this.InsertAndFormatContentCell(ws, msDoc.GetStatusCode().ToString());

                this.InsertAndFormatRedirectCell(ws, msDoc);

                this.InsertAndFormatRobotsCell(ws, msDoc);

                this.InsertAndFormatContentCell(ws, msDoc.GetDurationInSecondsFormatted());

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetCrawledDate()));

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetDateServer()));

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetDateModified()));

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetDateExpires()));

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetMimeType()));

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetCharacterSet()));

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetLocale()));

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetIsoLanguageCode()));

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetCanonical()));

                this.InsertAndFormatContentCell(ws, msDoc.GetDepth().ToString());

                this.InsertAndFormatContentCell(ws, msDoc.CountInlinks().ToString());
                this.InsertAndFormatContentCell(ws, msDoc.CountOutlinks().ToString());

                this.InsertAndFormatContentCell(ws, msDoc.CountHyperlinksIn().ToString());
                this.InsertAndFormatContentCell(ws, msDoc.CountHyperlinksOut().ToString());

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(string.Format("{0:0.00}%", HyperlinkRatio[0])));
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(string.Format("{0:0.00}%", HyperlinkRatio[1])));

                this.InsertAndFormatContentCell(ws, msDoc.GetAuthor());

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetTitle()));

                this.InsertAndFormatContentCell(ws, msDoc.GetTitleLength().ToString());

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetDescription()));
                this.InsertAndFormatContentCell(ws, msDoc.GetDescriptionLength().ToString());

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetKeywords()));
                this.InsertAndFormatContentCell(ws, msDoc.GetKeywordsLength().ToString());
                this.InsertAndFormatContentCell(ws, msDoc.GetKeywordsCount().ToString());

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(msDoc.GetErrorCondition()));

                ws.NextRecord();
            }
        }
        /**************************************************************************/

        /// <summary>
        /// Adds a worksheet listing the page language, detected language and
        /// author of each eligible document, then turns the populated range
        /// into an Excel table.
        /// </summary>
        /// <remarks>
        /// External documents and redirects are skipped, and only HTML and PDF
        /// documents are written. The "detected" language is the value
        /// returned by <c>GetTitleLanguage()</c>.
        /// </remarks>
        /// <param name="JobMaster">Job whose document collection is exported.</param>
        /// <param name="wb">Workbook that receives the new worksheet.</param>
        /// <param name="WorksheetLabel">Name given to the new worksheet.</param>
        private void BuildWorksheetPageAuthors(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            var ws = wb.Worksheets.Add(WorksheetLabel);

            int iRow    = 1;
            int iCol    = 1;
            int iColMax = 1;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

            // Header row.
            {
                ws.Cell(iRow, iCol).Value = "URL";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Page Language";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Detected Language";
                iCol++;

                ws.Cell(iRow, iCol).Value = "Author";
            }

            // Remember the right-most column so the table range can be built later.
            iColMax = iCol;

            iRow++;

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                if (msDoc.GetIsExternal() || msDoc.GetIsRedirect())
                {
                    continue;
                }

                bool Proceed;

                switch (msDoc.GetDocumentType())
                {
                case MacroscopeConstants.DocumentType.HTML:
                case MacroscopeConstants.DocumentType.PDF:
                    Proceed = true;
                    break;

                default:
                    Proceed = false;
                    break;
                }

                if (!Proceed)
                {
                    continue;
                }

                iCol = 1;

                string PageLanguage     = msDoc.GetIsoLanguageCode();
                string DetectedLanguage = msDoc.GetTitleLanguage();
                string Author           = msDoc.GetAuthor();

                // A mismatch between declared and detected language is flagged in red
                // on both language cells.
                XLColor LanguageColor = (PageLanguage != DetectedLanguage) ? XLColor.Red : XLColor.Green;

                /** URL **/

                this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

                if (msDoc.GetIsInternal())
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                }
                else
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
                }

                iCol++;

                /** Page Language **/

                this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(PageLanguage));
                ws.Cell(iRow, iCol).Style.Font.SetFontColor(LanguageColor);

                iCol++;

                /** Detected Language **/

                this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(DetectedLanguage));
                ws.Cell(iRow, iCol).Style.Font.SetFontColor(LanguageColor);

                iCol++;

                /** Author **/

                this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(Author));

                // Null-safe: a document without an author must not abort the export.
                if (!string.IsNullOrEmpty(Author))
                {
                    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
                }

                iRow++;
            }

            // iRow points one past the last written row.
            ws.Range(1, 1, iRow - 1, iColMax).CreateTable();
        }
        /**************************************************************************/

        /// <summary>
        /// Rebuilds the list view with one row per internal, non-redirect HTML
        /// document, showing its status code, site locale, title, whether
        /// hreflang alternates are present, and for every locale in
        /// <paramref name="LocalesList"/> the alternate URL plus its server and
        /// modified dates.
        /// </summary>
        /// <remarks>
        /// When the collection is empty the method returns before touching the
        /// list view. Exceptions raised while filling a row are logged through
        /// <c>DebugMsg</c> and do not stop the remaining rows from rendering.
        /// </remarks>
        /// <param name="DocCollection">Documents to display.</param>
        /// <param name="LocalesList">Locales to show; each key gets three columns.</param>
        private void RenderListView(
            MacroscopeDocumentCollection DocCollection,
            Dictionary <string, string> LocalesList
            )
        {
            MacroscopeAllowedHosts AllowedHosts = this.MainForm.GetJobMaster().GetAllowedHosts();

            // Maps each locale key (and its two date column labels) to a sub-item index.
            SortedDictionary <string, int> LocaleColsTable = new SortedDictionary <string, int> ();

            if (DocCollection.CountDocuments() == 0)
            {
                return;
            }

            List <ListViewItem> ListViewItems = new List <ListViewItem> ();

            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

            try
            {
                decimal Count           = 0;
                decimal TotalDocs       = ( decimal )DocCollection.CountDocuments();
                decimal MajorPercentage = (( decimal )100 / TotalDocs) * Count;

                if (MacroscopePreferencesManager.GetShowProgressDialogues())
                {
                    ProgressForm.UpdatePercentages(
                        Title: "Preparing Display",
                        Message: "Processing document collection for display:",
                        MajorPercentage: MajorPercentage,
                        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                        );
                }

                this.DisplayListView.Items.Clear();
                this.DisplayListView.Columns.Clear();

                {
                    // The five fixed columns occupy indexes 0-4; locale columns start at 5.
                    int LocaleColCount = 5;

                    this.DisplayListView.Columns.Add("URL", "URL");
                    this.DisplayListView.Columns.Add("Status Code", "Status Code");
                    this.DisplayListView.Columns.Add("Site Locale", "Site Locale");
                    this.DisplayListView.Columns.Add("HrefLang Present", "HrefLang Present");
                    this.DisplayListView.Columns.Add("Title", "Title");

                    foreach (string Locale in LocalesList.Keys)
                    {
                        string LocaleLabel       = Locale.ToUpper();
                        string DateServerLabel   = string.Format("{0} Date Server", Locale.ToUpper());
                        string DateModifiedLabel = string.Format("{0} Date Modified", Locale.ToUpper());

                        this.DisplayListView.Columns.Add(LocaleLabel, LocaleLabel);
                        this.DisplayListView.Columns.Add(DateServerLabel, DateServerLabel);
                        this.DisplayListView.Columns.Add(DateModifiedLabel, DateModifiedLabel);

                        LocaleColsTable[Locale] = LocaleColCount;
                        LocaleColCount++;

                        LocaleColsTable[DateServerLabel] = LocaleColCount;
                        LocaleColCount++;

                        LocaleColsTable[DateModifiedLabel] = LocaleColCount;
                        LocaleColCount++;
                    }
                }

                foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
                {
                    bool Proceed = false;

                    // Only internal HTML documents that are not redirects get a row.
                    if (msDoc.GetIsInternal())
                    {
                        Proceed = true;

                        if (msDoc.GetIsRedirect())
                        {
                            Proceed = false;
                        }

                        if (!msDoc.IsDocumentType(Type: MacroscopeConstants.DocumentType.HTML))
                        {
                            Proceed = false;
                        }
                    }

                    if (Proceed)
                    {
                        Dictionary <string, MacroscopeHrefLang> HrefLangsTable = msDoc.GetHrefLangs();
                        string         DocUrl        = msDoc.GetUrl();
                        string         PairKey       = UrlToDigest(DocUrl).ToString();
                        HttpStatusCode StatusCode    = msDoc.GetStatusCode();
                        int            StatusCodeNum = ( int )StatusCode;
                        MacroscopeConstants.Specifiers HrefLangPresent = MacroscopeConstants.Specifiers.UNSPECIFIED;
                        string       DocLocale = msDoc.GetLocale();
                        string       DocTitle  = msDoc.GetTitle();
                        ListViewItem lvItem    = null;

                        // NOTE(review): more than one entry is required here, presumably
                        // because the table also holds the page's own locale - confirm.
                        if (
                            (HrefLangsTable != null) &&
                            (HrefLangsTable.Count > 1))
                        {
                            HrefLangPresent = MacroscopeConstants.Specifiers.SPECIFIED;
                        }
                        else
                        {
                            HrefLangPresent = MacroscopeConstants.Specifiers.UNSPECIFIED;
                        }

                        if (this.DisplayListView.Items.ContainsKey(PairKey))
                        {
                            lvItem = this.DisplayListView.Items[PairKey];
                        }
                        else
                        {
                            lvItem = new ListViewItem(PairKey);
                            lvItem.UseItemStyleForSubItems = false;
                            lvItem.Name = PairKey;

                            // Placeholder sub-items for the fixed columns ...
                            lvItem.SubItems.Add("");
                            lvItem.SubItems.Add("");
                            lvItem.SubItems.Add("");
                            lvItem.SubItems.Add("");
                            lvItem.SubItems.Add("");

                            // ... and three per locale (URL, server date, modified date).
                            for (int i = 0; i < LocalesList.Keys.Count; i++)
                            {
                                lvItem.SubItems.Add("");
                                lvItem.SubItems.Add("");
                                lvItem.SubItems.Add("");
                            }

                            ListViewItems.Add(lvItem);
                        }

                        if (lvItem != null)
                        {
                            try
                            {
                                lvItem.SubItems[ColUrl].Text             = DocUrl;
                                lvItem.SubItems[ColStatusCode].Text      = StatusCode.ToString();
                                lvItem.SubItems[ColSiteLocale].Text      = DocLocale;
                                lvItem.SubItems[ColHrefLangPresent].Text = "";
                                lvItem.SubItems[ColTitle].Text           = DocTitle;

                                switch (HrefLangPresent)
                                {
                                case MacroscopeConstants.Specifiers.SPECIFIED:
                                    lvItem.SubItems[ColHrefLangPresent].ForeColor = Color.Green;
                                    lvItem.SubItems[ColHrefLangPresent].Text      = "SPECIFIED";
                                    break;

                                default:
                                    lvItem.SubItems[ColHrefLangPresent].ForeColor = Color.Red;
                                    lvItem.SubItems[ColHrefLangPresent].Text      = "UNSPECIFIED";
                                    break;
                                }

                                if (AllowedHosts.IsInternalUrl(DocUrl))
                                {
                                    lvItem.SubItems[ColUrl].ForeColor = Color.Green;
                                }
                                else
                                {
                                    lvItem.SubItems[ColUrl].ForeColor = Color.Gray;
                                }

                                if ((StatusCodeNum >= 100) && (StatusCodeNum <= 299))
                                {
                                    lvItem.SubItems[ColStatusCode].ForeColor = Color.Green;
                                }
                                else
                                if ((StatusCodeNum >= 300) && (StatusCodeNum <= 399))
                                {
                                    lvItem.SubItems[ColStatusCode].ForeColor = Color.Orange;
                                }
                                else
                                if ((StatusCodeNum >= 400) && (StatusCodeNum <= 599))
                                {
                                    lvItem.SubItems[ColStatusCode].ForeColor = Color.Red;
                                }
                                else
                                {
                                    // Status outside 100-599: grey out the status cell itself
                                    // (this branch previously coloured the site locale cell).
                                    lvItem.SubItems[ColStatusCode].ForeColor = Color.Gray;
                                }

                                foreach (string Locale in LocalesList.Keys)
                                {
                                    if (!string.IsNullOrEmpty(Locale))
                                    {
                                        string   HrefLangUrl          = null;
                                        DateTime HrefLangDateServer   = new DateTime();
                                        DateTime HrefLangDateModified = new DateTime();
                                        int      LocaleCol            = LocaleColsTable[Locale];

                                        MacroscopeHrefLang HrefLangAlternate = null;

                                        // Single lookup instead of ContainsKey followed by the indexer.
                                        if (
                                            (HrefLangsTable != null) &&
                                            HrefLangsTable.TryGetValue(Locale, out HrefLangAlternate) &&
                                            (HrefLangAlternate != null))
                                        {
                                            HrefLangUrl          = HrefLangAlternate.GetUrl();
                                            HrefLangDateServer   = HrefLangAlternate.GetDateServer();
                                            HrefLangDateModified = HrefLangAlternate.GetDateModified();
                                        }

                                        if (!string.IsNullOrEmpty(HrefLangUrl))
                                        {
                                            lvItem.SubItems[LocaleCol].ForeColor = Color.Blue;

                                            lvItem.SubItems[LocaleCol].Text = HrefLangUrl;

                                            // The two date columns directly follow the locale's URL column.
                                            lvItem.SubItems[LocaleCol + 1].Text = HrefLangDateServer.ToString();
                                            lvItem.SubItems[LocaleCol + 2].Text = HrefLangDateModified.ToString();
                                        }
                                        else
                                        {
                                            lvItem.SubItems[LocaleCol].ForeColor = Color.Red;

                                            lvItem.SubItems[LocaleCol].Text     = "NOT SPECIFIED";
                                            lvItem.SubItems[LocaleCol + 1].Text = "NOT SPECIFIED";
                                            lvItem.SubItems[LocaleCol + 2].Text = "NOT SPECIFIED";
                                        }
                                    }
                                }
                            }
                            catch (Exception ex)
                            {
                                // Best effort: a failure on one row is logged and rendering continues.
                                DebugMsg(string.Format("MacroscopeDisplayHrefLang: {0}", ex.Message));
                                DebugMsg(string.Format("MacroscopeDisplayHrefLang: {0}", ex.StackTrace));
                            }
                        }
                        else
                        {
                            DebugMsg(string.Format("MacroscopeDisplayHrefLang NOT SPECIFIED: {0}", PairKey));
                        }
                    }

                    // Progress counts every document visited, including skipped ones.
                    if (MacroscopePreferencesManager.GetShowProgressDialogues())
                    {
                        Count++;

                        MajorPercentage = (( decimal )100 / TotalDocs) * Count;

                        ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: MajorPercentage,
                            ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                            );
                    }
                }

                this.DisplayListView.Items.AddRange(ListViewItems.ToArray());

                this.DisplayListView.AutoResizeColumns(ColumnHeaderAutoResizeStyle.ColumnContent);

                // Fixed widths override the auto-sized result for these columns.
                this.DisplayListView.Columns[ColUrl].Width        = 300;
                this.DisplayListView.Columns[ColStatusCode].Width = 80;
                this.DisplayListView.Columns[ColSiteLocale].Width = 100;
                this.DisplayListView.Columns[ColTitle].Width      = 100;
            }
            finally
            {
                // Close and release the progress form even if rendering failed.
                if (MacroscopePreferencesManager.GetShowProgressDialogues())
                {
                    ProgressForm.DoClose();
                }

                ProgressForm.Dispose();
            }
        }
示例#27
0
        /**************************************************************************/

        /// <summary>
        /// Appends to the list view a row for every document whose URL contains
        /// <paramref name="UrlFragment"/>, optionally reporting progress.
        /// </summary>
        /// <remarks>
        /// The fragment is matched with <see cref="StringComparison.CurrentCulture"/>.
        /// An empty collection returns immediately, because the progress
        /// percentage divides by the document count.
        /// </remarks>
        /// <param name="DocCollection">Documents to search.</param>
        /// <param name="UrlFragment">Substring that a document URL must contain.</param>
        public void RenderListViewSearchSourceUrls(
            MacroscopeDocumentCollection DocCollection,
            string UrlFragment
            )
        {
            // Guard against decimal division by zero in the percentage maths below;
            // with no documents there is nothing to add to the list view anyway.
            if (DocCollection.CountDocuments() == 0)
            {
                return;
            }

            List <ListViewItem> ListViewItems = new List <ListViewItem>(DocCollection.CountDocuments());

            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

            try
            {
                decimal Count     = 0;
                decimal TotalDocs = (decimal)DocCollection.CountDocuments();

                if (MacroscopePreferencesManager.GetShowProgressDialogues())
                {
                    ProgressForm.UpdatePercentages(
                        Title: "Displaying Links",
                        Message: "Processing links in document collection for display:",
                        MajorPercentage: ((decimal)100 / TotalDocs) * Count,
                        ProgressLabelMajor: "Documents Processed"
                        );
                }

                foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
                {
                    string Url = msDoc.GetUrl();

                    if (Url.IndexOf(UrlFragment, StringComparison.CurrentCulture) >= 0)
                    {
                        this.RenderListView(
                            ListViewItems: ListViewItems,
                            DocCollection: DocCollection,
                            msDoc: msDoc,
                            Url: Url
                            );
                    }

                    if (MacroscopePreferencesManager.GetShowProgressDialogues())
                    {
                        Count++;

                        // The total is re-read on every iteration, as in the original code.
                        TotalDocs = (decimal)DocCollection.CountDocuments();

                        ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: ((decimal)100 / TotalDocs) * Count,
                            ProgressLabelMajor: null
                            );
                    }
                }

                this.DisplayListView.Items.AddRange(ListViewItems.ToArray());
            }
            finally
            {
                // Close and release the progress form even if rendering failed.
                if (MacroscopePreferencesManager.GetShowProgressDialogues())
                {
                    ProgressForm.DoClose();
                }

                ProgressForm.Dispose();
            }
        }
示例#28
0
        /**************************************************************************/

        /// <summary>
        /// Resets this job master to a fresh state for the given run-time mode:
        /// new document collection, allowed hosts, named queues, worker
        /// semaphore, crawl limits, history, locales and robots data.
        /// </summary>
        /// <param name="JobRunTimeMode">Run-time mode stored for this job.</param>
        private void InitializeJobMaster(MacroscopeConstants.RunTimeMode JobRunTimeMode)
        {
            GC.Collect();

            /*
             * Event log set-up, currently disabled:
             * {
             * this.JobMasterLog = new EventLog ();
             * this.JobMasterLog.Source = MacroscopeConstants.MainEventLogSourceName;
             * this.JobGuid = Guid.NewGuid();
             * this.LogEntry( string.Format( "Starting Job" ) );
             * }
             */

            this.RunTimeMode = JobRunTimeMode;

            // HTTP credentials are only available when a task controller is attached.
            if (this.TaskController != null)
            {
                this.CredentialsHttp = this.TaskController.IGetCredentialsHttp();
            }

            this.DocCollection = new MacroscopeDocumentCollection(JobMaster: this);
            this.AllowedHosts  = new MacroscopeAllowedHosts();

            /** BEGIN: Named Queues *************************************************/

            this.NamedQueueJobItems = new MacroscopeNamedQueue <MacroscopeJobItem> ();

            this.NamedQueueJobItems.CreateNamedQueue(
                Name: MacroscopeConstants.NamedQueueUrlList,
                QueueMode: MacroscopeNamedQueue <MacroscopeJobItem> .MODE.USE_HISTORY
                );

            this.NamedQueue = new MacroscopeNamedQueue <string> ();

            // One display queue per name, created in this exact order.
            var DisplayQueueNames = new[]
            {
                MacroscopeConstants.NamedQueueDisplayQueue,

                MacroscopeConstants.NamedQueueDisplayStructure,
                MacroscopeConstants.NamedQueueDisplayStructureLinkCounts,

                MacroscopeConstants.NamedQueueDisplayHierarchy,
                MacroscopeConstants.NamedQueueDisplayCanonicalAnalysis,
                MacroscopeConstants.NamedQueueDisplayHrefLang,
                MacroscopeConstants.NamedQueueDisplayErrors,
                MacroscopeConstants.NamedQueueDisplayHostnames,
                MacroscopeConstants.NamedQueueDisplayRedirectsAudit,
                MacroscopeConstants.NamedQueueDisplayLinks,
                MacroscopeConstants.NamedQueueDisplayHyperlinks,
                MacroscopeConstants.NamedQueueDisplayUriAnalysis,
                MacroscopeConstants.NamedQueueDisplayPageTitles,
                MacroscopeConstants.NamedQueueDisplayPageDescriptions,
                MacroscopeConstants.NamedQueueDisplayPageKeywords,
                MacroscopeConstants.NamedQueueDisplayPageHeadings,
                MacroscopeConstants.NamedQueueDisplayPageText,
                MacroscopeConstants.NamedQueueDisplayStylesheets,
                MacroscopeConstants.NamedQueueDisplayImages,
                MacroscopeConstants.NamedQueueDisplayJavascripts,
                MacroscopeConstants.NamedQueueDisplayAudios,
                MacroscopeConstants.NamedQueueDisplayVideos,
                MacroscopeConstants.NamedQueueDisplaySitemaps,
                MacroscopeConstants.NamedQueueDisplayEmailAddresses,
                MacroscopeConstants.NamedQueueDisplayTelephoneNumbers,
                MacroscopeConstants.NamedQueueDisplayCustomFilters,

                MacroscopeConstants.NamedQueueDisplayDataExtractorsCssSelectors,
                MacroscopeConstants.NamedQueueDisplayDataExtractorsRegexes,
                MacroscopeConstants.NamedQueueDisplayDataExtractorsXpaths,

                MacroscopeConstants.NamedQueueDisplayRemarks
            };

            foreach (var QueueName in DisplayQueueNames)
            {
                this.NamedQueue.CreateNamedQueue(Name: QueueName);
            }

            /** END: Named Queues ***************************************************/

            this.CrawlDelay = 0;

            /** Worker threads **/

            this.AdjustThreadsMax();
            this.ThreadsRunning = 0;
            this.ThreadsStop    = false;
            this.ThreadsDict    = new Dictionary <int, bool> ();

            // The semaphore is created with no free slots and then released up to
            // ThreadsMax, so every worker slot starts out available.
            this.SemaphoreWorkers = new Semaphore(0, this.ThreadsMax);
            this.SemaphoreWorkers.Release(this.ThreadsMax);

            /** Crawl limits and counters **/

            this.Depth          = MacroscopePreferencesManager.GetDepth();
            this.PageLimit      = MacroscopePreferencesManager.GetPageLimit();
            this.PageLimitCount = 0;

            this.PagesFound = 0;

            this.ParentStartingDirectory = "";
            this.ChildStartingDirectory  = "";

            this.JobHistory = new MacroscopeJobHistory();

            this.InitProgress();

            this.Locales = new Dictionary <string, string> (32);

            this.Robots          = new MacroscopeRobots();
            this.BlockedByRobots = new Dictionary <string, bool> ();
        }
示例#29
0
        /**************************************************************************/

        /// <summary>
        /// Writes a CSV report of documents that share an ETag: a header record
        /// followed by one record per document whose ETag occurs more than once.
        /// </summary>
        /// <remarks>
        /// Documents with a null or empty ETag are ignored. Progress is reported
        /// through <c>this.ProgressForm</c> for every document visited in the
        /// second pass.
        /// </remarks>
        /// <param name="JobMaster">Job whose document collection is analysed.</param>
        /// <param name="ws">CSV writer that receives the records.</param>
        private void BuildWorksheetPageDuplicateEtags(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            decimal CountOuter = 0;
            decimal CountInner = 0;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

            int     TotalDocs = DocCollection.CountDocuments();
            decimal DocCount  = ( decimal )TotalDocs;

            // ETag -> number of documents carrying it.
            Dictionary <string, int> DuplicatesList = new Dictionary <string, int> (TotalDocs);

            // URL -> document, for every document that has an ETag.
            Dictionary <string, MacroscopeDocument> DuplicatesDocList = new Dictionary <string, MacroscopeDocument> (TotalDocs);

            /** Pass 1: count occurrences of each ETag ******************************/

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                string Etag = msDoc.GetEtag();

                if (string.IsNullOrEmpty(Etag))
                {
                    continue;
                }

                string Url = msDoc.GetUrl();

                if (!DuplicatesDocList.ContainsKey(Url))
                {
                    DuplicatesDocList.Add(Url, msDoc);
                }

                // TryGetValue leaves Seen at 0 for an ETag not met before.
                int Seen;
                DuplicatesList.TryGetValue(Etag, out Seen);
                DuplicatesList[Etag] = Seen + 1;
            }

            /** Header record *******************************************************/

            {
                ws.WriteField("Status Code");
                ws.WriteField("Status");
                ws.WriteField("Occurrences");
                ws.WriteField("ETag");
                ws.WriteField("URL");

                ws.NextRecord();
            }

            /** Pass 2: emit every document whose ETag is shared ********************/

            foreach (KeyValuePair <string, int> EtagEntry in DuplicatesList)
            {
                string Etag        = EtagEntry.Key;
                int    Occurrences = EtagEntry.Value;

                CountOuter++;
                CountInner = 0;

                if (Occurrences <= 1)
                {
                    continue;
                }

                foreach (MacroscopeDocument msDoc in DuplicatesDocList.Values)
                {
                    CountInner++;

                    if (DocCount > 0)
                    {
                        this.ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: -1,
                            ProgressLabelMajor: string.Format("Documents Processed: {0}", CountOuter),
                            MinorPercentage: (( decimal )100 / DocCount) * CountOuter,
                            ProgressLabelMinor: Etag,
                            SubMinorPercentage: (( decimal )100 / DocCount) * CountInner,
                            ProgressLabelSubMinor: msDoc.GetUrl()
                            );
                    }

                    if (msDoc.GetEtag() == Etag)
                    {
                        HttpStatusCode Status     = msDoc.GetStatusCode();
                        int            StatusCode = ( int )Status;

                        // Numeric code first, then the enum value.
                        this.InsertAndFormatStatusCodeCell(ws, StatusCode);

                        this.InsertAndFormatStatusCodeCell(ws, Status);

                        this.InsertAndFormatContentCell(ws, Occurrences);

                        this.InsertAndFormatContentCell(ws, msDoc.GetEtag());

                        this.InsertAndFormatUrlCell(ws, msDoc);

                        ws.NextRecord();
                    }
                }
            }
        }
        /**************************************************************************/

        /// <summary>
        /// Adds or refreshes one list view row per entry of the collection's
        /// sitemap errors table, showing the sitemap URL, status code, robots
        /// value and target URL, and colours the cells.
        /// </summary>
        /// <remarks>
        /// Rows are keyed by the sitemap URL and target URL joined together. An
        /// existing row with that key is updated in place; otherwise a new row
        /// is queued and added to the list view in one batch at the end.
        /// </remarks>
        /// <param name="DocCollection">Collection supplying the sitemap errors table and the documents.</param>
        private void RenderListViewSitemapErrors(MacroscopeDocumentCollection DocCollection)
        {
            List <ListViewItem> ListViewItems = new List <ListViewItem>(1);
            List <Dictionary <string, string> > CompiledTable = DocCollection.GetSitemapErrorsAsTable();

            foreach (Dictionary <string, string> Entry in CompiledTable)
            {
                string SitemapUrl = Entry["sitemap_url"];
                string StatusCode = Entry["status_code"];
                string Robots     = Entry["robots"];
                string TargetUrl  = Entry["target_url"];

                string PairKey = string.Join("::::::::", SitemapUrl, TargetUrl);

                // NOTE(review): either lookup presumably yields null when the URL is
                // not in the collection - confirm; both are null-checked below.
                MacroscopeDocument msDoc       = DocCollection.GetDocumentByUrl(Url: SitemapUrl);
                MacroscopeDocument msDocLinked = DocCollection.GetDocumentByUrl(Url: TargetUrl);

                ListViewItem lvItem = null;

                if (this.DisplayListView.Items.ContainsKey(PairKey))
                {
                    try
                    {
                        lvItem = this.DisplayListView.Items[PairKey];
                        lvItem.SubItems[0].Text = SitemapUrl;
                        lvItem.SubItems[1].Text = StatusCode;
                        lvItem.SubItems[2].Text = Robots;
                        lvItem.SubItems[3].Text = TargetUrl;
                    }
                    catch (Exception ex)
                    {
                        DebugMsg(string.Format("RenderListViewSitemapErrors 1: {0}", ex.Message));
                    }
                }
                else
                {
                    try
                    {
                        lvItem = new ListViewItem(PairKey);
                        lvItem.UseItemStyleForSubItems = false;
                        lvItem.Name = PairKey;

                        lvItem.SubItems[0].Text = SitemapUrl;
                        lvItem.SubItems.Add(StatusCode);
                        lvItem.SubItems.Add(Robots);
                        lvItem.SubItems.Add(TargetUrl);

                        ListViewItems.Add(lvItem);
                    }
                    catch (Exception ex)
                    {
                        DebugMsg(string.Format("RenderListViewSitemapErrors 2: {0}", ex.Message));
                    }
                }

                if (lvItem == null)
                {
                    // No row could be obtained, so there is nothing to colour. The
                    // original code dereferenced the null item here and always threw.
                    continue;
                }

                lvItem.ForeColor = Color.Blue;

                /** Sitemap URL: green when internal, grey otherwise **/

                if ((msDoc != null) && msDoc.GetIsInternal())
                {
                    lvItem.SubItems[0].ForeColor = Color.Green;
                }
                else
                {
                    lvItem.SubItems[0].ForeColor = Color.Gray;
                }

                /** Robots: red when the target is disallowed, green when allowed **/

                if (msDocLinked != null)
                {
                    if (!msDocLinked.GetAllowedByRobots())
                    {
                        lvItem.SubItems[2].ForeColor = Color.Red;
                    }
                    else
                    {
                        lvItem.SubItems[2].ForeColor = Color.Green;
                    }
                }

                /** Target URL: green when internal, grey otherwise **/

                if ((msDocLinked != null) && msDocLinked.GetIsInternal())
                {
                    lvItem.SubItems[3].ForeColor = Color.Green;
                }
                else
                {
                    lvItem.SubItems[3].ForeColor = Color.Gray;
                }
            }

            this.DisplayListView.Items.AddRange(ListViewItems.ToArray());
        }