/**************************************************************************/

        /// <summary>
        /// Populates the preference form controls with the values currently returned by
        /// <c>MacroscopePreferencesManager</c>.
        /// </summary>
        /// <remarks>
        /// The method works in three steps: it sets the display option check boxes, then sets the
        /// Minimum/Maximum limits of several numeric controls, and finally copies every remaining
        /// preference value into its matching control.
        /// NOTE(review): assigning <c>Checked</c>, <c>Value</c> or <c>SelectedIndex</c> may raise change
        /// events on the controls; whether any handlers depend on the assignment order below is not
        /// visible from here - confirm before reordering.
        /// </remarks>
        public void SetPrefsFormControlFields()
        {
            { // Configure Display Options
                this.checkBoxPauseDisplayDuringScan.Checked = MacroscopePreferencesManager.GetPauseDisplayDuringScan();
                this.checkBoxShowProgressDialogues.Checked  = MacroscopePreferencesManager.GetShowProgressDialogues();
            }

            { // Configure Form Fields: set the allowed ranges before any values are assigned further down
              /** Spidering Control ---------------------------------------------- **/

                // NOTE(review): a minimum of -1 presumably stands for "no limit" - confirm against the preferences manager.
                this.numericUpDownDepth.Minimum = -1;
                this.numericUpDownDepth.Maximum = 10000;

                this.numericUpDownPageLimit.Minimum = -1;
                this.numericUpDownPageLimit.Maximum = 10000;

                this.numericUpDownCrawlDelay.Minimum = 0;
                this.numericUpDownCrawlDelay.Maximum = 60;

                this.numericUpDownMaxRetries.Minimum = 0;
                this.numericUpDownMaxRetries.Maximum = 10;
            }

            { // Copy the stored preference values into the controls, grouped by preference section
                /** WebProxy Options ----------------------------------------------- **/

                // The proxy type value is cast to int and used directly as the combo box index.
                this.comboBoxProxyType.SelectedIndex = (int)MacroscopePreferencesManager.GetProxyType();

                /** Server Certificate Options --------------------------------------- **/

                this.checkBoxServerCertificateValidation.Checked = MacroscopePreferencesManager.GetServerCertificateValidation();

                /** Spidering Control ---------------------------------------------- **/

                // NOTE(review): only Depth, PageLimit, CrawlDelay and MaxRetries have their ranges set in this
                // method; the limits of the other numeric controls are presumably set elsewhere - confirm.
                this.numericUpDownMaxThreads.Value     = MacroscopePreferencesManager.GetMaxThreads();
                this.numericUpDownDepth.Value          = MacroscopePreferencesManager.GetDepth();
                this.numericUpDownPageLimit.Value      = MacroscopePreferencesManager.GetPageLimit();
                this.numericUpDownCrawlDelay.Value     = MacroscopePreferencesManager.GetCrawlDelay();
                this.numericUpDownRequestTimeout.Value = (Decimal)MacroscopePreferencesManager.GetRequestTimeout();
                this.numericUpDownMaxRetries.Value     = (Decimal)MacroscopePreferencesManager.GetMaxRetries();

                this.checkBoxCrawlStrictUrlCheck.Checked = MacroscopePreferencesManager.GetCrawlStrictUrlCheck();

                this.checkBoxCheckExternalLinks.Checked = MacroscopePreferencesManager.GetCheckExternalLinks();
                this.checkBoxFetchExternalLinks.Checked = MacroscopePreferencesManager.GetFetchExternalLinks();

                this.checkBoxFollowRobotsProtocol.Checked = MacroscopePreferencesManager.GetFollowRobotsProtocol();
                this.checkBoxFollowSitemapLinks.Checked   = MacroscopePreferencesManager.GetFollowSitemapLinks();
                this.checkBoxProbeHumansText.Checked      = MacroscopePreferencesManager.GetProbeHumansText();

                this.checkBoxCheckRedirects.Checked  = MacroscopePreferencesManager.GetCheckRedirects();
                this.checkBoxFollowRedirects.Checked = MacroscopePreferencesManager.GetFollowRedirects();

                this.checkBoxFollowNoFollow.Checked       = MacroscopePreferencesManager.GetFollowNoFollow();
                this.checkBoxIgnoreQueries.Checked        = MacroscopePreferencesManager.GetIgnoreQueries();
                this.checkBoxIgnoreHashFragments.Checked  = MacroscopePreferencesManager.GetIgnoreHashFragments();
                this.checkBoxFollowCanonicalLinks.Checked = MacroscopePreferencesManager.GetFollowCanonicalLinks();
                this.checkBoxFollowAlternateLinks.Checked = MacroscopePreferencesManager.GetFollowAlternateLinks();
                this.checkBoxFollowHrefLangLinks.Checked  = MacroscopePreferencesManager.GetFollowHrefLangLinks();

                this.checkBoxFetchStylesheets.Checked = MacroscopePreferencesManager.GetFetchStylesheets();
                this.checkBoxFetchJavascripts.Checked = MacroscopePreferencesManager.GetFetchJavascripts();
                this.checkBoxFetchImages.Checked      = MacroscopePreferencesManager.GetFetchImages();
                this.checkBoxFetchAudio.Checked       = MacroscopePreferencesManager.GetFetchAudio();
                this.checkBoxFetchVideo.Checked       = MacroscopePreferencesManager.GetFetchVideo();
                this.checkBoxFetchXml.Checked         = MacroscopePreferencesManager.GetFetchXml();
                this.checkBoxFetchBinaries.Checked    = MacroscopePreferencesManager.GetFetchBinaries();

                this.checkBoxScanSitesInList.Checked = MacroscopePreferencesManager.GetScanSitesInList();

                this.checkBoxProbeParentFolderUrls.Checked = MacroscopePreferencesManager.GetProbeParentFolderUrls();

                /** Analysis Options ----------------------------------------------- **/

                this.checkBoxResolveAddresses.Checked = MacroscopePreferencesManager.GetResolveAddresses();

                this.checkBoxCheckHreflangs.Checked = MacroscopePreferencesManager.GetCheckHreflangs();
                this.checkBoxDetectLanguage.Checked = MacroscopePreferencesManager.GetDetectLanguage();

                this.checkBoxProcessStylesheets.Checked = MacroscopePreferencesManager.GetProcessStylesheets();
                this.checkBoxProcessJavascripts.Checked = MacroscopePreferencesManager.GetProcessJavascripts();
                this.checkBoxProcessImages.Checked      = MacroscopePreferencesManager.GetProcessImages();
                this.checkBoxProcessPdfs.Checked        = MacroscopePreferencesManager.GetProcessPdfs();
                this.checkBoxProcessAudio.Checked       = MacroscopePreferencesManager.GetProcessAudio();
                this.checkBoxProcessVideo.Checked       = MacroscopePreferencesManager.GetProcessVideo();
                this.checkBoxProcessXml.Checked         = MacroscopePreferencesManager.GetProcessXml();
                this.checkBoxProcessBinaries.Checked    = MacroscopePreferencesManager.GetProcessBinaries();

                this.numericUpDownRedirectChainsMaxHops.Value = MacroscopePreferencesManager.GetRedirectChainsMaxHops();

                this.checkBoxWarnAboutInsecureLinks.Checked = MacroscopePreferencesManager.GetWarnAboutInsecureLinks();

                this.checkBoxEnableTextIndexing.Checked        = MacroscopePreferencesManager.GetEnableTextIndexing();
                this.checkBoxCaseSensitiveTextIndexing.Checked = MacroscopePreferencesManager.GetCaseSensitiveTextIndexing();

                this.checkBoxDisregardHtml5ElementNav.Checked    = MacroscopePreferencesManager.GetDisregardHtml5ElementNav();
                this.checkBoxDisregardHtml5ElementHeader.Checked = MacroscopePreferencesManager.GetDisregardHtml5ElementHeader();
                this.checkBoxDisregardHtml5ElementFooter.Checked = MacroscopePreferencesManager.GetDisregardHtml5ElementFooter();

                this.checkBoxDetectQrCodeInImage.Checked = MacroscopePreferencesManager.GetDetectQrCodeInImage();

                /** SEO Options ---------------------------------------------------- **/

                this.numericUpDownTitleMinLen.Value        = MacroscopePreferencesManager.GetTitleMinLen();
                this.numericUpDownTitleMaxLen.Value        = MacroscopePreferencesManager.GetTitleMaxLen();
                this.numericUpDownTitleMinWords.Value      = MacroscopePreferencesManager.GetTitleMinWords();
                this.numericUpDownTitleMaxWords.Value      = MacroscopePreferencesManager.GetTitleMaxWords();
                this.numericUpDownTitleMaxPixelWidth.Value = MacroscopePreferencesManager.GetTitleMaxPixelWidth();

                this.numericUpDownDescriptionMinLen.Value   = MacroscopePreferencesManager.GetDescriptionMinLen();
                this.numericUpDownDescriptionMaxLen.Value   = MacroscopePreferencesManager.GetDescriptionMaxLen();
                this.numericUpDownDescriptionMinWords.Value = MacroscopePreferencesManager.GetDescriptionMinWords();
                this.numericUpDownDescriptionMaxWords.Value = MacroscopePreferencesManager.GetDescriptionMaxWords();

                this.numericUpDownMaxHeadingDepth.Value = MacroscopePreferencesManager.GetMaxHeadingDepth();

                this.checkBoxAnalyzeKeywordsInText.Checked  = MacroscopePreferencesManager.GetAnalyzeKeywordsInText();
                this.checkBoxAnalyzeTextReadability.Checked = MacroscopePreferencesManager.GetAnalyzeTextReadability();
                this.comboBoxAnalyzeTextReadabilityEnglishAlgorithm.SelectedIndex = (int)MacroscopePreferencesManager.GetAnalyzeTextReadabilityEnglishAlgorithm();

                this.checkBoxEnableLevenshteinDeduplication.Checked  = MacroscopePreferencesManager.GetEnableLevenshteinDeduplication();
                this.comboBoxLevenshteinAnalysisLevel.SelectedIndex  = (int)MacroscopePreferencesManager.GetLevenshteinAnalysisLevel();
                this.numericUpDownMaxLevenshteinSizeDifference.Value = MacroscopePreferencesManager.GetMaxLevenshteinSizeDifference();
                this.numericUpDownMaxLevenshteinDistance.Value       = MacroscopePreferencesManager.GetMaxLevenshteinDistance();

                this.checkBoxAnalyzeClickPaths.Checked = MacroscopePreferencesManager.GetAnalyzeClickPaths();

                // TODO: Finish implementing click path analysis:
                // Until then the page navigation analysis group box is only shown in DEBUG builds.
#if DEBUG
                this.groupBoxPageNavigationAnalysis.Visible = true;
#else
                this.groupBoxPageNavigationAnalysis.Visible = false;
#endif

                /** Custom Filter Options ------------------------------------------ **/

                this.checkBoxCustomFiltersEnable.Checked      = MacroscopePreferencesManager.GetCustomFiltersEnable();
                this.numericUpDownCustomFiltersMaxItems.Value = MacroscopePreferencesManager.GetCustomFiltersMaxItems();

                this.checkBoxCustomFiltersApplyToHtml.Checked        = MacroscopePreferencesManager.GetCustomFiltersApplyToHtml();
                this.checkBoxCustomFiltersApplyToCss.Checked         = MacroscopePreferencesManager.GetCustomFiltersApplyToCss();
                this.checkBoxCustomFiltersApplyToJavascripts.Checked = MacroscopePreferencesManager.GetCustomFiltersApplyToJavascripts();
                this.checkBoxCustomFiltersApplyToText.Checked        = MacroscopePreferencesManager.GetCustomFiltersApplyToText();
                this.checkBoxCustomFiltersApplyToXml.Checked         = MacroscopePreferencesManager.GetCustomFiltersApplyToXml();

                /** Extractor Options ---------------------------------------------- **/

                this.checkBoxDataExtractorsEnable.Checked          = MacroscopePreferencesManager.GetDataExtractorsEnable();
                this.checkBoxDataExtractorsCleanWhiteSpace.Checked = MacroscopePreferencesManager.GetDataExtractorsCleanWhiteSpace();

                this.numericUpDownDataExtractorsMaxItemsCssSelectors.Value = MacroscopePreferencesManager.GetDataExtractorsMaxItemsCssSelectors();
                this.numericUpDownDataExtractorsMaxItemsRegexes.Value      = MacroscopePreferencesManager.GetDataExtractorsMaxItemsRegexes();
                this.numericUpDownDataExtractorsMaxItemsXpaths.Value       = MacroscopePreferencesManager.GetDataExtractorsMaxItemsXpaths();

                this.checkBoxDataExtractorsApplyToHtml.Checked        = MacroscopePreferencesManager.GetDataExtractorsApplyToHtml();
                this.checkBoxDataExtractorsApplyToCss.Checked         = MacroscopePreferencesManager.GetDataExtractorsApplyToCss();
                this.checkBoxDataExtractorsApplyToJavascripts.Checked = MacroscopePreferencesManager.GetDataExtractorsApplyToJavascripts();
                this.checkBoxDataExtractorsApplyToText.Checked        = MacroscopePreferencesManager.GetDataExtractorsApplyToText();
                this.checkBoxDataExtractorsApplyToPdf.Checked         = MacroscopePreferencesManager.GetDataExtractorsApplyToPdf();
                this.checkBoxDataExtractorsApplyToXml.Checked         = MacroscopePreferencesManager.GetDataExtractorsApplyToXml();

                /** Export Options ------------------------------------------------- **/

                this.checkBoxSitemapIncludeLinkedPdfs.Checked = MacroscopePreferencesManager.GetSitemapIncludeLinkedPdfs();

                /** Ignore Errors Settings ----------------------------------------- **/

                this.checkBoxIgnoreErrors410.Checked = MacroscopePreferencesManager.GetIgnoreErrors410();
                this.checkBoxIgnoreErrors451.Checked = MacroscopePreferencesManager.GetIgnoreErrors451();
            }
        }
Beispiel #2
0
        /**************************************************************************/

        /// <summary>
        /// Writes the page headings report to a CSV writer: one header record followed by one
        /// record for every heading found in each qualifying document.
        /// </summary>
        /// <remarks>
        /// Documents flagged as external or as redirects are skipped, as are documents that are
        /// not HTML. Each data record holds the URL, the occurrence count returned by the document
        /// collection for that heading text, the 1-based order of the heading within its level,
        /// and the heading text placed in the column that matches its level ("H1".."H6"). Empty
        /// fields are written around the heading text so every record lines up with the header.
        /// </remarks>
        /// <param name="JobMaster">Job master that supplies the document collection to report on.</param>
        /// <param name="ws">CSV writer that receives the fields and records.</param>
        private void BuildWorksheetPageHeadings(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            // Number of heading columns ("H1".."H6") declared in the header record.
            const int HeadingColumns = 6;

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

            // The preference is loop-invariant for the duration of the export, so read it once.
            var MaxHeadingDepth = MacroscopePreferencesManager.GetMaxHeadingDepth();

            {
                ws.WriteField("URL");
                ws.WriteField("Occurences");
                ws.WriteField("Order");

                for (int i = 1; i <= HeadingColumns; i++)
                {
                    ws.WriteField(string.Format("H{0}", i));
                }

                ws.NextRecord();
            }

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc = DocCollection.GetDocument(Url);

                if (msDoc.GetIsExternal())
                {
                    continue;
                }

                if (msDoc.GetIsRedirect())
                {
                    continue;
                }

                if (!msDoc.GetIsHtml())
                {
                    continue;
                }

                for (ushort HeadingLevel = 1; HeadingLevel <= MaxHeadingDepth; HeadingLevel++)
                {
                    List <string> HeadingsList = msDoc.GetHeadings(HeadingLevel);

                    for (int Order = 0; Order < HeadingsList.Count; Order++)
                    {
                        int Occurences = DocCollection.GetStatsHeadingsCount(HeadingLevel: HeadingLevel, Text: HeadingsList[Order]);

                        this.InsertAndFormatUrlCell(ws, msDoc);

                        this.InsertAndFormatContentCell(ws, Occurences.ToString());

                        this.InsertAndFormatContentCell(ws, this.FormatIfMissing((Order + 1).ToString()));

                        // Skip the columns of the shallower heading levels so that the text
                        // lands under the "H<level>" header instead of always under "H1".
                        // Plain empty fields are written directly; they are padding, not content.
                        for (int Pad = 1; Pad < HeadingLevel; Pad++)
                        {
                            ws.WriteField("");
                        }

                        this.InsertAndFormatContentCell(ws, this.FormatIfMissing(HeadingsList[Order]));

                        // Fill the remaining heading columns so the record is as wide as the header.
                        for (int Pad = HeadingLevel; Pad < HeadingColumns; Pad++)
                        {
                            ws.WriteField("");
                        }

                        ws.NextRecord();
                    }
                }
            }
        }
        /**************************************************************************/

        /// <summary>
        /// Creates or refreshes the list view rows that describe the headings of one document.
        /// </summary>
        /// <remarks>
        /// Nothing is rendered for documents flagged as external or as redirects, nor for documents
        /// that are not HTML. For every heading, at each level up to the configured maximum heading
        /// depth, a row keyed by URL, heading level and order is looked up in the display list view:
        /// an existing row is updated in place, otherwise a new row is built and appended to
        /// <paramref name="ListViewItems"/>. Failures while filling a row are reported through
        /// <c>DebugMsg</c>; the colouring step afterwards runs outside of that protection.
        /// </remarks>
        /// <param name="ListViewItems">Receives the rows that are newly created by this call.</param>
        /// <param name="msDoc">Document whose headings are rendered.</param>
        /// <param name="Url">URL shown in the row and used as part of the row key.</param>
        protected override void RenderListView(
            List <ListViewItem> ListViewItems,
            MacroscopeDocument msDoc,
            string Url
            )
        {
            if (msDoc.GetIsExternal() || msDoc.GetIsRedirect())
            {
                return;
            }

            if (!msDoc.GetIsHtml())
            {
                return;
            }

            MacroscopeDocumentCollection Documents = this.MainForm.GetJobMaster().GetDocCollection();

            for (ushort Level = 1; Level <= MacroscopePreferencesManager.GetMaxHeadingDepth(); Level++)
            {
                List <string> LevelHeadings = msDoc.GetHeadings(Level);

                for (int Position = 0; Position < LevelHeadings.Count; Position++)
                {
                    string RowKey          = string.Join("::", Url, Level, Position);
                    int    HeadingColumn   = Level + ColH1Offset;
                    string HeadingText     = LevelHeadings[Position];
                    int    Occurrences     = Documents.GetStatsHeadingsCount(HeadingLevel: Level, Text: HeadingText);
                    string OccurrencesText = Occurrences.ToString();
                    string OrderText       = (Position + 1).ToString();

                    ListViewItem Row = null;

                    if (!this.DisplayListView.Items.ContainsKey(RowKey))
                    {
                        // No row for this heading yet: build one with every column present.
                        try
                        {
                            Row = new ListViewItem(RowKey);
                            Row.UseItemStyleForSubItems = false;
                            Row.Name = RowKey;

                            Row.SubItems[ColUrl].Text = Url;
                            Row.SubItems.Add(OccurrencesText);
                            Row.SubItems.Add(OrderText);

                            // One blank cell per heading column; the relevant one is filled next.
                            for (int Blank = 0; Blank < 6; Blank++)
                            {
                                Row.SubItems.Add("");
                            }

                            Row.SubItems[HeadingColumn].Text = HeadingText;

                            ListViewItems.Add(Row);
                        }
                        catch (Exception ex)
                        {
                            DebugMsg(string.Format("MacroscopeDisplayHeadings 2: {0}", ex.Message));
                        }
                    }
                    else
                    {
                        // The row is already displayed: refresh its cells in place.
                        try
                        {
                            Row = this.DisplayListView.Items[RowKey];
                            Row.SubItems[ColUrl].Text        = Url;
                            Row.SubItems[ColOccurences].Text = OccurrencesText;
                            Row.SubItems[ColOrder].Text      = OrderText;
                            Row.SubItems[HeadingColumn].Text = HeadingText;
                        }
                        catch (Exception ex)
                        {
                            DebugMsg(string.Format("MacroscopeDisplayHeadings 1: {0}", ex.Message));
                        }
                    }

                    if (Row == null)
                    {
                        continue;
                    }

                    Row.ForeColor = Color.Blue;

                    // URL cell: green for internal documents, grey otherwise.
                    Row.SubItems[ColUrl].ForeColor = msDoc.GetIsInternal() ? Color.Green : Color.Gray;

                    // Occurrences cell: orange marks a repeated heading on an internal document.
                    Color OccurrencesColor;

                    if (!msDoc.GetIsInternal())
                    {
                        OccurrencesColor = Color.Gray;
                    }
                    else if (Occurrences > 1)
                    {
                        OccurrencesColor = Color.Orange;
                    }
                    else
                    {
                        OccurrencesColor = Color.Green;
                    }

                    Row.SubItems[ColOccurences].ForeColor = OccurrencesColor;

                    // Heading cell: an empty level 1 heading is flagged as missing in red.
                    bool MissingH1   = (Level == 1) && string.IsNullOrEmpty(HeadingText);
                    var  HeadingCell = Row.SubItems[HeadingColumn];

                    if (MissingH1)
                    {
                        HeadingCell.Text = "MISSING";
                    }

                    HeadingCell.ForeColor = MissingH1 ? Color.Red : Color.Green;
                }
            }
        }
Beispiel #4
0
        /**************************************************************************/

        /// <summary>
        /// Adds a worksheet to the workbook and fills it with the page headings report: a header
        /// row followed by one row for every heading found in each qualifying document.
        /// </summary>
        /// <remarks>
        /// Documents flagged as external or as redirects are skipped, as are documents that are
        /// not HTML. Column 1 holds the URL, column 2 the occurrence count returned by the document
        /// collection, column 3 the 1-based order of the heading within its level, and the heading
        /// text goes into the column that follows at an offset equal to its level. Finally, the
        /// range from the header row to the last written row, spanning the header columns, is
        /// turned into a table.
        /// </remarks>
        /// <param name="JobMaster">Job master that supplies the document collection to report on.</param>
        /// <param name="wb">Workbook that receives the new worksheet.</param>
        /// <param name="WorksheetLabel">Name given to the new worksheet.</param>
        private void BuildWorksheetPageHeadings(
            MacroscopeJobMaster JobMaster,
            XLWorkbook wb,
            string WorksheetLabel
            )
        {
            const int HeaderRow     = 1;
            const int UrlCol        = 1;
            const int OccurencesCol = 2;
            const int OrderCol      = 3;

            var ws = wb.Worksheets.Add(WorksheetLabel);

            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

            // Header row: the three fixed columns first, then one column per heading level.
            string[] FixedLabels = { "URL", "Occurences", "Order" };
            int      LastCol     = 0;

            foreach (string Label in FixedLabels)
            {
                LastCol++;
                ws.Cell(HeaderRow, LastCol).Value = Label;
            }

            for (int i = 1; i <= 6; i++)
            {
                LastCol++;
                ws.Cell(HeaderRow, LastCol).Value = string.Format("H{0}", i);
            }

            int NextRow = HeaderRow + 1;

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc = DocCollection.GetDocument(Url);

                if (msDoc.GetIsExternal() || msDoc.GetIsRedirect())
                {
                    continue;
                }

                if (!msDoc.GetIsHtml())
                {
                    continue;
                }

                for (ushort Level = 1; Level <= MacroscopePreferencesManager.GetMaxHeadingDepth(); Level++)
                {
                    List <string> LevelHeadings = msDoc.GetHeadings(Level);

                    for (int Position = 0; Position < LevelHeadings.Count; Position++)
                    {
                        string HeadingText = LevelHeadings[Position];
                        int    Occurences  = DocCollection.GetStatsHeadingsCount(HeadingLevel: Level, Text: HeadingText);

                        // URL cell: green for internal documents, grey otherwise.
                        this.InsertAndFormatUrlCell(ws, NextRow, UrlCol, msDoc);

                        var UrlColor = msDoc.GetIsInternal() ? XLColor.Green : XLColor.Gray;
                        ws.Cell(NextRow, UrlCol).Style.Font.SetFontColor(UrlColor);

                        // Occurrences cell: orange marks a repeated heading on an internal document.
                        this.InsertAndFormatContentCell(ws, NextRow, OccurencesCol, Occurences);

                        var OccurencesColor = XLColor.Gray;

                        if (msDoc.GetIsInternal())
                        {
                            OccurencesColor = (Occurences > 1) ? XLColor.Orange : XLColor.Green;
                        }

                        ws.Cell(NextRow, OccurencesCol).Style.Font.SetFontColor(OccurencesColor);

                        // Order cell, then the heading text in the column for its level.
                        this.InsertAndFormatContentCell(ws, NextRow, OrderCol, this.FormatIfMissing((Position + 1).ToString()));

                        int HeadingCol = Level + OrderCol;

                        this.InsertAndFormatContentCell(ws, NextRow, HeadingCol, this.FormatIfMissing(HeadingText));

                        NextRow++;
                    }
                }
            }

            // Turn everything written so far (header included) into a table.
            ws.Range(HeaderRow, 1, NextRow - 1, LastCol).CreateTable();
        }