/**************************************************************************/

/// <summary>
/// Loads the current values from <c>MacroscopePreferencesManager</c> into the
/// controls of the preferences form.
/// </summary>
/// <remarks>
/// The range limits of the depth, page limit, crawl delay and max retries
/// spinners are assigned first; the Value properties of all spinners are
/// assigned afterwards, in the same order as the sections below.
/// </remarks>
public void SetPrefsFormControlFields()
{
    /** Display Options ------------------------------------------------ **/

    checkBoxPauseDisplayDuringScan.Checked = MacroscopePreferencesManager.GetPauseDisplayDuringScan();
    checkBoxShowProgressDialogues.Checked = MacroscopePreferencesManager.GetShowProgressDialogues();

    /** Spidering Control: spinner ranges ------------------------------ **/

    numericUpDownDepth.Minimum = -1;
    numericUpDownDepth.Maximum = 10000;

    numericUpDownPageLimit.Minimum = -1;
    numericUpDownPageLimit.Maximum = 10000;

    numericUpDownCrawlDelay.Minimum = 0;
    numericUpDownCrawlDelay.Maximum = 60;

    numericUpDownMaxRetries.Minimum = 0;
    numericUpDownMaxRetries.Maximum = 10;

    /** WebProxy Options ----------------------------------------------- **/

    comboBoxProxyType.SelectedIndex = (int)MacroscopePreferencesManager.GetProxyType();

    /** Server Certificate Options ------------------------------------- **/

    checkBoxServerCertificateValidation.Checked = MacroscopePreferencesManager.GetServerCertificateValidation();

    /** Spidering Control ---------------------------------------------- **/

    numericUpDownMaxThreads.Value = MacroscopePreferencesManager.GetMaxThreads();
    numericUpDownDepth.Value = MacroscopePreferencesManager.GetDepth();
    numericUpDownPageLimit.Value = MacroscopePreferencesManager.GetPageLimit();
    numericUpDownCrawlDelay.Value = MacroscopePreferencesManager.GetCrawlDelay();
    numericUpDownRequestTimeout.Value = (decimal)MacroscopePreferencesManager.GetRequestTimeout();
    numericUpDownMaxRetries.Value = (decimal)MacroscopePreferencesManager.GetMaxRetries();

    checkBoxCrawlStrictUrlCheck.Checked = MacroscopePreferencesManager.GetCrawlStrictUrlCheck();

    checkBoxCheckExternalLinks.Checked = MacroscopePreferencesManager.GetCheckExternalLinks();
    checkBoxFetchExternalLinks.Checked = MacroscopePreferencesManager.GetFetchExternalLinks();

    checkBoxFollowRobotsProtocol.Checked = MacroscopePreferencesManager.GetFollowRobotsProtocol();
    checkBoxFollowSitemapLinks.Checked = MacroscopePreferencesManager.GetFollowSitemapLinks();
    checkBoxProbeHumansText.Checked = MacroscopePreferencesManager.GetProbeHumansText();

    checkBoxCheckRedirects.Checked = MacroscopePreferencesManager.GetCheckRedirects();
    checkBoxFollowRedirects.Checked = MacroscopePreferencesManager.GetFollowRedirects();
    checkBoxFollowNoFollow.Checked = MacroscopePreferencesManager.GetFollowNoFollow();
    checkBoxIgnoreQueries.Checked = MacroscopePreferencesManager.GetIgnoreQueries();
    checkBoxIgnoreHashFragments.Checked = MacroscopePreferencesManager.GetIgnoreHashFragments();
    checkBoxFollowCanonicalLinks.Checked = MacroscopePreferencesManager.GetFollowCanonicalLinks();
    checkBoxFollowAlternateLinks.Checked = MacroscopePreferencesManager.GetFollowAlternateLinks();
    checkBoxFollowHrefLangLinks.Checked = MacroscopePreferencesManager.GetFollowHrefLangLinks();

    checkBoxFetchStylesheets.Checked = MacroscopePreferencesManager.GetFetchStylesheets();
    checkBoxFetchJavascripts.Checked = MacroscopePreferencesManager.GetFetchJavascripts();
    checkBoxFetchImages.Checked = MacroscopePreferencesManager.GetFetchImages();
    checkBoxFetchAudio.Checked = MacroscopePreferencesManager.GetFetchAudio();
    checkBoxFetchVideo.Checked = MacroscopePreferencesManager.GetFetchVideo();
    checkBoxFetchXml.Checked = MacroscopePreferencesManager.GetFetchXml();
    checkBoxFetchBinaries.Checked = MacroscopePreferencesManager.GetFetchBinaries();

    checkBoxScanSitesInList.Checked = MacroscopePreferencesManager.GetScanSitesInList();
    checkBoxProbeParentFolderUrls.Checked = MacroscopePreferencesManager.GetProbeParentFolderUrls();

    /** Analysis Options ----------------------------------------------- **/

    checkBoxResolveAddresses.Checked = MacroscopePreferencesManager.GetResolveAddresses();
    checkBoxCheckHreflangs.Checked = MacroscopePreferencesManager.GetCheckHreflangs();
    checkBoxDetectLanguage.Checked = MacroscopePreferencesManager.GetDetectLanguage();

    checkBoxProcessStylesheets.Checked = MacroscopePreferencesManager.GetProcessStylesheets();
    checkBoxProcessJavascripts.Checked = MacroscopePreferencesManager.GetProcessJavascripts();
    checkBoxProcessImages.Checked = MacroscopePreferencesManager.GetProcessImages();
    checkBoxProcessPdfs.Checked = MacroscopePreferencesManager.GetProcessPdfs();
    checkBoxProcessAudio.Checked = MacroscopePreferencesManager.GetProcessAudio();
    checkBoxProcessVideo.Checked = MacroscopePreferencesManager.GetProcessVideo();
    checkBoxProcessXml.Checked = MacroscopePreferencesManager.GetProcessXml();
    checkBoxProcessBinaries.Checked = MacroscopePreferencesManager.GetProcessBinaries();

    numericUpDownRedirectChainsMaxHops.Value = MacroscopePreferencesManager.GetRedirectChainsMaxHops();

    checkBoxWarnAboutInsecureLinks.Checked = MacroscopePreferencesManager.GetWarnAboutInsecureLinks();

    checkBoxEnableTextIndexing.Checked = MacroscopePreferencesManager.GetEnableTextIndexing();
    checkBoxCaseSensitiveTextIndexing.Checked = MacroscopePreferencesManager.GetCaseSensitiveTextIndexing();

    checkBoxDisregardHtml5ElementNav.Checked = MacroscopePreferencesManager.GetDisregardHtml5ElementNav();
    checkBoxDisregardHtml5ElementHeader.Checked = MacroscopePreferencesManager.GetDisregardHtml5ElementHeader();
    checkBoxDisregardHtml5ElementFooter.Checked = MacroscopePreferencesManager.GetDisregardHtml5ElementFooter();

    checkBoxDetectQrCodeInImage.Checked = MacroscopePreferencesManager.GetDetectQrCodeInImage();

    /** SEO Options ---------------------------------------------------- **/

    numericUpDownTitleMinLen.Value = MacroscopePreferencesManager.GetTitleMinLen();
    numericUpDownTitleMaxLen.Value = MacroscopePreferencesManager.GetTitleMaxLen();
    numericUpDownTitleMinWords.Value = MacroscopePreferencesManager.GetTitleMinWords();
    numericUpDownTitleMaxWords.Value = MacroscopePreferencesManager.GetTitleMaxWords();
    numericUpDownTitleMaxPixelWidth.Value = MacroscopePreferencesManager.GetTitleMaxPixelWidth();

    numericUpDownDescriptionMinLen.Value = MacroscopePreferencesManager.GetDescriptionMinLen();
    numericUpDownDescriptionMaxLen.Value = MacroscopePreferencesManager.GetDescriptionMaxLen();
    numericUpDownDescriptionMinWords.Value = MacroscopePreferencesManager.GetDescriptionMinWords();
    numericUpDownDescriptionMaxWords.Value = MacroscopePreferencesManager.GetDescriptionMaxWords();

    numericUpDownMaxHeadingDepth.Value = MacroscopePreferencesManager.GetMaxHeadingDepth();

    checkBoxAnalyzeKeywordsInText.Checked = MacroscopePreferencesManager.GetAnalyzeKeywordsInText();

    checkBoxAnalyzeTextReadability.Checked = MacroscopePreferencesManager.GetAnalyzeTextReadability();
    comboBoxAnalyzeTextReadabilityEnglishAlgorithm.SelectedIndex = (int)MacroscopePreferencesManager.GetAnalyzeTextReadabilityEnglishAlgorithm();

    checkBoxEnableLevenshteinDeduplication.Checked = MacroscopePreferencesManager.GetEnableLevenshteinDeduplication();
    comboBoxLevenshteinAnalysisLevel.SelectedIndex = (int)MacroscopePreferencesManager.GetLevenshteinAnalysisLevel();
    numericUpDownMaxLevenshteinSizeDifference.Value = MacroscopePreferencesManager.GetMaxLevenshteinSizeDifference();
    numericUpDownMaxLevenshteinDistance.Value = MacroscopePreferencesManager.GetMaxLevenshteinDistance();

    checkBoxAnalyzeClickPaths.Checked = MacroscopePreferencesManager.GetAnalyzeClickPaths();

    // TODO: Finish implementing click path analysis.
    // Until then the page navigation analysis group is shown in DEBUG builds only.
#if DEBUG
    bool showPageNavigationAnalysis = true;
#else
    bool showPageNavigationAnalysis = false;
#endif
    groupBoxPageNavigationAnalysis.Visible = showPageNavigationAnalysis;

    /** Custom Filter Options ------------------------------------------ **/

    checkBoxCustomFiltersEnable.Checked = MacroscopePreferencesManager.GetCustomFiltersEnable();
    numericUpDownCustomFiltersMaxItems.Value = MacroscopePreferencesManager.GetCustomFiltersMaxItems();

    checkBoxCustomFiltersApplyToHtml.Checked = MacroscopePreferencesManager.GetCustomFiltersApplyToHtml();
    checkBoxCustomFiltersApplyToCss.Checked = MacroscopePreferencesManager.GetCustomFiltersApplyToCss();
    checkBoxCustomFiltersApplyToJavascripts.Checked = MacroscopePreferencesManager.GetCustomFiltersApplyToJavascripts();
    checkBoxCustomFiltersApplyToText.Checked = MacroscopePreferencesManager.GetCustomFiltersApplyToText();
    checkBoxCustomFiltersApplyToXml.Checked = MacroscopePreferencesManager.GetCustomFiltersApplyToXml();

    /** Extractor Options ---------------------------------------------- **/

    checkBoxDataExtractorsEnable.Checked = MacroscopePreferencesManager.GetDataExtractorsEnable();
    checkBoxDataExtractorsCleanWhiteSpace.Checked = MacroscopePreferencesManager.GetDataExtractorsCleanWhiteSpace();

    numericUpDownDataExtractorsMaxItemsCssSelectors.Value = MacroscopePreferencesManager.GetDataExtractorsMaxItemsCssSelectors();
    numericUpDownDataExtractorsMaxItemsRegexes.Value = MacroscopePreferencesManager.GetDataExtractorsMaxItemsRegexes();
    numericUpDownDataExtractorsMaxItemsXpaths.Value = MacroscopePreferencesManager.GetDataExtractorsMaxItemsXpaths();

    checkBoxDataExtractorsApplyToHtml.Checked = MacroscopePreferencesManager.GetDataExtractorsApplyToHtml();
    checkBoxDataExtractorsApplyToCss.Checked = MacroscopePreferencesManager.GetDataExtractorsApplyToCss();
    checkBoxDataExtractorsApplyToJavascripts.Checked = MacroscopePreferencesManager.GetDataExtractorsApplyToJavascripts();
    checkBoxDataExtractorsApplyToText.Checked = MacroscopePreferencesManager.GetDataExtractorsApplyToText();
    checkBoxDataExtractorsApplyToPdf.Checked = MacroscopePreferencesManager.GetDataExtractorsApplyToPdf();
    checkBoxDataExtractorsApplyToXml.Checked = MacroscopePreferencesManager.GetDataExtractorsApplyToXml();

    /** Export Options ------------------------------------------------- **/

    checkBoxSitemapIncludeLinkedPdfs.Checked = MacroscopePreferencesManager.GetSitemapIncludeLinkedPdfs();

    /** Ignore Errors Settings ----------------------------------------- **/

    checkBoxIgnoreErrors410.Checked = MacroscopePreferencesManager.GetIgnoreErrors410();
    checkBoxIgnoreErrors451.Checked = MacroscopePreferencesManager.GetIgnoreErrors451();
}
/**************************************************************************/

/// <summary>
/// Writes the page headings report as CSV: one header record, followed by
/// one record for every heading found in each qualifying document.
/// </summary>
/// <remarks>
/// Documents that report GetIsExternal() or GetIsRedirect(), and documents
/// that do not report GetIsHtml(), are skipped.
/// The header record has nine fields (URL, Occurences, Order, H1..H6). Each
/// data record is padded with empty fields so that the heading text is
/// written under the H column matching its level and, for levels 1 to 6,
/// the record has the same number of fields as the header.
/// </remarks>
/// <param name="JobMaster">Job whose document collection is reported on.</param>
/// <param name="ws">CSV writer that receives the records.</param>
private void BuildWorksheetPageHeadings(
    MacroscopeJobMaster JobMaster,
    CsvWriter ws
)
{
    // Number of H columns announced by the header record (H1..H6).
    const int HeadingColumnCount = 6;

    MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

    // Header record.
    ws.WriteField("URL");
    ws.WriteField("Occurences");
    ws.WriteField("Order");

    for (int i = 1; i <= HeadingColumnCount; i++)
    {
        ws.WriteField(string.Format("H{0}", i));
    }

    ws.NextRecord();

    // Read the preference once; it is loop-invariant for the export.
    var MaxHeadingDepth = MacroscopePreferencesManager.GetMaxHeadingDepth();

    foreach (string Url in DocCollection.DocumentKeys())
    {
        MacroscopeDocument msDoc = DocCollection.GetDocument(Url);

        if (msDoc.GetIsExternal() || msDoc.GetIsRedirect() || !msDoc.GetIsHtml())
        {
            continue;
        }

        for (ushort HeadingLevel = 1; HeadingLevel <= MaxHeadingDepth; HeadingLevel++)
        {
            List<string> HeadingsList = msDoc.GetHeadings(HeadingLevel);

            for (int Order = 0; Order < HeadingsList.Count; Order++)
            {
                string HeadingText = HeadingsList[Order];

                int Occurences = DocCollection.GetStatsHeadingsCount(
                    HeadingLevel: HeadingLevel,
                    Text: HeadingText
                );

                this.InsertAndFormatUrlCell(ws, msDoc);
                this.InsertAndFormatContentCell(ws, Occurences.ToString());
                this.InsertAndFormatContentCell(ws, this.FormatIfMissing((Order + 1).ToString()));

                // Empty fields for the H columns below this heading's level,
                // so the text lines up with its own column in the header.
                for (int Column = 1; Column < HeadingLevel; Column++)
                {
                    ws.WriteField("");
                }

                this.InsertAndFormatContentCell(ws, this.FormatIfMissing(HeadingText));

                // Empty fields for the remaining H columns, so every record
                // is as wide as the header record.
                for (int Column = HeadingLevel + 1; Column <= HeadingColumnCount; Column++)
                {
                    ws.WriteField("");
                }

                ws.NextRecord();
            }
        }
    }
}
/**************************************************************************/

/// <summary>
/// Adds or refreshes the list view rows that show the headings of a single
/// document.
/// </summary>
/// <remarks>
/// Nothing is rendered when the document reports GetIsExternal() or
/// GetIsRedirect(), or does not report GetIsHtml().
/// Each row is keyed by URL, heading level and zero-based heading position.
/// Rows already present in DisplayListView are updated in place; new rows
/// are appended to <paramref name="ListViewItems"/>.
/// </remarks>
/// <param name="ListViewItems">Receives the newly created rows.</param>
/// <param name="msDoc">Document whose headings are rendered.</param>
/// <param name="Url">URL shown in the URL column and used in the row key.</param>
protected override void RenderListView(
    List<ListViewItem> ListViewItems,
    MacroscopeDocument msDoc,
    string Url
)
{
    if (msDoc.GetIsExternal() || msDoc.GetIsRedirect())
    {
        return;
    }

    if (!msDoc.GetIsHtml())
    {
        return;
    }

    MacroscopeDocumentCollection documents = this.MainForm.GetJobMaster().GetDocCollection();

    for (ushort level = 1; level <= MacroscopePreferencesManager.GetMaxHeadingDepth(); level++)
    {
        List<string> headings = msDoc.GetHeadings(level);

        for (int position = 0; position < headings.Count; position++)
        {
            string rowKey = string.Join("::", Url, level, position);
            int headingColumn = level + ColH1Offset;
            string label = headings[position];
            int count = documents.GetStatsHeadingsCount(HeadingLevel: level, Text: label);

            string countText = count.ToString();
            string orderText = (position + 1).ToString();

            ListViewItem row = null;

            if (this.DisplayListView.Items.ContainsKey(rowKey))
            {
                // Refresh the row that is already on display.
                try
                {
                    row = this.DisplayListView.Items[rowKey];
                    row.SubItems[ColUrl].Text = Url;
                    row.SubItems[ColOccurences].Text = countText;
                    row.SubItems[ColOrder].Text = orderText;
                    row.SubItems[headingColumn].Text = label;
                }
                catch (Exception ex)
                {
                    DebugMsg(string.Format("MacroscopeDisplayHeadings 1: {0}", ex.Message));
                }
            }
            else
            {
                // Build a new row: URL, occurrences, order, then six blank
                // heading cells, of which the one for this level is filled in.
                try
                {
                    row = new ListViewItem(rowKey);
                    row.UseItemStyleForSubItems = false;
                    row.Name = rowKey;

                    row.SubItems[ColUrl].Text = Url;
                    row.SubItems.Add(countText);
                    row.SubItems.Add(orderText);

                    for (int blank = 0; blank < 6; blank++)
                    {
                        row.SubItems.Add("");
                    }

                    row.SubItems[headingColumn].Text = label;

                    ListViewItems.Add(row);
                }
                catch (Exception ex)
                {
                    DebugMsg(string.Format("MacroscopeDisplayHeadings 2: {0}", ex.Message));
                }
            }

            if (row == null)
            {
                continue;
            }

            row.ForeColor = Color.Blue;

            bool isInternal = msDoc.GetIsInternal();

            // URL cell: green for internal documents, gray otherwise.
            row.SubItems[ColUrl].ForeColor = isInternal ? Color.Green : Color.Gray;

            // Occurrences cell: orange flags heading text counted more than
            // once on an internal document.
            Color countColor;

            if (!isInternal)
            {
                countColor = Color.Gray;
            }
            else if (count > 1)
            {
                countColor = Color.Orange;
            }
            else
            {
                countColor = Color.Green;
            }

            row.SubItems[ColOccurences].ForeColor = countColor;

            // Heading cell: an empty H1 is reported as missing.
            var headingCell = row.SubItems[headingColumn];

            if ((level == 1) && string.IsNullOrEmpty(label))
            {
                headingCell.Text = "MISSING";
                headingCell.ForeColor = Color.Red;
            }
            else
            {
                headingCell.ForeColor = Color.Green;
            }
        }
    }
}
/**************************************************************************/

/// <summary>
/// Adds a worksheet to the workbook and fills it with the page headings
/// report: a header row, then one row per heading found in each qualifying
/// document. The filled range is finally turned into a table.
/// </summary>
/// <remarks>
/// Documents that report GetIsExternal() or GetIsRedirect(), and documents
/// that do not report GetIsHtml(), are skipped.
/// Columns are URL, Occurences, Order, H1..H6; the heading text of a row is
/// written into the H column matching its level.
/// </remarks>
/// <param name="JobMaster">Job whose document collection is reported on.</param>
/// <param name="wb">Workbook that receives the new worksheet.</param>
/// <param name="WorksheetLabel">Name given to the new worksheet.</param>
private void BuildWorksheetPageHeadings(
    MacroscopeJobMaster JobMaster,
    XLWorkbook wb,
    string WorksheetLabel
)
{
    var ws = wb.Worksheets.Add(WorksheetLabel);

    MacroscopeDocumentCollection documents = JobMaster.GetDocCollection();

    // Fixed column positions of the three leading columns.
    int urlColumn = 1;
    int occurrencesColumn = 2;
    int orderColumn = 3;

    // Header row.
    int headerRow = 1;
    int lastColumn = 0;

    foreach (string title in new string[] { "URL", "Occurences", "Order" })
    {
        lastColumn++;
        ws.Cell(headerRow, lastColumn).Value = title;
    }

    for (int i = 1; i <= 6; i++)
    {
        lastColumn++;
        ws.Cell(headerRow, lastColumn).Value = string.Format("H{0}", i);
    }

    // Data rows start directly below the header.
    int row = headerRow + 1;

    foreach (string Url in documents.DocumentKeys())
    {
        MacroscopeDocument msDoc = documents.GetDocument(Url);

        if (msDoc.GetIsExternal() || msDoc.GetIsRedirect() || !msDoc.GetIsHtml())
        {
            continue;
        }

        for (ushort level = 1; level <= MacroscopePreferencesManager.GetMaxHeadingDepth(); level++)
        {
            List<string> headings = msDoc.GetHeadings(level);

            for (int position = 0; position < headings.Count; position++)
            {
                string text = headings[position];
                int count = documents.GetStatsHeadingsCount(HeadingLevel: level, Text: text);

                // URL cell: green for internal documents, gray otherwise.
                this.InsertAndFormatUrlCell(ws, row, urlColumn, msDoc);

                bool isInternal = msDoc.GetIsInternal();

                ws.Cell(row, urlColumn).Style.Font.SetFontColor(isInternal ? XLColor.Green : XLColor.Gray);

                // Occurrences cell: orange flags heading text counted more
                // than once on an internal document.
                this.InsertAndFormatContentCell(ws, row, occurrencesColumn, count);

                XLColor countColor;

                if (!isInternal)
                {
                    countColor = XLColor.Gray;
                }
                else if (count > 1)
                {
                    countColor = XLColor.Orange;
                }
                else
                {
                    countColor = XLColor.Green;
                }

                ws.Cell(row, occurrencesColumn).Style.Font.SetFontColor(countColor);

                // Order cell holds the one-based position of the heading.
                this.InsertAndFormatContentCell(ws, row, orderColumn, this.FormatIfMissing((position + 1).ToString()));

                // The heading text goes into the H column for its level,
                // counted from the Order column.
                this.InsertAndFormatContentCell(ws, row, orderColumn + level, this.FormatIfMissing(text));

                row++;
            }
        }
    }

    // Turn everything written so far, header included, into a table.
    ws.Range(1, 1, row - 1, lastColumn).CreateTable();
}