/**************************************************************************/

/// <summary>
/// Renders search-target rows for every document in <paramref name="DocCollection"/>
/// and appends them to the display list view in a single batch.
/// </summary>
/// <remarks>
/// A progress dialogue is updated once per document when
/// <c>MacroscopePreferencesManager.GetShowProgressDialogues()</c> returns true.
/// </remarks>
/// <param name="DocCollection">Collection whose documents are iterated.</param>
/// <param name="UrlFragment">Passed through unchanged to the per-document renderer.</param>
public void RenderListViewSearchTargetUrls(
  MacroscopeDocumentCollection DocCollection,
  string UrlFragment
)
{

  List<ListViewItem> ListViewItems = new List<ListViewItem>(DocCollection.CountDocuments());
  MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

  try
  {

    decimal Count = 0;
    decimal TotalDocs = (decimal)DocCollection.CountDocuments();

    // Decimal division by zero throws, so an empty collection must not reach the division.
    decimal PercentPerDoc = (TotalDocs > 0) ? ((decimal)100 / TotalDocs) : 0;

    if (MacroscopePreferencesManager.GetShowProgressDialogues())
    {
      ProgressForm.UpdatePercentages(
        Title: "Displaying Links",
        Message: "Processing links in document collection for display:",
        MajorPercentage: PercentPerDoc * Count,
        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
      );
    }

    foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
    {

      // The URL is read only after the null check; null entries are skipped
      // for rendering but still advance the progress counter.
      if (msDoc != null)
      {
        this.RenderListViewSearchTargetUrls(
          ListViewItems: ListViewItems,
          msDoc: msDoc,
          Url: msDoc.GetUrl(),
          UrlFragment: UrlFragment
        );
      }

      if (MacroscopePreferencesManager.GetShowProgressDialogues())
      {
        Count++;
        ProgressForm.UpdatePercentages(
          Title: null,
          Message: null,
          MajorPercentage: PercentPerDoc * Count,
          ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
        );
      }

    }

    this.DisplayListView.Items.AddRange(ListViewItems.ToArray());

    if (MacroscopePreferencesManager.GetShowProgressDialogues())
    {
      ProgressForm.DoClose();
    }

  }
  finally
  {
    // Release the progress form even when rendering throws.
    ProgressForm.Dispose();
  }

}
/** Render Entire DocCollection *******************************************/

/// <summary>
/// Renders every non-null document of <paramref name="DocCollection"/> into the tree view,
/// pumping the message loop between documents and optionally reporting progress.
/// </summary>
/// <param name="DocCollection">Collection to render; nothing happens when it is empty.</param>
public void RenderTreeView(MacroscopeDocumentCollection DocCollection)
{

  if (DocCollection.CountDocuments() == 0)
  {
    return;
  }

  MacroscopeSinglePercentageProgressForm Progress = new MacroscopeSinglePercentageProgressForm(this.MainForm);
  decimal Processed = 0;
  decimal Total = (decimal)DocCollection.CountDocuments();

  if (MacroscopePreferencesManager.GetShowProgressDialogues())
  {
    Progress.UpdatePercentages(
      Title: "Preparing Display",
      Message: "Processing document collection for display:",
      MajorPercentage: ((decimal)100 / Total) * Processed,
      ProgressLabelMajor: string.Format("Document {0} / {1}", Processed, Total)
    );
  }

  foreach (MacroscopeDocument Doc in DocCollection.IterateDocuments())
  {

    // Keep the UI responsive while a large collection is rendered.
    Application.DoEvents();

    // Null entries are neither rendered nor counted towards progress.
    if (Doc != null)
    {

      this.RenderTreeView(Doc, Doc.GetUrl());

      if (MacroscopePreferencesManager.GetShowProgressDialogues())
      {
        Processed++;
        Progress.UpdatePercentages(
          Title: null,
          Message: null,
          MajorPercentage: ((decimal)100 / Total) * Processed,
          ProgressLabelMajor: string.Format("Document {0} / {1}", Processed, Total)
        );
      }

    }

  }

  if (MacroscopePreferencesManager.GetShowProgressDialogues())
  {
    Progress.DoClose();
  }

  Progress.Dispose();

}
/**************************************************************************/

/// <summary>
/// Renders a list view row for every document whose URL contains
/// <paramref name="UrlFragment"/> (current-culture comparison) and appends the
/// rows to the display list view in a single batch.
/// </summary>
/// <remarks>
/// A progress dialogue is updated once per document when
/// <c>MacroscopePreferencesManager.GetShowProgressDialogues()</c> returns true.
/// </remarks>
/// <param name="DocCollection">Collection whose documents are iterated.</param>
/// <param name="UrlFragment">Substring searched for in each document URL.</param>
public void RenderListViewSearchSourceUrls(
  MacroscopeDocumentCollection DocCollection,
  string UrlFragment
)
{

  List<ListViewItem> ListViewItems = new List<ListViewItem>(DocCollection.CountDocuments());
  MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

  try
  {

    decimal Count = 0;
    decimal TotalDocs = (decimal)DocCollection.CountDocuments();

    if (MacroscopePreferencesManager.GetShowProgressDialogues())
    {
      ProgressForm.UpdatePercentages(
        Title: "Displaying Links",
        Message: "Processing links in document collection for display:",
        // Decimal division by zero throws; an empty collection reports 0%.
        MajorPercentage: (TotalDocs > 0) ? ((decimal)100 / TotalDocs) * Count : 0,
        ProgressLabelMajor: "Documents Processed"
      );
    }

    foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
    {

      // Null entries are skipped for rendering but still advance progress.
      if (msDoc != null)
      {
        string Url = msDoc.GetUrl();
        if ((Url != null) && (Url.IndexOf(UrlFragment, StringComparison.CurrentCulture) >= 0))
        {
          this.RenderListView(
            ListViewItems: ListViewItems,
            DocCollection: DocCollection,
            msDoc: msDoc,
            Url: Url
          );
        }
      }

      if (MacroscopePreferencesManager.GetShowProgressDialogues())
      {
        Count++;
        // The total is re-read on every pass, as in the original code.
        TotalDocs = (decimal)DocCollection.CountDocuments();
        ProgressForm.UpdatePercentages(
          Title: null,
          Message: null,
          MajorPercentage: (TotalDocs > 0) ? ((decimal)100 / TotalDocs) * Count : 0,
          ProgressLabelMajor: null
        );
      }

    }

    this.DisplayListView.Items.AddRange(ListViewItems.ToArray());

    if (MacroscopePreferencesManager.GetShowProgressDialogues())
    {
      ProgressForm.DoClose();
    }

  }
  finally
  {
    // Release the progress form even when rendering throws.
    ProgressForm.Dispose();
  }

}
/// <summary>
/// Creates a document for every URL in <c>this.Urls</c>, issues a HEAD request for each,
/// and asserts that the collection ends up holding <c>this.MaxUrls</c> documents.
/// </summary>
public async Task TestNHeadRequests()
{

  MacroscopeJobMaster Master = new MacroscopeJobMaster(
    JobRunTimeMode: MacroscopeConstants.RunTimeMode.LIVE,
    TaskController: this
  );
  MacroscopeDocumentCollection Collection = new MacroscopeDocumentCollection(JobMaster: Master);

  // A freshly created collection must start out empty.
  Assert.AreEqual(0, Collection.CountDocuments());

  // NOTE(review): no AddDocument call is made here; confirm CreateDocument
  // registers the document, otherwise the final count cannot reach MaxUrls.
  foreach (string TargetUrl in this.Urls)
  {
    MacroscopeDocument Doc = Collection.CreateDocument(Url: TargetUrl);
    await Doc.ExecuteHeadRequest();
  }

  Assert.AreEqual(this.MaxUrls, Collection.CountDocuments());

}
/**************************************************************************/

/// <summary>
/// Re-renders the redirect-chain list view and the URL count for
/// <paramref name="DocCollection"/>, marshalling onto the main form's thread when required.
/// </summary>
/// <param name="DocCollection">Collection to display; nothing happens when it is empty.</param>
public void RefreshDataRedirectChains(MacroscopeDocumentCollection DocCollection)
{

  if (DocCollection.CountDocuments() <= 0)
  {
    return;
  }

  // Single definition of the refresh work, used for both the direct and the marshalled path.
  MethodInvoker Refresh = delegate
  {
    Cursor.Current = Cursors.WaitCursor;
    this.DisplayListView.BeginUpdate();
    try
    {
      this.RenderListViewRedirectChains(DocCollection);
      this.RenderUrlCount();
    }
    finally
    {
      // Always re-enable redrawing and restore the cursor, even if rendering throws.
      this.DisplayListView.EndUpdate();
      Cursor.Current = Cursors.Default;
    }
  };

  if (this.MainForm.InvokeRequired)
  {
    this.MainForm.Invoke(Refresh);
  }
  else
  {
    Refresh();
  }

}
/**************************************************************************/

/// <summary>
/// Runs the keyword-analysis refresh for <paramref name="DocCollection"/> under a wait cursor,
/// marshalling onto the main form's thread when required.
/// </summary>
/// <param name="DocCollection">Collection to analyse; nothing happens when it is empty.</param>
public void RefreshKeywordAnalysisData(MacroscopeDocumentCollection DocCollection)
{

  if (DocCollection.CountDocuments() == 0)
  {
    return;
  }

  MethodInvoker RefreshWithWaitCursor = () =>
  {
    Cursor.Current = Cursors.WaitCursor;
    this.RefreshKeywordAnalysisDataProgress(DocCollection: DocCollection);
    Cursor.Current = Cursors.Default;
  };

  // Already on the owning thread: run directly.
  if (!this.MainForm.InvokeRequired)
  {
    RefreshWithWaitCursor();
    return;
  }

  this.MainForm.Invoke(RefreshWithWaitCursor);

}
/**************************************************************************/

/// <summary>
/// Re-renders the list view for documents matching <paramref name="UrlFragment"/> and updates
/// the URL count, marshalling onto the main form's thread when required.
/// </summary>
/// <remarks>
/// Any exception is logged through <c>DebugMsg</c> and not rethrown.
/// </remarks>
/// <param name="DocCollection">Collection to display; nothing happens when it is empty.</param>
/// <param name="UrlFragment">Passed through unchanged to <c>RenderListView</c>.</param>
public void RefreshData(
  MacroscopeDocumentCollection DocCollection,
  string UrlFragment
)
{

  try
  {

    if (DocCollection.CountDocuments() <= 0)
    {
      return;
    }

    // Single definition of the refresh work, used for both the direct and the marshalled path.
    MethodInvoker Refresh = delegate
    {
      Cursor.Current = Cursors.WaitCursor;
      this.DisplayListView.BeginUpdate();
      try
      {
        this.RenderListView(
          DocCollection: DocCollection,
          UrlFragment: UrlFragment
        );
        this.RenderUrlCount();
      }
      finally
      {
        // The outer catch swallows failures, so redrawing and the cursor must be
        // restored here or the list view would stay frozen after an error.
        this.DisplayListView.EndUpdate();
        Cursor.Current = Cursors.Default;
      }
    };

    if (this.MainForm.InvokeRequired)
    {
      this.MainForm.Invoke(Refresh);
    }
    else
    {
      Refresh();
    }

  }
  catch (Exception ex)
  {
    this.DebugMsg(string.Format("RefreshData: {0}", ex.Message));
  }

}
/**************************************************************************/

/// <summary>
/// Renders the search-target URL list for <paramref name="DocCollection"/> and updates the URL
/// count under a wait cursor, marshalling onto the main form's thread when required.
/// </summary>
/// <param name="DocCollection">Collection to display; nothing happens when it is empty.</param>
/// <param name="UrlFragment">Passed through unchanged to <c>RenderListViewSearchTargetUrls</c>.</param>
public void RefreshDataSearchTargetUrls(
  MacroscopeDocumentCollection DocCollection,
  string UrlFragment
)
{

  if (DocCollection.CountDocuments() <= 0)
  {
    return;
  }

  MethodInvoker RenderTargets = () =>
  {
    Cursor.Current = Cursors.WaitCursor;
    this.RenderListViewSearchTargetUrls(
      DocCollection: DocCollection,
      UrlFragment: UrlFragment
    );
    this.RenderUrlCount();
    Cursor.Current = Cursors.Default;
  };

  // Already on the owning thread: run directly.
  if (!this.MainForm.InvokeRequired)
  {
    RenderTargets();
    return;
  }

  this.MainForm.Invoke(RenderTargets);

}
/**************************************************************************/

/// <summary>
/// Renders a list view row for every document whose URL contains
/// <paramref name="UrlFragment"/> (current-culture comparison) and appends the
/// rows to the display list view in a single batch.
/// </summary>
/// <param name="DocCollection">Collection whose documents are iterated.</param>
/// <param name="UrlFragment">Substring searched for in each document URL.</param>
public void RenderListViewSearchSourceUrls(
  MacroscopeDocumentCollection DocCollection,
  string UrlFragment
)
{

  List<ListViewItem> Rows = new List<ListViewItem>(DocCollection.CountDocuments());

  foreach (MacroscopeDocument Doc in DocCollection.IterateDocuments())
  {

    string DocUrl = Doc.GetUrl();

    // Skip documents whose URL does not contain the fragment.
    if (DocUrl.IndexOf(UrlFragment, StringComparison.CurrentCulture) < 0)
    {
      continue;
    }

    this.RenderListView(
      ListViewItems: Rows,
      msDoc: Doc,
      Url: DocUrl
    );

  }

  this.DisplayListView.Items.AddRange(Rows.ToArray());

}
/**************************************************************************/

/// <summary>
/// Renders search-target rows for every non-null document in <paramref name="DocCollection"/>
/// and appends them to the display list view in a single batch.
/// </summary>
/// <param name="DocCollection">Collection whose documents are iterated.</param>
/// <param name="UrlFragment">Passed through unchanged to the per-document renderer.</param>
public void RenderListViewSearchTargetUrls(
  MacroscopeDocumentCollection DocCollection,
  string UrlFragment
)
{

  List<ListViewItem> ListViewItems = new List<ListViewItem>(DocCollection.CountDocuments());

  foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
  {

    // The null check must come before any member access on msDoc.
    if (msDoc == null)
    {
      continue;
    }

    this.RenderListViewSearchTargetUrls(
      ListViewItems: ListViewItems,
      msDoc: msDoc,
      Url: msDoc.GetUrl(),
      UrlFragment: UrlFragment
    );

  }

  this.DisplayListView.Items.AddRange(ListViewItems.ToArray());

}
/**************************************************************************/

/// <summary>
/// Shows only documents whose HTTP status code is in the 400-599 range.
/// Matching documents are rendered as rows; all others are removed from the list view.
/// </summary>
/// <remarks>
/// Status 410 and 451 are treated as non-errors when the corresponding
/// <c>GetIgnoreErrors410</c> / <c>GetIgnoreErrors451</c> preference is enabled.
/// </remarks>
/// <param name="DocCollection">Collection to display; nothing happens when it is empty.</param>
public override void RenderListView(MacroscopeDocumentCollection DocCollection)
{

  if (DocCollection.CountDocuments() == 0)
  {
    return;
  }

  List<ListViewItem> Rows = new List<ListViewItem>(DocCollection.CountDocuments());
  MacroscopeSinglePercentageProgressForm Progress = new MacroscopeSinglePercentageProgressForm(this.MainForm);
  decimal Processed = 0;
  decimal Total = (decimal)DocCollection.CountDocuments();
  decimal Percent = ((decimal)100 / Total) * Processed;

  if (MacroscopePreferencesManager.GetShowProgressDialogues())
  {
    Progress.UpdatePercentages(
      Title: "Preparing Display",
      Message: "Processing document collection for display:",
      MajorPercentage: Percent,
      ProgressLabelMajor: string.Format("Document {0} / {1}", Processed, Total)
    );
  }

  foreach (MacroscopeDocument Doc in DocCollection.IterateDocuments())
  {

    int Code = (int)Doc.GetStatusCode();

    // Client (4xx) and server (5xx) errors form one contiguous integer range.
    bool IsError = (Code >= 400) && (Code <= 599);

    if (MacroscopePreferencesManager.GetIgnoreErrors410() && (Code == 410))
    {
      IsError = false;
    }

    if (MacroscopePreferencesManager.GetIgnoreErrors451() && (Code == 451))
    {
      IsError = false;
    }

    if (!IsError)
    {
      this.RemoveFromListView(Url: Doc.GetUrl());
    }
    else
    {
      this.RenderListView(
        ListViewItems: Rows,
        DocCollection: DocCollection,
        msDoc: Doc,
        Url: Doc.GetUrl()
      );
    }

    if (MacroscopePreferencesManager.GetShowProgressDialogues())
    {
      Processed++;
      // The total is re-read on every pass.
      Total = (decimal)DocCollection.CountDocuments();
      Percent = ((decimal)100 / Total) * Processed;
      Progress.UpdatePercentages(
        Title: null,
        Message: null,
        MajorPercentage: Percent,
        ProgressLabelMajor: string.Format("Document {0} / {1}", Processed, Total)
      );
    }

  }

  this.DisplayListView.Items.AddRange(Rows.ToArray());

  if (MacroscopePreferencesManager.GetShowProgressDialogues())
  {
    Progress.DoClose();
  }

  Progress.Dispose();

}
/**************************************************************************/

/// <summary>
/// Adds a worksheet listing every internal HTML or PDF document whose title
/// occurs more than once in the collection (columns: URL, Occurrences, Title),
/// then converts the written range into a table.
/// </summary>
/// <param name="JobMaster">Supplies the document collection and the allowed-hosts list.</param>
/// <param name="wb">Workbook that receives the new worksheet.</param>
/// <param name="WorksheetLabel">Name of the worksheet to add.</param>
private void BuildWorksheetPageDuplicateTitles(
  MacroscopeJobMaster JobMaster,
  XLWorkbook wb,
  string WorksheetLabel
)
{

  var ws = wb.Worksheets.Add(WorksheetLabel);

  int iRow = 1;
  int iCol = 1;
  int iColMax;

  decimal Count = 0;

  MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
  MacroscopeAllowedHosts AllowedHosts = JobMaster.GetAllowedHosts();
  decimal DocCount = (decimal)DocCollection.CountDocuments();

  // Header row.
  ws.Cell(iRow, iCol).Value = "URL";
  iCol++;
  ws.Cell(iRow, iCol).Value = "Occurrences";
  iCol++;
  ws.Cell(iRow, iCol).Value = "Title";

  iColMax = iCol;
  iRow++;

  foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
  {

    // Progress is reported for every document, including those filtered out below.
    if (DocCount > 0)
    {
      Count++;
      this.ProgressForm.UpdatePercentages(
        Title: null,
        Message: null,
        MajorPercentage: -1,
        ProgressLabelMajor: string.Format("Documents Processed: {0}", Count),
        MinorPercentage: ((decimal)100 / DocCount) * Count,
        ProgressLabelMinor: msDoc.GetUrl(),
        SubMinorPercentage: -1,
        ProgressLabelSubMinor: null
      );
    }

    if (!AllowedHosts.IsInternalUrl(Url: msDoc.GetUrl()))
    {
      continue;
    }

    // Only HTML and PDF documents are reported.
    switch (msDoc.GetDocumentType())
    {
      case MacroscopeConstants.DocumentType.HTML:
      case MacroscopeConstants.DocumentType.PDF:
        break;
      default:
        continue;
    }

    int Occurrences = DocCollection.GetStatsTitleCount(msDoc: msDoc);

    // A title that occurs once is not a duplicate.
    if (Occurrences <= 1)
    {
      continue;
    }

    iCol = 1;

    this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc);

    if (msDoc.GetIsInternal())
    {
      ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
    }
    else
    {
      ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray);
    }

    iCol++;

    // Every row written here has Occurrences > 1, so the count is always flagged orange.
    this.InsertAndFormatContentCell(ws, iRow, iCol, Occurrences);
    ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Orange);

    iCol++;

    this.InsertAndFormatContentCell(ws, iRow, iCol, this.FormatIfMissing(msDoc.GetTitle()));

    iRow++;

  }

  // Turn header plus data rows into a table.
  ws.Range(1, 1, iRow - 1, iColMax).CreateTable();

}
/**************************************************************************/

/// <summary>
/// Renders the documents named in <paramref name="UrlList"/> into the tree view,
/// expands all nodes, and optionally reports progress.
/// </summary>
/// <remarks>
/// URLs with no matching document in <paramref name="DocCollection"/> are skipped
/// but still counted towards progress.
/// </remarks>
/// <param name="DocCollection">Collection used to resolve each URL; nothing happens when it is empty.</param>
/// <param name="UrlList">URLs to render; nothing happens when it is empty.</param>
private void RenderTreeView(
  MacroscopeDocumentCollection DocCollection,
  List<string> UrlList
)
{

  if (DocCollection.CountDocuments() == 0)
  {
    return;
  }

  if (UrlList.Count == 0)
  {
    return;
  }

  MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

  try
  {

    decimal Count = 0;
    decimal TotalDocs = (decimal)UrlList.Count;
    decimal MajorPercentage = ((decimal)100 / TotalDocs) * Count;

    if (MacroscopePreferencesManager.GetShowProgressDialogues())
    {
      ProgressForm.UpdatePercentages(
        Title: "Preparing Display",
        Message: "Processing document collection for display:",
        MajorPercentage: MajorPercentage,
        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
      );
    }

    this.tvTreeView.BeginUpdate();

    try
    {

      DebugMsg(string.Format("HIERARCHY: {0}", "BASE"));

      foreach (string Url in UrlList)
      {

        MacroscopeDocument msDoc = DocCollection.GetDocumentByUrl(Url: Url);

        if (msDoc != null)
        {
          this.RenderTreeView(msDoc, Url);
        }

        Count++;

        // Honour the progress-dialogue preference inside the loop as well,
        // matching the initial update above and the close call below.
        if (MacroscopePreferencesManager.GetShowProgressDialogues())
        {
          MajorPercentage = ((decimal)100 / TotalDocs) * Count;
          ProgressForm.UpdatePercentages(
            Title: null,
            Message: null,
            MajorPercentage: MajorPercentage,
            ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
          );
        }

      }

      this.tvTreeView.ExpandAll();

    }
    finally
    {
      // Re-enable redrawing even when rendering throws.
      this.tvTreeView.EndUpdate();
    }

    if (MacroscopePreferencesManager.GetShowProgressDialogues())
    {
      ProgressForm.DoClose();
    }

  }
  finally
  {
    ProgressForm.Dispose();
  }

}
// RenderListView (custom filters): fills DisplayListView with one row per URL in UrlList,
// showing URL, status code, status, MIME type, and one column per custom filter slot.
//
// - Throws when this.FilterColOffset is -1; returns immediately when DocCollection is empty.
// - FilterColsTable maps each filter pattern to its 1-based slot. When a pattern is already
//   present, the slot is stored under the key "EMPTY<slot>" instead, so the later lookup by
//   pattern resolves to the first slot that used that pattern.
// - URLs with no document, or whose document fails CustomFilter.CanApplyCustomFiltersToDocument,
//   are skipped via "continue" and therefore do not advance the progress counter.
// - Existing rows are reused by key (the document URL); new rows are collected and added to the
//   list view in a single AddRange call after the loop.
// - Filter cells: green for CONTAINS_* / NOT_CONTAINS_*, red for MUST_CONTAIN_* /
//   SHOULD_NOT_CONTAIN_*, gray for any other defined value. Exceptions while filling the cells
//   are logged via DebugMsg and not rethrown.
// - URL cell: green when the document is internal, gray otherwise.
// - Status cells: green for codes starting with 2, goldenrod for 3, red for 4 or 5, blue otherwise.
// - Finally resizes columns to header size and then sets fixed widths on the first four columns.
//
// NOTE(review): lvItem is assigned in both branches before the null check, so the "MISSING"
// branch looks unreachable, and lvItem is dereferenced unconditionally afterwards.
// NOTE(review): the progress total is DocCollection.CountDocuments() while the loop iterates
// UrlList, so the percentage may not reach 100 - confirm this is intended.
/**************************************************************************/ private void RenderListView( MacroscopeDocumentCollection DocCollection, List <string> UrlList, MacroscopeCustomFilters CustomFilter ) { if (this.FilterColOffset == -1) { throw (new Exception("this.FilterColOffset invalid")); } if (DocCollection.CountDocuments() == 0) { return; } MacroscopeAllowedHosts AllowedHosts = this.MainForm.GetJobMaster().GetAllowedHosts(); Dictionary <string, int> FilterColsTable = new Dictionary <string, int>(CustomFilter.GetSize()); List <ListViewItem> ListViewItems = new List <ListViewItem>(); MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm); decimal Count = 0; decimal TotalDocs = (decimal)DocCollection.CountDocuments(); decimal MajorPercentage = ((decimal)100 / TotalDocs) * Count; if (MacroscopePreferencesManager.GetShowProgressDialogues()) { ProgressForm.UpdatePercentages( Title: "Preparing Display", Message: "Processing document collection for display:", MajorPercentage: MajorPercentage, ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs) ); } for (int Slot = 0; Slot < CustomFilter.GetSize(); Slot++) { string FilterPattern = CustomFilter.GetPattern(Slot).Key; if (FilterColsTable.ContainsKey(FilterPattern)) { FilterColsTable.Add(string.Format("EMPTY{0}", Slot + 1), Slot + 1); } else { FilterColsTable.Add(FilterPattern, Slot + 1); } } foreach (string Url in UrlList) { MacroscopeDocument msDoc = DocCollection.GetDocumentByUrl(Url: Url); ListViewItem lvItem = null; string DocUrl; string PairKey; string StatusCode; string Status; string MimeType; if (msDoc == null) { continue; } else { DocUrl = msDoc.GetUrl(); PairKey = DocUrl; StatusCode = ((int)msDoc.GetStatusCode()).ToString(); Status = msDoc.GetStatusCode().ToString(); MimeType = msDoc.GetMimeType(); } if (!CustomFilter.CanApplyCustomFiltersToDocument(msDoc: msDoc)) { continue; } if 
(this.DisplayListView.Items.ContainsKey(PairKey)) { lvItem = this.DisplayListView.Items[PairKey]; } else { lvItem = new ListViewItem(PairKey); lvItem.UseItemStyleForSubItems = false; lvItem.Name = PairKey; lvItem.SubItems.Add(""); lvItem.SubItems.Add(""); lvItem.SubItems.Add(""); lvItem.SubItems.Add(""); for (int Slot = 0; Slot < CustomFilter.GetSize(); Slot++) { lvItem.SubItems.Add(""); } ListViewItems.Add(lvItem); } if (lvItem != null) { try { lvItem.SubItems[ColUrl].Text = DocUrl; lvItem.SubItems[ColStatusCode].Text = StatusCode; lvItem.SubItems[ColStatus].Text = Status; lvItem.SubItems[ColMimeType].Text = MimeType; for (int Slot = 0; Slot < CustomFilter.GetSize(); Slot++) { string FilterPattern = CustomFilter.GetPattern(Slot: Slot).Key; KeyValuePair <string, MacroscopeConstants.TextPresence> Pair = msDoc.GetCustomFilteredItem(Text: FilterPattern); int ColOffset = this.FilterColOffset + FilterColsTable[FilterPattern]; if ((Pair.Key != null) && (Pair.Value != MacroscopeConstants.TextPresence.UNDEFINED)) { lvItem.SubItems[ColOffset].Text = MacroscopeConstants.TextPresenceLabels[Pair.Value]; switch (Pair.Value) { case MacroscopeConstants.TextPresence.CONTAINS_STRING: lvItem.SubItems[ColOffset].ForeColor = Color.Green; break; case MacroscopeConstants.TextPresence.NOT_CONTAINS_STRING: lvItem.SubItems[ColOffset].ForeColor = Color.Green; break; case MacroscopeConstants.TextPresence.MUST_CONTAIN_STRING: lvItem.SubItems[ColOffset].ForeColor = Color.Red; break; case MacroscopeConstants.TextPresence.SHOULD_NOT_CONTAIN_STRING: lvItem.SubItems[ColOffset].ForeColor = Color.Red; break; case MacroscopeConstants.TextPresence.CONTAINS_REGEX: lvItem.SubItems[ColOffset].ForeColor = Color.Green; break; case MacroscopeConstants.TextPresence.NOT_CONTAINS_REGEX: lvItem.SubItems[ColOffset].ForeColor = Color.Green; break; case MacroscopeConstants.TextPresence.MUST_CONTAIN_REGEX: lvItem.SubItems[ColOffset].ForeColor = Color.Red; break; case 
MacroscopeConstants.TextPresence.SHOULD_NOT_CONTAIN_REGEX: lvItem.SubItems[ColOffset].ForeColor = Color.Red; break; default: lvItem.SubItems[ColOffset].ForeColor = Color.Gray; break; } } } } catch (Exception ex) { DebugMsg(string.Format("MacroscopeDisplayCustomFilters: {0}", ex.Message)); DebugMsg(string.Format("MacroscopeDisplayCustomFilters: {0}", ex.StackTrace)); } } else { DebugMsg(string.Format("MacroscopeDisplayCustomFilters MISSING: {0}", PairKey)); } if (msDoc.GetIsInternal()) { lvItem.SubItems[ColUrl].ForeColor = Color.Green; } else { lvItem.SubItems[ColUrl].ForeColor = Color.Gray; } if (Regex.IsMatch(StatusCode, "^[2]")) { lvItem.SubItems[ColStatusCode].ForeColor = Color.Green; lvItem.SubItems[ColStatus].ForeColor = Color.Green; } else if (Regex.IsMatch(StatusCode, "^[3]")) { lvItem.SubItems[ColStatusCode].ForeColor = Color.Goldenrod; lvItem.SubItems[ColStatus].ForeColor = Color.Goldenrod; } else if (Regex.IsMatch(StatusCode, "^[45]")) { lvItem.SubItems[ColStatusCode].ForeColor = Color.Red; lvItem.SubItems[ColStatus].ForeColor = Color.Red; } else { lvItem.SubItems[ColStatusCode].ForeColor = Color.Blue; lvItem.SubItems[ColStatus].ForeColor = Color.Blue; } if (MacroscopePreferencesManager.GetShowProgressDialogues()) { Count++; MajorPercentage = ((decimal)100 / TotalDocs) * Count; ProgressForm.UpdatePercentages( Title: null, Message: null, MajorPercentage: MajorPercentage, ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs) ); } } this.DisplayListView.Items.AddRange(ListViewItems.ToArray()); this.DisplayListView.AutoResizeColumns(ColumnHeaderAutoResizeStyle.HeaderSize); this.DisplayListView.Columns[ColUrl].Width = 300; this.DisplayListView.Columns[ColStatusCode].Width = 100; this.DisplayListView.Columns[ColStatus].Width = 100; this.DisplayListView.Columns[ColMimeType].Width = 100; if (MacroscopePreferencesManager.GetShowProgressDialogues()) { ProgressForm.DoClose(); } if (ProgressForm != null) { ProgressForm.Dispose(); } }
/**************************************************************************/

/// <summary>
/// Refreshes the summary nodes of the tree view (URL totals, response times,
/// robots/sitemap counts, warnings, errors, canonicals, document types,
/// languages and readability) and pushes the matching data points to the charts panel.
/// </summary>
/// <remarks>
/// Nodes are located by key with a recursive <c>Nodes.Find</c>; a missing node is skipped
/// instead of aborting the remaining sections. Exceptions are logged via <c>DebugMsg</c>
/// and not rethrown.
/// </remarks>
/// <param name="DocCollection">Collection that supplies the statistics.</param>
private void RenderTreeViewSummary(MacroscopeDocumentCollection DocCollection)
{

  MacroscopeJobMaster JobMaster = this.MainForm.GetJobMaster();

  this.tvTreeView.BeginUpdate();

  try
  {

    // Site summary totals.
    {
      SortedDictionary<string, double> DataPoints = new SortedDictionary<string, double>();

      int UrlsFound = JobMaster.GetPagesFound();
      this.SetSummaryLeafText("UrlsFound", string.Format("Total URLs Found: {0}", UrlsFound));
      DataPoints.Add("URLs Found", (double)UrlsFound);

      int UrlsCrawled = DocCollection.CountDocuments();
      this.SetSummaryLeafText("UrlsCrawled", string.Format("Total URLs Crawled: {0}", UrlsCrawled));
      DataPoints.Add("URLs Crawled", (double)UrlsCrawled);

      ulong UrlsInternal = DocCollection.CountUrlsInternal();
      this.SetSummaryLeafText("UrlsInternal", string.Format("Total Internal URLs: {0}", UrlsInternal));
      DataPoints.Add("Internal URLs", (double)UrlsInternal);

      ulong UrlsExternal = DocCollection.CountUrlsExternal();
      this.SetSummaryLeafText("UrlsExternal", string.Format("Total External URLs: {0}", UrlsExternal));
      DataPoints.Add("External URLs", (double)UrlsExternal);

      this.SiteStructurePanelCharts.UpdateSiteSummary(DataPoints: DataPoints);
    }

    // Response times: a data point falls back to 0 when its node is missing.
    {
      SortedDictionary<string, double> DataPoints = new SortedDictionary<string, double>();
      decimal Fastest = DocCollection.GetStatsDurationsFastest();
      decimal Slowest = DocCollection.GetStatsDurationsSlowest();
      decimal Average = DocCollection.GetStatsDurationAverage();

      bool HasFastest = this.SetSummaryLeafText(
        "FASTESTPAGERESPONSE",
        string.Format("Fastest Page Response: {0:0.00} secs", Fastest)
      );
      DataPoints.Add("Fastest Page Response", HasFastest ? (double)Fastest : 0);

      bool HasSlowest = this.SetSummaryLeafText(
        "SLOWESTPAGERESPONSE",
        string.Format("Slowest Page Response: {0:0.00} secs", Slowest)
      );
      DataPoints.Add("Slowest Page Response", HasSlowest ? (double)Slowest : 0);

      bool HasAverage = this.SetSummaryLeafText(
        "AVERAGEPAGEDURATION",
        string.Format("Average Page Duration: {0:0.00} secs", Average)
      );
      DataPoints.Add("Average Page Duration", HasAverage ? (double)Average : 0);

      this.SiteStructurePanelCharts.UpdateResponseTimes(DataPoints: DataPoints);
    }

    // Robots and sitemaps.
    {
      int RobotsBlocked = JobMaster.GetBlockedByRobotsList().Count;
      this.SetSummaryLeafText("UrlsRobotsBlocked", string.Format("URLs Blocked by Robots.txt: {0}", RobotsBlocked));

      ulong Sitemaps = DocCollection.CountUrlsSitemaps();
      this.SetSummaryLeafText("SitemapsFound", string.Format("Sitemaps Found: {0}", Sitemaps));
    }

    // Fetch warnings and errors.
    this.FillSummaryLeafCounts("FETCH_WARNINGS", () => DocCollection.GetStatsWarningsCount(), null, null);
    this.FillSummaryLeafCounts("FETCH_ERRORS", () => DocCollection.GetStatsErrorsCount(), null, null);

    // Canonicals: a missing dictionary key is shown as 0, and a missing tree node
    // is skipped rather than indexed (which previously threw and aborted the
    // remaining sections through the catch block).
    {
      Dictionary<bool, int> Canonicals = DocCollection.GetStatsCanonicalsCount();
      int Specified;
      int NotSpecified;

      if (!Canonicals.TryGetValue(true, out Specified))
      {
        Specified = 0;
      }

      if (!Canonicals.TryGetValue(false, out NotSpecified))
      {
        NotSpecified = 0;
      }

      this.SetSummaryLeafText("CANONICALS_SPECIFIED_SPECIFIED", string.Format("Specified: {0}", Specified));
      this.SetSummaryLeafText("CANONICALS_SPECIFIED_NOT_SPECIFIED", string.Format("Not Specified: {0}", NotSpecified));
    }

    // Document types; child tags are used for graph selection clicks.
    this.FillSummaryLeafCounts(
      "DOCUMENT_TYPES_FOUND_INTERNAL",
      () => DocCollection.GetStatsDocumentTypesInternalCount(),
      "DOCUMENT_TYPES_FOUND_INTERNAL_TYPE",
      null
    );
    this.FillSummaryLeafCounts(
      "DOCUMENT_TYPES_FOUND_EXTERNAL",
      () => DocCollection.GetStatsDocumentTypesExternalCount(),
      "DOCUMENT_TYPES_FOUND_EXTERNAL_TYPE",
      null
    );

    // Languages specified by pages, also charted.
    {
      SortedDictionary<string, double> DataPoints = new SortedDictionary<string, double>();
      bool Found = this.FillSummaryLeafCounts(
        "LANGUAGES_SPECIFIED_PAGES",
        () => DocCollection.GetStatsLanguagesPagesCount(),
        "LANGUAGES_SPECIFIED_PAGES_LANG",
        DataPoints
      );
      if (Found)
      {
        this.SiteStructurePanelCharts.UpdateLanguagesSpecified(DataPoints: DataPoints);
      }
    }

    // Detected languages.
    this.FillSummaryLeafCounts("LANGUAGES_DETECTED_TITLES", () => DocCollection.GetStatsLanguagesTitlesCount(), null, null);
    this.FillSummaryLeafCounts("LANGUAGES_DETECTED_DESCRIPTIONS", () => DocCollection.GetStatsLanguagesDescriptionsCount(), null, null);
    this.FillSummaryLeafCounts("LANGUAGES_DETECTED_BODYTEXTS", () => DocCollection.GetStatsLanguagesBodyTextsCount(), null, null);

    // Text readability, also charted.
    {
      SortedDictionary<string, double> DataPoints = new SortedDictionary<string, double>();
      bool Found = this.FillSummaryLeafCounts(
        "TEXT_READABILITY",
        () => DocCollection.GetStatsReadabilityGradeStringsCount(),
        "TEXT_READABILITY_NODE",
        DataPoints
      );
      if (Found)
      {
        this.SiteStructurePanelCharts.UpdateReadability(DataPoints: DataPoints);
      }
    }

  }
  catch (Exception ex)
  {
    this.DebugMsg(ex.Message);
    this.DebugMsg(ex.Source);
  }
  finally
  {
    this.tvTreeView.EndUpdate();
  }

}

/// <summary>Returns the first tree node found for <paramref name="Key"/> (recursive search), or null when none exists.</summary>
private TreeNode FindSummaryLeaf(string Key)
{
  TreeNode[] Leaves = this.tvTreeView.Nodes.Find(Key, true);
  return (Leaves.Length > 0) ? Leaves[0] : null;
}

/// <summary>Sets the text of the node keyed by <paramref name="Key"/>; returns false when the node does not exist.</summary>
private bool SetSummaryLeafText(string Key, string Text)
{
  TreeNode Leaf = this.FindSummaryLeaf(Key);
  if (Leaf == null)
  {
    return false;
  }
  Leaf.Text = Text;
  return true;
}

/// <summary>
/// Replaces the children of the node keyed by <paramref name="Key"/> with one "name: count" child per entry.
/// The statistics are fetched only when the node exists. Each child receives <paramref name="Tag"/> when it is
/// non-null, and each entry is added to <paramref name="DataPoints"/> when that is non-null.
/// Returns false when the node does not exist.
/// </summary>
private bool FillSummaryLeafCounts(
  string Key,
  Func<IEnumerable<KeyValuePair<string, int>>> GetStats,
  string Tag,
  SortedDictionary<string, double> DataPoints
)
{

  TreeNode Leaf = this.FindSummaryLeaf(Key);

  if (Leaf == null)
  {
    return false;
  }

  IEnumerable<KeyValuePair<string, int>> Stats = GetStats();

  Leaf.Nodes.Clear();

  foreach (KeyValuePair<string, int> Stat in Stats)
  {
    TreeNode Child = Leaf.Nodes.Add(string.Format("{0}: {1}", Stat.Key, Stat.Value));
    if (Tag != null)
    {
      Child.Tag = Tag;
    }
    if (DataPoints != null)
    {
      DataPoints.Add(Stat.Key, (double)Stat.Value);
    }
  }

  return true;

}
// BuildWorksheetPageDuplicatePages: adds a worksheet named WorksheetLabel that lists
// near-duplicate pages found by Levenshtein analysis.
//
// - Header row: Status Code, Status, Origin URL, Distance, Similar URL.
// - Iterates DocCollection.DocumentUrls(); for each URL it updates the progress form,
//   then skips documents that are external or not of type HTML.
// - For each remaining document a MacroscopeLevenshteinAnalysis is created (size difference
//   and threshold are read from MacroscopePreferencesManager, sharing one CrossCheckList)
//   and AnalyzeDocCollection returns a map of similar documents to their distance.
// - One row is written per similar document. The status code and status columns are taken
//   from the origin document (msDocLeft), not from the similar document.
// - URL cells are green when AllowedHosts.IsInternalUrl is true, gray otherwise. The distance
//   cell is red when the distance is <= GetMaxLevenshteinDistance(), green otherwise.
// - Both loops stop as soon as this.ProgressForm.Cancelled() returns true.
// - Finally the range from row 1 to the last written row is converted into a table.
//
// NOTE(review): msDocLeft comes from GetDocumentByUrl and is dereferenced without a null
// check - confirm that DocumentUrls() can never yield a URL with no document.
/**************************************************************************/ private void BuildWorksheetPageDuplicatePages( MacroscopeJobMaster JobMaster, XLWorkbook wb, string WorksheetLabel ) { var ws = wb.Worksheets.Add(WorksheetLabel); int iRow = 1; int iCol = 1; int iColMax = 1; decimal DocCount = 0; decimal DocListCount = 0; decimal CountOuter = 0; decimal CountInner = 0; MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection(); MacroscopeAllowedHosts AllowedHosts = JobMaster.GetAllowedHosts(); Dictionary <string, bool> CrossCheckList; CrossCheckList = MacroscopeLevenshteinAnalysis.GetCrossCheckList( Capacity: DocCollection.CountDocuments() ); DocCount = ( decimal )DocCollection.CountDocuments(); { ws.Cell(iRow, iCol).Value = "Status Code"; iCol++; ws.Cell(iRow, iCol).Value = "Status"; iCol++; ws.Cell(iRow, iCol).Value = "Origin URL"; iCol++; ws.Cell(iRow, iCol).Value = "Distance"; iCol++; ws.Cell(iRow, iCol).Value = "Similar URL"; } iColMax = iCol; iRow++; foreach (string UrlLeft in DocCollection.DocumentUrls()) { MacroscopeDocument msDocLeft = DocCollection.GetDocumentByUrl(Url: UrlLeft); MacroscopeLevenshteinAnalysis LevenshteinAnalysis = null; CountOuter++; CountInner = 0; if (DocCount > 0) { this.ProgressForm.UpdatePercentages( Title: null, Message: null, MajorPercentage: -1, ProgressLabelMajor: string.Format("Documents Processed: {0}", CountOuter), MinorPercentage: (( decimal )100 / DocCount) * CountOuter, ProgressLabelMinor: UrlLeft, SubMinorPercentage: 0, ProgressLabelSubMinor: "" ); } if (msDocLeft.GetIsExternal()) { continue; } if (!msDocLeft.IsDocumentType(Type: MacroscopeConstants.DocumentType.HTML)) { continue; } LevenshteinAnalysis = new MacroscopeLevenshteinAnalysis( msDoc: msDocLeft, SizeDifference: MacroscopePreferencesManager.GetMaxLevenshteinSizeDifference(), Threshold: MacroscopePreferencesManager.GetMaxLevenshteinDistance(), CrossCheckList: CrossCheckList, IPercentageDone: this ); Dictionary <MacroscopeDocument, int> 
DocList; DocList = LevenshteinAnalysis.AnalyzeDocCollection( DocCollection: DocCollection ); DocListCount = ( decimal )DocList.Count; foreach (MacroscopeDocument msDocDuplicate in DocList.Keys) { int StatusCode = ( int )msDocLeft.GetStatusCode(); HttpStatusCode Status = msDocLeft.GetStatusCode(); string UrlDuplicate = msDocDuplicate.GetUrl(); int Distance = DocList[msDocDuplicate]; CountInner++; iCol = 1; if (DocCount > 0) { this.ProgressForm.UpdatePercentages( Title: null, Message: null, MajorPercentage: -1, ProgressLabelMajor: string.Format("Documents Processed: {0}", CountOuter), MinorPercentage: (( decimal )100 / DocCount) * CountOuter, ProgressLabelMinor: UrlLeft, SubMinorPercentage: (( decimal )100 / DocListCount) * CountInner, ProgressLabelSubMinor: UrlDuplicate ); } this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, StatusCode); iCol++; this.InsertAndFormatStatusCodeCell(ws, iRow, iCol, Status); iCol++; this.InsertAndFormatUrlCell(ws, iRow, iCol, UrlLeft); if (AllowedHosts.IsInternalUrl(Url: UrlLeft)) { ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green); } else { ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray); } iCol++; this.InsertAndFormatContentCell(ws, iRow, iCol, Distance.ToString()); if (Distance <= MacroscopePreferencesManager.GetMaxLevenshteinDistance()) { ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red); } else { ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green); } iCol++; this.InsertAndFormatUrlCell(ws, iRow, iCol, UrlDuplicate); if (AllowedHosts.IsInternalUrl(Url: UrlDuplicate)) { ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green); } else { ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Gray); } iRow++; if (this.ProgressForm.Cancelled()) { break; } } if (this.ProgressForm.Cancelled()) { break; } //Thread.Yield(); } { var rangeData = ws.Range(1, 1, iRow - 1, iColMax); var excelTable = rangeData.CreateTable(); } }
/// <summary>
/// Fetches a start page and three other pages, then repeatedly runs the
/// Levenshtein analysis for the start page and asserts that no reported
/// document has a distance of zero.
/// </summary>
public void TestDifferent()
{
  const string StartUrl = "https://nazuke.github.io/SEOMacroscope/";
  const int Passes = 10;

  MacroscopeJobMaster JobMaster = new MacroscopeJobMaster(
    JobRunTimeMode: MacroscopeConstants.RunTimeMode.LIVE,
    TaskController: this
  );
  MacroscopeDocumentCollection DocCollection = new MacroscopeDocumentCollection(JobMaster: JobMaster);
  Dictionary<string, bool> CrossCheckList = MacroscopeLevenshteinAnalysis.GetCrossCheckList(
    Capacity: DocCollection.CountDocuments()
  );

  MacroscopeDocument msDoc = DocCollection.CreateDocument(StartUrl);
  msDoc.Execute();
  DocCollection.AddDocument(msDoc);
  DebugMsg(string.Format("msDoc: {0}", msDoc.GetStatusCode()));

  MacroscopeLevenshteinAnalysis LevenshteinAnalysis = new MacroscopeLevenshteinAnalysis(
    msDoc: msDoc,
    SizeDifference: 64,
    Threshold: 16,
    CrossCheckList: CrossCheckList
  );

  string[] TargetUrls =
  {
    "https://nazuke.github.io/SEOMacroscope/blog/",
    "https://nazuke.github.io/SEOMacroscope/downloads/",
    "https://nazuke.github.io/SEOMacroscope/manual/"
  };

  foreach (string TargetUrl in TargetUrls)
  {
    MacroscopeDocument msDocTarget = DocCollection.CreateDocument(TargetUrl);
    msDocTarget.Execute();
    DocCollection.AddDocument(msDocTarget);
    DebugMsg(string.Format("msDocTarget: {0}", msDocTarget.GetStatusCode()));
  }

  for (int Pass = 0; Pass < Passes; Pass++)
  {
    Dictionary<MacroscopeDocument, int> DocList = LevenshteinAnalysis.AnalyzeDocCollection(
      DocCollection: DocCollection
    );

    DebugMsg(string.Format("DocList: {0}", DocList.Count));

    foreach (KeyValuePair<MacroscopeDocument, int> Analyzed in DocList)
    {
      int Distance = Analyzed.Value;
      string AnalyzedUrl = Analyzed.Key.GetUrl();

      DebugMsg(string.Format("msDocAnalyzed: {0} => {1}", Distance, AnalyzedUrl));

      Assert.AreNotEqual(
        Distance,
        0,
        string.Format("FAIL: {0} => {1}", Distance, AnalyzedUrl)
      );
    }
  }
}
/**************************************************************************/
/// <summary>
/// Rebuilds the four keyword list views (1 to 4 word terms) from
/// <paramref name="DocCollection"/>, reporting progress through a
/// double-percentage progress form when progress dialogues are enabled.
/// </summary>
/// <param name="DocCollection">Collection queried for the deep keyword analysis.</param>
public void RefreshKeywordAnalysisDataProgress(MacroscopeDocumentCollection DocCollection)
{
  MacroscopeDoublePercentageProgressForm ProgressForm = new MacroscopeDoublePercentageProgressForm(this.MainForm);
  decimal MajorPercentage = 0;

  // try/finally: the form must be closed and disposed even when rendering throws.
  try
  {
    if (MacroscopePreferencesManager.GetShowProgressDialogues())
    {
      ProgressForm.UpdatePercentages(
        Title: "Preparing Display",
        Message: "Processing keyword terms collection for display:",
        MajorPercentage: MajorPercentage,
        ProgressLabelMajor: "",
        MinorPercentage: 0,
        ProgressLabelMinor: ""
      );
    }

    try
    {
      ProgressForm.TopMost = true;
    }
    catch (Exception ex)
    {
      DebugMsg(string.Format("ProgressForm.Show(): {0}", ex.Message));
    }

    for (int i = 0; i <= 3; i++)
    {
      Application.DoEvents();

      // Once cancelled, the remaining list views are left untouched.
      if (ProgressForm.Cancelled())
      {
        continue;
      }

      List<ListViewItem> ListViewItems = new List<ListViewItem>(DocCollection.CountDocuments());
      Dictionary<string, int> DicTerms = DocCollection.GetDeepKeywordAnalysisAsDictonary(Words: i + 1);

      if (MacroscopePreferencesManager.GetShowProgressDialogues())
      {
        MajorPercentage = ((decimal)100 / (decimal)4) * (decimal)(i + 1);
        ProgressForm.UpdatePercentages(
          Title: null,
          Message: null,
          MajorPercentage: MajorPercentage,
          ProgressLabelMajor: string.Format("{0} Word Keywords", i + 1),
          MinorPercentage: 0,
          ProgressLabelMinor: ""
        );
      }

      this.TargetListViews[i].BeginUpdate();

      // Pair BeginUpdate with EndUpdate so a failure cannot leave the list view frozen.
      try
      {
        this.RenderKeywordAnalysisListView(
          ListViewItems: ListViewItems,
          TargetListView: this.TargetListViews[i],
          DicTerms: DicTerms,
          ProgressForm: ProgressForm
        );
        this.TargetListViews[i].Items.AddRange(ListViewItems.ToArray());
      }
      finally
      {
        this.TargetListViews[i].EndUpdate();
      }
    }
  }
  finally
  {
    if (MacroscopePreferencesManager.GetShowProgressDialogues())
    {
      ProgressForm.DoClose();
    }
    ProgressForm.Dispose();
  }
}
/**************************************************************************/
/// <summary>
/// Collects the download duration of every internal, downloaded document,
/// renders the 20 slowest and 20 fastest into their list views, and updates
/// the average label. Does nothing if either list view is disposed or the
/// collection is empty.
/// </summary>
/// <param name="DocCollection">Documents to measure.</param>
public void RefreshSiteSpeedData(MacroscopeDocumentCollection DocCollection)
{
  const int MeasurePages = 20;

  if (this.lvListViewFastest.IsDisposed || this.lvListViewSlowest.IsDisposed)
  {
    return;
  }

  if (DocCollection.CountDocuments() <= 0)
  {
    return;
  }

  SortedList<decimal, string> ByDuration = new SortedList<decimal, string>(DocCollection.CountDocuments(), this.DecimalSorterAscending);
  SortedList<decimal, string> Slowest = new SortedList<decimal, string>(MeasurePages, this.DecimalSorterDescending);
  SortedList<decimal, string> Fastest = new SortedList<decimal, string>(MeasurePages, this.DecimalSorterAscending);
  decimal TotalSeconds = 0;
  int Measured = 0;

  foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
  {
    string Url = msDoc.GetUrl();
    decimal Duration = msDoc.GetDurationInSeconds();

    if (!(msDoc.GetIsInternal() && msDoc.GetWasDownloaded()))
    {
      continue;
    }

    Measured++;
    TotalSeconds += Duration;

    // Keyed by duration: a later document with the same duration replaces the earlier URL.
    ByDuration[Duration] = Url;
  }

  foreach (decimal Duration in ByDuration.Keys.Take(MeasurePages))
  {
    Fastest.Add(Duration, ByDuration[Duration]);
  }

  foreach (decimal Duration in ByDuration.Keys.Reverse().Take(MeasurePages))
  {
    Slowest.Add(Duration, ByDuration[Duration]);
  }

  decimal Average = (Measured > 0) ? (TotalSeconds / Measured) : 0;

  // One render routine, run through Invoke when the main form requires it.
  MethodInvoker Render = delegate
  {
    Cursor.Current = Cursors.WaitCursor;
    this.RenderSiteSpeedListView(this.lvListViewSlowest, Slowest);
    this.RenderSiteSpeedListView(this.lvListViewFastest, Fastest);
    this.UpdateAverageLabel(Average);
    Cursor.Current = Cursors.Default;
  };

  if (this.MainForm.InvokeRequired)
  {
    this.MainForm.Invoke(Render);
  }
  else
  {
    Render();
  }
}
/** Render DocCollection Filtered by URL Fragment *************************/
/// <summary>
/// Renders every document whose URL contains <paramref name="UrlFragment"/>
/// and appends the resulting items to the display list view. Returns
/// immediately when the collection is empty.
/// </summary>
/// <param name="DocCollection">Documents to filter and render.</param>
/// <param name="UrlFragment">Substring looked for in each document URL (current-culture comparison).</param>
public void RenderListView(
  MacroscopeDocumentCollection DocCollection,
  string UrlFragment
)
{
  if (DocCollection.CountDocuments() == 0)
  {
    return;
  }

  List<ListViewItem> PendingItems = new List<ListViewItem>(DocCollection.CountDocuments());
  MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);
  decimal TotalDocs = (decimal)DocCollection.CountDocuments();
  decimal PercentPerDoc = (decimal)100 / TotalDocs;
  decimal Done = 0;

  if (MacroscopePreferencesManager.GetShowProgressDialogues())
  {
    ProgressForm.ControlBox = false;
    ProgressForm.UpdatePercentages(
      Title: "Preparing Display",
      Message: "Processing document collection for display:",
      MajorPercentage: PercentPerDoc * Done,
      ProgressLabelMajor: string.Format("Document {0} / {1}", Done, TotalDocs)
    );
  }

  foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
  {
    Application.DoEvents();

    if (msDoc != null)
    {
      string DocUrl = msDoc.GetUrl();
      bool Matches = DocUrl.IndexOf(UrlFragment, StringComparison.CurrentCulture) >= 0;

      if (Matches)
      {
        this.RenderListView(
          ListViewItems: PendingItems,
          DocCollection: DocCollection,
          msDoc: msDoc,
          Url: DocUrl
        );
      }
    }

    // Progress advances for every document, matched or not.
    if (MacroscopePreferencesManager.GetShowProgressDialogues())
    {
      Done++;
      ProgressForm.UpdatePercentages(
        Title: null,
        Message: null,
        MajorPercentage: PercentPerDoc * Done,
        ProgressLabelMajor: string.Format("Document {0} / {1}", Done, TotalDocs)
      );
    }
  }

  this.DisplayListView.Items.AddRange(PendingItems.ToArray());

  if (MacroscopePreferencesManager.GetShowProgressDialogues())
  {
    ProgressForm.DoClose();
  }

  ProgressForm.Dispose();
}
/** Render Filtered DocCollection *******************************************/
/// <summary>
/// Renders the documents selected by <paramref name="DocumentType"/> and
/// appends the resulting items to the display list view. INTERNALURL selects
/// internal documents, EXTERNALURL external ones; any other value selects
/// documents of that exact type, and ALL selects everything.
/// </summary>
/// <param name="DocCollection">Documents to filter and render.</param>
/// <param name="DocumentType">Filter to apply.</param>
public void RenderListView(
  MacroscopeDocumentCollection DocCollection,
  MacroscopeConstants.DocumentType DocumentType
)
{
  if (DocCollection.CountDocuments() == 0)
  {
    return;
  }

  List<ListViewItem> PendingItems = new List<ListViewItem>(DocCollection.CountDocuments());
  MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);
  decimal TotalDocs = (decimal)DocCollection.CountDocuments();
  decimal PercentPerDoc = (decimal)100 / TotalDocs;
  decimal Done = 0;

  if (MacroscopePreferencesManager.GetShowProgressDialogues())
  {
    ProgressForm.ControlBox = false;
    ProgressForm.UpdatePercentages(
      Title: "Preparing Display",
      Message: "Processing document collection for display:",
      MajorPercentage: PercentPerDoc * Done,
      ProgressLabelMajor: string.Format("Document {0} / {1}", Done, TotalDocs)
    );
  }

  foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
  {
    Application.DoEvents();

    if (msDoc != null)
    {
      bool Selected;

      switch (DocumentType)
      {
        case MacroscopeConstants.DocumentType.INTERNALURL:
          Selected = msDoc.GetIsInternal();
          break;

        case MacroscopeConstants.DocumentType.EXTERNALURL:
          Selected = msDoc.GetIsExternal();
          break;

        default:
          Selected = (msDoc.GetDocumentType() == DocumentType)
                     || (DocumentType == MacroscopeConstants.DocumentType.ALL);
          break;
      }

      if (Selected)
      {
        this.RenderListView(
          ListViewItems: PendingItems,
          DocCollection: DocCollection,
          msDoc: msDoc,
          Url: msDoc.GetUrl()
        );
      }
    }

    // Progress advances for every document, selected or not.
    if (MacroscopePreferencesManager.GetShowProgressDialogues())
    {
      Done++;
      ProgressForm.UpdatePercentages(
        Title: null,
        Message: null,
        MajorPercentage: PercentPerDoc * Done,
        ProgressLabelMajor: string.Format("Document {0} / {1}", Done, TotalDocs)
      );
    }
  }

  this.DisplayListView.Items.AddRange(PendingItems.ToArray());

  if (MacroscopePreferencesManager.GetShowProgressDialogues())
  {
    ProgressForm.DoClose();
  }

  ProgressForm.Dispose();
}
/**************************************************************************/
/// <summary>
/// Renders every non-empty redirect chain of <paramref name="DocCollection"/>
/// into the display list view and auto-sizes its columns. Returns
/// immediately when the collection is empty.
/// </summary>
/// <param name="DocCollection">Collection whose redirect chains are rendered.</param>
private void RenderListViewRedirectChains(MacroscopeDocumentCollection DocCollection)
{
  // An empty collection would make the percentage divisor below zero.
  if (DocCollection.CountDocuments() == 0)
  {
    return;
  }

  List<ListViewItem> ListViewItems = new List<ListViewItem>(DocCollection.CountDocuments());
  List<List<MacroscopeRedirectChainDocStruct>> RedirectChains = DocCollection.GetMacroscopeRedirectChains();
  MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);
  decimal Count = 0;
  decimal TotalDocs = (decimal)DocCollection.CountDocuments();
  decimal MajorPercentage = ((decimal)100 / TotalDocs) * Count;

  // try/finally: the form must be closed and disposed even when rendering throws.
  try
  {
    if (MacroscopePreferencesManager.GetShowProgressDialogues())
    {
      ProgressForm.ControlBox = false;
      ProgressForm.UpdatePercentages(
        Title: "Preparing Display",
        Message: "Processing document collection for display:",
        MajorPercentage: MajorPercentage,
        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
      );
    }

    foreach (List<MacroscopeRedirectChainDocStruct> DocList in RedirectChains)
    {
      Application.DoEvents();

      if (DocList.Count > 0)
      {
        // Best effort: a chain that fails to render is logged and skipped.
        try
        {
          this.RenderListViewRedirectChains(
            ListViewItems: ListViewItems,
            DocCollection: DocCollection,
            DocList: DocList
          );
        }
        catch (Exception ex)
        {
          this.DebugMsg(string.Format("RenderListViewRedirectChains 1: {0}", ex.Message));
        }
      }

      if (MacroscopePreferencesManager.GetShowProgressDialogues())
      {
        Count++;
        MajorPercentage = ((decimal)100 / TotalDocs) * Count;
        ProgressForm.UpdatePercentages(
          Title: null,
          Message: null,
          MajorPercentage: MajorPercentage,
          ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
        );
      }
    }

    this.DisplayListView.Items.AddRange(ListViewItems.ToArray());
    this.DisplayListView.AutoResizeColumns(ColumnHeaderAutoResizeStyle.ColumnContent);
    this.DisplayListView.AutoResizeColumns(ColumnHeaderAutoResizeStyle.HeaderSize);
  }
  finally
  {
    if (MacroscopePreferencesManager.GetShowProgressDialogues())
    {
      ProgressForm.DoClose();
    }
    ProgressForm.Dispose();
  }
}
/**************************************************************************/
/// <summary>
/// Writes a CSV section listing every document whose non-empty ETag is
/// shared by more than one document: status code, status, occurrence count,
/// ETag and URL.
/// </summary>
/// <param name="JobMaster">Supplies the document collection.</param>
/// <param name="ws">CSV writer that receives the header and data records.</param>
private void BuildWorksheetPageDuplicateEtags(
  MacroscopeJobMaster JobMaster,
  CsvWriter ws
)
{
  MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
  // Retained from the original; not read in this method.
  MacroscopeAllowedHosts AllowedHosts = JobMaster.GetAllowedHosts();
  Dictionary<string, int> EtagCounts = new Dictionary<string, int>(DocCollection.CountDocuments());
  Dictionary<string, MacroscopeDocument> DocsByUrl = new Dictionary<string, MacroscopeDocument>(DocCollection.CountDocuments());
  decimal DocCount = (decimal)DocCollection.CountDocuments();
  decimal OuterDone = 0;

  // Pass 1: count each ETag and remember one document per URL.
  foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
  {
    string Etag = msDoc.GetEtag();

    if (string.IsNullOrEmpty(Etag))
    {
      continue;
    }

    if (!DocsByUrl.ContainsKey(msDoc.GetUrl()))
    {
      DocsByUrl.Add(msDoc.GetUrl(), msDoc);
    }

    int Seen;
    EtagCounts.TryGetValue(Etag, out Seen);
    EtagCounts[Etag] = Seen + 1;
  }

  ws.WriteField("Status Code");
  ws.WriteField("Status");
  ws.WriteField("Occurrences");
  ws.WriteField("ETag");
  ws.WriteField("URL");
  ws.NextRecord();

  // Pass 2: for each ETag seen more than once, emit every document carrying it.
  foreach (KeyValuePair<string, int> Entry in EtagCounts)
  {
    string Etag = Entry.Key;
    int Occurrences = Entry.Value;
    decimal InnerDone = 0;

    OuterDone++;

    if (Occurrences <= 1)
    {
      continue;
    }

    foreach (MacroscopeDocument msDoc in DocsByUrl.Values)
    {
      InnerDone++;

      if (DocCount > 0)
      {
        this.ProgressForm.UpdatePercentages(
          Title: null,
          Message: null,
          MajorPercentage: -1,
          ProgressLabelMajor: string.Format("Documents Processed: {0}", OuterDone),
          MinorPercentage: ((decimal)100 / DocCount) * OuterDone,
          ProgressLabelMinor: Etag,
          SubMinorPercentage: ((decimal)100 / DocCount) * InnerDone,
          ProgressLabelSubMinor: msDoc.GetUrl()
        );
      }

      if (msDoc.GetEtag() != Etag)
      {
        continue;
      }

      int StatusCode = (int)msDoc.GetStatusCode();
      HttpStatusCode Status = msDoc.GetStatusCode();

      this.InsertAndFormatStatusCodeCell(ws, StatusCode);
      this.InsertAndFormatStatusCodeCell(ws, Status);
      this.InsertAndFormatContentCell(ws, Occurrences);
      this.InsertAndFormatContentCell(ws, msDoc.GetEtag());
      this.InsertAndFormatUrlCell(ws, msDoc);
      ws.NextRecord();
    }
  }
}
/// <summary>
/// Fetches a page under two URLs and repeatedly asserts that every document
/// the Levenshtein analysis reports for the first has a distance of zero.
/// </summary>
/// <remarks>
/// Both documents are added to the collection before analysing. Without
/// that, <c>AnalyzeDocCollection</c> is handed a collection to which nothing
/// was added and the assertion loop has nothing to check.
/// </remarks>
public async Task TestDuplicate()
{
  const string StartUrl = "https://nazuke.github.io/SEOMacroscope/";
  const string DupeUrl = "https://nazuke.github.io/SEOMacroscope/index.html";

  MacroscopeJobMaster JobMaster = new MacroscopeJobMaster(
    JobRunTimeMode: MacroscopeConstants.RunTimeMode.LIVE,
    TaskController: this
  );
  MacroscopeDocumentCollection DocCollection = new MacroscopeDocumentCollection(JobMaster: JobMaster);
  Dictionary<string, bool> CrossCheckList = MacroscopeLevenshteinAnalysis.GetCrossCheckList(
    Capacity: DocCollection.CountDocuments()
  );

  MacroscopeDocument msDoc = DocCollection.CreateDocument(StartUrl);
  MacroscopeDocument msDocDifferent = DocCollection.CreateDocument(DupeUrl);

  await msDoc.Execute();
  await msDocDifferent.Execute();

  // Register both documents so the analysis has something to compare against.
  DocCollection.AddDocument(msDoc);
  DocCollection.AddDocument(msDocDifferent);

  DebugMsg(string.Format("msDoc: {0}", msDoc.GetStatusCode()));
  DebugMsg(string.Format("msDocDifferent: {0}", msDocDifferent.GetStatusCode()));

  for (int i = 1; i <= 100; i++)
  {
    MacroscopeLevenshteinAnalysis LevenshteinAnalysis = new MacroscopeLevenshteinAnalysis(
      msDoc: msDoc,
      SizeDifference: 64,
      Threshold: 16,
      CrossCheckList: CrossCheckList
    );
    Dictionary<MacroscopeDocument, int> DocList = LevenshteinAnalysis.AnalyzeDocCollection(
      DocCollection: DocCollection
    );

    DebugMsg(string.Format("DocList: {0}", DocList.Count));

    foreach (KeyValuePair<MacroscopeDocument, int> Analyzed in DocList)
    {
      DebugMsg(string.Format("msDocAnalyzed: {0} => {1}", Analyzed.Value, Analyzed.Key.GetUrl()));

      Assert.AreEqual(
        Analyzed.Value,
        0,
        string.Format("FAIL: {0} => {1}", Analyzed.Value, Analyzed.Key.GetUrl())
      );
    }
  }
}
/**************************************************************************/
/// <summary>
/// Writes a CSV section listing internal HTML and PDF documents whose title
/// count, as reported by the collection, is greater than one: URL,
/// occurrence count and title.
/// </summary>
/// <param name="JobMaster">Supplies the document collection and the allowed hosts.</param>
/// <param name="ws">CSV writer that receives the header and data records.</param>
private void BuildWorksheetPageDuplicateTitles(
  MacroscopeJobMaster JobMaster,
  CsvWriter ws
)
{
  MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
  MacroscopeAllowedHosts AllowedHosts = JobMaster.GetAllowedHosts();
  decimal DocCount = (decimal)DocCollection.CountDocuments();
  decimal Done = 0;

  ws.WriteField("URL");
  ws.WriteField("Occurrences");
  ws.WriteField("Title");
  ws.NextRecord();

  foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
  {
    if (DocCount > 0)
    {
      Done++;
      this.ProgressForm.UpdatePercentages(
        Title: null,
        Message: null,
        MajorPercentage: -1,
        ProgressLabelMajor: string.Format("Documents Processed: {0}", Done),
        MinorPercentage: ((decimal)100 / DocCount) * Done,
        ProgressLabelMinor: msDoc.GetUrl(),
        SubMinorPercentage: -1,
        ProgressLabelSubMinor: null
      );
    }

    if (!AllowedHosts.IsInternalUrl(Url: msDoc.GetUrl()))
    {
      continue;
    }

    // Only HTML and PDF documents are considered.
    MacroscopeConstants.DocumentType DocType = msDoc.GetDocumentType();
    bool IsPage = (DocType == MacroscopeConstants.DocumentType.HTML)
                  || (DocType == MacroscopeConstants.DocumentType.PDF);

    if (!IsPage)
    {
      continue;
    }

    string Title = msDoc.GetTitle();
    int Occurrences = DocCollection.GetStatsTitleCount(msDoc: msDoc);

    if (Occurrences <= 1)
    {
      continue;
    }

    this.InsertAndFormatUrlCell(ws, msDoc);
    this.InsertAndFormatContentCell(ws, Occurrences);
    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(Title));
    ws.NextRecord();
  }
}
/**************************************************************************/
/// <summary>
/// Adds a worksheet with one row per (keyword, status) pair returned by the
/// collection's keyword presence analysis for each document, colour-coding
/// the status cell, then turns the written range into a table.
/// </summary>
/// <param name="JobMaster">Not read by this method.</param>
/// <param name="wb">Workbook that receives the new worksheet.</param>
/// <param name="WorksheetLabel">Name given to the new worksheet.</param>
/// <param name="DocCollection">Documents to analyse.</param>
private void BuildWorksheetKeywordsPresence(
  MacroscopeJobMaster JobMaster,
  XLWorkbook wb,
  string WorksheetLabel,
  MacroscopeDocumentCollection DocCollection
)
{
  var ws = wb.Worksheets.Add(WorksheetLabel);
  int iRow = 1;
  int iCol = 1;
  int iColMax;
  decimal DocCount = 0;
  decimal DocTotal = (decimal)DocCollection.CountDocuments();

  ws.Cell(iRow, iCol).Value = "Presence";
  iCol++;
  ws.Cell(iRow, iCol).Value = "Keyword";
  iCol++;
  ws.Cell(iRow, iCol).Value = "URL";

  iColMax = iCol;
  iRow++;

  foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
  {
    List<KeyValuePair<string, MacroscopeKeywordPresenceAnalysis.KEYWORD_STATUS>> KeywordPresence =
      DocCollection.GetKeywordPresenceAnalysis(msDoc: msDoc);

    // Count first, then report: the first document is reported and the last reaches 100%.
    DocCount++;

    // Guard the divisor itself, not the running counter.
    if (DocTotal > 0)
    {
      this.ProgressForm.UpdatePercentages(
        Title: null,
        Message: null,
        MajorPercentage: -1,
        ProgressLabelMajor: null,
        MinorPercentage: ((decimal)100 / DocTotal) * DocCount,
        ProgressLabelMinor: "Documents Processed"
      );
    }

    if (KeywordPresence == null)
    {
      continue;
    }

    foreach (KeyValuePair<string, MacroscopeKeywordPresenceAnalysis.KEYWORD_STATUS> Pair in KeywordPresence)
    {
      iCol = 1;

      this.InsertAndFormatContentCell(ws, iRow, iCol, Pair.Value.ToString());

      switch (Pair.Value)
      {
        case MacroscopeKeywordPresenceAnalysis.KEYWORD_STATUS.KEYWORDS_METATAG_EMPTY:
        case MacroscopeKeywordPresenceAnalysis.KEYWORD_STATUS.MALFORMED_KEYWORDS_METATAG:
        case MacroscopeKeywordPresenceAnalysis.KEYWORD_STATUS.MISSING_IN_TITLE:
        case MacroscopeKeywordPresenceAnalysis.KEYWORD_STATUS.MISSING_IN_BODY:
          ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Red);
          break;

        case MacroscopeKeywordPresenceAnalysis.KEYWORD_STATUS.PRESENT_IN_TITLE:
        case MacroscopeKeywordPresenceAnalysis.KEYWORD_STATUS.PRESENT_IN_DESCRIPTION:
        case MacroscopeKeywordPresenceAnalysis.KEYWORD_STATUS.PRESENT_IN_BODY:
          ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Green);
          break;

        case MacroscopeKeywordPresenceAnalysis.KEYWORD_STATUS.MISSING_IN_DESCRIPTION:
          ws.Cell(iRow, iCol).Style.Font.SetFontColor(XLColor.Orange);
          break;

        default:
          // Any other status keeps the default font colour.
          break;
      }

      iCol++;
      this.InsertAndFormatContentCell(ws, iRow, iCol, Pair.Key);

      iCol++;
      this.InsertAndFormatUrlCell(ws, iRow, iCol, msDoc.GetUrl());

      iRow++;
    }
  }

  // Header plus every data row written above.
  ws.Range(1, 1, iRow - 1, iColMax).CreateTable();
}
/**************************************************************************/
/// <summary>
/// Adds a worksheet listing every document whose non-empty checksum is
/// shared by more than one document (status code, status, occurrence count,
/// checksum and URL), then turns the written range into a table.
/// </summary>
/// <param name="JobMaster">Supplies the document collection.</param>
/// <param name="wb">Workbook that receives the new worksheet.</param>
/// <param name="WorksheetLabel">Name given to the new worksheet.</param>
private void BuildWorksheetPageDuplicateChecksums(
  MacroscopeJobMaster JobMaster,
  XLWorkbook wb,
  string WorksheetLabel
)
{
  string[] ColumnHeadings = { "Status Code", "Status", "Occurrences", "Checksum", "URL" };

  var ws = wb.Worksheets.Add(WorksheetLabel);
  MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();
  // Retained from the original; not read in this method.
  MacroscopeAllowedHosts AllowedHosts = JobMaster.GetAllowedHosts();
  Dictionary<string, int> ChecksumCounts = new Dictionary<string, int>(DocCollection.CountDocuments());
  Dictionary<string, MacroscopeDocument> DocsByUrl = new Dictionary<string, MacroscopeDocument>(DocCollection.CountDocuments());
  decimal DocCount = (decimal)DocCollection.CountDocuments();
  decimal OuterDone = 0;
  int LastCol = ColumnHeadings.Length;
  int Row = 1;

  // Pass 1: count each checksum and remember one document per URL.
  foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
  {
    string Checksum = msDoc.GetChecksum();

    if (string.IsNullOrEmpty(Checksum))
    {
      continue;
    }

    if (!DocsByUrl.ContainsKey(msDoc.GetUrl()))
    {
      DocsByUrl.Add(msDoc.GetUrl(), msDoc);
    }

    int Seen;
    ChecksumCounts.TryGetValue(Checksum, out Seen);
    ChecksumCounts[Checksum] = Seen + 1;
  }

  // Header row.
  for (int HeadCol = 1; HeadCol <= LastCol; HeadCol++)
  {
    ws.Cell(Row, HeadCol).Value = ColumnHeadings[HeadCol - 1];
  }
  Row++;

  // Pass 2: for each checksum seen more than once, emit every document carrying it.
  foreach (KeyValuePair<string, int> Entry in ChecksumCounts)
  {
    string Checksum = Entry.Key;
    int Occurrences = Entry.Value;
    decimal InnerDone = 0;

    OuterDone++;

    if (Occurrences <= 1)
    {
      continue;
    }

    foreach (MacroscopeDocument msDoc in DocsByUrl.Values)
    {
      InnerDone++;

      if (DocCount > 0)
      {
        this.ProgressForm.UpdatePercentages(
          Title: null,
          Message: null,
          MajorPercentage: -1,
          ProgressLabelMajor: string.Format("Documents Processed: {0}", OuterDone),
          MinorPercentage: ((decimal)100 / DocCount) * OuterDone,
          ProgressLabelMinor: Checksum,
          SubMinorPercentage: ((decimal)100 / DocCount) * InnerDone,
          ProgressLabelSubMinor: msDoc.GetUrl()
        );
      }

      if (msDoc.GetChecksum() != Checksum)
      {
        continue;
      }

      int Col = 1;
      int StatusCode = (int)msDoc.GetStatusCode();
      HttpStatusCode Status = msDoc.GetStatusCode();

      this.InsertAndFormatStatusCodeCell(ws, Row, Col, StatusCode);
      Col++;
      this.InsertAndFormatStatusCodeCell(ws, Row, Col, Status);
      Col++;
      this.InsertAndFormatContentCell(ws, Row, Col, Occurrences);
      Col++;
      this.InsertAndFormatContentCell(ws, Row, Col, msDoc.GetChecksum());
      Col++;
      this.InsertAndFormatUrlCell(ws, Row, Col, msDoc);

      Row++;
    }
  }

  // Header plus every data row written above.
  ws.Range(1, 1, Row - 1, LastCol).CreateTable();
}
/** XML Sitemap Generators ************************************************/
/// <summary>
/// Builds an XML sitemap document containing one <c>url</c> entry for every
/// internal, non-redirect HTML or PDF document that returned HTTP 200.
/// </summary>
/// <param name="Host">
/// When non-empty, only documents whose host-and-port equals this value are
/// included. The filter only narrows the result: it never re-admits a
/// document already rejected by the document type check.
/// </param>
/// <returns>The sitemap as an <see cref="XmlDocument"/> with a <c>urlset</c> root.</returns>
public XmlDocument GenerateXmlSitemap(string Host)
{
  Dictionary<string, bool> Dedupe = new Dictionary<string, bool>(this.DocCollection.CountDocuments());
  XmlDocument SitemapXml = new XmlDocument();
  XmlDeclaration SitemapXmlDeclaration = SitemapXml.CreateXmlDeclaration("1.0", "UTF-8", null);
  XmlElement RootNode = SitemapXml.DocumentElement;
  XmlElement UrlSetNode = SitemapXml.CreateElement(string.Empty, "urlset", MacroscopeSitemapGenerator.XmlNamespace);
  bool FilterByHost = !string.IsNullOrEmpty(Host);

  SitemapXml.InsertBefore(SitemapXmlDeclaration, RootNode);
  SitemapXml.AppendChild(UrlSetNode);

  foreach (MacroscopeDocument msDoc in this.DocCollection.IterateDocuments())
  {
    if (!msDoc.GetStatusCode().Equals(HttpStatusCode.OK))
    {
      continue;
    }

    if ((!msDoc.GetIsInternal()) || msDoc.GetIsRedirect())
    {
      continue;
    }

    MacroscopeConstants.DocumentType DocType = msDoc.GetDocumentType();

    if ((DocType != MacroscopeConstants.DocumentType.HTML)
        && (DocType != MacroscopeConstants.DocumentType.PDF))
    {
      continue;
    }

    // Previously a host match reset the decision to "include", overriding the type check above.
    if (FilterByHost && !msDoc.GetHostAndPort().Equals(Host))
    {
      continue;
    }

    XmlElement UrlNode = SitemapXml.CreateElement(string.Empty, "url", MacroscopeSitemapGenerator.XmlNamespace);
    UrlSetNode.AppendChild(UrlNode);

    // Child elements of <url>, in output order: { element name, text content }.
    string[][] Entries =
    {
      new string[] { "loc", msDoc.GetUrl() },
      new string[] { "lastmod", msDoc.GetDateModifiedForSitemapXml() },
      new string[] { "changefreq", "daily" },
      new string[] { "priority", "1.0" }
    };

    foreach (string[] Entry in Entries)
    {
      XmlElement EntryNode = SitemapXml.CreateElement(string.Empty, Entry[0], MacroscopeSitemapGenerator.XmlNamespace);
      XmlText TextNode = SitemapXml.CreateTextNode(Entry[1]);
      UrlNode.AppendChild(EntryNode);
      EntryNode.AppendChild(TextNode);
    }

    if (MacroscopePreferencesManager.GetSitemapIncludeLinkedPdfs()
        && msDoc.IsDocumentType(Type: MacroscopeConstants.DocumentType.HTML))
    {
      this.GenerateXmlSitemapPdfEntries(
        msDoc: msDoc,
        SitemapXml: SitemapXml,
        UrlSetNode: UrlSetNode,
        Dedupe: Dedupe
      );
    }
  }

  return (SitemapXml);
}
/**************************************************************************/
/// <summary>
/// Renders the CSS-selector extraction results of the documents named in
/// <paramref name="UrlList"/> into the display list view, one row per
/// (URL, selector label, extracted value) triple. Returns immediately when
/// the collection is empty.
/// </summary>
/// <param name="DocCollection">Collection the URLs are looked up in.</param>
/// <param name="UrlList">URLs of the documents to render.</param>
/// <param name="DataExtractor">Decides whether extractors apply to a given document.</param>
private void RenderListView(
  MacroscopeDocumentCollection DocCollection,
  List<string> UrlList,
  MacroscopeDataExtractorCssSelectors DataExtractor
)
{
  if (DocCollection.CountDocuments() == 0)
  {
    return;
  }

  List<ListViewItem> ListViewItems = new List<ListViewItem>();
  MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);
  decimal Count = 0;
  decimal TotalDocs = (decimal)DocCollection.CountDocuments();
  decimal MajorPercentage = ((decimal)100 / TotalDocs) * Count;

  // try/finally: the form must be closed and disposed even when rendering throws.
  try
  {
    if (MacroscopePreferencesManager.GetShowProgressDialogues())
    {
      ProgressForm.UpdatePercentages(
        Title: "Preparing Display",
        Message: "Processing document collection for display:",
        MajorPercentage: MajorPercentage,
        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
      );
    }

    foreach (string Url in UrlList)
    {
      MacroscopeDocument msDoc = DocCollection.GetDocumentByUrl(Url: Url);

      // Skip a URL with no document instead of dereferencing null.
      if (msDoc == null)
      {
        DebugMsg(string.Format("MacroscopeDisplayDataExtractorCssSelectors MISSING: {0}", Url));
        continue;
      }

      string DocUrl = msDoc.GetUrl();
      string StatusCode = ((int)msDoc.GetStatusCode()).ToString();
      string Status = msDoc.GetStatusCode().ToString();
      string MimeType = msDoc.GetMimeType();

      if (!DataExtractor.CanApplyDataExtractorsToDocument(msDoc: msDoc))
      {
        continue;
      }

      // Per-document colours, computed once rather than once per extracted row.
      Color UrlColor = msDoc.GetIsInternal() ? Color.Green : Color.Gray;
      Color StatusColor;

      if (Regex.IsMatch(StatusCode, "^[2]"))
      {
        StatusColor = Color.Green;
      }
      else if (Regex.IsMatch(StatusCode, "^[3]"))
      {
        StatusColor = Color.Goldenrod;
      }
      else if (Regex.IsMatch(StatusCode, "^[45]"))
      {
        StatusColor = Color.Red;
      }
      else
      {
        StatusColor = Color.Blue;
      }

      foreach (KeyValuePair<string, string> DataExtractedPair in msDoc.IterateDataExtractedCssSelectors())
      {
        ListViewItem lvItem = null;
        string CssSelectorLabel = DataExtractedPair.Key;
        string ExtractedValue = DataExtractedPair.Value;
        string PairKey = null;

        if (string.IsNullOrEmpty(CssSelectorLabel) || string.IsNullOrEmpty(ExtractedValue))
        {
          continue;
        }

        PairKey = string.Join(
          ":",
          UrlToDigest(DocUrl),
          UrlToDigest(Macroscope.GetStringDigest(Text: CssSelectorLabel)),
          UrlToDigest(Macroscope.GetStringDigest(Text: ExtractedValue))
        );

        if (this.DisplayListView.Items.ContainsKey(PairKey))
        {
          lvItem = this.DisplayListView.Items[PairKey];
        }
        else
        {
          lvItem = new ListViewItem(PairKey);
          lvItem.UseItemStyleForSubItems = false;
          lvItem.Name = PairKey;

          for (int i = 0; i < 6; i++)
          {
            lvItem.SubItems.Add("");
          }

          ListViewItems.Add(lvItem);
        }

        if (lvItem == null)
        {
          DebugMsg(string.Format("MacroscopeDisplayDataExtractorCssSelectors MISSING: {0}", PairKey));
          continue;
        }

        // Text and colours share one guarded block: a row that cannot be filled is logged, not fatal.
        try
        {
          lvItem.SubItems[ColUrl].Text = DocUrl;
          lvItem.SubItems[ColStatusCode].Text = StatusCode;
          lvItem.SubItems[ColStatus].Text = Status;
          lvItem.SubItems[ColMimeType].Text = MimeType;
          lvItem.SubItems[ColCssSelectorLabel].Text = CssSelectorLabel;
          lvItem.SubItems[ColExtractedValue].Text = ExtractedValue;

          lvItem.SubItems[ColUrl].ForeColor = UrlColor;
          lvItem.SubItems[ColStatusCode].ForeColor = StatusColor;
          lvItem.SubItems[ColStatus].ForeColor = StatusColor;
        }
        catch (Exception ex)
        {
          DebugMsg(string.Format("MacroscopeDisplayDataExtractorCssSelectors: {0}", ex.Message));
          DebugMsg(string.Format("MacroscopeDisplayDataExtractorCssSelectors: {0}", ex.StackTrace));
        }
      }

      if (MacroscopePreferencesManager.GetShowProgressDialogues())
      {
        Count++;
        MajorPercentage = ((decimal)100 / TotalDocs) * Count;
        ProgressForm.UpdatePercentages(
          Title: null,
          Message: null,
          MajorPercentage: MajorPercentage,
          ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
        );
      }
    }

    this.DisplayListView.Items.AddRange(ListViewItems.ToArray());
    this.DisplayListView.AutoResizeColumns(ColumnHeaderAutoResizeStyle.ColumnContent);
    this.DisplayListView.Columns[ColUrl].Width = 300;
    this.DisplayListView.Columns[ColStatusCode].Width = 100;
    this.DisplayListView.Columns[ColStatus].Width = 100;
    this.DisplayListView.Columns[ColMimeType].Width = 100;
  }
  finally
  {
    if (MacroscopePreferencesManager.GetShowProgressDialogues())
    {
      ProgressForm.DoClose();
    }
    ProgressForm.Dispose();
  }
}
/**************************************************************************/
/// <summary>
/// Rebuilds the display list view as an hreflang matrix: one row per
/// internal, non-redirect HTML document, five fixed columns, and three
/// columns (URL, server date, modified date) per locale in
/// <paramref name="LocalesList"/>. Returns immediately when the collection
/// is empty.
/// </summary>
/// <param name="DocCollection">Documents to render.</param>
/// <param name="LocalesList">Locales to show; only the keys are read.</param>
private void RenderListView(
  MacroscopeDocumentCollection DocCollection,
  Dictionary<string, string> LocalesList
)
{
  MacroscopeAllowedHosts AllowedHosts = this.MainForm.GetJobMaster().GetAllowedHosts();
  SortedDictionary<string, int> LocaleColsTable = new SortedDictionary<string, int>();

  if (DocCollection.CountDocuments() == 0)
  {
    return;
  }

  List<ListViewItem> ListViewItems = new List<ListViewItem>();
  MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);
  decimal Count = 0;
  decimal TotalDocs = (decimal)DocCollection.CountDocuments();
  decimal MajorPercentage = ((decimal)100 / TotalDocs) * Count;

  // try/finally: the form must be closed and disposed even when rendering throws.
  try
  {
    if (MacroscopePreferencesManager.GetShowProgressDialogues())
    {
      ProgressForm.UpdatePercentages(
        Title: "Preparing Display",
        Message: "Processing document collection for display:",
        MajorPercentage: MajorPercentage,
        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
      );
    }

    this.DisplayListView.Items.Clear();
    this.DisplayListView.Columns.Clear();

    {
      // Locale columns start after the five fixed columns.
      int LocaleColCount = 5;

      this.DisplayListView.Columns.Add("URL", "URL");
      this.DisplayListView.Columns.Add("Status Code", "Status Code");
      this.DisplayListView.Columns.Add("Site Locale", "Site Locale");
      this.DisplayListView.Columns.Add("HrefLang Present", "HrefLang Present");
      this.DisplayListView.Columns.Add("Title", "Title");

      foreach (string Locale in LocalesList.Keys)
      {
        string LocaleLabel = Locale.ToUpper();
        string DateServerLabel = string.Format("{0} Date Server", Locale.ToUpper());
        string DateModifiedLabel = string.Format("{0} Date Modified", Locale.ToUpper());

        this.DisplayListView.Columns.Add(LocaleLabel, LocaleLabel);
        this.DisplayListView.Columns.Add(DateServerLabel, DateServerLabel);
        this.DisplayListView.Columns.Add(DateModifiedLabel, DateModifiedLabel);

        LocaleColsTable[Locale] = LocaleColCount;
        LocaleColCount++;
        LocaleColsTable[DateServerLabel] = LocaleColCount;
        LocaleColCount++;
        LocaleColsTable[DateModifiedLabel] = LocaleColCount;
        LocaleColCount++;
      }
    }

    foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
    {
      bool Proceed = msDoc.GetIsInternal()
                     && !msDoc.GetIsRedirect()
                     && msDoc.IsDocumentType(Type: MacroscopeConstants.DocumentType.HTML);

      if (Proceed)
      {
        Dictionary<string, MacroscopeHrefLang> HrefLangsTable = msDoc.GetHrefLangs();
        string DocUrl = msDoc.GetUrl();
        string PairKey = UrlToDigest(DocUrl).ToString();
        HttpStatusCode StatusCode = msDoc.GetStatusCode();
        int StatusCodeNum = (int)StatusCode;
        string DocLocale = msDoc.GetLocale();
        string DocTitle = msDoc.GetTitle();
        bool HrefLangSpecified = (HrefLangsTable != null) && (HrefLangsTable.Count > 1);
        ListViewItem lvItem = null;

        if (this.DisplayListView.Items.ContainsKey(PairKey))
        {
          lvItem = this.DisplayListView.Items[PairKey];
        }
        else
        {
          // Five fixed sub-items plus three per locale.
          int BlankSubItems = 5 + (3 * LocalesList.Keys.Count);

          lvItem = new ListViewItem(PairKey);
          lvItem.UseItemStyleForSubItems = false;
          lvItem.Name = PairKey;

          for (int i = 0; i < BlankSubItems; i++)
          {
            lvItem.SubItems.Add("");
          }

          ListViewItems.Add(lvItem);
        }

        if (lvItem != null)
        {
          try
          {
            lvItem.SubItems[ColUrl].Text = DocUrl;
            lvItem.SubItems[ColStatusCode].Text = StatusCode.ToString();
            lvItem.SubItems[ColSiteLocale].Text = DocLocale;
            lvItem.SubItems[ColTitle].Text = DocTitle;

            if (HrefLangSpecified)
            {
              lvItem.SubItems[ColHrefLangPresent].ForeColor = Color.Green;
              lvItem.SubItems[ColHrefLangPresent].Text = "SPECIFIED";
            }
            else
            {
              lvItem.SubItems[ColHrefLangPresent].ForeColor = Color.Red;
              lvItem.SubItems[ColHrefLangPresent].Text = "UNSPECIFIED";
            }

            lvItem.SubItems[ColUrl].ForeColor = AllowedHosts.IsInternalUrl(DocUrl) ? Color.Green : Color.Gray;

            if ((StatusCodeNum >= 100) && (StatusCodeNum <= 299))
            {
              lvItem.SubItems[ColStatusCode].ForeColor = Color.Green;
            }
            else if ((StatusCodeNum >= 300) && (StatusCodeNum <= 399))
            {
              lvItem.SubItems[ColStatusCode].ForeColor = Color.Orange;
            }
            else if ((StatusCodeNum >= 400) && (StatusCodeNum <= 599))
            {
              lvItem.SubItems[ColStatusCode].ForeColor = Color.Red;
            }
            else
            {
              // Fix: colour the status-code cell; this branch used to recolour the site-locale cell.
              lvItem.SubItems[ColStatusCode].ForeColor = Color.Gray;
            }

            foreach (string Locale in LocalesList.Keys)
            {
              if (string.IsNullOrEmpty(Locale))
              {
                continue;
              }

              string HrefLangUrl = null;
              DateTime HrefLangDateServer = new DateTime();
              DateTime HrefLangDateModified = new DateTime();
              int LocaleCol = LocaleColsTable[Locale];
              MacroscopeHrefLang HrefLangAlternate;

              if ((HrefLangsTable != null)
                  && HrefLangsTable.TryGetValue(Locale, out HrefLangAlternate)
                  && (HrefLangAlternate != null))
              {
                HrefLangUrl = HrefLangAlternate.GetUrl();
                HrefLangDateServer = HrefLangAlternate.GetDateServer();
                HrefLangDateModified = HrefLangAlternate.GetDateModified();
              }

              if (!string.IsNullOrEmpty(HrefLangUrl))
              {
                lvItem.SubItems[LocaleCol].ForeColor = Color.Blue;
                lvItem.SubItems[LocaleCol].Text = HrefLangUrl;
                lvItem.SubItems[LocaleCol + 1].Text = HrefLangDateServer.ToString();
                lvItem.SubItems[LocaleCol + 2].Text = HrefLangDateModified.ToString();
              }
              else
              {
                lvItem.SubItems[LocaleCol].ForeColor = Color.Red;
                lvItem.SubItems[LocaleCol].Text = "NOT SPECIFIED";
                lvItem.SubItems[LocaleCol + 1].Text = "NOT SPECIFIED";
                lvItem.SubItems[LocaleCol + 2].Text = "NOT SPECIFIED";
              }
            }
          }
          catch (Exception ex)
          {
            DebugMsg(string.Format("MacroscopeDisplayHrefLang: {0}", ex.Message));
            DebugMsg(string.Format("MacroscopeDisplayHrefLang: {0}", ex.StackTrace));
          }
        }
        else
        {
          DebugMsg(string.Format("MacroscopeDisplayHrefLang NOT SPECIFIED: {0}", PairKey));
        }
      }

      if (MacroscopePreferencesManager.GetShowProgressDialogues())
      {
        Count++;
        MajorPercentage = ((decimal)100 / TotalDocs) * Count;
        ProgressForm.UpdatePercentages(
          Title: null,
          Message: null,
          MajorPercentage: MajorPercentage,
          ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
        );
      }
    }

    this.DisplayListView.Items.AddRange(ListViewItems.ToArray());
    this.DisplayListView.AutoResizeColumns(ColumnHeaderAutoResizeStyle.ColumnContent);
    this.DisplayListView.Columns[ColUrl].Width = 300;
    this.DisplayListView.Columns[ColStatusCode].Width = 80;
    this.DisplayListView.Columns[ColSiteLocale].Width = 100;
    this.DisplayListView.Columns[ColTitle].Width = 100;
  }
  finally
  {
    if (MacroscopePreferencesManager.GetShowProgressDialogues())
    {
      ProgressForm.DoClose();
    }
    ProgressForm.Dispose();
  }
}