/** TEXT Sitemap Generators ***********************************************/
// Builds the list of URLs for a plain-text sitemap from this.DocCollection.
//
// A document is listed only when ALL of the following hold:
//   - its status code equals HttpStatusCode.OK,
//   - it is internal and is not a redirect,
//   - it is an HTML or PDF document,
//   - Host is null/empty, or the document's host-and-port equals Host.
//
// Previously a matching Host forced the document in even when it was neither
// HTML nor PDF, so host-filtered sitemaps could list other resource types
// that the unfiltered sitemap excludes. The host test now only narrows the
// selection.
//
// Host:    optional host[:port] filter; null or empty disables the filter.
// Returns: the collected URLs (never null).
public List<string> GenerateTextSitemap(string Host)
{
    int DocumentCount = this.DocCollection.CountDocuments();
    Dictionary<string, Boolean> Dedupe = new Dictionary<string, Boolean>(DocumentCount);
    List<string> SitemapText = new List<string>(DocumentCount);
    bool FilterByHost = !string.IsNullOrEmpty(Host);

    foreach (MacroscopeDocument msDoc in this.DocCollection.IterateDocuments())
    {
        if (!msDoc.GetStatusCode().Equals(HttpStatusCode.OK))
        {
            continue;
        }

        if ((!msDoc.GetIsInternal()) || msDoc.GetIsRedirect())
        {
            continue;
        }

        if (!(msDoc.GetIsHtml() || msDoc.GetIsPdf()))
        {
            continue;
        }

        // The host filter can only exclude documents, never re-admit them.
        if (FilterByHost && !msDoc.GetHostAndPort().Equals(Host))
        {
            continue;
        }

        SitemapText.Add(msDoc.GetUrl());

        // Optionally append the PDF entries for HTML pages; Dedupe is
        // handed to the helper together with the output list.
        if (MacroscopePreferencesManager.GetSitemapIncludeLinkedPdfs() && msDoc.GetIsHtml())
        {
            this.GenerateTextSitemapPdfEntries(
                msDoc: msDoc,
                SitemapText: SitemapText,
                Dedupe: Dedupe
            );
        }
    }

    return SitemapText;
}
/** Sitemaps **************************************************************/
// Collects the sitemap URLs declared by the robots data fetched for Url.
//
// When the "follow robots protocol" preference is off, an empty list is
// returned without fetching anything. Each declared sitemap URL is made
// absolute against Url before being added. Any exception raised while
// reading the sitemap entries is logged via DebugMsg and the entries
// gathered so far are returned.
//
// Url:     the URL used both to fetch the robots data and as the base URL.
// Returns: a list of absolute sitemap URLs (never null).
public async Task<List<string>> GetSitemapsAsList(string Url)
{
    List<string> Collected = new List<string>();

    if (!MacroscopePreferencesManager.GetFollowRobotsProtocol())
    {
        return Collected;
    }

    // The fetch itself is intentionally outside the try block, as before.
    Robots robot = await this.FetchRobot(Url: Url);

    try
    {
        bool HasSitemaps = (robot != null) && (robot.Sitemaps != null);

        if (HasSitemaps)
        {
            foreach (Sitemap SitemapEntry in robot.Sitemaps)
            {
                string DeclaredUrl = SitemapEntry.Url.ToString();
                string AbsoluteUrl = MacroscopeHttpUrlUtils.MakeUrlAbsolute(BaseUrl: Url, Url: DeclaredUrl);

                Collected.Add(AbsoluteUrl);

                // Note: the declared (not the absolute) URL is what gets logged.
                this.DebugMsg(string.Format("ROBOTS SitemapUrl: {0}", DeclaredUrl));
            }
        }
    }
    catch (Exception ex)
    {
        this.DebugMsg(ex.Message);
    }

    return Collected;
}
/** -------------------------------------------------------------------- **/
// Creates the readability analyzer for the given ISO language code.
//
// "x-default" is treated as "en". Only "en" is supported here; for English
// the analyzer is chosen by the configured English readability algorithm
// (FLESCH_KINCAID or SMOG). Any other language code, or any other algorithm
// value, yields null.
//
// IsoLanguageCode: language code; must not be null (it is dereferenced).
// Returns:         an analyzer instance, or null when none applies.
public static IMacroscopeAnalyzeReadability AnalyzerFactory(string IsoLanguageCode)
{
    string LanguageCode = IsoLanguageCode.Equals("x-default") ? "en" : IsoLanguageCode;

    if (LanguageCode != "en")
    {
        return null;
    }

    switch (MacroscopePreferencesManager.GetAnalyzeTextReadabilityEnglishAlgorithm())
    {
        case MacroscopeAnalyzeReadability.AnalyzeReadabilityEnglishAlgorithm.FLESCH_KINCAID:
            return new MacroscopeAnalyzeReadabilityFleschKincaid();

        case MacroscopeAnalyzeReadability.AnalyzeReadabilityEnglishAlgorithm.SMOG:
            return new MacroscopeAnalyzeReadabilitySmog();

        default:
            return null;
    }
}
/**************************************************************************/
// Renders every URL from the job master's blocked-by-robots list into the
// display list view, optionally reporting progress in a dialogue.
//
// Changes relative to the previous version:
//   - the per-item progress update now honours the "show progress
//     dialogues" preference, like the initial update and the close call do;
//   - the dictionary is enumerated as key/value pairs instead of looking
//     each key up again through the indexer;
//   - the item buffer is presized to the number of entries;
//   - the progress form is disposed in a finally block so it is released
//     even if rendering throws.
//
// JobMaster: source of the blocked list and of the allowed-hosts object.
public void RenderListView(MacroscopeJobMaster JobMaster)
{
    Dictionary<String, bool> Blocked = JobMaster.GetBlockedByRobotsList();

    if (Blocked.Count == 0)
    {
        return;
    }

    List<ListViewItem> ListViewItems = new List<ListViewItem>(Blocked.Count);
    MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

    try
    {
        decimal Count = 0;
        decimal TotalDocs = (decimal)Blocked.Count; // non-zero: guarded above
        decimal MajorPercentage = ((decimal)100 / TotalDocs) * Count;

        if (MacroscopePreferencesManager.GetShowProgressDialogues())
        {
            ProgressForm.UpdatePercentages(
                Title: "Preparing Display",
                Message: "Processing document collection for display:",
                MajorPercentage: MajorPercentage,
                ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
            );
        }

        foreach (KeyValuePair<String, bool> Entry in Blocked)
        {
            bool IsInternal = JobMaster.GetAllowedHosts().IsInternalUrl(Entry.Key);

            this.RenderListView(
                ListViewItems: ListViewItems,
                Url: Entry.Key,
                IsBlocked: Entry.Value,
                IsInternal: IsInternal
            );

            Count++;

            if (MacroscopePreferencesManager.GetShowProgressDialogues())
            {
                MajorPercentage = ((decimal)100 / TotalDocs) * Count;

                ProgressForm.UpdatePercentages(
                    Title: null,
                    Message: null,
                    MajorPercentage: MajorPercentage,
                    ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                );
            }
        }

        this.DisplayListView.Items.AddRange(ListViewItems.ToArray());

        if (MacroscopePreferencesManager.GetShowProgressDialogues())
        {
            ProgressForm.DoClose();
        }
    }
    finally
    {
        ProgressForm.Dispose();
    }
}
/**************************************************************************/
// Preference preset: resets preferences to their defaults, applies the
// HtmlOnly() preset, then switches the individual fetch/process flags
// listed below and saves the preferences.
public static void HtmlAndLinkedAssets()
{
    // Start from a known baseline.
    MacroscopePreferencesManager.SetDefaultValues();
    HtmlOnly();

    // Which resource types are fetched.
    MacroscopePreferencesManager.SetFetchStylesheets(true);
    MacroscopePreferencesManager.SetFetchJavascripts(true);
    MacroscopePreferencesManager.SetFetchImages(true);
    MacroscopePreferencesManager.SetFetchAudio(true);
    MacroscopePreferencesManager.SetFetchVideo(true);
    MacroscopePreferencesManager.SetFetchXml(true);
    MacroscopePreferencesManager.SetFetchBinaries(false);

    // Which resource types are processed.
    MacroscopePreferencesManager.SetProcessAudio(true);
    MacroscopePreferencesManager.SetProcessBinaries(false);
    MacroscopePreferencesManager.SetProcessImages(true);
    MacroscopePreferencesManager.SetProcessJavascripts(true);
    MacroscopePreferencesManager.SetProcessPdfs(false);
    MacroscopePreferencesManager.SetProcessStylesheets(true);
    MacroscopePreferencesManager.SetProcessVideo(false);
    MacroscopePreferencesManager.SetProcessXml(false);

    // Persist the resulting configuration.
    MacroscopePreferencesManager.SavePreferences();
}
// Test: fetches each URL in the list ten times with language detection
// enabled and a 10-second request timeout, and asserts that the document
// executes, is HTML, has a non-empty title, and that the detected language
// of title, description and body text is "en".
public void TestDetectLanguage()
{
    const int Repetitions = 10;
    const string ExpectedLanguage = "en";

    List<string> UrlList = new List<string>
    {
        "https://nazuke.github.io/SEOMacroscope/"
    };

    MacroscopePreferencesManager.SetDetectLanguage(Enabled: true);
    MacroscopePreferencesManager.SetRequestTimeout(Seconds: 10);

    for (int Pass = 0; Pass < Repetitions; Pass++)
    {
        foreach (string Url in UrlList)
        {
            MacroscopeDocument msDoc = new MacroscopeDocument(Url: Url);

            Assert.IsNotNull(msDoc, string.Format("FAIL: {0}", Url));
            Assert.IsTrue(msDoc.Execute(), string.Format("FAIL: {0}", "Execute()"));
            Assert.IsTrue(msDoc.GetIsHtml(), string.Format("FAIL: {0}", Url));
            Assert.IsNotNullOrEmpty(msDoc.GetTitle(), string.Format("FAIL: {0}", msDoc.GetTitle()));

            // Read all three detected languages before asserting on any of them.
            string LanguageTitle = msDoc.GetTitleLanguage();
            string LanguageDescription = msDoc.GetDescriptionLanguage();
            string LanguageBodyText = msDoc.GetDocumentTextLanguage();

            Assert.AreEqual(
                ExpectedLanguage,
                LanguageTitle,
                string.Format("FAIL: {0} :: {1}", "LanguageTitle", LanguageTitle)
            );
            Assert.AreEqual(
                ExpectedLanguage,
                LanguageDescription,
                string.Format("FAIL: {0} :: {1}", "LanguageDescription", LanguageDescription)
            );
            Assert.AreEqual(
                ExpectedLanguage,
                LanguageBodyText,
                string.Format("FAIL: {0} :: {1}", "LanguageBodyText", LanguageBodyText)
            );
        }
    }
}
/**************************************************************************/
// Renders, for every document in DocCollection, the rows produced by the
// per-document RenderListViewSearchTargetUrls overload, then appends them
// to the display list view. Progress is reported in a dialogue when the
// "show progress dialogues" preference is on.
//
// Fixes relative to the previous version:
//   - an empty collection returns immediately; previously the percentage
//     computation divided by zero when progress dialogues were enabled;
//   - the null check on msDoc now happens BEFORE msDoc.GetUrl() is called
//     (it used to come after the dereference, so it could never help);
//   - the progress form is disposed in a finally block.
//
// DocCollection: the documents to scan.
// UrlFragment:   passed through unchanged to the per-document overload.
public void RenderListViewSearchTargetUrls(
    MacroscopeDocumentCollection DocCollection,
    string UrlFragment
)
{
    int DocumentCount = DocCollection.CountDocuments();

    if (DocumentCount == 0)
    {
        return;
    }

    List<ListViewItem> ListViewItems = new List<ListViewItem>(DocumentCount);
    MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

    try
    {
        decimal Count = 0;
        decimal TotalDocs = (decimal)DocumentCount;

        if (MacroscopePreferencesManager.GetShowProgressDialogues())
        {
            ProgressForm.UpdatePercentages(
                Title: "Displaying Links",
                Message: "Processing links in document collection for display:",
                MajorPercentage: ((decimal)100 / TotalDocs) * Count,
                ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
            );
        }

        foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
        {
            if (msDoc != null)
            {
                this.RenderListViewSearchTargetUrls(
                    ListViewItems: ListViewItems,
                    msDoc: msDoc,
                    Url: msDoc.GetUrl(),
                    UrlFragment: UrlFragment
                );
            }

            // Progress still advances for skipped (null) documents.
            if (MacroscopePreferencesManager.GetShowProgressDialogues())
            {
                Count++;

                ProgressForm.UpdatePercentages(
                    Title: null,
                    Message: null,
                    MajorPercentage: ((decimal)100 / TotalDocs) * Count,
                    ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                );
            }
        }

        this.DisplayListView.Items.AddRange(ListViewItems.ToArray());

        if (MacroscopePreferencesManager.GetShowProgressDialogues())
        {
            ProgressForm.DoClose();
        }
    }
    finally
    {
        ProgressForm.Dispose();
    }
}
/**************************************************************************/
// Creates an outlink from this document to AbsoluteUrl and records it in
// this.Outlinks.
//
// When the "check external links" preference is off and the document
// collection provides an allowed-hosts object, a target URL that is not
// allowed is rejected: nothing is recorded and null is returned.
//
// AbsoluteUrl: target URL of the link.
// LinkType:    link type stored on the new link.
// Follow:      follow flag stored on the new link.
// Returns:     the recorded link, or null when the target was rejected.
private MacroscopeLink AddSitemapTextOutlink(
    string AbsoluteUrl,
    MacroscopeConstants.InOutLinkType LinkType,
    Boolean Follow
)
{
    if (!MacroscopePreferencesManager.GetCheckExternalLinks())
    {
        MacroscopeAllowedHosts Hosts = this.DocCollection.GetAllowedHosts();

        if ((Hosts != null) && (!Hosts.IsAllowedFromUrl(Url: AbsoluteUrl)))
        {
            return null;
        }
    }

    MacroscopeLink Created = new MacroscopeLink(
        SourceUrl: this.GetUrl(),
        TargetUrl: AbsoluteUrl,
        LinkType: LinkType,
        Follow: Follow
    );

    this.Outlinks.Add(Created);

    return Created;
}
/** -------------------------------------------------------------------- **/
// Rebuilds the "recent URLs" drop-down: one entry per crawl-history URL in
// reverse list order, followed by a separator and a "Clear Recent URLs"
// entry.
//
// Each entry's Tag holds the full URL; its text is the URL truncated to 64
// characters plus "..." when longer, with every "&" doubled to "&&".
//
// The history is now walked backwards by index instead of calling
// List.Reverse(), which reverses the list in place. Whether
// GetCrawlHistory() returns a private copy is not visible from here; if it
// hands out the stored list, the old code flipped the stored order on every
// call. Reading by index never mutates the list either way.
private void _ReconfigureFileMenuRecentUrlsItems()
{
    const int MaxLabelLength = 64;

    List<string> CrawlHistory = MacroscopePreferencesManager.GetCrawlHistory();
    ToolStripItemCollection RecentUrlItems = this.recentURLsToolStripMenuItem.DropDownItems;

    RecentUrlItems.Clear();

    for (int Index = CrawlHistory.Count - 1; Index >= 0; Index--)
    {
        string Url = CrawlHistory[Index];
        string UrlTruncated = Url;

        if (Url.Length > MaxLabelLength)
        {
            UrlTruncated = Url.Substring(0, MaxLabelLength) + "...";
        }

        UrlTruncated = UrlTruncated.Replace("&", "&&");

        ToolStripItem UrlItem = RecentUrlItems.Add(text: UrlTruncated);
        UrlItem.Tag = Url;
        UrlItem.Click += ClickCallbackFileMenuRecentUrlsItem;
    }

    RecentUrlItems.Add(new ToolStripSeparator());

    ToolStripItem ClearItem = RecentUrlItems.Add(text: "Clear Recent URLs");
    ClearItem.Click += ClickCallbackFileMenuRecentUrlsClear;
}
/** -------------------------------------------------------------------- **/
// Queues a URL for processing.
//
// The URL is first normalised according to preferences: the query string
// is stripped when "ignore queries" is on, and the hash fragment is
// stripped when "ignore hash fragments" is on. If the normalised URL has
// not been seen in the job history, a job item is added to the URL-list
// named queue; a MacroscopeNamedQueueException from that call is logged and
// swallowed. The normalised URL is passed to AddToProgress in every case.
//
// Url: the URL to enqueue.
public void AddUrlQueueItem(string Url)
{
    string Normalised = Url;

    if (MacroscopePreferencesManager.GetIgnoreQueries())
    {
        Normalised = MacroscopeUrlUtils.StripQueryString(Url: Normalised);
    }

    if (MacroscopePreferencesManager.GetIgnoreHashFragments())
    {
        Normalised = MacroscopeUrlUtils.StripHashFragment(Url: Normalised);
    }

    bool AlreadySeen = this.JobHistory.SeenHistoryItem(Url: Normalised);

    if (!AlreadySeen)
    {
        try
        {
            MacroscopeJobItem QueuedItem = new MacroscopeJobItem(Url: Normalised);

            this.NamedQueueJobItems.AddToNamedQueue(
                Name: MacroscopeConstants.NamedQueueUrlList,
                Item: QueuedItem
            );
        }
        catch (MacroscopeNamedQueueException ex)
        {
            this.DebugMsg(string.Format("AddUrlQueueItem: {0}", ex.Message));
        }
    }

    this.AddToProgress(Url: Normalised);
}
/**************************************************************************/
// Constructor: stores the main form and target list view, picks up the
// redirect-chains item label from the overview tab panel, reads the
// maximum hop count from preferences, and configures the list view.
//
// ConfigureListView() is marshalled through MainForm.Invoke when the form
// reports InvokeRequired; otherwise it is called directly.
//
// MainForm:       the application's main form.
// TargetListView: the list view this display object writes to.
public MacroscopeDisplayRedirectChains(MacroscopeMainForm MainForm, ListView TargetListView)
    : base(MainForm, TargetListView)
{
    this.MainForm = MainForm;
    this.DisplayListView = TargetListView;
    this.DocumentCount = this.MainForm.macroscopeOverviewTabPanelInstance.toolStripLabelRedirectChainsItems;
    this.MaximumHops = MacroscopePreferencesManager.GetRedirectChainsMaxHops();

    if (!this.MainForm.InvokeRequired)
    {
        this.ConfigureListView();
        return;
    }

    this.MainForm.Invoke(
        new MethodInvoker(
            () =>
            {
                this.ConfigureListView();
            }
        )
    );
}
/** Crawl Delay ***********************************************************/
// Returns the crawl delay derived from the robots data fetched for Url.
//
// Returns 0 when the "follow robots protocol" preference is off or when no
// robots object could be fetched. Otherwise the delay for this crawler's
// user agent name is looked up, falling back to the "*" entry when that
// lookup yields 0. A positive value is divided by 1000 (integer division)
// to produce the result — presumably milliseconds to seconds; confirm
// against the Robots library.
//
// Url:     the URL whose robots data is consulted.
// Returns: the computed delay, or 0 when none applies.
public async Task<int> GetCrawlDelay(string Url)
{
    if (!MacroscopePreferencesManager.GetFollowRobotsProtocol())
    {
        return 0;
    }

    Robots robot = await this.FetchRobot(Url: Url);

    if (robot == null)
    {
        return 0;
    }

    int Delay = 0;
    long CrawlDelayTime = robot.CrawlDelay(userAgent: this.UserAgentName());

    if (CrawlDelayTime == 0)
    {
        CrawlDelayTime = robot.CrawlDelay("*");
    }

    if (CrawlDelayTime > 0)
    {
        Delay = (int)(CrawlDelayTime / 1000);
    }

    DebugMsg(string.Format("ROBOTS CrawlDelayTime: {0}", CrawlDelayTime));
    DebugMsg(string.Format("ROBOTS Delay: {0}", Delay));

    return Delay;
}
/** -------------------------------------------------------------------- **/
// Puts the main form's controls into the state set out below: menus, the
// start URL box, the Start button and the retry buttons are enabled; Stop
// and Reset are disabled; the scan progress bar is hidden. The
// "recalculate click paths" button follows the "analyze click paths"
// preference. Finally the reports menu, structure overview and search
// collection controls are reconfigured.
private void ScanningControlsEnable()
{
    // Menus.
    this.loadUrlListToolStripMenuItem.Enabled = true;
    this.exportToolStripMenuItem.Enabled = true;
    this.taskParametersToolStripMenuItem.Enabled = true;
    this.reportsToolStripMenuItem.Enabled = true;

    // Start URL and scan buttons.
    this.textBoxStartUrl.Enabled = true;
    this.ButtonStart.Enabled = true;
    this.ButtonStop.Enabled = false;
    this.ButtonReset.Enabled = false;
    this.ProgressBarScan.Visible = false;

    // Tool strip buttons.
    this.toolStripButtonRetryBrokenLinks.Enabled = true;
    this.toolStripButtonRetryTimedOutLinks.Enabled = true;
    this.toolStripButtonRecalculateClickPaths.Enabled =
        MacroscopePreferencesManager.GetAnalyzeClickPaths() ? true : false;

    // Dependent controls.
    this.ReconfigureReportsMenu();
    this.ReconfigureStructureOverviewControls();
    this.ReconfigureSearchCollectionControls();
}
/**************************************************************************/
// Constructor: wires the worker to its job master and copies the document
// collection, allowed hosts and include/exclude URL objects from it.
//
// Crawl delay precedence: a positive delay from preferences is applied
// first; when the "follow robots protocol" preference is on, a positive
// delay reported by the job master then overrides it.
//
// JobMaster: the owning job master.
public MacroscopeJobWorker(MacroscopeJobMaster JobMaster)
{
    this.SuppressDebugMsg = true;

    this.JobMaster = JobMaster;
    this.DocCollection = this.JobMaster.GetDocCollection();
    this.AllowedHosts = this.JobMaster.GetAllowedHosts();
    this.IncludeExcludeUrls = this.JobMaster.GetIncludeExcludeUrls();

    if (MacroscopePreferencesManager.GetCrawlDelay() > 0)
    {
        this.CrawlDelay = MacroscopePreferencesManager.GetCrawlDelay();
    }

    bool RobotsDelayApplies =
        MacroscopePreferencesManager.GetFollowRobotsProtocol()
        && (this.JobMaster.GetCrawlDelay() > 0);

    if (RobotsDelayApplies)
    {
        this.CrawlDelay = this.JobMaster.GetCrawlDelay();
    }
}
/** SEARCH INDEX **********************************************************/
// Runs a search over the index for the given terms.
//
// When case-sensitive text indexing is off, every element of Terms is
// lower-cased IN PLACE (the caller's array is modified) before searching.
// The search is dispatched to the OR or AND implementation according to
// SMode; any other mode yields null.
//
// SMode:   the search mode (OR / AND).
// Terms:   the search terms; must not be null.
// Returns: the matching documents, or null for an unhandled mode.
public List<MacroscopeDocument> ExecuteSearchForDocuments(
    MacroscopeSearchIndex.SearchMode SMode,
    string[] Terms
)
{
    bool CaseSensitive = MacroscopePreferencesManager.GetCaseSensitiveTextIndexing();
    int TermCount = Terms.Length;

    if (!CaseSensitive)
    {
        for (int Position = 0; Position < TermCount; Position++)
        {
            Terms[Position] = Terms[Position].ToLower();
        }
    }

    if (SMode == MacroscopeSearchIndex.SearchMode.OR)
    {
        return this.ExecuteSearchForDocumentsOR(Terms);
    }

    if (SMode == MacroscopeSearchIndex.SearchMode.AND)
    {
        return this.ExecuteSearchForDocumentsAND(Terms);
    }

    return null;
}
/** Render One Document *******************************************/
// Renders a single document into the display list view, optionally showing
// a progress dialogue (0 / 1, then 1 / 1).
//
// Does nothing when msDoc is null. Note that the Url argument is not used:
// the document's own msDoc.GetUrl() value is what gets rendered.
//
// Changes relative to the previous version:
//   - the progress form is disposed in a finally block, so it is released
//     even if rendering throws;
//   - the second, unreachable-as-false "msDoc != null" test was removed
//     (msDoc is already known to be non-null after the guard).
//
// msDoc: the document to render; null is ignored.
// Url:   unused by this implementation.
public virtual void RenderListView(MacroscopeDocument msDoc, string Url)
{
    if (msDoc == null)
    {
        return;
    }

    List<ListViewItem> ListViewItems = new List<ListViewItem>(1);
    MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

    try
    {
        decimal Count = 0;
        decimal TotalDocs = (decimal)1;
        decimal MajorPercentage = ((decimal)100 / TotalDocs) * Count;

        if (MacroscopePreferencesManager.GetShowProgressDialogues())
        {
            ProgressForm.ControlBox = false;

            ProgressForm.UpdatePercentages(
                Title: "Preparing Display",
                Message: "Processing document collection for display:",
                MajorPercentage: MajorPercentage,
                ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
            );
        }

        Application.DoEvents();

        this.RenderListView(
            ListViewItems: ListViewItems,
            msDoc: msDoc,
            Url: msDoc.GetUrl()
        );

        if (MacroscopePreferencesManager.GetShowProgressDialogues())
        {
            Count++;
            MajorPercentage = ((decimal)100 / TotalDocs) * Count;

            ProgressForm.UpdatePercentages(
                Title: null,
                Message: null,
                MajorPercentage: MajorPercentage,
                ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
            );
        }

        this.DisplayListView.Items.AddRange(ListViewItems.ToArray());

        if (MacroscopePreferencesManager.GetShowProgressDialogues())
        {
            ProgressForm.DoClose();
        }
    }
    finally
    {
        ProgressForm.Dispose();
    }
}
/** Render Entire DocCollection *******************************************/
// Renders every non-null document of DocCollection into the tree view via
// the per-document RenderTreeView overload, optionally reporting progress
// in a dialogue. Returns immediately for an empty collection.
//
// Application.DoEvents() is called once per iterated document.
//
// Change relative to the previous version: the progress form is disposed
// in a finally block, so it is released even if rendering a document
// throws; the always-true "ProgressForm != null" test was dropped.
//
// DocCollection: the documents to render.
public void RenderTreeView(MacroscopeDocumentCollection DocCollection)
{
    if (DocCollection.CountDocuments() == 0)
    {
        return;
    }

    MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

    try
    {
        decimal Count = 0;
        decimal TotalDocs = (decimal)DocCollection.CountDocuments();
        decimal MajorPercentage = ((decimal)100 / TotalDocs) * Count;

        if (MacroscopePreferencesManager.GetShowProgressDialogues())
        {
            ProgressForm.UpdatePercentages(
                Title: "Preparing Display",
                Message: "Processing document collection for display:",
                MajorPercentage: MajorPercentage,
                ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
            );
        }

        foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
        {
            Application.DoEvents();

            if (msDoc == null)
            {
                continue;
            }

            string Url = msDoc.GetUrl();

            this.RenderTreeView(msDoc, Url);

            if (MacroscopePreferencesManager.GetShowProgressDialogues())
            {
                Count++;
                MajorPercentage = ((decimal)100 / TotalDocs) * Count;

                ProgressForm.UpdatePercentages(
                    Title: null,
                    Message: null,
                    MajorPercentage: MajorPercentage,
                    ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                );
            }
        }

        if (MacroscopePreferencesManager.GetShowProgressDialogues())
        {
            ProgressForm.DoClose();
        }
    }
    finally
    {
        ProgressForm.Dispose();
    }
}
/**************************************************************************/
// Enables the "pages CSV report" menu item exactly when the Levenshtein
// deduplication preference is on, and disables it otherwise.
public void ReconfigureReportsMenu()
{
    bool DeduplicationEnabled = MacroscopePreferencesManager.GetEnableLevenshteinDeduplication();

    this.pagesCSVReportToolStripMenuItem.Enabled = DeduplicationEnabled;
}
/**************************************************************************/
// Event handler: resets all preferences to their default values and then
// refreshes the preferences form's control fields.
//
// The previous version cast sender to Button into a local that was never
// read. Besides being dead code, that cast threw InvalidCastException if
// the handler was ever attached to a non-Button control; it was removed.
//
// sender: the control that raised the event (not used).
// e:      event data (not used).
private void SetPrefsFormControlFieldToDefaults(object sender, EventArgs e)
{
    MacroscopePreferencesManager.SetDefaultValues();
    this.SetPrefsFormControlFields();
}
/**************************************************************************/
// Renders every document whose URL contains UrlFragment (current-culture
// comparison) into the display list view, optionally reporting progress in
// a dialogue.
//
// Fixes relative to the previous version:
//   - an empty collection returns immediately; previously the percentage
//     computation divided by zero when progress dialogues were enabled;
//   - null documents are skipped instead of being dereferenced;
//   - the progress form is disposed in a finally block.
//
// The document total is still re-read on every progress update, as before.
//
// DocCollection: the documents to scan.
// UrlFragment:   substring to look for in each document URL.
public void RenderListViewSearchSourceUrls(
    MacroscopeDocumentCollection DocCollection,
    string UrlFragment
)
{
    int DocumentCount = DocCollection.CountDocuments();

    if (DocumentCount == 0)
    {
        return;
    }

    List<ListViewItem> ListViewItems = new List<ListViewItem>(DocumentCount);
    MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

    try
    {
        decimal Count = 0;
        decimal TotalDocs = (decimal)DocumentCount;

        if (MacroscopePreferencesManager.GetShowProgressDialogues())
        {
            ProgressForm.UpdatePercentages(
                Title: "Displaying Links",
                Message: "Processing links in document collection for display:",
                MajorPercentage: ((decimal)100 / TotalDocs) * Count,
                ProgressLabelMajor: "Documents Processed"
            );
        }

        foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
        {
            if (msDoc != null)
            {
                string Url = msDoc.GetUrl();

                if (Url.IndexOf(UrlFragment, StringComparison.CurrentCulture) >= 0)
                {
                    this.RenderListView(
                        ListViewItems: ListViewItems,
                        DocCollection: DocCollection,
                        msDoc: msDoc,
                        Url: Url
                    );
                }
            }

            // Progress still advances for skipped (null) documents.
            if (MacroscopePreferencesManager.GetShowProgressDialogues())
            {
                Count++;
                TotalDocs = (decimal)DocCollection.CountDocuments();

                ProgressForm.UpdatePercentages(
                    Title: null,
                    Message: null,
                    MajorPercentage: ((decimal)100 / TotalDocs) * Count,
                    ProgressLabelMajor: null
                );
            }
        }

        this.DisplayListView.Items.AddRange(ListViewItems.ToArray());

        if (MacroscopePreferencesManager.GetShowProgressDialogues())
        {
            ProgressForm.DoClose();
        }
    }
    finally
    {
        ProgressForm.Dispose();
    }
}
/**************************************************************************/
// Queues the followable outlinks of msDoc on the job master.
//
// In the LISTFILE, LISTTEXT and SITEMAP run-time modes nothing is queued
// unless the "scan sites in list" preference is on. An outlink is skipped
// when it is not marked do-follow, has a null target URL, or its target has
// already been seen in the job history. When a page limit is set (> -1)
// and the page-limit count has reached it, the link is logged as
// "PAGE LIMIT REACHED" and not queued; the loop keeps going so every
// remaining link is evaluated the same way.
//
// msDoc: the document whose outlinks are processed.
private void ProcessOutlinks(MacroscopeDocument msDoc)
{
    switch (this.JobMaster.GetRunTimeMode())
    {
        case MacroscopeConstants.RunTimeMode.LISTFILE:
        case MacroscopeConstants.RunTimeMode.LISTTEXT:
        case MacroscopeConstants.RunTimeMode.SITEMAP:
            if (!MacroscopePreferencesManager.GetScanSitesInList())
            {
                return;
            }
            break;

        default:
            break;
    }

    foreach (MacroscopeLink Outlink in msDoc.IterateOutlinks())
    {
        if (!Outlink.GetDoFollow())
        {
            continue;
        }

        if (Outlink.GetTargetUrl() == null)
        {
            continue;
        }

        if (this.JobMaster.GetJobHistory().SeenHistoryItem(Outlink.GetTargetUrl()))
        {
            continue;
        }

        bool LimitReached =
            (this.JobMaster.GetPageLimit() > -1)
            && (this.JobMaster.GetPageLimitCount() >= this.JobMaster.GetPageLimit());

        if (LimitReached)
        {
            this.DebugMsg(
                string.Format(
                    "PAGE LIMIT REACHED: {0} :: {1}",
                    this.JobMaster.GetPageLimit(),
                    this.JobMaster.GetPageLimitCount()
                )
            );
            continue;
        }

        this.JobMaster.AddUrlQueueItem(
            Url: Outlink.GetTargetUrl(),
            Check: true
        );
    }
}
/**************************************************************************/
// Preference preset: resets preferences to their defaults, applies the
// HtmlOnly() preset, additionally turns PDF processing on, and saves the
// preferences.
public static void HtmlAndPdfs()
{
    // Baseline.
    MacroscopePreferencesManager.SetDefaultValues();
    HtmlOnly();

    // Preset-specific override.
    MacroscopePreferencesManager.SetProcessPdfs(true);

    // Persist.
    MacroscopePreferencesManager.SavePreferences();
}
/** -------------------------------------------------------------------- **/
// Decides whether Url may be fetched under the robots rules.
//
// When the "follow robots protocol" preference is off the URL is always
// allowed; otherwise the answer comes from CheckRobotRule.
//
// Url:     the URL to test.
// Returns: true when the URL is allowed.
public async Task<bool> ApplyRobotRule(string Url)
{
    if (!MacroscopePreferencesManager.GetFollowRobotsProtocol())
    {
        return true;
    }

    bool Verdict = await this.CheckRobotRule(Url: Url);

    return Verdict;
}
/**************************************************************************/
// Enables the structure-search text box on the overview tab panel exactly
// when the text indexing preference is on, and disables it otherwise.
public void ReconfigureStructureOverviewControls()
{
    bool TextIndexingEnabled = MacroscopePreferencesManager.GetEnableTextIndexing();

    this.macroscopeOverviewTabPanelInstance.toolStripStructureSearchTextBoxSearch.Enabled = TextIndexingEnabled;
}
/**************************************************************************/
// Rebuilds the list view's columns from the configured maximum number of
// redirect hops: for hop N (1-based) a "HOP_N_URL" / "Hop N URL" column and
// a "HOP_N_STATUS" / "Hop N Status" column are added. Existing columns are
// cleared first, and the columns are auto-resized to their header size at
// the end.
private void ConfigureListViewColumns()
{
    this.MaximumHops = MacroscopePreferencesManager.GetRedirectChainsMaxHops();

    this.DisplayListView.Columns.Clear();

    int HopNumber = 1;

    while (HopNumber <= this.MaximumHops)
    {
        string UrlKey = string.Format("HOP_{0}_URL", HopNumber);
        string UrlLabel = string.Format("Hop {0} URL", HopNumber);
        this.DisplayListView.Columns.Add(UrlKey, UrlLabel);

        string StatusKey = string.Format("HOP_{0}_STATUS", HopNumber);
        string StatusLabel = string.Format("Hop {0} Status", HopNumber);
        this.DisplayListView.Columns.Add(StatusKey, StatusLabel);

        HopNumber++;
    }

    this.DisplayListView.AutoResizeColumns(ColumnHeaderAutoResizeStyle.HeaderSize);
}
/**************************************************************************/
// Preference preset: resets preferences to their defaults, applies the
// HtmlOnly() preset, additionally turns on following of canonical,
// alternate and hreflang links, and saves the preferences.
public static void HrefLangMatrix()
{
    // Baseline.
    MacroscopePreferencesManager.SetDefaultValues();
    HtmlOnly();

    // Preset-specific overrides.
    MacroscopePreferencesManager.SetFollowCanonicalLinks(true);
    MacroscopePreferencesManager.SetFollowAlternateLinks(true);
    MacroscopePreferencesManager.SetFollowHrefLangLinks(true);

    // Persist.
    MacroscopePreferencesManager.SavePreferences();
}
/** Generate Robot URL ****************************************************/
// Derives the robots.txt URL ("scheme://host[:port]/robots.txt") for the
// site that Url belongs to.
//
// Returns null when:
//   - the "follow robots protocol" preference is off,
//   - Url is null or empty,
//   - Url cannot be parsed as an absolute URI (the failure is logged).
//
// Fixes relative to the previous version:
//   - the "ROBOTS Disabled" diagnostic used to be emitted only when the
//     protocol was ENABLED; it is now emitted on the disabled path that it
//     describes;
//   - a null Url no longer escapes as an ArgumentNullException from the
//     Uri constructor (only InvalidOperationException and
//     UriFormatException are caught below); it now simply yields null.
//
// Url:     any absolute URL on the target site.
// Returns: the robots.txt URL, or null as described above.
public static string GenerateRobotUrl(string Url)
{
    if (!MacroscopePreferencesManager.GetFollowRobotsProtocol())
    {
        DebugMsgStatic(string.Format("ROBOTS Disabled: {0}", Url));
        return null;
    }

    if (string.IsNullOrEmpty(Url))
    {
        return null;
    }

    string RobotsTxtUrl = null;

    try
    {
        Uri BaseUri = new Uri(Url, UriKind.Absolute);
        string BaseUriPort = "";

        if (BaseUri.Port > 0)
        {
            BaseUriPort = string.Format(":{0}", BaseUri.Port);
        }

        Uri RobotsUri = new Uri(
            string.Format(
                "{0}://{1}{2}{3}",
                BaseUri.Scheme,
                BaseUri.Host,
                BaseUriPort,
                "/robots.txt"
            ),
            UriKind.Absolute
        );

        RobotsTxtUrl = RobotsUri.ToString();
    }
    catch (InvalidOperationException ex)
    {
        DebugMsgStatic(string.Format("GenerateRobotUrl: {0}", ex.Message));
    }
    catch (UriFormatException ex)
    {
        DebugMsgStatic(string.Format("GenerateRobotUrl: {0}", ex.Message));
    }

    return string.IsNullOrEmpty(RobotsTxtUrl) ? null : RobotsTxtUrl;
}
/** -------------------------------------------------------------------- **/
// When the "follow sitemap links" preference is on, obtains the sitemap
// URLs for Url from the Robots member and queues each of them through
// AddUrlQueueItem. Does nothing when the preference is off.
//
// Url: the URL whose sitemap list is requested.
public void ProbeRobotsFile(string Url)
{
    if (!MacroscopePreferencesManager.GetFollowSitemapLinks())
    {
        return;
    }

    List<string> SitemapList = Robots.GetSitemapsAsList(Url);

    // Index-based walk; an empty list simply performs no iterations.
    int Position = 0;

    while (Position < SitemapList.Count)
    {
        this.AddUrlQueueItem(Url: SitemapList[Position]);
        Position++;
    }
}
/**************************************************************************/
// Issues an HTTP HEAD request (10 000 ms timeout, keep-alive off) against
// this.Url and reports whether the response status is 200 OK. On success
// the response headers are handed to ProcessResponseHttpHeaders.
//
// UriFormatException and WebException are logged and result in false.
//
// Changes relative to the previous version:
//   - when a WebException carries a response object, that response is now
//     closed; it was previously left open, holding on to its connection;
//   - the explicit res.Close() inside the using block was removed (the
//     using statement already disposes the response), as was the else
//     branch that re-assigned the initial false value.
//
// Returns: true only when the HEAD request returned HttpStatusCode.OK.
private Boolean Check()
{
    // TODO: Increase level of detail here.
    Boolean IsAvailableCheck = false;

    try
    {
        HttpWebRequest req = WebRequest.CreateHttp(this.Url);

        req.Method = "HEAD";
        req.Timeout = 10000;
        req.KeepAlive = false;
        req.Host = MacroscopeUrlUtils.GetHostnameAndPortFromUrl(this.Url);
        req.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

        MacroscopePreferencesManager.EnableHttpProxy(req);

        using (HttpWebResponse res = (HttpWebResponse)req.GetResponse())
        {
            DebugMsg(string.Format("MacroscopeHrefLang Status: {0}", res.StatusCode));

            if (res.StatusCode == HttpStatusCode.OK)
            {
                IsAvailableCheck = true;
                this.ProcessResponseHttpHeaders(req: req, res: res);
            }
        }
    }
    catch (UriFormatException ex)
    {
        DebugMsg(string.Format("MacroscopeHrefLang UriFormatException: {0}", ex.Message));
    }
    catch (WebException ex)
    {
        DebugMsg(string.Format("MacroscopeHrefLang WebException: {0}", ex.Message));

        // Protocol errors attach the server's response to the exception;
        // release it so the underlying connection is not kept open.
        if (ex.Response != null)
        {
            ex.Response.Close();
        }
    }

    return IsAvailableCheck;
}
// Test: with default preferences plus language detection and a 10-second
// request timeout, creates a LIVE-mode job master and document collection,
// then fetches each URL in the list ten times and asserts that the
// document executes, is of type HTML, has a non-null, non-empty title, and
// that the detected language of title, description and body text is "en".
public async Task TestDetectLanguage()
{
    const int Repetitions = 10;
    const string ExpectedLanguage = "en";

    List<string> UrlList = new List<string>
    {
        "https://nazuke.github.io/SEOMacroscope/"
    };

    MacroscopePreferencesManager.SetDefaultValues();
    MacroscopePreferencesManager.SetDetectLanguage(Enabled: true);
    MacroscopePreferencesManager.SetRequestTimeout(Seconds: 10);

    MacroscopeJobMaster JobMaster = new MacroscopeJobMaster(
        JobRunTimeMode: MacroscopeConstants.RunTimeMode.LIVE,
        TaskController: this
    );

    MacroscopeDocumentCollection DocCollection = new MacroscopeDocumentCollection(JobMaster: JobMaster);

    for (int Pass = 0; Pass < Repetitions; Pass++)
    {
        foreach (string Url in UrlList)
        {
            MacroscopeDocument msDoc = DocCollection.CreateDocument(Url: Url);

            Assert.IsNotNull(msDoc, string.Format("FAIL: {0}", Url));

            bool ExecuteResult = await msDoc.Execute();

            Assert.IsTrue(ExecuteResult, string.Format("FAIL: {0}", "Execute()"));
            Assert.IsTrue(
                msDoc.IsDocumentType(Type: MacroscopeConstants.DocumentType.HTML),
                string.Format("FAIL: {0}", Url)
            );
            Assert.IsNotNull(msDoc.GetTitle(), string.Format("FAIL: {0}", msDoc.GetTitle()));
            Assert.IsNotEmpty(msDoc.GetTitle(), string.Format("FAIL: {0}", msDoc.GetTitle()));

            // Read all three detected languages before asserting on any of them.
            string LanguageTitle = msDoc.GetTitleLanguage();
            string LanguageDescription = msDoc.GetDescriptionLanguage();
            string LanguageBodyText = msDoc.GetDocumentTextLanguage();

            Assert.AreEqual(
                ExpectedLanguage,
                LanguageTitle,
                string.Format("FAIL: {0} :: {1}", "LanguageTitle", LanguageTitle)
            );
            Assert.AreEqual(
                ExpectedLanguage,
                LanguageDescription,
                string.Format("FAIL: {0} :: {1}", "LanguageDescription", LanguageDescription)
            );
            Assert.AreEqual(
                ExpectedLanguage,
                LanguageBodyText,
                string.Format("FAIL: {0} :: {1}", "LanguageBodyText", LanguageBodyText)
            );
        }
    }
}