/// <summary>
/// Fetches each sample URL ten times with language detection enabled and
/// asserts that the document executes, is HTML, has a non-empty title, and
/// that the title, description and body-text languages are all reported as "en".
/// </summary>
public void TestDetectLanguage()
{
    const int repetitions = 10;
    const string expectedLanguage = "en";
    const string failFormat = "FAIL: {0}";
    const string mismatchFormat = "FAIL: {0} :: {1}";

    List<string> targets = new List<string> { "https://nazuke.github.io/SEOMacroscope/" };

    MacroscopePreferencesManager.SetDetectLanguage(Enabled: true);
    MacroscopePreferencesManager.SetRequestTimeout(Seconds: 10);

    int pass = 0;
    while (pass < repetitions)
    {
        foreach (string targetUrl in targets)
        {
            MacroscopeDocument document = new MacroscopeDocument(Url: targetUrl);

            Assert.IsNotNull(document, string.Format(failFormat, targetUrl));
            Assert.IsTrue(document.Execute(), string.Format(failFormat, "Execute()"));
            Assert.IsTrue(document.GetIsHtml(), string.Format(failFormat, targetUrl));
            Assert.IsNotNullOrEmpty(document.GetTitle(), string.Format(failFormat, document.GetTitle()));

            // Read all three detected languages up front, then check them in the same order.
            string[] labels = { "LanguageTitle", "LanguageDescription", "LanguageBodyText" };
            string[] detected =
            {
                document.GetTitleLanguage(),
                document.GetDescriptionLanguage(),
                document.GetDocumentTextLanguage()
            };

            for (int k = 0; k < labels.Length; k++)
            {
                Assert.AreEqual(
                    expectedLanguage,
                    detected[k],
                    string.Format(mismatchFormat, labels[k], detected[k])
                );
            }
        }

        pass++;
    }
}
/// <summary>
/// Asynchronous language-detection check: resets preferences to their defaults,
/// enables language detection, then creates and executes a document for each
/// sample URL ten times through a LIVE-mode job master. Each run asserts that the
/// document is HTML, has a non-null and non-empty title, and that the title,
/// description and body-text languages are all reported as "en".
/// </summary>
public async Task TestDetectLanguage()
{
    const int repetitions = 10;
    const string expectedLanguage = "en";
    const string failFormat = "FAIL: {0}";
    const string mismatchFormat = "FAIL: {0} :: {1}";

    List<string> targets = new List<string> { "https://nazuke.github.io/SEOMacroscope/" };

    MacroscopePreferencesManager.SetDefaultValues();
    MacroscopePreferencesManager.SetDetectLanguage(Enabled: true);
    MacroscopePreferencesManager.SetRequestTimeout(Seconds: 10);

    MacroscopeJobMaster jobMaster = new MacroscopeJobMaster(
        JobRunTimeMode: MacroscopeConstants.RunTimeMode.LIVE,
        TaskController: this
    );
    MacroscopeDocumentCollection documents = new MacroscopeDocumentCollection(JobMaster: jobMaster);

    int pass = 0;
    while (pass < repetitions)
    {
        foreach (string targetUrl in targets)
        {
            MacroscopeDocument document = documents.CreateDocument(Url: targetUrl);
            Assert.IsNotNull(document, string.Format(failFormat, targetUrl));

            bool executed = await document.Execute();
            Assert.IsTrue(executed, string.Format(failFormat, "Execute()"));

            Assert.IsTrue(
                document.IsDocumentType(Type: MacroscopeConstants.DocumentType.HTML),
                string.Format(failFormat, targetUrl)
            );
            Assert.IsNotNull(document.GetTitle(), string.Format(failFormat, document.GetTitle()));
            Assert.IsNotEmpty(document.GetTitle(), string.Format(failFormat, document.GetTitle()));

            // Read all three detected languages up front, then check them in the same order.
            string[] labels = { "LanguageTitle", "LanguageDescription", "LanguageBodyText" };
            string[] detected =
            {
                document.GetTitleLanguage(),
                document.GetDescriptionLanguage(),
                document.GetDocumentTextLanguage()
            };

            for (int k = 0; k < labels.Length; k++)
            {
                Assert.AreEqual(
                    expectedLanguage,
                    detected[k],
                    string.Format(mismatchFormat, labels[k], detected[k])
                );
            }
        }

        pass++;
    }
}
/**************************************************************************/

/// <summary>
/// Renders the meta-description row for a single document.
/// External documents, redirects, and any document type other than HTML or PDF
/// are skipped. The row is keyed by the digest of the URL joined with the digest
/// of the description; an existing row in the display list view is updated in
/// place, otherwise a new row is created and appended to <paramref name="ListViewItems"/>.
/// </summary>
/// <param name="ListViewItems">Receives any newly created row.</param>
/// <param name="DocCollection">Queried for the description occurrence count.</param>
/// <param name="msDoc">Document whose description is rendered.</param>
/// <param name="Url">URL shown in the row and used in the row key.</param>
protected override void RenderListView(
    List<ListViewItem> ListViewItems,
    MacroscopeDocumentCollection DocCollection,
    MacroscopeDocument msDoc,
    string Url
)
{
    if (msDoc.GetIsExternal() || msDoc.GetIsRedirect())
    {
        return;
    }

    switch (msDoc.GetDocumentType())
    {
        case MacroscopeConstants.DocumentType.HTML:
        case MacroscopeConstants.DocumentType.PDF:
            break;
        default:
            return;
    }

    string pageLang = msDoc.GetIsoLanguageCode();
    string detectedLang = msDoc.GetDescriptionLanguage();
    string descriptionText = msDoc.GetDescription();
    int descriptionLen = msDoc.GetDescriptionLength();

    // The key is built from the description as reported by the document,
    // before the "MISSING" placeholder below is substituted.
    string rowKey = string.Join(":", UrlToDigest(Url), UrlToDigest(descriptionText));

    pageLang = pageLang ?? "";
    detectedLang = detectedLang ?? "";

    int occurrenceCount = 0;
    if (descriptionLen > 0)
    {
        occurrenceCount = DocCollection.GetStatsDescriptionCount(msDoc: msDoc);
    }
    else
    {
        descriptionText = "MISSING";
    }

    ListViewItem row = null;

    if (!this.DisplayListView.Items.ContainsKey(rowKey))
    {
        // No row yet: build one and hand it back through ListViewItems.
        try
        {
            row = new ListViewItem(rowKey);
            row.UseItemStyleForSubItems = false;
            row.Name = rowKey;
            row.SubItems[ColUrl].Text = Url;

            string[] cells =
            {
                pageLang,
                detectedLang,
                occurrenceCount.ToString(),
                descriptionText,
                descriptionLen.ToString()
            };
            foreach (string cell in cells)
            {
                row.SubItems.Add(cell);
            }

            ListViewItems.Add(row);
        }
        catch (Exception ex)
        {
            DebugMsg(string.Format("MacroscopeDisplayDescriptions 2: {0}", ex.Message));
        }
    }
    else
    {
        // Row already displayed: refresh its cell text in place.
        try
        {
            row = this.DisplayListView.Items[rowKey];
            row.SubItems[ColUrl].Text = Url;
            row.SubItems[ColPageLanguage].Text = pageLang;
            row.SubItems[ColDetectedLanguage].Text = detectedLang;
            row.SubItems[ColOccurences].Text = occurrenceCount.ToString();
            row.SubItems[ColDescriptionText].Text = descriptionText;
            row.SubItems[ColLength].Text = descriptionLen.ToString();
        }
        catch (Exception ex)
        {
            DebugMsg(string.Format("MacroscopeDisplayDescriptions 1: {0}", ex.Message));
        }
    }

    if (row == null)
    {
        return;
    }

    row.ForeColor = Color.Blue;

    // URL -------------------------------------------------------------//

    row.SubItems[ColUrl].ForeColor = msDoc.GetIsInternal() ? Color.Green : Color.Gray;

    // Description Language --------------------------------------------//

    Color languageColor;
    if (!msDoc.GetIsInternal())
    {
        languageColor = Color.Gray;
    }
    else if (detectedLang != pageLang)
    {
        languageColor = Color.Red;
    }
    else
    {
        languageColor = Color.Green;
    }
    row.SubItems[ColPageLanguage].ForeColor = languageColor;
    row.SubItems[ColDetectedLanguage].ForeColor = languageColor;

    // Check Description Length ----------------------------------------//

    Color lengthColor;
    if (msDoc.GetIsInternal())
    {
        bool outOfRange =
            descriptionLen < MacroscopePreferencesManager.GetDescriptionMinLen()
            || descriptionLen > MacroscopePreferencesManager.GetDescriptionMaxLen();

        if (outOfRange)
        {
            // Only an out-of-range length touches the URL colour set above.
            row.SubItems[ColUrl].ForeColor = Color.Red;
        }
        lengthColor = outOfRange ? Color.Red : Color.Green;
    }
    else
    {
        lengthColor = Color.Gray;
    }

    // Note: this also recolours the detected-language cell set in the previous section.
    foreach (int column in new[] { ColOccurences, ColDetectedLanguage, ColDescriptionText, ColLength })
    {
        row.SubItems[column].ForeColor = lengthColor;
    }
}
/** Render One ************************************************************/

/// <summary>
/// Renders one document as a row of the structure list view.
/// Column values are collected into a key/value map, the row keyed by the URL
/// digest is looked up in the display list view (or created and appended to
/// <paramref name="ListViewItems"/>), and each value is written to the column
/// whose key matches. The whole operation runs under <c>DisplayListViewLock</c>.
/// </summary>
/// <remarks>
/// Cells default to green for internal documents and gray otherwise. The robots
/// rule and redirect cells get their own colours. The status-code colour is
/// applied to the StatusCode and Status columns after the per-column pass, so the
/// pass's default colour for the Status column cannot overwrite it regardless of
/// dictionary enumeration order. Keys with no matching list view column are
/// skipped and reported through <c>DebugMsg</c>.
/// </remarks>
/// <param name="ListViewItems">Receives any newly created row.</param>
/// <param name="DocCollection">Not used by the active code in this method.</param>
/// <param name="msDoc">Document whose properties populate the row.</param>
/// <param name="Url">URL whose digest is used as the row key.</param>
protected override void RenderListView(
    List<ListViewItem> ListViewItems,
    MacroscopeDocumentCollection DocCollection,
    MacroscopeDocument msDoc,
    string Url
)
{
    lock (this.DisplayListViewLock)
    {
        Dictionary<string, string> StructureItems = new Dictionary<string, string>();
        ListViewItem lvItem = null;

        string TitleLanguage = msDoc.GetTitleLanguage() ?? "";
        string DescriptionLanguage = msDoc.GetDescriptionLanguage() ?? "";
        string BodyTextLanguage = msDoc.GetDocumentTextLanguage() ?? "";
        int StatusCode = (int)msDoc.GetStatusCode();
        string PairKey = UrlToDigest(Url).ToString();

        // BEGIN: Columns ----------------------------------------------------//

        StructureItems.Add(MacroscopeConstants.Url, msDoc.GetUrl());
        StructureItems.Add(MacroscopeConstants.StatusCode, StatusCode.ToString());
        StructureItems.Add(MacroscopeConstants.Status, msDoc.GetStatusCode().ToString());
        StructureItems.Add(MacroscopeConstants.IsRedirect, msDoc.GetIsRedirect().ToString());
        StructureItems.Add(MacroscopeConstants.RobotsRule, msDoc.GetAllowedByRobotsAsString());
        StructureItems.Add(MacroscopeConstants.Duration, msDoc.GetDurationInSecondsFormatted());
        StructureItems.Add(MacroscopeConstants.ContentType, msDoc.GetMimeType());
        StructureItems.Add(MacroscopeConstants.Charset, msDoc.GetCharacterSet() ?? "");
        StructureItems.Add(MacroscopeConstants.Locale, msDoc.GetLocale() ?? "");
        StructureItems.Add(MacroscopeConstants.Language, msDoc.GetIsoLanguageCode() ?? "");

        StructureItems.Add(MacroscopeConstants.DateCrawled, msDoc.GetCrawledDate());
        StructureItems.Add(MacroscopeConstants.DateServer, msDoc.GetDateServer());
        StructureItems.Add(MacroscopeConstants.DateModified, msDoc.GetDateModified());
        StructureItems.Add(MacroscopeConstants.DateExpires, msDoc.GetDateExpires());

        StructureItems.Add(MacroscopeConstants.Canonical, msDoc.GetCanonical());
        StructureItems.Add(MacroscopeConstants.PageDepth, msDoc.GetDepth().ToString());

        StructureItems.Add(MacroscopeConstants.Inlinks, msDoc.CountInlinks().ToString());
        StructureItems.Add(MacroscopeConstants.Outlinks, msDoc.CountOutlinks().ToString());
        StructureItems.Add(MacroscopeConstants.HyperlinksIn, msDoc.CountHyperlinksIn().ToString());
        StructureItems.Add(MacroscopeConstants.HyperlinksOut, msDoc.CountHyperlinksOut().ToString());

        // TODO: This is too slow:
        /*
         * {
         *   List<decimal> HyperlinkRatio = DocCollection.GetDocumentHyperlinksRatio( Url: Url );
         *   StructureItems.Add( MacroscopeConstants.HyperlinksInRatio, string.Format( "{0:0.00}%", HyperlinkRatio[ 0 ] ) );
         *   StructureItems.Add( MacroscopeConstants.HyperlinksOutRatio, string.Format( "{0:0.00}%", HyperlinkRatio[ 1 ] ) );
         * }
         */

        StructureItems.Add(MacroscopeConstants.Author, msDoc.GetAuthor());

        StructureItems.Add(MacroscopeConstants.Title, msDoc.GetTitle());
        StructureItems.Add(MacroscopeConstants.TitleLen, msDoc.GetTitleLength().ToString());
        StructureItems.Add(MacroscopeConstants.TitleLang, TitleLanguage);

        StructureItems.Add(MacroscopeConstants.Description, msDoc.GetDescription());
        StructureItems.Add(MacroscopeConstants.DescriptionLen, msDoc.GetDescriptionLength().ToString());
        StructureItems.Add(MacroscopeConstants.DescriptionLang, DescriptionLanguage);

        StructureItems.Add(MacroscopeConstants.Keywords, msDoc.GetKeywords());
        StructureItems.Add(MacroscopeConstants.KeywordsLen, msDoc.GetKeywordsLength().ToString());
        StructureItems.Add(MacroscopeConstants.KeywordsCount, msDoc.GetKeywordsCount().ToString());

        StructureItems.Add(MacroscopeConstants.BodyTextLang, BodyTextLanguage);

        // Only the first heading of each level is shown.
        for (ushort HeadingLevel = 1; HeadingLevel <= MaxHeadingsDisplayed; HeadingLevel++)
        {
            List<string> HeadingList = msDoc.GetHeadings(HeadingLevel: HeadingLevel);
            string HeadingText = "";
            if (HeadingList.Count > 0)
            {
                HeadingText = HeadingList[0];
            }
            StructureItems.Add(string.Format(MacroscopeConstants.Hn, HeadingLevel), HeadingText);
        }

        StructureItems.Add(MacroscopeConstants.ErrorCondition, msDoc.GetErrorCondition());

        // END: Columns ------------------------------------------------------//

        if (this.DisplayListView.Items.ContainsKey(PairKey))
        {
            lvItem = this.DisplayListView.Items[PairKey];
        }
        else
        {
            lvItem = new ListViewItem(PairKey);
            lvItem.UseItemStyleForSubItems = false;
            lvItem.Name = PairKey;
            for (int i = 0; i < this.DisplayListView.Columns.Count; i++)
            {
                lvItem.SubItems.Add("");
            }
            ListViewItems.Add(lvItem);
        }

        if (lvItem != null)
        {
            lvItem.ForeColor = Color.Blue;

            // Loop-invariant: every cell starts from the same default colour.
            Color DefaultColor = msDoc.GetIsInternal() ? Color.Green : Color.Gray;

            foreach (KeyValuePair<string, string> Column in StructureItems)
            {
                int ColIndex = this.DisplayListView.Columns.IndexOfKey(Column.Key);
                if (ColIndex < 0)
                {
                    // IndexOfKey yields -1 when no column carries this key; indexing
                    // SubItems with it would throw, so skip the value instead.
                    DebugMsg(string.Format("MacroscopeDisplayStructure: {0}", "no column for key " + Column.Key));
                    continue;
                }

                // Null values are rendered as empty text.
                string Text = Column.Value ?? "";
                Color CellColor = DefaultColor;

                // Ordinal, case-insensitive comparison: independent of the current
                // culture's casing rules and safe for empty text.
                if (Column.Key == MacroscopeConstants.RobotsRule)
                {
                    CellColor = string.Equals(Text, "disallowed", System.StringComparison.OrdinalIgnoreCase)
                        ? Color.Red
                        : Color.Green;
                }
                else if (Column.Key == MacroscopeConstants.IsRedirect)
                {
                    CellColor = string.Equals(Text, "true", System.StringComparison.OrdinalIgnoreCase)
                        ? Color.Red
                        : Color.Gray;
                }

                lvItem.SubItems[ColIndex].Text = Text;
                lvItem.SubItems[ColIndex].ForeColor = CellColor;
            }

            // Status colouring is applied last so that the default colour written
            // for the Status column in the pass above cannot replace it.
            Color StatusColor;
            if (StatusCode == 410)
            {
                StatusColor = Color.Purple;
            }
            else if ((StatusCode >= 200) && (StatusCode <= 299))
            {
                StatusColor = Color.Green;
            }
            else if ((StatusCode >= 300) && (StatusCode <= 399))
            {
                StatusColor = Color.Goldenrod;
            }
            else if ((StatusCode >= 400) && (StatusCode <= 599))
            {
                StatusColor = Color.Red;
            }
            else
            {
                StatusColor = Color.Blue;
            }

            int StatusCodeColIndex = this.DisplayListView.Columns.IndexOfKey(MacroscopeConstants.StatusCode);
            int StatusColIndex = this.DisplayListView.Columns.IndexOfKey(MacroscopeConstants.Status);

            if (StatusCodeColIndex >= 0)
            {
                lvItem.SubItems[StatusCodeColIndex].ForeColor = StatusColor;
            }
            if (StatusColIndex >= 0)
            {
                lvItem.SubItems[StatusColIndex].ForeColor = StatusColor;
            }
        }
        else
        {
            DebugMsg(string.Format("MacroscopeDisplayStructure: {0}", "lvItem is NULL"));
        }
    }
}