Exemplo n.º 1
0
        /// <summary>
        /// Enables language detection, sets a 10 second request timeout, then
        /// executes a <c>MacroscopeDocument</c> for every URL in a fixed list,
        /// ten passes in total, asserting that the title, description and body
        /// text languages are each reported as "en".
        /// </summary>
        public void TestDetectLanguage()
        {
            const int passCount = 10;
            const string expectedLanguage = "en";

            var urls = new List<string> { "https://nazuke.github.io/SEOMacroscope/" };

            MacroscopePreferencesManager.SetDetectLanguage(Enabled: true);
            MacroscopePreferencesManager.SetRequestTimeout(Seconds: 10);

            for (int pass = 0; pass < passCount; pass++)
            {
                foreach (string url in urls)
                {
                    var document = new MacroscopeDocument(Url: url);

                    // Basic sanity: the document exists, executes, is HTML and has a title.
                    Assert.IsNotNull(document, string.Format("FAIL: {0}", url));
                    Assert.IsTrue(document.Execute(), string.Format("FAIL: {0}", "Execute()"));
                    Assert.IsTrue(document.GetIsHtml(), string.Format("FAIL: {0}", url));
                    Assert.IsNotNullOrEmpty(document.GetTitle(), string.Format("FAIL: {0}", document.GetTitle()));

                    // Read all three detected languages before asserting on any of them.
                    string titleLanguage       = document.GetTitleLanguage();
                    string descriptionLanguage = document.GetDescriptionLanguage();
                    string bodyLanguage        = document.GetDocumentTextLanguage();

                    var detected = new[]
                    {
                        new KeyValuePair<string, string>("LanguageTitle", titleLanguage),
                        new KeyValuePair<string, string>("LanguageDescription", descriptionLanguage),
                        new KeyValuePair<string, string>("LanguageBodyText", bodyLanguage),
                    };

                    foreach (KeyValuePair<string, string> entry in detected)
                    {
                        Assert.AreEqual(expectedLanguage, entry.Value, string.Format("FAIL: {0} :: {1}", entry.Key, entry.Value));
                    }
                }
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Resets preferences to their defaults, enables language detection and
        /// sets a 10 second request timeout, then creates and executes a document
        /// through a <c>MacroscopeDocumentCollection</c> for every URL in a fixed
        /// list, ten passes in total, asserting that the title, description and
        /// body text languages are each reported as "en".
        /// </summary>
        public async Task TestDetectLanguage()
        {
            const int passCount = 10;
            const string expectedLanguage = "en";

            var urls = new List<string> { "https://nazuke.github.io/SEOMacroscope/" };

            MacroscopePreferencesManager.SetDefaultValues();
            MacroscopePreferencesManager.SetDetectLanguage(Enabled: true);
            MacroscopePreferencesManager.SetRequestTimeout(Seconds: 10);

            // This test fixture itself is handed over as the job's task controller.
            var jobMaster = new MacroscopeJobMaster(
                JobRunTimeMode: MacroscopeConstants.RunTimeMode.LIVE,
                TaskController: this
                );

            var documents = new MacroscopeDocumentCollection(JobMaster: jobMaster);

            for (int pass = 0; pass < passCount; pass++)
            {
                foreach (string url in urls)
                {
                    MacroscopeDocument document = documents.CreateDocument(Url: url);

                    Assert.IsNotNull(document, string.Format("FAIL: {0}", url));

                    bool executed = await document.Execute();

                    // Basic sanity: execution succeeded, the document is HTML and has a title.
                    Assert.IsTrue(executed, string.Format("FAIL: {0}", "Execute()"));
                    Assert.IsTrue(document.IsDocumentType(Type: MacroscopeConstants.DocumentType.HTML), string.Format("FAIL: {0}", url));
                    Assert.IsNotNull(document.GetTitle(), string.Format("FAIL: {0}", document.GetTitle()));
                    Assert.IsNotEmpty(document.GetTitle(), string.Format("FAIL: {0}", document.GetTitle()));

                    // Read all three detected languages before asserting on any of them.
                    string titleLanguage       = document.GetTitleLanguage();
                    string descriptionLanguage = document.GetDescriptionLanguage();
                    string bodyLanguage        = document.GetDocumentTextLanguage();

                    var detected = new[]
                    {
                        new KeyValuePair<string, string>("LanguageTitle", titleLanguage),
                        new KeyValuePair<string, string>("LanguageDescription", descriptionLanguage),
                        new KeyValuePair<string, string>("LanguageBodyText", bodyLanguage),
                    };

                    foreach (KeyValuePair<string, string> entry in detected)
                    {
                        Assert.AreEqual(expectedLanguage, entry.Value, string.Format("FAIL: {0} :: {1}", entry.Key, entry.Value));
                    }
                }
            }
        }
Exemplo n.º 3
0
        /**************************************************************************/

        /// <summary>
        /// Adds or refreshes the description row for a single document.
        /// </summary>
        /// <remarks>
        /// External documents, redirects, and documents that are neither HTML nor
        /// PDF are ignored. The row key is the URL digest and the description
        /// digest joined by ":". A row already present in <c>DisplayListView</c>
        /// is updated in place; otherwise a new row is appended to
        /// <paramref name="ListViewItems"/>. Failures while filling a row are
        /// reported through <c>DebugMsg</c> and do not propagate.
        /// </remarks>
        /// <param name="ListViewItems">Receives any newly created row.</param>
        /// <param name="DocCollection">Source of the occurrence count (<c>GetStatsDescriptionCount</c>).</param>
        /// <param name="msDoc">Document whose description is rendered.</param>
        /// <param name="Url">URL shown in the row and used to build the row key.</param>
        protected override void RenderListView(
            List <ListViewItem> ListViewItems,
            MacroscopeDocumentCollection DocCollection,
            MacroscopeDocument msDoc,
            string Url
            )
        {
            if (msDoc.GetIsExternal())
            {
                return;
            }

            if (msDoc.GetIsRedirect())
            {
                return;
            }

            // Only HTML and PDF documents are listed.
            switch (msDoc.GetDocumentType())
            {
            case MacroscopeConstants.DocumentType.HTML:
            case MacroscopeConstants.DocumentType.PDF:
                break;

            default:
                return;
            }

            string pageLanguage      = msDoc.GetIsoLanguageCode();
            string detectedLanguage  = msDoc.GetDescriptionLanguage();
            string description       = msDoc.GetDescription();
            int    descriptionLength = msDoc.GetDescriptionLength();

            // The key is derived from the raw description, before any "MISSING" substitution.
            string rowKey = string.Join(":", UrlToDigest(Url), UrlToDigest(description));

            pageLanguage     = string.IsNullOrEmpty(pageLanguage) ? "" : pageLanguage;
            detectedLanguage = string.IsNullOrEmpty(detectedLanguage) ? "" : detectedLanguage;

            int occurrences = 0;

            if (descriptionLength <= 0)
            {
                description = "MISSING";
            }
            else
            {
                occurrences = DocCollection.GetStatsDescriptionCount(msDoc: msDoc);
            }

            ListViewItem row = null;

            if (!this.DisplayListView.Items.ContainsKey(rowKey))
            {
                // No row yet: build one and hand it back through ListViewItems.
                try
                {
                    row = new ListViewItem(rowKey);
                    row.UseItemStyleForSubItems = false;
                    row.Name = rowKey;

                    row.SubItems[ColUrl].Text = Url;

                    string[] remainingCells =
                    {
                        pageLanguage,
                        detectedLanguage,
                        occurrences.ToString(),
                        description,
                        descriptionLength.ToString(),
                    };

                    foreach (string cell in remainingCells)
                    {
                        row.SubItems.Add(cell);
                    }

                    ListViewItems.Add(row);
                }
                catch (Exception ex)
                {
                    DebugMsg(string.Format("MacroscopeDisplayDescriptions 2: {0}", ex.Message));
                }
            }
            else
            {
                // Row already displayed: overwrite its cells.
                try
                {
                    row = this.DisplayListView.Items[rowKey];
                    row.SubItems[ColUrl].Text              = Url;
                    row.SubItems[ColPageLanguage].Text     = pageLanguage;
                    row.SubItems[ColDetectedLanguage].Text = detectedLanguage;
                    row.SubItems[ColOccurences].Text       = occurrences.ToString();
                    row.SubItems[ColDescriptionText].Text  = description;
                    row.SubItems[ColLength].Text           = descriptionLength.ToString();
                }
                catch (Exception ex)
                {
                    DebugMsg(string.Format("MacroscopeDisplayDescriptions 1: {0}", ex.Message));
                }
            }

            if (row == null)
            {
                return;
            }

            row.ForeColor = Color.Blue;

            // URL: green for internal documents, gray otherwise.
            row.SubItems[ColUrl].ForeColor = msDoc.GetIsInternal() ? Color.Green : Color.Gray;

            // Language columns: red when the detected and declared languages differ.
            Color languageColor;

            if (!msDoc.GetIsInternal())
            {
                languageColor = Color.Gray;
            }
            else if (detectedLanguage != pageLanguage)
            {
                languageColor = Color.Red;
            }
            else
            {
                languageColor = Color.Green;
            }

            row.SubItems[ColPageLanguage].ForeColor     = languageColor;
            row.SubItems[ColDetectedLanguage].ForeColor = languageColor;

            // Length check: this also repaints the detected-language column,
            // replacing the color chosen by the language comparison above.
            Color lengthColor;
            bool  lengthOutOfBounds = false;

            if (msDoc.GetIsInternal())
            {
                lengthOutOfBounds =
                    (descriptionLength < MacroscopePreferencesManager.GetDescriptionMinLen())
                    || (descriptionLength > MacroscopePreferencesManager.GetDescriptionMaxLen());

                lengthColor = lengthOutOfBounds ? Color.Red : Color.Green;
            }
            else
            {
                lengthColor = Color.Gray;
            }

            if (lengthOutOfBounds)
            {
                row.SubItems[ColUrl].ForeColor = Color.Red;
            }

            row.SubItems[ColOccurences].ForeColor       = lengthColor;
            row.SubItems[ColDetectedLanguage].ForeColor = lengthColor;
            row.SubItems[ColDescriptionText].ForeColor  = lengthColor;
            row.SubItems[ColLength].ForeColor           = lengthColor;
        }
Exemplo n.º 4
0
        /** Render One ************************************************************/

        /// <summary>
        /// Adds or refreshes the structure-overview row for a single document.
        /// </summary>
        /// <remarks>
        /// Runs under <c>DisplayListViewLock</c>. Column values are collected into a
        /// dictionary keyed by column key, then written into the row whose key is the
        /// digest of <paramref name="Url"/>. A row already present in
        /// <c>DisplayListView</c> is reused; otherwise a new row with one empty cell
        /// per column is created and appended to <paramref name="ListViewItems"/>.
        /// Cells default to green (internal document) or gray; the robots-rule,
        /// redirect and status cells get their own colors.
        /// </remarks>
        /// <param name="ListViewItems">Receives any newly created row.</param>
        /// <param name="DocCollection">Not used by this implementation (only referenced in commented-out code).</param>
        /// <param name="msDoc">Document whose properties are rendered.</param>
        /// <param name="Url">URL used to derive the row key.</param>
        protected override void RenderListView(
            List <ListViewItem> ListViewItems,
            MacroscopeDocumentCollection DocCollection,
            MacroscopeDocument msDoc,
            string Url
            )
        {
            lock (this.DisplayListViewLock)
            {
                Dictionary <string, string> StructureItems = new Dictionary <string, string>();

                ListViewItem lvItem = null;

                string TitleLanguage       = msDoc.GetTitleLanguage() ?? "";
                string DescriptionLanguage = msDoc.GetDescriptionLanguage() ?? "";
                string BodyTextLanguage    = msDoc.GetDocumentTextLanguage() ?? "";
                int    StatusCode          = (int)msDoc.GetStatusCode();
                string PairKey             = UrlToDigest(Url).ToString();

                // BEGIN: Columns ----------------------------------------------------//

                StructureItems.Add(MacroscopeConstants.Url, msDoc.GetUrl());

                StructureItems.Add(MacroscopeConstants.StatusCode, StatusCode.ToString());
                StructureItems.Add(MacroscopeConstants.Status, msDoc.GetStatusCode().ToString());
                StructureItems.Add(MacroscopeConstants.IsRedirect, msDoc.GetIsRedirect().ToString());

                StructureItems.Add(MacroscopeConstants.RobotsRule, msDoc.GetAllowedByRobotsAsString());

                StructureItems.Add(MacroscopeConstants.Duration, msDoc.GetDurationInSecondsFormatted());

                StructureItems.Add(MacroscopeConstants.ContentType, msDoc.GetMimeType());

                {
                    string Charset = msDoc.GetCharacterSet();
                    StructureItems.Add(MacroscopeConstants.Charset, Charset ?? "");
                }

                {
                    string LocaleCode = msDoc.GetLocale();
                    StructureItems.Add(MacroscopeConstants.Locale, LocaleCode ?? "");
                }

                {
                    string LanguageCode = msDoc.GetIsoLanguageCode();
                    StructureItems.Add(MacroscopeConstants.Language, LanguageCode ?? "");
                }

                StructureItems.Add(MacroscopeConstants.DateCrawled, msDoc.GetCrawledDate());

                StructureItems.Add(MacroscopeConstants.DateServer, msDoc.GetDateServer());
                StructureItems.Add(MacroscopeConstants.DateModified, msDoc.GetDateModified());
                StructureItems.Add(MacroscopeConstants.DateExpires, msDoc.GetDateExpires());

                StructureItems.Add(MacroscopeConstants.Canonical, msDoc.GetCanonical());

                StructureItems.Add(MacroscopeConstants.PageDepth, msDoc.GetDepth().ToString());

                StructureItems.Add(MacroscopeConstants.Inlinks, msDoc.CountInlinks().ToString());
                StructureItems.Add(MacroscopeConstants.Outlinks, msDoc.CountOutlinks().ToString());

                StructureItems.Add(MacroscopeConstants.HyperlinksIn, msDoc.CountHyperlinksIn().ToString());
                StructureItems.Add(MacroscopeConstants.HyperlinksOut, msDoc.CountHyperlinksOut().ToString());

                // TODO: This is too slow:

                /*
                 * {
                 * List<decimal> HyperlinkRatio = DocCollection.GetDocumentHyperlinksRatio( Url: Url );
                 * StructureItems.Add( MacroscopeConstants.HyperlinksInRatio, string.Format( "{0:0.00}%", HyperlinkRatio[ 0 ] ) );
                 * StructureItems.Add( MacroscopeConstants.HyperlinksOutRatio, string.Format( "{0:0.00}%", HyperlinkRatio[ 1 ] ) );
                 * }
                 */

                StructureItems.Add(MacroscopeConstants.Author, msDoc.GetAuthor());

                StructureItems.Add(MacroscopeConstants.Title, msDoc.GetTitle());
                StructureItems.Add(MacroscopeConstants.TitleLen, msDoc.GetTitleLength().ToString());
                StructureItems.Add(MacroscopeConstants.TitleLang, TitleLanguage);

                StructureItems.Add(MacroscopeConstants.Description, msDoc.GetDescription());
                StructureItems.Add(MacroscopeConstants.DescriptionLen, msDoc.GetDescriptionLength().ToString());
                StructureItems.Add(MacroscopeConstants.DescriptionLang, DescriptionLanguage);

                StructureItems.Add(MacroscopeConstants.Keywords, msDoc.GetKeywords());
                StructureItems.Add(MacroscopeConstants.KeywordsLen, msDoc.GetKeywordsLength().ToString());
                StructureItems.Add(MacroscopeConstants.KeywordsCount, msDoc.GetKeywordsCount().ToString());

                StructureItems.Add(MacroscopeConstants.BodyTextLang, BodyTextLanguage);

                // One column per heading level, showing only the first heading found at that level.
                for (ushort HeadingLevel = 1; HeadingLevel <= MaxHeadingsDisplayed; HeadingLevel++)
                {
                    List <string> HeadingList = msDoc.GetHeadings(HeadingLevel: HeadingLevel);
                    string        HeadingText = "";
                    if (HeadingList.Count > 0)
                    {
                        HeadingText = HeadingList[0];
                    }
                    StructureItems.Add(string.Format(MacroscopeConstants.Hn, HeadingLevel), HeadingText);
                }

                StructureItems.Add(MacroscopeConstants.ErrorCondition, msDoc.GetErrorCondition());

                // END: Columns ------------------------------------------------------//

                if (this.DisplayListView.Items.ContainsKey(PairKey))
                {
                    lvItem = this.DisplayListView.Items[PairKey];
                }
                else
                {
                    lvItem = new ListViewItem(PairKey);
                    lvItem.UseItemStyleForSubItems = false;
                    lvItem.Name = PairKey;

                    for (int i = 0; i < this.DisplayListView.Columns.Count; i++)
                    {
                        lvItem.SubItems.Add("");
                    }

                    ListViewItems.Add(lvItem);
                }

                if (lvItem != null)
                {
                    lvItem.ForeColor = Color.Blue;

                    // Loop-invariant: the default cell color depends only on the document.
                    Color DefaultColor = msDoc.GetIsInternal() ? Color.Green : Color.Gray;

                    foreach (KeyValuePair <string, string> Item in StructureItems)
                    {
                        string ItemsKey = Item.Key;
                        int    ColIndex = this.DisplayListView.Columns.IndexOfKey(ItemsKey);

                        // IndexOfKey yields -1 for an unknown key; indexing SubItems with
                        // that would throw, so report the missing column and move on.
                        if ((ColIndex < 0) || (ColIndex >= lvItem.SubItems.Count))
                        {
                            DebugMsg(string.Format("MacroscopeDisplayStructure: {0}", "no cell for column " + ItemsKey));
                            continue;
                        }

                        string Text = Item.Value ?? "";

                        ListViewItem.ListViewSubItem Cell = lvItem.SubItems[ColIndex];

                        Cell.Text      = Text;
                        Cell.ForeColor = DefaultColor;

                        // Ordinal, case-insensitive comparisons: culture-independent and null-safe.
                        if (ItemsKey == MacroscopeConstants.RobotsRule)
                        {
                            bool IsDisallowed = string.Equals(Text, "disallowed", System.StringComparison.OrdinalIgnoreCase);
                            Cell.ForeColor = IsDisallowed ? Color.Red : Color.Green;
                        }

                        if (ItemsKey == MacroscopeConstants.IsRedirect)
                        {
                            bool IsRedirect = string.Equals(Text, "true", System.StringComparison.OrdinalIgnoreCase);
                            Cell.ForeColor = IsRedirect ? Color.Red : Color.Gray;
                        }
                    }

                    // Status coloring is applied after the loop. Doing it while visiting the
                    // StatusCode entry let the later visit of the Status entry repaint that
                    // cell with the default color (Dictionary enumerates in insertion order
                    // in practice), so the Status column never kept its status color.
                    Color StatusColor;

                    if (StatusCode == 410)
                    {
                        StatusColor = Color.Purple;
                    }
                    else if ((StatusCode >= 200) && (StatusCode <= 299))
                    {
                        StatusColor = Color.Green;
                    }
                    else if ((StatusCode >= 300) && (StatusCode <= 399))
                    {
                        StatusColor = Color.Goldenrod;
                    }
                    else if ((StatusCode >= 400) && (StatusCode <= 599))
                    {
                        StatusColor = Color.Red;
                    }
                    else
                    {
                        StatusColor = Color.Blue;
                    }

                    int[] StatusColumns =
                    {
                        this.DisplayListView.Columns.IndexOfKey(MacroscopeConstants.StatusCode),
                        this.DisplayListView.Columns.IndexOfKey(MacroscopeConstants.Status),
                    };

                    foreach (int StatusColumn in StatusColumns)
                    {
                        if ((StatusColumn >= 0) && (StatusColumn < lvItem.SubItems.Count))
                        {
                            lvItem.SubItems[StatusColumn].ForeColor = StatusColor;
                        }
                    }
                }
                else
                {
                    DebugMsg(string.Format("MacroscopeDisplayStructure: {0}", "lvItem is NULL"));
                }
            }
        }