// Extracts pure-text outlinks from the fixture text and checks that each one is an
// expected link and that exactly five were found.
public void TestLinksInTextDocs()
        {
            const string TargetUrl = @"https://nazuke.github.io/dummy.txt";

            MacroscopeJobMaster Master = new MacroscopeJobMaster(
                JobRunTimeMode: MacroscopeConstants.RunTimeMode.LIVE,
                TaskController: this
                );

            MacroscopeDocumentCollection Collection = new MacroscopeDocumentCollection(JobMaster: Master);

            MacroscopeDocument Doc = Collection.CreateDocument(Url: TargetUrl);

            Assert.IsNotNull(Doc, string.Format("FAIL: {0}", TargetUrl));

            // Feed the fixture text straight into the link extractor; nothing is fetched by this call.
            Doc.ProcessPureTextOutlinks(
                TextDoc: this.TextDoc,
                LinkType: MacroscopeConstants.InOutLinkType.PURETEXT
                );

            foreach (MacroscopeLink Found in Doc.IterateOutlinks())
            {
                Assert.Contains(Found.GetTargetUrl(), this.TextLinks);
            }

            Assert.AreEqual(5, Doc.CountOutlinks());
        }
        /** SEARCH INDEX: AND METHOD **********************************************/

        /// <summary>
        /// Returns the documents that the inverted index associates with every one of the given terms.
        /// </summary>
        /// <param name="Terms">Search terms; each is looked up in <c>InvertedIndex</c> exactly as given.</param>
        /// <returns>
        /// The documents whose accumulated hit count equals <c>Terms.Length</c>; an empty list when none qualify.
        /// </returns>
        public List <MacroscopeDocument> ExecuteSearchForDocumentsAND(string [] Terms)
        {
            // Number of term hits accumulated per document.
            Dictionary<MacroscopeDocument, int> HitCounts = new Dictionary<MacroscopeDocument, int>();

            foreach (string Term in Terms)
            {
                if (!InvertedIndex.ContainsKey(Term))
                {
                    continue;
                }

                foreach (string Url in InvertedIndex[Term].Keys)
                {
                    MacroscopeDocument Hit = InvertedIndex[Term][Url];
                    int Seen;

                    // TryGetValue leaves Seen at 0 for a document encountered for the first time.
                    HitCounts.TryGetValue(Hit, out Seen);
                    HitCounts[Hit] = Seen + 1;
                }
            }

            List<MacroscopeDocument> Matches = new List<MacroscopeDocument>();

            foreach (KeyValuePair<MacroscopeDocument, int> Entry in HitCounts)
            {
                if (Entry.Value == Terms.Length)
                {
                    Matches.Add(Entry.Key);
                }
            }

            return Matches;
        }
        // For every "good" HTML fixture: loads the meta keywords and body text into a document
        // and checks that the keyword analyzer reports each keyword as present in the body text.
        public void TestGoodKeywords()
        {
            foreach (string FixtureKey in this.GoodHtmlDocs.Keys)
            {
                MacroscopeDocument Doc = new MacroscopeDocument(Url: "https://nazuke.github.io/");
                Doc.SetDocumentType(Type: MacroscopeConstants.DocumentType.HTML);

                HtmlDocument Parsed = new HtmlDocument();
                Parsed.LoadHtml(html: this.GoodHtmlDocs[FixtureKey]);

                List<string> TextChunks = Doc.GetNodeText(Node: Parsed.DocumentNode);

                string MetaKeywords = Parsed.DocumentNode
                                      .SelectSingleNode("//meta[@name='keywords']")
                                      .GetAttributeValue(name: "content", def: "");
                string Body = string.Join(" ", TextChunks.ToArray());

                Assert.IsNotEmpty(MetaKeywords, "Keywords is empty");

                Doc.SetKeywords(MetaKeywords);
                Doc.SetDocumentText(Text: Body);

                MacroscopeIntenseKeywordAnalysis Analyzer = new MacroscopeIntenseKeywordAnalysis();

                List<KeyValuePair<string, MacroscopeIntenseKeywordAnalysis.KEYWORD_STATUS>> Presence =
                    Analyzer.AnalyzeKeywordPresence(msDoc: Doc);

                foreach (KeyValuePair<string, MacroscopeIntenseKeywordAnalysis.KEYWORD_STATUS> Entry in Presence)
                {
                    Assert.AreEqual(
                        MacroscopeIntenseKeywordAnalysis.KEYWORD_STATUS.PRESENT_IN_BODY_TEXT,
                        Entry.Value
                        );
                }
            }
        }
        /** Render One ************************************************************/

        /// <summary>
        /// Refreshes the inlink, outlink and hyperlink count cells of the list view row keyed by
        /// <paramref name="Url"/>. Does nothing when no such row exists.
        /// </summary>
        /// <param name="ListViewItems">Not used by this override.</param>
        /// <param name="DocCollection">Not used by this override.</param>
        /// <param name="msDoc">Document supplying the counts.</param>
        /// <param name="Url">Key of the row to update.</param>
        protected override void RenderListView(
            List <ListViewItem> ListViewItems,
            MacroscopeDocumentCollection DocCollection,
            MacroscopeDocument msDoc,
            string Url
            )
        {
            lock (this.DisplayListViewLock)
            {
                if (!this.DisplayListView.Items.ContainsKey(Url))
                {
                    return;
                }

                ListViewItem Row = this.DisplayListView.Items[Url];

                if (Row == null)
                {
                    return;
                }

                // Resolve every column position first, then write the cells.
                int InlinksColumn       = this.DisplayListView.Columns.IndexOfKey(MacroscopeConstants.Inlinks);
                int OutlinksColumn      = this.DisplayListView.Columns.IndexOfKey(MacroscopeConstants.Outlinks);
                int HyperlinksInColumn  = this.DisplayListView.Columns.IndexOfKey(MacroscopeConstants.HyperlinksIn);
                int HyperlinksOutColumn = this.DisplayListView.Columns.IndexOfKey(MacroscopeConstants.HyperlinksOut);

                Row.SubItems[InlinksColumn].Text       = msDoc.CountInlinks().ToString();
                Row.SubItems[OutlinksColumn].Text      = msDoc.CountOutlinks().ToString();
                Row.SubItems[HyperlinksInColumn].Text  = msDoc.CountHyperlinksIn().ToString();
                Row.SubItems[HyperlinksOutColumn].Text = msDoc.CountHyperlinksOut().ToString();
            }
        }
Ejemplo n.º 5
0
        /**************************************************************************/

        /// <summary>
        /// Reports whether <paramref name="HyperlinkOut"/> has already been recorded for
        /// <paramref name="msDoc"/>, recording it when it has not.
        /// </summary>
        /// <param name="msDoc">Document whose visited-link list is consulted.</param>
        /// <param name="HyperlinkOut">Outgoing hyperlink to check and, if new, remember.</param>
        /// <returns><c>true</c> if the link was already recorded; <c>false</c> if it was added by this call.</returns>
        private bool CheckNodeAlreadyVisited(
            MacroscopeDocument msDoc,
            MacroscopeHyperlinkOut HyperlinkOut
            )
        {
            List<MacroscopeHyperlinkOut> VisitedLinks;

            if (!this.NodeVisited.TryGetValue(msDoc, out VisitedLinks))
            {
                // First time this document is seen: start its list.
                VisitedLinks = new List<MacroscopeHyperlinkOut>();
                this.NodeVisited[msDoc] = VisitedLinks;
            }

            if (VisitedLinks.Contains(HyperlinkOut))
            {
                return true;
            }

            VisitedLinks.Add(HyperlinkOut);

            return false;
        }
Ejemplo n.º 6
0
        // Executes a document for each target URL and checks that execution succeeds,
        // that the document reports the same URL, and that it is typed as HTML.
        public async Task TestHtmlDocument()
        {
            string[] Targets = { "https://nazuke.github.io/" };

            MacroscopeJobMaster Master = new MacroscopeJobMaster(
                JobRunTimeMode: MacroscopeConstants.RunTimeMode.LIVE,
                TaskController: this
                );

            MacroscopeDocumentCollection Collection = new MacroscopeDocumentCollection(JobMaster: Master);

            foreach (string Target in Targets)
            {
                string FailureForUrl = string.Format("FAIL: {0}", Target);

                MacroscopeDocument Doc = Collection.CreateDocument(Url: Target);

                Assert.IsNotNull(Doc, FailureForUrl);

                bool Executed = await Doc.Execute();

                Assert.IsTrue(Executed, string.Format("FAIL: {0}", "Execute()"));

                Assert.AreEqual(Target, Doc.GetUrl(), FailureForUrl);

                Assert.IsTrue(Doc.IsDocumentType(Type: MacroscopeConstants.DocumentType.HTML), FailureForUrl);
            }
        }
        /**************************************************************************/

        /// <summary>
        /// Empty override: this implementation renders nothing and leaves
        /// <paramref name="ListViewItems"/> untouched.
        /// </summary>
        /// <param name="ListViewItems">Not used.</param>
        /// <param name="msDoc">Not used.</param>
        /// <param name="Url">Not used.</param>
        protected override void RenderListView(
            List <ListViewItem> ListViewItems,
            MacroscopeDocument msDoc,
            string Url
            )
        {
        }
Ejemplo n.º 8
0
        /**************************************************************************/

        /// <summary>
        /// Writes the XPath data-extraction worksheet: a header row followed by one row for each
        /// non-empty label/value pair extracted from every document the XPath extractors apply to.
        /// </summary>
        /// <param name="JobMaster">Job whose document collection is reported.</param>
        /// <param name="ws">CSV writer receiving the rows.</param>
        private void BuildWorksheetXpaths(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

            // Header row.
            ws.WriteField(MacroscopeConstants.Url);
            ws.WriteField(MacroscopeConstants.StatusCode);
            ws.WriteField(MacroscopeConstants.Status);
            ws.WriteField(MacroscopeConstants.ContentType);
            ws.WriteField("Extracted Label");
            ws.WriteField("Extracted Value");
            ws.NextRecord();

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc = DocCollection.GetDocument(Url);

                // Guard against a key that no longer resolves to a document.
                if (msDoc == null)
                {
                    continue;
                }

                if (!this.DataExtractorXpaths.CanApplyDataExtractorsToDocument(msDoc: msDoc))
                {
                    continue;
                }

                // Only computed for documents that can actually produce rows.
                string Status   = msDoc.GetStatusCode().ToString();
                string MimeType = msDoc.GetMimeType();

                foreach (KeyValuePair <string, string> DataExtractedPair in msDoc.IterateDataExtractedXpaths())
                {
                    string ExtractedLabel = DataExtractedPair.Key;
                    string ExtractedValue = DataExtractedPair.Value;

                    // Rows are only written for complete label/value pairs.
                    if (
                        string.IsNullOrEmpty(ExtractedLabel) ||
                        string.IsNullOrEmpty(ExtractedValue))
                    {
                        continue;
                    }

                    this.InsertAndFormatUrlCell(ws, msDoc);

                    this.InsertAndFormatStatusCodeCell(ws, msDoc);

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(Status));

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(MimeType));

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(ExtractedLabel));

                    this.InsertAndFormatContentCell(ws, this.FormatIfMissing(ExtractedValue));

                    ws.NextRecord();
                }
            }
        }
Ejemplo n.º 9
0
        /**************************************************************************/

        /// <summary>
        /// Removes <paramref name="msDoc"/> from <c>DocumentChain</c> while holding a lock on the chain.
        /// </summary>
        /// <param name="msDoc">Document to remove.</param>
        public void RemoveDocument(MacroscopeDocument msDoc)
        {
            // The chain object itself is the lock target, matching the other chain accessors.
            lock (this.DocumentChain)
            {
                this.DocumentChain.Remove(msDoc);
            }
        }
Ejemplo n.º 10
0
        /**************************************************************************/

        /// <summary>
        /// Records the checksum pairing of the original document and <paramref name="msDocCompare"/>
        /// in both orders, and reports whether either ordering had already been recorded.
        /// </summary>
        /// <param name="msDocCompare">Document being compared against <c>msDocOriginal</c>.</param>
        /// <returns>
        /// <c>true</c> if either "original:compare" or "compare:original" was already present
        /// (which includes the case where both keys are the same string); otherwise <c>false</c>.
        /// </returns>
        private bool CrossCheckDocuments(MacroscopeDocument msDocCompare)
        {
            string ForwardKey = string.Join(":", this.msDocOriginal.GetChecksum(), msDocCompare.GetChecksum());
            string ReverseKey = string.Join(":", msDocCompare.GetChecksum(), this.msDocOriginal.GetChecksum());

            lock (this.CrossCheck)
            {
                bool SeenForward = this.CrossCheck.ContainsKey(ForwardKey);

                if (!SeenForward)
                {
                    this.CrossCheck.Add(ForwardKey, true);
                }

                // Checked after the forward key is stored, so identical keys count as already seen.
                bool SeenReverse = this.CrossCheck.ContainsKey(ReverseKey);

                if (!SeenReverse)
                {
                    this.CrossCheck.Add(ReverseKey, true);
                }

                return SeenForward || SeenReverse;
            }
        }
Ejemplo n.º 11
0
        /**************************************************************************/

        /// <summary>
        /// Returns the document's raw text with HTML entities decoded and then passed through
        /// <c>CleanText</c>. Null or empty raw text is returned unchanged.
        /// </summary>
        /// <param name="msDoc">Document supplying the raw text; receives a remark if entity decoding fails.</param>
        /// <returns>The cleaned text, or the raw value when it is null or empty.</returns>
        public static string CleanDocumentText(MacroscopeDocument msDoc)
        {
            string RawText = msDoc.GetDocumentTextRaw();

            if (string.IsNullOrEmpty(RawText))
            {
                return RawText;
            }

            string Decoded = RawText;

            try
            {
                Decoded = HtmlEntity.DeEntitize(RawText);
            }
            catch (Exception ex)
            {
                // Any decoding failure is handled the same way: log it, flag the document,
                // and carry on cleaning the undecoded text.
                DebugMsgStatic(string.Format("CleanDocumentText: {0}", ex.Message));
                msDoc.AddRemark("CleanDocumentText", "Possibly contains invalid HTML Entities.");
            }

            return CleanText(Text: Decoded);
        }
Ejemplo n.º 12
0
        /**************************************************************************/

        /// <summary>
        /// Writes the URL of <paramref name="msDoc"/> as the next field of the current CSV record.
        /// </summary>
        /// <param name="ws">CSV writer receiving the field.</param>
        /// <param name="msDoc">Document whose URL is written.</param>
        public void InsertAndFormatUrlCell(
            CsvWriter ws,
            MacroscopeDocument msDoc
            )
        {
            string DocumentUrl = msDoc.GetUrl();

            ws.WriteField(DocumentUrl);
        }
Ejemplo n.º 13
0
        /**************************************************************************/

        /// <summary>
        /// Appends <paramref name="msDoc"/> to the end of <c>DocumentChain</c> while holding a lock on the chain.
        /// </summary>
        /// <param name="msDoc">Document to append.</param>
        public void AddDocument(MacroscopeDocument msDoc)
        {
            // The chain object itself is the lock target, matching the other chain accessors.
            lock (this.DocumentChain)
            {
                this.DocumentChain.AddLast(msDoc);
            }
        }
Ejemplo n.º 14
0
        // Executes the sample URL ten times with language detection enabled and checks that the
        // title, description and body text are each detected as "en".
        public void TestDetectLanguage()
        {
            const int Repetitions = 10;

            string[] Targets = { "https://nazuke.github.io/SEOMacroscope/" };

            MacroscopePreferencesManager.SetDetectLanguage(Enabled: true);
            MacroscopePreferencesManager.SetRequestTimeout(Seconds: 10);

            for (int Pass = 0; Pass < Repetitions; Pass++)
            {
                foreach (string Target in Targets)
                {
                    MacroscopeDocument Doc = new MacroscopeDocument(Url: Target);

                    Assert.IsNotNull(Doc, string.Format("FAIL: {0}", Target));
                    Assert.IsTrue(Doc.Execute(), string.Format("FAIL: {0}", "Execute()"));
                    Assert.IsTrue(Doc.GetIsHtml(), string.Format("FAIL: {0}", Target));
                    Assert.IsNotNullOrEmpty(Doc.GetTitle(), string.Format("FAIL: {0}", Doc.GetTitle()));

                    // Read all three detected languages before asserting on any of them.
                    string TitleLanguage       = Doc.GetTitleLanguage();
                    string DescriptionLanguage = Doc.GetDescriptionLanguage();
                    string BodyLanguage        = Doc.GetDocumentTextLanguage();

                    Assert.AreEqual(
                        "en",
                        TitleLanguage,
                        string.Format("FAIL: {0} :: {1}", "LanguageTitle", TitleLanguage)
                        );

                    Assert.AreEqual(
                        "en",
                        DescriptionLanguage,
                        string.Format("FAIL: {0} :: {1}", "LanguageDescription", DescriptionLanguage)
                        );

                    Assert.AreEqual(
                        "en",
                        BodyLanguage,
                        string.Format("FAIL: {0} :: {1}", "LanguageBodyText", BodyLanguage)
                        );
                }
            }
        }
        /** Render One Document *******************************************/

        /// <summary>
        /// Renders a single document into the list view, optionally showing a progress dialogue
        /// while doing so. Does nothing when <paramref name="msDoc"/> is null.
        /// </summary>
        /// <param name="msDoc">Document to render.</param>
        /// <param name="Url">
        /// Not used by this method; the URL passed on to the per-document renderer is taken from
        /// <paramref name="msDoc"/> itself.
        /// </param>
        public virtual void RenderListView(MacroscopeDocument msDoc, string Url)
        {
            if (msDoc == null)
            {
                return;
            }

            List <ListViewItem> ListViewItems = new List <ListViewItem> (1);

            // Read the preference once so the dialogue is opened and closed consistently.
            bool ShowProgress = MacroscopePreferencesManager.GetShowProgressDialogues();

            decimal Count           = 0;
            decimal TotalDocs       = ( decimal )1;
            decimal MajorPercentage = (( decimal )100 / TotalDocs) * Count;

            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

            try
            {
                if (ShowProgress)
                {
                    ProgressForm.ControlBox = false;

                    ProgressForm.UpdatePercentages(
                        Title: "Preparing Display",
                        Message: "Processing document collection for display:",
                        MajorPercentage: MajorPercentage,
                        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                        );
                }

                Application.DoEvents();

                this.RenderListView(
                    ListViewItems: ListViewItems,
                    msDoc: msDoc,
                    Url: msDoc.GetUrl()
                    );

                if (ShowProgress)
                {
                    Count++;
                    MajorPercentage = (( decimal )100 / TotalDocs) * Count;

                    ProgressForm.UpdatePercentages(
                        Title: null,
                        Message: null,
                        MajorPercentage: MajorPercentage,
                        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                        );
                }

                this.DisplayListView.Items.AddRange(ListViewItems.ToArray());

                if (ShowProgress)
                {
                    ProgressForm.DoClose();
                }
            }
            finally
            {
                // Dispose the form even when rendering throws, so it is never leaked.
                ProgressForm.Dispose();
            }
        }
        /**************************************************************************/

        /// <summary>
        /// Creates a fingerprint holder for <paramref name="msDoc"/>. The fingerprint starts as an
        /// empty string and debug messages are suppressed.
        /// </summary>
        /// <param name="msDoc">Document this fingerprint belongs to.</param>
        public MacroscopeLevenshteinFingerprint(MacroscopeDocument msDoc)
        {
            this.SuppressDebugMsg = true;

            this.Document          = msDoc;
            this.Fingerprint       = "";
            // NOTE(review): presumably the lock object guarding Fingerprint; confirm against its users.
            this.FingerprintLocker = new Object();
        }
Ejemplo n.º 17
0
        /**************************************************************************/

        /// <summary>
        /// Writes the string form of the document's redirect flag as the next field of the current CSV record.
        /// </summary>
        /// <param name="ws">CSV writer receiving the field.</param>
        /// <param name="msDoc">Document whose redirect flag is written.</param>
        public void InsertAndFormatRedirectCell(
            CsvWriter ws,
            MacroscopeDocument msDoc
            )
        {
            string RedirectFlag = msDoc.GetIsRedirect().ToString();
            ws.WriteField(RedirectFlag);
        }
Ejemplo n.º 18
0
        /**************************************************************************/

        /// <summary>
        /// Writes the document's robots-permission string as the next field of the current CSV record.
        /// </summary>
        /// <param name="ws">CSV writer receiving the field.</param>
        /// <param name="msDoc">Document whose robots status is written.</param>
        public void InsertAndFormatRobotsCell(
            CsvWriter ws,
            MacroscopeDocument msDoc
            )
        {
            string RobotsStatus = msDoc.GetAllowedByRobotsAsString();
            ws.WriteField(RobotsStatus);
        }
Ejemplo n.º 19
0
        /**************************************************************************/

        /// <summary>
        /// No-op override: this implementation renders nothing and leaves
        /// <paramref name="ListViewItems"/> untouched.
        /// </summary>
        /// <param name="ListViewItems">Not used.</param>
        /// <param name="DocCollection">Not used.</param>
        /// <param name="msDoc">Not used.</param>
        /// <param name="Url">Not used.</param>
        protected override void RenderListView(
            List <ListViewItem> ListViewItems,
            MacroscopeDocumentCollection DocCollection,
            MacroscopeDocument msDoc,
            string Url
            )
        {
            // NO-OP
        }
        /**************************************************************************/

        /// <summary>
        /// Writes the sitemap-errors worksheet: a header row, then one row for each link in an
        /// internal XML sitemap whose target document is internal and either has a 4xx/5xx status
        /// code or is disallowed by robots.
        /// </summary>
        /// <param name="JobMaster">Job whose document collection is reported.</param>
        /// <param name="ws">CSV writer receiving the rows.</param>
        private void BuildWorksheetSitemapErrors(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

            // Header row.
            ws.WriteField("Sitemap URL");
            ws.WriteField("Status Code");
            ws.WriteField("Robots");
            ws.WriteField("URL");
            ws.NextRecord();

            foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
            {
                bool IsInternalSitemap =
                    msDoc.GetIsInternal() &&
                    msDoc.IsDocumentType(Type: MacroscopeConstants.DocumentType.SITEMAPXML);

                if (!IsInternalSitemap)
                {
                    continue;
                }

                foreach (MacroscopeLink Outlink in msDoc.IterateOutlinks())
                {
                    string             TargetUrl   = Outlink.GetTargetUrl();
                    MacroscopeDocument msDocLinked = DocCollection.GetDocumentByUrl(Url: TargetUrl);

                    // A sitemap may list URLs that have no document in the collection;
                    // skip those instead of dereferencing null.
                    if ((msDocLinked == null) || !msDocLinked.GetIsInternal())
                    {
                        continue;
                    }

                    int  StatusCode    = (int)msDocLinked.GetStatusCode();
                    bool IsErrorStatus = (StatusCode >= 400) && (StatusCode <= 599);

                    if (!IsErrorStatus && msDocLinked.GetAllowedByRobots())
                    {
                        continue;
                    }

                    // NOTE(review): the Status Code and Robots cells are written from the sitemap
                    // document (msDoc), not from the failing linked document; confirm this is intended.
                    this.InsertAndFormatUrlCell(ws, msDoc);

                    this.InsertAndFormatStatusCodeCell(ws, msDoc);

                    this.InsertAndFormatRobotsCell(ws, msDoc);

                    this.InsertAndFormatUrlCell(ws, TargetUrl);

                    ws.NextRecord();
                }
            }
        }
Ejemplo n.º 21
0
        /**************************************************************************/

        /// <summary>
        /// Queues the followable, not-yet-seen outlinks of <paramref name="msDoc"/> for crawling,
        /// subject to the job's page limit.
        /// </summary>
        /// <remarks>
        /// For LISTFILE, LISTTEXT and SITEMAP jobs nothing is queued unless the
        /// scan-sites-in-list preference is enabled.
        /// </remarks>
        /// <param name="msDoc">Document whose outlinks are examined.</param>
        private void ProcessOutlinks(MacroscopeDocument msDoc)
        {
            bool IsListDrivenJob =
                (this.JobMaster.GetRunTimeMode() == MacroscopeConstants.RunTimeMode.LISTFILE) ||
                (this.JobMaster.GetRunTimeMode() == MacroscopeConstants.RunTimeMode.LISTTEXT) ||
                (this.JobMaster.GetRunTimeMode() == MacroscopeConstants.RunTimeMode.SITEMAP);

            if (IsListDrivenJob && !MacroscopePreferencesManager.GetScanSitesInList())
            {
                return;
            }

            foreach (MacroscopeLink Outlink in msDoc.IterateOutlinks())
            {
                if (!Outlink.GetDoFollow())
                {
                    continue;
                }

                if (Outlink.GetTargetUrl() == null)
                {
                    continue;
                }

                if (this.JobMaster.GetJobHistory().SeenHistoryItem(Outlink.GetTargetUrl()))
                {
                    continue;
                }

                // A page limit of -1 or lower means "no limit".
                bool LimitReached =
                    (this.JobMaster.GetPageLimit() > -1) &&
                    (this.JobMaster.GetPageLimitCount() >= this.JobMaster.GetPageLimit());

                if (LimitReached)
                {
                    this.DebugMsg(
                        string.Format(
                            "PAGE LIMIT REACHED: {0} :: {1}",
                            this.JobMaster.GetPageLimit(),
                            this.JobMaster.GetPageLimitCount()
                            )
                        );
                    continue;
                }

                this.JobMaster.AddUrlQueueItem(
                    Url: Outlink.GetTargetUrl(),
                    Check: true
                    );
            }
        }
        /**************************************************************************/

        /// <summary>
        /// Creates a click-path analysis over <paramref name="DocumentCollection"/> with no root
        /// document selected and empty visited-node and page-chain tables.
        /// </summary>
        /// <param name="DocumentCollection">Document collection stored for use by the analysis.</param>
        public MacroscopeClickPathAnalysis(MacroscopeDocumentCollection DocumentCollection)
        {
            this.DocCollection = DocumentCollection;

            // No root until Analyze() is given one.
            this.RootDoc = null;

            // Per document: the outgoing hyperlinks already recorded for it.
            this.NodeVisited = new Dictionary <MacroscopeDocument, List <MacroscopeHyperlinkOut> > ();

            // Keyed by string (URLs, judging by Analyze()'s debug output); each value is a list of URL chains.
            this.PageChains = new SortedDictionary <string, List <LinkedList <string> > > ();
        }
        /**************************************************************************/

        /// <summary>
        /// Looks up the document stored under <paramref name="Url"/>.
        /// </summary>
        /// <param name="Url">Key of the document in <c>DocumentList</c>.</param>
        /// <returns>The stored document, or <c>null</c> when the key is not present.</returns>
        public MacroscopeDocument GetDocument(string Url)
        {
            if (!this.DocumentList.ContainsKey(Url))
            {
                return null;
            }

            return this.DocumentList[Url];
        }
Ejemplo n.º 24
0
        /**************************************************************************/

        /// <summary>
        /// Returns the document at the end of <c>DocumentChain</c>.
        /// </summary>
        /// <returns>The last document in the chain, or <c>null</c> when the chain is empty.</returns>
        public MacroscopeDocument GetLastDocument()
        {
            MacroscopeDocument msDoc = null;

            lock (this.DocumentChain)
            {
                // Last is null for an empty chain; reading .Value unguarded would throw.
                if (this.DocumentChain.Last != null)
                {
                    msDoc = this.DocumentChain.Last.Value;
                }
            }

            return(msDoc);
        }
        /**************************************************************************/

        /// <summary>
        /// Resets the analysis state, walks the link graph starting at <paramref name="RootDoc"/>
        /// via <c>Descend</c>, and then dumps the collected page chains to the debug log.
        /// </summary>
        /// <param name="RootDoc">Document the descent starts from; stored as the current root.</param>
        public void Analyze(MacroscopeDocument RootDoc)
        {
            this.RootDoc = RootDoc;

            this.NodeVisited.Clear();
            this.PageChains.Clear();

            this.Descend(
                PageChain: new LinkedList<string>(),
                ParentDoc: RootDoc
                );

            this.DebugMsg("######################################################");

            // TODO: Remove this after debugging:
            foreach (KeyValuePair<string, List<LinkedList<string>>> Entry in this.PageChains)
            {
                string ChainUrl = Entry.Key;
                List<LinkedList<string>> Chains = Entry.Value;

                this.DebugMsg(string.Format("PageChains URL: {0}", ChainUrl));

                for (int Index = 0; Index < Chains.Count; Index++)
                {
                    this.DebugMsg(string.Format("----{0}: {1}", Index, ChainUrl));

                    foreach (string ChainedUrl in Chains[Index])
                    {
                        this.DebugMsg(string.Format("--------ChainedUrl: {0}", ChainedUrl));
                    }
                }
            }

            // Two closing separators are emitted, as before.
            this.DebugMsg("######################################################");

            this.DebugMsg("######################################################");
        }
Ejemplo n.º 26
0
        /**************************************************************************/

        /// <summary>
        /// Shows a document's URL, title, description and keywords as a list view row: updates the
        /// row keyed by the URL when it already exists, otherwise builds a new row and appends it to
        /// <paramref name="ListViewItems"/>.
        /// </summary>
        /// <param name="ListViewItems">Receives a newly created row; untouched when an existing row is updated.</param>
        /// <param name="DocCollection">Not used by this override.</param>
        /// <param name="msDoc">Document supplying title, description and keywords.</param>
        /// <param name="Url">URL shown in the first column and used as the row key.</param>
        protected override void RenderListView(
            List <ListViewItem> ListViewItems,
            MacroscopeDocumentCollection DocCollection,
            MacroscopeDocument msDoc,
            string Url
            )
        {
            // Cell texts in column order.
            string[] Cells =
            {
                Url,
                msDoc.GetTitle(),
                msDoc.GetDescription(),
                msDoc.GetKeywords()
            };

            string RowKey = string.Join("", Url);

            if (this.DisplayListView.Items.ContainsKey(RowKey))
            {
                try
                {
                    ListViewItem Existing = this.DisplayListView.Items[RowKey];

                    for (int Column = 0; Column < Cells.Length; Column++)
                    {
                        Existing.SubItems[Column].Text = Cells[Column];
                    }
                }
                catch (Exception ex)
                {
                    DebugMsg(string.Format("MacroscopeDisplaySearchCollection 1: {0}", ex.Message));
                }

                return;
            }

            try
            {
                ListViewItem Created = new ListViewItem(RowKey);
                Created.UseItemStyleForSubItems = false;
                Created.Name = RowKey;

                // Column 0 already exists on a new item; the remaining columns are appended.
                Created.SubItems[0].Text = Cells[0];

                for (int Column = 1; Column < Cells.Length; Column++)
                {
                    Created.SubItems.Add(Cells[Column]);
                }

                ListViewItems.Add(Created);
            }
            catch (Exception ex)
            {
                DebugMsg(string.Format("MacroscopeDisplaySearchCollection 2: {0}", ex.Message));
            }
        }
Ejemplo n.º 27
0
        /**************************************************************************/

        /// <summary>
        /// Removes the entry keyed by the document's URL from <c>DocumentList</c>, if present,
        /// while holding a lock on the list.
        /// </summary>
        /// <param name="msDoc">Document whose URL identifies the entry to remove.</param>
        public void RemoveDocument(MacroscopeDocument msDoc)
        {
            string DocumentUrl = msDoc.GetUrl();

            lock (this.DocumentList)
            {
                // Remove is a no-op for a key that is not present, so no prior lookup is needed.
                this.DocumentList.Remove(DocumentUrl);
            }
        }
Ejemplo n.º 28
0
 // Checks that text extraction yields a non-empty list for every HTML fixture.
 public void TestGetNodeText()
 {
     foreach (string FixtureKey in this.HtmlDocs.Keys)
     {
         MacroscopeDocument Doc = new MacroscopeDocument(Url: "https://nazuke.github.io/");

         HtmlDocument Parsed = new HtmlDocument();
         Parsed.LoadHtml(html: this.HtmlDocs[FixtureKey]);

         List<string> ExtractedText = Doc.GetNodeText(Node: Parsed.DocumentNode);

         Assert.IsNotEmpty(ExtractedText, "CleanedText is empty");
     }
 }
        /**************************************************************************/

        /// <summary>
        /// Creates a readability analyzer for the language reported by <paramref name="msDoc"/>.
        /// </summary>
        /// <param name="msDoc">Document whose ISO language code selects the analyzer.</param>
        /// <returns>
        /// Whatever the language-code overload of <c>AnalyzerFactory</c> returns, or <c>null</c>
        /// when the document has no language code.
        /// </returns>
        public static IMacroscopeAnalyzeReadability AnalyzerFactory(MacroscopeDocument msDoc)
        {
            string LanguageCode = msDoc.GetIsoLanguageCode();

            if (string.IsNullOrEmpty(LanguageCode))
            {
                return null;
            }

            return MacroscopeAnalyzeReadability.AnalyzerFactory(IsoLanguageCode: LanguageCode);
        }
Ejemplo n.º 30
0
        /**************************************************************************/

        /// <summary>
        /// Writes the redirects-audit worksheet: a header row, then one row per redirecting
        /// document that has both an origin and a destination URL.
        /// </summary>
        /// <param name="JobMaster">Job whose document collection is reported.</param>
        /// <param name="ws">CSV writer receiving the rows.</param>
        private void BuildWorksheetPageRedirectsAudit(
            MacroscopeJobMaster JobMaster,
            CsvWriter ws
            )
        {
            MacroscopeDocumentCollection DocCollection = JobMaster.GetDocCollection();

            // Header row.
            ws.WriteField("Origin URL");
            ws.WriteField("Status Code");
            ws.WriteField("Status");
            ws.WriteField("Destination URL");
            ws.NextRecord();

            foreach (string Url in DocCollection.DocumentKeys())
            {
                MacroscopeDocument msDoc = DocCollection.GetDocument(Url: Url);

                // Guard against a key that no longer resolves to a document.
                if ((msDoc == null) || !msDoc.GetIsRedirect())
                {
                    continue;
                }

                string OriginURL      = msDoc.GetUrlRedirectFrom();
                string DestinationURL = msDoc.GetUrlRedirectTo();

                // A row needs both ends of the redirect.
                if (string.IsNullOrEmpty(OriginURL) || string.IsNullOrEmpty(DestinationURL))
                {
                    continue;
                }

                // Numeric code and its name, built only for rows that are written.
                string StatusCode = (( int )msDoc.GetStatusCode()).ToString();
                string Status     = msDoc.GetStatusCode().ToString();

                this.InsertAndFormatUrlCell(ws, OriginURL);

                this.InsertAndFormatContentCell(ws, StatusCode);

                this.InsertAndFormatContentCell(ws, Status);

                this.InsertAndFormatUrlCell(ws, DestinationURL);

                ws.NextRecord();
            }
        }