Esempio n. 1
0
        /** TEXT Sitemap Generators ***********************************************/

        /// <summary>
        /// Builds the list of URLs for a plain-text sitemap from the document collection.
        /// </summary>
        /// <remarks>
        /// A document is listed only when it returned HTTP 200, is internal, is not a
        /// redirect, and is either HTML or PDF. When <paramref name="Host"/> is supplied,
        /// the document's host-and-port must additionally equal it.
        /// </remarks>
        /// <param name="Host">Host-and-port value to restrict the sitemap to; null or empty disables the host filter.</param>
        /// <returns>The sitemap URLs, in the order the collection yields its documents.</returns>
        public List <string> GenerateTextSitemap(string Host)
        {
            int DocCount = this.DocCollection.CountDocuments();

            // Handed to GenerateTextSitemapPdfEntries; how it is used there is not visible from this method.
            Dictionary <string, Boolean> Dedupe = new Dictionary <string, Boolean> (DocCount);

            List <string> SitemapText = new List <string> (DocCount);

            bool FilterByHost = !string.IsNullOrEmpty(Host);

            foreach (MacroscopeDocument msDoc in this.DocCollection.IterateDocuments())
            {
                if (!msDoc.GetStatusCode().Equals(HttpStatusCode.OK))
                {
                    continue;
                }

                if (
                    (!msDoc.GetIsInternal()) ||
                    (msDoc.GetIsRedirect()))
                {
                    continue;
                }

                if (!(msDoc.GetIsHtml() || msDoc.GetIsPdf()))
                {
                    continue;
                }

                // The host filter may only narrow the selection. Previously a host match
                // forced the document to be included, which re-admitted documents that had
                // just been rejected for being neither HTML nor PDF.
                if (FilterByHost && !msDoc.GetHostAndPort().Equals(Host))
                {
                    continue;
                }

                SitemapText.Add(msDoc.GetUrl());

                if (
                    MacroscopePreferencesManager.GetSitemapIncludeLinkedPdfs() &&
                    msDoc.GetIsHtml())
                {
                    this.GenerateTextSitemapPdfEntries(
                        msDoc: msDoc,
                        SitemapText: SitemapText,
                        Dedupe: Dedupe
                        );
                }
            }

            return(SitemapText);
        }
        /** Sitemaps **************************************************************/

        /// <summary>
        /// Collects the sitemap URLs listed in the robots data fetched for a URL.
        /// </summary>
        /// <param name="Url">URL whose robots data is fetched; also the base used to make each sitemap URL absolute.</param>
        /// <returns>
        /// The absolute sitemap URLs. Empty when the robots protocol preference is off,
        /// when no robots data or sitemap list is available, or when nothing was listed.
        /// </returns>
        public async Task <List <string> > GetSitemapsAsList(string Url)
        {
            List <string> Found = new List <string>();

            if (!MacroscopePreferencesManager.GetFollowRobotsProtocol())
            {
                return(Found);
            }

            Robots RobotsData = await this.FetchRobot(Url : Url);

            try
            {
                if ((RobotsData == null) || (RobotsData.Sitemaps == null))
                {
                    return(Found);
                }

                foreach (Sitemap Entry in RobotsData.Sitemaps)
                {
                    string RawUrl = Entry.Url.ToString();

                    Found.Add(MacroscopeHttpUrlUtils.MakeUrlAbsolute(BaseUrl: Url, Url: RawUrl));

                    // The debug line reports the URL as listed, not the absolute form.
                    this.DebugMsg(string.Format("ROBOTS SitemapUrl: {0}", RawUrl));
                }
            }
            catch (Exception ex)
            {
                // Best effort: whatever was gathered before the failure is still returned.
                this.DebugMsg(ex.Message);
            }

            return(Found);
        }
        /** -------------------------------------------------------------------- **/

        /// <summary>
        /// Creates the readability analyzer for a language code.
        /// </summary>
        /// <remarks>
        /// Only "en" has analyzers here; "x-default" is treated as "en". The English
        /// analyzer is chosen by the configured English readability algorithm.
        /// </remarks>
        /// <param name="IsoLanguageCode">Language code to look up; may be null.</param>
        /// <returns>
        /// The analyzer, or null when the code is null, the language has no analyzer,
        /// or the configured algorithm is not one handled here.
        /// </returns>
        public static IMacroscopeAnalyzeReadability AnalyzerFactory(string IsoLanguageCode)
        {
            IMacroscopeAnalyzeReadability Analyzer = null;

            // A null code has no analyzer; previously this threw NullReferenceException.
            if (IsoLanguageCode == null)
            {
                return(Analyzer);
            }

            if (IsoLanguageCode.Equals("x-default"))
            {
                IsoLanguageCode = "en";
            }

            switch (IsoLanguageCode)
            {
            case "en":
                switch (MacroscopePreferencesManager.GetAnalyzeTextReadabilityEnglishAlgorithm())
                {
                case MacroscopeAnalyzeReadability.AnalyzeReadabilityEnglishAlgorithm.FLESCH_KINCAID:
                    Analyzer = new MacroscopeAnalyzeReadabilityFleschKincaid();
                    break;

                case MacroscopeAnalyzeReadability.AnalyzeReadabilityEnglishAlgorithm.SMOG:
                    Analyzer = new MacroscopeAnalyzeReadabilitySmog();
                    break;

                default:
                    break;
                }
                break;

            default:
                break;
            }

            return(Analyzer);
        }
Esempio n. 4
0
        /**************************************************************************/

        /// <summary>
        /// Renders the job's blocked-by-robots list into the display list view.
        /// </summary>
        /// <remarks>
        /// Does nothing when the list is empty. A progress form is updated per entry
        /// only when the show-progress-dialogues preference is on.
        /// </remarks>
        /// <param name="JobMaster">Job supplying the blocked-by-robots list and the allowed hosts.</param>
        public void RenderListView(MacroscopeJobMaster JobMaster)
        {
            Dictionary <String, bool> Blocked = JobMaster.GetBlockedByRobotsList();

            if (Blocked.Count == 0)
            {
                return;
            }

            // Read once so the initial update, per-item updates and close all agree.
            bool ShowProgress = MacroscopePreferencesManager.GetShowProgressDialogues();

            // Capacity is only a hint based on the number of entries.
            List <ListViewItem> ListViewItems = new List <ListViewItem>(Blocked.Count);

            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

            try
            {
                decimal Count     = 0;
                decimal TotalDocs = (decimal)Blocked.Count;

                if (ShowProgress)
                {
                    ProgressForm.UpdatePercentages(
                        Title: "Preparing Display",
                        Message: "Processing document collection for display:",
                        MajorPercentage: ((decimal)100 / TotalDocs) * Count,
                        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                        );
                }

                // Enumerate pairs directly rather than looking each key up again.
                foreach (KeyValuePair <String, bool> Entry in Blocked)
                {
                    bool IsInternal = JobMaster.GetAllowedHosts().IsInternalUrl(Entry.Key);

                    this.RenderListView(
                        ListViewItems: ListViewItems,
                        Url: Entry.Key,
                        IsBlocked: Entry.Value,
                        IsInternal: IsInternal
                        );

                    Count++;

                    // Previously unguarded: the form was updated even with progress dialogues off.
                    if (ShowProgress)
                    {
                        ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: ((decimal)100 / TotalDocs) * Count,
                            ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                            );
                    }
                }

                this.DisplayListView.Items.AddRange(ListViewItems.ToArray());

                if (ShowProgress)
                {
                    ProgressForm.DoClose();
                }
            }
            finally
            {
                // Release the form even when rendering throws.
                ProgressForm.Dispose();
            }
        }
        /**************************************************************************/

        /// <summary>
        /// Applies a preference preset: resets preferences to their defaults, calls
        /// <c>HtmlOnly()</c>, overrides the fetch and process flags below, and saves.
        /// </summary>
        /// <remarks>
        /// NOTE(review): HtmlOnly() is defined elsewhere; what it sets is not visible
        /// here. The overrides below must run after it to take effect.
        /// </remarks>
        public static void HtmlAndLinkedAssets()
        {
            MacroscopePreferencesManager.SetDefaultValues();

            HtmlOnly();

            // Fetch flags: everything on except binaries.
            MacroscopePreferencesManager.SetFetchStylesheets(true);
            MacroscopePreferencesManager.SetFetchJavascripts(true);
            MacroscopePreferencesManager.SetFetchImages(true);
            MacroscopePreferencesManager.SetFetchAudio(true);
            MacroscopePreferencesManager.SetFetchVideo(true);
            MacroscopePreferencesManager.SetFetchXml(true);
            MacroscopePreferencesManager.SetFetchBinaries(false);

            // Process flags: audio, images, javascripts and stylesheets on;
            // binaries, PDFs, video and XML off.
            MacroscopePreferencesManager.SetProcessAudio(true);
            MacroscopePreferencesManager.SetProcessBinaries(false);
            MacroscopePreferencesManager.SetProcessImages(true);
            MacroscopePreferencesManager.SetProcessJavascripts(true);
            MacroscopePreferencesManager.SetProcessPdfs(false);
            MacroscopePreferencesManager.SetProcessStylesheets(true);
            MacroscopePreferencesManager.SetProcessVideo(false);
            MacroscopePreferencesManager.SetProcessXml(false);

            MacroscopePreferencesManager.SavePreferences();
        }
        /// <summary>
        /// Executes a document for each sample URL, ten times over, and asserts that
        /// the title, description and body text languages are all reported as "en".
        /// </summary>
        public void TestDetectLanguage()
        {
            const int Passes = 10;

            List <string> SampleUrls = new List <string> {
                "https://nazuke.github.io/SEOMacroscope/"
            };

            MacroscopePreferencesManager.SetDetectLanguage(Enabled: true);
            MacroscopePreferencesManager.SetRequestTimeout(Seconds: 10);

            for (int Pass = 0; Pass < Passes; Pass++)
            {
                foreach (string Url in SampleUrls)
                {
                    MacroscopeDocument Doc = new MacroscopeDocument(Url: Url);

                    // Preconditions: the document exists, executes, is HTML and has a title.
                    Assert.IsNotNull(Doc, string.Format("FAIL: {0}", Url));
                    Assert.IsTrue(Doc.Execute(), string.Format("FAIL: {0}", "Execute()"));
                    Assert.IsTrue(Doc.GetIsHtml(), string.Format("FAIL: {0}", Url));
                    Assert.IsNotNullOrEmpty(Doc.GetTitle(), string.Format("FAIL: {0}", Doc.GetTitle()));

                    string DetectedTitle       = Doc.GetTitleLanguage();
                    string DetectedDescription = Doc.GetDescriptionLanguage();
                    string DetectedBody        = Doc.GetDocumentTextLanguage();

                    Assert.AreEqual(
                        "en",
                        DetectedTitle,
                        string.Format("FAIL: {0} :: {1}", "LanguageTitle", DetectedTitle)
                        );

                    Assert.AreEqual(
                        "en",
                        DetectedDescription,
                        string.Format("FAIL: {0} :: {1}", "LanguageDescription", DetectedDescription)
                        );

                    Assert.AreEqual(
                        "en",
                        DetectedBody,
                        string.Format("FAIL: {0} :: {1}", "LanguageBodyText", DetectedBody)
                        );
                }
            }
        }
Esempio n. 7
0
        /**************************************************************************/

        /// <summary>
        /// Renders every document in the collection through the per-document
        /// search-target overload and adds the resulting items to the display list view.
        /// </summary>
        /// <remarks>
        /// Does nothing for an empty collection. Null documents are skipped but still
        /// counted towards progress.
        /// </remarks>
        /// <param name="DocCollection">Documents to render.</param>
        /// <param name="UrlFragment">URL fragment forwarded unchanged to the per-document overload.</param>
        public void RenderListViewSearchTargetUrls(
            MacroscopeDocumentCollection DocCollection,
            string UrlFragment
            )
        {
            int DocCount = DocCollection.CountDocuments();

            // Nothing to render; also avoids dividing by a zero total in the percentage maths.
            if (DocCount == 0)
            {
                return;
            }

            bool ShowProgress = MacroscopePreferencesManager.GetShowProgressDialogues();

            List <ListViewItem> ListViewItems = new List <ListViewItem>(DocCount);

            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

            try
            {
                decimal Count     = 0;
                decimal TotalDocs = (decimal)DocCount;

                if (ShowProgress)
                {
                    ProgressForm.UpdatePercentages(
                        Title: "Displaying Links",
                        Message: "Processing links in document collection for display:",
                        MajorPercentage: ((decimal)100 / TotalDocs) * Count,
                        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                        );
                }

                foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
                {
                    // Check for null before touching the document; previously GetUrl()
                    // was called first, so the null check could never help.
                    if (msDoc != null)
                    {
                        this.RenderListViewSearchTargetUrls(
                            ListViewItems: ListViewItems,
                            msDoc: msDoc,
                            Url: msDoc.GetUrl(),
                            UrlFragment: UrlFragment
                            );
                    }

                    if (ShowProgress)
                    {
                        Count++;

                        ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: ((decimal)100 / TotalDocs) * Count,
                            ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                            );
                    }
                }

                this.DisplayListView.Items.AddRange(ListViewItems.ToArray());

                if (ShowProgress)
                {
                    ProgressForm.DoClose();
                }
            }
            finally
            {
                // Release the form even when rendering throws.
                ProgressForm.Dispose();
            }
        }
Esempio n. 8
0
        /**************************************************************************/

        /// <summary>
        /// Creates an outlink from this document to <paramref name="AbsoluteUrl"/> and
        /// appends it to the document's outlinks.
        /// </summary>
        /// <param name="AbsoluteUrl">Target URL of the link.</param>
        /// <param name="LinkType">Link type recorded on the new link.</param>
        /// <param name="Follow">Follow flag recorded on the new link.</param>
        /// <returns>
        /// The new link, or null when external-link checking is off and the collection's
        /// allowed hosts reject the URL.
        /// </returns>
        private MacroscopeLink AddSitemapTextOutlink(
            string AbsoluteUrl,
            MacroscopeConstants.InOutLinkType LinkType,
            Boolean Follow
            )
        {
            bool HostCheckRequired = !MacroscopePreferencesManager.GetCheckExternalLinks();

            if (HostCheckRequired)
            {
                MacroscopeAllowedHosts Hosts = this.DocCollection.GetAllowedHosts();

                // With no allowed-hosts object available the URL is accepted.
                if ((Hosts != null) && (!Hosts.IsAllowedFromUrl(Url: AbsoluteUrl)))
                {
                    return(null);
                }
            }

            MacroscopeLink Created = new MacroscopeLink(
                SourceUrl: this.GetUrl(),
                TargetUrl: AbsoluteUrl,
                LinkType: LinkType,
                Follow: Follow
                );

            this.Outlinks.Add(Created);

            return(Created);
        }
Esempio n. 9
0
        /** -------------------------------------------------------------------- **/

        /// <summary>
        /// Rebuilds the "recent URLs" drop-down: one item per crawl-history entry,
        /// last entry first, followed by a separator and a "Clear Recent URLs" item.
        /// </summary>
        private void _ReconfigureFileMenuRecentUrlsItems()
        {
            const int MaxLabelLength = 64;

            List <string>           CrawlHistory   = MacroscopePreferencesManager.GetCrawlHistory();
            ToolStripItemCollection RecentUrlItems = this.recentURLsToolStripMenuItem.DropDownItems;

            RecentUrlItems.Clear();

            // Walk the history backwards instead of calling List.Reverse(), which
            // reverses in place. If the preferences manager hands out its own list,
            // reversing it here would flip the stored order on every rebuild.
            for (int Index = CrawlHistory.Count - 1; Index >= 0; Index--)
            {
                string Url   = CrawlHistory[Index];
                string Label = Url;

                if (Url.Length > MaxLabelLength)
                {
                    Label = Url.Substring(0, MaxLabelLength) + "...";
                }

                // Double each ampersand so the menu shows it literally rather than
                // treating it as a mnemonic marker.
                ToolStripItem UrlItem = RecentUrlItems.Add(text: Label.Replace("&", "&&"));

                // The untruncated URL travels with the item for the click handler.
                UrlItem.Tag    = Url;
                UrlItem.Click += ClickCallbackFileMenuRecentUrlsItem;
            }

            RecentUrlItems.Add(new ToolStripSeparator());

            ToolStripItem ClearItem = RecentUrlItems.Add(text: "Clear Recent URLs");
            ClearItem.Click += ClickCallbackFileMenuRecentUrlsClear;
        }
Esempio n. 10
0
        /** -------------------------------------------------------------------- **/

        /// <summary>
        /// Queues a URL on the URL-list named queue unless the job history has already
        /// seen it, then records it via AddToProgress.
        /// </summary>
        /// <remarks>
        /// The query string and/or hash fragment are stripped first when the matching
        /// ignore preferences are on. AddToProgress is called whether or not the URL
        /// was queued.
        /// </remarks>
        /// <param name="Url">URL to queue.</param>
        public void AddUrlQueueItem(string Url)
        {
            string Normalized = Url;

            if (MacroscopePreferencesManager.GetIgnoreQueries())
            {
                Normalized = MacroscopeUrlUtils.StripQueryString(Url: Normalized);
            }

            if (MacroscopePreferencesManager.GetIgnoreHashFragments())
            {
                Normalized = MacroscopeUrlUtils.StripHashFragment(Url: Normalized);
            }

            bool AlreadySeen = this.JobHistory.SeenHistoryItem(Url: Normalized);

            if (!AlreadySeen)
            {
                try
                {
                    MacroscopeJobItem Pending = new MacroscopeJobItem(Url: Normalized);

                    this.NamedQueueJobItems.AddToNamedQueue(
                        Name: MacroscopeConstants.NamedQueueUrlList,
                        Item: Pending
                        );
                }
                catch (MacroscopeNamedQueueException ex)
                {
                    // A queue failure is logged, not propagated.
                    this.DebugMsg(string.Format("AddUrlQueueItem: {0}", ex.Message));
                }
            }

            this.AddToProgress(Url: Normalized);
        }
Esempio n. 11
0
        /**************************************************************************/

        /// <summary>
        /// Creates the redirect-chains display, reads the maximum hop count from the
        /// preferences, and configures the list view.
        /// </summary>
        /// <param name="MainForm">Owning main form; also used to marshal the list view configuration.</param>
        /// <param name="TargetListView">List view this display renders into.</param>
        public MacroscopeDisplayRedirectChains(MacroscopeMainForm MainForm, ListView TargetListView)
            : base(MainForm, TargetListView)
        {
            this.MainForm        = MainForm;
            this.DisplayListView = TargetListView;
            this.DocumentCount   = this.MainForm.macroscopeOverviewTabPanelInstance.toolStripLabelRedirectChainsItems;

            this.MaximumHops = MacroscopePreferencesManager.GetRedirectChainsMaxHops();

            if (!this.MainForm.InvokeRequired)
            {
                this.ConfigureListView();
                return;
            }

            // The form reports that an invoke is required, so go through it.
            this.MainForm.Invoke(new MethodInvoker(() => this.ConfigureListView()));
        }
Esempio n. 12
0
        /** Crawl Delay ***********************************************************/

        /// <summary>
        /// Looks up the crawl delay from the robots data fetched for a URL.
        /// </summary>
        /// <remarks>
        /// The delay for this crawler's user agent is tried first; when that is zero
        /// the "*" entry is used instead.
        /// </remarks>
        /// <param name="Url">URL whose robots data is fetched.</param>
        /// <returns>
        /// The robots crawl-delay value divided by 1000 (integer division), or 0 when
        /// the robots protocol preference is off, no robots data is available, or the
        /// value is not positive.
        /// </returns>
        public async Task <int> GetCrawlDelay(string Url)
        {
            if (!MacroscopePreferencesManager.GetFollowRobotsProtocol())
            {
                return(0);
            }

            Robots RobotsData = await this.FetchRobot(Url : Url);

            if (RobotsData == null)
            {
                return(0);
            }

            long RawDelay = RobotsData.CrawlDelay(userAgent: this.UserAgentName());

            if (RawDelay == 0)
            {
                RawDelay = RobotsData.CrawlDelay("*");
            }

            // NOTE(review): the division by 1000 presumably converts milliseconds to
            // seconds; confirm against the Robots library.
            int Scaled = (RawDelay > 0) ? (int)(RawDelay / 1000) : 0;

            DebugMsg(string.Format("ROBOTS CrawlDelayTime: {0}", RawDelay));
            DebugMsg(string.Format("ROBOTS Delay: {0}", Scaled));

            return(Scaled);
        }
Esempio n. 13
0
        /** -------------------------------------------------------------------- **/

        /// <summary>
        /// Sets the main form's menus, buttons and toolbar items to the state coded
        /// below, then refreshes the reports menu, structure overview and search
        /// collection controls.
        /// </summary>
        private void ScanningControlsEnable()
        {
            // Menu items.
            this.loadUrlListToolStripMenuItem.Enabled    = true;
            this.exportToolStripMenuItem.Enabled         = true;
            this.taskParametersToolStripMenuItem.Enabled = true;
            this.reportsToolStripMenuItem.Enabled        = true;

            // Start URL box and start button on; stop and reset buttons off.
            this.textBoxStartUrl.Enabled = true;
            this.ButtonStart.Enabled     = true;
            this.ButtonStop.Enabled      = false;
            this.ButtonReset.Enabled     = false;

            this.ProgressBarScan.Visible = false;

            // Retry buttons.
            this.toolStripButtonRetryBrokenLinks.Enabled   = true;
            this.toolStripButtonRetryTimedOutLinks.Enabled = true;

            // The recalculate button simply mirrors the click-path analysis preference.
            this.toolStripButtonRecalculateClickPaths.Enabled = MacroscopePreferencesManager.GetAnalyzeClickPaths();

            this.ReconfigureReportsMenu();
            this.ReconfigureStructureOverviewControls();
            this.ReconfigureSearchCollectionControls();
        }
Esempio n. 14
0
        /**************************************************************************/

        /// <summary>
        /// Creates a worker bound to a job master, copying the master's document
        /// collection, allowed hosts and include/exclude rules, and choosing the
        /// crawl delay.
        /// </summary>
        /// <remarks>
        /// A positive crawl-delay preference is applied first. When the robots
        /// protocol preference is on, a positive delay from the job master then
        /// replaces it.
        /// </remarks>
        /// <param name="JobMaster">Job master this worker belongs to.</param>
        public MacroscopeJobWorker(MacroscopeJobMaster JobMaster)
        {
            this.SuppressDebugMsg = true;

            this.JobMaster          = JobMaster;
            this.DocCollection      = this.JobMaster.GetDocCollection();
            this.AllowedHosts       = this.JobMaster.GetAllowedHosts();
            this.IncludeExcludeUrls = this.JobMaster.GetIncludeExcludeUrls();

            if (MacroscopePreferencesManager.GetCrawlDelay() > 0)
            {
                this.CrawlDelay = MacroscopePreferencesManager.GetCrawlDelay();
            }

            if (
                MacroscopePreferencesManager.GetFollowRobotsProtocol() &&
                (this.JobMaster.GetCrawlDelay() > 0))
            {
                this.CrawlDelay = this.JobMaster.GetCrawlDelay();
            }
        }
        /** SEARCH INDEX **********************************************************/

        /// <summary>
        /// Runs an OR or AND search over the index for the given terms.
        /// </summary>
        /// <remarks>
        /// When case-sensitive text indexing is off, the terms are lower-cased before
        /// searching. The lower-casing is done on a copy; the caller's array is left
        /// untouched (it used to be overwritten in place).
        /// </remarks>
        /// <param name="SMode">Search mode selecting the OR or AND implementation.</param>
        /// <param name="Terms">Search terms; not modified.</param>
        /// <returns>The matching documents, or null when <paramref name="SMode"/> is neither OR nor AND.</returns>
        public List <MacroscopeDocument> ExecuteSearchForDocuments(
            MacroscopeSearchIndex.SearchMode SMode,
            string [] Terms
            )
        {
            string [] SearchTerms = Terms;

            if (!MacroscopePreferencesManager.GetCaseSensitiveTextIndexing())
            {
                SearchTerms = new string[Terms.Length];

                for (int i = 0; i < Terms.Length; i++)
                {
                    // Same ToLower() call as before, so terms are normalised exactly as they were.
                    SearchTerms[i] = Terms[i].ToLower();
                }
            }

            switch (SMode)
            {
            case MacroscopeSearchIndex.SearchMode.OR:
                return(this.ExecuteSearchForDocumentsOR(SearchTerms));

            case MacroscopeSearchIndex.SearchMode.AND:
                return(this.ExecuteSearchForDocumentsAND(SearchTerms));

            default:
                return(null);
            }
        }
        /** Render One Document *******************************************/

        /// <summary>
        /// Renders a single document into the display list view, showing a progress
        /// form when the show-progress-dialogues preference is on.
        /// </summary>
        /// <param name="msDoc">Document to render; nothing happens when null.</param>
        /// <param name="Url">Not used by this implementation; the document's own URL is passed on instead.</param>
        public virtual void RenderListView(MacroscopeDocument msDoc, string Url)
        {
            if (msDoc == null)
            {
                return;
            }

            bool ShowProgress = MacroscopePreferencesManager.GetShowProgressDialogues();

            List <ListViewItem> ListViewItems = new List <ListViewItem> (1);

            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

            try
            {
                decimal Count     = 0;
                decimal TotalDocs = ( decimal )1;

                if (ShowProgress)
                {
                    ProgressForm.ControlBox = false;

                    ProgressForm.UpdatePercentages(
                        Title: "Preparing Display",
                        Message: "Processing document collection for display:",
                        MajorPercentage: (( decimal )100 / TotalDocs) * Count,
                        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                        );
                }

                Application.DoEvents();

                // msDoc was checked on entry, so no second null check is needed here.
                this.RenderListView(
                    ListViewItems: ListViewItems,
                    msDoc: msDoc,
                    Url: msDoc.GetUrl()
                    );

                if (ShowProgress)
                {
                    Count++;

                    ProgressForm.UpdatePercentages(
                        Title: null,
                        Message: null,
                        MajorPercentage: (( decimal )100 / TotalDocs) * Count,
                        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                        );
                }

                this.DisplayListView.Items.AddRange(ListViewItems.ToArray());

                if (ShowProgress)
                {
                    ProgressForm.DoClose();
                }
            }
            finally
            {
                // Release the form even when rendering throws.
                ProgressForm.Dispose();
            }
        }
        /** Render Entire DocCollection *******************************************/

        /// <summary>
        /// Renders every document in the collection into the tree view, one call to
        /// the per-document overload each.
        /// </summary>
        /// <remarks>
        /// Does nothing for an empty collection. Null documents are skipped and do not
        /// advance the progress count.
        /// </remarks>
        /// <param name="DocCollection">Documents to render.</param>
        public void RenderTreeView(MacroscopeDocumentCollection DocCollection)
        {
            int DocCount = DocCollection.CountDocuments();

            if (DocCount == 0)
            {
                return;
            }

            bool ShowProgress = MacroscopePreferencesManager.GetShowProgressDialogues();

            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

            try
            {
                decimal Count     = 0;
                decimal TotalDocs = ( decimal )DocCount;

                if (ShowProgress)
                {
                    ProgressForm.UpdatePercentages(
                        Title: "Preparing Display",
                        Message: "Processing document collection for display:",
                        MajorPercentage: (( decimal )100 / TotalDocs) * Count,
                        ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                        );
                }

                foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
                {
                    Application.DoEvents();

                    if (msDoc == null)
                    {
                        continue;
                    }

                    this.RenderTreeView(msDoc, msDoc.GetUrl());

                    if (ShowProgress)
                    {
                        Count++;

                        ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: (( decimal )100 / TotalDocs) * Count,
                            ProgressLabelMajor: string.Format("Document {0} / {1}", Count, TotalDocs)
                            );
                    }
                }

                if (ShowProgress)
                {
                    ProgressForm.DoClose();
                }
            }
            finally
            {
                // Release the form even when rendering throws.
                ProgressForm.Dispose();
            }
        }
Esempio n. 18
0
 /**************************************************************************/
 /// <summary>
 /// Enables the pages CSV report menu item exactly when the Levenshtein
 /// deduplication preference is on.
 /// </summary>
 public void ReconfigureReportsMenu()
 {
     bool DeduplicationEnabled = MacroscopePreferencesManager.GetEnableLevenshteinDeduplication();

     this.pagesCSVReportToolStripMenuItem.Enabled = DeduplicationEnabled;
 }
Esempio n. 19
0
        /**************************************************************************/

        /// <summary>
        /// Event handler: resets the preferences to their default values and reloads
        /// the form's control fields.
        /// </summary>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Event data; not used.</param>
        private void SetPrefsFormControlFieldToDefaults(object sender, EventArgs e)
        {
            // The sender used to be cast to Button into a local that was never read;
            // the cast could only throw for a non-Button sender, so it is gone.
            MacroscopePreferencesManager.SetDefaultValues();

            this.SetPrefsFormControlFields();
        }
Esempio n. 20
0
        /**************************************************************************/

        /// <summary>
        /// Renders the documents whose URL contains <paramref name="UrlFragment"/>
        /// and adds the resulting items to the display list view.
        /// </summary>
        /// <remarks>
        /// Does nothing for an empty collection. The match uses a current-culture
        /// substring search.
        /// </remarks>
        /// <param name="DocCollection">Documents to search and render.</param>
        /// <param name="UrlFragment">Text that a document URL must contain to be rendered.</param>
        public void RenderListViewSearchSourceUrls(
            MacroscopeDocumentCollection DocCollection,
            string UrlFragment
            )
        {
            int DocCount = DocCollection.CountDocuments();

            // Nothing to render; also avoids dividing by a zero total in the percentage maths.
            if (DocCount == 0)
            {
                return;
            }

            bool ShowProgress = MacroscopePreferencesManager.GetShowProgressDialogues();

            List <ListViewItem> ListViewItems = new List <ListViewItem>(DocCount);

            MacroscopeSinglePercentageProgressForm ProgressForm = new MacroscopeSinglePercentageProgressForm(this.MainForm);

            try
            {
                decimal Count     = 0;
                decimal TotalDocs = (decimal)DocCount;

                if (ShowProgress)
                {
                    ProgressForm.UpdatePercentages(
                        Title: "Displaying Links",
                        Message: "Processing links in document collection for display:",
                        MajorPercentage: ((decimal)100 / TotalDocs) * Count,
                        ProgressLabelMajor: "Documents Processed"
                        );
                }

                foreach (MacroscopeDocument msDoc in DocCollection.IterateDocuments())
                {
                    string Url = msDoc.GetUrl();

                    if (Url.IndexOf(UrlFragment, StringComparison.CurrentCulture) >= 0)
                    {
                        this.RenderListView(
                            ListViewItems: ListViewItems,
                            DocCollection: DocCollection,
                            msDoc: msDoc,
                            Url: Url
                            );
                    }

                    if (ShowProgress)
                    {
                        Count++;

                        // The total is re-read on every pass, as before.
                        TotalDocs = (decimal)DocCollection.CountDocuments();

                        ProgressForm.UpdatePercentages(
                            Title: null,
                            Message: null,
                            MajorPercentage: ((decimal)100 / TotalDocs) * Count,
                            ProgressLabelMajor: null
                            );
                    }
                }

                this.DisplayListView.Items.AddRange(ListViewItems.ToArray());

                if (ShowProgress)
                {
                    ProgressForm.DoClose();
                }
            }
            finally
            {
                // Release the form even when rendering throws.
                ProgressForm.Dispose();
            }
        }
Esempio n. 21
0
        /**************************************************************************/

        /// <summary>
        /// Queues a document's outlinks on the job master.
        /// </summary>
        /// <remarks>
        /// In LISTFILE, LISTTEXT and SITEMAP run modes nothing is queued unless the
        /// scan-sites-in-list preference is on. A link is skipped when it is not
        /// marked do-follow, has no target URL, is already in the job history, or
        /// the page limit has been reached.
        /// </remarks>
        /// <param name="msDoc">Document whose outlinks are processed.</param>
        private void ProcessOutlinks(MacroscopeDocument msDoc)
        {
            MacroscopeConstants.RunTimeMode Mode = this.JobMaster.GetRunTimeMode();

            bool IsListDriven =
                (Mode == MacroscopeConstants.RunTimeMode.LISTFILE) ||
                (Mode == MacroscopeConstants.RunTimeMode.LISTTEXT) ||
                (Mode == MacroscopeConstants.RunTimeMode.SITEMAP);

            if (IsListDriven && !MacroscopePreferencesManager.GetScanSitesInList())
            {
                return;
            }

            foreach (MacroscopeLink Outlink in msDoc.IterateOutlinks())
            {
                if (!Outlink.GetDoFollow())
                {
                    continue;
                }

                if (Outlink.GetTargetUrl() == null)
                {
                    continue;
                }

                if (this.JobMaster.GetJobHistory().SeenHistoryItem(Outlink.GetTargetUrl()))
                {
                    continue;
                }

                // A page limit of -1 or lower means no limit is applied.
                if (
                    (this.JobMaster.GetPageLimit() > -1) &&
                    (this.JobMaster.GetPageLimitCount() >= this.JobMaster.GetPageLimit()))
                {
                    this.DebugMsg(
                        string.Format(
                            "PAGE LIMIT REACHED: {0} :: {1}",
                            this.JobMaster.GetPageLimit(),
                            this.JobMaster.GetPageLimitCount()
                            )
                        );

                    // Keep looping: remaining links are still examined (and logged).
                    continue;
                }

                this.JobMaster.AddUrlQueueItem(
                    Url: Outlink.GetTargetUrl(),
                    Check: true
                    );
            }
        }
        /**************************************************************************/

        /// <summary>
        /// Applies a preference preset: resets preferences to their defaults, calls
        /// <c>HtmlOnly()</c>, turns PDF processing on, and saves.
        /// </summary>
        /// <remarks>
        /// NOTE(review): HtmlOnly() is defined elsewhere; what it sets is not visible
        /// here. The PDF override must run after it to take effect.
        /// </remarks>
        public static void HtmlAndPdfs()
        {
            MacroscopePreferencesManager.SetDefaultValues();

            HtmlOnly();

            MacroscopePreferencesManager.SetProcessPdfs(true);

            MacroscopePreferencesManager.SavePreferences();
        }
Esempio n. 23
0
        /** -------------------------------------------------------------------- **/

        /// <summary>
        /// Decides whether a URL may be crawled under the robots rules.
        /// </summary>
        /// <param name="Url">URL to check.</param>
        /// <returns>
        /// True when the robots protocol preference is off; otherwise the result of
        /// CheckRobotRule for the URL.
        /// </returns>
        public async Task <bool> ApplyRobotRule(string Url)
        {
            if (!MacroscopePreferencesManager.GetFollowRobotsProtocol())
            {
                return(true);
            }

            bool Verdict = await this.CheckRobotRule(Url : Url);

            return(Verdict);
        }
Esempio n. 24
0
        /**************************************************************************/

        /// <summary>
        /// Enables the structure overview search text box exactly when the text
        /// indexing preference is on.
        /// </summary>
        public void ReconfigureStructureOverviewControls()
        {
            bool IndexingEnabled = MacroscopePreferencesManager.GetEnableTextIndexing();

            this.macroscopeOverviewTabPanelInstance.toolStripStructureSearchTextBoxSearch.Enabled = IndexingEnabled;
        }
Esempio n. 25
0
        /**************************************************************************/

        /// <summary>
        /// Rebuilds the list view columns: a URL column and a status column for each
        /// hop from 1 up to the configured maximum, then resizes columns to their headers.
        /// </summary>
        private void ConfigureListViewColumns()
        {
            // Re-read the preference so the column count follows the current setting.
            this.MaximumHops = MacroscopePreferencesManager.GetRedirectChainsMaxHops();

            this.DisplayListView.Columns.Clear();

            int Hop = 1;

            while (Hop <= this.MaximumHops)
            {
                string UrlKey    = string.Format("HOP_{0}_URL", Hop);
                string UrlHeader = string.Format("Hop {0} URL", Hop);
                this.DisplayListView.Columns.Add(UrlKey, UrlHeader);

                string StatusKey    = string.Format("HOP_{0}_STATUS", Hop);
                string StatusHeader = string.Format("Hop {0} Status", Hop);
                this.DisplayListView.Columns.Add(StatusKey, StatusHeader);

                Hop++;
            }

            this.DisplayListView.AutoResizeColumns(ColumnHeaderAutoResizeStyle.HeaderSize);
        }
        /**************************************************************************/

        /// <summary>
        /// Applies a preferences preset: resets the preferences to their default values,
        /// applies the <c>HtmlOnly()</c> preset, switches on the following of canonical,
        /// alternate and hreflang links, and then calls <c>SavePreferences()</c>.
        /// </summary>
        public static void HrefLangMatrix()
        {
            // Start from the default values so that earlier settings do not carry over.
            MacroscopePreferencesManager.SetDefaultValues();

            // Apply the HtmlOnly() preset as the base configuration.
            HtmlOnly();

            // The three link-following options are enabled on top of the base preset.
            MacroscopePreferencesManager.SetFollowCanonicalLinks(true);
            MacroscopePreferencesManager.SetFollowAlternateLinks(true);
            MacroscopePreferencesManager.SetFollowHrefLangLinks(true);

            MacroscopePreferencesManager.SavePreferences();
        }
Esempio n. 27
0
        /** Generate Robot URL ****************************************************/

        /// <summary>
        /// Builds the absolute URL of the robots.txt file for the host that serves
        /// <paramref name="Url"/>, using that URL's scheme, host and port.
        /// </summary>
        /// <param name="Url">An absolute URL on the host of interest.</param>
        /// <returns>
        /// The robots.txt URL, or <c>null</c> when the "follow robots protocol"
        /// preference is off, or when <paramref name="Url"/> is null, empty, or cannot
        /// be parsed as an absolute URL.
        /// </returns>
        public static string GenerateRobotUrl(string Url)
        {
            // The "disabled" message belongs on the path where the robots protocol is
            // NOT being followed; previously it was logged on the enabled path.
            if (!MacroscopePreferencesManager.GetFollowRobotsProtocol())
            {
                DebugMsgStatic(string.Format("ROBOTS Disabled: {0}", Url));

                return(null);
            }

            // A null Url would make the Uri constructor throw ArgumentNullException,
            // which is not handled below; treat it like any other unusable URL.
            if (string.IsNullOrEmpty(Url))
            {
                DebugMsgStatic(string.Format("GenerateRobotUrl: {0}", "Url is null or empty"));

                return(null);
            }

            string RobotUrl     = null;
            string RobotsTxtUrl = null;

            try
            {
                Uri    BaseUri     = new Uri(Url, UriKind.Absolute);
                string BaseUriPort = "";

                // Uri.Port is -1 when the scheme has no default port, so only a
                // positive value is appended.
                if (BaseUri.Port > 0)
                {
                    BaseUriPort = string.Format(":{0}", BaseUri.Port);
                }

                Uri RobotsUri = new Uri(
                    string.Format(
                        "{0}://{1}{2}{3}",
                        BaseUri.Scheme,
                        BaseUri.Host,
                        BaseUriPort,
                        "/robots.txt"
                        ),
                    UriKind.Absolute
                    );

                RobotsTxtUrl = RobotsUri.ToString();
            }
            catch (InvalidOperationException ex)
            {
                DebugMsgStatic(string.Format("GenerateRobotUrl: {0}", ex.Message));
            }
            catch (UriFormatException ex)
            {
                DebugMsgStatic(string.Format("GenerateRobotUrl: {0}", ex.Message));
            }

            if (!string.IsNullOrEmpty(RobotsTxtUrl))
            {
                RobotUrl = RobotsTxtUrl;
            }

            return(RobotUrl);
        }
Esempio n. 28
0
        /** -------------------------------------------------------------------- **/

        /// <summary>
        /// When the "follow sitemap links" preference is on, fetches the sitemap list
        /// for <paramref name="Url"/> from <c>Robots</c> and queues every entry.
        /// </summary>
        /// <param name="Url">The URL passed to <c>Robots.GetSitemapsAsList</c>.</param>
        public void ProbeRobotsFile(string Url)
        {
            if (!MacroscopePreferencesManager.GetFollowSitemapLinks())
            {
                return;
            }

            List <string> SitemapUrls = Robots.GetSitemapsAsList(Url);

            // An empty list simply skips the loop body.
            for (int Index = 0; Index < SitemapUrls.Count; Index++)
            {
                this.AddUrlQueueItem(Url: SitemapUrls[Index]);
            }
        }
Esempio n. 29
0
        /**************************************************************************/

        /// <summary>
        /// Issues a HEAD request to <c>this.Url</c> and reports whether the response
        /// status is 200 OK. On 200 OK the response headers are also handed to
        /// <c>ProcessResponseHttpHeaders</c>.
        /// </summary>
        /// <returns>
        /// <c>true</c> when the response status is <c>HttpStatusCode.OK</c>; <c>false</c>
        /// for any other status, or when a <c>UriFormatException</c> or
        /// <c>WebException</c> is caught before a response is obtained.
        /// </returns>
        private Boolean Check()
        {
            // TODO: Increase level of detail here.

            Boolean IsAvailableCheck = false;

            try
            {
                HttpWebRequest Request = WebRequest.CreateHttp(this.Url);

                Request.Method    = "HEAD";
                Request.Timeout   = 10000;
                Request.KeepAlive = false;
                Request.Host      = MacroscopeUrlUtils.GetHostnameAndPortFromUrl(this.Url);
                Request.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

                MacroscopePreferencesManager.EnableHttpProxy(Request);

                // The using block disposes (and thereby closes) the response on exit.
                using (HttpWebResponse Response = ( HttpWebResponse )Request.GetResponse())
                {
                    DebugMsg(string.Format("MacroscopeHrefLang Status: {0}", Response.StatusCode));

                    IsAvailableCheck = (Response.StatusCode == HttpStatusCode.OK);

                    if (IsAvailableCheck)
                    {
                        this.ProcessResponseHttpHeaders(req: Request, res: Response);
                    }
                }
            }
            catch (UriFormatException ex)
            {
                DebugMsg(string.Format("MacroscopeHrefLang UriFormatException: {0}", ex.Message));
            }
            catch (WebException ex)
            {
                DebugMsg(string.Format("MacroscopeHrefLang WebException: {0}", ex.Message));
            }

            return(IsAvailableCheck);
        }
Esempio n. 30
0
        /// <summary>
        /// Fetches a known English-language page ten times with language detection
        /// switched on, and asserts on each pass that the document executes, is HTML,
        /// has a non-empty title, and that the title, description and body text are
        /// all detected as "en".
        /// </summary>
        public async Task TestDetectLanguage()
        {
            const int    PassCount        = 10;
            const string ExpectedLanguage = "en";

            List <string> UrlList = new List <string> { "https://nazuke.github.io/SEOMacroscope/" };

            MacroscopePreferencesManager.SetDefaultValues();
            MacroscopePreferencesManager.SetDetectLanguage(Enabled: true);
            MacroscopePreferencesManager.SetRequestTimeout(Seconds: 10);

            MacroscopeJobMaster JobMaster = new MacroscopeJobMaster(
                JobRunTimeMode: MacroscopeConstants.RunTimeMode.LIVE,
                TaskController: this
                );

            MacroscopeDocumentCollection DocCollection = new MacroscopeDocumentCollection(JobMaster: JobMaster);

            for (int Pass = 0; Pass < PassCount; Pass++)
            {
                foreach (string TargetUrl in UrlList)
                {
                    MacroscopeDocument Document = DocCollection.CreateDocument(Url: TargetUrl);

                    Assert.IsNotNull(Document, string.Format("FAIL: {0}", TargetUrl));

                    bool Executed = await Document.Execute();

                    Assert.IsTrue(Executed, string.Format("FAIL: {0}", "Execute()"));

                    Assert.IsTrue(Document.IsDocumentType(Type: MacroscopeConstants.DocumentType.HTML), string.Format("FAIL: {0}", TargetUrl));

                    // The title must exist and be non-empty before its language is checked.
                    Assert.IsNotNull(Document.GetTitle(), string.Format("FAIL: {0}", Document.GetTitle()));

                    Assert.IsNotEmpty(Document.GetTitle(), string.Format("FAIL: {0}", Document.GetTitle()));

                    string TitleLanguage       = Document.GetTitleLanguage();
                    string DescriptionLanguage = Document.GetDescriptionLanguage();
                    string BodyTextLanguage    = Document.GetDocumentTextLanguage();

                    Assert.AreEqual(ExpectedLanguage, TitleLanguage, string.Format("FAIL: {0} :: {1}", "LanguageTitle", TitleLanguage));

                    Assert.AreEqual(ExpectedLanguage, DescriptionLanguage, string.Format("FAIL: {0} :: {1}", "LanguageDescription", DescriptionLanguage));

                    Assert.AreEqual(ExpectedLanguage, BodyTextLanguage, string.Format("FAIL: {0} :: {1}", "LanguageBodyText", BodyTextLanguage));
                }
            }
        }