/** -------------------------------------------------------------------- **/

        /// <summary>
        /// Adds a URL to the named URL queue unless the job history reports it
        /// as already seen, then passes the URL to <c>AddToProgress</c>.
        /// </summary>
        /// <remarks>
        /// The query string and the hash fragment are stripped first when the
        /// corresponding preferences are enabled. <c>AddToProgress</c> is called
        /// with the stripped URL whether or not the URL was queued.
        /// A <c>MacroscopeNamedQueueException</c> raised while queueing is only
        /// written to the debug log.
        /// </remarks>
        /// <param name="Url">The URL to queue.</param>
        public void AddUrlQueueItem(string Url)
        {
            string CleanedUrl = MacroscopePreferencesManager.GetIgnoreQueries()
                ? MacroscopeUrlUtils.StripQueryString(Url: Url)
                : Url;

            if (MacroscopePreferencesManager.GetIgnoreHashFragments())
            {
                CleanedUrl = MacroscopeUrlUtils.StripHashFragment(Url: CleanedUrl);
            }

            bool AlreadySeen = this.JobHistory.SeenHistoryItem(Url: CleanedUrl);

            if (!AlreadySeen)
            {
                try
                {
                    MacroscopeJobItem QueuedItem = new MacroscopeJobItem(Url: CleanedUrl);

                    this.NamedQueueJobItems.AddToNamedQueue(
                        Name: MacroscopeConstants.NamedQueueUrlList,
                        Item: QueuedItem
                        );
                }
                catch (MacroscopeNamedQueueException QueueException)
                {
                    this.DebugMsg(string.Format("AddUrlQueueItem: {0}", QueueException.Message));
                }
            }

            this.AddToProgress(Url: CleanedUrl);
        }
        /// <summary>
        /// Creates a named queue, adds a single job item to it and asserts
        /// that the queue then reports exactly one item.
        /// </summary>
        public void TestCreateNamedQueue()
        {
            var Queue = new MacroscopeNamedQueue<MacroscopeJobItem>();
            Queue.CreateNamedQueue(QUEUENAME);

            var OnlyItem = new MacroscopeJobItem("http://www.company.com/");
            Queue.AddToNamedQueue(QUEUENAME, OnlyItem);

            Assert.AreEqual(1, Queue.CountNamedQueueItems(QUEUENAME));
        }
        /// <summary>
        /// Creates a named queue in <c>USE_HISTORY</c> mode, adds two job items
        /// built from the same URL and asserts that the queue reports only one item.
        /// </summary>
        public void TestFailAddingDuplicateToNamedQueue()
        {
            var Queue = new MacroscopeNamedQueue<MacroscopeJobItem>();
            Queue.CreateNamedQueue(QUEUENAME, MacroscopeNamedQueue<MacroscopeJobItem>.MODE.USE_HISTORY);

            var FirstItem = new MacroscopeJobItem("http://www.company.com/");
            var DuplicateItem = new MacroscopeJobItem("http://www.company.com/");

            Queue.AddToNamedQueue(QUEUENAME, FirstItem);
            Queue.AddToNamedQueue(QUEUENAME, DuplicateItem);

            Assert.AreEqual(1, Queue.CountNamedQueueItems(QUEUENAME));
        }
        /// <summary>
        /// Adds an item to a <c>USE_HISTORY</c> queue, takes it back out, asks the
        /// queue to forget it, and asserts that the same item can be added again.
        /// </summary>
        public void TestAddRemoveAddAgainToNamedQueue()
        {
            var Queue = new MacroscopeNamedQueue<MacroscopeJobItem>();
            Queue.CreateNamedQueue(QUEUENAME, MacroscopeNamedQueue<MacroscopeJobItem>.MODE.USE_HISTORY);

            var Item = new MacroscopeJobItem("http://www.company.com/");

            // First add: the queue holds one item.
            Queue.AddToNamedQueue(QUEUENAME, Item);
            Assert.AreEqual(1, Queue.CountNamedQueueItems(QUEUENAME));

            // Taking the item out leaves the queue empty; the returned item is not inspected.
            Queue.GetNamedQueueItem(QUEUENAME);
            Assert.AreEqual(0, Queue.CountNamedQueueItems(QUEUENAME));

            // Forget the item (the result is not checked), then add it a second time.
            Queue.ForgetNamedQueueItem(QUEUENAME, Item);
            Queue.AddToNamedQueue(QUEUENAME, Item);

            Assert.AreEqual(1, Queue.CountNamedQueueItems(QUEUENAME));
        }
        /** -------------------------------------------------------------------- **/

        /// <summary>
        /// Asks the named URL queue to forget the job item for the given URL.
        /// </summary>
        /// <remarks>
        /// The query string and the hash fragment are stripped from the URL first
        /// when the corresponding preferences are enabled. The value returned by
        /// <c>ForgetNamedQueueItem</c> is ignored.
        /// </remarks>
        /// <param name="Url">The URL whose queue item should be forgotten.</param>
        public void ForgetUrlQueueItem(string Url)
        {
            string CleanedUrl = MacroscopePreferencesManager.GetIgnoreQueries()
                ? MacroscopeUrlUtils.StripQueryString(Url: Url)
                : Url;

            if (MacroscopePreferencesManager.GetIgnoreHashFragments())
            {
                CleanedUrl = MacroscopeUrlUtils.StripHashFragment(Url: CleanedUrl);
            }

            MacroscopeJobItem ItemToForget = new MacroscopeJobItem(Url: CleanedUrl);

            this.NamedQueueJobItems.ForgetNamedQueueItem(
                Name: MacroscopeConstants.NamedQueueUrlList,
                Item: ItemToForget
                );
        }
Example #6
0
        /**************************************************************************/

        /// <summary>
        /// Runs this worker's fetch loop: takes job items from the job master's
        /// URL queue, filters each URL, and fetches the ones that pass.
        /// </summary>
        /// <remarks>
        /// A URL taken from the queue is discarded (set to null) when it fails the
        /// include/exclude check, the parent/child directory checks selected by the
        /// preferences, or the depth limit (applied only when the configured depth
        /// is not negative). A surviving URL is added to the job history and
        /// fetched; <c>ERROR</c> and <c>NETWORK_ERROR</c> results are retried until
        /// the retry counter runs out. Every loop iteration, including those that
        /// fetch nothing, counts against the per-worker maximum. The loop also ends
        /// early when the job master signals that threads should stop.
        /// <c>NotifyWorkersDone</c> is called once the loop has ended.
        /// Pauses use <c>Task.Delay</c> rather than <c>Thread.Sleep</c> so that the
        /// thread running the continuation is not blocked while waiting.
        /// </remarks>
        public async void Execute()
        {
            int MaxFetches = MacroscopePreferencesManager.GetMaxFetchesPerWorker();

            while (MaxFetches > 0)
            {
                if (this.JobMaster.GetThreadsStop())
                {
                    this.DebugMsg(string.Format("JobMaster.GetThreadsStop: {0}", this.JobMaster.GetThreadsStop()));
                    break;
                }
                else
                {
                    MacroscopeJobItem JobItem           = this.JobMaster.GetUrlQueueItem();
                    string            Url               = null;
                    string            RedirectedFromUrl = null;

                    if (JobItem != null)
                    {
                        Url = JobItem.GetItemUrl();
                        RedirectedFromUrl = JobItem.GetItemRedirectedFromUrl();
                    }

                    // From here on, a null Url means "skip this job item".
                    if (!string.IsNullOrEmpty(Url))
                    {
                        if (!this.CheckIncludeExcludeUrl(Url))
                        {
                            Url = null;
                        }
                    }

                    if (!string.IsNullOrEmpty(Url))
                    {
                        if (
                            !MacroscopePreferencesManager.GetCrawlParentDirectories() &&
                            !MacroscopePreferencesManager.GetCrawlChildDirectories() &&
                            Url != this.JobMaster.GetStartUrl())
                        {
                            // Neither parents nor children may be crawled: only the start URL passes.
                            Url = null;
                        }
                        else if (
                            !MacroscopePreferencesManager.GetCrawlParentDirectories() ||
                            !MacroscopePreferencesManager.GetCrawlChildDirectories())
                        {
                            this.DebugMsg(string.Format("Running Parent/Child Check: {0}", Url));

                            if (
                                MacroscopePreferencesManager.GetCrawlParentDirectories() &&
                                (!string.IsNullOrEmpty(Url)))
                            {
                                if (!MacroscopeHttpUrlUtils.IsWithinParentDirectory(StartUrl: this.JobMaster.GetParentStartingDirectory(), Url: Url))
                                {
                                    Url = null;
                                }
                            }

                            if (
                                MacroscopePreferencesManager.GetCrawlChildDirectories() &&
                                (!string.IsNullOrEmpty(Url)))
                            {
                                if (!MacroscopeHttpUrlUtils.IsWithinChildDirectory(StartUrl: this.JobMaster.GetChildStartingDirectory(), Url: Url))
                                {
                                    Url = null;
                                }
                            }
                        }
                        else
                        {
                            this.DebugMsg(string.Format("Skipping Parent/Child Check: {0}", Url));
                        }
                    }

                    if (!string.IsNullOrEmpty(Url))
                    {
                        // A negative configured depth disables the depth limit.
                        if (MacroscopePreferencesManager.GetDepth() >= 0)
                        {
                            if (MacroscopeHttpUrlUtils.FindUrlDepth(Url: Url) > MacroscopePreferencesManager.GetDepth())
                            {
                                this.DebugMsg(string.Format("URL Too Deep: {0}", Url));
                                Url = null;
                            }
                        }
                    }

                    if (!string.IsNullOrEmpty(Url))
                    {
                        this.DebugMsg(string.Format("Execute: {0}", Url));

                        int Tries = MacroscopePreferencesManager.GetMaxRetries();

                        JobHistory.AddHistoryItem(Url: Url);

                        do
                        {
                            this.DebugMsg(string.Format("Trying Fetch: {0} :: {1}", Tries, Url));

                            MacroscopeConstants.FetchStatus FetchStatus = MacroscopeConstants.FetchStatus.VOID;

                            try
                            {
                                if (!string.IsNullOrEmpty(RedirectedFromUrl))
                                {
                                    FetchStatus = await this.Fetch(Url, RedirectedFromUrl);
                                }
                                else
                                {
                                    FetchStatus = await this.Fetch(Url);
                                }
                            }
                            catch (Exception ex)
                            {
                                // NOTE(review): when Fetch throws, FetchStatus keeps its initial
                                // VOID value and takes the default branch below, so the URL is
                                // reported as fetched and is not retried - confirm this is intended.
                                this.DebugMsg(string.Format("FetchStatus: {0}", ex.Message));
                                this.DebugMsg(string.Format("Url: {0}", Url));
                                this.DebugMsg(string.Format("FetchStatus: {0}", FetchStatus));
                            }

                            switch (FetchStatus)
                            {
                                case MacroscopeConstants.FetchStatus.ERROR:
                                case MacroscopeConstants.FetchStatus.NETWORK_ERROR:
                                    this.DebugMsg(string.Format("Fetch Failed: {0} :: {1}", Tries, Url));
                                    // Short pause before the next attempt, without blocking the thread.
                                    await System.Threading.Tasks.Task.Delay(25);
                                    break;

                                default:
                                    this.JobMaster.NotifyWorkersFetched(Url: Url);
                                    // Zero here becomes -1 after the decrement below, ending the retry loop.
                                    Tries = 0;
                                    break;
                            }

                            Tries--;
                        } while (Tries > 0);

                        if (this.CrawlDelay > 0)
                        {
                            this.DebugMsg(string.Format("CRAWL DELAY: Sleeping for {0} seconds...", this.CrawlDelay));
                            await System.Threading.Tasks.Task.Delay(CrawlDelay * 1000);
                        }
                    }
                }

                MaxFetches--;

                //Thread.Yield();
            }

            this.JobMaster.NotifyWorkersDone();
        }
        /** -------------------------------------------------------------------- **/

        /// <summary>
        /// Gets a job item from the named URL queue.
        /// </summary>
        /// <returns>
        /// Whatever <c>GetNamedQueueItem</c> returns for the
        /// <c>MacroscopeConstants.NamedQueueUrlList</c> queue.
        /// </returns>
        public MacroscopeJobItem GetUrlQueueItem()
        {
            return this.NamedQueueJobItems.GetNamedQueueItem(MacroscopeConstants.NamedQueueUrlList);
        }
        /**************************************************************************/

        /// <summary>
        /// Runs this worker's fetch loop: takes job items from the job master's
        /// URL queue, filters each URL, and fetches the ones that pass.
        /// </summary>
        /// <remarks>
        /// A URL taken from the queue is discarded (set to null) when it fails the
        /// include/exclude check or the parent/child directory checks selected by
        /// the preferences. A surviving URL is fetched once per attempt; an
        /// <c>ERROR</c> result is retried after a one second pause until the retry
        /// counter runs out. Every loop iteration, including those that fetch
        /// nothing, counts against the per-worker maximum. The loop also ends early
        /// when the job master signals that threads should stop.
        /// <c>NotifyWorkersDone</c> is called once the loop has ended.
        /// </remarks>
        public void Execute()
        {
            int MaxFetches = MacroscopePreferencesManager.GetMaxFetchesPerWorker();

            while (MaxFetches > 0)
            {
                if (this.JobMaster.GetThreadsStop())
                {
                    DebugMsg(string.Format("JobMaster.GetThreadsStop: {0}", this.JobMaster.GetThreadsStop()));
                    break;
                }
                else
                {
                    MacroscopeJobItem JobItem = this.JobMaster.GetUrlQueueItem();
                    string            Url     = null;

                    if (JobItem != null)
                    {
                        Url = JobItem.GetItemUrl();
                    }

                    // From here on, a null Url means "skip this job item".
                    if (!string.IsNullOrEmpty(Url))
                    {
                        if (!this.CheckIncludeExcludeUrl(Url))
                        {
                            Url = null;
                        }
                    }

                    if (!string.IsNullOrEmpty(Url))
                    {
                        if (
                            !MacroscopePreferencesManager.GetCrawlParentDirectories() &&
                            !MacroscopePreferencesManager.GetCrawlChildDirectories() &&
                            Url != this.JobMaster.GetStartUrl())
                        {
                            // Neither parents nor children may be crawled: only the start URL passes.
                            Url = null;
                        }
                        else if (
                            !MacroscopePreferencesManager.GetCrawlParentDirectories() ||
                            !MacroscopePreferencesManager.GetCrawlChildDirectories())
                        {
                            DebugMsg(string.Format("Running Parent/Child Check: {0}", Url));

                            if (
                                MacroscopePreferencesManager.GetCrawlParentDirectories() &&
                                (!string.IsNullOrEmpty(Url)))
                            {
                                if (!this.JobMaster.IsWithinParentDirectory(Url))
                                {
                                    Url = null;
                                }
                            }

                            if (
                                MacroscopePreferencesManager.GetCrawlChildDirectories() &&
                                (!string.IsNullOrEmpty(Url)))
                            {
                                if (!this.JobMaster.IsWithinChildDirectory(Url))
                                {
                                    Url = null;
                                }
                            }
                        }
                        else
                        {
                            DebugMsg(string.Format("Skipping Parent/Child Check: {0}", Url));
                        }
                    }

                    if (!string.IsNullOrEmpty(Url))
                    {
                        DebugMsg(string.Format("Execute: {0}", Url));

                        int Tries = MacroscopePreferencesManager.GetMaxRetries();

                        do
                        {
                            DebugMsg(string.Format("Trying Fetch: {0} :: {1}", Tries, Url));

                            // Fetch exactly once per attempt. The previous condition called
                            // Fetch on both sides of an ||, so every fetch that did not
                            // return ERROR was immediately performed a second time.
                            var FetchStatus = this.Fetch(Url);

                            if (FetchStatus == MacroscopeConstants.FetchStatus.ERROR)
                            {
                                DebugMsg(string.Format("Fetch Failed: {0} :: {1}", Tries, Url));
                                Thread.Sleep(1000);
                            }
                            else
                            {
                                this.JobMaster.NotifyWorkersFetched(Url);
                                break;
                            }

                            Tries--;
                        } while (Tries > 0);

                        if (this.CrawlDelay > 0)
                        {
                            DebugMsg(string.Format("CRAWL DELAY: Sleeping for {0} seconds...", this.CrawlDelay));
                            Thread.Sleep(CrawlDelay * 1000);
                        }
                    }
                }

                MaxFetches--;

                Thread.Yield();
            }

            this.JobMaster.NotifyWorkersDone();
        }