Beispiel #1
0
        /// <summary>
        /// Starts a separate <c>ThrongBot.CrawlRunner.exe</c> process seeded with
        /// <paramref name="externalUri"/>, but only while the session has fewer crawls
        /// in progress than its <c>MaxConcurrentCrawls</c>. Does nothing otherwise.
        /// </summary>
        /// <param name="repo">Repository queried for the in-progress count, the session, existing crawls and crawler ids.</param>
        /// <param name="sessionId">Session the crawl belongs to; passed as the first runner argument.</param>
        /// <param name="crawlerId">Id of the calling crawler; the spawned crawl gets an id strictly greater than this.</param>
        /// <param name="externalUri">Seed URI; its <c>AbsoluteUri</c> is passed as the third runner argument.</param>
        public static void SpawnExternalCrawl(IRepository repo, int sessionId, int crawlerId, Uri externalUri)
        {
            //if we are less than max concurrent crawls
            //and this crawl is not defined
            //spawn the process

            int inProgress = repo.GetCountOfCrawlsInProgress(sessionId);

            if (inProgress < repo.GetSession(sessionId).MaxConcurrentCrawls)
            {
                // Keep asking for ids until one is both unused and greater than the caller's id.
                // NOTE(review): this only terminates if GetNextCrawlerId eventually yields such
                // an id - presumably it advances on every call; confirm against the repository.
                int  nextCrawlerId;
                bool alreadyDefined;
                do
                {
                    nextCrawlerId  = repo.GetNextCrawlerId(sessionId);
                    alreadyDefined = repo.GetCrawl(sessionId, nextCrawlerId) != null;
                } while (alreadyDefined || nextCrawlerId <= crawlerId);

                //Spawn
                string           seedUrl = externalUri.AbsoluteUri;
                string           args    = string.Format("{0} {1} {2}", sessionId, nextCrawlerId, seedUrl);
                ProcessStartInfo psi     = new ProcessStartInfo("ThrongBot.CrawlRunner.exe", args);

                // Disposing the Process object only releases our handle to it;
                // the spawned runner keeps executing.
                using (Process p = Process.Start(psi))
                {
                    if (p == null)
                    {
                        // Process.Start returns null when no new process resource was started.
                        Console.WriteLine("No process spawned, crawlerId: {0}, seedUrl: {1}", nextCrawlerId, seedUrl);
                        return;
                    }

                    // seedUrl is placeholder {2}; it previously repeated {1} and printed the crawler id twice.
                    Console.WriteLine("Process {0} spawned, crawlerId: {1}, seedUrl: {2}", p.Id, nextCrawlerId, seedUrl);
                }
            }
        }
Beispiel #2
0
        /// <summary>
        /// Reports whether the session still has room for another crawl.
        /// </summary>
        /// <param name="repo">Repository queried for the in-progress count and the session.</param>
        /// <param name="sessionId">Session whose limit is checked.</param>
        /// <returns>
        /// <c>true</c> when the number of crawls in progress is strictly below the
        /// session's <c>MaxConcurrentCrawls</c>; otherwise <c>false</c>.
        /// </returns>
        public static bool CanExternalCrawlBeSpawned(IRepository repo, int sessionId)
        {
            // The original note says a canSpawnCrawlsConfigSetting should be checked first;
            // no such check is implemented here.
            int activeCrawls = repo.GetCountOfCrawlsInProgress(sessionId);

            return activeCrawls < repo.GetSession(sessionId).MaxConcurrentCrawls;
        }
Beispiel #3
0
        /// <summary>
        /// Launches <c>ThrongBot.CrawlRunner.exe</c> as a new process to crawl
        /// <paramref name="externalUri"/> when the session's in-progress crawl count is
        /// below its <c>MaxConcurrentCrawls</c>. Returns without spawning otherwise.
        /// </summary>
        /// <param name="repo">Repository used for the in-progress count, session lookup, crawl lookup and id allocation.</param>
        /// <param name="sessionId">Owning session; first argument on the runner command line.</param>
        /// <param name="crawlerId">Caller's crawler id; the new crawl's id must be strictly greater.</param>
        /// <param name="externalUri">Seed URI; <c>AbsoluteUri</c> is the third argument on the runner command line.</param>
        public static void SpawnExternalCrawl(IRepository repo, int sessionId, int crawlerId, Uri externalUri)
        {
            //if we are less than max concurrent crawls
            //and this crawl is not defined
            //spawn the process

            int inProgress = repo.GetCountOfCrawlsInProgress(sessionId);
            if (inProgress < repo.GetSession(sessionId).MaxConcurrentCrawls)
            {
                // Request ids until we get one with no existing crawl that is also above crawlerId.
                // NOTE(review): relies on GetNextCrawlerId eventually producing such an id;
                // verify it advances per call, otherwise this loop never exits.
                int nextCrawlerId;
                bool alreadyDefined;
                do
                {
                    nextCrawlerId = repo.GetNextCrawlerId(sessionId);
                    alreadyDefined = repo.GetCrawl(sessionId, nextCrawlerId) != null;
                } while (alreadyDefined || nextCrawlerId <= crawlerId);

                //Spawn
                string seedUrl = externalUri.AbsoluteUri;
                string args = string.Format("{0} {1} {2}", sessionId, nextCrawlerId, seedUrl);
                ProcessStartInfo psi = new ProcessStartInfo("ThrongBot.CrawlRunner.exe", args);

                // The using block releases only the local handle; the child process keeps running.
                using (Process p = Process.Start(psi))
                {
                    if (p == null)
                    {
                        // Process.Start yields null when it did not start a new process resource.
                        Console.WriteLine("No process spawned, crawlerId: {0}, seedUrl: {1}", nextCrawlerId, seedUrl);
                        return;
                    }

                    // Fixed placeholder: seedUrl now uses {2} instead of repeating {1}.
                    Console.WriteLine("Process {0} spawned, crawlerId: {1}, seedUrl: {2}", p.Id, nextCrawlerId, seedUrl);
                }
            }
        }
Beispiel #4
0
        /// <summary>
        /// Tells the caller whether another external crawl may be started for the session.
        /// </summary>
        /// <param name="repo">Repository that supplies the in-progress count and the session.</param>
        /// <param name="sessionId">Session to check.</param>
        /// <returns>
        /// <c>true</c> if fewer crawls are in progress than the session's
        /// <c>MaxConcurrentCrawls</c>; <c>false</c> otherwise.
        /// </returns>
        public static bool CanExternalCrawlBeSpawned(IRepository repo, int sessionId)
        {
            // Original note: check config canSpawnCrawlsConfigSetting first (not implemented here).
            int runningCount = repo.GetCountOfCrawlsInProgress(sessionId);
            bool belowLimit = runningCount < repo.GetSession(sessionId).MaxConcurrentCrawls;

            return belowLimit;
        }