Example #1
0
        /// <summary>
        /// Sets up the crawl with all the parameters it needs to continue until finished:
        /// validates the seed URL, registers it as the first node, stores the crawl limits,
        /// connects to the database and records a new snapshot for this crawl.
        /// </summary>
        /// <param name="urlToStart">The valid, absolute url to start crawling from</param>
        /// <param name="maxAttempts">Number of times to try failed pages (not implemented)</param>
        /// <param name="secondsDelay">Number of seconds to wait between page loads</param>
        /// <param name="steps">Number of steps away from the urlToStart to traverse before stopping</param>
        /// <param name="databaseFileName">Name of the file to store data in</param>
        /// <exception cref="ArgumentException">
        /// Thrown when <paramref name="urlToStart"/> is null, cannot be parsed as an absolute URI,
        /// or is not a well-formed URI string.
        /// </exception>
        public void Seed(string urlToStart, int maxAttempts, int secondsDelay, int steps, string databaseFileName)
        {
            // Validate into a local first so that no instance state is touched
            // when the seed turns out to be invalid.
            Uri seedUri;
            if (!Uri.TryCreate(urlToStart, UriKind.Absolute, out seedUri)
                || !seedUri.IsWellFormedOriginalString())
            {
                throw new ArgumentException("Invalid URI supplied as seed");
            }

            _seedUri = seedUri;

            // Uri instances are immutable, so the seed node can share the parsed instance
            // instead of parsing the same string a second time.
            var seedNode = new WebNode {
                NodeUri = seedUri
            };

            WebNodes.Add(seedNode);

            _maxCrawlAttempts = maxAttempts;
            _secondsDelay     = secondsDelay;
            _maxSteps         = steps;

            // NOTE(review): the Database instance is only used for this one call and then dropped;
            // presumably ConnectToDatabase prepares the file for SnapShot - confirm against Database.
            var dbSetup = new Database();

            dbSetup.ConnectToDatabase(databaseFileName);
            _snapShot   = new SnapShot(databaseFileName);

            // The snapshot record receives the url, delay and step limit; maxAttempts is not passed on.
            _snapShotId = _snapShot.InsertSnapShot(urlToStart, secondsDelay, steps);
        }