        public CrawlerProcessing(int maxConcurrentDownload, int sleepTime, Uri uri, Web.RobotsTxt robots)
        {
            // list of URLs to crawl, built around the robots.txt rules
            CrawlList = new CrawlList(robots);
            // shared HTTP client used for all downloads
            client    = new HttpClient();
            this.maxConcurrentDownload = maxConcurrentDownload;
            // let the connection pool open as many connections as we download concurrently
            ServicePointManager.DefaultConnectionLimit = maxConcurrentDownload;
            // remember the crawl settings for later use
            SleepTime = sleepTime;
            BaseUri   = uri;
            Robots    = robots;
        }
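
        // Hypothetical usage note (not in the original source): the values below are
        // illustrative only, e.g. five parallel downloads and a sleep value of 1000,
        // with "robots" coming from ParseRobotsTxt below.
        //
        //     var crawler = new CrawlerProcessing(5, 1000, new Uri("https://example.com/"), robots);
        //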
        public static bo.Web.RobotsTxt ParseRobotsTxt(string robotsTxt)
        {
            bo.Web.RobotsTxt robots = new Web.RobotsTxt();

            // go through the lines and drop the commented ones so "#" cannot
            // interfere with the splitting below
            string[] lines = robotsTxt.Split(new string[] { "\r\n", "\n" }, StringSplitOptions.RemoveEmptyEntries);
            System.Text.StringBuilder cleaned = new System.Text.StringBuilder();
            for (int i = 0; i < lines.Length; i++)
            {
                if (!lines[i].Trim().StartsWith("#")) // checking for commented lines
                {
                    cleaned.AppendLine(lines[i].Trim());
                }
            }

            // keep only the section that applies to every crawler ("User-agent: *")
            string[] userAgentSections = Regex.Split(cleaned.ToString(), "User-agent:");
            string   wildcardSection   = "";

            foreach (string section in userAgentSections)
            {
                if (section.Trim().StartsWith("*"))
                {
                    wildcardSection = section.Trim().Substring(1);
                }
            }

            // each chunk after a "Disallow:" starts with the disallowed path,
            // so keep only the first line of every chunk
            string[] disallow = Regex.Split(wildcardSection, "Disallow:");
            for (int i = 1; i < disallow.Length; i++) // disallow[0] is the text before the first rule
            {
                string rule = disallow[i].Trim();
                int lineEnd = rule.IndexOf('\n');
                if (lineEnd >= 0)
                {
                    rule = rule.Substring(0, lineEnd).Trim();
                }
                if (!string.IsNullOrEmpty(rule))
                {
                    robots.DisallowedList.Add(rule);
                }
            }
            return robots;
        }
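
        // A minimal usage sketch (not part of the original class): fetch a site's
        // robots.txt and hand it to ParseRobotsTxt above. The method name and the
        // blocking call are illustrative assumptions only; error handling is omitted.
        public static bo.Web.RobotsTxt FetchAndParseRobotsTxt(Uri baseUri)
        {
            using (HttpClient http = new HttpClient())
            {
                // robots.txt is always served from the site root
                Uri robotsUri = new Uri(baseUri, "/robots.txt");
                string robotsTxt = http.GetStringAsync(robotsUri).GetAwaiter().GetResult();
                return ParseRobotsTxt(robotsTxt);
            }
        }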