Beispiel #1
0
 /// <summary>
 /// Initializes a new <see cref="Controller"/>: stores the shared <c>Globals</c> object,
 /// creates the event logger and the statistics array, and obtains the server proxy
 /// through <c>CrawlWaveServerProxy.Instance</c>. No crawler is assigned at construction time.
 /// </summary>
 public Controller()
 {
     // Plain state first: there is no crawler yet and the statistics array has 10 slots.
     crawler = null;
     stats = new long[10];

     // The same settings object is kept on this instance and handed to the proxy factory below.
     Globals settings = Globals.Instance();
     globals = settings;

     // NOTE(review): 100 is presumably the logger's queue capacity - confirm against QueueEventLogger.
     log = new QueueEventLogger(100);

     proxy = CrawlWaveServerProxy.Instance(settings);
 }
Beispiel #2
0
        private Globals globals;                                       //Provides access to the global variables and application settings

        #endregion

        #region Constructor and Singleton Instance Members

        /// <summary>
        /// Private constructor, so instances can only be created from inside the class.
        /// Sets up the settings reference, an empty host table and the mutex.
        /// </summary>
        private HostRequestFilter()
        {
            //Reference to the global variables and application settings
            globals = Globals.Instance();

            //String-keyed table of HostRequestFilterEntry objects, created with an initial capacity of 128
            hostTable = new Dictionary<string, HostRequestFilterEntry>(128);

            //Synchronization object
            mutex = new Mutex();
        }
Beispiel #3
0
        private Globals globals;         //Provides access to the global variables and application settings

        #endregion

        #region Constructor and Singleton Instance Members

        /// <summary>
        /// Private constructor, so instances can only be created from inside the class.
        /// Prepares the settings reference, the server proxy and an empty host table, and
        /// then calls <c>InitializeBannedHosts()</c>.
        /// </summary>
        private HostBanFilter()
        {
            //Reference to the global variables and application settings; the proxy is obtained with the same object
            Globals settings = Globals.Instance();
            globals = settings;
            proxy = CrawlWaveServerProxy.Instance(settings);

            //Empty storage for the banned host entries
            hostTable = new Hashtable();

            //Must run last: every field assigned above is in place before the banned hosts are initialized
            InitializeBannedHosts();
        }
Beispiel #4
0
        private const string supportedContentType = "application/x-shockwave-flash"; //The Content Type supported by the parser

        #endregion

        #region Constructor and Singleton Instance members

        /// <summary>
        /// Private constructor, so instances can only be created from inside the class.
        /// Selects UTF-8 as the encoding, creates the mutex and the SWF-to-HTML converter,
        /// and picks up the <c>HtmlParser</c> and <c>Globals</c> instances.
        /// </summary>
        private SwfParser()
        {
            //Text encoding: UTF-8 (the original author left an ISO-8859-7 alternative commented out)
            encoding = Encoding.UTF8;

            //Synchronization object
            mutex = new Mutex();

            //Converter first, then the parser obtained from HtmlParser.Instance()
            converter = new CSwf2HtmlConverterClass();
            parser = HtmlParser.Instance();

            //Reference to the global variables and application settings
            globals = Globals.Instance();
        }
Beispiel #5
0
        //The alternative content type description (PDF has 2 IANA reserved content-types)

        #endregion

        #region Constructor and Singleton Instance members

        /// <summary>
        /// Private constructor, so instances can only be created from inside the class.
        /// Selects the ISO-8859-7 encoding, creates the mutex and the Xpdf text converter,
        /// and picks up the <c>TextParser</c> and <c>Globals</c> instances.
        /// </summary>
        private PdfParser()
        {
            //Text encoding, looked up by name
            encoding = Encoding.GetEncoding("ISO-8859-7");

            //Synchronization object
            mutex = new Mutex();

            //Converter first, then the parser obtained from TextParser.Instance()
            converter = new XpdfTextClass();
            parser = TextParser.Instance();

            //Reference to the global variables and application settings
            globals = Globals.Instance();
        }
Beispiel #6
0
        private Encoding encoding = System.Text.Encoding.UTF8;   //Needed to parse the robots.txt files

        #endregion

        #region Constructor and Singleton Instance Members

        /// <summary>
        /// Private constructor, so instances can only be created from inside the class.
        /// Sets up the settings reference, the mutex, an empty robots table and the interned
        /// strings used by the filter, including the cache file name built from
        /// <c>AppDataPath</c>.
        /// </summary>
        private RobotsFilter()
        {
            //Reference to the global variables and application settings
            Globals settings = Globals.Instance();
            globals = settings;

            //Synchronization object
            mutex = new Mutex();

            //String-keyed table of RobotsTxtEntry objects, created with an initial capacity of 1024
            robotsTable = new Dictionary<string, RobotsTxtEntry>(1024);

            //All strings below are interned; the original author did this to save a little memory
            userAgent = new string[]
            {
                string.Intern("User-agent: "),
                string.Intern("User-agent: *"),
                string.Intern("User-agent: CrawlWave")
            };
            disallow = string.Intern("Disallow: ");

            //Plain concatenation: no separator is inserted between AppDataPath and the file name
            FileName = string.Intern(settings.AppDataPath + "RobotsCache.xml");
        }
Beispiel #7
0
        //The Content Type supported by the parser

        #endregion

        #region Constructor and Singleton Instance members

        /// <summary>
        /// Private constructor, so instances can only be created from inside the class.
        /// Compiles the three regular expressions used by the parser, creates the mutex and
        /// picks up the <c>RobotsFilter</c>, <c>DomainFilter</c> and <c>Globals</c> instances.
        /// </summary>
        private TextParser()
        {
            //All three expressions are built with the same set of options
            RegexOptions sharedOptions = RegexOptions.CultureInvariant
                                       | RegexOptions.Multiline
                                       | RegexOptions.IgnoreCase
                                       | RegexOptions.Compiled;

            //Absolute http/https urls. To enable ftp urls as well, use:
            //"(http|ftp|https)://[\w]+(\.[\w]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?"
            hrefRegex = new Regex(@"(http|https)://[\w]+(\.[\w]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?", sharedOptions);

            //Either a run of 40 to 64 hex digits, or a GUID-shaped token (optionally wrapped in
            //braces or parentheses) at the end of a line.
            //NOTE(review): inside a character class '|' is a literal, so [\{|\(] and [\)|\}] also
            //accept '|' as a delimiter - confirm whether that is intended.
            sessionIDRegex = new Regex(@"([0-9a-fA-F]{40,64})|([\{|\(]?[0-9a-fA-F]{8}[-]?([0-9a-fA-F]{4}[-]?){3}[0-9a-fA-F]{12}[\)|\}]?)$", sharedOptions);

            //One or more whitespace characters
            spacesRegex = new Regex(@"\s+", sharedOptions);

            //Synchronization object
            mutex = new Mutex();

            //Filters, obtained from their Instance() methods
            robotsFilter = RobotsFilter.Instance();
            domainFilter = DomainFilter.Instance();

            //Reference to the global variables and application settings
            globals = Globals.Instance();
        }
Beispiel #8
0
        private Globals globals;              //Provides access to the global variables and application settings

        #endregion

        #region Constructor and Singleton Instance Members

        /// <summary>
        /// Private constructor, so instances can only be created from inside the class.
        /// Creates the mutex, the IP country table, the list of registry file names and the
        /// IPv4 address expression, stores the settings reference and finally calls
        /// <c>LoadIPAddresses()</c>.
        /// </summary>
        private DomainFilter()
        {
            //Synchronization object
            mutex = new Mutex();

            //Storage for the IP addresses. According to the original author's note, a
            //keyLength of 16 will create 65536 root nodes.
            ipTable = new IPCountryTable(16);

            //Interned file names; the original author did this to save a little memory
            FileNames = new string[]
            {
                string.Intern("apnic.latest"),
                string.Intern("arin.latest"),
                string.Intern("lacnic.latest"),
                string.Intern("ripencc.latest")
            };

            //Dotted IPv4 address expression.
            //For IPv6 addresses the following pattern can be used:
            // ^(([\dA-Fa-f]{1,4}:){7}[\dA-Fa-f]{1,4})(:([\d]{1,3}.){3}[\d]{1,3})?$
            // and the input length must be between 16 and 39 characters
            RegexOptions ipOptions = RegexOptions.CultureInvariant
                                   | RegexOptions.Multiline
                                   | RegexOptions.IgnoreCase
                                   | RegexOptions.Compiled;
            ipAddressRegex = new Regex(@"^(?:(?:25[0-5]|2[0-4]\d|[01]\d\d|\d?\d)(?(\.?\d)\.)){4}$", ipOptions);

            //Reference to the global variables and application settings
            globals = Globals.Instance();

            //Must run last: every field assigned above is in place before the tables are loaded
            LoadIPAddresses();
        }
Beispiel #9
0
 /// <summary>
 /// Private constructor, so instances can only be created from inside the class.
 /// Its only job is to store the object returned by <c>Globals.Instance()</c>.
 /// </summary>
 private Client()
 {
     // Reference to the global variables and application settings.
     Globals settings = Globals.Instance();
     globals = settings;
 }