/// <summary>
/// Builds a <see cref="Controller"/>: resets the crawler reference and the
/// statistics counters, then obtains the shared settings, an event logger and
/// the server proxy.
/// </summary>
public Controller()
{
    // Plain state first: no crawler is attached yet and there are ten counter slots.
    crawler = null;
    stats = new long[10];

    // Collaborators. The proxy is created from the settings object, so the
    // settings must be obtained before it.
    globals = Globals.Instance();
    log = new QueueEventLogger(100); // NOTE(review): 100 is presumably the queue size - confirm against QueueEventLogger
    proxy = CrawlWaveServerProxy.Instance(globals);
}
private Globals globals; //Provides access to the global variables and application settings

#endregion

#region Constructor and Singleton Instance Members

/// <summary>
/// Sets up the mutex, the host entry table and the settings reference used by
/// the filter. Declared private so that an instance can only be created from
/// inside the class itself.
/// </summary>
private HostRequestFilter()
{
    // Initial capacity handed to the dictionary that stores the HostRequestFilterEntry objects.
    const int initialHostCapacity = 128;

    // Synchronization mechanism.
    mutex = new Mutex();

    // Storage for the per-host entries, keyed by string.
    hostTable = new Dictionary<string, HostRequestFilterEntry>(initialHostCapacity);

    // Global variables and application settings.
    globals = Globals.Instance();
}
private Globals globals; //Provides access to the global variables and application settings

#endregion

#region Constructor and Singleton Instance Members

/// <summary>
/// Creates the banned-host table, obtains the application settings and the
/// server proxy, and then populates the list of banned hosts. Declared private
/// so that an instance can only be created from inside the class itself.
/// </summary>
private HostBanFilter()
{
    // Storage for the banned host entries.
    hostTable = new Hashtable();

    // The settings object is both kept in a field and handed to the proxy factory.
    Globals settings = Globals.Instance();
    globals = settings;

    // The WebServiceProxy.Instance() alternative is left disabled; the
    // CrawlWave server proxy is the one in use.
    proxy = CrawlWaveServerProxy.Instance(settings);

    // Runs last: by this point the table, the settings and the proxy are all assigned.
    InitializeBannedHosts();
}
private const string supportedContentType = "application/x-shockwave-flash"; //The Content Type supported by the parser

#endregion

#region Constructor and Singleton Instance members

/// <summary>
/// Prepares the parser's mutex, text encoding, SWF-to-HTML converter, HTML
/// parser and settings reference. Declared private so that an instance can
/// only be created from inside the class itself.
/// </summary>
private SwfParser()
{
    // Synchronization mechanism.
    mutex = new Mutex();

    // UTF-8 is the encoding in use; GetEncoding("ISO-8859-7") is the disabled alternative.
    encoding = Encoding.UTF8;

    // The converter object and the HTML parser instance this parser works with.
    converter = new CSwf2HtmlConverterClass();
    parser = HtmlParser.Instance();

    // Global variables and application settings.
    globals = Globals.Instance();
}
//The alternative content type description (PDF has 2 IANA reserved content-types)

#endregion

#region Constructor and Singleton Instance members

/// <summary>
/// Prepares the parser's mutex, text encoding, Xpdf text converter, plain-text
/// parser and settings reference. Declared private so that an instance can
/// only be created from inside the class itself.
/// </summary>
private PdfParser()
{
    // Synchronization mechanism.
    mutex = new Mutex();

    // Text encoding looked up by name.
    encoding = Encoding.GetEncoding("ISO-8859-7");

    // The converter object and the text parser instance this parser works with.
    converter = new XpdfTextClass();
    parser = TextParser.Instance();

    // Global variables and application settings.
    globals = Globals.Instance();
}
private Encoding encoding = System.Text.Encoding.UTF8; //Needed to parse the robots.txt files

#endregion

#region Constructor and Singleton Instance Members

/// <summary>
/// Sets up the mutex, the table of RobotsTxtEntry objects, the settings
/// reference, the interned strings used for robots.txt directives and the
/// cache file name. Declared private so that an instance can only be created
/// from inside the class itself.
/// </summary>
private RobotsFilter()
{
    // Initial capacity handed to the dictionary of RobotsTxtEntry objects.
    const int initialRobotsCapacity = 1024;

    // Synchronization mechanism.
    mutex = new Mutex();

    // Storage for the RobotsTxtEntry objects, keyed by string.
    robotsTable = new Dictionary<string, RobotsTxtEntry>(initialRobotsCapacity);

    // Global variables and application settings; needed below for AppDataPath.
    globals = Globals.Instance();

    // Directive strings, passed through the intern pool.
    userAgent = new string[]
    {
        string.Intern("User-agent: "),
        string.Intern("User-agent: *"),
        string.Intern("User-agent: CrawlWave")
    };
    disallow = string.Intern("Disallow: ");

    // Cache file name built by plain concatenation.
    // NOTE(review): this assumes AppDataPath already ends with a directory separator - confirm.
    FileName = string.Intern(globals.AppDataPath + "RobotsCache.xml");
}
//The Content Type supported by the parser

#endregion

#region Constructor and Singleton Instance members

/// <summary>
/// Prepares the parser's mutex, its three compiled regular expressions, the
/// robots and domain filters, and the settings reference. Declared private so
/// that an instance can only be created from inside the class itself.
/// </summary>
private TextParser()
{
    // Every expression below is built with the same option set.
    const RegexOptions sharedOptions =
        RegexOptions.CultureInvariant | RegexOptions.Multiline | RegexOptions.IgnoreCase | RegexOptions.Compiled;

    // Synchronization mechanism.
    mutex = new Mutex();

    // Matches http:// and https:// URLs.
    // Use "(http|ftp|https)://[\w]+(\.[\w]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?" to enable ftp urls.
    hrefRegex = new Regex(@"(http|https)://[\w]+(\.[\w]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?", sharedOptions);

    // Matches either a run of 40 to 64 hex digits, or a GUID-shaped token at the end of a line.
    // Alternative pattern kept for reference: @"^([0-9a-f]{32})|(\{?[0-9a-f]{8}-([0-9a-f]{4}-){3}-[0-9a-f]{12}\}?)$"
    sessionIDRegex = new Regex(@"([0-9a-fA-F]{40,64})|([\{|\(]?[0-9a-fA-F]{8}[-]?([0-9a-fA-F]{4}[-]?){3}[0-9a-fA-F]{12}[\)|\}]?)$", sharedOptions);

    // Matches one or more consecutive whitespace characters.
    spacesRegex = new Regex(@"\s+", sharedOptions);

    // Filters, obtained in the same order as before (robots first, then domain).
    robotsFilter = RobotsFilter.Instance();
    domainFilter = DomainFilter.Instance();

    // Global variables and application settings.
    globals = Globals.Instance();
}
private Globals globals; //Provides access to the global variables and application settings

#endregion

#region Constructor and Singleton Instance Members

/// <summary>
/// Sets up the mutex, the IP country table, the list of data file names, the
/// IP address regular expression and the settings reference, then loads the
/// IP address tables. Declared private so that an instance can only be created
/// from inside the class itself.
/// </summary>
private DomainFilter()
{
    // Option set for the IP address expression.
    const RegexOptions ipRegexOptions =
        RegexOptions.CultureInvariant | RegexOptions.Multiline | RegexOptions.IgnoreCase | RegexOptions.Compiled;

    // Synchronization mechanism.
    mutex = new Mutex();

    // Storage for the IP addresses; a keyLength of 16 will create 65536 root nodes.
    ipTable = new IPCountryTable(16);

    // Data file names, passed through the intern pool.
    // NOTE(review): these look like the per-registry allocation files - confirm against LoadIPAddresses.
    FileNames = new string[]
    {
        String.Intern("apnic.latest"),
        String.Intern("arin.latest"),
        String.Intern("lacnic.latest"),
        String.Intern("ripencc.latest")
    };

    // Dotted IPv4 address pattern.
    // For IPv6 addresses the following pattern can be used:
    //   ^(([\dA-Fa-f]{1,4}:){7}[\dA-Fa-f]{1,4})(:([\d]{1,3}.){3}[\d]{1,3})?$
    // and the input length must be between 16 and 39 characters.
    ipAddressRegex = new Regex(@"^(?:(?:25[0-5]|2[0-4]\d|[01]\d\d|\d?\d)(?(\.?\d)\.)){4}$", ipRegexOptions);

    // Global variables and application settings.
    globals = Globals.Instance();

    // Runs last: every field assigned above is in place before the tables are loaded.
    LoadIPAddresses();
}
/// <summary>
/// Obtains the reference to the global variables and application settings.
/// Declared private so that an instance can only be created from inside the
/// class itself.
/// </summary>
private Client()
{
    // The only state set up here is the settings reference.
    globals = Globals.Instance();
}