/// <summary>
/// Initializes a new <see cref="Controller"/>: no crawler is attached, the
/// statistics array holds ten counters, and the settings, event log and
/// server proxy references are obtained.
/// </summary>
public Controller()
{
    //Plain field initialization first: no crawler yet, ten statistics slots
    crawler = null;
    stats = new long[10];

    //Get a reference to the global variables and application settings
    globals = Globals.Instance();

    //Event logger, constructed with the argument 100
    log = new QueueEventLogger(100);

    //The server proxy is obtained using the settings fetched above
    proxy = CrawlWaveServerProxy.Instance(globals);
}
private Mutex mutex; //Mutex used to synchronize access from multiple threads

#endregion Fields

#region Constructors

/// <summary>
/// Private constructor: an instance can only be created from inside the class.
/// It sets up the host table, the synchronization mutex and the settings reference.
/// </summary>
private HostRequestFilter()
{
    //Storage for the HostRequestFilterEntry objects, keyed by string, initial capacity 128
    hostTable = new Dictionary<string, HostRequestFilterEntry>(128);

    //Synchronization mechanism
    mutex = new Mutex();

    //Reference to the global variables and application settings
    globals = Globals.Instance();
}
/// <summary>
/// Private constructor: an instance can only be created from inside the class.
/// It creates the banned host table, obtains the settings and the server proxy,
/// and then calls <c>InitializeBannedHosts</c>.
/// </summary>
private HostBanFilter()
{
    //Reference to the global variables and application settings
    globals = Globals.Instance();

    //Storage for the banned host entries
    hostTable = new Hashtable();

    //Proxy used while initializing the list of banned hosts
    //proxy = WebServiceProxy.Instance();
    proxy = CrawlWaveServerProxy.Instance(globals);

    //Must run last: the table and the proxy are already assigned at this point
    InitializeBannedHosts();
}
/// <summary>
/// Private constructor: an instance can only be created from inside the class.
/// It prepares the mutex, the text encoding, the converter and text parser,
/// and the settings reference.
/// </summary>
private PdfParser()
{
    //Name of the encoding requested from Encoding.GetEncoding
    const string encodingName = "ISO-8859-7";

    //Synchronization mechanism
    mutex = new Mutex();

    //Encoding
    encoding = Encoding.GetEncoding(encodingName);

    //Converter and parser
    converter = new XpdfTextClass();
    parser = TextParser.Instance();

    //Reference to the global variables and application settings
    globals = Globals.Instance();
}
private HtmlParser parser; //Used to extract links once the conversion is done

#endregion Fields

#region Constructors

/// <summary>
/// Private constructor: an instance can only be created from inside the class.
/// It prepares the mutex, the UTF-8 encoding, the converter and HTML parser,
/// and the settings reference.
/// </summary>
private SwfParser()
{
    //Synchronization mechanism
    mutex = new Mutex();

    //Encoding: UTF-8 (GetEncoding("ISO-8859-7") is the commented-out alternative)
    encoding = Encoding.UTF8;

    //Converter and parser
    converter = new CSwf2HtmlConverterClass();
    parser = HtmlParser.Instance();

    //Reference to the global variables and application settings
    globals = Globals.Instance();
}
private Mutex mutex; //Mutex used to synchronize access from multiple threads

#endregion Fields

#region Constructors

/// <summary>
/// Private constructor: an instance can only be created from inside the class.
/// It prepares the mutex, the IP address table, the list of file names and the
/// IP address regular expression, obtains the settings and finally calls
/// <c>LoadIPAddresses</c>.
/// </summary>
private DomainFilter()
{
    //Options passed to the regular expression created below
    const RegexOptions regexOptions =
        RegexOptions.CultureInvariant | RegexOptions.Multiline | RegexOptions.IgnoreCase | RegexOptions.Compiled;

    //Synchronization mechanism
    mutex = new Mutex();

    //Storage for the IP Addresses (original note: keyLength of 16 will create 65536 root nodes)
    ipTable = new IPCountryTable(16);

    //The file names are interned (original note: this saves a little memory)
    FileNames = new string[]
    {
        String.Intern("apnic.latest"),
        String.Intern("arin.latest"),
        String.Intern("lacnic.latest"),
        String.Intern("ripencc.latest"),
    };

    //Regular expression for IP address strings
    ipAddressRegex = new Regex(@"^(?:(?:25[0-5]|2[0-4]\d|[01]\d\d|\d?\d)(?(\.?\d)\.)){4}$", regexOptions);
    //For IPv6 addresses the following pattern can be used:
    // ^(([\dA-Fa-f]{1,4}:){7}[\dA-Fa-f]{1,4})(:([\d]{1,3}.){3}[\d]{1,3})?$
    // and the input length must be between 16 and 39 characters

    //Reference to the global variables and application settings
    globals = Globals.Instance();

    //Load the IP Address tables into the storage; runs after everything above is assigned
    LoadIPAddresses();
}
/// <summary>
/// Private constructor: an instance can only be created from inside the class.
/// Its only work is to store a reference to the <see cref="Globals"/> instance.
/// </summary>
private Client()
{
    //Fetch the shared settings object, then keep it in the field
    Globals settings = Globals.Instance();
    globals = settings;
}
/// <summary>
/// Lock object shared by all callers of <see cref="Instance"/>. It is static so
/// that it exists before any <see cref="Globals"/> object has been constructed.
/// </summary>
private static readonly object instanceLock = new object();

/// <summary>
/// Provides a global access point for the single instance of the <see cref="Globals"/>
/// class, creating it on first use.
/// </summary>
/// <remarks>
/// The previous implementation created a new local <c>Mutex</c> on every call, so each
/// caller waited on its own private mutex and no two threads ever excluded each other;
/// it also closed the mutex without calling <c>ReleaseMutex</c>. All callers now
/// synchronize on one shared static object instead.
/// </remarks>
/// <returns>A reference to the single instance of <see cref="Globals"/>.</returns>
public static Globals Instance()
{
    //Fast path: once the instance exists no locking is needed
    if (instance == null)
    {
        //The instance's own mutex cannot be used here because it is only
        //initialized in the constructor, hence the dedicated static lock object.
        lock (instanceLock)
        {
            //Re-check inside the lock: another thread may have created the
            //instance while this one was waiting.
            if (instance == null)
            {
                instance = new Globals();
            }
        }
    }
    //NOTE(review): the declaration of 'instance' is not visible here - confirm
    //whether it should be marked volatile for the unlocked read above.
    return instance;
}
private Regex spacesRegex; //Regular Expression that compacts white space characters

#endregion Fields

#region Constructors

/// <summary>
/// Private constructor: an instance can only be created from inside the class.
/// It prepares the mutex, the regular expressions, the robots and domain filters
/// and the settings reference.
/// </summary>
private TextParser()
{
    //Every regular expression below is created with the same set of options
    const RegexOptions regexOptions =
        RegexOptions.CultureInvariant | RegexOptions.Multiline | RegexOptions.IgnoreCase | RegexOptions.Compiled;

    //Synchronization mechanism
    mutex = new Mutex();

    //Regular Expressions
    hrefRegex = new Regex(@"(http|https)://[\w]+(\.[\w]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?", regexOptions);
    //use "(http|ftp|https)://[\w]+(\.[\w]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?" to enable ftp urls
    sessionIDRegex = new Regex(@"([0-9a-fA-F]{40,64})|([\{|\(]?[0-9a-fA-F]{8}[-]?([0-9a-fA-F]{4}[-]?){3}[0-9a-fA-F]{12}[\)|\}]?)$", regexOptions);
    //alternative: @"^([0-9a-f]{32})|(\{?[0-9a-f]{8}-([0-9a-f]{4}-){3}-[0-9a-f]{12}\}?)$"
    spacesRegex = new Regex(@"\s+", regexOptions);

    //Filters
    robotsFilter = RobotsFilter.Instance();
    domainFilter = DomainFilter.Instance();

    //Reference to the global variables and application settings
    globals = Globals.Instance();
}
private Regex stylesRegex; //Regular Expression matching stylesheets

#endregion Fields

#region Constructors

/// <summary>
/// Private constructor: an instance can only be created from inside the class.
/// It prepares the mutex, the regular expressions, the robots and domain filters,
/// the Greek culture info and the settings reference.
/// </summary>
private HtmlParser()
{
    //Every regular expression below is created with the same set of options
    const RegexOptions regexOptions =
        RegexOptions.CultureInvariant | RegexOptions.Multiline | RegexOptions.IgnoreCase | RegexOptions.Compiled;

    //Synchronization mechanism
    mutex = new Mutex();

    //Regular Expressions for links, base href, charset, frames, flash, and meta tags
    ahrefRegex = new Regex("href\\s*=\\s*(?:\"(?<1>[^\"]*)\"|(?<1>\\S+))", regexOptions);
    baseRegex = new Regex("base\\s*href=\\s*(?:\"(?<1>[^\"]*)\"|(?<1>\\S+))", regexOptions);
    charsetRegex = new Regex("<meta\\s*http-equiv=([^>])*charset\\s*=\\s*([^>])*(iso-8859-7|windows-1253)([^>])*>", regexOptions);
    frameRegex = new Regex("frame\\s*.*src\\s*=\\s*(?:\"(?<1>[^\"]*)\"|(?<1>\\S+))", regexOptions);
    flashRegex = new Regex("<embed\\s*([^>])*src\\s*=([^>])*type\\s*=([^>])*application/x-shockwave-flash([^>])*>", regexOptions);
    refreshRegex = new Regex("<meta\\s*http-equiv=([^>])*refresh([^>])*content\\s*=\\s*\"[^>]*\">", regexOptions);
    robotRegex = new Regex("<meta\\s*name\\s*=\\s*\"robots\"\\s*content\\s*=\\s*\"[^>]*\">", regexOptions);

    //Regular Expressions for scripts, white space, stylesheets and tags
    //NOTE(review): (\w|\W)* in the script and style patterns is greedy, so a match
    //may run up to the last closing tag in the input - confirm this is intended.
    scriptRegex = new Regex(@"(?i)<script([^>])*>(\w|\W)*</script([^>])*>", regexOptions);
    //alternatives: @"(?i)<script([^>])*>(\w|\W)*</script([^>])*>" or @"<script[^>]*>(\w|\W)*?</script[^>]*>"
    spacesRegex = new Regex(@"\s+", regexOptions);
    stylesRegex = new Regex(@"<style([^>])*>(\w|\W)*</style([^>])*>", regexOptions);
    stripTagRegex = new Regex("<[^>]*>", regexOptions); //alternatives: <[^>]+> or >(?:(?<t>[^<]*))

    //Regular Expressions for session IDs
    sessionIDRegex = new Regex(@"([0-9a-fA-F]{40,64})|([\{|\(]?[0-9a-fA-F]{8}[-]?([0-9a-fA-F]{4}[-]?){3}[0-9a-fA-F]{12}[\)|\}]?)$", regexOptions);
    //alternative: @"^([0-9a-f]{32})|(\{?[0-9a-f]{8}-([0-9a-f]{4}-){3}-[0-9a-f]{12}\}?)$"
    inlinedSessionIDRegex = new Regex(@"/(%28|\{)?(([0-9a-fA-F]{8}[-]?(([0-9a-fA-F]{4}[-]?){3}[0-9a-fA-F]{12}))|([0-9a-fA-F]{12,64}))(%29|\})?/", regexOptions);

    //Filters
    robotsFilter = RobotsFilter.Instance();
    domainFilter = DomainFilter.Instance();

    //Culture info: Greek ("el-GR")
    culture = CultureInfo.CreateSpecificCulture("el-GR");

    //Reference to the global variables and application settings
    globals = Globals.Instance();
}