Beispiel #1
0
 /// <summary>
 /// Provides a global access point for the single instance of the <see cref="HostBanFilter"/>
 /// class.
 /// </summary>
 /// <returns>A reference to the single instance of <see cref="HostBanFilter"/>.</returns>
 public static HostBanFilter Instance()
 {
     if (instance==null)
     {
         //Make sure the call is thread-safe. We cannot use the private mutex since
         //it hasn't yet been initialized - it gets initialized in the constructor.
         Mutex imutex=new Mutex();
         imutex.WaitOne();
         if( instance == null )
         {
             instance = new HostBanFilter();
         }
         imutex.Close();
     }
     return instance;
 }
Beispiel #2
0
 /// <summary>
 /// Provides a global access point for the single instance of the <see cref="HostBanFilter"/>
 /// class.
 /// </summary>
 /// <returns>A reference to the single instance of <see cref="HostBanFilter"/>.</returns>
 public static HostBanFilter Instance()
 {
     if (instance == null)
     {
         //Make sure the call is thread-safe. We cannot use the private mutex since
         //it hasn't yet been initialized - it gets initialized in the constructor.
         Mutex imutex = new Mutex();
         imutex.WaitOne();
         if (instance == null)
         {
             instance = new HostBanFilter();
         }
         imutex.Close();
     }
     return(instance);
 }
Beispiel #3
0
 /// <summary>
 /// Constructs a new istance of the <see cref="Crawler"/> class and initializes its
 /// properties with the default values. There should be only one instance of Crawler
 /// </summary>
 public Crawler()
 {
     //first of all get a reference to the global variables because they are needed
     //in order to initialize some variables.
     globals = Globals.Instance();
     mustStop = false;
     stopping = false;
     state = CrawlerState.Stopped;
     stats = new long[10] {0,0,0,0,0,0,0,0,0,0};
     numThreads = (int)globals.Settings.ConnectionSpeed;
     runningThreads = 0;
     sendResultsThread = null;
     synchronizeThread = null;
     crawlingThreads = null;
     syncBackOff = new ExponentialBackoff(BackoffSpeed.Declining);
     downloadBackOff = new ExponentialBackoff(BackoffSpeed.Fast);
     urlsToCrawl = new Queue();
     resultFileNames = new Queue();
     crawledUrls = new ArrayList();
     queueSize = 0;
     dataFileName = String.Empty;
     defaultEncoding = Encoding.GetEncoding("ISO-8859-7");
     htmlParser = HtmlParser.Instance();
     textParser = TextParser.Instance();
     pdfParser = PdfParser.Instance();
     swfParser = SwfParser.Instance();
     robotsFilter = RobotsFilter.Instance();
     domainFilter = DomainFilter.Instance();
     hostRequestFilter =HostRequestFilter.Instance();
     hostBanFilter = HostBanFilter.Instance();
     proxy = WebServiceProxy.Instance();
 }
Beispiel #4
0
 /// <summary>
 /// Constructs a new istance of the <see cref="Crawler"/> class and initializes its
 /// properties with the default values. The constructor is private so that only the
 /// class itself can create an instance.
 /// </summary>
 private Crawler()
 {
     //first of all get a reference to the global variables because they are needed
     //in order to initialize some variables.
     globals = Globals.Instance();
     mustStop = false;
     stopping = false;
     state = CrawlerState.Stopped;
     stats = new long[10] {0,0,0,0,0,0,0,0,0,0};
     numThreads = (int)globals.Settings.ConnectionSpeed;
     runningThreads = 0;
     //sendResultsThread = null;
     synchronizeThread = null;
     crawlingThreads = null;
     syncBackOff = new Backoff(BackoffSpeed.Declining, 30000);
     downloadBackOff = new Backoff(BackoffSpeed.Fast);
     urlsToCrawl = new Queue();
     resultFileNames = new Queue();
     crawledUrls = new ArrayList();
     queueSize = 0;
     dataFileName = String.Empty;
     defaultEncoding = Encoding.GetEncoding("ISO-8859-7");
     defaultGreekEncoding = Encoding.GetEncoding(1253);
     contentRegex = new Regex("<meta\\s*http-equiv=([^>])*charset\\s*=\\s*([^>])*(utf-7|utf-8|utf-16|windows-1253)([^>])*>",RegexOptions.CultureInvariant|RegexOptions.Multiline|RegexOptions.IgnoreCase|RegexOptions.Compiled);
     htmlParser = HtmlParser.Instance();
     textParser = TextParser.Instance();
     pdfParser = PdfParser.Instance();
     swfParser = SwfParser.Instance();
     nullParser = NullParser.Instance();
     robotsFilter = RobotsFilter.Instance();
     robotsFilter.LoadEntries();
     domainFilter = DomainFilter.Instance();
     hostRequestFilter = HostRequestFilter.Instance();
     hostBanFilter = HostBanFilter.Instance();
     //proxy = WebServiceProxy.Instance();
     proxy = CrawlWaveServerProxy.Instance(globals);
 }