/// <summary>
/// Creates the parser instance and sets up its collaborators. The constructor is
/// private so that only the class itself can create an instance.
/// </summary>
private PdfParser()
{
    // Synchronization primitive owned by this parser.
    mutex = new Mutex();

    // Text encoding used by this parser (ISO-8859-7).
    encoding = Encoding.GetEncoding("ISO-8859-7");

    // Converter and text parser this class delegates to.
    converter = new XpdfTextClass();
    parser = TextParser.Instance();

    // Reference to the global variables and application settings.
    globals = Globals.Instance();
}
/// <summary>
/// Constructs a new instance of the <see cref="Crawler"/> class and initializes its
/// properties with the default values. There should be only one instance of Crawler.
/// </summary>
public Crawler()
{
    // The global variables must be obtained first: numThreads below is derived
    // from globals.Settings.
    globals = Globals.Instance();

    // Control flags and state.
    mustStop = false;
    stopping = false;
    state = CrawlerState.Stopped;

    // Ten statistics counters, all starting at zero (array elements default to 0).
    stats = new long[10];

    // Threading: the thread count is taken from the ConnectionSpeed setting.
    numThreads = (int)globals.Settings.ConnectionSpeed;
    runningThreads = 0;
    sendResultsThread = null;
    synchronizeThread = null;
    crawlingThreads = null;

    // Backoff helpers for synchronization and downloading.
    syncBackOff = new ExponentialBackoff(BackoffSpeed.Declining);
    downloadBackOff = new ExponentialBackoff(BackoffSpeed.Fast);

    // Work queues and bookkeeping.
    urlsToCrawl = new Queue();
    resultFileNames = new Queue();
    crawledUrls = new ArrayList();
    queueSize = 0;
    dataFileName = string.Empty;

    // Default text encoding (ISO-8859-7).
    defaultEncoding = Encoding.GetEncoding("ISO-8859-7");

    // Content parsers.
    htmlParser = HtmlParser.Instance();
    textParser = TextParser.Instance();
    pdfParser = PdfParser.Instance();
    swfParser = SwfParser.Instance();

    // Filters.
    robotsFilter = RobotsFilter.Instance();
    domainFilter = DomainFilter.Instance();
    hostRequestFilter = HostRequestFilter.Instance();
    hostBanFilter = HostBanFilter.Instance();

    // Proxy to the web service.
    proxy = WebServiceProxy.Instance();
}
/// <summary>
/// Constructs a new instance of the <see cref="Crawler"/> class and initializes its
/// properties with the default values. The constructor is private so that only the
/// class itself can create an instance.
/// </summary>
private Crawler()
{
    // The global variables must be obtained first: numThreads and the proxy below
    // both depend on them.
    globals = Globals.Instance();

    // Control flags and state.
    mustStop = false;
    stopping = false;
    state = CrawlerState.Stopped;

    // Ten statistics counters, all starting at zero (array elements default to 0).
    stats = new long[10];

    // Threading: the thread count is taken from the ConnectionSpeed setting.
    numThreads = (int)globals.Settings.ConnectionSpeed;
    runningThreads = 0;
    //sendResultsThread = null;
    synchronizeThread = null;
    crawlingThreads = null;

    // Backoff helpers for synchronization and downloading.
    syncBackOff = new Backoff(BackoffSpeed.Declining, 30000);
    downloadBackOff = new Backoff(BackoffSpeed.Fast);

    // Work queues and bookkeeping.
    urlsToCrawl = new Queue();
    resultFileNames = new Queue();
    crawledUrls = new ArrayList();
    queueSize = 0;
    dataFileName = string.Empty;

    // Encodings: ISO-8859-7 as the default and code page 1253 as the Greek default.
    defaultEncoding = Encoding.GetEncoding("ISO-8859-7");
    defaultGreekEncoding = Encoding.GetEncoding(1253);

    // Matches an http-equiv <meta> tag whose charset is one of
    // utf-7, utf-8, utf-16 or windows-1253 (case-insensitive).
    RegexOptions contentRegexOptions =
        RegexOptions.Compiled
        | RegexOptions.IgnoreCase
        | RegexOptions.Multiline
        | RegexOptions.CultureInvariant;
    contentRegex = new Regex("<meta\\s*http-equiv=([^>])*charset\\s*=\\s*([^>])*(utf-7|utf-8|utf-16|windows-1253)([^>])*>", contentRegexOptions);

    // Content parsers.
    htmlParser = HtmlParser.Instance();
    textParser = TextParser.Instance();
    pdfParser = PdfParser.Instance();
    swfParser = SwfParser.Instance();
    nullParser = NullParser.Instance();

    // Filters; the robots filter has its entries loaded right after it is obtained.
    robotsFilter = RobotsFilter.Instance();
    robotsFilter.LoadEntries();
    domainFilter = DomainFilter.Instance();
    hostRequestFilter = HostRequestFilter.Instance();
    hostBanFilter = HostBanFilter.Instance();

    // Server proxy, built from the global settings.
    //proxy = WebServiceProxy.Instance();
    proxy = CrawlWaveServerProxy.Instance(globals);
}
/// <summary>
/// Lock object that serializes creation of the single <see cref="TextParser"/>
/// instance. It is static and initialized with the type, so it is available before
/// any instance (and therefore any instance-level mutex) exists.
/// </summary>
private static readonly object instanceSyncRoot = new object();

/// <summary>
/// Provides a global access point for the single instance of the <see cref="TextParser"/>
/// class. The instance is created on first use.
/// </summary>
/// <returns>A reference to the single instance of <see cref="TextParser"/></returns>
public static TextParser Instance()
{
    // Fast path: once the instance exists no locking is needed.
    if (instance == null)
    {
        // All callers must contend on the SAME lock object. A Mutex created locally
        // on each call would be private to that call and exclude nobody, allowing
        // two threads to construct two different instances.
        lock (instanceSyncRoot)
        {
            // Re-check inside the lock: another thread may have created the
            // instance while this one was waiting.
            if (instance == null)
            {
                instance = new TextParser();
            }
        }
        // The lock statement releases the lock even if the constructor throws.
    }
    return instance;
}