예제 #1
0
        /// <summary>
        /// Registers a remoting channel of the type named by <paramref name="provider"/> and
        /// obtains a reference to the remote <see cref="ICrawlWaveServer"/> object published
        /// at <c>{ChannelType}://{Hostname}:{Port}/CrawlWaveServer.rem</c>.
        /// The constructor is private, so instances can only be created from inside the class.
        /// </summary>
        /// <param name="provider">Supplies the channel type, host name and port of the server.</param>
        /// <exception cref="ArgumentNullException"><paramref name="provider"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// The provider's channel type is not one of "http", "ipc" or "tcp".
        /// </exception>
        private CrawlWaveServerProxy(ICrawlWaveServerSettingsProvider provider)
        {
            if (provider == null)
            {
                throw new ArgumentNullException("provider");
            }

            IChannel channel;

            switch (provider.ChannelType)
            {
            case "http":
                // Parameterless constructor: the port is only used in the object URL below.
                channel = new HttpChannel();
                break;

            case "ipc":
                channel = new IpcChannel(string.Format("{0}:{1}", provider.Hostname, provider.Port));
                break;

            case "tcp":
                // Parameterless constructor: the port is only used in the object URL below.
                channel = new TcpChannel();
                break;

            default:
                // Name the offending value so a misconfiguration can be diagnosed from the exception.
                throw new ArgumentException(
                    string.Format("Unsupported channel type '{0}'; expected \"http\", \"ipc\" or \"tcp\".", provider.ChannelType),
                    "provider");
            }

            // NOTE(review): the second argument (ensureSecurity) is true for every channel type;
            // confirm against the RegisterChannel documentation that the "http" channel accepts it.
            ChannelServices.RegisterChannel(channel, true);

            proxy = (ICrawlWaveServer)Activator.GetObject(
                typeof(ICrawlWaveServer),
                string.Format("{0}://{1}:{2}/CrawlWaveServer.rem", provider.ChannelType, provider.Hostname, provider.Port));
        }
예제 #2
0
        /// <summary>
        /// Registers a remoting channel of the type named by <paramref name="provider"/> and
        /// obtains a reference to the remote <see cref="ICrawlWaveServer"/> object published
        /// at <c>{ChannelType}://{Hostname}:{Port}/CrawlWaveServer.rem</c>.
        /// The constructor is private, so instances can only be created from inside the class.
        /// </summary>
        /// <param name="provider">Supplies the channel type, host name and port of the server.</param>
        /// <exception cref="ArgumentNullException"><paramref name="provider"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// The provider's channel type is not one of "http", "ipc" or "tcp".
        /// </exception>
        private CrawlWaveServerProxy(ICrawlWaveServerSettingsProvider provider)
        {
            if (provider == null)
            {
                throw new ArgumentNullException("provider");
            }

            IChannel channel;

            switch (provider.ChannelType)
            {
                case "http":
                    // Parameterless constructor: the port is only used in the object URL below.
                    channel = new HttpChannel();
                    break;

                case "ipc":
                    channel = new IpcChannel(string.Format("{0}:{1}", provider.Hostname, provider.Port));
                    break;

                case "tcp":
                    // Parameterless constructor: the port is only used in the object URL below.
                    channel = new TcpChannel();
                    break;

                default:
                    // Name the offending value so a misconfiguration can be diagnosed from the exception.
                    throw new ArgumentException(
                        string.Format("Unsupported channel type '{0}'; expected \"http\", \"ipc\" or \"tcp\".", provider.ChannelType),
                        "provider");
            }

            // NOTE(review): the second argument (ensureSecurity) is true for every channel type;
            // confirm against the RegisterChannel documentation that the "http" channel accepts it.
            ChannelServices.RegisterChannel(channel, true);

            proxy = (ICrawlWaveServer)Activator.GetObject(
                typeof(ICrawlWaveServer),
                string.Format("{0}://{1}:{2}/CrawlWaveServer.rem", provider.ChannelType, provider.Hostname, provider.Port));
        }
예제 #3
0
 /// <summary>
 /// Initializes a new <see cref="Controller"/>: obtains the global settings object,
 /// creates the event log queue and the server proxy, and starts with no crawler attached.
 /// </summary>
 public Controller()
 {
     // Plain state first: no crawler yet, and a ten-slot statistics array.
     crawler = null;
     stats = new long[10];

     // Collaborators; the proxy is built from the globals, so globals must come first.
     globals = Globals.Instance();
     log = new QueueEventLogger(100);
     proxy = CrawlWaveServerProxy.Instance(globals);
 }
예제 #4
0
 /// <summary>
 /// Initializes a new <see cref="Controller"/>: obtains the global settings object,
 /// creates the event log queue and the server proxy, and starts with no crawler attached.
 /// </summary>
 public Controller()
 {
     // Plain state first: no crawler yet, and a ten-slot statistics array.
     crawler = null;
     stats = new long[10];

     // Collaborators; the proxy is built from the globals, so globals must come first.
     globals = Globals.Instance();
     log = new QueueEventLogger(100);
     proxy = CrawlWaveServerProxy.Instance(globals);
 }
예제 #5
0
        /// <summary>
        /// Performs the update of the Client's version. It waits until an Internet connection
        /// is available, asks the server for the latest version and, if that version is newer
        /// than the running one, downloads the update archive, saves it next to the application,
        /// extracts it and launches the installer when the extraction step returns one.
        /// </summary>
        /// <remarks>
        /// The method never throws: every failure is logged and swallowed, and the
        /// <c>updating</c> flag is always reset when the method exits.
        /// </remarks>
        private void UpdateClient()
        {
            try
            {
                // Wait for connectivity, backing off between checks.
                while (!InternetUtils.ConnectedToInternet())
                {
                    Thread.Sleep(updateBackoff.Next());
                }
                proxy = CrawlWaveServerProxy.Instance(globals);

                string latest = String.Empty;
                SerializedException sx = proxy.SendLatestVersion(globals.Client_Info, out latest);
                if (sx != null)
                {
                    globals.SystemLog.LogError("CrawlWave Client Scheduler failed to retrieve the latest version of the Client:" + sx.Message);
                    return;
                }

                Version latestVersion = new Version(latest);
                if (GetClientVersion() < latestVersion)
                {
                    //we must update the client. First of all download the update.
                    updating = true;
                    byte[] buffer = new byte[0];
                    sx = proxy.SendUpdatedVersion(globals.Client_Info, latest, out buffer);
                    if (sx != null || buffer == null || buffer.Length == 0)
                    {
                        // sx may be null here (empty or missing payload), so it must not be
                        // dereferenced unconditionally when building the message.
                        string reason = (sx != null) ? sx.Message : "the server returned an empty update package.";
                        globals.SystemLog.LogError("CrawlWave Client Scheduler failed to retrieve the latest version of the Client: " + reason);
                        return; // the finally block resets the updating flag
                    }

                    //save the compressed file to disk. If necessary launch the installer.
                    string updateFileName = globals.AppPath + latest + ".zip";
                    using (FileStream outputStream = new FileStream(updateFileName, FileMode.Create))
                    {
                        // using guarantees the file handle is released even if the write fails.
                        outputStream.Write(buffer, 0, buffer.Length);
                    }

                    string mustLaunchInstaller = ExtractUpdatedFiles(updateFileName);
                    if (!String.IsNullOrEmpty(mustLaunchInstaller))
                    {
                        //Launch Installer and exit
                        Process.Start(mustLaunchInstaller);
                    }
                }
            }
            catch (Exception e)
            {
                // The update is best effort and must not let an exception escape,
                // but the failure is recorded instead of being silently dropped.
                try
                {
                    globals.SystemLog.LogError("CrawlWave Client Scheduler failed to update the Client: " + e.Message);
                }
                catch
                {
                    // Logging itself failed; nothing more can safely be done here.
                }
            }
            finally
            {
                updating = false;
            }
        }
예제 #6
0
 /// <summary>
 /// Private constructor: instances can only be created from inside the class.
 /// Sets up the (initially empty) banned host table, the global settings reference and
 /// the server proxy, then loads the banned host list.
 /// </summary>
 private HostBanFilter()
 {
     // Global variables and application settings; the proxy below is built from them.
     globals = Globals.Instance();

     // Storage for the banned host entries.
     hostTable = new Hashtable();

     // Server proxy, then the initial load of the banned hosts (which uses all of the above).
     proxy = CrawlWaveServerProxy.Instance(globals);
     InitializeBannedHosts();
 }
예제 #7
0
        private Globals globals;         //Provides access to the global variables and application settings

        #endregion

        #region Constructor and Singleton Instance Members

        /// <summary>
        /// Private constructor: instances can only be created from inside the class.
        /// Sets up the (initially empty) banned host table, the global settings reference and
        /// the server proxy, then loads the banned host list.
        /// </summary>
        private HostBanFilter()
        {
            // Global variables and application settings; the proxy below is built from them.
            globals = Globals.Instance();

            // Storage for the banned host entries.
            hostTable = new Hashtable();

            // Server proxy, then the initial load of the banned hosts (which uses all of the above).
            proxy = CrawlWaveServerProxy.Instance(globals);
            InitializeBannedHosts();
        }
예제 #8
0
 /// <summary>
 /// Downloads the latest list of banned hosts from the server and, when that list is not
 /// empty, clears the local table and refills it with the downloaded entries.
 /// </summary>
 /// <remarks>
 /// The method never throws. If the server cannot be reached, reports an error or returns
 /// no rows, the current contents of the table are left untouched.
 /// </remarks>
 private void InitializeBannedHosts()
 {
     DataSet ds = null;
     try
     {
         ICrawlWaveServer server = proxy;

         // Probe the server first so a connection problem is reported as such.
         try
         {
             server.IsAlive();
         }
         catch
         {
             if (globals.Settings.LogLevel <= CWLogLevel.LogWarning)
             {
                 globals.FileLog.LogWarning("HostBanFilter failed to connect to the server.");
             }
             return;
         }

         SerializedException sx = server.SendBannedHosts(globals.Client_Info, out ds);
         if (sx != null)
         {
             if (globals.Settings.LogLevel <= CWLogLevel.LogWarning)
             {
                 globals.FileLog.LogWarning("HostBanFilter failed to download a list of banned hosts.");
             }
             return;
         }

         // Nothing usable came back: keep the current list as it is.
         if (ds == null || ds.Tables.Count == 0 || ds.Tables[0].Rows.Count == 0)
         {
             return;
         }

         lock (hostTable.SyncRoot)
         {
             hostTable.Clear();
             foreach (DataRow dr in ds.Tables[0].Rows)
             {
                 try
                 {
                     // The cast is inside the try so that one malformed row is skipped
                     // instead of aborting the load right after the table was cleared.
                     Guid g = (Guid)(dr[0]);
                     // NOTE(review): the key is a byte[]; a Hashtable without a custom
                     // comparer compares array keys by reference - confirm the lookups
                     // elsewhere account for this.
                     hostTable.Add(g.ToByteArray(), null);
                 }
                 catch
                 {
                     continue;
                 }
             }
         }
     }
     catch (Exception e)
     {
         // Initialization is best effort and must not throw, but the failure
         // is recorded instead of being silently dropped.
         try
         {
             if (globals.Settings.LogLevel <= CWLogLevel.LogWarning)
             {
                 globals.FileLog.LogWarning("HostBanFilter failed to initialize the list of banned hosts: " + e.Message);
             }
         }
         catch
         {
             // Logging itself failed; nothing more can safely be done here.
         }
     }
     finally
     {
         // Release the downloaded data on every exit path, including the early returns.
         if (ds != null)
         {
             ds.Dispose();
         }
     }
 }
예제 #9
0
 /// <summary>
 /// Constructs a new instance of the <see cref="Crawler"/> class and initializes its
 /// members with their default values. The constructor is private so that only the
 /// class itself can create an instance.
 /// </summary>
 private Crawler()
 {
     // The global variables come first because later initializers read from them.
     globals = Globals.Instance();

     // Flags, counters and thread handles: the crawler starts stopped with no threads.
     mustStop = false;
     stopping = false;
     state = CrawlerState.Stopped;
     runningThreads = 0;
     queueSize = 0;
     dataFileName = String.Empty;
     stats = new long[10];
     synchronizeThread = null;
     crawlingThreads = null;

     // Work queues and the list of crawled urls, all initially empty.
     urlsToCrawl = new Queue();
     resultFileNames = new Queue();
     crawledUrls = new ArrayList();

     // The thread count is taken from the configured connection speed.
     numThreads = (int)globals.Settings.ConnectionSpeed;
     syncBackOff = new Backoff(BackoffSpeed.Declining, 30000);
     downloadBackOff = new Backoff(BackoffSpeed.Fast);

     // Encodings and the regular expression that matches a charset meta tag.
     defaultEncoding = Encoding.GetEncoding("ISO-8859-7");
     defaultGreekEncoding = Encoding.GetEncoding(1253);
     RegexOptions charsetOptions = RegexOptions.Compiled
         | RegexOptions.IgnoreCase
         | RegexOptions.Multiline
         | RegexOptions.CultureInvariant;
     contentRegex = new Regex(
         "<meta\\s*http-equiv=([^>])*charset\\s*=\\s*([^>])*(utf-7|utf-8|utf-16|windows-1253)([^>])*>",
         charsetOptions);

     // Content parsers.
     htmlParser = HtmlParser.Instance();
     textParser = TextParser.Instance();
     pdfParser = PdfParser.Instance();
     swfParser = SwfParser.Instance();
     nullParser = NullParser.Instance();

     // Url filters; the robots filter loads its entries immediately.
     robotsFilter = RobotsFilter.Instance();
     robotsFilter.LoadEntries();
     domainFilter = DomainFilter.Instance();
     hostRequestFilter = HostRequestFilter.Instance();
     hostBanFilter = HostBanFilter.Instance();

     // Proxy used to talk to the server.
     proxy = CrawlWaveServerProxy.Instance(globals);
 }
예제 #10
0
        /// <summary>
        /// Performs the update of the Client's version. It waits until an Internet connection
        /// is available, asks the server for the latest version and, if that version is newer
        /// than the running one, downloads the update archive, saves it next to the application,
        /// extracts it and launches the installer when the extraction step returns one.
        /// </summary>
        /// <remarks>
        /// The method never throws: every failure is logged and swallowed, and the
        /// <c>updating</c> flag is always reset when the method exits.
        /// </remarks>
        private void UpdateClient()
        {
            try
            {
                // Wait for connectivity, backing off between checks.
                while(!InternetUtils.ConnectedToInternet())
                {
                    Thread.Sleep(updateBackoff.Next());
                }
                proxy = CrawlWaveServerProxy.Instance(globals);

                string latest = String.Empty;
                SerializedException sx = proxy.SendLatestVersion(globals.Client_Info, out latest);
                if(sx!=null)
                {
                    globals.SystemLog.LogError("CrawlWave Client Scheduler failed to retrieve the latest version of the Client:" + sx.Message);
                    return;
                }

                Version latestVersion = new Version(latest);
                if(GetClientVersion()<latestVersion)
                {
                    //we must update the client. First of all download the update.
                    updating = true;
                    byte [] buffer = new byte[0];
                    sx = proxy.SendUpdatedVersion(globals.Client_Info, latest, out buffer);
                    if(sx!=null || buffer==null || buffer.Length==0)
                    {
                        // sx may be null here (empty or missing payload), so it must not be
                        // dereferenced unconditionally when building the message.
                        string reason = (sx!=null) ? sx.Message : "the server returned an empty update package.";
                        globals.SystemLog.LogError("CrawlWave Client Scheduler failed to retrieve the latest version of the Client: " + reason);
                        return; // the finally block resets the updating flag
                    }

                    //save the compressed file to disk. If necessary launch the installer.
                    string updateFileName = globals.AppPath + latest + ".zip";
                    using(FileStream outputStream = new FileStream(updateFileName, FileMode.Create))
                    {
                        // using guarantees the file handle is released even if the write fails.
                        outputStream.Write(buffer, 0, buffer.Length);
                    }

                    string mustLaunchInstaller = ExtractUpdatedFiles(updateFileName);
                    if(!String.IsNullOrEmpty(mustLaunchInstaller))
                    {
                        //Launch Installer and exit
                        Process.Start(mustLaunchInstaller);
                    }
                }
            }
            catch(Exception e)
            {
                // The update is best effort and must not let an exception escape,
                // but the failure is recorded instead of being silently dropped.
                try
                {
                    globals.SystemLog.LogError("CrawlWave Client Scheduler failed to update the Client: " + e.Message);
                }
                catch
                {
                    // Logging itself failed; nothing more can safely be done here.
                }
            }
            finally
            {
                updating = false;
            }
        }