/// <summary>
/// Attempts to stop the Crawler immediately.
/// </summary>
/// <remarks>
/// If no Crawler instance exists an <see cref="InvalidOperationException"/> is raised
/// internally; like every other failure it is caught here and handed back to the
/// caller in serialized form instead of being propagated.
/// </remarks>
/// <returns>Null if the operation succeeds, or a <see cref="SerializedException"/>
/// encapsulating the error that occurred if the operation fails.</returns>
public SerializedException Stop()
{
    try
    {
        // Guard: there is nothing to stop if the Crawler was never created.
        if (!Crawler.InstanceExists())
        {
            throw new InvalidOperationException("The Crawler has not been initialized and cannot be stopped.");
        }

        // Pick up the existing instance (and hook up the observers) on first use.
        if (crawler == null)
        {
            crawler = Crawler.Instance();
            AttachObservers();
        }

        crawler.StopImmediately();
        return null;
    }
    catch (Exception failure)
    {
        return new SerializedException(failure.GetType().ToString(), failure.Message, failure.StackTrace);
    }
}
/// <summary>
/// Requests the termination of the application by raising the
/// <c>MustTerminate</c> flag of the <c>Client</c> instance.
/// </summary>
/// <param name="sx">Left untouched if the operation succeeds; otherwise it is set to a
/// <see cref="SerializedException"/> encapsulating the error that occurred.</param>
public void Terminate(ref SerializedException sx)
{
    try
    {
        // The flag is used instead of a direct call to
        // System.Windows.Forms.Application.Exit().
        Client.Instance().MustTerminate = true;
    }
    catch (Exception failure)
    {
        string typeName = failure.GetType().ToString();
        sx = new SerializedException(typeName, failure.Message, failure.StackTrace);
    }
}
/// <summary>
/// Attempts to perform the registration of a new user and, afterwards, of this client.
/// </summary>
/// <remarks>
/// A "user already exists" answer from the server is not treated as fatal: the method
/// logs a warning and carries on with the registration of the client. Any other error
/// reported by the user registration call is returned immediately, before any setting
/// is modified or saved.
/// </remarks>
/// <param name="UserName">The user's username.</param>
/// <param name="Password">The user's password. Only its MD5 hash is sent and stored.</param>
/// <param name="Email">The user's email address.</param>
/// <returns>Null if the operation succeeds, or a <see cref="SerializedException"/>
/// encapsulating the error that occurred if the operation fails.</returns>
public SerializedException RegisterUser(string UserName, string Password, string Email)
{
    SerializedException sx = null;
    try
    {
        //WebServiceProxy proxy = WebServiceProxy.Instance();
        int ID = 0;
        byte[] password = MD5Hash.md5(Password);
        sx = proxy.RegisterUser(ref ID, UserName, password, Email);
        if (sx != null)
        {
            if (sx.Type != "CrawlWave.Common.CWUserExistsException")
            {
                // Any other failure means the user was not registered. Previously the
                // error was dropped here: the settings were saved with the unassigned
                // ID and the error was overwritten by the result of RegisterClient.
                return sx;
            }
            log.LogWarning("User already exists, attempting to register client.");
        }

        // Persist the user's details together with the hardware fingerprint.
        globals.Settings.UserID = ID;
        globals.Settings.UserName = UserName;
        globals.Settings.Password = password;
        globals.Settings.Email = Email;
        CWComputerInfo info = ComputerInfo.GetComputerInfo();
        globals.Settings.HardwareInfo = ComputerInfo.GetSHA1HashCode(info);
        globals.Settings.SaveSettings();
        //proxy.ForceInitializeProxies();

        // Register this client under the user and persist the ID it was given.
        ClientInfo ci = new ClientInfo();
        ci.UserID = globals.Settings.UserID;
        sx = proxy.RegisterClient(ref ci, info);
        globals.Settings.ClientID = ci.ClientID;
        globals.Settings.SaveSettings();
    }
    catch (Exception ex)
    {
        sx = new SerializedException(ex.GetType().ToString(), ex.Message, ex.StackTrace);
    }
    return sx;
}
/// <summary>
/// Attempts to start the Crawler. If the crawler instance has not been obtained yet
/// it is fetched first and the observers are attached to it.
/// </summary>
/// <returns>Null if the operation succeeds, or a <see cref="SerializedException"/>
/// encapsulating the error that occurred if the operation fails.</returns>
public SerializedException Start()
{
    try
    {
        if (crawler == null)
        {
            crawler = Crawler.Instance();
            AttachObservers();
        }

        crawler.Start();
        return null;
    }
    catch (Exception failure)
    {
        return new SerializedException(failure.GetType().ToString(), failure.Message, failure.StackTrace);
    }
}
/// <summary>
/// Selects and returns a list of all the banned hosts by running the
/// <c>cw_select_banned_hosts</c> stored procedure.
/// </summary>
/// <remarks>
/// The command and the adapter are disposed even when the query fails, and a
/// connection that was opened is closed on the failure path as well. Whatever the
/// outcome, the client's last-active time is updated and the action is logged.
/// </remarks>
/// <param name="ci">The <see cref="ClientInfo"/> of the client requesting the data.</param>
/// <param name="data">A <see cref="DataSet"/> that will contain the list of banned hosts.</param>
/// <returns>Null if the operation succeeds, or <see cref="SerializedException"/>
/// encapsulating the error that occurred if the operation fails.</returns>
public SerializedException SelectBannedHosts(ClientInfo ci, ref DataSet data)
{
    SerializedException sx = null;
    // True while this call holds a connection it has not yet tried to close.
    bool connected = false;
    try
    {
        if (!ConnectToDatabase())
        {
            throw new CWDBConnectionFailedException();
        }
        connected = true;

        using (SqlCommand cmd = new SqlCommand("cw_select_banned_hosts", dbcon))
        {
            cmd.CommandType = CommandType.StoredProcedure;
            using (SqlDataAdapter da = new SqlDataAdapter(cmd))
            {
                data = new DataSet();
                da.Fill(data);
            }
        }

        connected = false;
        if (!DisconnectFromDatabase())
        {
            throw new CWDBConnectionFailedException("Disconnect from database failure.");
        }
    }
    catch (Exception e)
    {
        sx = new SerializedException(e.GetType().ToString(), e.Message, e.ToString());
        if (settings.LogLevel <= CWLogLevel.LogWarning)
        {
            settings.Log.LogWarning("SelectBannedHosts failed: " + e.ToString());
        }
    }
    finally
    {
        if (connected)
        {
            // The operation failed after the connection was opened: close it on a
            // best-effort basis. The original error is already recorded in sx, so a
            // failure here is deliberately not allowed to replace it.
            try
            {
                DisconnectFromDatabase();
            }
            catch (Exception)
            {
            }
        }
        UpdateClientLastActive(ci);
        LogClientAction(ci, CWClientActions.LogSendBannedHosts);
    }
    return sx;
}
/// <summary>
/// Attempts to retrieve the user's statistics from the server.
/// </summary>
/// <param name="stats">Receives the statistics returned by the server call.</param>
/// <returns>Null if the operation succeeds, or a <see cref="SerializedException"/>
/// encapsulating the error that occurred if the operation fails.</returns>
public SerializedException GetUserStatistics(ref UserStatistics stats)
{
    SerializedException sx = null;
    try
    {
        //WebServiceProxy proxy = WebServiceProxy.Instance();
        UserStatistics userstats = null;
        sx = proxy.SendUserStatistics(globals.Client_Info, out userstats);
        if (sx != null)
        {
            log.LogError("An error occured while retrieving the statistics:" + sx.Message);
            globals.FileLog.LogWarning("CrawlWave.Client: Failed to retrieve user's statistics: " + sx.Message);
        }

        // Hand the result to the caller. This assignment used to sit inside the error
        // branch above, so a successful call never delivered any statistics.
        stats = userstats;
    }
    catch (Exception e)
    {
        log.LogWarning("An error occured while retrieving the statistics: " + e.Message);
        globals.FileLog.LogWarning("CrawlWave.Client: Failed to retrieve user's statistics: " + e.ToString());
        sx = new SerializedException(e.GetType().ToString(), e.Message, e.StackTrace);
    }
    finally
    {
        // NOTE(review): forced collection kept from the original implementation;
        // confirm it is still wanted.
        GC.Collect();
    }
    return sx;
}
/// <summary>
/// Updates the computer hardware info related to a client by running the
/// <c>cw_update_client</c> stored procedure.
/// </summary>
/// <remarks>
/// The command is disposed even when its execution fails, and a connection that was
/// opened is closed on the failure path as well. The action is logged whatever the
/// outcome.
/// </remarks>
/// <param name="ci">The <see cref="ClientInfo"/> of the client.</param>
/// <param name="info">The <see cref="CWComputerInfo"/> of the client computer.</param>
/// <returns>Null if the operation succeeds, or <see cref="SerializedException"/>
/// encapsulating the error that occurred if the operation fails.</returns>
public SerializedException StoreNewClientComputerInfo(ClientInfo ci, CWComputerInfo info)
{
    SerializedException sx = null;
    // True while this call holds a connection it has not yet tried to close.
    bool connected = false;
    try
    {
        if (!ConnectToDatabase())
        {
            throw new CWDBConnectionFailedException();
        }
        connected = true;

        using (SqlCommand cmd = new SqlCommand("cw_update_client", dbcon))
        {
            cmd.CommandType = CommandType.StoredProcedure;
            cmd.Parameters.Add("@client_id", SqlDbType.UniqueIdentifier);
            cmd.Parameters.Add("@user_id", SqlDbType.Int);
            cmd.Parameters.Add("@info_cpu", SqlDbType.NVarChar, 50);
            cmd.Parameters.Add("@info_ram", SqlDbType.SmallInt);
            cmd.Parameters.Add("@info_hdd", SqlDbType.Int);
            cmd.Parameters.Add("@info_net", SqlDbType.TinyInt);
            cmd.Parameters[0].Value = ci.ClientID;
            cmd.Parameters[1].Value = ci.UserID;
            cmd.Parameters[2].Value = info.CPUType;
            cmd.Parameters[3].Value = info.RAMSize;
            cmd.Parameters[4].Value = info.HDDSpace;
            cmd.Parameters[5].Value = (byte)info.ConnectionSpeed;
            cmd.ExecuteNonQuery();
        }

        connected = false;
        if (!DisconnectFromDatabase())
        {
            throw new CWDBConnectionFailedException("Disconnect from database failure.");
        }
    }
    catch (Exception e)
    {
        sx = new SerializedException(e.GetType().ToString(), e.Message, e.ToString());
        if (settings.LogLevel <= CWLogLevel.LogWarning)
        {
            settings.Log.LogWarning("StoreNewClientComputerInfo failed: " + e.ToString());
        }
    }
    finally
    {
        if (connected)
        {
            // The operation failed after the connection was opened: close it on a
            // best-effort basis. The original error is already recorded in sx, so a
            // failure here is deliberately not allowed to replace it.
            try
            {
                DisconnectFromDatabase();
            }
            catch (Exception)
            {
            }
        }
        LogClientAction(ci, CWClientActions.LogGetClientComputerInfo);
    }
    return sx;
}
/// <summary>
/// Performs the registration of a new user by storing his info in the database
/// through the <c>cw_insert_user</c> stored procedure.
/// </summary>
/// <remarks>
/// Any exception raised while the procedure is executed, while its return value is
/// read or while the action is logged is reported as a
/// <c>CWUserExistsException</c> carrying the original message.
/// </remarks>
/// <param name="ID">Receives the return value of the stored procedure, i.e. the ID
/// assigned to the new user. It is left unchanged if the execution fails.</param>
/// <param name="username">The username requested from the new user.</param>
/// <param name="password">The hash of the new user's password; it is converted to a
/// <see cref="Guid"/> before being stored.</param>
/// <param name="email">The user's email address.</param>
/// <returns>Null if the operation succeeds, or <see cref="SerializedException"/>
/// encapsulating the error that occurred if the operation fails.</returns>
public SerializedException StoreUserRegistrationInfo(ref int ID, string username, byte[] password, string email)
{
    SerializedException result = null;
    try
    {
        if (!ConnectToDatabase())
        {
            throw new CWDBConnectionFailedException();
        }

        SqlCommand insertUser = new SqlCommand("cw_insert_user", dbcon);
        insertUser.CommandType = CommandType.StoredProcedure;
        insertUser.Parameters.Add("@username", SqlDbType.NVarChar, 20).Value = username;
        insertUser.Parameters.Add("@password", SqlDbType.UniqueIdentifier).Value = new Guid(password);
        insertUser.Parameters.Add("@email", SqlDbType.NVarChar, 50).Value = email;
        // The new user's ID comes back as the procedure's return value.
        SqlParameter assignedId = insertUser.Parameters.Add("@user_id", SqlDbType.Int);
        assignedId.Direction = ParameterDirection.ReturnValue;

        try
        {
            insertUser.ExecuteNonQuery();
            ID = (int)assignedId.Value;

            // The user has no client yet, so the action is logged with an empty
            // client ID and a placeholder version.
            ClientInfo registrant;
            registrant.UserID = ID;
            registrant.ClientID = Guid.Empty;
            registrant.Version = "0.0.0.0";
            LogClientAction(registrant, CWClientActions.LogRegisterUser);
        }
        catch (Exception insertFailure)
        {
            //the user already exists, throw an appropriate exception
            throw new CWUserExistsException("User registration failed: " + insertFailure.Message);
        }
        finally
        {
            insertUser.Dispose();
            if (!DisconnectFromDatabase())
            {
                throw new CWDBConnectionFailedException("Disconnect from database failure.");
            }
        }
    }
    catch (Exception failure)
    {
        result = new SerializedException(failure.GetType().ToString(), failure.Message, failure.ToString());
        if (settings.LogLevel <= CWLogLevel.LogWarning)
        {
            settings.Log.LogWarning("StoreUserRegistrationInfo failed: " + failure.ToString());
        }
    }
    return result;
}
/// <summary>
/// Stores the results that the clients return after crawling a set of Urls.
/// </summary>
/// <remarks>
/// Only the robots.txt information and the client's statistics are written to the
/// database; the crawl data itself is saved to disk through <c>SaveXMLFile</c>, which
/// is attempted even when the database updates fail. Whatever the outcome, the
/// client's last-active time is updated and the action is logged.
/// </remarks>
/// <param name="ci">The <see cref="ClientInfo"/> of the client returning the data.</param>
/// <param name="data">An array of <see cref="UrlCrawlData"/> objects containing the data of the crawled urls.</param>
/// <returns>Null if the operation succeeds, or <see cref="SerializedException"/>
/// encapsulating the error that occurred if the operation fails.</returns>
public SerializedException StoreCrawlResults(ClientInfo ci, UrlCrawlData[] data)
{
    SerializedException sx = null;
    // True while this call holds a connection it has not yet tried to close.
    bool connected = false;
    try
    {
        if (!ConnectToDatabase())
        {
            throw new CWDBConnectionFailedException();
        }
        connected = true;

        try
        {
            //store the new robots.txt files in the database, nothing else needs to
            //be done since the urls will be marked as not assigned when their data
            //is processed by DBUpdater
            if ((data != null) && (data.Length > 0))
            {
                using (SqlCommand cmd = new SqlCommand("cw_update_or_insert_robot", dbcon))
                {
                    cmd.CommandType = CommandType.StoredProcedure;
                    cmd.Parameters.Add("@host_id", SqlDbType.UniqueIdentifier);
                    cmd.Parameters.Add("@disallowed", SqlDbType.NVarChar, 1000);
                    foreach (UrlCrawlData urlData in data)
                    {
                        if ((urlData.FlagFetchRobots) || (urlData.Redirected))
                        {
                            string url = urlData.Url;
                            cmd.Parameters[0].Value = new Guid(MD5Hash.md5(InternetUtils.HostName(url)));
                            cmd.Parameters[1].Value = urlData.RobotsDisallowedPaths;
                            try
                            {
                                cmd.ExecuteNonQuery();
                            }
                            catch
                            {
                                // Best effort: a robots entry that cannot be stored
                                // must not prevent the remaining ones from being stored.
                                continue;
                            }
                        }
                    }
                }

                using (SqlCommand statscmd = new SqlCommand("cw_update_client_statistics", dbcon))
                {
                    statscmd.CommandType = CommandType.StoredProcedure;
                    statscmd.Parameters.Add("@client_id", SqlDbType.UniqueIdentifier);
                    statscmd.Parameters.Add("@assigned", SqlDbType.BigInt);
                    statscmd.Parameters.Add("@returned", SqlDbType.BigInt);
                    statscmd.Parameters.Add("@type", SqlDbType.TinyInt);
                    statscmd.Parameters[0].Value = ci.ClientID;
                    statscmd.Parameters[1].Value = DBNull.Value;
                    statscmd.Parameters[2].Value = data.Length;
                    statscmd.Parameters[3].Value = 1;
                    statscmd.ExecuteNonQuery();
                }
            }
        }
        catch (Exception ex)
        {
            if (settings.LogLevel <= CWLogLevel.LogWarning)
            {
                settings.Log.LogWarning("StoreCrawlResults failed: " + ex.ToString());
            }
            // Rethrow without resetting the stack trace ("throw ex;" would discard it).
            throw;
        }
        finally
        {
            //save xml file on disk
            try
            {
                SaveXMLFile(ci, data);
            }
            catch (Exception se)
            {
                sx = new SerializedException(se.GetType().ToString(), se.Message, se.ToString());
                if (settings.LogLevel <= CWLogLevel.LogWarning)
                {
                    settings.Log.LogWarning("StoreCrawlResults failed to save XML data on disk: " + se.ToString());
                }
            }
        }

        connected = false;
        if (!DisconnectFromDatabase())
        {
            throw new CWDBConnectionFailedException("Disconnect from database failure.");
        }
    }
    catch (Exception e)
    {
        sx = new SerializedException(e.GetType().ToString(), e.Message, e.ToString());
    }
    finally
    {
        if (connected)
        {
            // The database work failed after the connection was opened: close it on a
            // best-effort basis. The original error is already recorded in sx, so a
            // failure here is deliberately not allowed to replace it.
            try
            {
                DisconnectFromDatabase();
            }
            catch (Exception)
            {
            }
        }
        UpdateClientLastActive(ci);
        LogClientAction(ci, CWClientActions.LogGetCrawlResults);
    }
    return sx;
}
/// <summary>
/// Selects and returns the statistics for a certain user by running the
/// <c>cw_select_user_statistic</c> stored procedure.
/// </summary>
/// <remarks>
/// Every returned row is counted as one client; the assigned and returned url counts
/// are summed over the rows and the most recent last-active time is kept. The values
/// are accumulated onto whatever <paramref name="stats"/> already contains. The
/// command, adapter and result set are disposed even on failure, and a connection
/// that was opened is closed on the failure path as well.
/// </remarks>
/// <param name="ci">The <see cref="ClientInfo"/> of the client requesting the statistics.</param>
/// <param name="stats">The <see cref="UserStatistics"/> of the user.</param>
/// <returns>Null if the operation succeeds, or <see cref="SerializedException"/>
/// encapsulating the error that occurred if the operation fails.</returns>
public SerializedException SelectUserStatistics(ClientInfo ci, ref UserStatistics stats)
{
    SerializedException sx = null;
    // True while this call holds a connection it has not yet tried to close.
    bool connected = false;
    try
    {
        if (!ConnectToDatabase())
        {
            throw new CWDBConnectionFailedException();
        }
        connected = true;

        using (DataSet ds = new DataSet())
        {
            using (SqlCommand cmd = new SqlCommand("cw_select_user_statistic", dbcon))
            {
                cmd.CommandType = CommandType.StoredProcedure;
                cmd.Parameters.Add("@user_id", SqlDbType.Int);
                cmd.Parameters[0].Value = ci.UserID;
                using (SqlDataAdapter da = new SqlDataAdapter(cmd))
                {
                    da.Fill(ds);
                }
            }

            if (ds.Tables[0].Rows.Count > 0)
            {
                // Seed the dates from the first row, then aggregate over all rows.
                stats.RegistrationDate = (DateTime)ds.Tables[0].Rows[0][2];
                stats.LastActive = (DateTime)ds.Tables[0].Rows[0][6];
                foreach (DataRow dr in ds.Tables[0].Rows)
                {
                    stats.NumClients++;
                    stats.UrlsAssigned += (long)dr[4];
                    stats.UrlsReturned += (long)dr[5];
                    DateTime la = (DateTime)dr[6];
                    if (la > stats.LastActive)
                    {
                        stats.LastActive = la;
                    }
                }
            }
        }

        connected = false;
        if (!DisconnectFromDatabase())
        {
            throw new CWDBConnectionFailedException("Disconnect from database failure.");
        }
    }
    catch (Exception e)
    {
        sx = new SerializedException(e.GetType().ToString(), e.Message, e.ToString());
        if (settings.LogLevel <= CWLogLevel.LogWarning)
        {
            settings.Log.LogWarning("SelectUserStatistics failed for user " + ci.UserID.ToString() + ":" + e.ToString());
        }
    }
    finally
    {
        if (connected)
        {
            // The operation failed after the connection was opened: close it on a
            // best-effort basis. The original error is already recorded in sx, so a
            // failure here is deliberately not allowed to replace it.
            try
            {
                DisconnectFromDatabase();
            }
            catch (Exception)
            {
            }
        }
        UpdateClientLastActive(ci);
        LogClientAction(ci, CWClientActions.LogSendUserStatistics);
    }
    return sx;
}
/// <summary>
/// Selects and returns a set of urls that are ready to be crawled.
/// </summary>
/// <remarks>
/// The selection and the update of the client's statistics run inside a single
/// serializable transaction. The transaction is committed when no exception occurs
/// (including when no urls were selected) and rolled back otherwise. A row that cannot
/// be converted is skipped, so <paramref name="data"/> may end with unfilled (null)
/// entries. NOTE(review): the statistics are updated with the full array length, not
/// with the number of rows actually converted — confirm this is intended.
/// </remarks>
/// <param name="ci">The <see cref="ClientInfo"/> of the client requesting urls to crawl.</param>
/// <param name="data">An array of <see cref="InternetUrlToCrawl"/> objects containing the selected urls.</param>
/// <returns>Null if the operation succeeds, or <see cref="SerializedException"/>
/// encapsulating the error that occurred if the operation fails.</returns>
public SerializedException SelectUrlsToCrawl(ClientInfo ci, ref InternetUrlToCrawl[] data)
{
    SerializedException sx = null;
    try
    {
        if (!ConnectToDatabase())
        {
            throw new CWDBConnectionFailedException();
        }
        //we must use a transaction to make sure that if something goes wrong the
        //changes to the database will be rolled back.
        SqlTransaction transaction = dbcon.BeginTransaction(IsolationLevel.Serializable);//perhaps | repeatableread
        try
        {
            using (DataSet ds = new DataSet())
            {
                //first select the urls to crawl
                using (SqlCommand cmd = new SqlCommand("cw_select_urls_to_crawl", dbcon, transaction))
                {
                    cmd.CommandType = CommandType.StoredProcedure;
                    cmd.CommandTimeout = 120;
                    using (SqlDataAdapter da = new SqlDataAdapter(cmd))
                    {
                        da.Fill(ds);
                    }
                }

                data = new InternetUrlToCrawl[ds.Tables[0].Rows.Count];
                if (data.Length > 0)
                {
                    int i = 0;
                    foreach (DataRow dr in ds.Tables[0].Rows)
                    {
                        try
                        {
                            InternetUrlToCrawl url = new InternetUrlToCrawl((int)dr[0], (string)dr[1]);
                            if (dr[2] != DBNull.Value)
                            {
                                url.CRC = (long)dr[2];
                            }
                            if (dr[3] != DBNull.Value)
                            {
                                url.FlagDomain = (DomainFlagValue)((byte)dr[3]);
                            }
                            if (dr[4] != DBNull.Value)
                            {
                                url.RobotsDisallowedPaths = (string)dr[4];
                            }
                            else
                            {
                                // No paths stored with the url: fall back to the robots
                                // entry held in the settings, or ask the client to fetch
                                // robots.txt when there is none.
                                RobotsTxtEntry entry = settings.Robots.GetEntry(InternetUtils.HostName(url));
                                if (entry != null)
                                {
                                    url.RobotsDisallowedPaths = ConcatenatePaths(entry.DisallowedPaths);
                                }
                                else
                                {
                                    url.FlagFetchRobots = true;
                                }
                            }
                            data[i++] = url;
                        }
                        catch
                        {
                            // Best effort: a row that cannot be converted is skipped.
                            continue;
                        }
                    }

                    using (SqlCommand statscmd = new SqlCommand("cw_update_client_statistics", dbcon, transaction))
                    {
                        statscmd.CommandType = CommandType.StoredProcedure;
                        statscmd.CommandTimeout = 120;
                        statscmd.Parameters.Add("@client_id", SqlDbType.UniqueIdentifier);
                        statscmd.Parameters.Add("@assigned", SqlDbType.BigInt);
                        statscmd.Parameters.Add("@returned", SqlDbType.BigInt);
                        statscmd.Parameters.Add("@type", SqlDbType.TinyInt);
                        statscmd.Parameters[0].Value = ci.ClientID;
                        statscmd.Parameters[1].Value = data.Length;
                        statscmd.Parameters[2].Value = DBNull.Value;
                        statscmd.Parameters[3].Value = 0;
                        statscmd.ExecuteNonQuery();
                    }
                }
            }

            // Complete the transaction in every non-failing case. It used to be left
            // pending whenever no urls were selected.
            transaction.Commit();
        }
        catch (Exception ex)
        {
            // A failing rollback must not hide the error that caused it.
            try
            {
                transaction.Rollback();
            }
            catch (Exception rollbackError)
            {
                if (settings.LogLevel <= CWLogLevel.LogWarning)
                {
                    settings.Log.LogWarning("SelectUrlsToCrawl failed to roll back the transaction: " + rollbackError.ToString());
                }
            }
            if (settings.LogLevel <= CWLogLevel.LogWarning)
            {
                settings.Log.LogWarning("SelectUrlsToCrawl failed, Transaction was rolled back: " + ex.ToString());
            }
            // Rethrow without resetting the stack trace ("throw ex;" would discard it).
            throw;
        }
        finally
        {
            UpdateClientLastActive(ci);
            LogClientAction(ci, CWClientActions.LogSendUrlsToCrawl);
            if (!DisconnectFromDatabase())
            {
                throw new CWDBConnectionFailedException("Disconnect from database failure.");
            }
        }
    }
    catch (Exception e)
    {
        sx = new SerializedException(e.GetType().ToString(), e.Message, e.ToString());
        if (settings.LogLevel <= CWLogLevel.LogWarning)
        {
            settings.Log.LogWarning("SelectUrlsToCrawl failed: " + e.ToString());
        }
    }
    return sx;
}
/// <summary>
/// Selects a Client Update version. Kept for backward compatibility.
/// </summary>
/// <remarks>
/// Because <paramref name="data"/> is passed by value, the selected binary can NOT be
/// handed back to the caller through this overload; only the success or failure of
/// the selection is reported. Callers that need the file must use the overload that
/// takes <paramref name="data"/> by reference.
/// </remarks>
/// <param name="ci">The <see cref="ClientInfo"/> of the client requesting the data.</param>
/// <param name="version">The requested version.</param>
/// <param name="data">Ignored on input; the caller's array reference is never changed.</param>
/// <returns>Null if the operation succeeds, or <see cref="SerializedException"/>
/// encapsulating the error that occurred if the operation fails.</returns>
public SerializedException SelectUpdatedVersion(ClientInfo ci, string version, byte[] data)
{
    return SelectUpdatedVersion(ci, version, ref data);
}

/// <summary>
/// Selects and returns a byte array containing a Client Update version by running
/// the <c>cw_select_updated_version</c> stored procedure.
/// </summary>
/// <remarks>
/// The command and the adapter are disposed even when the query fails, and a
/// connection that was opened is closed on the failure path as well. Whatever the
/// outcome, the client's last-active time is updated and the action is logged.
/// </remarks>
/// <param name="ci">The <see cref="ClientInfo"/> of the client requesting the data.</param>
/// <param name="version">The requested version.</param>
/// <param name="data">Receives the first column of the first returned row, cast to a
/// byte array. It is left unchanged if the operation fails.</param>
/// <returns>Null if the operation succeeds, or <see cref="SerializedException"/>
/// encapsulating the error that occurred if the operation fails (including the case
/// where no row is returned for the requested version).</returns>
public SerializedException SelectUpdatedVersion(ClientInfo ci, string version, ref byte[] data)
{
    SerializedException sx = null;
    // True while this call holds a connection it has not yet tried to close.
    bool connected = false;
    try
    {
        if (!ConnectToDatabase())
        {
            throw new CWDBConnectionFailedException();
        }
        connected = true;

        DataSet ds = new DataSet();
        using (SqlCommand cmd = new SqlCommand("cw_select_updated_version", dbcon))
        {
            cmd.CommandType = CommandType.StoredProcedure;
            cmd.Parameters.Add("@version", SqlDbType.NChar, 15);
            cmd.Parameters[0].Value = version;
            using (SqlDataAdapter da = new SqlDataAdapter(cmd))
            {
                da.Fill(ds);
            }
        }

        connected = false;
        if (!DisconnectFromDatabase())
        {
            throw new CWDBConnectionFailedException("Disconnect from database failure.");
        }

        if (ds.Tables[0].Rows.Count == 0)
        {
            throw new CWException("Version unavailable or not recognized.");
        }
        data = (byte[])ds.Tables[0].Rows[0][0];
    }
    catch (Exception e)
    {
        sx = new SerializedException(e.GetType().ToString(), e.Message, e.ToString());
        if (settings.LogLevel <= CWLogLevel.LogWarning)
        {
            settings.Log.LogWarning("SelectUpdatedVersion failed: " + e.ToString());
        }
    }
    finally
    {
        if (connected)
        {
            // The operation failed after the connection was opened: close it on a
            // best-effort basis. The original error is already recorded in sx, so a
            // failure here is deliberately not allowed to replace it.
            try
            {
                DisconnectFromDatabase();
            }
            catch (Exception)
            {
            }
        }
        UpdateClientLastActive(ci);
        LogClientAction(ci, CWClientActions.LogSendUpdatedVersion);
    }
    return sx;
}
/// <summary>
/// Selects and returns the latest version of the client updates available, using
/// the rows returned by the <c>cw_select_client_versions</c> stored procedure.
/// </summary>
/// <remarks>
/// Rows whose first column cannot be parsed as a <see cref="Version"/> are skipped.
/// If no row can be parsed the result is "0.0.0.0". The command, adapter and result
/// set are disposed even on failure, and a connection that was opened is closed on
/// the failure path as well. The client's last-active time is updated whatever the
/// outcome.
/// </remarks>
/// <param name="ci">The <see cref="ClientInfo"/> of the client performing the call.</param>
/// <param name="version">The latest version update available.</param>
/// <returns>Null if the operation succeeds, or <see cref="SerializedException"/>
/// encapsulating the error that occurred if the operation fails.</returns>
public SerializedException SelectLatestVersion(ClientInfo ci, ref string version)
{
    SerializedException sx = null;
    // True while this call holds a connection it has not yet tried to close.
    bool connected = false;
    try
    {
        if (!ConnectToDatabase())
        {
            throw new CWDBConnectionFailedException();
        }
        connected = true;

        using (DataSet ds = new DataSet())
        {
            //Load the values from the database
            using (SqlCommand cmd = new SqlCommand("cw_select_client_versions", dbcon))
            {
                cmd.CommandType = CommandType.StoredProcedure;
                using (SqlDataAdapter da = new SqlDataAdapter(cmd))
                {
                    da.Fill(ds);
                }
            }

            // Close the connection through the same helper that the other methods of
            // this file use, instead of closing dbcon directly.
            connected = false;
            if (!DisconnectFromDatabase())
            {
                throw new CWDBConnectionFailedException("Disconnect from database failure.");
            }

            Version latestVersion = new Version(0, 0, 0, 0);
            Version currentVersion;
            foreach (DataRow dr in ds.Tables[0].Rows)
            {
                try
                {
                    currentVersion = new Version(((string)dr[0]).Trim());
                    if (currentVersion > latestVersion)
                    {
                        latestVersion = currentVersion;
                    }
                }
                catch
                {
                    // Best effort: a row that does not hold a valid version is skipped.
                    continue;
                }
            }
            version = latestVersion.ToString();
        }
    }
    catch (Exception e)
    {
        sx = new SerializedException(e.GetType().ToString(), e.Message, e.ToString());
        if (settings.LogLevel <= CWLogLevel.LogWarning)
        {
            // The message used to name SelectBannedHosts (copy-paste slip).
            settings.Log.LogWarning("SelectLatestVersion failed: " + e.ToString());
        }
    }
    finally
    {
        if (connected)
        {
            // The operation failed after the connection was opened: close it on a
            // best-effort basis. The original error is already recorded in sx, so a
            // failure here is deliberately not allowed to replace it.
            try
            {
                DisconnectFromDatabase();
            }
            catch (Exception)
            {
            }
        }
        UpdateClientLastActive(ci);
    }
    return sx;
}