/// <summary>
/// Logs a client's activity in the Client History table of the system's database.
/// An entry is written only when the flag for <paramref name="action"/> is set in
/// <c>settings.LogOptions</c>. Errors are deliberately swallowed: this method never
/// throws, whatever happens while connecting or executing the stored procedure.
/// </summary>
/// <param name="ci">The <see cref="ClientInfo"/> of the client performing the action.</param>
/// <param name="action">The <see cref="CWClientActions"/> action performed by the client.</param>
public void LogClientAction(ClientInfo ci, CWClientActions action)
{
    try
    {
        if ((settings.LogOptions & action) != action)
        {
            // Logging of this kind of action is switched off.
            return;
        }
        if (!ConnectToDatabase())
        {
            // Best-effort logging: without a connection there is nothing to do.
            return;
        }
        try
        {
            using (SqlCommand cmd = new SqlCommand("cw_insert_client_history", dbcon))
            {
                cmd.CommandType = CommandType.StoredProcedure;
                cmd.Parameters.Add("@client_id", SqlDbType.UniqueIdentifier).Value = ci.ClientID;
                cmd.Parameters.Add("@event_type", SqlDbType.Int).Value = (int)action;
                cmd.ExecuteNonQuery();
            }
        }
        finally
        {
            // Release the connection even when the command fails. The result is
            // ignored because a disconnect failure was swallowed before as well.
            DisconnectFromDatabase();
        }
    }
    catch
    {
        // Intentionally ignored: client-history logging must never fail the caller.
    }
}
/// <summary>
/// Selects and returns a set of urls that are ready to be crawled.
/// </summary>
/// <remarks>
/// The selection (<c>cw_select_urls_to_crawl</c>) and the client statistics update
/// (<c>cw_update_client_statistics</c>) run inside one serializable transaction, so a
/// failure in either step rolls back both. Rows that cannot be converted to an
/// <see cref="InternetUrlToCrawl"/> are skipped and the returned array is trimmed to
/// the urls that were actually built, so it never contains unfilled slots.
/// The client's last-active stamp and history log are updated whether or not the
/// selection succeeds.
/// </remarks>
/// <param name="ci">The <see cref="ClientInfo"/> of the client requesting urls to crawl.</param>
/// <param name="data">An array of <see cref="InternetUrlToCrawl"/> objects containing the selected urls.</param>
/// <returns>Null if the operation succeeds, or <see cref="SerializedException"/>
/// encapsulating the error that occurred if the operation fails.</returns>
public SerializedException SelectUrlsToCrawl(ClientInfo ci, ref InternetUrlToCrawl[] data)
{
    SerializedException sx = null;
    try
    {
        if (!ConnectToDatabase())
        {
            throw new CWDBConnectionFailedException();
        }
        try
        {
            // A transaction makes sure that if something goes wrong the changes to
            // the database are rolled back. (RepeatableRead might be sufficient.)
            using (SqlTransaction transaction = dbcon.BeginTransaction(IsolationLevel.Serializable))
            {
                try
                {
                    // First select the urls to crawl.
                    DataSet ds = new DataSet();
                    using (SqlCommand cmd = new SqlCommand("cw_select_urls_to_crawl", dbcon, transaction))
                    using (SqlDataAdapter da = new SqlDataAdapter(cmd))
                    {
                        cmd.CommandType = CommandType.StoredProcedure;
                        cmd.CommandTimeout = 120;
                        da.Fill(ds);
                    }

                    // Convert each row; a row that fails conversion is skipped.
                    DataRowCollection rows = ds.Tables[0].Rows;
                    InternetUrlToCrawl[] urls = new InternetUrlToCrawl[rows.Count];
                    int count = 0;
                    foreach (DataRow dr in rows)
                    {
                        try
                        {
                            InternetUrlToCrawl url = new InternetUrlToCrawl((int)dr[0], (string)dr[1]);
                            if (dr[2] != DBNull.Value)
                            {
                                url.CRC = (long)dr[2];
                            }
                            if (dr[3] != DBNull.Value)
                            {
                                url.FlagDomain = (DomainFlagValue)((byte)dr[3]);
                            }
                            if (dr[4] != DBNull.Value)
                            {
                                url.RobotsDisallowedPaths = (string)dr[4];
                            }
                            else
                            {
                                // No robots data stored with the url: fall back to the
                                // robots entry held in the settings, or ask the client
                                // to fetch robots.txt itself.
                                RobotsTxtEntry entry = settings.Robots.GetEntry(InternetUtils.HostName(url));
                                if (entry != null)
                                {
                                    url.RobotsDisallowedPaths = ConcatenatePaths(entry.DisallowedPaths);
                                }
                                else
                                {
                                    url.FlagFetchRobots = true;
                                }
                            }
                            urls[count] = url;
                            count++;
                        }
                        catch
                        {
                            // Skip the malformed row and keep going with the rest.
                            continue;
                        }
                    }

                    // Drop the unfilled tail left behind by skipped rows.
                    if (count < urls.Length)
                    {
                        InternetUrlToCrawl[] trimmed = new InternetUrlToCrawl[count];
                        Array.Copy(urls, trimmed, count);
                        urls = trimmed;
                    }
                    data = urls;

                    if (data.Length > 0)
                    {
                        using (SqlCommand statscmd = new SqlCommand("cw_update_client_statistics", dbcon, transaction))
                        {
                            statscmd.CommandType = CommandType.StoredProcedure;
                            statscmd.CommandTimeout = 120;
                            statscmd.Parameters.Add("@client_id", SqlDbType.UniqueIdentifier).Value = ci.ClientID;
                            statscmd.Parameters.Add("@assigned", SqlDbType.BigInt).Value = data.Length;
                            statscmd.Parameters.Add("@returned", SqlDbType.BigInt).Value = DBNull.Value;
                            statscmd.Parameters.Add("@type", SqlDbType.TinyInt).Value = 0;
                            statscmd.ExecuteNonQuery();
                        }
                    }

                    // Always finish the transaction, also when nothing was selected;
                    // otherwise it stays pending on the connection while the
                    // bookkeeping calls in the finally block below run.
                    transaction.Commit();
                }
                catch (Exception ex)
                {
                    try
                    {
                        transaction.Rollback();
                    }
                    catch (Exception rollbackError)
                    {
                        // A failing rollback must not hide the original error.
                        if (settings.LogLevel <= CWLogLevel.LogWarning)
                        {
                            settings.Log.LogWarning("SelectUrlsToCrawl failed to roll back the transaction: " + rollbackError.ToString());
                        }
                    }
                    if (settings.LogLevel <= CWLogLevel.LogWarning)
                    {
                        settings.Log.LogWarning("SelectUrlsToCrawl failed, Transaction was rolled back: " + ex.ToString());
                    }
                    // Rethrow without resetting the stack trace.
                    throw;
                }
            }
        }
        finally
        {
            UpdateClientLastActive(ci);
            LogClientAction(ci, CWClientActions.LogSendUrlsToCrawl);
            if (!DisconnectFromDatabase())
            {
                throw new CWDBConnectionFailedException("Disconnect from database failure.");
            }
        }
    }
    catch (Exception e)
    {
        sx = new SerializedException(e.GetType().ToString(), e.Message, e.ToString());
        if (settings.LogLevel <= CWLogLevel.LogWarning)
        {
            settings.Log.LogWarning("SelectUrlsToCrawl failed: " + e.ToString());
        }
    }
    return sx;
}
/// <summary>
/// Selects a byte array containing a Client Update version by running the
/// <c>cw_select_updated_version</c> stored procedure.
/// </summary>
/// <remarks>
/// NOTE(review): <paramref name="data"/> is passed by value, so the array assigned to
/// it inside this method is not visible to the caller. Returning the bytes requires a
/// <c>ref</c>/<c>out</c> parameter (or a different return type), which would be a
/// signature change that has to be coordinated with every caller.
/// </remarks>
/// <param name="ci">The <see cref="ClientInfo"/> of the client requesting the data.</param>
/// <param name="version">The requested version.</param>
/// <param name="data">A byte array intended to receive the binary update file (see remarks).</param>
/// <returns>Null if the operation succeeds, or <see cref="SerializedException"/>
/// encapsulating the error that occurred if the operation fails.</returns>
public SerializedException SelectUpdatedVersion(ClientInfo ci, string version, byte[] data)
{
    SerializedException sx = null;
    try
    {
        if (!ConnectToDatabase())
        {
            throw new CWDBConnectionFailedException();
        }
        DataSet ds = new DataSet();
        // The using blocks release the command and adapter even when Fill throws.
        using (SqlCommand cmd = new SqlCommand("cw_select_updated_version", dbcon))
        {
            cmd.CommandType = CommandType.StoredProcedure;
            cmd.Parameters.Add("@version", SqlDbType.NChar, 15).Value = version;
            using (SqlDataAdapter da = new SqlDataAdapter(cmd))
            {
                da.Fill(ds);
            }
        }
        if (!DisconnectFromDatabase())
        {
            throw new CWDBConnectionFailedException("Disconnect from database failure.");
        }
        if (ds.Tables[0].Rows.Count == 0)
        {
            throw new CWException("Version unavailable or not recognized.");
        }
        // First column of the first row holds the binary update.
        data = (byte[])ds.Tables[0].Rows[0][0];
    }
    catch (Exception e)
    {
        sx = new SerializedException(e.GetType().ToString(), e.Message, e.ToString());
        if (settings.LogLevel <= CWLogLevel.LogWarning)
        {
            settings.Log.LogWarning("SelectUpdatedVersion failed: " + e.ToString());
        }
    }
    finally
    {
        UpdateClientLastActive(ci);
        LogClientAction(ci, CWClientActions.LogSendUpdatedVersion);
    }
    return sx;
}
/// <summary>
/// Selects and returns the latest version of the client updates available.
/// </summary>
/// <remarks>
/// Every row returned by <c>cw_select_client_versions</c> is parsed as a
/// <see cref="Version"/>; rows that cannot be parsed are skipped. When no row yields
/// a valid version the result is "0.0.0.0".
/// </remarks>
/// <param name="ci">The <see cref="ClientInfo"/> of the client performing the call.</param>
/// <param name="version">The latest version update available.</param>
/// <returns>Null if the operation succeeds, or <see cref="SerializedException"/>
/// encapsulating the error that occurred if the operation fails.</returns>
public SerializedException SelectLatestVersion(ClientInfo ci, ref string version)
{
    SerializedException sx = null;
    try
    {
        if (!ConnectToDatabase())
        {
            throw new CWDBConnectionFailedException();
        }
        using (DataSet ds = new DataSet())
        {
            // Load the values from the database.
            using (SqlCommand cmd = new SqlCommand("cw_select_client_versions", dbcon))
            using (SqlDataAdapter da = new SqlDataAdapter(cmd))
            {
                cmd.CommandType = CommandType.StoredProcedure;
                da.Fill(ds);
            }
            dbcon.Close();

            Version latestVersion = new Version(0, 0, 0, 0);
            foreach (DataRow dr in ds.Tables[0].Rows)
            {
                try
                {
                    Version currentVersion = new Version(((string)dr[0]).Trim());
                    if (currentVersion > latestVersion)
                    {
                        latestVersion = currentVersion;
                    }
                }
                catch
                {
                    // Not a valid version string: ignore this row.
                    continue;
                }
            }
            version = latestVersion.ToString();
        }
    }
    catch (Exception e)
    {
        sx = new SerializedException(e.GetType().ToString(), e.Message, e.ToString());
        if (settings.LogLevel <= CWLogLevel.LogWarning)
        {
            // The message previously named SelectBannedHosts by mistake.
            settings.Log.LogWarning("SelectLatestVersion failed: " + e.ToString());
        }
    }
    finally
    {
        UpdateClientLastActive(ci);
    }
    return sx;
}
/// <summary>
/// Constructs a new instance of the <see cref="Globals"/> class: resolves the
/// application, data and work paths, loads the client settings, fills in the
/// client info and creates the system and file event loggers.
/// </summary>
private Globals()
{
    // Interning the strings saves us some memory.
    // NOTE(review): the opening parenthesis in the user agent is never closed - confirm
    // whether that is intentional before changing the value.
    userAgent = String.Intern("CrawlWave/1.2 (crawlwave[at]spiderwave.aueb.gr http://www.spiderwave.aueb.gr/");

    string basePath = GetAppPath();
    string probeFile = basePath + "test.dat";
    try
    {
        // Probe for write access by creating and removing a scratch file. If that
        // fails, fall back to the current user's Application Data folder.
        File.Create(probeFile).Close();
        File.Delete(probeFile);
    }
    catch
    {
        basePath = System.Environment.GetFolderPath(System.Environment.SpecialFolder.ApplicationData) + "\\CrawlWave\\";
    }

    appPath = String.Intern(basePath);
    dataPath = String.Intern(basePath + "data\\");
    workPath = String.Intern(basePath + "work\\");

    // Make sure the data and work directories exist.
    if (!Directory.Exists(dataPath))
    {
        Directory.CreateDirectory(dataPath);
    }
    if (!Directory.Exists(workPath))
    {
        Directory.CreateDirectory(workPath);
    }

    logEventSource = String.Intern("CrawlWave");
    logFileName = String.Intern(dataPath + "CrawlWave.Client.log");

    string settingsFile = dataPath + "CrawlWave.Client.Config.xml";
    settings = new ClientSettings(settingsFile);
    settings.LoadSettings();

    clientInfo = new ClientInfo();
    clientInfo.UserID = settings.UserID;
    clientInfo.ClientID = settings.ClientID;
    Version assemblyVersion = Assembly.GetExecutingAssembly().GetName().Version;
    clientInfo.Version = assemblyVersion.ToString();

    systemLog = new SystemEventLogger(logEventSource);
    fileLog = new FileEventLogger(logFileName, true, logEventSource);
}
/// <summary>
/// Selects and returns a list of all the banned hosts by running the
/// <c>cw_select_banned_hosts</c> stored procedure.
/// </summary>
/// <param name="ci">The <see cref="ClientInfo"/> of the client requesting the data.</param>
/// <param name="data">A <see cref="DataSet"/> that will contain the list of banned hosts.
/// It is replaced with a new <see cref="DataSet"/> before the query runs.</param>
/// <returns>Null if the operation succeeds, or <see cref="SerializedException"/>
/// encapsulating the error that occurred if the operation fails.</returns>
public SerializedException SelectBannedHosts(ClientInfo ci, ref DataSet data)
{
    SerializedException sx = null;
    try
    {
        if (!ConnectToDatabase())
        {
            throw new CWDBConnectionFailedException();
        }
        // The using blocks release the command and adapter even when Fill throws.
        using (SqlCommand cmd = new SqlCommand("cw_select_banned_hosts", dbcon))
        {
            cmd.CommandType = CommandType.StoredProcedure;
            using (SqlDataAdapter da = new SqlDataAdapter(cmd))
            {
                data = new DataSet();
                da.Fill(data);
            }
        }
        if (!DisconnectFromDatabase())
        {
            throw new CWDBConnectionFailedException("Disconnect from database failure.");
        }
    }
    catch (Exception e)
    {
        sx = new SerializedException(e.GetType().ToString(), e.Message, e.ToString());
        if (settings.LogLevel <= CWLogLevel.LogWarning)
        {
            settings.Log.LogWarning("SelectBannedHosts failed: " + e.ToString());
        }
    }
    finally
    {
        UpdateClientLastActive(ci);
        LogClientAction(ci, CWClientActions.LogSendBannedHosts);
    }
    return sx;
}
/// <summary>
/// Updates a client's last activity date in the system's database by running the
/// <c>cw_update_client_last_active</c> stored procedure. Errors are deliberately
/// swallowed: this method never throws.
/// </summary>
/// <param name="ci">The info of the client performing an action.</param>
public void UpdateClientLastActive(ClientInfo ci)
{
    try
    {
        if (!ConnectToDatabase())
        {
            // Best-effort update: without a connection there is nothing to do.
            return;
        }
        try
        {
            using (SqlCommand cmd = new SqlCommand("cw_update_client_last_active", dbcon))
            {
                cmd.CommandType = CommandType.StoredProcedure;
                cmd.Parameters.Add("@client_id", SqlDbType.UniqueIdentifier).Value = ci.ClientID;
                cmd.ExecuteNonQuery();
            }
        }
        finally
        {
            // Release the connection even when the command fails. The result is
            // ignored because a disconnect failure was swallowed before as well.
            DisconnectFromDatabase();
        }
    }
    catch
    {
        // Intentionally ignored: this bookkeeping must never fail the caller.
    }
}
/// <summary>
/// Forwards a request for a client update version to
/// <c>engine.SelectUpdatedVersion</c>.
/// </summary>
/// <remarks>
/// NOTE(review): <paramref name="data"/> is handed to the engine by value, so nothing
/// the engine assigns can reach it; as written it is always null on return. Fixing this
/// requires a <c>ref</c>/<c>out</c> parameter on the engine method.
/// </remarks>
/// <param name="ci">The <see cref="ClientInfo"/> of the requesting client.</param>
/// <param name="version">The requested version.</param>
/// <param name="data">Set to null before the engine is called (see remarks).</param>
/// <returns>The <see cref="SerializedException"/> returned by the engine call.</returns>
public SerializedException SendUpdatedVersion(ClientInfo ci, string version, out byte[] data)
{
    data = null;
    SerializedException result = engine.SelectUpdatedVersion(ci, version, data);
    return result;
}
/// <summary>
/// Retrieves the latest available client version by delegating to
/// <c>engine.SelectLatestVersion</c>.
/// </summary>
/// <param name="ci">The <see cref="ClientInfo"/> of the requesting client.</param>
/// <param name="version">Initialised to an empty string, then passed by reference to
/// the engine, which may replace it.</param>
/// <returns>The <see cref="SerializedException"/> returned by the engine call.</returns>
public SerializedException SendLatestVersion(ClientInfo ci, out string version)
{
    version = string.Empty;
    SerializedException result = engine.SelectLatestVersion(ci, ref version);
    return result;
}
/// <summary>
/// Retrieves the server list by delegating to <c>engine.SelectServers</c>.
/// </summary>
/// <param name="ci">The <see cref="ClientInfo"/> of the requesting client.</param>
/// <param name="data">Initialised to null, then passed by reference to the engine,
/// which may replace it.</param>
/// <returns>The <see cref="SerializedException"/> returned by the engine call.</returns>
public SerializedException SendServers(ClientInfo ci, out System.Data.DataSet data)
{
    data = null;
    SerializedException result = engine.SelectServers(ci, ref data);
    return result;
}
/// <summary>
/// Registers a client by delegating to <c>engine.StoreClientRegistrationInfo</c>.
/// </summary>
/// <param name="ci">The <see cref="ClientInfo"/> of the client; passed by reference so
/// the engine may update it.</param>
/// <param name="info">The <see cref="CWComputerInfo"/> of the client computer.</param>
/// <returns>The <see cref="SerializedException"/> returned by the engine call.</returns>
public SerializedException RegisterClient(ref ClientInfo ci, CWComputerInfo info)
{
    SerializedException result = engine.StoreClientRegistrationInfo(ref ci, info);
    return result;
}
/// <summary>
/// Accepts crawl results from a client and hands them to
/// <c>engine.StoreCrawlResults</c>.
/// </summary>
/// <param name="ci">The <see cref="ClientInfo"/> of the client returning the data.</param>
/// <param name="data">The <see cref="UrlCrawlData"/> items returned by the client.</param>
/// <returns>The <see cref="SerializedException"/> returned by the engine call.</returns>
public SerializedException GetCrawlResults(ClientInfo ci, UrlCrawlData[] data)
{
    SerializedException result = engine.StoreCrawlResults(ci, data);
    return result;
}
/// <summary>
/// Accepts a client's computer info and hands it to
/// <c>engine.StoreNewClientComputerInfo</c>.
/// </summary>
/// <param name="ci">The <see cref="ClientInfo"/> of the client.</param>
/// <param name="info">The <see cref="CWComputerInfo"/> of the client computer.</param>
/// <returns>The <see cref="SerializedException"/> returned by the engine call.</returns>
public SerializedException GetClientComputerInfo(ClientInfo ci, CWComputerInfo info)
{
    SerializedException result = engine.StoreNewClientComputerInfo(ci, info);
    return result;
}
/// <summary>
/// Selects and returns the statistics for a certain user.
/// </summary>
/// <remarks>
/// Each row returned by <c>cw_select_user_statistic</c> is counted as one client:
/// columns 4 and 5 are summed into the assigned/returned totals, column 2 of the first
/// row provides the registration date and the most recent value of column 6 becomes
/// the last-active date. <paramref name="stats"/> is left untouched when there are no rows.
/// </remarks>
/// <param name="ci">The <see cref="ClientInfo"/> of the client requesting the statistics.</param>
/// <param name="stats">The <see cref="UserStatistics"/> of the user.</param>
/// <returns>Null if the operation succeeds, or <see cref="SerializedException"/>
/// encapsulating the error that occurred if the operation fails.</returns>
public SerializedException SelectUserStatistics(ClientInfo ci, ref UserStatistics stats)
{
    SerializedException sx = null;
    try
    {
        if (!ConnectToDatabase())
        {
            throw new CWDBConnectionFailedException();
        }
        using (DataSet ds = new DataSet())
        {
            // The using blocks release the command and adapter even when Fill throws.
            using (SqlCommand cmd = new SqlCommand("cw_select_user_statistic", dbcon))
            {
                cmd.CommandType = CommandType.StoredProcedure;
                cmd.Parameters.Add("@user_id", SqlDbType.Int).Value = ci.UserID;
                using (SqlDataAdapter da = new SqlDataAdapter(cmd))
                {
                    da.Fill(ds);
                }
            }

            DataRowCollection rows = ds.Tables[0].Rows;
            if (rows.Count > 0)
            {
                stats.RegistrationDate = (DateTime)rows[0][2];
                stats.LastActive = (DateTime)rows[0][6];
                foreach (DataRow dr in rows)
                {
                    stats.NumClients++;
                    stats.UrlsAssigned += (long)dr[4];
                    stats.UrlsReturned += (long)dr[5];
                    // Keep the most recent activity date over all rows.
                    DateTime la = (DateTime)dr[6];
                    if (la > stats.LastActive)
                    {
                        stats.LastActive = la;
                    }
                }
            }
        }
        if (!DisconnectFromDatabase())
        {
            throw new CWDBConnectionFailedException("Disconnect from database failure.");
        }
    }
    catch (Exception e)
    {
        sx = new SerializedException(e.GetType().ToString(), e.Message, e.ToString());
        if (settings.LogLevel <= CWLogLevel.LogWarning)
        {
            settings.Log.LogWarning("SelectUserStatistics failed for user " + ci.UserID.ToString() + ":" + e.ToString());
        }
    }
    finally
    {
        UpdateClientLastActive(ci);
        LogClientAction(ci, CWClientActions.LogSendUserStatistics);
    }
    return sx;
}
/// <summary>
/// Logs the request in the client history and then retrieves a set of urls to crawl
/// by delegating to <c>engine.SelectUrlsToCrawl</c>.
/// </summary>
/// <remarks>
/// NOTE(review): if the engine's <c>SelectUrlsToCrawl</c> records
/// <c>LogSendUrlsToCrawl</c> itself, the action is logged twice per request - confirm
/// whether the explicit log call here is still wanted.
/// </remarks>
/// <param name="ci">The <see cref="ClientInfo"/> of the requesting client.</param>
/// <param name="data">Initialised to null, then passed by reference to the engine,
/// which may replace it.</param>
/// <returns>The <see cref="SerializedException"/> returned by the engine call.</returns>
public SerializedException SendUrlsToCrawl(ClientInfo ci, out InternetUrlToCrawl[] data)
{
    data = null;
    engine.LogClientAction(ci, CWClientActions.LogSendUrlsToCrawl);
    SerializedException result = engine.SelectUrlsToCrawl(ci, ref data);
    return result;
}
/// <summary>
/// Stores the results that the clients return after crawling a set of Urls.
/// </summary>
/// <remarks>
/// For every item that fetched robots.txt or was redirected, the robots data is written
/// with <c>cw_update_or_insert_robot</c> (individual failures are skipped); afterwards
/// the client's returned-urls statistic is updated. Once the database has been reached,
/// the data is saved to disk through <c>SaveXMLFile</c> whether or not the database
/// work succeeded. A failure to save the file is reported through the return value
/// unless a database error replaces it.
/// </remarks>
/// <param name="ci">The <see cref="ClientInfo"/> of the client returning the data.</param>
/// <param name="data">An array of <see cref="UrlCrawlData"/> objects containing the data of the crawled urls.</param>
/// <returns>Null if the operation succeeds, or <see cref="SerializedException"/>
/// encapsulating the error that occurred if the operation fails.</returns>
public SerializedException StoreCrawlResults(ClientInfo ci, UrlCrawlData[] data)
{
    SerializedException sx = null;
    try
    {
        if (!ConnectToDatabase())
        {
            throw new CWDBConnectionFailedException();
        }
        try
        {
            // Store the new robots.txt files in the database. Nothing else needs to
            // be done since the urls will be marked as not assigned when their data
            // is processed by DBUpdater.
            if ((data != null) && (data.Length > 0))
            {
                using (SqlCommand cmd = new SqlCommand("cw_update_or_insert_robot", dbcon))
                {
                    cmd.CommandType = CommandType.StoredProcedure;
                    SqlParameter hostId = cmd.Parameters.Add("@host_id", SqlDbType.UniqueIdentifier);
                    SqlParameter disallowed = cmd.Parameters.Add("@disallowed", SqlDbType.NVarChar, 1000);
                    foreach (UrlCrawlData urlData in data)
                    {
                        if ((urlData.FlagFetchRobots) || (urlData.Redirected))
                        {
                            string url = urlData.Url;
                            // The host id is built from the MD5 hash of the host name.
                            hostId.Value = new Guid(MD5Hash.md5(InternetUtils.HostName(url)));
                            disallowed.Value = urlData.RobotsDisallowedPaths;
                            try
                            {
                                cmd.ExecuteNonQuery();
                            }
                            catch
                            {
                                // One failing host must not stop the remaining ones.
                                continue;
                            }
                        }
                    }
                }

                using (SqlCommand statscmd = new SqlCommand("cw_update_client_statistics", dbcon))
                {
                    statscmd.CommandType = CommandType.StoredProcedure;
                    statscmd.Parameters.Add("@client_id", SqlDbType.UniqueIdentifier).Value = ci.ClientID;
                    statscmd.Parameters.Add("@assigned", SqlDbType.BigInt).Value = DBNull.Value;
                    statscmd.Parameters.Add("@returned", SqlDbType.BigInt).Value = data.Length;
                    statscmd.Parameters.Add("@type", SqlDbType.TinyInt).Value = 1;
                    statscmd.ExecuteNonQuery();
                }
            }
        }
        catch (Exception ex)
        {
            if (settings.LogLevel <= CWLogLevel.LogWarning)
            {
                settings.Log.LogWarning("StoreCrawlResults failed: " + ex.ToString());
            }
            // Rethrow without resetting the stack trace.
            throw;
        }
        finally
        {
            // Save the xml file on disk regardless of the database outcome.
            try
            {
                SaveXMLFile(ci, data);
            }
            catch (Exception se)
            {
                sx = new SerializedException(se.GetType().ToString(), se.Message, se.ToString());
                if (settings.LogLevel <= CWLogLevel.LogWarning)
                {
                    settings.Log.LogWarning("StoreCrawlResults failed to save XML data on disk: " + se.ToString());
                }
            }
        }
        if (!DisconnectFromDatabase())
        {
            throw new CWDBConnectionFailedException("Disconnect from database failure.");
        }
    }
    catch (Exception e)
    {
        sx = new SerializedException(e.GetType().ToString(), e.Message, e.ToString());
    }
    finally
    {
        UpdateClientLastActive(ci);
        LogClientAction(ci, CWClientActions.LogGetCrawlResults);
    }
    return sx;
}
/// <summary>
/// Logs the request in the client history and then retrieves the user's statistics
/// by delegating to <c>engine.SelectUserStatistics</c>.
/// </summary>
/// <remarks>
/// NOTE(review): if the engine's <c>SelectUserStatistics</c> records
/// <c>LogSendUserStatistics</c> itself, the action is logged twice per request -
/// confirm whether the explicit log call here is still wanted.
/// </remarks>
/// <param name="ci">The <see cref="ClientInfo"/> of the requesting client.</param>
/// <param name="stats">Initialised to a new <see cref="UserStatistics"/>, then passed
/// by reference to the engine, which fills it in.</param>
/// <returns>The <see cref="SerializedException"/> returned by the engine call.</returns>
public SerializedException SendUserStatistics(ClientInfo ci, out UserStatistics stats)
{
    stats = new UserStatistics();
    engine.LogClientAction(ci, CWClientActions.LogSendUserStatistics);
    SerializedException result = engine.SelectUserStatistics(ci, ref stats);
    return result;
}
/// <summary>
/// Updates the computer hardware info related to a client by running the
/// <c>cw_update_client</c> stored procedure.
/// </summary>
/// <param name="ci">The <see cref="ClientInfo"/> of the client.</param>
/// <param name="info">The <see cref="CWComputerInfo"/> of the client computer.</param>
/// <returns>Null if the operation succeeds, or <see cref="SerializedException"/>
/// encapsulating the error that occurred if the operation fails.</returns>
public SerializedException StoreNewClientComputerInfo(ClientInfo ci, CWComputerInfo info)
{
    SerializedException sx = null;
    try
    {
        if (!ConnectToDatabase())
        {
            throw new CWDBConnectionFailedException();
        }
        // The using block releases the command even when execution throws.
        using (SqlCommand cmd = new SqlCommand("cw_update_client", dbcon))
        {
            cmd.CommandType = CommandType.StoredProcedure;
            cmd.Parameters.Add("@client_id", SqlDbType.UniqueIdentifier).Value = ci.ClientID;
            cmd.Parameters.Add("@user_id", SqlDbType.Int).Value = ci.UserID;
            cmd.Parameters.Add("@info_cpu", SqlDbType.NVarChar, 50).Value = info.CPUType;
            cmd.Parameters.Add("@info_ram", SqlDbType.SmallInt).Value = info.RAMSize;
            cmd.Parameters.Add("@info_hdd", SqlDbType.Int).Value = info.HDDSpace;
            cmd.Parameters.Add("@info_net", SqlDbType.TinyInt).Value = (byte)info.ConnectionSpeed;
            cmd.ExecuteNonQuery();
        }
        if (!DisconnectFromDatabase())
        {
            throw new CWDBConnectionFailedException("Disconnect from database failure.");
        }
    }
    catch (Exception e)
    {
        sx = new SerializedException(e.GetType().ToString(), e.Message, e.ToString());
        if (settings.LogLevel <= CWLogLevel.LogWarning)
        {
            settings.Log.LogWarning("StoreNewClientComputerInfo failed: " + e.ToString());
        }
    }
    finally
    {
        LogClientAction(ci, CWClientActions.LogGetClientComputerInfo);
    }
    return sx;
}
/// <summary>
/// Constructs a new instance of the <see cref="UrlCrawlDataFile"/> class with a
/// fresh <see cref="ClientInfo"/> and no crawl data.
/// </summary>
public UrlCrawlDataFile()
{
    this.Info = new ClientInfo();
    this.Data = null;
}
/// <summary>
/// Stores an array of <see cref="UrlCrawlData"/> objects and the <see cref="ClientInfo"/>
/// of the client who returned them on a compressed file on disk.
/// </summary>
/// <remarks>
/// The data is wrapped in a <see cref="UrlCrawlDataFile"/>, serialized with a
/// <c>SoapFormatter</c> and written as the single entry of a zip archive named
/// <c>&lt;new guid&gt;.zip</c> under <c>settings.DataFilesPath</c>. Any I/O or
/// serialization error propagates to the caller.
/// </remarks>
/// <param name="info">The <see cref="ClientInfo"/> of the client who returned the data.</param>
/// <param name="data">An array of <see cref="UrlCrawlData"/> objects containing the
/// data returned by the client.</param>
private void SaveXMLFile(ClientInfo info, UrlCrawlData[] data)
{
    UrlCrawlDataFile udf = new UrlCrawlDataFile(info, data);
    string id = Guid.NewGuid().ToString();

    // Serialize the object into memory first so the entry size and CRC are known
    // before anything is written to the archive.
    byte[] buffer;
    using (MemoryStream ms = new MemoryStream())
    {
        SoapFormatter xml = new SoapFormatter();
        xml.Serialize(ms, udf);
        buffer = ms.ToArray();
    }

    Crc32 crc = new Crc32();
    crc.Update(buffer);

    string fileName = settings.DataFilesPath + id + ".zip";
    // The using blocks close the file handle even when writing the archive fails.
    using (FileStream fs = File.Create(fileName))
    using (ZipOutputStream zs = new ZipOutputStream(fs))
    {
        ZipEntry entry = new ZipEntry(id);
        entry.DateTime = DateTime.Now;
        entry.Size = buffer.Length;
        entry.Crc = crc.Value;
        zs.PutNextEntry(entry);
        zs.Write(buffer, 0, buffer.Length);
        zs.Finish();
    }
}
/// <summary>
/// Constructs a new instance of the <see cref="UrlCrawlDataFile"/> class that holds
/// the supplied client info and crawl data.
/// </summary>
/// <param name="info">The <see cref="ClientInfo"/> of the client who returned the data.</param>
/// <param name="data">An array of <see cref="UrlCrawlData"/> objects.</param>
public UrlCrawlDataFile(ClientInfo info, UrlCrawlData[] data)
{
    this.Info = info;
    this.Data = data;
}
/// <summary>
/// Attempts to perform the registration of a new user and, after that, of this client.
/// </summary>
/// <remarks>
/// If the server reports that the user already exists, a warning is logged and the
/// client registration is still attempted. Any other user-registration error is
/// returned immediately, before the settings are modified; previously such an error
/// was ignored, the settings were saved with the unset user id and the error could be
/// replaced by the result of the client registration.
/// NOTE(review): the password is sent as a plain MD5 hash, which is weak; changing it
/// would require a matching change on the server side.
/// </remarks>
/// <param name="UserName">The user's username.</param>
/// <param name="Password">The user's password.</param>
/// <param name="Email">The user's email address.</param>
/// <returns>Null if the operation succeeds, or a <see cref="SerializedException"/>
/// encapsulating the error that occurred if the operation fails.</returns>
public SerializedException RegisterUser(string UserName, string Password, string Email)
{
    SerializedException sx = null;
    try
    {
        int ID = 0;
        byte[] password = MD5Hash.md5(Password);
        sx = proxy.RegisterUser(ref ID, UserName, password, Email);
        if (sx != null)
        {
            if (sx.Type != "CrawlWave.Common.CWUserExistsException")
            {
                // A genuine failure: report it instead of continuing with ID 0.
                return sx;
            }
            log.LogWarning("User already exists, attempting to register client.");
        }

        globals.Settings.UserID = ID;
        globals.Settings.UserName = UserName;
        globals.Settings.Password = password;
        globals.Settings.Email = Email;
        CWComputerInfo info = ComputerInfo.GetComputerInfo();
        globals.Settings.HardwareInfo = ComputerInfo.GetSHA1HashCode(info);
        globals.Settings.SaveSettings();

        // Register this client under the user id that was just stored.
        ClientInfo ci = new ClientInfo();
        ci.UserID = globals.Settings.UserID;
        sx = proxy.RegisterClient(ref ci, info);
        globals.Settings.ClientID = ci.ClientID;
        globals.Settings.SaveSettings();
    }
    catch (Exception ex)
    {
        sx = new SerializedException(ex.GetType().ToString(), ex.Message, ex.StackTrace);
    }
    return sx;
}
/// <summary>
/// Constructs a new instance of the <see cref="Globals"/> class: resolves the
/// application path, loads the client settings, fills in the client info and
/// creates the system and file event loggers.
/// </summary>
private Globals()
{
    string basePath = GetAppPath();
    string probeFile = basePath + "test.dat";
    try
    {
        // Probe for write access by creating and removing a scratch file. If that
        // fails, fall back to the current user's Application Data folder.
        File.Create(probeFile).Close();
        File.Delete(probeFile);
    }
    catch
    {
        basePath = System.Environment.GetFolderPath(System.Environment.SpecialFolder.ApplicationData) + "\\CrawlWave\\";
    }

    appPath = String.Intern(basePath);
    logEventSource = String.Intern("CrawlWave.ClientScheduler");
    logFileName = String.Intern(appPath + "CrawlWave.ClientScheduler.log");

    string settingsFile = appPath + "data\\CrawlWave.Client.config.xml";
    settings = new ClientSettings(settingsFile);
    settings.LoadSettings();

    clientInfo = new ClientInfo();
    clientInfo.UserID = settings.UserID;
    clientInfo.ClientID = settings.ClientID;
    Version assemblyVersion = Assembly.GetExecutingAssembly().GetName().Version;
    clientInfo.Version = assemblyVersion.ToString();

    systemLog = new SystemEventLogger(logEventSource);
    fileLog = new FileEventLogger(logFileName, true, logEventSource);
}