/// <summary>
/// Updates the Url and the Url Data tables.
/// </summary>
/// <remarks>
/// Runs the <c>cw_update_url</c> stored procedure and, when the crawl data is flagged as
/// updated, the <c>cw_update_url_data</c> stored procedure. For redirected urls the host is
/// first inserted through <c>cw_insert_host</c> on a best-effort basis. All commands are
/// enlisted in <paramref name="transaction"/>; this method neither commits nor rolls it back.
/// Any exception is reported through <c>AddToReportQueue</c> as a warning and turned into a
/// return value of 0.
/// </remarks>
/// <param name="data">The UrlCrawlData containing the data of the crawled Url.</param>
/// <param name="transaction">The currently active <see cref="SqlTransaction"/>.</param>
/// <returns>The ID of the updated url or 0 if something goes wrong.</returns>
private int UpdateUrl(UrlCrawlData data, SqlTransaction transaction)
{
    int retVal = 0;
    try
    {
        // The host ID is derived from the MD5 hash of the url's host name.
        Uri uri = new Uri(data.Url);
        string host_name = uri.Host;
        Guid host_id = new Guid(MD5Hash.md5(host_name));

        // Values that differ between a redirected and a normally crawled url.
        object urlPriority;
        byte flagDomain;
        int flagRobots;
        int flagRedirected;

        if (data.Redirected)
        {
            // We must first attempt to insert the host, otherwise the url update will fail.
            using (SqlCommand hostcmd = new SqlCommand("cw_insert_host", dbcon, transaction))
            {
                hostcmd.CommandType = CommandType.StoredProcedure;
                hostcmd.CommandTimeout = settings.DBActionTimeout;
                hostcmd.Parameters.Add("@host_id", SqlDbType.UniqueIdentifier).Value = host_id;
                hostcmd.Parameters.Add("@host_name", SqlDbType.NVarChar, 100).Value = host_name;
                try
                {
                    hostcmd.ExecuteNonQuery();
                }
                catch
                {
                    // Deliberately ignored: the host probably exists already, and a real
                    // problem will surface when the url update below is executed.
                }
            }

            urlPriority = (byte)data.RedirectedPriority;
            flagDomain = (byte)data.RedirectedFlagDomain;
            flagRobots = data.RedirectedFlagRobots ? 1 : 0;
            flagRedirected = 1;
        }
        else
        {
            urlPriority = DBNull.Value;
            flagDomain = (byte)data.UrlToCrawl.FlagDomain;
            // NOTE(review): this branch also reads RedirectedFlagRobots although the url was
            // not redirected - confirm that this is the intended source of the flag.
            flagRobots = (data.FlagFetchRobots && data.RedirectedFlagRobots) ? 1 : 0;
            flagRedirected = 0;
        }

        // Update the url table. The using block guarantees the command is disposed even
        // when the execution or the read of the output parameter throws.
        using (SqlCommand urlcmd = new SqlCommand("cw_update_url", dbcon, transaction))
        {
            urlcmd.CommandType = CommandType.StoredProcedure;
            urlcmd.CommandTimeout = settings.DBActionTimeout;
            urlcmd.Parameters.Add("@url_id", SqlDbType.Int).Value = data.ID;
            urlcmd.Parameters.Add("@url", SqlDbType.NVarChar, 500).Value = data.Url;
            urlcmd.Parameters.Add("@url_md5", SqlDbType.UniqueIdentifier).Value = new Guid(data.MD5);
            urlcmd.Parameters.Add("@url_host_id", SqlDbType.UniqueIdentifier).Value = host_id;
            urlcmd.Parameters.Add("@url_priority", SqlDbType.TinyInt).Value = urlPriority;
            urlcmd.Parameters.Add("@crc", SqlDbType.BigInt).Value = data.CRC;
            urlcmd.Parameters.Add("@flag_domain", SqlDbType.TinyInt).Value = flagDomain;
            urlcmd.Parameters.Add("@flag_robots", SqlDbType.TinyInt).Value = flagRobots;
            urlcmd.Parameters.Add("@flag_updated", SqlDbType.TinyInt).Value = data.Updated ? 1 : 0;
            urlcmd.Parameters.Add("@last_visited", SqlDbType.SmallDateTime).Value = data.TimeStamp;
            urlcmd.Parameters.Add("@flag_redirected", SqlDbType.TinyInt).Value = flagRedirected;
            SqlParameter idParam = urlcmd.Parameters.Add("@id", SqlDbType.Int);
            idParam.Direction = ParameterDirection.Output;

            urlcmd.ExecuteNonQuery();
            // Throws if the procedure left @id as DBNull; the outer catch then reports
            // the failure and 0 is returned.
            retVal = (int)idParam.Value;
        }

        if (data.Updated)
        {
            // Compress the url's data. Null is treated like empty so that a missing body
            // is stored as a zero-length payload instead of failing after the url row
            // has already been updated.
            byte[] payload;
            int originalLength;
            if (data.Data != null && data.Data.Length > 0)
            {
                byte[] compressed = null;
                string urldata = InternetUtils.Base64Decode(data.Data);
                CompressionUtils.CompressString(ref urldata, out compressed);
                payload = compressed;
                // NOTE(review): this is the length of the Base64 text, not of the decoded
                // content - kept as in the original; confirm which one is expected.
                originalLength = data.Data.Length;
            }
            else
            {
                payload = new byte[0];
                originalLength = 0;
            }

            // Update the url data table with the ID returned by cw_update_url.
            using (SqlCommand urldatacmd = new SqlCommand("cw_update_url_data", dbcon, transaction))
            {
                urldatacmd.CommandType = CommandType.StoredProcedure;
                urldatacmd.CommandTimeout = settings.DBActionTimeout;
                urldatacmd.Parameters.Add("@url_id", SqlDbType.Int).Value = retVal;
                urldatacmd.Parameters.Add("@data", SqlDbType.Image).Value = payload;
                urldatacmd.Parameters.Add("@length", SqlDbType.Int).Value = payload.Length;
                urldatacmd.Parameters.Add("@original_length", SqlDbType.Int).Value = originalLength;
                urldatacmd.Parameters.Add("@http_code", SqlDbType.SmallInt).Value = (short)data.HttpStatusCode;
                urldatacmd.Parameters.Add("@retrieval_time", SqlDbType.Int).Value = data.RetrievalTime;
                urldatacmd.ExecuteNonQuery();
            }
        }
    }
    catch (Exception e)
    {
        AddToReportQueue(CWLoggerEntryType.Warning, "DBUpdater failed to update a Url in the database: " + e.ToString());
        // Report failure even if the url row itself was updated before the exception.
        retVal = 0;
    }
    return retVal;
}