/// <summary>
/// Stores an array of <see cref="UrlCrawlData"/> objects and the <see cref="ClientInfo"/>
/// of the client who returned them in a compressed file on disk. The file is named
/// after a freshly generated GUID and is created under settings.DataFilesPath.
/// </summary>
/// <param name="info">The <see cref="ClientInfo"/> of the client who returned the data.</param>
/// <param name="data">An array of <see cref="UrlCrawlData"/> objects containing the
/// data returned by the client.</param>
private void SaveXMLFile(ClientInfo info, UrlCrawlData[] data)
{
    UrlCrawlDataFile udf = new UrlCrawlDataFile(info, data);
    string id = Guid.NewGuid().ToString();

    //serialize the object into a memory buffer; the using block releases the
    //stream even if serialization throws
    byte[] buffer;
    using (MemoryStream ms = new MemoryStream())
    {
        SoapFormatter xml = new SoapFormatter();
        xml.Serialize(ms, udf);
        buffer = ms.ToArray();
    }

    //prepare the zip entry before touching the disk, so that a failure here
    //does not leave an empty file behind
    Crc32 crc = new Crc32();
    crc.Update(buffer);
    ZipEntry entry = new ZipEntry(id);
    entry.DateTime = DateTime.Now;
    entry.Size = buffer.Length;
    entry.Crc = crc.Value;

    //DataFilesPath is concatenated as-is, exactly as before
    string fileName = settings.DataFilesPath + id + ".zip";

    //both streams are disposed on every path, so the file handle is never
    //leaked when writing fails half way through
    using (FileStream fs = File.Create(fileName))
    using (ZipOutputStream zs = new ZipOutputStream(fs))
    {
        zs.PutNextEntry(entry);
        zs.Write(buffer, 0, buffer.Length);
        zs.Finish();
    }
}
/// <summary>
/// Loads a data file from the disk, decompresses it and extracts the <see cref="UrlCrawlDataFile"/>
/// it contains. Only the first entry of the archive is read.
/// </summary>
/// <param name="fileName">The name of the file to load.</param>
/// <returns>The <see cref="UrlCrawlDataFile"/> contained in the file or null if
/// something goes wrong (the failure is queued as a warning event) or the archive
/// contains no entries.</returns>
private UrlCrawlDataFile LoadDataFile(string fileName)
{
    UrlCrawlDataFile retVal = null;
    try
    {
        //the using blocks guarantee that the file handle is released even when
        //decompression or deserialization throws
        using (FileStream fs = File.Open(fileName, FileMode.Open))
        using (ZipInputStream zs = new ZipInputStream(fs))
        {
            ZipEntry entry = zs.GetNextEntry();
            if (entry != null)
            {
                using (MemoryStream ms = new MemoryStream())
                {
                    //inflate the whole entry into memory in 4KB chunks
                    byte[] data = new byte[4096];
                    int size;
                    while ((size = zs.Read(data, 0, data.Length)) > 0)
                    {
                        ms.Write(data, 0, size);
                    }
                    ms.Position = 0;
                    //NOTE(review): SoapFormatter must not be used on untrusted input;
                    //this presumably only reads files written by SaveXMLFile - confirm
                    SoapFormatter xml = new SoapFormatter();
                    retVal = (UrlCrawlDataFile)xml.Deserialize(ms);
                }
            }
        }
    }
    catch (Exception e)
    {
        events.Enqueue(new EventLoggerEntry(CWLoggerEntryType.Warning, DateTime.Now, "The DBUpdater plugin failed to load file " + fileName + ":" + e.ToString()));
    }
    return retVal;
}
/// <summary>
/// Performs the database update. It is running on pluginThread, so it must be able
/// to handle ThreadAbortException and ThreadInterruptedException. It loops until
/// mustStop is set: each pass selects a data file, loads it, stores every
/// <see cref="UrlCrawlData"/> it contains in the database and deletes the file once
/// all of its entries have been processed.
/// </summary>
private void PerformUpdate()
{
    try
    {
        ReportImmediately(CWLoggerEntryType.Info, "CrawlWave DBUpdater Plugin thread has started with ID 0x" + Thread.CurrentThread.GetHashCode().ToString("x4"));
        while (!mustStop)
        {
            try
            {
                //Select one of the data files to process
                string fileName = SelectDataFileName();
                if (fileName != null && fileName != String.Empty)
                {
                    //Open a data file and extract the information it contains
                    UrlCrawlDataFile udf = LoadDataFile(fileName);
                    if (udf != null)
                    {
                        try
                        {
                            dbcon.Open();
                        }
                        catch (Exception e)
                        {
                            ReportImmediately(CWLoggerEntryType.Error, "DBUpdater plugin failed to connect to the database:" + e.ToString());
                            throw; //this will cause the plugin to pause if necessary
                        }
                        try
                        {
                            AddToReportQueue(CWLoggerEntryType.Info, "DBUpdater processing file " + fileName);
                            //process each UrlCrawlData
                            foreach (UrlCrawlData data in udf.Data)
                            {
                                SqlTransaction transaction = null;
                                if (settings.UseTransactions)
                                {
                                    transaction = dbcon.BeginTransaction();
                                }
                                try
                                {
                                    int urlID = UpdateUrl(data, transaction);
                                    if (urlID != 0)
                                    {
                                        if (data.UrlToCrawl.FlagDomain == DomainFlagValue.MustVisit)
                                        {
                                            ClearUrlOutLinks(urlID, transaction);
                                            InsertUrlOutLinks(urlID, data, transaction);
                                        }
                                        if (transaction != null)
                                        {
                                            transaction.Commit();
                                        }
                                    }
                                    else
                                    {
                                        if (transaction != null)
                                        {
                                            transaction.Rollback();
                                        }
                                    }
                                }
                                catch (ThreadInterruptedException)
                                {
                                    if (transaction != null)
                                    {
                                        transaction.Rollback();
                                    }
                                    throw; //rethrow without resetting the stack trace
                                }
                                finally
                                {
                                    //keyed on the transaction itself, so the cleanup still
                                    //matches if the setting changes while an item is processed
                                    if (transaction != null)
                                    {
                                        transaction.Dispose();
                                    }
                                }
                            }
                            //if everything succeeds delete the file
                            File.Delete(fileName);
                        }
                        finally
                        {
                            //always close the connection: leaving it open after a failure
                            //would make the next dbcon.Open() fail as well
                            try
                            {
                                if (dbcon != null)
                                {
                                    if (dbcon.State != ConnectionState.Closed)
                                    {
                                        dbcon.Close();
                                    }
                                }
                            }
                            catch (Exception dce)
                            {
                                ReportImmediately(CWLoggerEntryType.Error, "DBUpdater Plugin failed to close the connection to the database: " + dce.ToString());
                            }
                        }
                    }
                }
            }
            catch (ThreadInterruptedException)
            {
                //let the interruption reach the outer handler instead of reporting it
                //as an unexpected exception and carrying on with the loop
                throw;
            }
            catch (Exception e)
            {
                AddToReportQueue(CWLoggerEntryType.Warning, "DBUpdater encountered an unexpected exception: " + e.Message);
            }
            finally
            {
                Report();
                //pause if necessary, sleeping one second at a time so that a stop
                //request is noticed promptly
                if (settings.PauseBetweenOperations)
                {
                    int waitSeconds = PauseInSeconds();
                    for (int i = 0; i < waitSeconds; i++)
                    {
                        Thread.Sleep(1000);
                        if (mustStop)
                        {
                            break;
                        }
                    }
                }
            }
            GC.Collect();
        }
    }
    catch (ThreadAbortException)
    {
        //The thread was asked to abort, which means it must return at once
        return;
    }
    catch (ThreadInterruptedException)
    {
        //The thread has been asked to Join. We have nothing to do but return.
        return;
    }
    finally
    {
        AddToReportQueue(CWLoggerEntryType.Info, Thread.CurrentThread.Name + " has stopped.");
    }
}