示例#1
0
        /// <summary>
        /// Stores an array of <see cref="UrlCrawlData"/> objects and the <see cref="ClientInfo"/>
        /// of the client who returned them in a compressed file on disk.
        /// </summary>
        /// <remarks>
        /// The arguments are wrapped in a <see cref="UrlCrawlDataFile"/>, serialized with a
        /// SoapFormatter and written as the single entry of a zip archive. Both the archive
        /// (plus a ".zip" extension) and the entry are named after a freshly generated GUID.
        /// Exceptions raised by serialization or file I/O are not handled here and propagate
        /// to the caller; all streams are released even in that case.
        /// </remarks>
        /// <param name="info">The <see cref="ClientInfo"/> of the client who returned the data.</param>
        /// <param name="data">An array of <see cref="UrlCrawlData"/> objects containing the
        /// data returned by the client.</param>
        private void SaveXMLFile(ClientInfo info, UrlCrawlData[] data)
        {
            UrlCrawlDataFile udf = new UrlCrawlDataFile(info, data);
            string           id  = Guid.NewGuid().ToString();

            //serialize the object into memory first, so that the entry size and CRC
            //are known before anything is written to the archive
            byte[] buffer;
            using (MemoryStream ms = new MemoryStream())
            {
                SoapFormatter xml = new SoapFormatter();
                xml.Serialize(ms, udf);
                buffer = ms.ToArray();
            }

            //NOTE(review): the path is built by plain concatenation, so DataFilesPath
            //presumably ends with a directory separator - confirm against the settings.
            string fileName = settings.DataFilesPath + id + ".zip";

            Crc32 crc = new Crc32();
            crc.Update(buffer);

            ZipEntry entry = new ZipEntry(id);
            entry.DateTime = DateTime.Now;
            entry.Size     = buffer.Length;
            entry.Crc      = crc.Value;

            //the using blocks guarantee that the file handle is released even if
            //writing the archive fails half way through
            using (FileStream fs = File.Create(fileName))
            using (ZipOutputStream zs = new ZipOutputStream(fs))
            {
                zs.PutNextEntry(entry);
                zs.Write(buffer, 0, buffer.Length);
                zs.Finish();
            }
        }
示例#2
0
        /// <summary>
        /// Loads a data file from the disk, decompresses it and extracts the <see cref="UrlCrawlDataFile"/>
        /// it contains.
        /// </summary>
        /// <remarks>
        /// Only the first entry of the zip archive is read. Any exception is caught, queued
        /// as a warning in the event log and turned into a null return value. The file and
        /// all intermediate streams are released on both the success and the failure path.
        /// </remarks>
        /// <param name="fileName">The name of the file to load.</param>
        /// <returns>The <see cref="UrlCrawlDataFile"/> contained in the file or null if
        /// something goes wrong (including an archive that contains no entries).</returns>
        private UrlCrawlDataFile LoadDataFile(string fileName)
        {
            UrlCrawlDataFile retVal = null;

            try
            {
                //the using blocks make sure the file handle is released even when the
                //archive is corrupt or the deserialization fails
                using (FileStream fs = File.Open(fileName, FileMode.Open))
                using (ZipInputStream zs = new ZipInputStream(fs))
                {
                    ZipEntry entry = zs.GetNextEntry();
                    if (entry != null)
                    {
                        using (MemoryStream ms = new MemoryStream())
                        {
                            //inflate the whole entry into memory in 4KB chunks
                            byte[] chunk = new byte[4096];
                            int    read;
                            while ((read = zs.Read(chunk, 0, chunk.Length)) > 0)
                            {
                                ms.Write(chunk, 0, read);
                            }
                            //rewind so that the formatter reads from the beginning
                            ms.Position = 0;
                            //NOTE(review): SoapFormatter deserialization is only safe on trusted
                            //input - confirm that the data files can only be produced by this server.
                            SoapFormatter xml = new SoapFormatter();
                            retVal = (UrlCrawlDataFile)xml.Deserialize(ms);
                        }
                    }
                }
            }
            catch (Exception e)
            {
                events.Enqueue(new EventLoggerEntry(CWLoggerEntryType.Warning, DateTime.Now, "The DBUpdater plugin failed to load file " + fileName + ":" + e.ToString()));
            }
            return(retVal);
        }
示例#3
0
 /// <summary>
 /// Performs the database update. It is running on pluginThread, so it must be able
 /// to handle ThreadAbortException and ThreadInterruptedException.
 /// </summary>
 /// <remarks>
 /// Loops until mustStop is set. Each iteration selects a data file, loads it, opens
 /// the database connection, stores every <see cref="UrlCrawlData"/> it contains
 /// (optionally inside one transaction per item), deletes the file and closes the
 /// connection. Unexpected exceptions are queued as warnings and the loop continues;
 /// a ThreadInterruptedException is passed on to the outer handler, which ends the thread.
 /// </remarks>
 private void PerformUpdate()
 {
     try
     {
         ReportImmediately(CWLoggerEntryType.Info, "CrawlWave DBUpdater Plugin thread has started with ID 0x" + Thread.CurrentThread.GetHashCode().ToString("x4"));
         while (!mustStop)
         {
             try
             {
                 //Select one of the data files to process
                 string fileName = SelectDataFileName();
                 if (fileName != String.Empty)
                 {
                     //Open a data file and extract the information it contains
                     UrlCrawlDataFile udf = LoadDataFile(fileName);
                     if (udf != null)
                     {
                         try
                         {
                             try
                             {
                                 dbcon.Open();
                             }
                             catch (Exception e)
                             {
                                 ReportImmediately(CWLoggerEntryType.Error, "DBUpdater plugin failed to connect to the database:" + e.ToString());
                                 //rethrow without resetting the stack trace;
                                 //this will cause the plugin to pause if necessary
                                 throw;
                             }
                             AddToReportQueue(CWLoggerEntryType.Info, "DBUpdater processing file " + fileName);
                             //process each UrlCrawlData
                             foreach (UrlCrawlData data in udf.Data)
                             {
                                 SqlTransaction transaction = null;
                                 if (settings.UseTransactions)
                                 {
                                     transaction = dbcon.BeginTransaction();
                                 }
                                 //From here on the transaction object itself is tested instead of
                                 //re-reading the setting, so a change of the setting in the middle
                                 //of an item can neither dereference null nor leave a transaction open.
                                 try
                                 {
                                     int urlId = UpdateUrl(data, transaction);
                                     if (urlId != 0)
                                     {
                                         if (data.UrlToCrawl.FlagDomain == DomainFlagValue.MustVisit)
                                         {
                                             //replace the stored out links of the url with the fresh ones
                                             ClearUrlOutLinks(urlId, transaction);
                                             InsertUrlOutLinks(urlId, data, transaction);
                                         }
                                         if (transaction != null)
                                         {
                                             transaction.Commit();
                                         }
                                     }
                                     else
                                     {
                                         //UpdateUrl returned no ID, so nothing must be persisted for this item
                                         if (transaction != null)
                                         {
                                             transaction.Rollback();
                                         }
                                     }
                                 }
                                 catch (ThreadInterruptedException)
                                 {
                                     if (transaction != null)
                                     {
                                         transaction.Rollback();
                                     }
                                     throw;
                                 }
                                 finally
                                 {
                                     if (transaction != null)
                                     {
                                         transaction.Dispose();
                                     }
                                 }
                             }
                             //if everything succeeds delete the file
                             File.Delete(fileName);
                         }
                         finally
                         {
                             //Always release the connection, even when processing failed,
                             //otherwise the next call to Open() would fail on a connection
                             //that is still open.
                             try
                             {
                                 if (dbcon != null)
                                 {
                                     if (dbcon.State != ConnectionState.Closed)
                                     {
                                         dbcon.Close();
                                     }
                                 }
                             }
                             catch (Exception dce)
                             {
                                 ReportImmediately(CWLoggerEntryType.Error, "DBUpdater Plugin failed to close the connection to the database: " + dce.ToString());
                             }
                         }
                     }
                 }
             }
             catch (ThreadInterruptedException)
             {
                 //must reach the outer handler so that the thread stops, instead of
                 //being reported as an unexpected exception below
                 throw;
             }
             catch (Exception e)
             {
                 AddToReportQueue(CWLoggerEntryType.Warning, "DBUpdater encountered an unexpected exception: " + e.Message);
             }
             finally
             {
                 Report();
                 //pause if necessary
                 if (settings.PauseBetweenOperations)
                 {
                     int waitSeconds = PauseInSeconds();
                     //sleep in one second steps so that a stop request is noticed quickly
                     for (int i = 0; i < waitSeconds; i++)
                     {
                         Thread.Sleep(1000);
                         if (mustStop)
                         {
                             break;
                         }
                     }
                 }
             }
             //NOTE(review): forced collection after every iteration - presumably meant to
             //release the deserialized data file early; confirm it is still needed.
             GC.Collect();
         }
     }
     catch (ThreadAbortException)
     {
         //The thread was asked to abort, which means it must return at once
         return;
     }
     catch (ThreadInterruptedException)
     {
         //The thread has been asked to Join. We have nothing to do but return.
         return;
     }
     finally
     {
         AddToReportQueue(CWLoggerEntryType.Info, Thread.CurrentThread.Name + " has stopped.");
     }
 }