Beispiel #1
0
 /// <summary>
 /// Loops for as long as <c>open</c> is true, forwarding queued messages to <c>parent</c>.
 /// </summary>
 /// <remarks>
 /// On each pass, when <c>parent.CanSend()</c> is true and a message can be dequeued,
 /// the backoff is reset and the message is handed to <c>parent.Send</c>. In every other
 /// case (parent cannot send, or the queue is empty) <c>backoff.Now()</c> is called
 /// instead - presumably this waits for the current backoff interval; confirm against
 /// the backoff type.
 /// </remarks>
 public void Run()
 {
     while (open)
     {
         // Short-circuit: the queue is only touched when the parent is able to send.
         if (parent.CanSend() && messages.TryDequeue(out IMessage pending))
         {
             backoff.Reset();
             parent.Send(pending);
             continue;
         }

         // Nothing could be sent on this pass.
         backoff.Now();
     }
 }
 /// <summary>
 /// Worker loop of the word extraction plugin. Until <c>mustStop</c> is set it repeatedly
 /// selects a url, extracts its words, replaces the url's word relationships and marks the
 /// url as processed.
 /// </summary>
 /// <remarks>
 /// <para>
 /// When no url is available (<c>SelectUrlForWordExtraction</c> yields an ID of 0) the loop
 /// waits for <c>backoff.Next() / 1000</c> seconds. After every iteration that did not fail,
 /// <c>Report()</c> is called and, if <c>settings.PauseBetweenOperations</c> is set, the
 /// loop waits for <c>PauseInSeconds()</c> seconds.
 /// </para>
 /// <para>
 /// A failure while processing a single url is logged as a warning and the loop moves on to
 /// the next url (skipping the report and pause for that iteration). Thread abort and
 /// interrupt requests are never treated as extraction failures: they propagate to the
 /// outer handlers, which return immediately. A "has stopped" entry is always logged on exit.
 /// </para>
 /// </remarks>
 private void PerformExtraction()
 {
     try
     {
         events.Enqueue(new EventLoggerEntry(CWLoggerEntryType.Info, DateTime.Now, "CrawlWave Word Extraction Plugin thread has started with ID 0x" + Thread.CurrentThread.GetHashCode().ToString("x4")));
         //the user may have enabled database just before starting the plugin
         if (settings.UseDatabase && dbcon == null)
         {
             dbProvider = DBConnectionStringProvider.Instance();
             settings.DBConnectionString = dbProvider.ProvideDBConnectionString(name);
             dbcon = new SqlConnection(settings.DBConnectionString);
         }
         while (!mustStop)
         {
             //Select a page from the database, perform word extraction and store the
             //results back in the database
             int    urlId;
             string data;
             SelectUrlForWordExtraction(out urlId, out data);
             if (urlId != 0)
             {
                 try
                 {
                     backoff.Reset();
                     SortedList words = wordExtractor.ExtractWords(data);
                     if (words.Count != 0)
                     {
                         //add all the words to the cache first, so that every word can be
                         //looked up through the cache indexer below
                         foreach (DictionaryEntry de in words)
                         {
                             cache.AddStemmedWord((string)de.Key);
                         }
                         //remove all the old words related to this url from the database
                         RemoveUrlWords(urlId);
                         //now add relationships between the url and its words
                         foreach (DictionaryEntry d in words)
                         {
                             string word = (string)d.Key;
                             //clamp instead of letting the narrowing cast wrap around
                             //(a count above short.MaxValue would otherwise be stored as
                             //a wrong, possibly negative, number)
                             short wordCount = (short)Math.Min((int)d.Value, (int)short.MaxValue);
                             int wordId = cache[word];
                             AddUrlWord(urlId, wordId, wordCount);
                         }
                     }
                     UpdateUrlDataLastProcess(urlId);
                 }
                 catch (ThreadAbortException)
                 {
                     //not an extraction failure: let the outer handler end the thread
                     throw;
                 }
                 catch (ThreadInterruptedException)
                 {
                     //not an extraction failure: let the outer handler end the thread
                     throw;
                 }
                 catch (Exception e)
                 {
                     events.Enqueue(new EventLoggerEntry(CWLoggerEntryType.Warning, DateTime.Now, "WordExtractionPlugin failed to extract words from Url with ID " + urlId.ToString() + ": " + e.ToString()));
                     continue;
                 }
             }
             else
             {
                 //nothing to process right now: back off before asking again
                 SleepWhileRunning(backoff.Next() / 1000);
             }
             Report();
             if (settings.PauseBetweenOperations)
             {
                 SleepWhileRunning(PauseInSeconds());
             }
         }
     }
     catch (ThreadAbortException)
     {
         //The thread was asked to abort, which means it must return at once
         return;
     }
     catch (ThreadInterruptedException)
     {
         //The thread has been asked to Join. We have nothing to do but return.
         return;
     }
     finally
     {
         events.Enqueue(new EventLoggerEntry(CWLoggerEntryType.Info, DateTime.Now, Thread.CurrentThread.Name + " has stopped."));
     }
 }

 /// <summary>
 /// Sleeps for up to <paramref name="seconds"/> seconds in one-second slices, returning
 /// early once <c>mustStop</c> is observed after a slice.
 /// </summary>
 /// <param name="seconds">Maximum number of whole seconds to wait; values of 0 or less do not sleep.</param>
 private void SleepWhileRunning(int seconds)
 {
     for (int i = 0; i < seconds; i++)
     {
         Thread.Sleep(1000);
         if (mustStop)
         {
             break;
         }
     }
 }