Beispiel #1
0
 /// <summary>
 /// Initializes the words cache. The constructor is private so that only the class
 /// itself can create an instance.
 /// </summary>
 /// <remarks>
 /// Stores the connection string for the word extraction plugin in the plugin
 /// settings, creates the <see cref="SqlConnection"/> from it, prepares an empty
 /// word table and finally calls <c>LoadCache</c>.
 /// </remarks>
 private WordsCache()
 {
     // Name under which the connection string provider is queried.
     const string pluginName = "CrawlWave.ServerPlugins.WordExtraction";

     settings = PluginSettings.Instance();
     dbProvider = DBConnectionStringProvider.Instance();

     // The connection string is written to the settings first and then read back
     // from there when the connection object is created.
     settings.DBConnectionString = dbProvider.ProvideDBConnectionString(pluginName);
     dbcon = new SqlConnection(settings.DBConnectionString);

     words = new Hashtable();
     stemming = Stemming.Instance();
     culture = new CultureInfo("el-GR");

     // Must run last: everything above has to be in place before loading.
     LoadCache();
 }
Beispiel #2
0
		/// <summary>
		/// Initializes the plugin settings with their default values. The constructor is
		/// private so that only the class itself can create an instance.
		/// </summary>
		/// <remarks>
		/// After the defaults and the connection string have been set, <c>LoadSettings</c>
		/// is called (presumably to replace the defaults with stored values - verify
		/// against its implementation).
		/// </remarks>
		private PluginSettings()
		{
			// Build the default settings on a local before handing them to the field.
			EbayPluginSettings defaults = new EbayPluginSettings();
			defaults.PauseBetweenOperations = false;
			defaults.PauseDelay = 0;
			defaults.UseTransactions = false;
			defaults.DBActionTimeout = 60;
			settings = defaults;

			// The connection string is looked up under the executing assembly's name.
			dbProvider = DBConnectionStringProvider.Instance();
			string assemblyName = Assembly.GetExecutingAssembly().GetName().Name;
			connectionString = dbProvider.ProvideDBConnectionString(assemblyName);

			LoadSettings();
		}
Beispiel #3
0
 /// <summary>
 /// Initializes the application-wide globals. The constructor is private so that
 /// only the class itself can create an instance.
 /// </summary>
 /// <remarks>
 /// Registers every entry of <c>formNames</c> in <c>loadedForms</c> with a null
 /// value and creates the file event logger at "appPath + appName + .log".
 /// </remarks>
 private Globals()
 {
     dbProvider = DBConnectionStringProvider.Instance();

     // appName is assigned before GetAppPath() is called, as in the original order.
     appName = "CrawlWave.ServerManager";
     appPath = GetAppPath();

     // One slot per known form name; the value starts out as null.
     loadedForms = new Hashtable(8);
     foreach (string knownForm in formNames)
     {
         loadedForms.Add(knownForm, null);
     }

     // NOTE(review): plain concatenation - assumes appPath already ends with a
     // directory separator; confirm against GetAppPath().
     string logFile = appPath + appName + ".log";
     log = new FileEventLogger(logFile, true, appName);
 }
Beispiel #4
0
 /// <summary>
 /// Initializes the plugin settings with their default values. The constructor is
 /// private so that only the class itself can create an instance.
 /// </summary>
 /// <remarks>
 /// After the defaults and the connection string have been set, <c>LoadSettings</c>
 /// is called (presumably to replace the defaults with stored values - verify
 /// against its implementation).
 /// </remarks>
 private PluginSettings()
 {
     // Build the default settings on a local before handing them to the field.
     USPluginSettings defaults = new USPluginSettings();
     defaults.Threshold = 1000;
     defaults.SelectionSize = 10000;
     defaults.SelectionMode = false;
     defaults.PauseBetweenOperations = false;
     defaults.PauseDelay = 0;
     defaults.UseTransactions = false;
     defaults.DBTimeout = 60;
     settings = defaults;

     // The connection string is looked up under the executing assembly's name.
     dbProvider = DBConnectionStringProvider.Instance();
     string assemblyName = Assembly.GetExecutingAssembly().GetName().Name;
     connectionString = dbProvider.ProvideDBConnectionString(assemblyName);

     LoadSettings();
 }
Beispiel #5
0
        /// <summary>
        /// Constructs a new instance of the <see cref="PageRankPlugin"/> class.
        /// </summary>
        /// <remarks>
        /// Sets the plugin's descriptive fields and initial state and creates the
        /// <see cref="SqlConnection"/> from the connection string registered under
        /// the plugin's name. The connection is only created here, not opened.
        /// </remarks>
        public PageRankPlugin()
        {
            // Descriptive metadata. The name doubles as the connection string key below.
            name = "CrawlWave.ServerPlugins.PageRank";
            description = "CrawlWave PageRank Plugin";
            version = Assembly.GetExecutingAssembly().GetName().Version.ToString();

            // Initial run state: nothing running, nothing requested.
            percent = 0;
            pluginThread = null;
            mustStop = false;
            dataDependent = true;
            enabled = true;

            // Database connection for this plugin.
            string pageRankConnection = DBConnectionStringProvider.Instance().ProvideDBConnectionString(name);
            dbcon = new SqlConnection(pageRankConnection);
        }
Beispiel #6
0
 /// <summary>
 /// Initializes the plugin settings with their default values. The constructor is
 /// private so that only the class itself can create an instance.
 /// </summary>
 /// <remarks>
 /// After the defaults and the helper fields have been set, <c>LoadSettings</c> is
 /// called (presumably to replace the defaults with stored values - verify against
 /// its implementation).
 /// </remarks>
 private PluginSettings()
 {
     // Build the default settings on a local before handing them to the field.
     INPluginSettings defaults = new INPluginSettings();
     defaults.Threads = 5;
     defaults.CleanUrls = true;
     defaults.CheckUrls = true;
     defaults.PauseBetweenOperations = false;
     defaults.PauseDelay = 0;
     settings = defaults;

     dbProvider = DBConnectionStringProvider.Instance();
     rnd = new Random();

     // The application name is taken from the executing assembly.
     appName = Assembly.GetExecutingAssembly().GetName().Name;

     // No input or output file is selected initially.
     inputFile = String.Empty;
     outputFile = String.Empty;

     LoadSettings();
 }
Beispiel #7
0
 /// <summary>
 /// Create a new instance of the <see cref="EbayPlugin"/> class.
 /// </summary>
 /// <remarks>
 /// Sets the plugin's descriptive fields and initial state, stores the connection
 /// string registered under the plugin's name in the settings, creates the
 /// <see cref="SqlConnection"/> from it and compiles the two regular expressions
 /// used to pick the user name and the feedback figures out of a page.
 /// </remarks>
 public EbayPlugin()
 {
     mutex         = new Mutex();
     settings      = PluginSettings.Instance();
     name          = "CrawlWave.ServerPlugins.Ebay";
     description   = "CrawlWave Ebay Plugin";
     dataDependent = true;
     state         = PluginState.Stopped;
     enabled       = true;
     version       = Assembly.GetExecutingAssembly().GetName().Version.ToString();
     percent       = 0;
     mustStop      = false;
     pluginThread  = null;
     dbProvider    = DBConnectionStringProvider.Instance();
     settings.DBConnectionString = dbProvider.ProvideDBConnectionString(name);
     dbcon       = new SqlConnection(settings.DBConnectionString);

     // Both expressions share the same options.
     RegexOptions options = RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Multiline;

     // Sample input: eBay My World: c7gman<img
     regUser     = new Regex("<h1>eBay My World:\\s*(?<user>[^<]*)<img", options);

     // Sample input: Feedback score: <b>285</b><span class="vSep">|</span>Positive feedback: <b>100%</b>
     // The "positive" group accepts an integer with an optional fractional part.
     // The previous form (\d+\.?\d) required at least two digits and at most one
     // decimal, so values such as "0%", "5%" or "99.55%" never matched.
     regFeedback = new Regex("Feedback score: <b>(?<feedback>\\d+)</b><span class=\"vSep\">\\|</span>Positive feedback: <b>(?<positive>\\d+(?:\\.\\d+)?)%</b>", options);
 }
 /// <summary>
 /// Create a new instance of the <see cref="WordExtractorPlugin"/> class.
 /// </summary>
 /// <remarks>
 /// The database connection is only created here when <c>settings.UseDatabase</c>
 /// is set; otherwise <c>dbcon</c> is left untouched by this constructor.
 /// </remarks>
 public WordExtractorPlugin()
 {
     mutex = new Mutex();
     settings = PluginSettings.Instance();

     // Descriptive fields and initial state.
     name = "CrawlWave.ServerPlugins.WordExtraction";
     description = "CrawlWave Word Extraction Plugin";
     dataDependent = false;
     state = PluginState.Stopped;
     enabled = true;

     string assemblyVersion = Assembly.GetExecutingAssembly().GetName().Version.ToString();
     version = assemblyVersion;

     percent = 0;
     mustStop = false;
     pluginThread = null;

     // The connection string is registered under the plugin's name.
     if (settings.UseDatabase)
     {
         dbProvider = DBConnectionStringProvider.Instance();
         settings.DBConnectionString = dbProvider.ProvideDBConnectionString(name);
         dbcon = new SqlConnection(settings.DBConnectionString);
     }

     // Helpers used by the extraction loop.
     wordExtractor = WordExtractor.Instance();
     cache = WordsCache.Instance();
     backoff = new Backoff(BackoffSpeed.Slow, 30000);
 }
 /// <summary>
 /// Worker loop of the word extraction plugin. Until <c>mustStop</c> is set it
 /// repeatedly selects a url, extracts its words, stores the words and the per-url
 /// word counts, and reports progress.
 /// </summary>
 /// <remarks>
 /// <para>When no url is selected (<c>UrlID</c> comes back as 0) the loop sleeps for
 /// the interval returned by <c>backoff.Next()</c>; a successful selection resets
 /// the backoff.</para>
 /// <para>A failure while processing a url is enqueued as a warning and the loop
 /// continues with the next iteration. <see cref="ThreadAbortException"/> and
 /// <see cref="ThreadInterruptedException"/> end the method silently. An
 /// informational "has stopped" entry is always enqueued on exit.</para>
 /// </remarks>
 private void PerformExtraction()
 {
     try
     {
         events.Enqueue(new EventLoggerEntry(CWLoggerEntryType.Info, DateTime.Now, "CrawlWave Word Extraction Plugin thread has started with ID 0x" + Thread.CurrentThread.GetHashCode().ToString("x4")));
         //the user may have enabled database just before starting the plugin
         if (settings.UseDatabase && dbcon == null)
         {
             dbProvider = DBConnectionStringProvider.Instance();
             settings.DBConnectionString = dbProvider.ProvideDBConnectionString(name);
             dbcon = new SqlConnection(settings.DBConnectionString);
         }
         while (!mustStop)
         {
             //Select a page from the database, perform word extraction and store the
             //results back in the database
             int    UrlID = 0;
             string data  = String.Empty;
             SelectUrlForWordExtraction(out UrlID, out data);
             if (UrlID != 0)
             {
                 try
                 {
                     backoff.Reset();
                     SortedList words = wordExtractor.ExtractWords(data);
                     if (words.Count != 0)
                     {
                         //add all the words to the database if they don't exist already
                         foreach (DictionaryEntry de in words)
                         {
                             cache.AddStemmedWord((string)de.Key);
                         }
                         //remove all the old words related to this url from the database
                         RemoveUrlWords(UrlID);
                         //now add relationships between the url and its words
                         foreach (DictionaryEntry d in words)
                         {
                             string word        = (string)d.Key;
                             int    occurrences = (int)d.Value;
                             //the count is stored as a short: clamp it instead of letting
                             //the narrowing cast silently wrap around for very frequent words
                             short  word_count  = occurrences > short.MaxValue ? short.MaxValue : (short)occurrences;
                             int    word_id     = cache[word];
                             AddUrlWord(UrlID, word_id, word_count);
                         }
                     }
                     UpdateUrlDataLastProcess(UrlID);
                 }
                 catch (Exception e)
                 {
                     events.Enqueue(new EventLoggerEntry(CWLoggerEntryType.Warning, DateTime.Now, "WordExtractionPlugin failed to extract words from Url with ID " + UrlID.ToString() + ": " + e.ToString()));
                     //NOTE(review): this skips Report() and the optional pause below for
                     //the failed url; kept as in the original - confirm it is intended
                     continue;
                 }
             }
             else
             {
                 //nothing to process: back off before asking again
                 SleepWhileRunning(backoff.Next() / 1000);
             }
             Report();
             if (settings.PauseBetweenOperations)
             {
                 SleepWhileRunning(PauseInSeconds());
             }
         }
     }
     catch (ThreadAbortException)
     {
         //The thread was asked to abort, which means it must return at once
         return;
     }
     catch (ThreadInterruptedException)
     {
         //The thread has been asked to Join. We have nothing to do but return.
         return;
     }
     finally
     {
         events.Enqueue(new EventLoggerEntry(CWLoggerEntryType.Info, DateTime.Now, Thread.CurrentThread.Name + " has stopped."));
     }
 }

 /// <summary>
 /// Sleeps for up to <paramref name="seconds"/> seconds in one-second steps and
 /// returns early as soon as <c>mustStop</c> is observed after a step.
 /// </summary>
 /// <param name="seconds">Number of one-second steps to sleep; values of zero or
 /// less return immediately.</param>
 private void SleepWhileRunning(int seconds)
 {
     for (int i = 0; i < seconds; i++)
     {
         Thread.Sleep(1000);
         if (mustStop)
         {
             break;
         }
     }
 }