Exemple #1
0
 /// <summary>
 /// Creates a <see cref="RobotsCache"/>. The constructor is private, so instances
 /// can only be created from inside the class.
 /// </summary>
 /// <remarks>
 /// Allocates the empty hosts dictionary, asks the connection string provider for the
 /// string registered under "CrawlWave.ServerCommon.RobotsCache", creates the SQL
 /// connection from it and then calls LoadCache().
 /// </remarks>
 private RobotsCache()
 {
     const string providerKey = "CrawlWave.ServerCommon.RobotsCache";

     // The dictionary does not depend on any of the database objects below.
     // NOTE(review): no comparer is passed, so byte[] keys are matched by reference,
     // not by content - confirm that this is what the lookups expect.
     hosts = new Dictionary<byte[], RobotsTxtEntry>();

     // Connection string first, then the connection that is built from it.
     dbProvider = DBConnectionStringProvider.Instance();
     connectionString = dbProvider.ProvideDBConnectionString(providerKey);
     dbcon = new SqlConnection(connectionString);

     // Called last, once every field above has been assigned.
     LoadCache();
 }
Exemple #2
0
 /// <summary>
 /// Creates a <see cref="BannedHostsCache"/>. Being private, the constructor can
 /// only be invoked from inside the class.
 /// </summary>
 /// <remarks>
 /// Resolves the connection string registered under
 /// "CrawlWave.ServerCommon.BannedHostsCache", creates the SQL connection and the
 /// empty hosts dictionary, and then calls LoadCache().
 /// </remarks>
 private BannedHostsCache()
 {
     const string providerKey = "CrawlWave.ServerCommon.BannedHostsCache";

     // Database plumbing: provider -> connection string -> connection.
     dbProvider = DBConnectionStringProvider.Instance();
     connectionString = dbProvider.ProvideDBConnectionString(providerKey);
     dbcon = new SqlConnection(connectionString);

     // NOTE(review): without a comparer the byte[] keys are compared by reference,
     // not by content - confirm that the lookups rely on that.
     hosts = new Dictionary<byte[], string>();

     // Called last, once every field above has been assigned.
     LoadCache();
 }
Exemple #3
0
 /// <summary>
 /// Creates a <see cref="WordsCache"/>. Being private, the constructor can only be
 /// invoked from inside the class.
 /// </summary>
 /// <remarks>
 /// Stores the connection string registered under
 /// "CrawlWave.ServerPlugins.WordExtraction" in the plugin settings, creates the SQL
 /// connection from that setting, prepares the words table, the stemmer and the
 /// "el-GR" culture, and finally calls LoadCache().
 /// </remarks>
 private WordsCache()
 {
     const string providerKey = "CrawlWave.ServerPlugins.WordExtraction";

     // The words table is independent of everything else set up here.
     words = new Hashtable();

     // The connection string is written to the settings object and read back from it
     // when the connection is created.
     settings = PluginSettings.Instance();
     dbProvider = DBConnectionStringProvider.Instance();
     settings.DBConnectionString = dbProvider.ProvideDBConnectionString(providerKey);
     dbcon = new SqlConnection(settings.DBConnectionString);

     stemming = Stemming.Instance();
     culture = new CultureInfo("el-GR");

     // Called last, once every field above has been assigned.
     LoadCache();
 }
Exemple #4
0
 /// <summary>
 /// Creates the <see cref="PluginSettings"/> object. Being private, the constructor
 /// can only be invoked from inside the class.
 /// </summary>
 /// <remarks>
 /// Fills a new EbayPluginSettings with defaults (no pausing, no transactions, a
 /// DBActionTimeout of 60), looks up the connection string registered under the name
 /// of the executing assembly and then calls LoadSettings().
 /// </remarks>
 private PluginSettings()
 {
     // Defaults, assigned before LoadSettings() runs.
     settings = new EbayPluginSettings();
     settings.PauseBetweenOperations = false;
     settings.PauseDelay = 0;
     settings.UseTransactions = false;
     settings.DBActionTimeout = 60;

     // The connection string is registered under the executing assembly's name.
     dbProvider = DBConnectionStringProvider.Instance();
     string assemblyName = Assembly.GetExecutingAssembly().GetName().Name;
     connectionString = dbProvider.ProvideDBConnectionString(assemblyName);

     LoadSettings();
 }
Exemple #5
0
 /// <summary>
 /// Creates the <see cref="PluginSettings"/> object. Being private, the constructor
 /// can only be invoked from inside the class.
 /// </summary>
 /// <remarks>
 /// Fills a new USPluginSettings with defaults (Threshold 1000, SelectionSize 10000,
 /// SelectionMode off, no pausing, no transactions, a DBTimeout of 60), looks up the
 /// connection string registered under the name of the executing assembly and then
 /// calls LoadSettings().
 /// </remarks>
 private PluginSettings()
 {
     // Defaults, assigned before LoadSettings() runs.
     settings = new USPluginSettings();
     settings.Threshold = 1000;
     settings.SelectionSize = 10000;
     settings.SelectionMode = false;
     settings.PauseBetweenOperations = false;
     settings.PauseDelay = 0;
     settings.UseTransactions = false;
     settings.DBTimeout = 60;

     // The connection string is registered under the executing assembly's name.
     dbProvider = DBConnectionStringProvider.Instance();
     string assemblyName = Assembly.GetExecutingAssembly().GetName().Name;
     connectionString = dbProvider.ProvideDBConnectionString(assemblyName);

     LoadSettings();
 }
Exemple #6
0
 /// <summary>
 /// Create a new instance of the <see cref="EbayPlugin"/> class.
 /// </summary>
 /// <remarks>
 /// Sets the plugin's descriptive fields and initial state (stopped, enabled, 0 percent,
 /// no worker thread), stores the connection string registered under the plugin name in
 /// the plugin settings, creates the SQL connection from it and compiles the two regular
 /// expressions that pick the user name and the feedback figures out of a page.
 /// </remarks>
 public EbayPlugin()
 {
     mutex = new Mutex();
     settings = PluginSettings.Instance();
     name = "CrawlWave.ServerPlugins.Ebay";
     description = "CrawlWave Ebay Plugin";
     dataDependent = true;
     state = PluginState.Stopped;
     enabled = true;
     version = Assembly.GetExecutingAssembly().GetName().Version.ToString();
     percent = 0;
     mustStop = false;
     pluginThread = null;
     //name must already be set here: it is the key the connection string is looked up with
     dbProvider = DBConnectionStringProvider.Instance();
     settings.DBConnectionString = dbProvider.ProvideDBConnectionString(name);
     dbcon = new SqlConnection(settings.DBConnectionString);
     //Sample of the markup regUser is written against:
     //  eBay My World: c7gman<img
     regUser = new Regex("<h1>eBay My World:\\s*(?<user>[^<]*)<img", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Multiline);
     //Sample of the markup regFeedback is written against:
     //  Feedback score: <b>285</b><span class="vSep">|</span>Positive feedback: <b>100%</b>
     //The percentage is matched as one or more digits with an optional fractional part, so
     //"5", "100", "99.5" and "99.55" are all accepted. The previous pattern (\d+\.?\d)
     //needed at least two digits and allowed only one digit after the decimal point.
     regFeedback = new Regex("Feedback score: <b>(?<feedback>\\d+)</b><span class=\"vSep\">\\|</span>Positive feedback: <b>(?<positive>\\d+(?:\\.\\d+)?)%</b>", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Multiline);
 }
 /// <summary>
 /// Create a new instance of the <see cref="WordExtractorPlugin"/> class.
 /// </summary>
 /// <remarks>
 /// Sets the plugin's descriptive fields and initial state (stopped, enabled, 0 percent,
 /// no worker thread). The SQL connection is only created when the settings have
 /// UseDatabase switched on. Afterwards the word extractor, the words cache and a slow
 /// backoff (constructed with 30000) are obtained.
 /// </remarks>
 public WordExtractorPlugin()
 {
     // Synchronisation object and shared settings.
     mutex = new Mutex();
     settings = PluginSettings.Instance();

     // Descriptive information about the plugin.
     name = "CrawlWave.ServerPlugins.WordExtraction";
     description = "CrawlWave Word Extraction Plugin";
     version = Assembly.GetExecutingAssembly().GetName().Version.ToString();
     dataDependent = false;

     // Initial run state: enabled but not running.
     enabled = true;
     state = PluginState.Stopped;
     percent = 0;
     mustStop = false;
     pluginThread = null;

     // The connection is only prepared when database use is switched on; name is the
     // key the connection string is looked up with.
     if(settings.UseDatabase)
     {
         dbProvider = DBConnectionStringProvider.Instance();
         settings.DBConnectionString = dbProvider.ProvideDBConnectionString(name);
         dbcon = new SqlConnection(settings.DBConnectionString);
     }

     // Collaborators used by the extraction loop.
     wordExtractor = WordExtractor.Instance();
     cache = WordsCache.Instance();
     backoff = new Backoff(BackoffSpeed.Slow, 30000);
 }
 /// <summary>
 /// Runs the extraction loop until mustStop is set: selects a url, extracts its words,
 /// stores the words and their per-url counts, and backs off when no url is available.
 /// </summary>
 /// <remarks>
 /// A failure while processing a single url is logged as a warning and the loop moves
 /// on to the next iteration. ThreadAbortException and ThreadInterruptedException end
 /// the method. A "has stopped" entry is always logged on the way out.
 /// </remarks>
 private void PerformExtraction()
 {
     try
     {
         events.Enqueue(new EventLoggerEntry(CWLoggerEntryType.Info, DateTime.Now, "CrawlWave Word Extraction Plugin thread has started with ID 0x" + Thread.CurrentThread.GetHashCode().ToString("x4")));
         //the user may have enabled database just before starting the plugin
         if(settings.UseDatabase && dbcon == null)
         {
             dbProvider = DBConnectionStringProvider.Instance();
             settings.DBConnectionString = dbProvider.ProvideDBConnectionString(name);
             dbcon = new SqlConnection(settings.DBConnectionString);
         }
         while(!mustStop)
         {
             //Select a page from the database, perform word extraction and store the
             //results back in the database
             int UrlID = 0;
             string data = String.Empty;
             SelectUrlForWordExtraction(out UrlID, out data);
             if(UrlID != 0)
             {
                 try
                 {
                     backoff.Reset();
                     SortedList words = wordExtractor.ExtractWords(data);
                     if(words.Count != 0)
                     {
                         //add all the words to the database if they don't exist already
                         foreach(DictionaryEntry de in words)
                         {
                             cache.AddStemmedWord((string)de.Key);
                         }
                         //remove all the old words related to this url from the database
                         RemoveUrlWords(UrlID);
                         //now add relationships between the url and its words
                         foreach(DictionaryEntry d in words)
                         {
                             string word = (string)d.Key;
                             int occurrences = (int)d.Value;
                             //The count is passed on as a short; clamp it so that a count above
                             //short.MaxValue cannot wrap around to a negative number.
                             short word_count = occurrences > short.MaxValue ? short.MaxValue : (short)occurrences;
                             int word_id = cache[word];
                             AddUrlWord(UrlID, word_id, word_count);
                         }
                     }
                     UpdateUrlDataLastProcess(UrlID);
                 }
                 catch(Exception e)
                 {
                     events.Enqueue(new EventLoggerEntry(CWLoggerEntryType.Warning, DateTime.Now, "WordExtractionPlugin failed to extract words from Url with ID " + UrlID.ToString() + ": " + e.ToString()));
                     //skips Report() and the optional pause for this iteration
                     continue;
                 }
             }
             else
             {
                 //nothing was selected: wait for the next backoff interval (converted to seconds)
                 SleepUnlessStopped(backoff.Next()/1000);
             }
             Report();
             if(settings.PauseBetweenOperations)
             {
                 SleepUnlessStopped(PauseInSeconds());
             }
         }
     }
     catch(ThreadAbortException)
     {
         //The thread was asked to abort, which means it must return at once
         return;
     }
     catch(ThreadInterruptedException)
     {
         //The thread has been asked to Join. We have nothing to do but return.
         return;
     }
     finally
     {
         events.Enqueue(new EventLoggerEntry(CWLoggerEntryType.Info, DateTime.Now, Thread.CurrentThread.Name + " has stopped."));
     }
 }

 /// <summary>
 /// Sleeps for up to the given number of seconds in one-second steps, checking mustStop
 /// after every step and returning early once it has been set.
 /// </summary>
 /// <param name="seconds">The maximum number of one-second sleeps to perform.</param>
 private void SleepUnlessStopped(int seconds)
 {
     for(int i = 0; i < seconds; i++)
     {
         Thread.Sleep(1000);
         if(mustStop)
         {
             break;
         }
     }
 }
Exemple #9
0
 /// <summary>
 /// Creates a <see cref="UrlDataProvider"/>. Being private, the constructor can only
 /// be invoked from inside the class.
 /// </summary>
 /// <remarks>
 /// Resolves the connection string registered under
 /// "CrawlWave.ServerCommon.UrlDataProvider" and creates the SQL connection from it.
 /// </remarks>
 private UrlDataProvider()
 {
     const string providerKey = "CrawlWave.ServerCommon.UrlDataProvider";

     // Provider -> connection string -> connection.
     dbProvider = DBConnectionStringProvider.Instance();
     connectionString = dbProvider.ProvideDBConnectionString(providerKey);
     dbcon = new SqlConnection(connectionString);
 }
 /// <summary>
 /// Creates a <see cref="UrlDataProvider"/>. The constructor is private, so instances
 /// can only be created from inside the class.
 /// </summary>
 /// <remarks>
 /// Looks up the connection string registered under
 /// "CrawlWave.ServerCommon.UrlDataProvider" and creates the SQL connection with it.
 /// </remarks>
 private UrlDataProvider()
 {
     // Key under which this class's connection string is registered.
     const string registeredName = "CrawlWave.ServerCommon.UrlDataProvider";

     dbProvider = DBConnectionStringProvider.Instance();
     connectionString = dbProvider.ProvideDBConnectionString(registeredName);

     // The connection is created from the string resolved above.
     dbcon = new SqlConnection(connectionString);
 }