Exemplo n.º 1
0
 /// <summary>
 /// Private constructor: only code inside <see cref="RobotsCache"/> can create an instance.
 /// Prepares the host table and the database connection, then calls <c>LoadCache</c>.
 /// </summary>
 private RobotsCache()
 {
     //start from an empty host table
     hosts = new Dictionary<byte[], RobotsTxtEntry>();
     //look up the connection string registered under this component's name
     dbProvider = DBConnectionStringProvider.Instance();
     string robotsConnectionString = dbProvider.ProvideDBConnectionString("CrawlWave.ServerCommon.RobotsCache");
     connectionString = robotsConnectionString;
     dbcon = new SqlConnection(robotsConnectionString);
     LoadCache();
 }
Exemplo n.º 2
0
 /// <summary>
 /// Private constructor: only code inside <see cref="BannedHostsCache"/> can create an instance.
 /// Prepares the host table and the database connection, then calls <c>LoadCache</c>.
 /// </summary>
 private BannedHostsCache()
 {
     //start from an empty host table
     hosts = new Dictionary<byte[], string>();
     //look up the connection string registered under this component's name
     dbProvider = DBConnectionStringProvider.Instance();
     string bannedHostsConnectionString = dbProvider.ProvideDBConnectionString("CrawlWave.ServerCommon.BannedHostsCache");
     connectionString = bannedHostsConnectionString;
     dbcon = new SqlConnection(bannedHostsConnectionString);
     LoadCache();
 }
Exemplo n.º 3
0
 /// <summary>
 /// Private constructor: only code inside <see cref="WordsCache"/> can create an instance.
 /// Stores the word extraction connection string in the plugin settings, creates the
 /// connection and the helper objects, then calls <c>LoadCache</c>.
 /// </summary>
 private WordsCache()
 {
     //start from an empty word table
     words = new Hashtable();
     settings = PluginSettings.Instance();
     dbProvider = DBConnectionStringProvider.Instance();
     //the connection string is kept in the settings object and read back from there
     string wordExtractionConnectionString = dbProvider.ProvideDBConnectionString("CrawlWave.ServerPlugins.WordExtraction");
     settings.DBConnectionString = wordExtractionConnectionString;
     dbcon = new SqlConnection(settings.DBConnectionString);
     stemming = Stemming.Instance();
     //Greek (Greece) culture
     culture = new CultureInfo("el-GR");
     LoadCache();
 }
Exemplo n.º 4
0
 /// <summary>
 /// Private constructor: only code inside <see cref="PluginSettings"/> can create an instance.
 /// Fills in the default setting values, obtains the connection string registered under the
 /// executing assembly's name and then calls <c>LoadSettings</c>.
 /// </summary>
 private PluginSettings()
 {
     //default values, assigned before LoadSettings runs
     settings = new EbayPluginSettings
     {
         PauseBetweenOperations = false,
         PauseDelay = 0,
         UseTransactions = false,
         DBActionTimeout = 60
     };
     dbProvider = DBConnectionStringProvider.Instance();
     string assemblyName = Assembly.GetExecutingAssembly().GetName().Name;
     connectionString = dbProvider.ProvideDBConnectionString(assemblyName);
     LoadSettings();
 }
Exemplo n.º 5
0
 /// <summary>
 /// Private constructor: only code inside <see cref="Globals"/> can create an instance.
 /// Sets the application name and path, registers every known form name with a null
 /// entry and creates the file logger.
 /// </summary>
 private Globals()
 {
     dbProvider = DBConnectionStringProvider.Instance();
     appName = "CrawlWave.ServerManager";
     appPath = GetAppPath();
     //every form name gets a slot up front; the value stays null for now
     loadedForms = new Hashtable(8);
     foreach(string name in formNames)
     {
         loadedForms.Add(name, null);
     }
     //the log file is named after the application and placed under the application path
     string logFileName = appPath + appName + ".log";
     log = new FileEventLogger(logFileName, true, appName);
 }
Exemplo n.º 6
0
 /// <summary>
 /// Private constructor: only code inside <see cref="PluginSettings"/> can create an instance.
 /// Fills in the default setting values, obtains the connection string registered under the
 /// executing assembly's name and then calls <c>LoadSettings</c>.
 /// </summary>
 private PluginSettings()
 {
     //default values, assigned before LoadSettings runs
     settings = new USPluginSettings
     {
         Threshold = 1000,
         SelectionSize = 10000,
         SelectionMode = false,
         PauseBetweenOperations = false,
         PauseDelay = 0,
         UseTransactions = false,
         DBTimeout = 60
     };
     dbProvider = DBConnectionStringProvider.Instance();
     string assemblyName = Assembly.GetExecutingAssembly().GetName().Name;
     connectionString = dbProvider.ProvideDBConnectionString(assemblyName);
     LoadSettings();
 }
 /// <summary>
 /// Lock object shared by every caller of <see cref="Instance"/>; guards the creation of
 /// the single instance.
 /// </summary>
 private static readonly object instanceLock = new object();

 /// <summary>
 /// Provides a global access point for the single instance of the <see cref="DBConnectionStringProvider"/>
 /// class. The instance is created on the first call.
 /// </summary>
 /// <returns>A reference to the single instance of <see cref="DBConnectionStringProvider"/>.</returns>
 public static DBConnectionStringProvider Instance()
 {
     if (instance == null)
     {
         //Make sure the call is thread-safe. The lock must be shared by all callers:
         //a Mutex created locally on each call is never contended, so it cannot stop
         //two threads from constructing the instance at the same time.
         lock (instanceLock)
         {
             //re-check under the lock, another thread may have created it meanwhile
             if (instance == null)
             {
                 instance = new DBConnectionStringProvider();
             }
         }
     }
     return instance;
 }
Exemplo n.º 8
0
 /// <summary>
 /// Lock object shared by every caller of <see cref="Instance"/>; guards the creation of
 /// the single instance.
 /// </summary>
 private static readonly object instanceLock = new object();

 /// <summary>
 /// Provides a global access point for the single instance of the <see cref="DBConnectionStringProvider"/>
 /// class. The instance is created on the first call.
 /// </summary>
 /// <returns>A reference to the single instance of <see cref="DBConnectionStringProvider"/>.</returns>
 public static DBConnectionStringProvider Instance()
 {
     if (instance == null)
     {
         //Make sure the call is thread-safe. The lock must be shared by all callers:
         //a Mutex created locally on each call is never contended, so it cannot stop
         //two threads from constructing the instance at the same time.
         lock (instanceLock)
         {
             //re-check under the lock, another thread may have created it meanwhile
             if (instance == null)
             {
                 instance = new DBConnectionStringProvider();
             }
         }
     }
     return instance;
 }
Exemplo n.º 9
0
 /// <summary>
 /// Create a new instance of the <see cref="EbayPlugin"/> class.
 /// </summary>
 /// <remarks>
 /// Sets the plugin's descriptive fields and its initial (stopped, enabled) state, creates
 /// the database connection from the connection string registered under the plugin's name
 /// and compiles the regular expressions that pick the user name and the feedback figures
 /// out of a page.
 /// </remarks>
 public EbayPlugin()
 {
     mutex = new Mutex();
     settings = PluginSettings.Instance();
     name = "CrawlWave.ServerPlugins.Ebay";
     description = "CrawlWave Ebay Plugin";
     dataDependent = true;
     state = PluginState.Stopped;
     enabled = true;
     version = Assembly.GetExecutingAssembly().GetName().Version.ToString();
     percent = 0;
     mustStop = false;
     pluginThread = null;
     dbProvider = DBConnectionStringProvider.Instance();
     settings.DBConnectionString = dbProvider.ProvideDBConnectionString(name);
     dbcon = new SqlConnection(settings.DBConnectionString);
     //Sample input: eBay My World: c7gman<img
     regUser = new Regex("<h1>eBay My World:\\s*(?<user>[^<]*)<img", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Multiline);
     //Sample input: Feedback score: <b>285</b><span class="vSep">|</span>Positive feedback: <b>100%</b>
     //The percentage is matched as digits with an optional fractional part, so that
     //single digit values (0%) and values with several decimals (99.85%) match as well.
     regFeedback = new Regex("Feedback score: <b>(?<feedback>\\d+)</b><span class=\"vSep\">\\|</span>Positive feedback: <b>(?<positive>\\d+(?:\\.\\d+)?)%</b>", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Multiline);
 }
Exemplo n.º 10
0
 /// <summary>
 /// Create a new instance of the <see cref="WordExtractorPlugin"/> class.
 /// </summary>
 /// <remarks>
 /// The database connection is only created here when the settings have
 /// <c>UseDatabase</c> turned on.
 /// </remarks>
 public WordExtractorPlugin()
 {
     mutex = new Mutex();
     settings = PluginSettings.Instance();

     //descriptive information
     name = "CrawlWave.ServerPlugins.WordExtraction";
     description = "CrawlWave Word Extraction Plugin";
     version = Assembly.GetExecutingAssembly().GetName().Version.ToString();
     dataDependent = false;

     //initial state: enabled, stopped, no worker thread
     enabled = true;
     state = PluginState.Stopped;
     percent = 0;
     mustStop = false;
     pluginThread = null;

     if(settings.UseDatabase)
     {
         dbProvider = DBConnectionStringProvider.Instance();
         string pluginConnectionString = dbProvider.ProvideDBConnectionString(name);
         settings.DBConnectionString = pluginConnectionString;
         dbcon = new SqlConnection(settings.DBConnectionString);
     }

     //collaborators used while extracting
     wordExtractor = WordExtractor.Instance();
     cache = WordsCache.Instance();
     backoff = new Backoff(BackoffSpeed.Slow, 30000);
 }
Exemplo n.º 11
0
 /// <summary>
 /// Worker loop of the plugin: selects a Url, extracts the words of its data, stores the
 /// words and their counts for that Url and repeats until <c>mustStop</c> is set or the
 /// thread is aborted or interrupted.
 /// </summary>
 /// <remarks>
 /// A failure while processing one Url is logged as a warning and the loop moves on to the
 /// next Url. Thread abort and thread interruption are not treated as such a failure: they
 /// are passed on to the outer handlers so that the method returns at once. A message is
 /// always enqueued when the method exits.
 /// </remarks>
 private void PerformExtraction()
 {
     try
     {
         events.Enqueue(new EventLoggerEntry(CWLoggerEntryType.Info, DateTime.Now, "CrawlWave Word Extraction Plugin thread has started with ID 0x" + Thread.CurrentThread.GetHashCode().ToString("x4")));
         //the user may have enabled database just before starting the plugin
         if(settings.UseDatabase && dbcon == null)
         {
             dbProvider = DBConnectionStringProvider.Instance();
             settings.DBConnectionString = dbProvider.ProvideDBConnectionString(name);
             dbcon = new SqlConnection(settings.DBConnectionString);
         }
         while(!mustStop)
         {
             //Select a page from the database, perform word extraction and store the
             //results back in the database
             int UrlID = 0;
             string data = String.Empty;
             int waitSeconds = 0;
             SelectUrlForWordExtraction(out UrlID, out data);
             if(UrlID!=0)
             {
                 try
                 {
                     backoff.Reset();
                     SortedList words = wordExtractor.ExtractWords(data);
                     if(words.Count != 0)
                     {
                         //add all the words to the database if they don't exist already
                         foreach(DictionaryEntry de in words)
                         {
                             cache.AddStemmedWord((string)de.Key);
                         }
                         //remove all the old words related to this url from the database
                         RemoveUrlWords(UrlID);
                         //now add relationships between the url and its words
                         foreach(DictionaryEntry d in words)
                         {
                             string word = (string)d.Key;
                             //NOTE(review): counts above short.MaxValue are truncated by this cast - confirm this is acceptable
                             short word_count = (short)((int)d.Value);
                             int word_id = cache[word];
                             AddUrlWord(UrlID, word_id, word_count);
                         }
                     }
                     UpdateUrlDataLastProcess(UrlID);
                 }
                 catch(ThreadAbortException)
                 {
                     //not an extraction failure: let the outer handler deal with it
                     throw;
                 }
                 catch(ThreadInterruptedException)
                 {
                     //not an extraction failure: swallowing it here would keep the loop running
                     throw;
                 }
                 catch(Exception e)
                 {
                     events.Enqueue(new EventLoggerEntry(CWLoggerEntryType.Warning, DateTime.Now, "WordExtractionPlugin failed to extract words from Url with ID " + UrlID.ToString() + ": " + e.ToString()));
                     //skips the report and the optional pause below and goes straight to the next Url
                     continue;
                 }
             }
             else
             {
                 //no Url was selected: back off, sleeping one second at a time so that
                 //a stop request is noticed quickly
                 waitSeconds = backoff.Next()/1000;
                 for(int i = 0; i < waitSeconds; i++)
                 {
                     Thread.Sleep(1000);
                     if(mustStop)
                     {
                         break;
                     }
                 }
             }
             Report();
             if(settings.PauseBetweenOperations)
             {
                 //optional pause between operations, again interruptible every second
                 waitSeconds = PauseInSeconds();
                 for(int i = 0; i < waitSeconds; i++)
                 {
                     Thread.Sleep(1000);
                     if(mustStop)
                     {
                         break;
                     }
                 }
             }
         }
     }
     catch(ThreadAbortException)
     {
         //The thread was asked to abort, which means it must return at once
         return;
     }
     catch(ThreadInterruptedException)
     {
         //The thread has been asked to Join. We have nothing to do but return.
         return;
     }
     finally
     {
         events.Enqueue(new EventLoggerEntry(CWLoggerEntryType.Info, DateTime.Now, Thread.CurrentThread.Name + " has stopped."));
     }
 }
Exemplo n.º 12
0
 /// <summary>
 /// Private constructor: only code inside <see cref="PluginSettings"/> can create an instance.
 /// Fills in the default setting values and the remaining fields, then calls
 /// <c>LoadSettings</c>.
 /// </summary>
 private PluginSettings()
 {
     //default values, assigned before LoadSettings runs
     settings = new INPluginSettings
     {
         Threads = 5,
         CleanUrls = true,
         CheckUrls = true,
         PauseBetweenOperations = false,
         PauseDelay = 0
     };
     dbProvider = DBConnectionStringProvider.Instance();
     rnd = new Random();
     //the application name is the name of the executing assembly
     Assembly executingAssembly = Assembly.GetExecutingAssembly();
     appName = executingAssembly.GetName().Name;
     //no input or output file yet
     inputFile = String.Empty;
     outputFile = String.Empty;
     LoadSettings();
 }
Exemplo n.º 13
0
 /// <summary>
 /// Private constructor: only code inside <see cref="UrlDataProvider"/> can create an instance.
 /// Obtains the connection string registered under this component's name and creates the
 /// database connection from it.
 /// </summary>
 private UrlDataProvider()
 {
     dbProvider = DBConnectionStringProvider.Instance();
     string providerConnectionString = dbProvider.ProvideDBConnectionString("CrawlWave.ServerCommon.UrlDataProvider");
     connectionString = providerConnectionString;
     dbcon = new SqlConnection(providerConnectionString);
 }
Exemplo n.º 14
0
 /// <summary>
 /// Private constructor: only code inside <see cref="UrlDataProvider"/> can create an instance.
 /// Obtains the connection string registered under this component's name and creates the
 /// database connection from it.
 /// </summary>
 private UrlDataProvider()
 {
     dbProvider = DBConnectionStringProvider.Instance();
     string providerConnectionString = dbProvider.ProvideDBConnectionString("CrawlWave.ServerCommon.UrlDataProvider");
     connectionString = providerConnectionString;
     dbcon = new SqlConnection(providerConnectionString);
 }