/// <summary>
/// Builds the single <see cref="WordsCache"/> instance. The constructor is private,
/// so only code inside the class can create the cache.
/// </summary>
private WordsCache()
{
    // Empty in-memory word table; it is filled by LoadCache() at the end.
    words = new Hashtable();

    // Shared plugin settings and the provider that hands out connection strings.
    settings = PluginSettings.Instance();
    dbProvider = DBConnectionStringProvider.Instance();

    // The connection string is requested under the Word Extraction plugin's name
    // and stored in the shared settings before the connection object is created.
    string connectionString =
        dbProvider.ProvideDBConnectionString("CrawlWave.ServerPlugins.WordExtraction");
    settings.DBConnectionString = connectionString;
    dbcon = new SqlConnection(settings.DBConnectionString);

    // Stemmer instance and the Greek (el-GR) culture kept by the cache.
    stemming = Stemming.Instance();
    culture = new CultureInfo("el-GR");

    // Every field above is initialized before the cache is loaded.
    LoadCache();
}
/// <summary>
/// Builds the single <see cref="PluginSettings"/> instance for the Ebay plugin.
/// The constructor is private, so only the class itself can create the instance.
/// </summary>
private PluginSettings()
{
    // Defaults that are in effect before LoadSettings() runs.
    settings = new EbayPluginSettings();
    settings.PauseBetweenOperations = false;
    settings.PauseDelay = 0;
    settings.UseTransactions = false;
    settings.DBActionTimeout = 60;

    // The connection string is requested under the executing assembly's name.
    dbProvider = DBConnectionStringProvider.Instance();
    string assemblyName = Assembly.GetExecutingAssembly().GetName().Name;
    connectionString = dbProvider.ProvideDBConnectionString(assemblyName);

    LoadSettings();
}
/// <summary>
/// Builds the single <see cref="Globals"/> instance. The constructor is private,
/// so only the class itself can create the instance.
/// </summary>
private Globals()
{
    dbProvider = DBConnectionStringProvider.Instance();

    // Application identity and location.
    appName = "CrawlWave.ServerManager";
    appPath = GetAppPath();

    // Register every known form name with a null value.
    loadedForms = new Hashtable(8);
    foreach (string formKey in formNames)
    {
        loadedForms.Add(formKey, null);
    }

    // The log file is named after the application and placed under appPath.
    string logFileName = appPath + appName + ".log";
    log = new FileEventLogger(logFileName, true, appName);
}
/// <summary>
/// Builds the single <see cref="PluginSettings"/> instance backed by
/// <c>USPluginSettings</c>. The constructor is private, so only the class itself
/// can create the instance.
/// </summary>
private PluginSettings()
{
    // Defaults that are in effect before LoadSettings() runs.
    settings = new USPluginSettings();
    settings.Threshold = 1000;
    settings.SelectionSize = 10000;
    settings.SelectionMode = false;
    settings.PauseBetweenOperations = false;
    settings.PauseDelay = 0;
    settings.UseTransactions = false;
    settings.DBTimeout = 60;

    // The connection string is requested under the executing assembly's name.
    dbProvider = DBConnectionStringProvider.Instance();
    string assemblyName = Assembly.GetExecutingAssembly().GetName().Name;
    connectionString = dbProvider.ProvideDBConnectionString(assemblyName);

    LoadSettings();
}
/// <summary>
/// Creates a <see cref="PageRankPlugin"/> with its descriptive fields set, a
/// database connection object created, and no worker thread assigned.
/// </summary>
public PageRankPlugin()
{
    // Descriptive information about the plugin.
    description = "CrawlWave PageRank Plugin";
    name = "CrawlWave.ServerPlugins.PageRank";
    percent = 0;
    version = Assembly.GetExecutingAssembly().GetName().Version.ToString();

    // The connection string is requested under the plugin's own name.
    DBConnectionStringProvider provider = DBConnectionStringProvider.Instance();
    string connectionString = provider.ProvideDBConnectionString(name);
    dbcon = new SqlConnection(connectionString);

    // Initial runtime state: no thread yet and no stop request pending.
    pluginThread = null;
    mustStop = false;
    dataDependent = true;
    enabled = true;
}
/// <summary>
/// Builds the single <see cref="PluginSettings"/> instance backed by
/// <c>INPluginSettings</c>. The constructor is private, so only the class itself
/// can create the instance.
/// </summary>
private PluginSettings()
{
    // Defaults that are in effect before LoadSettings() runs.
    settings = new INPluginSettings();
    settings.Threads = 5;
    settings.CleanUrls = true;
    settings.CheckUrls = true;
    settings.PauseBetweenOperations = false;
    settings.PauseDelay = 0;

    // Supporting objects and the name of the executing assembly.
    dbProvider = DBConnectionStringProvider.Instance();
    rnd = new Random();
    appName = Assembly.GetExecutingAssembly().GetName().Name;

    // No input or output file is set at construction time.
    inputFile = string.Empty;
    outputFile = string.Empty;

    LoadSettings();
}
/// <summary>
/// Creates a new instance of the <see cref="EbayPlugin"/> class: sets the plugin's
/// descriptive fields, creates the database connection object from the connection
/// string provided for the plugin's name, and compiles the regular expressions
/// that pick the user name and the feedback figures out of page markup.
/// </summary>
public EbayPlugin()
{
    mutex = new Mutex();
    settings = PluginSettings.Instance();

    // Descriptive information and initial state.
    name = "CrawlWave.ServerPlugins.Ebay";
    description = "CrawlWave Ebay Plugin";
    dataDependent = true;
    state = PluginState.Stopped;
    enabled = true;
    version = Assembly.GetExecutingAssembly().GetName().Version.ToString();
    percent = 0;
    mustStop = false;
    pluginThread = null;

    // The connection string is requested under the plugin's name and stored in the
    // shared settings before the connection object is created.
    dbProvider = DBConnectionStringProvider.Instance();
    settings.DBConnectionString = dbProvider.ProvideDBConnectionString(name);
    dbcon = new SqlConnection(settings.DBConnectionString);

    // Sample of the markup this pattern targets:
    //   eBay My World: c7gman<img
    regUser = new Regex(
        "<h1>eBay My World:\\s*(?<user>[^<]*)<img",
        RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Multiline);

    // Sample of the markup this pattern targets:
    //   Feedback score: <b>285</b><span class="vSep">|</span>Positive feedback: <b>100%</b>
    // The percentage is one or more digits with an optional fractional part. The
    // previous form (\d+\.?\d) needed at least two digits and allowed only a single
    // decimal digit, so values such as "5%" or "99.55%" were never matched.
    regFeedback = new Regex(
        "Feedback score: <b>(?<feedback>\\d+)</b><span class=\"vSep\">\\|</span>Positive feedback: <b>(?<positive>\\d+(?:\\.\\d+)?)%</b>",
        RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Multiline);
}
/// <summary>
/// Creates a <see cref="WordExtractorPlugin"/>: sets its descriptive fields,
/// creates the database connection object when the settings enable database use,
/// and obtains the word extractor, the words cache and a slow backoff.
/// </summary>
public WordExtractorPlugin()
{
    mutex = new Mutex();
    settings = PluginSettings.Instance();

    // Descriptive information and initial state.
    name = "CrawlWave.ServerPlugins.WordExtraction";
    description = "CrawlWave Word Extraction Plugin";
    dataDependent = false;
    state = PluginState.Stopped;
    enabled = true;
    version = Assembly.GetExecutingAssembly().GetName().Version.ToString();
    percent = 0;
    mustStop = false;
    pluginThread = null;

    // The connection object is only created when database use is switched on.
    if (settings.UseDatabase)
    {
        dbProvider = DBConnectionStringProvider.Instance();
        string connectionString = dbProvider.ProvideDBConnectionString(name);
        settings.DBConnectionString = connectionString;
        dbcon = new SqlConnection(settings.DBConnectionString);
    }

    // Collaborators used by the extraction thread.
    wordExtractor = WordExtractor.Instance();
    cache = WordsCache.Instance();
    backoff = new Backoff(BackoffSpeed.Slow, 30000);
}
/// <summary>
/// Body of the plugin's worker thread. Until <c>mustStop</c> is set it repeatedly
/// selects a url, extracts its words, adds them to the words cache, replaces the
/// url's stored word relationships and marks the url as processed. When no url is
/// selected it waits for the interval supplied by the backoff.
/// </summary>
/// <remarks>
/// A failure while processing a single url is logged as a warning and the loop
/// moves on. A thread abort or interrupt ends the method; the "has stopped" entry
/// is always logged on the way out.
/// </remarks>
private void PerformExtraction()
{
    try
    {
        events.Enqueue(new EventLoggerEntry(CWLoggerEntryType.Info, DateTime.Now, "CrawlWave Word Extraction Plugin thread has started with ID 0x" + Thread.CurrentThread.GetHashCode().ToString("x4")));

        // The user may have enabled the database just before starting the plugin,
        // in which case the constructor did not create the connection.
        if (settings.UseDatabase && dbcon == null)
        {
            dbProvider = DBConnectionStringProvider.Instance();
            settings.DBConnectionString = dbProvider.ProvideDBConnectionString(name);
            dbcon = new SqlConnection(settings.DBConnectionString);
        }

        while (!mustStop)
        {
            // Select a page, perform word extraction and store the results back.
            int UrlID = 0;
            string data = String.Empty;
            SelectUrlForWordExtraction(out UrlID, out data);

            if (UrlID != 0)
            {
                try
                {
                    backoff.Reset();
                    SortedList words = wordExtractor.ExtractWords(data);
                    if (words.Count != 0)
                    {
                        // Add all the words to the cache first.
                        foreach (DictionaryEntry de in words)
                        {
                            cache.AddStemmedWord((string)de.Key);
                        }

                        // Remove all the old words related to this url.
                        RemoveUrlWords(UrlID);

                        // Now add relationships between the url and its words.
                        foreach (DictionaryEntry d in words)
                        {
                            string word = (string)d.Key;
                            // Clamp instead of letting the narrowing cast wrap around
                            // (or overflow) for counts above short.MaxValue.
                            short word_count = (short)Math.Min((int)d.Value, (int)short.MaxValue);
                            int word_id = cache[word];
                            AddUrlWord(UrlID, word_id, word_count);
                        }
                    }
                    UpdateUrlDataLastProcess(UrlID);
                }
                catch (ThreadInterruptedException)
                {
                    // An interrupt is a request to stop, not a per-url failure: let
                    // the outer handler see it instead of logging it and carrying on.
                    throw;
                }
                catch (Exception e)
                {
                    events.Enqueue(new EventLoggerEntry(CWLoggerEntryType.Warning, DateTime.Now, "WordExtractionPlugin failed to extract words from Url with ID " + UrlID.ToString() + ": " + e.ToString()));
                    continue;
                }
            }
            else
            {
                // Nothing was selected: wait for the backoff interval (milliseconds
                // converted to whole seconds).
                SleepUnlessStopped(backoff.Next() / 1000);
            }

            Report();

            if (settings.PauseBetweenOperations)
            {
                SleepUnlessStopped(PauseInSeconds());
            }
        }
    }
    catch (ThreadAbortException)
    {
        // The thread was asked to abort, which means it must return at once.
        return;
    }
    catch (ThreadInterruptedException)
    {
        // The thread has been interrupted. We have nothing to do but return.
        return;
    }
    finally
    {
        events.Enqueue(new EventLoggerEntry(CWLoggerEntryType.Info, DateTime.Now, Thread.CurrentThread.Name + " has stopped."));
    }
}

/// <summary>
/// Sleeps for up to <paramref name="seconds"/> seconds in one-second slices,
/// returning early once <c>mustStop</c> is observed after a slice.
/// </summary>
/// <param name="seconds">Number of one-second slices to wait; zero or less does not sleep.</param>
private void SleepUnlessStopped(int seconds)
{
    for (int i = 0; i < seconds; i++)
    {
        Thread.Sleep(1000);
        if (mustStop)
        {
            break;
        }
    }
}