Example #1
0
 /// <summary>
 /// Creates a WordsCache. The constructor is private, so an instance can only
 /// be created from inside the class itself. It resolves the database connection
 /// string, prepares the connection object and the word table, and finally
 /// calls LoadCache.
 /// </summary>
 private WordsCache()
 {
     // Key under which the connection string provider is queried.
     const string pluginName = "CrawlWave.ServerPlugins.WordExtraction";

     settings = PluginSettings.Instance();
     dbProvider = DBConnectionStringProvider.Instance();

     // Store the provided connection string in the settings and build the
     // connection from the stored value.
     string connectionString = dbProvider.ProvideDBConnectionString(pluginName);
     settings.DBConnectionString = connectionString;
     dbcon = new SqlConnection(settings.DBConnectionString);

     words = new Hashtable();
     stemming = Stemming.Instance();
     culture = new CultureInfo("el-GR");

     // Must run last: every field above is assigned before the cache is loaded.
     LoadCache();
 }
Example #2
0
 /// <summary>
 /// The private constructor of the WordExtractor class. It creates the mutex,
 /// builds the regular expressions that match HTML tags, script blocks, the
 /// title element and the keywords/description meta tags, allocates the stop
 /// word lists and then calls InitializeWordLists.
 /// </summary>
 private WordExtractor()
 {
     // Every expression below is constructed with the same set of options.
     const RegexOptions options = RegexOptions.CultureInvariant | RegexOptions.Multiline | RegexOptions.IgnoreCase | RegexOptions.Compiled;

     mutex              = new Mutex();
     settings           = PluginSettings.Instance();

     // Any markup tag. Alternatives that were considered: <[^>]+> or >(?:(?<t>[^<]*))
     regStripTags       = new Regex("<[^>]*>", options);

     // A script element together with its body. The body quantifier is lazy so
     // that each match ends at the nearest closing script tag; a greedy body
     // would run from the first opening tag to the last closing tag of the
     // document and swallow all the content between separate script blocks.
     regStripScripts    = new Regex(@"(?i)<script([^>])*>(\w|\W)*?</script([^>])*>", options);

     // The title element and the keywords / description meta tags.
     regTitleTags       = new Regex("<\\s*title[^>]*>[^<]*<\\s*/title\\s*>", options);
     regKeywordTags     = new Regex("<meta\\s*name\\s*=\\s*\"keywords\"\\s*content\\s*=\\s*\"[^>]*\">", options);
     regDescriptionTags = new Regex("<meta\\s*name\\s*=\\s*\"description\"\\s*content\\s*=\\s*\"[^>]*\">", options);

     stemming           = Stemming.Instance();

     // The initial capacity of each list is the length of the matching stop word array.
     slStopWordsEnglish = new SortedList(arStopWordsEnglish.Length);
     slStopWordsGreek   = new SortedList(arStopWordsGreek.Length);
     culture            = new CultureInfo("el-GR");

     // Runs last, after the lists and the other fields have been assigned.
     InitializeWordLists();
 }