/// <summary>
/// Builds the main form: initializes the designer components, sets the current
/// thread's culture to ru-RU, stores the shared learned classifier in <c>_tc</c>
/// and requests the classifier instance for each known group postfix.
/// </summary>
public FormMain()
{
    InitializeComponent();

    System.Threading.Thread.CurrentThread.CurrentCulture = new CultureInfo("ru-RU");
    _tc = TextClassificator.LearnedInstance;

    // The returned instances are discarded here.
    // NOTE(review): presumably these calls exist to pre-load the per-group
    // classifiers - confirm against GetLoadInstance.
    var groupPostfixes = new[] { "_ri", "_le", "_ei", "_si" };
    foreach (var groupPostfix in groupPostfixes)
    {
        TextClassificator.GetLoadInstance(groupPostfix);
    }
}
/// <summary>
/// Returns the classifier registered for <paramref name="themeGroupName"/>,
/// creating, training and registering it on the first request for that name.
/// </summary>
/// <param name="themeGroupName">
/// Theme group name; used as the key in <c>_learnedInstances</c> and passed to <c>Learn</c>.
/// </param>
/// <returns>The classifier stored in <c>_learnedInstances</c> for the given group name.</returns>
public static TextClassificator CreateLearnedInstance(string themeGroupName)
{
    // The registry itself is created lazily on first use.
    if (_learnedInstances == null)
    {
        _learnedInstances = new Dictionary<string, TextClassificator>();
    }

    TextClassificator existing;
    if (_learnedInstances.TryGetValue(themeGroupName, out existing))
    {
        return existing;
    }

    // Both word lists are stored as resources with one entry per '\n'-separated line.
    var lineBreak = new[] { '\n' };

    var separatorsText = (string)Resource.ResourceManager.GetObject(TextClassificator.SeparatorsFileName);
    List<string> separators = separatorsText.Split(lineBreak, StringSplitOptions.RemoveEmptyEntries).ToList();

    var stopWordsText = (string)Resource.ResourceManager.GetObject(TextClassificator.StopWordsFileName);
    List<string> stopWords = stopWordsText.Split(lineBreak, StringSplitOptions.RemoveEmptyEntries).ToList();

    var classifier = new TextClassificator(separators, stopWords);

    // NOTE(review): Learn receives an empty theme dictionary here - confirm this is intended.
    var themes = new Dictionary<string, string>();
    classifier.Learn(themeGroupName, themes);

    _learnedInstances.Add(themeGroupName, classifier);
    return classifier;
}
/// <summary>
/// Creates a classifier from the default separator and stop-word resources and
/// imports into it every ru-RU resource whose name ends with
/// <paramref name="groupPostfix"/>.
/// </summary>
/// <param name="groupPostfix">
/// Suffix that selects the resources to import. It is stripped from each matching
/// resource name to form the key handed to <c>ImportFromXmlsString</c>.
/// </param>
/// <returns>A new classifier that has imported all matching resources.</returns>
/// <exception cref="ArgumentNullException"><paramref name="groupPostfix"/> is null.</exception>
private static TextClassificator DefaultLoad(string groupPostfix)
{
    if (groupPostfix == null)
    {
        throw new ArgumentNullException("groupPostfix");
    }

    // Both word lists are stored as resources with one entry per '\n'-separated line.
    var lineBreak = new[] { '\n' };

    var separatorsText = (string)Resource.ResourceManager.GetObject(TextClassificator.SeparatorsFileName);
    List<string> separators = separatorsText.Split(lineBreak, StringSplitOptions.RemoveEmptyEntries).ToList();

    var stopWordsText = (string)Resource.ResourceManager.GetObject(TextClassificator.StopWordsFileName);
    List<string> stopWords = stopWordsText.Split(lineBreak, StringSplitOptions.RemoveEmptyEntries).ToList();

    var instance = new TextClassificator(separators, stopWords);

    // First pass: collect the names of the resources that belong to the group.
    // The match is an ordinal suffix comparison, as in the original length/Substring check.
    var matchingKeys = new List<string>();
    var resourceSet = Resource.ResourceManager.GetResourceSet(new System.Globalization.CultureInfo("ru-RU"), true, true);
    foreach (System.Collections.DictionaryEntry entry in resourceSet)
    {
        var key = entry.Key.ToString();
        if (key.EndsWith(groupPostfix, StringComparison.Ordinal))
        {
            matchingKeys.Add(key);
        }
    }

    // Second pass: strip exactly the trailing postfix. Cutting at the first
    // occurrence (IndexOf) would truncate names that contain the postfix earlier
    // and could make two resources collide on the same key.
    var xmlByName = matchingKeys.ToDictionary(
        key => key.Substring(0, key.Length - groupPostfix.Length),
        key => Resource.ResourceManager.GetObject(key).ToString());

    instance.ImportFromXmlsString(xmlByName);
    return instance;
}
/// <summary>
/// Creates a trained classifier instance (all values are taken from the defaults).
/// </summary>
/// <returns>
/// A new classifier trained on every ru-RU resource whose name starts with '_',
/// under the name of the theme group with id 1.
/// </returns>
public static TextClassificator CreateLearnedInstance()
{
    // Both word lists are stored as resources with one entry per '\n'-separated line.
    var lineBreak = new[] { '\n' };

    var separatorsText = (string)Resource.ResourceManager.GetObject(TextClassificator.SeparatorsFileName);
    List<string> separators = separatorsText.Split(lineBreak, StringSplitOptions.RemoveEmptyEntries).ToList();

    var stopWordsText = (string)Resource.ResourceManager.GetObject(TextClassificator.StopWordsFileName);
    List<string> stopWords = stopWordsText.Split(lineBreak, StringSplitOptions.RemoveEmptyEntries).ToList();

    var classifier = new TextClassificator(separators, stopWords);

    var themes = new Dictionary<string, string>();
    var resourceSet = Resource.ResourceManager.GetResourceSet(new System.Globalization.CultureInfo("ru-RU"), true, true);
    foreach (System.Collections.DictionaryEntry entry in resourceSet)
    {
        var resourceName = entry.Key.ToString();

        // Only resources whose name begins with an underscore are used as training themes.
        if (resourceName[0] != '_')
        {
            continue;
        }

        // Normalize: lower-case, remove separators, remove stop words, then clean up
        // double spaces and bad symbols - in exactly this order.
        var lowered = entry.Value.ToString().ToLower();
        var withoutSeparators = DeleteWords(lowered, separators, false);
        var withoutStopWords = DeleteWords(withoutSeparators, stopWords);
        var cleaned = DeleteDoubleSpaceAndBadSymbols(withoutStopWords);

        themes.Add(resourceName.TrimStart('_'), cleaned);
    }

    using (var db = new TextClassificatorEntities())
    {
        // The group name is read from the database row with ThemeGroup_id == 1.
        var groupName = db.ThemeGroup.Single(tg => tg.ThemeGroup_id == 1).ThemeGroup_name;
        classifier.Learn(groupName, themes);
    }

    return classifier;
}