Exemple #1
0
        /// <summary>
        /// Initializes the form's components, switches the current thread's culture to ru-RU,
        /// stores the shared learned classifier in <c>_tc</c> and requests the classifier
        /// instances for the four group postfixes.
        /// </summary>
        public FormMain()
        {
            InitializeComponent();

            var russianCulture = new CultureInfo("ru-RU");
            System.Threading.Thread.CurrentThread.CurrentCulture = russianCulture;

            _tc = TextClassificator.LearnedInstance;

            // The returned instances are discarded here.
            // NOTE(review): presumably these calls pre-load instances that TextClassificator keeps internally - confirm.
            foreach (var groupPostfix in new[] { "_ri", "_le", "_ei", "_si" })
                TextClassificator.GetLoadInstance(groupPostfix);
        }
        /// <summary>
        /// Returns the classifier associated with the given theme group, creating, training
        /// and caching it in <c>_learnedInstances</c> the first time that group is requested.
        /// </summary>
        /// <param name="themeGroupName">Theme group name; used as the cache key and passed to <c>Learn</c>.</param>
        /// <returns>The cached classifier for <paramref name="themeGroupName"/>.</returns>
        public static TextClassificator CreateLearnedInstance(string themeGroupName)
        {
            if (_learnedInstances == null)
                _learnedInstances = new Dictionary<string, TextClassificator>();

            // One lookup on the cache-hit path instead of ContainsKey followed by the indexer.
            TextClassificator instance;
            if (_learnedInstances.TryGetValue(themeGroupName, out instance))
                return instance;

            // Both word lists are stored as resources with one entry per '\n'-terminated line.
            var separatorsText = (string)Resource.ResourceManager.GetObject(TextClassificator.SeparatorsFileName);
            var separators = separatorsText.Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries).ToList();

            var stopWordsText = (string)Resource.ResourceManager.GetObject(TextClassificator.StopWordsFileName);
            var stopWords = stopWordsText.Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries).ToList();

            instance = new TextClassificator(separators, stopWords);

            // NOTE(review): the themes dictionary is passed to Learn empty - confirm that Learn
            // obtains its training data elsewhere for a named group.
            var themes = new Dictionary<string, string>();
            instance.Learn(themeGroupName, themes);

            // Cached only after Learn has completed, so a failed training run is not stored.
            _learnedInstances.Add(themeGroupName, instance);
            return instance;
        }
        /// <summary>
        /// Builds a classifier from the separator and stop-word resources and imports into it
        /// every resource of the ru-RU resource set whose key ends with <paramref name="groupPostfix"/>.
        /// </summary>
        /// <param name="groupPostfix">
        /// Suffix selecting the resources to import. It is removed from the end of each matching
        /// resource key to form the dictionary key handed to <c>ImportFromXmlsString</c>.
        /// </param>
        /// <returns>A new classifier after <c>ImportFromXmlsString</c> has been called on it.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="groupPostfix"/> is null.</exception>
        private static TextClassificator DefaultLoad(string groupPostfix)
        {
            if (groupPostfix == null)
                throw new ArgumentNullException("groupPostfix");

            // Both word lists are stored as resources with one entry per '\n'-terminated line.
            var separatorsText = (string)Resource.ResourceManager.GetObject(TextClassificator.SeparatorsFileName);
            var separators = separatorsText.Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries).ToList();

            var stopWordsText = (string)Resource.ResourceManager.GetObject(TextClassificator.StopWordsFileName);
            var stopWords = stopWordsText.Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries).ToList();

            var instance = new TextClassificator(separators, stopWords);

            // Collect the matching keys first; the resource values are fetched only after the
            // enumeration of the resource set has finished.
            var matchingKeys = new List<string>();
            var resourceSet = Resource.ResourceManager.GetResourceSet(new System.Globalization.CultureInfo("ru-RU"), true, true);
            foreach (System.Collections.DictionaryEntry entry in resourceSet)
            {
                var key = entry.Key.ToString();
                if (key.EndsWith(groupPostfix, StringComparison.Ordinal))
                    matchingKeys.Add(key);
            }

            // Strip exactly the trailing postfix. Cutting at the first occurrence of the postfix
            // would truncate names that also contain it earlier and could produce duplicate keys.
            // NOTE(review): values are re-read through ResourceManager.GetObject(name) rather than
            // taken from the enumerated ru-RU entries - confirm this lookup is the intended one.
            var xmlByName = matchingKeys.ToDictionary(
                key => key.Substring(0, key.Length - groupPostfix.Length),
                key => Resource.ResourceManager.GetObject(key).ToString());
            instance.ImportFromXmlsString(xmlByName);

            return instance;
        }
        /// <summary>
        /// Creates a trained classifier instance (all values are taken by default).
        /// </summary>
        /// <remarks>
        /// Each entry of the ru-RU resource set whose key begins with '_' is lower-cased, passed
        /// through <c>DeleteWords</c> with the separator list, <c>DeleteWords</c> with the stop-word
        /// list and <c>DeleteDoubleSpaceAndBadSymbols</c>, then stored under its key with the leading
        /// underscores trimmed. The classifier learns these texts under the name of the theme group
        /// whose <c>ThemeGroup_id</c> is 1.
        /// </remarks>
        /// <returns>The newly created classifier after <c>Learn</c> has been called on it.</returns>
        public static TextClassificator CreateLearnedInstance()
        {
            // Both word lists are stored as resources with one entry per '\n'-terminated line.
            List<string> separators = ((string)Resource.ResourceManager.GetObject(TextClassificator.SeparatorsFileName))
                .Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries)
                .ToList();
            List<string> stopWords = ((string)Resource.ResourceManager.GetObject(TextClassificator.StopWordsFileName))
                .Split(new[] { '\n' }, StringSplitOptions.RemoveEmptyEntries)
                .ToList();

            var classifier = new TextClassificator(separators, stopWords);
            var trainingTexts = new Dictionary<string, string>();

            var russianResources = Resource.ResourceManager.GetResourceSet(new System.Globalization.CultureInfo("ru-RU"), true, true);
            foreach (System.Collections.DictionaryEntry entry in russianResources)
            {
                var resourceName = entry.Key.ToString();

                // Only resources whose name starts with an underscore are used for training.
                if (resourceName[0] != '_')
                    continue;

                // Same pipeline order as always: lower-case, separators, stop words, final clean-up.
                var lowered = entry.Value.ToString().ToLower();
                var withoutSeparators = DeleteWords(lowered, separators, false);
                var withoutStopWords = DeleteWords(withoutSeparators, stopWords);
                var cleaned = DeleteDoubleSpaceAndBadSymbols(withoutStopWords);

                trainingTexts.Add(resourceName.TrimStart('_'), cleaned);
            }

            using (var db = new TextClassificatorEntities())
            {
                // The group name is read from the database row with id 1.
                var groupName = db.ThemeGroup.Single(tg => tg.ThemeGroup_id == 1).ThemeGroup_name;
                classifier.Learn(groupName, trainingTexts);
            }

            return classifier;
        }