/// <summary>
/// Initializes a new instance of the <see cref="MorphologyModel"/> class.
/// </summary>
/// <remarks>
/// Any existing <paramref name="folder"/> is deleted recursively and recreated.
/// The lemmatizer file is then copied into it and the lemmatizer is loaded from that copy.
/// </remarks>
/// <param name="nGramm">N-gram model.</param>
/// <param name="entModel">Ambiguity class model.</param>
/// <param name="folder">Folder used to store the model files.</param>
/// <param name="lemmaFile">Lemmatizer file.</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="folder"/> or <paramref name="lemmaFile"/> is <c>null</c>.
/// </exception>
public MorphologyModel(TagNGramm nGramm, IEntropyClassModel entModel, string folder, string lemmaFile)
{
    // Validate before touching the disk: the folder is wiped below, so a bad
    // argument must not be discovered only after the delete has happened.
    if (folder == null)
    {
        throw new System.ArgumentNullException("folder");
    }

    if (lemmaFile == null)
    {
        throw new System.ArgumentNullException("lemmaFile");
    }

    this.nGramm = nGramm;
    this.entClass = entModel;
    this.folder = folder;

    // Start from an empty model folder.
    // NOTE(review): if lemmaFile lives inside this folder it is deleted here
    // and the copy below will fail - confirm callers never pass such a path.
    if (Directory.Exists(folder))
    {
        Directory.Delete(folder, true);
    }

    Directory.CreateDirectory(folder);

    // Only the file name is used for the destination, so a source path that
    // contains directories still lands directly inside the model folder, and
    // the platform's own separator is used instead of a hard-coded backslash.
    this.lemmaFile = Path.Combine(folder, Path.GetFileName(lemmaFile));
    File.Copy(lemmaFile, this.lemmaFile);

    // NOTE(review): the stream is intentionally left open here; it is not
    // visible from this block whether Lemmatizer keeps reading from it after
    // construction. Dispose it if Lemmatizer loads everything eagerly.
    FileStream fs = File.OpenRead(this.lemmaFile);
    lemmatizer = new Lemmatizer(fs);

    Initialize();
}
/// <summary>
/// Trains the entropy class model from a dictionary file.
/// </summary>
/// <remarks>
/// Replaces <c>entClass</c> with a fresh <see cref="DawgEntropyClassModel"/> and rebuilds
/// <c>serviceTags</c>, which maps each lower-cased conjunction, particle or preposition
/// to the union of all tags seen for that word.
/// </remarks>
/// <param name="inputFile">Input dictionary file.</param>
/// <param name="reader">Object used to read the corpus.</param>
private void BuildEntropyClassModel(string inputFile, ICorporaReader reader)
{
    // Keep a typed reference so Build() can be called without a downcast.
    DawgEntropyClassModel model = new DawgEntropyClassModel();
    entClass = model;

    reader.Open(inputFile);
    try
    {
        serviceTags = new Dictionary<string, Tag>();

        const Tag ServiceMask = Tag.Conjunction | Tag.Particle | Tag.Preposition;
        foreach (WordForm lexem in reader.ReadDictionary(long.MaxValue))
        {
            if ((lexem.Tag & ServiceMask) != 0)
            {
                string key = lexem.Word.ToLower();

                // On a miss TryGetValue leaves default(Tag) in the out variable,
                // so the first occurrence simply stores the lexem's own tag.
                Tag merged;
                serviceTags.TryGetValue(key, out merged);
                serviceTags[key] = merged | lexem.Tag;
            }

            entClass.AddLexem(lexem);
        }

        model.Build();
    }
    finally
    {
        // Close the reader even when reading or building throws.
        reader.Close();
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="MorphologyModel"/> class from serialized data.
/// </summary>
/// <remarks>
/// Scalar settings and collections are read from <paramref name="si"/>. The n-gram model
/// ("nGramm.mdl") and the entropy class model ("entClass.mdl") are loaded from the
/// serialized model folder, and the lemmatizer is loaded from the serialized lemmatizer path.
/// </remarks>
/// <param name="si">Serialization information.</param>
/// <param name="context">Streaming context.</param>
protected MorphologyModel(SerializationInfo si, StreamingContext context)
{
    this.folder = si.GetString("folder");
    this.groups = (List<TagGroup>)si.GetValue("groups", typeof(List<TagGroup>));
    this.punctuation = (List<string>)si.GetValue("punctuation", typeof(List<string>));

    // The model files are only read, so open them read-only with shared read
    // access. File.Open(path, FileMode.Open) would demand write access and an
    // exclusive lock, failing on read-only files or concurrent loads.
    using (FileStream nGrammStream = File.OpenRead(string.Format("{0}/{1}", folder, "nGramm.mdl")))
    using (FileStream entClassStream = File.OpenRead(string.Format("{0}/{1}", folder, "entClass.mdl")))
    {
        this.nGramm = Serializer.Deserialize<TagNGramm>(nGrammStream);
        this.entClass = new DawgEntropyClassModel();
        entClass.Load(entClassStream);
    }

    this.sentenceDelimiters = (List<string>)si.GetValue("delimiters", typeof(List<string>));
    this.lemmaFile = si.GetString("lemmaFile");

    // Opened the same way as in the public constructor (read-only).
    // NOTE(review): the stream is left open; it is not visible from this block
    // whether Lemmatizer keeps reading from it after construction.
    FileStream lemmaStream = File.OpenRead(this.lemmaFile);
    this.lemmatizer = new Lemmatizer(lemmaStream);

    this.sentencePattern = (Regex)si.GetValue("sentencePattern", typeof(Regex));
    this.lexemPattern = (Regex)si.GetValue("lexemPattern", typeof(Regex));

    // The key spelling "minLenght" must stay as-is to match previously serialized data.
    this.minLengh = si.GetInt32("minLenght");
    serviceTags = (Dictionary<string, Tag>)si.GetValue("serviceTags", typeof(Dictionary<string, Tag>));
}