示例#1
0
        /// <summary>
        /// Initializes a new instance of the <see cref="MorphologyModel"/> class.
        /// </summary>
        /// <remarks>
        /// The model folder is recreated from scratch: if it already exists it is
        /// deleted recursively, then created again, and the lemmatizer file is
        /// copied into it.
        /// </remarks>
        /// <param name="nGramm">N-gram model.</param>
        /// <param name="entModel">Ambiguity class model.</param>
        /// <param name="folder">Folder used to store the model files.</param>
        /// <param name="lemmaFile">Lemmatizer file.</param>
        public MorphologyModel(TagNGramm nGramm, IEntropyClassModel entModel,
                               string folder, string lemmaFile)
        {
            this.nGramm   = nGramm;
            this.entClass = entModel;
            this.folder   = folder;

            // Always start from an empty model folder.
            if (Directory.Exists(folder))
            {
                Directory.Delete(folder, true);
            }
            Directory.CreateDirectory(folder);

            // Build the destination with Path.Combine (portable separator) and use
            // only the file name of the source, so that a source path containing
            // directories does not point into a non-existent sub-folder of the
            // freshly created model folder.
            this.lemmaFile = Path.Combine(folder, Path.GetFileName(lemmaFile));
            File.Copy(lemmaFile, this.lemmaFile);

            // NOTE(review): the stream is handed to the Lemmatizer and is not
            // disposed here; whether the Lemmatizer keeps reading from it after
            // construction is not visible from this code - confirm ownership.
            FileStream fs = File.OpenRead(this.lemmaFile);
            lemmatizer = new Lemmatizer(fs);

            Initialize();
        }
示例#2
0
 /// <summary>
 /// Trains the entropy class model.
 /// </summary>
 /// <remarks>
 /// Every dictionary entry is added to a new <see cref="DawgEntropyClassModel"/>.
 /// Entries tagged as conjunction, particle or preposition are additionally
 /// collected into <c>serviceTags</c>, keyed by the lower-cased word, with the
 /// tags of all entries for the same word OR-ed together.
 /// </remarks>
 /// <param name="inputFile">Input dictionary file.</param>
 /// <param name="reader">Object used to read the corpus.</param>
 private void BuildEntropyClassModel(string inputFile, ICorporaReader reader)
 {
     DawgEntropyClassModel model = new DawgEntropyClassModel();
     entClass = model;
     reader.Open(inputFile);
     try
     {
         serviceTags = new Dictionary <string, Tag>();
         foreach (WordForm lexem in reader.ReadDictionary(long.MaxValue))
         {
             if ((lexem.Tag & (Tag.Conjunction | Tag.Particle | Tag.Preposition)) != 0)
             {
                 string key = lexem.Word.ToLower();

                 // On a miss TryGetValue leaves default(Tag) in the out variable,
                 // so the first entry for a word is stored with its own tag only.
                 Tag knownTags;
                 serviceTags.TryGetValue(key, out knownTags);
                 serviceTags[key] = knownTags | lexem.Tag;
             }
             entClass.AddLexem(lexem);
         }
         model.Build();
     }
     finally
     {
         // Close the reader even if reading the dictionary or building the model fails.
         reader.Close();
     }
 }
示例#3
0
        /// <summary>
        /// Initializes a new instance of the <see cref="MorphologyModel"/> class
        /// from serialized data.
        /// </summary>
        /// <remarks>
        /// Simple settings are read from <paramref name="si"/>; the n-gram model and
        /// the entropy class model are loaded from the files "nGramm.mdl" and
        /// "entClass.mdl" in the serialized model folder, and the lemmatizer is
        /// created from the serialized lemmatizer file path.
        /// </remarks>
        /// <param name="si">Serialization information.</param>
        /// <param name="context">Streaming context.</param>
        protected MorphologyModel(SerializationInfo si, StreamingContext context)
        {
            this.folder      = si.GetString("folder");
            this.groups      = (List <TagGroup>)si.GetValue("groups", typeof(List <TagGroup>));
            this.punctuation = (List <string>)si.GetValue("punctuation", typeof(List <string>));

            // The model files are only read here, so open them read-only:
            // File.Open(path, FileMode.Open) would request read/write access
            // without sharing and fail on read-only files.
            using (FileStream nGrammStream = File.OpenRead(Path.Combine(folder, "nGramm.mdl")))
            using (FileStream entClassStream = File.OpenRead(Path.Combine(folder, "entClass.mdl")))
            {
                this.nGramm   = Serializer.Deserialize <TagNGramm>(nGrammStream);
                this.entClass = new DawgEntropyClassModel();
                entClass.Load(entClassStream);
            }

            this.sentenceDelimiters = (List <string>)si.GetValue("delimiters", typeof(List <string>));
            this.lemmaFile          = si.GetString("lemmaFile");

            // NOTE(review): the stream is handed to the Lemmatizer and is not
            // disposed here; whether the Lemmatizer keeps reading from it after
            // construction is not visible from this code - confirm ownership.
            FileStream lemmaStream = File.OpenRead(this.lemmaFile);
            this.lemmatizer = new Lemmatizer(lemmaStream);

            this.sentencePattern = (Regex)si.GetValue("sentencePattern", typeof(Regex));
            this.lexemPattern    = (Regex)si.GetValue("lexemPattern", typeof(Regex));

            // The key is spelled "minLenght" in the serialized data; it must stay as is.
            this.minLengh = si.GetInt32("minLenght");
            serviceTags   = (Dictionary <string, Tag>)si.GetValue("serviceTags",
                                                                  typeof(Dictionary <string, Tag>));
        }