/// <summary>
/// Creates a morpho-type pair (name + morpho-type) from a morpho-type header line.
/// </summary>
/// <param name="line">
/// Line to parse. It is matched against <c>MORPHOTYPE_PREFIX_REGEXP</c>: capture group 1 is the
/// prefix and capture group 2 is the part-of-speech name.
/// </param>
/// <returns>
/// The parsed pair, or <c>null</c> when the line does not match the pattern or names an unknown
/// part of speech (the latter is also reported through <c>_ModelLoadingErrorCallback</c>).
/// </returns>
private MorphoTypePair? CreateMorphoTypePair(string line)
{
    var m = MORPHOTYPE_PREFIX_REGEXP.Match(line);

    // A failed match is reported through Match.Success, so test it explicitly instead of relying
    // only on the group count of an empty match. The group-count check still protects the
    // Groups[1] / Groups[2] reads below.
    if (m == null || !m.Success || m.Groups.Count < 3)
    {
        return null;
    }

    string prefix = m.Groups[1].Value;
    string pos = m.Groups[2].Value;

    // The morpho-type name is whatever follows the first prefix.Length characters of the line.
    string name = line.Substring(prefix.Length);

    // Case-insensitive parse of the part-of-speech name.
    if (!Enum.TryParse(pos, true, out PartOfSpeechEnum partOfSpeech))
    {
        _ModelLoadingErrorCallback("Unknown part-of-speech: '" + pos + '\'', line);
        return null;
    }

    var morphoType = new MorphoType(_PartOfSpeechList.GetPartOfSpeech(partOfSpeech));
    return new MorphoTypePair()
    {
        Name = name,
        MorphoType = morphoType,
    };
}
/// <summary>
/// Adds a word to the dictionary. The word is reduced to its base by cutting off as many trailing
/// characters as the ending of the morpho-type's first form has, and the base is stored under its
/// upper-cased (invariant) spelling.
/// </summary>
/// <param name="word">Word to add.</param>
/// <param name="morphoType">Morpho-type of the word; nothing is added when it has no morpho-forms.</param>
/// <param name="nounType">Noun type of the word, if any.</param>
public unsafe void AddWord(string word, MorphoType morphoType, MorphoAttributePair? nounType)
{
    if (morphoType.MorphoForms.Length == 0)
    {
        return;
    }

    int stemLength = word.Length - morphoType.MorphoForms[0].Ending.Length;

    // When the ending is longer than the word itself, the whole word is used as the base.
    string stem;
    if (stemLength < 0)
    {
        stem = word;
    }
    else
    {
        stem = word.Substring(0, stemLength);
    }

    var stemForm = new BaseMorphoForm(stem, morphoType, nounType);
    string stemUpper = StringsHelper.ToUpperInvariant(stem);

    // AddWordPart takes a raw char pointer, so the string is pinned for the duration of the call.
    fixed (char* stemUpperPtr = stemUpper)
    {
        AddWordPart(stemUpperPtr, stemForm);
    }
}
/// <summary>
/// Creates a morpho-form from a line of the shape <c>ending : attribute-list</c>.
/// </summary>
/// <param name="morphoType">Morpho-type whose attribute group is used to resolve each attribute.</param>
/// <param name="line">Line to parse; it must contain a ':' separating the ending from the attributes.</param>
/// <param name="morphoAttributePairs">
/// Caller-supplied buffer. It is cleared, filled with the resolved attribute pairs and then passed
/// to the <c>MorphoForm</c> constructor.
/// </param>
/// <returns>The morpho-form built from the lower-cased ending and the resolved attributes.</returns>
/// <exception cref="MorphoFormatException">The line contains no ':' separator.</exception>
private MorphoForm CreateMorphoForm(MorphoType morphoType, string line, List<MorphoAttributePair> morphoAttributePairs)
{
    int separatorIndex = line.IndexOf(':');
    if (separatorIndex < 0)
    {
        throw new MorphoFormatException();
    }

    // Left of ':' is the ending; the EMPTY_ENDING marker stands for "no ending".
    string rawEnding = StringsHelper.ToLowerInvariant(line.Substring(0, separatorIndex).Trim());
    string ending = (rawEnding == EMPTY_ENDING) ? string.Empty : rawEnding;

    morphoAttributePairs.Clear();

    // Right of ':' is the list of attribute names.
    string attributeList = line.Substring(separatorIndex + 1);
    string[] attributeNames = attributeList.Split(MORPHO_ATTRIBUTE_SEPARATOR, StringSplitOptions.RemoveEmptyEntries);
    foreach (string attributeName in attributeNames)
    {
        string attr = attributeName.Trim();
        if (string.IsNullOrEmpty(attr))
        {
            continue;
        }

        // Case-insensitive parse of the attribute name.
        if (!Enum.TryParse(attr, true, out MorphoAttributeEnum morphoAttribute))
        {
            _ModelLoadingErrorCallback("Unknown morpho-attribute: '" + attr + '\'', line);
            continue;
        }

        var pair = _MorphoAttributeList.TryGetMorphoAttributePair(morphoType.MorphoAttributeGroup, morphoAttribute);
        if (pair.HasValue)
        {
            morphoAttributePairs.Add(pair.Value);
        }
#if DEBUG
        // Reported in DEBUG builds only: too many of these errors appeared after the last
        // (2016.12.28) import of the morpho-dictionaries from 'LangAnalyzerStd-[ilook]'.
        else
        {
            _ModelLoadingErrorCallback("Error in morpho-attribute: '" + attr + '\'', line);
        }
#endif
    }

    return new MorphoForm(ending, morphoAttributePairs);
}
/// <summary>
/// Reads a words file and adds every well-formed entry to the tree dictionary.
/// Each line must split into exactly three fields: the word, the morpho-type name and the
/// part-of-speech string. Malformed lines are reported through <c>_ModelLoadingErrorCallback</c>
/// and skipped.
/// </summary>
/// <param name="filename">File to read; passed as-is to <c>ReadFile</c>.</param>
/// <param name="nounType">
/// Noun type given to words whose morpho-type attribute group includes
/// <c>MorphoAttributeGroupEnum.NounType</c>.
/// </param>
private void ReadWords(string filename, MorphoAttributeEnum nounType)
{
    foreach (var line in ReadFile(filename))
    {
        var fields = line.Split(WORDS_DICTIONARY_SEPARATOR, StringSplitOptions.RemoveEmptyEntries);
        if (fields.Length != 3)
        {
            _ModelLoadingErrorCallback("Wrong line format", line);
            continue;
        }

        MorphoType morphoType = GetMorphoTypeByName(fields[1]);
        if (morphoType == null)
        {
            _ModelLoadingErrorCallback("Unknown morpho-type", line);
            continue;
        }

        // The part of speech written in the line must agree with the one of the morpho-type.
        if (fields[2] != _PartOfSpeechStringDictionary[morphoType.PartOfSpeech])
        {
            _ModelLoadingErrorCallback("Wrong part-of-speech", line);
            continue;
        }

        // Only morpho-types whose attribute group contains NounType carry a noun type.
        MorphoAttributePair? wordNounType = null;
        var nounTypeBits = morphoType.MorphoAttributeGroup & MorphoAttributeGroupEnum.NounType;
        if (nounTypeBits == MorphoAttributeGroupEnum.NounType)
        {
            wordNounType = _MorphoAttributeList.GetMorphoAttributePair(MorphoAttributeGroupEnum.NounType, nounType);
        }

        _TreeDictionary.AddWord(fields[0], morphoType, wordNounType);
    }
}