/// <summary>
/// Reads a words file and adds every well-formed entry to the tree dictionary.
/// Each line is expected to hold exactly three fields separated by
/// WORDS_DICTIONARY_SEPARATOR: the word, the morpho-type name and the part-of-speech.
/// Malformed lines are reported through the model-loading error callback and skipped.
/// </summary>
/// <param name="filename">Full path to the words file.</param>
/// <param name="nounType">Noun type; attached only to words whose morpho-type attribute group includes NounType.</param>
private void ReadWords(string filename, MorphoAttributeEnum nounType)
{
    foreach (var line in ReadFile(filename))
    {
        var fields = line.Split(WORDS_DICTIONARY_SEPARATOR, StringSplitOptions.RemoveEmptyEntries);
        if (fields.Length != 3)
        {
            _ModelLoadingErrorCallback("Wrong line format", line);
            continue;
        }

        // fields[1] names the morpho-type the word belongs to.
        MorphoType morphoType = GetMorphoTypeByName(fields[1]);
        if (morphoType == null)
        {
            _ModelLoadingErrorCallback("Unknown morpho-type", line);
            continue;
        }

        // fields[2] must match the textual part-of-speech of the resolved morpho-type.
        if (fields[2] != _PartOfSpeechStringDictionary[morphoType.PartOfSpeech])
        {
            _ModelLoadingErrorCallback("Wrong part-of-speech", line);
            continue;
        }

        // The noun type is resolved only when the morpho-type's attribute group has the NounType flag.
        MorphoAttributePair? nounTypePair = null;
        if ((morphoType.MorphoAttributeGroup & MorphoAttributeGroupEnum.NounType) == MorphoAttributeGroupEnum.NounType)
        {
            nounTypePair = _MorphoAttributeList.GetMorphoAttributePair(MorphoAttributeGroupEnum.NounType, nounType);
        }

        _TreeDictionary.AddWord(fields[0], morphoType, nounTypePair);
    }
}
/// <summary>
/// Creates a morpho-form from a line of the form "ending : attribute-list".
/// </summary>
/// <param name="morphoType">Morpho-type whose attribute group is used to resolve the attributes.</param>
/// <param name="line">Source line; the text before the first ':' is the ending, the rest is the attribute list.</param>
/// <param name="morphoAttributePairs">Caller-supplied buffer; it is cleared and refilled with the attributes resolved from the line.</param>
/// <returns>A morpho-form built from the lower-cased ending and the resolved attributes.</returns>
/// <exception cref="MorphoFormatException">The line contains no ':' separator.</exception>
private MorphoForm CreateMorphoForm(MorphoType morphoType, string line, List<MorphoAttributePair> morphoAttributePairs)
{
    int separatorIndex = line.IndexOf(':');
    if (separatorIndex < 0)
    {
        throw (new MorphoFormatException());
    }

    var endingText = line.Substring(0, separatorIndex).Trim();
    var ending = StringsHelper.ToLowerInvariant(endingText);
    // The EMPTY_ENDING marker stands for "no ending at all".
    if (ending == EMPTY_ENDING)
    {
        ending = string.Empty;
    }

    morphoAttributePairs.Clear();

    var attributeList = line.Substring(separatorIndex + 1);
    var tokens = attributeList.Split(MORPHO_ATTRIBUTE_SEPARATOR, StringSplitOptions.RemoveEmptyEntries);
    foreach (var rawToken in tokens)
    {
        var token = rawToken.Trim();
        if (token.Length == 0)
        {
            continue;
        }

        MorphoAttributeEnum parsedAttribute;
        if (!Enum.TryParse(token, true, out parsedAttribute))
        {
            _ModelLoadingErrorCallback("Unknown morpho-attribute: '" + token + '\'', line);
            continue;
        }

        var pair = _MorphoAttributeList.TryGetMorphoAttributePair(morphoType.MorphoAttributeGroup, parsedAttribute);
        if (pair.HasValue)
        {
            morphoAttributePairs.Add(pair.Value);
            continue;
        }

#if DEBUG
        // Reported only in DEBUG builds: the dictionaries taken from 'lingvo-[ilook]'
        // on 2016.12.28 produce too many of these errors.
        _ModelLoadingErrorCallback("Error in morpho-attribute: '" + token + '\'', line);
#endif
    }

    return new MorphoForm(ending, morphoAttributePairs);
}
/// <summary>
/// Creates a base morpho-form: a word base together with its morpho-type and optional noun type.
/// The normal form is the base followed by the ending of the morpho-type's first morpho-form;
/// when the morpho-type has no morpho-forms the normal form equals the base.
/// </summary>
/// <param name="_base">Word base (the word without its ending).</param>
/// <param name="morphoType">Morpho-type of the word.</param>
/// <param name="nounType">Noun type, or null when not applicable.</param>
public BaseMorphoForm(string _base, MorphoType morphoType, MorphoAttributePair? nounType)
{
    _NounType = nounType;
    // The base is stored as received; the original author marked interning it as "bad".
    _Base = _base;

    var forms = morphoType.MorphoForms;
    _NormalForm = (forms.Length != 0)
        ? _Base + forms[0].Ending
        : _Base;

    _MorphoType = morphoType;
}
/// <summary>
/// Creates a morpho-type (paired with its name) from a line.
/// The line must match MORPHOTYPE_PREFIX_REGEXP: capture group 1 is the prefix,
/// capture group 2 is the part-of-speech, and the text after the prefix is the morpho-type name.
/// </summary>
/// <param name="line">Source line describing the morpho-type.</param>
/// <returns>
/// The name/morpho-type pair, or null when the line does not match the prefix pattern
/// or its part-of-speech is unknown (the latter is also reported through the error callback).
/// </returns>
private MorphoType_pair_t? CreateMorphoTypePair(string line)
{
    var m = MORPHOTYPE_PREFIX_REGEXP.Match(line);
    // Regex.Match never returns null; a failed match is signalled by Match.Success.
    // The group-count guard is kept so Groups[1] and Groups[2] are known to exist.
    if (!m.Success || m.Groups.Count < 3)
    {
        return (null);
    }

    string prefix = m.Groups[1].Value;
    string pos = m.Groups[2].Value;
    // NOTE(review): the name is cut at prefix.Length, which assumes the prefix starts at index 0.
    // The legacy implementation rejected lines whose match index was not 0 — confirm that
    // MORPHOTYPE_PREFIX_REGEXP is anchored at the start of the line.
    string name = line.Substring(prefix.Length);

    var partOfSpeech = default(PartOfSpeechEnum);
    if (!Enum.TryParse(pos, true, out partOfSpeech))
    {
        _ModelLoadingErrorCallback("Unknown part-of-speech: '" + pos + '\'', line);
        return (null);
    }

    var morphoType = new MorphoType(_PartOfSpeechList.GetPartOfSpeech(partOfSpeech));
    var morphoTypePair = new MorphoType_pair_t()
    {
        Name = name,
        MorphoType = morphoType,
    };
    return (morphoTypePair);
}
/// <summary>
/// Adds a word to the dictionary under its base.
/// The base is the word with as many trailing characters removed as the ending of the
/// morpho-type's first morpho-form has; when the word is shorter than that ending the whole
/// word is used. Nothing is added when the morpho-type has no morpho-forms.
/// </summary>
/// <param name="word">The word.</param>
/// <param name="morphoType">Morpho-type of the word.</param>
/// <param name="nounType">Noun type, or null when not applicable.</param>
unsafe public void AddWord(string word, MorphoType morphoType, MorphoAttributePair? nounType)
{
    if (morphoType.MorphoForms.Length == 0)
    {
        return;
    }

    var stemLength = word.Length - morphoType.MorphoForms[0].Ending.Length;
    string stem;
    if (stemLength < 0)
    {
        // The first form's ending is longer than the word itself: keep the word intact.
        stem = word;
    }
    else
    {
        stem = word.Substring(0, stemLength);
    }

    var baseMorphoForm = new BaseMorphoForm(stem, morphoType, nounType);

    // The upper-cased base is handed to AddWordPart as a pinned character pointer.
    var stemUpper = StringsHelper.ToUpperInvariant(stem);
    fixed (char* stemUpperPtr = stemUpper)
    {
        AddWordPart(stemUpperPtr, baseMorphoForm);
    }
}