/// <summary>
/// Creates a morpho-form from a line of the shape "ending:attributes".
/// The text before the first COLON (up to the first whitespace character) is the word ending;
/// the text after the COLON is scanned for morpho-attribute names made of letters and digits.
/// </summary>
/// <param name="morphoType">Morpho-type whose <c>MorphoAttributeGroup</c> is used to resolve attribute pairs.</param>
/// <param name="lineBase">Pointer to the '\0'-terminated line text.</param>
/// <returns>
/// The created morpho-form, or <c>null</c> when no COLON is found or the COLON position does not
/// fit into the ending buffer (the model-loading error callback is invoked in that case).
/// </returns>
private MorphoFormNative? CreateMorphoForm(MorphoTypeNative morphoType, char* lineBase)
{
    #region find index-of-COLON & check on length
    var index = IndexOf(lineBase, COLON);
    if ((index == -1) || (ENDING_BUFFER_SIZE <= index))
    {
        _ModelLoadingErrorCallback("Index of COLON is undefined or length the line is too long", StringsHelper.ToString(lineBase));
        return (null);
    }
    #endregion

    #region fill '_ENDING_LOWER_BUFFER'
    // Copy the lower-cased ending; stop at the COLON or at the first whitespace, whichever comes first.
    // 'endingLength' is therefore the real length of the ending and may be smaller than 'index'.
    var endingLength = 0;
    for (char* ptr = lineBase; endingLength < index; ptr++, endingLength++)
    {
        var ch = *ptr;
        if ((_CHARTYPE_MAP[ch] & CharType.IsWhiteSpace) == CharType.IsWhiteSpace)
        {
            break;
        }
        _ENDING_LOWER_BUFFER[endingLength] = _LOWER_INVARIANT_MAP[ch];
    }
    _ENDING_LOWER_BUFFER[endingLength] = '\0';
    #endregion

    #region fill '_MorphoAttributePairs_Buffer'
    _MorphoAttributePairs_Buffer.Clear();
    for (char* ptr = lineBase + index + 1; ; ptr++)
    {
        var ch = *ptr;
        if (ch == '\0')
        {
            break;
        }

        // Skip separators until the first letter of an attribute name.
        if ((_CHARTYPE_MAP[ch] & CharType.IsLetter) != CharType.IsLetter)
        {
            continue;
        }

        // Consume the attribute name: a run of letters and digits.
        var len = 0;
        for (; ; ptr++)
        {
            ch = *ptr;
            if (ch == '\0')
            {
                break;
            }
            var ct = _CHARTYPE_MAP[ch];
            if ((ct & CharType.IsLetter) != CharType.IsLetter &&
                (ct & CharType.IsDigit) != CharType.IsDigit)
            {
                break;
            }
            len++;
        }

        if (len != 0)
        {
            // 'ptr' now points just past the name, so the name starts at 'ptr - len'.
            var morphoAttribute = default(MorphoAttributeEnum);
            if (_EnumParserMorphoAttribute.TryParse(ptr - len, len, ref morphoAttribute))
            {
                var map = _MorphoAttributeList.TryGetMorphoAttributePair(morphoType.MorphoAttributeGroup, morphoAttribute);
                if (map.HasValue)
                {
                    _MorphoAttributePairs_Buffer.Add(map.Value);
                }
#if DEBUG
                // Reported in DEBUG builds only: TOO MANY ERRORS AFTER last (2016.12.28)
                // getting morpho-dictionaries from 'LangAnalyzer-[ilook]'.
                else
                {
                    var attr = new string(ptr - len, 0, len);
                    _ModelLoadingErrorCallback("Error in morpho-attribute: '" + attr + '\'', StringsHelper.ToString(lineBase));
                }
#endif
            }
            else
            {
                var attr = new string(ptr - len, 0, len);
                _ModelLoadingErrorCallback("Unknown morpho-attribute: '" + attr + '\'', StringsHelper.ToString(lineBase));
            }
        }

        // The inner loop may have stopped on the terminator; do not step past it.
        if (ch == '\0')
        {
            break;
        }
    }
    #endregion

    #region Allocate native-memory for endingOfWord
    IntPtr endingPtr;
    IntPtr endingUpperPtr;
    if ((endingLength == 1) && (_ENDING_LOWER_BUFFER[0] == UNDERLINE))
    {
        // A lone UNDERLINE stands for an empty ending.
        endingPtr = _EMPTY_STRING;
        endingUpperPtr = _EMPTY_STRING;
    }
    else
    {
        #region ending-in-original-case
        // Look the ending up by the temporary buffer; allocate a copy only when it is not stored yet.
        endingPtr = new IntPtr(_ENDING_LOWER_BUFFER);
        if (_EndingDictionary.TryGetValue(endingPtr, out IntPtr existsPtr))
        {
            endingPtr = existsPtr;
        }
        else
        {
            // Copy exactly the characters written above ('endingLength'), not 'index':
            // when the ending is followed by whitespace before the COLON, 'index' would
            // over-allocate and copy stale buffer contents past the terminator.
            AllocHGlobalAndCopy(_ENDING_LOWER_BUFFER, endingLength, out endingPtr);
            _EndingDictionary.Add(endingPtr, endingPtr);
        }
        #endregion

        #region ending-in-upper-case
        StringsHelper.ToUpperInvariant(_ENDING_LOWER_BUFFER, _ENDING_UPPER_BUFFER);
        endingUpperPtr = new IntPtr(_ENDING_UPPER_BUFFER);
        if (_EndingDictionary.TryGetValue(endingUpperPtr, out existsPtr))
        {
            endingUpperPtr = existsPtr;
        }
        else
        {
            // Same length as the lower-case ending (see the note above).
            AllocHGlobalAndCopy(_ENDING_UPPER_BUFFER, endingLength, out endingUpperPtr);
            _EndingDictionary.Add(endingUpperPtr, endingUpperPtr);
        }
        #endregion
    }
    #endregion

    var morphoForm = new MorphoFormNative((char*)endingPtr, (char*)endingUpperPtr, _MorphoAttributePairs_Buffer);
    return (morphoForm);
}
/// <summary>
/// Reads a file of words and adds every well-formed word to the tree dictionary.
/// Lines that fail to parse, name an unknown morpho-type, or carry a part-of-speech that
/// does not match their morpho-type are reported through the model-loading error callback and skipped.
/// </summary>
/// <param name="filename">Full path to the words file.</param>
/// <param name="nounType">Noun type attached to words whose morpho-type attribute group includes <c>NounType</c>.</param>
private void ReadWords(string filename, MorphoAttributeEnum nounType)
{
    var parsed = default(ParsedLineWords_unsafe);

    foreach (var line in ReadFile(filename))
    {
        fixed (char* lineBase = line)
        {
            if (!ParseLineWords(lineBase, ref parsed))
            {
                _ModelLoadingErrorCallback("Wrong line format", line);
                continue;
            }

            MorphoTypeNative morphoType = GetMorphoTypeByName((IntPtr)parsed.MorphoTypeName);
            if (morphoType == null)
            {
                _ModelLoadingErrorCallback("Unknown morpho-type", line);
                continue;
            }

            if (!StringsHelper.IsEqual((IntPtr)parsed.PartOfSpeech, _PartOfSpeechToNativeStringMapper[morphoType.PartOfSpeech]))
            {
                _ModelLoadingErrorCallback("Wrong part-of-speech", line);
                continue;
            }

            // Morpho-types without morpho-forms contribute nothing to the dictionary.
            if (!morphoType.HasMorphoForms)
            {
                continue;
            }

            // Resolve the noun-type pair only for morpho-types whose attribute group includes NounType.
            MorphoAttributePair? nounTypePair = null;
            if ((morphoType.MorphoAttributeGroup & MorphoAttributeGroupEnum.NounType) == MorphoAttributeGroupEnum.NounType)
            {
                nounTypePair = _MorphoAttributeList.GetMorphoAttributePair(MorphoAttributeGroupEnum.NounType, nounType);
            }

            // Base of the word = word minus the morpho-type's first ending;
            // when the ending is longer than the word, the whole word is taken as the base.
            var baseLength = parsed.WordLength - StringsHelper.GetLength(morphoType.FirstEnding);
            if (baseLength < 0)
            {
                baseLength = parsed.WordLength;
            }

            // Allocate native memory for the base of the word (or reuse an already stored copy).
            IntPtr basePtr;
            if (0 < baseLength)
            {
                // Terminate the base in place inside the pinned line so it can serve as a lookup key.
                // 'line' is not used after this point in the iteration.
                lineBase[baseLength] = '\0';
                basePtr = new IntPtr(lineBase);

                if (!_EndingDictionary.TryGetValue(basePtr, out IntPtr storedPtr))
                {
                    AllocHGlobalAndCopy(lineBase, baseLength, out basePtr);
                    _EndingDictionary.Add(basePtr, basePtr);
                }
                else
                {
                    basePtr = storedPtr;
                }
            }
            else
            {
                basePtr = _EMPTY_STRING;
            }

            _TreeDictionary.AddWord((char*)basePtr, morphoType, ref nounTypePair);
        }
    }
}