/// <summary>
/// Looks up the forms of <paramref name="word"/> and returns the unique ones,
/// restricted by <paramref name="partOfSpeechFilter"/>.
/// </summary>
/// <param name="word">Word to look up; it is upper-cased (invariant) before the model lookup.</param>
/// <param name="partOfSpeechFilter">Filter handed to <c>FillUniqueWordFormsDictionary</c>.</param>
/// <returns>
/// A <c>WordForms_t</c> built for <paramref name="word"/>. <c>Forms</c> is assigned only when the
/// model lookup succeeds; in that case it references the shared <c>_WordForms</c> list, which this
/// method clears and refills on every successful lookup.
/// </returns>
public WordForms_t GetWordFormsByPartOfSpeech(string word, PartOfSpeechEnum partOfSpeechFilter)
{
    var wordForms = new WordForms_t(word);

    var upperWord = StringsHelper.ToUpperInvariant(word);
    var found = _MorphoModel.GetWordForms(upperWord, _WordForms);
    if (!found)
    {
        return wordForms;
    }

    // Presumably populates _UniqueWordFormsDictionary from the forms just looked up - confirm in the helper.
    FillUniqueWordFormsDictionary(partOfSpeechFilter);

    // Reuse the lookup buffer as the output list: drop the raw forms, keep only the unique ones.
    _WordForms.Clear();
    foreach (var entry in _UniqueWordFormsDictionary)
    {
        _WordForms.Add(new WordForm_t(entry.Key, entry.Value));
    }
    wordForms.Forms = _WordForms;

    return wordForms;
}
/// <summary>
/// Creates a morpho-form from an ending and its morpho-attribute pairs.
/// </summary>
/// <param name="ending">Ending text; stored interned, together with an interned upper-case (invariant) copy.</param>
/// <param name="morphoAttributePair">
/// Attribute pairs of the form. A non-empty list is copied into a new array; an empty list maps to the shared <c>EMPTY</c> instance.
/// </param>
internal MorphoForm(string ending, List<MorphoAttributePair> morphoAttributePair)
{
    Ending      = string.Intern(ending);
    EndingUpper = string.Intern(StringsHelper.ToUpperInvariant(Ending));

    MorphoAttributePairs = (morphoAttributePair.Count == 0)
        ? EMPTY
        : morphoAttributePair.ToArray();
}
/// <summary>
/// Runs the debug variant of the <c>_VTS</c> run on an upper-cased (invariant) copy of <paramref name="text"/>.
/// </summary>
/// <param name="text">Input text; the string itself is not modified, an upper-cased copy is passed on.</param>
/// <returns>
/// <c>TERMPROBABILITY_EMPTY</c> when <c>text.IsNullOrWhiteSpace()</c> is true;
/// otherwise whatever <c>_VTS.Run_Debug</c> returns for the upper-cased text.
/// </returns>
public IReadOnlyList<TermProbability> Run_Debug(string text)
{
    if (!text.IsNullOrWhiteSpace())
    {
        var upperText = StringsHelper.ToUpperInvariant(text);
        return _VTS.Run_Debug(upperText);
    }

    return TERMPROBABILITY_EMPTY;
}
/// <summary>
/// Adds a word (and, through its morpho-type, all of its forms) to the dictionary.
/// </summary>
/// <param name="word">The word to add.</param>
/// <param name="morphoType">Morpho-type of the word; nothing is added when it has no morpho-forms.</param>
/// <param name="nounType">Optional noun-type attribute pair, passed through to the created <c>BaseMorphoForm</c>.</param>
unsafe public void AddWord(string word, MorphoType morphoType, MorphoAttributePair? nounType)
{
    var morphoForms = morphoType.MorphoForms;
    if (morphoForms.Length == 0)
    {
        return;
    }

    // The base is the word without the ending of the first morpho-form;
    // when that ending is longer than the word, the whole word is used as the base.
    var baseLength = word.Length - morphoForms[0].Ending.Length;
    var wordBase   = (baseLength < 0) ? word : word.Substring(0, baseLength);

    var baseMorphoForm = new BaseMorphoForm(wordBase, morphoType, nounType);

    // The dictionary is keyed by the upper-cased (invariant) base.
    fixed (char* wordBaseUpperPtr = StringsHelper.ToUpperInvariant(wordBase))
    {
        AddWordPart(wordBaseUpperPtr, baseMorphoForm);
    }
}
/// <summary>
/// Creates a morpho-form from an ending and its morpho-attribute pairs.
/// </summary>
/// <param name="ending">Ending text; stored interned, together with an interned upper-case (invariant) copy.</param>
/// <param name="morphoAttributePair">
/// Attribute pairs of the form. An empty list maps to the shared <c>EMPTY</c> instance; otherwise the list is copied into a new array.
/// </param>
internal MorphoForm(string ending, List<MorphoAttributePair> morphoAttributePair)
{
    // Both spellings are interned so equal endings share one string instance.
    _Ending      = string.Intern(ending);
    _EndingUpper = string.Intern(StringsHelper.ToUpperInvariant(_Ending));

    if (morphoAttributePair.Count == 0)
    {
        _MorphoAttributePairs = EMPTY;
    }
    else
    {
        _MorphoAttributePairs = morphoAttributePair.ToArray();
    }
}
/// <summary>
/// Adds a word (and, through its morpho-type, all of its forms) to the dictionary.
/// </summary>
/// <param name="word">The word to add.</param>
/// <param name="morphoType">Morpho-type of the word; nothing is added when it has no morpho-forms.</param>
/// <param name="nounType">Optional noun-type attribute pair, passed through to the created <c>BaseMorphoForm</c>.</param>
unsafe public void AddWord(string word, MorphoType morphoType, MorphoAttributePair? nounType)
{
    var forms = morphoType.MorphoForms;
    if (forms.Length != 0)
    {
        // Strip the ending of the first morpho-form to get the base;
        // keep the whole word when that ending is longer than the word itself.
        var stem       = word;
        var stemLength = word.Length - forms[0].Ending.Length;
        if (stemLength >= 0)
        {
            stem = word.Substring(0, stemLength);
        }

        var baseMorphoForm = new BaseMorphoForm(stem, morphoType, nounType);
        var stemUpper      = StringsHelper.ToUpperInvariant(stem);

        // AddWordPart takes a raw char pointer, so the upper-cased base is pinned for the call.
        fixed (char* stemUpperPtr = stemUpper)
        {
            AddWordPart(stemUpperPtr, baseMorphoForm);
        }
    }
}
/// <summary>
/// Looks up the forms of <paramref name="word"/> and returns the unique ones,
/// restricted by <paramref name="partOfSpeechFilter"/>.
/// </summary>
/// <param name="word">Word to look up; it is upper-cased (invariant) before the model lookup.</param>
/// <param name="partOfSpeechFilter">Filter handed to <c>FillUniqueWordFormsDictionary</c>.</param>
/// <returns>
/// A <c>WordForms</c> built for <paramref name="word"/>. <c>Forms</c> is assigned only when the
/// model lookup succeeds; it then references the shared <c>_wordForms</c> list, which is cleared
/// and refilled by every successful call.
/// </returns>
public WordForms GetWordFormsByPartOfSpeech(string word, PartOfSpeechEnum partOfSpeechFilter)
{
    var output = new WordForms(word);

    if (_morphoModel.GetWordForms(StringsHelper.ToUpperInvariant(word), _wordForms))
    {
        // Presumably fills _uniqueWordFormsDictionary from the looked-up forms - confirm in the helper.
        FillUniqueWordFormsDictionary(partOfSpeechFilter);

        // The lookup buffer doubles as the result list.
        _wordForms.Clear();
        foreach (var pair in _uniqueWordFormsDictionary)
        {
            var wordForm = new WordForm(pair.Key, pair.Value);
            _wordForms.Add(wordForm);
        }

        output.Forms = _wordForms;
    }

    return output;
}
/// <summary>
/// Runs <c>_VTS_Offset</c> over an upper-cased (invariant) copy of <paramref name="text"/>.
/// </summary>
/// <param name="text">Input text; an upper-cased copy is processed, the argument itself is left untouched.</param>
/// <returns>
/// <c>TERMPROBABILITY_OFFSET_EMPTY</c> when the upper-casing helper flags the input as null/whitespace;
/// otherwise the result of <c>_VTS_Offset.Run</c>.
/// </returns>
unsafe public IReadOnlyList<TermProbability_Offset> Run_Offset(string text)
{
    var upperText = StringsHelper.ToUpperInvariant(text, out var isBlank);
    if (!isBlank)
    {
        // The runner works on a raw char pointer plus length, so the string is pinned for the call.
        fixed (char* upperTextPtr = upperText)
        {
            return _VTS_Offset.Run(upperTextPtr, upperText.Length);
        }
    }

    return TERMPROBABILITY_OFFSET_EMPTY;
}
/// <summary>
/// Returns the morphology of <paramref name="word"/> for the given mode.
/// </summary>
/// <param name="word">The word; it is upper-cased (invariant) before being handed to <c>GetWordMorphology_NoToUpper</c>.</param>
/// <param name="wordFormMorphologyMode">Mode passed through unchanged.</param>
/// <returns>The result of <c>GetWordMorphology_NoToUpper</c> for the upper-cased word.</returns>
public WordMorphology GetWordMorphology(string word, WordFormMorphologyModeEnum wordFormMorphologyMode)
    => GetWordMorphology_NoToUpper(StringsHelper.ToUpperInvariant(word), wordFormMorphologyMode);
/// <summary>
/// Creates a morpho-form from a text line.
/// The part of the line before <c>COLON</c> is read as the ending, the part after it as a list of
/// morpho-attribute names.
/// </summary>
/// <param name="morphoType">Morpho-type whose <c>MorphoAttributeGroup</c> is used to resolve attribute pairs.</param>
/// <param name="lineBase">
/// Pointer to the line; it must be zero-terminated, because the attribute scan stops only at <c>'\0'</c>.
/// </param>
/// <returns>
/// The created <c>MorphoFormNative</c>, or <c>null</c> (after reporting through
/// <c>_ModelLoadingErrorCallback</c>) when <c>COLON</c> is not found or its index is not below
/// <c>ENDING_BUFFER_SIZE</c>.
/// </returns>
private MorphoFormNative?CreateMorphoForm(MorphoTypeNative morphoType, char *lineBase)
{
    #region find index-of-COLON & check on length
    var index = IndexOf(lineBase, COLON);
    if ((index == -1) || (ENDING_BUFFER_SIZE <= index))
    {
        _ModelLoadingErrorCallback("Index of COLON is undefined or length the line is too long", StringsHelper.ToString(lineBase));
        return(null);
    }
    #endregion

    #region fill '_ENDING_LOWER_BUFFER'
    // Copy the ending (mapped through _LOWER_INVARIANT_MAP) up to the colon or the first
    // whitespace character, whichever comes first; 'i' ends up as the number of chars written.
    var i = 0;
    for (char *ptr = lineBase; i < index; ptr++, i++)
    {
        var ch = *ptr;
        if ((_CHARTYPE_MAP[ch] & CharType.IsWhiteSpace) == CharType.IsWhiteSpace)
        {
            break;
        }
        _ENDING_LOWER_BUFFER[i] = _LOWER_INVARIANT_MAP[ch];
    }
    // Zero-terminate the ending in the scratch buffer.
    _ENDING_LOWER_BUFFER[i] = '\0';
    #endregion

    #region fill '_MorphoAttributePairs_Buffer'
    _MorphoAttributePairs_Buffer.Clear();
    // Scan everything after the colon, token by token, until the terminating '\0'.
    for (char *ptr = lineBase + index + 1; ; ptr++)
    {
        var ch = *ptr;
        if (ch == '\0')
        {
            break;
        }
        // Skip separators: a token has to start with a letter.
        if ((_CHARTYPE_MAP[ch] & CharType.IsLetter) != CharType.IsLetter)
        {
            continue;
        }

        // Measure the token: a run of letters and digits. Afterwards 'ptr' points at the first
        // char behind the token, so the token itself starts at 'ptr - len'.
        var len = 0;
        for (; ; ptr++)
        {
            ch = *ptr;
            if (ch == '\0')
            {
                break;
            }
            var ct = _CHARTYPE_MAP[ch];
            if ((ct & CharType.IsLetter) != CharType.IsLetter && (ct & CharType.IsDigit) != CharType.IsDigit)
            {
                break;
            }
            len++;
        }

        if (len != 0)
        {
            var morphoAttribute = default(MorphoAttributeEnum);
            if (_EnumParserMorphoAttribute.TryParse(ptr - len, len, ref morphoAttribute))
            {
                // Known attribute name: keep it only if a pair exists for this morpho-type's attribute group.
                var map = _MorphoAttributeList.TryGetMorphoAttributePair(morphoType.MorphoAttributeGroup, morphoAttribute);
                if (map.HasValue)
                {
                    _MorphoAttributePairs_Buffer.Add(map.Value);
                }
#if DEBUG
                // Only DEBUG builds report a parsed attribute that has no pair for the group;
                // in other builds such an attribute is skipped silently.
                //*
                //TOO MANY ERRORS AFTER last (2016.12.28) getting morpho-dcitionaries from 'LangAnalyzerStd-[ilook]'
                else
                {
                    var attr = new string( ptr - len, 0, len );
                    _ModelLoadingErrorCallback("Error in morpho-attribute: '" + attr + '\'', StringsHelper.ToString(lineBase));
                }
                //*/
#endif
            }
            else
            {
                // The token is not a known morpho-attribute name: report it and carry on with the next token.
                var attr = new string(ptr - len, 0, len);
                _ModelLoadingErrorCallback("Unknown morpho-attribute: '" + attr + '\'', StringsHelper.ToString(lineBase));
            }
        }

        // The token scan may have stopped at the terminator; leave before the outer 'ptr++' steps past it.
        if (ch == '\0')
        {
            break;
        }
    }
    #endregion

    #region Allocate native-memory for endingOfWord
    IntPtr endingPtr;
    IntPtr endingUpperPtr;
    // An ending consisting of the single UNDERLINE character is mapped to the shared _EMPTY_STRING pointer.
    if ((i == 1) && (_ENDING_LOWER_BUFFER[0] == UNDERLINE))
    {
        endingPtr = _EMPTY_STRING;
        endingUpperPtr = _EMPTY_STRING;
    }
    else
    {
        #region ending-in-original-case
        // Probe _EndingDictionary with a pointer to the scratch buffer; presumably the dictionary
        // compares the pointed-to text rather than the addresses - confirm against its comparer.
        endingPtr = new IntPtr(_ENDING_LOWER_BUFFER);
        if (_EndingDictionary.TryGetValue(endingPtr, out IntPtr existsPtr))
        {
            // Reuse the copy registered earlier.
            endingPtr = existsPtr;
        }
        else
        {
            // NOTE(review): the length passed here is 'index', not 'i'; they differ when the ending
            // loop stopped early on whitespace - confirm AllocHGlobalAndCopy copes with that.
            AllocHGlobalAndCopy(_ENDING_LOWER_BUFFER, index, out endingPtr);
            _EndingDictionary.Add(endingPtr, endingPtr);
        }
        #endregion

        #region ending-in-upper-case
        // Same lookup-or-allocate sequence for the upper-cased ending.
        StringsHelper.ToUpperInvariant(_ENDING_LOWER_BUFFER, _ENDING_UPPER_BUFFER);
        endingUpperPtr = new IntPtr(_ENDING_UPPER_BUFFER);
        if (_EndingDictionary.TryGetValue(endingUpperPtr, out existsPtr))
        {
            endingUpperPtr = existsPtr;
        }
        else
        {
            AllocHGlobalAndCopy(_ENDING_UPPER_BUFFER, index, out endingUpperPtr);
            _EndingDictionary.Add(endingUpperPtr, endingUpperPtr);
        }
        #endregion
    }
    #endregion

    var morphoForm = new MorphoFormNative((char *)endingPtr, (char *)endingUpperPtr, _MorphoAttributePairs_Buffer);
    return(morphoForm);
}
/// <summary>
/// Returns the morphological information of a word, using <c>WordFormMorphologyModeEnum.Default</c>.
/// </summary>
/// <param name="word">The word; it is upper-cased (invariant) before being handed to <c>GetWordMorphology_NoToUpper</c>.</param>
/// <returns>The result of <c>GetWordMorphology_NoToUpper</c> for the upper-cased word.</returns>
public WordMorphology_t GetWordMorphology(string word)
    => GetWordMorphology_NoToUpper(StringsHelper.ToUpperInvariant(word), WordFormMorphologyModeEnum.Default);
/// <summary>
/// Returns the morphological information of a word for the given mode.
/// </summary>
/// <param name="word">The word; it is upper-cased (invariant) before being handed to <c>GetWordMorphology_NoToUpper</c>.</param>
/// <param name="wordFormMorphologyMode">Mode passed through unchanged.</param>
/// <returns>The result of <c>GetWordMorphology_NoToUpper</c> for the upper-cased word.</returns>
public WordMorphology_t GetWordMorphology(string word, WordFormMorphologyModeEnum wordFormMorphologyMode)
{
    var upperWord = StringsHelper.ToUpperInvariant(word);
    return GetWordMorphology_NoToUpper(upperWord, wordFormMorphologyMode);
}