Example #1
0
        /// <summary>
        /// Looks up the forms of <paramref name="word"/> (upper-cased before the lookup)
        /// and returns them filtered through <paramref name="partOfSpeechFilter"/>.
        /// </summary>
        /// <param name="word">The word to look up; it is also passed to the result's constructor.</param>
        /// <param name="partOfSpeechFilter">Filter handed to FillUniqueWordFormsDictionary.</param>
        /// <returns>
        /// A result created for <paramref name="word"/>. Its Forms member is assigned only when the model lookup succeeds.
        /// NOTE(review): Forms is assigned the instance-level _WordForms list, which this method clears and refills
        /// on every successful lookup - callers presumably must consume it before the next call; confirm.
        /// </returns>
        public WordForms_t GetWordFormsByPartOfSpeech(string word, PartOfSpeechEnum partOfSpeechFilter)
        {
            var result = new WordForms_t(word);

            // Nothing found in the model: hand back the result without forms.
            if (!_MorphoModel.GetWordForms(StringsHelper.ToUpperInvariant(word), _WordForms))
            {
                return result;
            }

            FillUniqueWordFormsDictionary(partOfSpeechFilter);

            // Reuse the scratch list: replace the raw lookup output with one entry per unique form.
            _WordForms.Clear();
            foreach (var entry in _UniqueWordFormsDictionary)
            {
                _WordForms.Add(new WordForm_t(entry.Key, entry.Value));
            }
            result.Forms = _WordForms;

            return result;
        }
Example #2
0
 /// <summary>
 /// Creates a morpho-form from an ending and its attribute pairs.
 /// </summary>
 /// <param name="ending">The ending; stored interned, together with an interned upper-cased copy.</param>
 /// <param name="morphoAttributePair">Attribute pairs; copied to an array, or replaced by the shared EMPTY value when the list has no items.</param>
 internal MorphoForm(string ending, List <MorphoAttributePair> morphoAttributePair)
 {
     Ending      = string.Intern(ending);
     EndingUpper = string.Intern(StringsHelper.ToUpperInvariant(Ending));

     // Avoid allocating a fresh zero-length array for every attribute-less form.
     if (morphoAttributePair.Count == 0)
     {
         MorphoAttributePairs = EMPTY;
     }
     else
     {
         MorphoAttributePairs = morphoAttributePair.ToArray();
     }
 }
Example #3
0
        /// <summary>
        /// Upper-cases <paramref name="text"/> and passes it to _VTS.Run_Debug.
        /// </summary>
        /// <param name="text">Input text.</param>
        /// <returns>
        /// TERMPROBABILITY_EMPTY when <paramref name="text"/> is null or white-space;
        /// otherwise whatever _VTS.Run_Debug returns for the upper-cased text.
        /// </returns>
        public IReadOnlyList <TermProbability> Run_Debug(string text)
        {
            if (!text.IsNullOrWhiteSpace())
            {
                var upperText = StringsHelper.ToUpperInvariant(text);
                return _VTS.Run_Debug(upperText);
            }

            return TERMPROBABILITY_EMPTY;
        }
 /// <summary>
 /// Adds a word and all of its forms to the dictionary.
 /// </summary>
 /// <param name="word">The word.</param>
 /// <param name="morphoType">The morpho-type; nothing is added when it has no morpho-forms.</param>
 /// <param name="nounType">The noun type (optional).</param>
 unsafe public void AddWord(string word, MorphoType morphoType, MorphoAttributePair?nounType)
 {
     if (morphoType.MorphoForms.Length == 0)
     {
         return;
     }

     // The base is the word minus the length of the first morpho-form's ending;
     // when the ending is longer than the word, the whole word is used.
     var baseLength = word.Length - morphoType.MorphoForms[0].Ending.Length;
     string wordBase;
     if (baseLength < 0)
     {
         wordBase = word;
     }
     else
     {
         wordBase = word.Substring(0, baseLength);
     }

     var baseForm      = new BaseMorphoForm(wordBase, morphoType, nounType);
     var wordBaseUpper = StringsHelper.ToUpperInvariant(wordBase);
     fixed(char *wordBaseUpperPtr = wordBaseUpper)
     {
         AddWordPart(wordBaseUpperPtr, baseForm);
     }
 }
Example #5
0
        /// <summary>
        /// Creates a morpho-form from an ending and its attribute pairs.
        /// </summary>
        /// <param name="ending">The ending; stored interned, together with an interned upper-cased copy.</param>
        /// <param name="morphoAttributePair">Attribute pairs; copied to an array, or replaced by the shared EMPTY value when the list has no items.</param>
        internal MorphoForm(string ending, List <MorphoAttributePair> morphoAttributePair)
        {
            _Ending      = string.Intern(ending);
            _EndingUpper = string.Intern(StringsHelper.ToUpperInvariant(_Ending));

            // Avoid allocating a fresh zero-length array for every attribute-less form.
            if (morphoAttributePair.Count == 0)
            {
                _MorphoAttributePairs = EMPTY;
            }
            else
            {
                _MorphoAttributePairs = morphoAttributePair.ToArray();
            }
        }
Example #6
0
        /// <summary>
        /// Adds a word and all of its forms to the dictionary.
        /// </summary>
        /// <param name="word">The word.</param>
        /// <param name="morphoType">The morpho-type; nothing is added when it has no morpho-forms.</param>
        /// <param name="nounType">The noun type (optional).</param>
        unsafe public void AddWord(string word, MorphoType morphoType, MorphoAttributePair?nounType)
        {
            var morphoForms = morphoType.MorphoForms;
            if (morphoForms.Length == 0)
            {
                return;
            }

            // Strip as many trailing characters as the first morpho-form's ending has;
            // a negative length means the ending is longer than the word, so keep the word whole.
            var stemLength = word.Length - morphoForms[0].Ending.Length;
            var stem       = (stemLength < 0) ? word : word.Substring(0, stemLength);

            var stemForm  = new BaseMorphoForm(stem, morphoType, nounType);
            var stemUpper = StringsHelper.ToUpperInvariant(stem);
            fixed(char *stemUpperPtr = stemUpper)
            {
                AddWordPart(stemUpperPtr, stemForm);
            }
        }
Example #7
0
        /// <summary>
        /// Looks up the forms of <paramref name="word"/> (upper-cased before the lookup)
        /// and returns them filtered through <paramref name="partOfSpeechFilter"/>.
        /// </summary>
        /// <param name="word">The word to look up; it is also passed to the result's constructor.</param>
        /// <param name="partOfSpeechFilter">Filter handed to FillUniqueWordFormsDictionary.</param>
        /// <returns>
        /// A result created for <paramref name="word"/>. Its Forms member is assigned only when the model lookup succeeds.
        /// NOTE(review): Forms is assigned the instance-level _wordForms list, which this method clears and refills
        /// on every successful lookup - callers presumably must consume it before the next call; confirm.
        /// </returns>
        public WordForms GetWordFormsByPartOfSpeech(string word, PartOfSpeechEnum partOfSpeechFilter)
        {
            var result = new WordForms(word);

            var upperWord = StringsHelper.ToUpperInvariant(word);
            var found     = _morphoModel.GetWordForms(upperWord, _wordForms);
            if (!found)
            {
                return result;
            }

            FillUniqueWordFormsDictionary(partOfSpeechFilter);

            // Replace the raw lookup output in the scratch list with one entry per unique form.
            _wordForms.Clear();
            foreach (var pair in _uniqueWordFormsDictionary)
            {
                _wordForms.Add(new WordForm(pair.Key, pair.Value));
            }
            result.Forms = _wordForms;

            return result;
        }
Example #8
0
        /// <summary>
        /// Upper-cases <paramref name="text"/> and runs _VTS_Offset over the pinned upper-cased characters.
        /// </summary>
        /// <param name="text">Input text.</param>
        /// <returns>
        /// TERMPROBABILITY_OFFSET_EMPTY when StringsHelper.ToUpperInvariant reports the input as null or white-space;
        /// otherwise the result of _VTS_Offset.Run.
        /// </returns>
        /// <remarks>
        /// An earlier, commented-out variant pinned <paramref name="text"/> itself and upper-cased it
        /// via StringsHelper.ToUpperInvariantInPlace; this version works on the string returned by ToUpperInvariant.
        /// </remarks>
        unsafe public IReadOnlyList <TermProbability_Offset> Run_Offset(string text)
        {
            var upperText = StringsHelper.ToUpperInvariant(text, out var isBlank);

            if (!isBlank)
            {
                fixed(char *upperTextPtr = upperText)
                {
                    return _VTS_Offset.Run(upperTextPtr, upperText.Length);
                }
            }

            return TERMPROBABILITY_OFFSET_EMPTY;
        }
Example #9
0
        /// <summary>
        /// Upper-cases <paramref name="word"/> and delegates to GetWordMorphology_NoToUpper.
        /// </summary>
        /// <param name="word">The word to analyze.</param>
        /// <param name="wordFormMorphologyMode">Mode passed through unchanged.</param>
        /// <returns>The result of GetWordMorphology_NoToUpper for the upper-cased word.</returns>
        public WordMorphology GetWordMorphology(string word, WordFormMorphologyModeEnum wordFormMorphologyMode)
            => GetWordMorphology_NoToUpper(StringsHelper.ToUpperInvariant(word), wordFormMorphologyMode);
Example #10
0
            /// <summary>
            /// Creates a morpho-form from one text line: the characters before the colon are taken as the ending,
            /// the characters after it are scanned for morpho-attribute names.
            /// </summary>
            /// <param name="morphoType">Morpho-type whose MorphoAttributeGroup is used to resolve attribute pairs.</param>
            /// <param name="lineBase">Pointer to the first character of the line; the line is scanned up to its '\0' terminator.</param>
            /// <returns>
            /// null (after reporting through _ModelLoadingErrorCallback) when no colon is found or its index is
            /// not below ENDING_BUFFER_SIZE; otherwise a new MorphoFormNative built from the ending pointers
            /// and the collected attribute pairs.
            /// </returns>
            private MorphoFormNative?CreateMorphoForm(MorphoTypeNative morphoType, char *lineBase)
            {
                #region find index-of-COLON & check on length
                var index = IndexOf(lineBase, COLON);
                if ((index == -1) || (ENDING_BUFFER_SIZE <= index))
                {
                    _ModelLoadingErrorCallback("Index of COLON is undefined or length the line is too long", StringsHelper.ToString(lineBase));
                    return(null);
                }
                #endregion

                #region fill '_ENDING_LOWER_BUFFER'
                // Copy the characters before the colon, mapped through _LOWER_INVARIANT_MAP,
                // stopping early at the first white-space character. After the loop 'i' is the
                // number of characters written, and the buffer is '\0'-terminated at that position.
                var i = 0;
                for (char *ptr = lineBase; i < index; ptr++, i++)
                {
                    var ch = *ptr;
                    if ((_CHARTYPE_MAP[ch] & CharType.IsWhiteSpace) == CharType.IsWhiteSpace)
                    {
                        break;
                    }
                    _ENDING_LOWER_BUFFER[i] = _LOWER_INVARIANT_MAP[ch];
                }
                _ENDING_LOWER_BUFFER[i] = '\0';
                #endregion

                #region fill '_MorphoAttributePairs_Buffer'
                // Scan everything after the colon for tokens made of a letter followed by letters/digits.
                _MorphoAttributePairs_Buffer.Clear();
                for (char *ptr = lineBase + index + 1; ; ptr++)
                {
                    var ch = *ptr;
                    if (ch == '\0')
                    {
                        break;
                    }

                    // Skip separators: a token can only start with a letter.
                    if ((_CHARTYPE_MAP[ch] & CharType.IsLetter) != CharType.IsLetter)
                    {
                        continue;
                    }

                    // Consume the token; on exit 'ptr' points at the first character after it,
                    // so the token itself starts at 'ptr - len'.
                    var len = 0;
                    for (; ; ptr++)
                    {
                        ch = *ptr;
                        if (ch == '\0')
                        {
                            break;
                        }
                        var ct = _CHARTYPE_MAP[ch];
                        if ((ct & CharType.IsLetter) != CharType.IsLetter &&
                            (ct & CharType.IsDigit) != CharType.IsDigit)
                        {
                            break;
                        }
                        len++;
                    }
                    if (len != 0)
                    {
                        var morphoAttribute = default(MorphoAttributeEnum);
                        if (_EnumParserMorphoAttribute.TryParse(ptr - len, len, ref morphoAttribute))
                        {
                            // The attribute name is known; keep it only if a pair exists for this morpho-type's group.
                            var map = _MorphoAttributeList.TryGetMorphoAttributePair(morphoType.MorphoAttributeGroup, morphoAttribute);
                            if (map.HasValue)
                            {
                                _MorphoAttributePairs_Buffer.Add(map.Value);
                            }
#if DEBUG
                            //*
                            //TOO MANY ERRORS AFTER last (2016.12.28) getting morpho-dcitionaries from 'LangAnalyzerStd-[ilook]'
                            // DEBUG builds only: report a parsed attribute that has no pair for the group.
                            else
                            {
                                var attr = new string( ptr - len, 0, len );
                                _ModelLoadingErrorCallback("Error in morpho-attribute: '" + attr + '\'', StringsHelper.ToString(lineBase));
                            }
                            //*/
#endif
                        }
                        else
                        {
                            // The token is not a recognized morpho-attribute name; report it and continue scanning.
                            var attr = new string(ptr - len, 0, len);
                            _ModelLoadingErrorCallback("Unknown morpho-attribute: '" + attr + '\'', StringsHelper.ToString(lineBase));
                        }
                    }

                    // The token ran up to the terminator: stop here, otherwise the outer
                    // loop's 'ptr++' would step past the end of the line.
                    if (ch == '\0')
                    {
                        break;
                    }
                }
                #endregion

                #region Allocate native-memory for endingOfWord
                //*
                IntPtr endingPtr;
                IntPtr endingUpperPtr;
                // An ending that is exactly one UNDERLINE character is stored as _EMPTY_STRING.
                if ((i == 1) && (_ENDING_LOWER_BUFFER[0] == UNDERLINE))
                {
                    endingPtr      = _EMPTY_STRING;
                    endingUpperPtr = _EMPTY_STRING;
                }
                else
                {
                    #region ending-in-original-case
                    // Reuse an already-registered ending when _EndingDictionary has one; otherwise allocate a copy and register it.
                    // NOTE(review): the key is the address of the shared buffer, so the dictionary presumably
                    // compares by pointed-to text rather than by address - confirm against its comparer.
                    endingPtr = new IntPtr(_ENDING_LOWER_BUFFER);

                    if (_EndingDictionary.TryGetValue(endingPtr, out IntPtr existsPtr))
                    {
                        endingPtr = existsPtr;
                    }
                    else
                    {
                        // NOTE(review): 'index' is passed as the length, but only 'i' characters were written
                        // when the copy loop stopped at white-space (i < index) - confirm this is intended.
                        AllocHGlobalAndCopy(_ENDING_LOWER_BUFFER, index, out endingPtr);
                        _EndingDictionary.Add(endingPtr, endingPtr);
                    }
                    #endregion

                    #region ending-in-upper-case
                    // Same lookup-or-allocate steps for the upper-cased ending; it shares _EndingDictionary with the lower-cased one.
                    StringsHelper.ToUpperInvariant(_ENDING_LOWER_BUFFER, _ENDING_UPPER_BUFFER);

                    endingUpperPtr = new IntPtr(_ENDING_UPPER_BUFFER);

                    if (_EndingDictionary.TryGetValue(endingUpperPtr, out existsPtr))
                    {
                        endingUpperPtr = existsPtr;
                    }
                    else
                    {
                        AllocHGlobalAndCopy(_ENDING_UPPER_BUFFER, index, out endingUpperPtr);
                        _EndingDictionary.Add(endingUpperPtr, endingUpperPtr);
                    }
                    #endregion
                }
                #endregion

                var morphoForm = new MorphoFormNative((char *)endingPtr, (char *)endingUpperPtr, _MorphoAttributePairs_Buffer);
                return(morphoForm);
            }
Example #11
0
        /// <summary>
        /// Gets morphological information for a word.
        /// </summary>
        /// <param name="word">The word; it is upper-cased before the lookup.</param>
        /// <returns>The result of GetWordMorphology_NoToUpper in WordFormMorphologyModeEnum.Default mode.</returns>
        public WordMorphology_t GetWordMorphology(string word)
            => GetWordMorphology_NoToUpper(StringsHelper.ToUpperInvariant(word), WordFormMorphologyModeEnum.Default);
Example #12
0
 /// <summary>
 /// Upper-cases <paramref name="word"/> and delegates to GetWordMorphology_NoToUpper.
 /// </summary>
 /// <param name="word">The word to analyze.</param>
 /// <param name="wordFormMorphologyMode">Mode passed through unchanged.</param>
 /// <returns>The result of GetWordMorphology_NoToUpper for the upper-cased word.</returns>
 public WordMorphology_t GetWordMorphology(string word, WordFormMorphologyModeEnum wordFormMorphologyMode)
 {
     var upperWord = StringsHelper.ToUpperInvariant(word);

     return GetWordMorphology_NoToUpper(upperWord, wordFormMorphologyMode);
 }