示例#1
0
        /// <summary>
        /// For every stored base morpho-form whose base plus one of its endings matches the
        /// looked-up word, appends all word forms of that base (base + each ending) to
        /// <paramref name="result"/>.
        /// </summary>
        /// <param name="wordPart">Pointer to the part of the word that is compared against the upper-cased endings.</param>
        /// <param name="wordPartLength">Number of characters in <paramref name="wordPart"/>.</param>
        /// <param name="fullWordLength">Length of the whole word being looked up.</param>
        /// <param name="result">List that receives the generated word forms.</param>
        unsafe private void FillWordForms_Core(char *wordPart, int wordPartLength, int fullWordLength, List <WordForm> result)
        {
            if (_BaseMorphoForms == null)
            {
                return;
            }

            foreach (var baseForm in _BaseMorphoForms)
            {
                int stemLength = baseForm.Base.Length;

                // The word must be at least as long as the base and at most base + longest ending.
                bool lengthFits = (stemLength <= fullWordLength) &&
                                  (fullWordLength <= stemLength + baseForm.MorphoType.MaxEndingLength);
                if (!lengthFits)
                {
                    continue;
                }

                var forms = baseForm.MorphoType.MorphoForms;
                foreach (var candidate in forms)
                {
                    var endingLength = candidate.EndingUpper.Length;

                    // The ending has to complete the word exactly and be as long as the remaining part.
                    if ((stemLength + endingLength != fullWordLength) || (endingLength != wordPartLength))
                    {
                        continue;
                    }

                    // An empty remaining part matches an empty ending without comparing characters.
                    if ((wordPartLength != 0) && !StringsHelper.IsEqual(candidate.EndingUpper, wordPart, wordPartLength))
                    {
                        continue;
                    }

                    // First matching ending found: emit every form of this base, then stop scanning its endings.
                    var partOfSpeech = baseForm.MorphoType.PartOfSpeech;
                    foreach (var form in forms)
                    {
                        result.Add(new WordForm(baseForm.Base + form.Ending, partOfSpeech));
                    }
                    break;
                }
            }
        }
示例#2
0
            /// <summary>
            /// Reads a file with words and adds the base of each accepted word to the tree dictionary.
            /// Lines that fail parsing, name an unknown morpho-type, or carry a part-of-speech that
            /// does not match the morpho-type are reported through the model-loading error callback
            /// and skipped.
            /// </summary>
            /// <param name="filename">File to read; passed unchanged to ReadFile (the original note describes it as the full path to the file).</param>
            /// <param name="nounType">Noun type attached to words whose morpho-type belongs to the NounType attribute group.</param>
            private void ReadWords(string filename, MorphoAttributeEnum nounType)
            {
                var lines = ReadFile(filename);

                // One instance reused for all lines; passed by ref to ParseLineWords.
                var plw = default(ParsedLineWords_unsafe);

                foreach (var line in lines)
                {
                    // Pin the line so the pointer-based helpers can work on its characters directly.
                    fixed(char *lineBase = line)
                    {
                        if (!ParseLineWords(lineBase, ref plw))
                        {
                            _ModelLoadingErrorCallback("Wrong line format", line);
                            continue;
                        }

                        MorphoTypeNative morphoType = GetMorphoTypeByName((IntPtr)plw.MorphoTypeName);

                        if (morphoType == null)
                        {
                            _ModelLoadingErrorCallback("Unknown morpho-type", line);
                            continue;
                        }

                        // The part-of-speech text on the line must equal the native string mapped
                        // from the morpho-type's own PartOfSpeech.
                        if (!StringsHelper.IsEqual((IntPtr)plw.PartOfSpeech, _PartOfSpeechToNativeStringMapper[morphoType.PartOfSpeech]))
                        {
                            _ModelLoadingErrorCallback("Wrong part-of-speech", line);
                            continue;
                        }

                        // Morpho-types without morpho-forms are silently ignored (no error reported).
                        if (morphoType.HasMorphoForms)
                        {
                            // Stays null unless the morpho-type's attribute group includes the NounType flag.
                            var nounTypePair = default(MorphoAttributePair?);
                            if ((morphoType.MorphoAttributeGroup & MorphoAttributeGroupEnum.NounType) == MorphoAttributeGroupEnum.NounType)
                            {
                                nounTypePair = _MorphoAttributeList.GetMorphoAttributePair(MorphoAttributeGroupEnum.NounType, nounType);
                            }

                            #region Allocate native-memory for baseOfWord
                            // Base length = word length minus the length of the morpho-type's first ending;
                            // if that would be negative, the whole word length is used instead.
                            var len = plw.WordLength - StringsHelper.GetLength(morphoType.FirstEnding);
                            len = ((0 <= len) ? len : plw.WordLength);

                            IntPtr lineBasePtr;
                            if (0 < len)
                            {
                                // Writes a terminator into the pinned line buffer itself, so `line` is
                                // modified in place from this point on.
                                // NOTE(review): presumes the word starts at the first character of the
                                // line - confirm against ParseLineWords.
                                *(lineBase + len) = '\0';
                                lineBasePtr       = new IntPtr(lineBase);

                                // Reuse the pointer already stored for this base if the dictionary has one;
                                // otherwise copy the base via AllocHGlobalAndCopy and register the copy.
                                // NOTE(review): keys are IntPtr, so the lookup presumably relies on a comparer
                                // that compares the pointed-to text - confirm where _EndingDictionary is created.
                                if (_EndingDictionary.TryGetValue(lineBasePtr, out IntPtr existsPtr))
                                {
                                    lineBasePtr = existsPtr;
                                }
                                else
                                {
                                    AllocHGlobalAndCopy(lineBase, len, out lineBasePtr);
                                    _EndingDictionary.Add(lineBasePtr, lineBasePtr);
                                }
                            }
                            else
                            {
                                // Zero-length base: use the shared empty-string pointer, nothing is allocated.
                                lineBasePtr = _EMPTY_STRING;
                            }
                            #endregion

                            _TreeDictionary.AddWord((char *)lineBasePtr, morphoType, ref nounTypePair);
                        }
                    }
                }
            }
示例#3
0
        /// <summary>
        /// For every stored base morpho-form and each of its endings that completes the looked-up
        /// word, appends the corresponding morphology to <paramref name="result"/>, subject to the
        /// letter-case filter selected by <paramref name="wordFormMorphologyMode"/>.
        /// </summary>
        /// <param name="wordPart">Pointer to the part of the word that is compared against the upper-cased endings.</param>
        /// <param name="wordPartLength">Number of characters in <paramref name="wordPart"/>.</param>
        /// <param name="fullWordLength">Length of the whole word being looked up.</param>
        /// <param name="result">List that receives the matching morphologies.</param>
        /// <param name="wordFormMorphologyMode">Filter applied to the first character of the base form's normal form.</param>
        /// <exception cref="NotImplementedException">
        /// A matching ending was found while the mode is FirstStartsWithUpperAfterLowerLetter or
        /// FirstStartsWithLowerAfterUpperLetter.
        /// </exception>
        unsafe private void FillWordFormMorphologies_Core(char *wordPart, int wordPartLength, int fullWordLength,
                                                          List <WordFormMorphology> result, WordFormMorphologyModeEnum wordFormMorphologyMode)
        {
            if (_BaseMorphoForms == null)
            {
                return;
            }

            foreach (var baseForm in _BaseMorphoForms)
            {
                int stemLength = baseForm.Base.Length;

                // The word must be at least as long as the base and at most base + longest ending.
                bool lengthFits = (stemLength <= fullWordLength) &&
                                  (fullWordLength <= stemLength + baseForm.MorphoType.MaxEndingLength);
                if (!lengthFits)
                {
                    continue;
                }

                foreach (var candidate in baseForm.MorphoType.MorphoForms)
                {
                    var endingLength = candidate.EndingUpper.Length;

                    // The ending has to complete the word exactly and be as long as the remaining part.
                    if ((stemLength + endingLength != fullWordLength) || (endingLength != wordPartLength))
                    {
                        continue;
                    }

                    // An empty remaining part matches an empty ending without comparing characters.
                    if ((wordPartLength != 0) && !StringsHelper.IsEqual(candidate.EndingUpper, wordPart, wordPartLength))
                    {
                        continue;
                    }

                    // Decide whether this match passes the letter-case filter.
                    bool accepted;
                    switch (wordFormMorphologyMode)
                    {
                        case WordFormMorphologyModeEnum.Default:
                            accepted = true;
                            break;

                        case WordFormMorphologyModeEnum.StartsWithLowerLetter:
                            fixed (char *normalFormPtr = baseForm.NormalForm)
                            {
                                var firstChar = *normalFormPtr;

                                // Rejected only when the first character equals its upper-case mapping.
                                accepted = (firstChar == '\0') ||
                                           (*(XlatUnsafe.Inst._UPPER_INVARIANT_MAP + firstChar) != firstChar);
                            }
                            break;

                        case WordFormMorphologyModeEnum.StartsWithUpperLetter:
                            fixed (char *normalFormPtr = baseForm.NormalForm)
                            {
                                var firstChar = *normalFormPtr;

                                // Rejected only when the first character differs from its upper-case mapping.
                                accepted = (firstChar == '\0') ||
                                           (*(XlatUnsafe.Inst._UPPER_INVARIANT_MAP + firstChar) == firstChar);
                            }
                            break;

                        case WordFormMorphologyModeEnum.FirstStartsWithUpperAfterLowerLetter:
                        case WordFormMorphologyModeEnum.FirstStartsWithLowerAfterUpperLetter:
                            throw new NotImplementedException();

                        default:
                            // Any other mode adds nothing.
                            accepted = false;
                            break;
                    }

                    if (accepted)
                    {
                        result.Add(new WordFormMorphology(baseForm, MorphoAttributePair.GetMorphoAttribute(baseForm, candidate)));
                    }
                }
            }
        }