Esempio n. 1
0
            /// <summary>
            /// Creates a morpho-form from one line of the shape "ending COLON attributes".
            /// </summary>
            /// <param name="morphoType">
            /// Morpho-type the form belongs to; its <c>MorphoAttributeGroup</c> is used to resolve
            /// every attribute parsed from the line.
            /// </param>
            /// <param name="lineBase">Pointer to the line text; the attribute part is read up to the first '\0'.</param>
            /// <returns>
            /// The created morpho-form, or <c>null</c> (after invoking the error callback) when the line
            /// contains no COLON or the COLON index is not below <c>ENDING_BUFFER_SIZE</c>.
            /// </returns>
            private MorphoFormNative? CreateMorphoForm(MorphoTypeNative morphoType, char* lineBase)
            {
                #region find index-of-COLON & check on length
                // The ending is everything before the COLON. ENDING_BUFFER_SIZE is presumably the
                // capacity of the ending buffers (TODO confirm), so the ending plus its '\0' must fit.
                var index = IndexOf(lineBase, COLON);
                if ((index == -1) || (ENDING_BUFFER_SIZE <= index))
                {
                    _ModelLoadingErrorCallback("Index of COLON is undefined or length the line is too long", StringsHelper.ToString(lineBase));
                    return (null);
                }
                #endregion

                #region fill '_ENDING_LOWER_BUFFER'
                // Copy the lower-cased ending, stopping at the COLON or at the first white-space char.
                // 'endingLength' is therefore the real length of the ending and may be smaller than 'index'.
                var endingLength = 0;
                for (char* ptr = lineBase; endingLength < index; ptr++, endingLength++)
                {
                    var ch = *ptr;
                    if ((_CHARTYPE_MAP[ch] & CharType.IsWhiteSpace) == CharType.IsWhiteSpace)
                    {
                        break;
                    }
                    _ENDING_LOWER_BUFFER[endingLength] = _LOWER_INVARIANT_MAP[ch];
                }
                _ENDING_LOWER_BUFFER[endingLength] = '\0';
                #endregion

                #region fill '_MorphoAttributePairs_Buffer'
                _MorphoAttributePairs_Buffer.Clear();
                for (char* ptr = lineBase + index + 1; ; ptr++)
                {
                    var ch = *ptr;
                    if (ch == '\0')
                    {
                        break;
                    }

                    // Skip everything until the first letter: an attribute token starts with a letter.
                    if ((_CHARTYPE_MAP[ch] & CharType.IsLetter) != CharType.IsLetter)
                    {
                        continue;
                    }

                    // Consume the token: a run of letters and digits. On exit 'ptr' points at the
                    // char that terminated the token and 'ch' holds that char.
                    var len = 0;
                    for (; ; ptr++)
                    {
                        ch = *ptr;
                        if (ch == '\0')
                        {
                            break;
                        }
                        var ct = _CHARTYPE_MAP[ch];
                        if ((ct & CharType.IsLetter) != CharType.IsLetter &&
                            (ct & CharType.IsDigit) != CharType.IsDigit)
                        {
                            break;
                        }
                        len++;
                    }
                    if (len != 0)
                    {
                        var morphoAttribute = default(MorphoAttributeEnum);
                        if (_EnumParserMorphoAttribute.TryParse(ptr - len, len, ref morphoAttribute))
                        {
                            var map = _MorphoAttributeList.TryGetMorphoAttributePair(morphoType.MorphoAttributeGroup, morphoAttribute);
                            if (map.HasValue)
                            {
                                _MorphoAttributePairs_Buffer.Add(map.Value);
                            }
#if DEBUG
                            // Reported in DEBUG builds only: the original author noted too many of these
                            // errors after the 2016.12.28 refresh of the morpho-dictionaries from 'LangAnalyzer-[ilook]'.
                            else
                            {
                                var attr = new string(ptr - len, 0, len);
                                _ModelLoadingErrorCallback("Error in morpho-attribute: '" + attr + '\'', StringsHelper.ToString(lineBase));
                            }
#endif
                        }
                        else
                        {
                            var attr = new string(ptr - len, 0, len);
                            _ModelLoadingErrorCallback("Unknown morpho-attribute: '" + attr + '\'', StringsHelper.ToString(lineBase));
                        }
                    }

                    // The token ran up to the end of the line: stop before the outer 'ptr++' steps past '\0'.
                    if (ch == '\0')
                    {
                        break;
                    }
                }
                #endregion

                #region Allocate native-memory for endingOfWord
                IntPtr endingPtr;
                IntPtr endingUpperPtr;
                if ((endingLength == 1) && (_ENDING_LOWER_BUFFER[0] == UNDERLINE))
                {
                    // A single UNDERLINE stands for the empty ending.
                    endingPtr      = _EMPTY_STRING;
                    endingUpperPtr = _EMPTY_STRING;
                }
                else
                {
                    // Pass the real ending length, not the COLON index: when white-space precedes the COLON
                    // the buffer holds only 'endingLength' valid chars, and everything after the '\0' is stale.
                    endingPtr = GetOrAllocEndingPtr(_ENDING_LOWER_BUFFER, endingLength);

                    // NOTE(review): assumes ToUpperInvariant maps char-for-char, so the upper-case
                    // ending has the same length as the lower-case one - confirm.
                    StringsHelper.ToUpperInvariant(_ENDING_LOWER_BUFFER, _ENDING_UPPER_BUFFER);
                    endingUpperPtr = GetOrAllocEndingPtr(_ENDING_UPPER_BUFFER, endingLength);
                }
                #endregion

                var morphoForm = new MorphoFormNative((char*)endingPtr, (char*)endingUpperPtr, _MorphoAttributePairs_Buffer);
                return (morphoForm);
            }

            /// <summary>
            /// Returns the pointer stored in '_EndingDictionary' for the '\0'-terminated string in
            /// <paramref name="buffer"/>; when there is none, allocates a native copy of
            /// <paramref name="length"/> chars, registers it in the dictionary and returns it.
            /// </summary>
            /// <param name="buffer">Scratch buffer holding the string to look up.</param>
            /// <param name="length">Number of chars in <paramref name="buffer"/>, not counting the terminator.</param>
            private IntPtr GetOrAllocEndingPtr(char* buffer, int length)
            {
                // The scratch-buffer pointer is used only as a lookup key; presumably the dictionary
                // compares keys by string content rather than by address (verify against its comparer).
                if (_EndingDictionary.TryGetValue(new IntPtr(buffer), out IntPtr existsPtr))
                {
                    return (existsPtr);
                }

                AllocHGlobalAndCopy(buffer, length, out IntPtr allocatedPtr);
                _EndingDictionary.Add(allocatedPtr, allocatedPtr);
                return (allocatedPtr);
            }
Esempio n. 2
0
            /// <summary>
            /// Reads a file with words and adds the base of every accepted word to '_TreeDictionary'.
            /// </summary>
            /// <param name="filename">File passed to <c>ReadFile</c>.</param>
            /// <param name="nounType">
            /// Noun type attached to words whose morpho-type attribute group includes <c>NounType</c>.
            /// </param>
            private void ReadWords(string filename, MorphoAttributeEnum nounType)
            {
                var parsedLine = default(ParsedLineWords_unsafe);

                foreach (var line in ReadFile(filename))
                {
                    fixed (char* lineBase = line)
                    {
                        // Each rejected line is reported through the error callback and skipped.
                        if (!ParseLineWords(lineBase, ref parsedLine))
                        {
                            _ModelLoadingErrorCallback("Wrong line format", line);
                            continue;
                        }

                        MorphoTypeNative morphoType = GetMorphoTypeByName((IntPtr)parsedLine.MorphoTypeName);
                        if (morphoType == null)
                        {
                            _ModelLoadingErrorCallback("Unknown morpho-type", line);
                            continue;
                        }

                        var expectedPartOfSpeech = _PartOfSpeechToNativeStringMapper[morphoType.PartOfSpeech];
                        if (!StringsHelper.IsEqual((IntPtr)parsedLine.PartOfSpeech, expectedPartOfSpeech))
                        {
                            _ModelLoadingErrorCallback("Wrong part-of-speech", line);
                            continue;
                        }

                        // Words whose morpho-type has no morpho-forms are silently ignored.
                        if (!morphoType.HasMorphoForms)
                        {
                            continue;
                        }

                        var isNounTypeGroup = (morphoType.MorphoAttributeGroup & MorphoAttributeGroupEnum.NounType) == MorphoAttributeGroupEnum.NounType;
                        MorphoAttributePair? nounTypePair = isNounTypeGroup
                            ? _MorphoAttributeList.GetMorphoAttributePair(MorphoAttributeGroupEnum.NounType, nounType)
                            : (MorphoAttributePair?)null;

                        #region Allocate native-memory for baseOfWord
                        // Base length = word length minus the length of the morpho-type's first ending;
                        // when that would be negative the whole word is taken as the base.
                        var baseLength = parsedLine.WordLength - StringsHelper.GetLength(morphoType.FirstEnding);
                        if (baseLength < 0)
                        {
                            baseLength = parsedLine.WordLength;
                        }

                        IntPtr basePtr = _EMPTY_STRING;
                        if (0 < baseLength)
                        {
                            // Terminate the base in place (this writes into the pinned line) so that the
                            // line pointer itself can serve as the lookup key.
                            lineBase[baseLength] = '\0';

                            if (_EndingDictionary.TryGetValue(new IntPtr(lineBase), out IntPtr existsPtr))
                            {
                                basePtr = existsPtr;
                            }
                            else
                            {
                                AllocHGlobalAndCopy(lineBase, baseLength, out basePtr);
                                _EndingDictionary.Add(basePtr, basePtr);
                            }
                        }
                        #endregion

                        _TreeDictionary.AddWord((char*)basePtr, morphoType, ref nounTypePair);
                    }
                }
            }