예제 #1
0
        /// <summary>
        /// Writes the value of one CRF attribute into the attribute buffer and advances
        /// <c>_AttributeBufferPtr</c> past every character written.
        /// </summary>
        /// <param name="wordIndex">Index of the current word in the pinned words buffer.</param>
        /// <param name="crfAttribute">
        /// Attribute descriptor: <c>AttributeName</c> selects the column and <c>Position</c>
        /// is added to <paramref name="wordIndex"/> to pick the word that is read.
        /// </param>
        /// <remarks>
        /// Column names:
        /// 'w' – the word itself;
        /// 's' – part of speech;
        /// 'z' – morpho-attributes (each part of speech has its own set of values);
        /// 'y' – the target value.
        /// This method performs no bounds checks on the computed word index or on the output buffer.
        /// An unknown column name throws only in DEBUG builds; otherwise nothing is written.
        /// </remarks>
        private void AppendAttrValue(int wordIndex, CRFAttribute crfAttribute)
        {
            switch (crfAttribute.AttributeName)
            {
                // 'w' – the word; ':' and '\' are escaped in this column as "\:" and "\\".
                case 'w':
                {
                    var targetIndex = wordIndex + crfAttribute.Position;
                    var word        = _PinnedWordsBufferPtrBase + targetIndex;

                    if (word->posTaggerInputType == PosTaggerInputType.Col)
                    {
                        // A colon token is emitted in its escaped form.
                        *_AttributeBufferPtr++ = SLASH;
                        *_AttributeBufferPtr++ = COLON;
                    }
                    else
                    {
                        char* chars = word->basePtr;
                        if (*chars == SLASH)
                        {
                            // A word starting with a slash is emitted as the escaped slash only;
                            // nothing after the first character is copied.
                            *_AttributeBufferPtr++ = SLASH;
                            *_AttributeBufferPtr++ = SLASH;
                        }
                        else
                        {
                            // Copy the word text, truncated to WORD_MAX_LENGTH characters.
                            int copyLength = Math.Min(WORD_MAX_LENGTH, word->length);
                            int offset     = 0;
                            while (offset < copyLength)
                            {
                                *_AttributeBufferPtr++ = chars[offset];
                                offset++;
                            }
                        }
                    }
                    break;
                }

                // 's' – part of speech.
                case 's':
                {
                    var targetIndex = wordIndex + crfAttribute.Position;
                    var word        = _PinnedWordsBufferPtrBase + targetIndex;
                    *_AttributeBufferPtr++ = word->posTaggerOutputType.ToCrfChar();
                    break;
                }

                // 'z' – morpho-attributes; the set of values written depends on the part of speech.
                case 'z':
                {
                    var targetIndex = wordIndex + crfAttribute.Position;
                    var word        = _PinnedWordsBufferPtrBase + targetIndex;

                    switch (word->posTaggerOutputType)
                    {
                        case PosTaggerOutputType.Noun:
                        {
                            var morpho = word->morphoAttribute;
                            *_AttributeBufferPtr++ = (char)MA.get_Case(morpho);
                            *_AttributeBufferPtr++ = (char)MA.get_Number(morpho);
                            *_AttributeBufferPtr++ = (char)MA.get_NounType(morpho);
                            break;
                        }

                        case PosTaggerOutputType.Verb:
                        case PosTaggerOutputType.Infinitive:
                        case PosTaggerOutputType.AdverbialParticiple:
                        case PosTaggerOutputType.AuxiliaryVerb:
                        case PosTaggerOutputType.Participle:
                        {
                            var morpho = word->morphoAttribute;
                            *_AttributeBufferPtr++ = (char)MA.get_Case(morpho);
                            *_AttributeBufferPtr++ = (char)MA.get_Number(morpho);
                            *_AttributeBufferPtr++ = (char)MA.get_Mood(morpho);
                            *_AttributeBufferPtr++ = (char)MA.get_Voice(morpho);
                            *_AttributeBufferPtr++ = (char)MA.get_VerbTransitivity(morpho);
                            break;
                        }

                        case PosTaggerOutputType.Pronoun:
                        case PosTaggerOutputType.AdjectivePronoun:
                        case PosTaggerOutputType.PossessivePronoun:
                        case PosTaggerOutputType.AdverbialPronoun:
                        {
                            var morpho = word->morphoAttribute;
                            *_AttributeBufferPtr++ = (char)MA.get_Case(morpho);
                            *_AttributeBufferPtr++ = (char)MA.get_Number(morpho);
                            *_AttributeBufferPtr++ = (char)MA.get_Form(morpho);
                            *_AttributeBufferPtr++ = (char)MA.get_PronounType(morpho);
                            break;
                        }

                        case PosTaggerOutputType.Adjective:
                        {
                            var morpho = word->morphoAttribute;
                            *_AttributeBufferPtr++ = (char)MA.get_Case(morpho);
                            *_AttributeBufferPtr++ = (char)MA.get_Number(morpho);
                            *_AttributeBufferPtr++ = (char)MA.get_Form(morpho);
                            break;
                        }

                        // Numerals and prepositions contribute the case only.
                        case PosTaggerOutputType.Numeral:
                        case PosTaggerOutputType.Preposition:
                        {
                            *_AttributeBufferPtr++ = (char)MA.get_Case(word->morphoAttribute);
                            break;
                        }

                        case PosTaggerOutputType.Conjunction:
                        {
                            *_AttributeBufferPtr++ = (char)MA.get_ConjunctionType(word->morphoAttribute);
                            break;
                        }

                        // Every other part of speech gets the single MA.U_BYTE marker.
                        default:
                        {
                            *_AttributeBufferPtr++ = (char)MA.U_BYTE;
                            break;
                        }
                    }
                    break;
                }

                // 'y' – the target value.
                case 'y':
                {
                    *_AttributeBufferPtr++ = O; // SINTAXINPUTTYPE_OTHER == "O"
                    break;
                }

#if DEBUG
                default:
                    throw new InvalidDataException("Invalid column-name: '" + crfAttribute.AttributeName + "'");
#endif
            }
        }
예제 #2
0
        /// <summary>
        /// Writes the value of one CRF attribute into the attribute buffer and advances
        /// <c>_AttributeBufferPtr</c> past every element written.
        /// </summary>
        /// <param name="wordIndex">Index of the current word in the pinned words buffer.</param>
        /// <param name="crfAttribute">
        /// Attribute descriptor: <c>AttributeName</c> selects the column and <c>Position</c>
        /// is added to <paramref name="wordIndex"/> to pick the word that is read.
        /// </param>
        /// <remarks>
        /// Column names:
        /// 's' – part of speech;
        /// 'z' – morpho-attributes (each part of speech has its own set of values);
        /// 'y' – the target value.
        /// This method performs no bounds checks on the computed word index or on the output buffer.
        /// An unknown column name throws only in DEBUG builds; otherwise nothing is written.
        /// </remarks>
        private void AppendAttrValue(int wordIndex, CRFAttribute crfAttribute)
        {
            switch (crfAttribute.AttributeName)
            {
                // 's' – part of speech.
                case 's':
                {
                    var targetIndex = wordIndex + crfAttribute.Position;
                    var word        = _PinnedWordsBufferPtrBase + targetIndex;
                    *_AttributeBufferPtr++ = word->posTaggerOutputType.ToCrfByte();
                    break;
                }

                // 'z' – morpho-attributes; the set of values written depends on the part of speech.
                case 'z':
                {
                    var targetIndex = wordIndex + crfAttribute.Position;
                    var word        = _PinnedWordsBufferPtrBase + targetIndex;

                    switch (word->posTaggerOutputType)
                    {
                        case PosTaggerOutputType.Noun:
                        {
                            var morpho = word->morphoAttribute;
                            *_AttributeBufferPtr++ = MA.get_Case(morpho);
                            *_AttributeBufferPtr++ = MA.get_Number(morpho);
                            *_AttributeBufferPtr++ = MA.get_NounType(morpho);
                            break;
                        }

                        case PosTaggerOutputType.Verb:
                        case PosTaggerOutputType.Infinitive:
                        case PosTaggerOutputType.AdverbialParticiple:
                        case PosTaggerOutputType.AuxiliaryVerb:
                        case PosTaggerOutputType.Participle:
                        {
                            var morpho = word->morphoAttribute;
                            *_AttributeBufferPtr++ = MA.get_Case(morpho);
                            *_AttributeBufferPtr++ = MA.get_Number(morpho);
                            *_AttributeBufferPtr++ = MA.get_Mood(morpho);
                            *_AttributeBufferPtr++ = MA.get_Voice(morpho);
                            *_AttributeBufferPtr++ = MA.get_VerbTransitivity(morpho);
                            break;
                        }

                        case PosTaggerOutputType.Pronoun:
                        case PosTaggerOutputType.AdjectivePronoun:
                        case PosTaggerOutputType.PossessivePronoun:
                        case PosTaggerOutputType.AdverbialPronoun:
                        {
                            var morpho = word->morphoAttribute;
                            *_AttributeBufferPtr++ = MA.get_Case(morpho);
                            *_AttributeBufferPtr++ = MA.get_Number(morpho);
                            *_AttributeBufferPtr++ = MA.get_Form(morpho);
                            *_AttributeBufferPtr++ = MA.get_PronounType(morpho);
                            break;
                        }

                        case PosTaggerOutputType.Adjective:
                        {
                            var morpho = word->morphoAttribute;
                            *_AttributeBufferPtr++ = MA.get_Case(morpho);
                            *_AttributeBufferPtr++ = MA.get_Number(morpho);
                            *_AttributeBufferPtr++ = MA.get_Form(morpho);
                            break;
                        }

                        // Numerals and prepositions contribute the case only.
                        case PosTaggerOutputType.Numeral:
                        case PosTaggerOutputType.Preposition:
                        {
                            *_AttributeBufferPtr++ = MA.get_Case(word->morphoAttribute);
                            break;
                        }

                        case PosTaggerOutputType.Conjunction:
                        {
                            *_AttributeBufferPtr++ = MA.get_ConjunctionType(word->morphoAttribute);
                            break;
                        }

                        // Every other part of speech gets the single MA.U_BYTE marker.
                        default:
                        {
                            *_AttributeBufferPtr++ = MA.U_BYTE;
                            break;
                        }
                    }
                    break;
                }

                // 'y' – the target value.
                case 'y':
                {
                    *_AttributeBufferPtr++ = O; // SINTAXINPUTTYPE_OTHER == "O"
                    break;
                }

#if DEBUG
                default:
                    throw new InvalidDataException("Invalid column-name: '" + crfAttribute.AttributeName + "'");
#endif
            }
        }