Exemplo n.º 1
0
        /// <summary>
        /// Links this word with <paramref name="next"/> and gives both words the same output type.
        /// </summary>
        /// <param name="next">
        /// Word stored in this word's <c>nerNext</c>; its <c>nerPrev</c> is pointed back at this instance.
        /// Must not be null (it is dereferenced).
        /// </param>
        /// <param name="nerOutputType">Value written to <c>nerOutputType</c> of both words.</param>
        public void SetNextPrev(word_t next, NerOutputType nerOutputType)
        {
            // Forward link on this word, then the back link on the successor.
            this.nerNext = next;
            next.nerPrev = this;

            // Same order a chained assignment would use: the successor first, then this word.
            next.nerOutputType = nerOutputType;
            this.nerOutputType = nerOutputType;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Flushes the pending token (<c>_Length</c> chars starting at <c>_StartIndex</c> in <c>_BASE</c>)
        /// into <c>_Words</c>, then advances <c>_StartIndex</c> past it and resets <c>_Length</c> to zero.
        /// Does nothing when <c>_Length</c> is zero.
        /// </summary>
        /// <remarks>
        /// When the mapped (upper-cased) token ends with a hyphen character followed by "ТО", has at least
        /// one char before that suffix, and is not listed in <c>_ParticleThatExclusion</c>, three words are
        /// emitted instead of one: the stem, a "-" punctuation word and a "то" particle word. The last two
        /// carry the constant texts, not the original characters from the buffer.
        /// </remarks>
        private void TryCreateWordAndPut2List()
        {
            const int    THAT_LENGTH          = 2;
            const int    DUSH_LENGTH          = 1;
            const int    PARTICLE_THAT_LENGTH = DUSH_LENGTH + THAT_LENGTH;
            const string THAT_UPPER           = "ТО";
            const string THAT_LOWER           = "то";
            const string DUSH = "-";
            const char   T_RU = 'Т';
            const char   O_RU = 'О';

            if (_Length == 0)
            {
                return;
            }

            var startPtr = _BASE + _StartIndex;

            // Map every char of the token through the _UIM table into the reusable buffer
            // (presumably an invariant upper-case map, judging by how the result is used - confirm).
            if (_WordToUpperBufferSize < _Length)
            {
                ReAllocWordToUpperBuffer(_Length);
            }
            for (int i = 0; i < _Length; i++)
            {
                *(_WordToUpperBufferPtrBase + i) = *(_UIM + *(startPtr + i));
            }

            var valueUpper = new string( _WordToUpperBufferPtrBase, 0, _Length );

            // Detect a trailing "-ТО" particle. The stem must keep at least one char.
            var stemLength        = _Length - PARTICLE_THAT_LENGTH;
            var splitParticleThat = false;
            if (0 < stemLength)
            {
                var suffixPtr = _WordToUpperBufferPtrBase + stemLength;

                // Short-circuit order matters: the exclusion lookup runs only when the suffix matches.
                splitParticleThat =
                    (*(_CTM + *suffixPtr) & CharType.IsHyphen) == CharType.IsHyphen &&
                    *(suffixPtr + DUSH_LENGTH)     == T_RU &&
                    *(suffixPtr + DUSH_LENGTH + 1) == O_RU &&
                    !_ParticleThatExclusion.Contains(valueUpper);
            }

            if (splitParticleThat)
            {
                // 1) the stem, without the particle
                BuildWordAndPut2List(startPtr, _StartIndex, stemLength, new string( _WordToUpperBufferPtrBase, 0, stemLength ));

                // 2) the hyphen, emitted as a punctuation word with the constant "-" text
                var startIndex = _StartIndex + stemLength;
                _Words.Add(new word_t()
                {
                    startIndex             = startIndex,
                    length                 = DUSH_LENGTH,
                    valueOriginal          = DUSH,
                    valueUpper             = DUSH,
                    posTaggerInputType     = PosTaggerInputType.Dush,
                    posTaggerExtraWordType = PosTaggerExtraWordType.Punctuation,
                });

                // 3) the particle itself, emitted with constant lower/upper texts
                _Words.Add(new word_t()
                {
                    startIndex         = startIndex + DUSH_LENGTH,
                    length             = THAT_LENGTH,
                    valueOriginal      = THAT_LOWER,
                    valueUpper         = THAT_UPPER,
                    posTaggerInputType = PosTaggerInputType.O,
                });
            }
            else
            {
                BuildWordAndPut2List(startPtr, _StartIndex, _Length, valueUpper);
            }

            // Advance past the whole token (particle included) and clear the pending length.
            _StartIndex += _Length;
            _Length      = 0;
        }

        /// <summary>
        /// Creates one word from <paramref name="length"/> chars at <paramref name="startPtr"/>, fills its
        /// NER and POS-tagger input fields from the corresponding processors, and appends it to <c>_Words</c>.
        /// </summary>
        /// <param name="startPtr">Pointer to the first original char of the word.</param>
        /// <param name="startIndex">Value stored in the word's <c>startIndex</c>.</param>
        /// <param name="length">Number of chars in the word.</param>
        /// <param name="valueUpper">Already mapped (upper-cased) text of the word.</param>
        private unsafe void BuildWordAndPut2List(char* startPtr, int startIndex, int length, string valueUpper)
        {
            var word = new word_t()
            {
                startIndex    = startIndex,
                length        = length,
                valueOriginal = new string( startPtr, 0, length ),
                valueUpper    = valueUpper,
            };

            word.nerInputType = _NerInputTypeProcessor.GetNerInputType(startPtr, length);

            var result = _PosTaggerInputTypeProcessor.GetResult(startPtr, length, word);
            word.posTaggerInputType        = result.posTaggerInputType;
            word.posTaggerExtraWordType    = result.posTaggerExtraWordType;
            word.posTaggerFirstCharIsUpper = ((*(_CTM + *startPtr) & CharType.IsUpper) == CharType.IsUpper);

            // The processor flags when the numeral-chain value is simply this word's own upper text.
            word.posTaggerLastValueUpperInNumeralChain = (result.posTaggerLastValueUpperInNumeralChainIsValueOriginal)
                                                         ? word.valueUpper
                                                         : result.posTaggerLastValueUpperInNumeralChain;

            _Words.Add(word);
        }