/// <summary>
/// Links this word with <paramref name="next"/> (forward via <c>nerNext</c>, backward via
/// <c>nerPrev</c>) and stores the same <paramref name="nerOutputType"/> on both words.
/// </summary>
/// <param name="next">The word that follows this one in the chain.</param>
/// <param name="nerOutputType">The output type written to both linked words.</param>
public void SetNextPrev(word_t next, NerOutputType nerOutputType)
{
    // NOTE: nothing prevents 'next' from being this very instance; a debug assertion
    // for that case existed in the original code but was disabled.
    this.nerNext = next;
    next.nerPrev = this;

    next.nerOutputType = nerOutputType;
    this.nerOutputType = nerOutputType;
}
/// <summary>
/// Converts the pending character run (<c>_Length</c> chars starting at <c>_StartIndex</c>)
/// into one or more words appended to <c>_Words</c>, then advances <c>_StartIndex</c> past
/// the run and resets <c>_Length</c> to zero. Does nothing when the run is empty.
/// </summary>
/// <remarks>
/// If the upper-cased run is longer than three chars, ends with a hyphen-class char followed
/// by "ТО", and is not contained in <c>_ParticleThatExclusion</c>, three words are emitted:
/// the stem, a "-" word and a "то" word. Otherwise a single word covering the run is emitted.
/// </remarks>
private void TryCreateWordAndPut2List()
{
    const int THAT_LENGTH = 2;
    const int DUSH_LENGTH = 1;
    const int PARTICLE_THAT_LENGTH = DUSH_LENGTH + THAT_LENGTH;
    const string THAT_UPPER = "ТО";
    const string THAT_LOWER = "то";
    const string DUSH = "-";
    const char T_RU = 'Т';
    const char O_RU = 'О';

    if (_Length == 0)
    {
        return;
    }

    // Map every char of the run through the _UIM table into the reusable buffer
    // (the original code labels this step "to upper invariant").
    var startPtr = _BASE + _StartIndex;
    if (_WordToUpperBufferSize < _Length)
    {
        ReAllocWordToUpperBuffer(_Length);
    }
    for (int i = 0; i < _Length; i++)
    {
        _WordToUpperBufferPtrBase[i] = _UIM[startPtr[i]];
    }
    var valueUpper = new string(_WordToUpperBufferPtrBase, 0, _Length);

    // Detect a trailing "-ТО" particle; the stem before it must be non-empty.
    var stemLength = _Length - PARTICLE_THAT_LENGTH;
    var splitParticle =
        0 < stemLength
        && (_CTM[_WordToUpperBufferPtrBase[stemLength]] & CharType.IsHyphen) == CharType.IsHyphen
        && _WordToUpperBufferPtrBase[stemLength + 1] == T_RU
        && _WordToUpperBufferPtrBase[stemLength + 2] == O_RU
        && !_ParticleThatExclusion.Contains(valueUpper);

    if (splitParticle)
    {
        // 1) The stem, with full NER / POS-tagger input typing.
        var stemUpper = new string(_WordToUpperBufferPtrBase, 0, stemLength);
        _Words.Add(BuildWordWithInputTypes(_StartIndex, stemLength, stemUpper));

        // 2) The hyphen. Its text is always the literal "-", whatever hyphen-class
        //    char the source actually contained.
        var hyphenIndex = _StartIndex + stemLength;
        _Words.Add(new word_t()
        {
            startIndex = hyphenIndex,
            length = DUSH_LENGTH,
            valueOriginal = DUSH,
            valueUpper = DUSH,
            posTaggerInputType = PosTaggerInputType.Dush,
            posTaggerExtraWordType = PosTaggerExtraWordType.Punctuation,
        });

        // 3) The particle. Its original text is always the lower-case literal "то",
        //    regardless of the casing in the source.
        _Words.Add(new word_t()
        {
            startIndex = hyphenIndex + 1,
            length = THAT_LENGTH,
            valueOriginal = THAT_LOWER,
            valueUpper = THAT_UPPER,
            posTaggerInputType = PosTaggerInputType.O,
        });
    }
    else
    {
        _Words.Add(BuildWordWithInputTypes(_StartIndex, _Length, valueUpper));
    }

    // Consume the run.
    _StartIndex += _Length;
    _Length = 0;
}

/// <summary>
/// Creates a word for <paramref name="length"/> chars of the source buffer starting at
/// <paramref name="startIndex"/> and fills in its NER and POS-tagger input attributes.
/// </summary>
/// <param name="startIndex">Offset of the word's first char relative to <c>_BASE</c>.</param>
/// <param name="length">Number of chars in the word.</param>
/// <param name="valueUpper">Upper-cased text of the word, stored as-is on the result.</param>
/// <returns>The populated word; it is not added to <c>_Words</c> here.</returns>
private word_t BuildWordWithInputTypes(int startIndex, int length, string valueUpper)
{
    var startPtr = _BASE + startIndex;

    var word = new word_t()
    {
        startIndex = startIndex,
        length = length,
        valueOriginal = new string(_BASE, startIndex, length),
        valueUpper = valueUpper,
    };

    word.nerInputType = _NerInputTypeProcessor.GetNerInputType(startPtr, length);

    // The POS-tagger processor receives the partially built word (nerInputType already set).
    var result = _PosTaggerInputTypeProcessor.GetResult(startPtr, length, word);
    word.posTaggerInputType = result.posTaggerInputType;
    word.posTaggerExtraWordType = result.posTaggerExtraWordType;
    word.posTaggerFirstCharIsUpper = (_CTM[*startPtr] & CharType.IsUpper) == CharType.IsUpper;
    word.posTaggerLastValueUpperInNumeralChain = result.posTaggerLastValueUpperInNumeralChainIsValueOriginal
        ? word.valueUpper
        : result.posTaggerLastValueUpperInNumeralChain;

    return word;
}