/// <summary>
/// Sends the CRF attributes of every word to the native tagger, runs the tagging and
/// stores the resulting tag in each word's <c>posTaggerOutputType</c>.
/// </summary>
/// <param name="words">
/// Words to tag. Nothing is done when <c>Init(words)</c> returns <c>false</c>.
/// </param>
public void Run(List<Word> words)
{
    // Init decides whether there is anything to process.
    // NOTE(review): presumably it also pins the words into _pinnedWordsBufferPtrBase
    // (the handles are released at the end of this method) - confirm against Init.
    if (!Init(words))
    {
        return;
    }

    var wordsCount = words.Count;
    var wordsCount_Minus1 = wordsCount - 1;
#if DEBUG
    var sb_attr_debug = new StringBuilder();
#endif

    native.crf_tagger_beginAddItemSequence(_tagger);

    // Put the attribute values of every word into the CRF tagger.
    for (var wordIndex = 0; wordIndex < wordsCount; wordIndex++)
    {
        native.crf_tagger_beginAddItemAttribute(_tagger);

        // Every item starts with the fixed "other" input-type attribute.
        native.crf_tagger_addItemAttributeNameOnly(_tagger, XlatUnsafe.Inst.PosInputtypeOtherPtrBase);
#if DEBUG
        sb_attr_debug.Append(PosTaggerInputType.O.ToText()).Append('\t');
#endif

        var ngrams = _crfTemplateFile.GetCRFNgramsWhichCanTemplateBeApplied(wordIndex, wordsCount);
        for (int i = 0, ngramsLength = ngrams.Length; i < ngramsLength; i++)
        {
            var ngram = ngrams[i];

            // The header is copied to the start of the shared buffer; the returned pointer
            // becomes the write cursor used for the values appended below.
            _attributeBufferPtr = ngram.CopyAttributesHeaderChars(_attributeBufferPtrBase);

            // Build the attribute value: the n-gram's values joined by VERTICAL_SLASH.
            switch (ngram.CRFAttributesLength)
            {
                case 1:
                    AppendAttrValue(wordIndex, ngram.CRFAttribute_0);
                    break;

                case 2:
                    AppendAttrValue(wordIndex, ngram.CRFAttribute_0);
                    *(_attributeBufferPtr++) = VERTICAL_SLASH;
                    AppendAttrValue(wordIndex, ngram.CRFAttribute_1);
                    break;

                case 3:
                    AppendAttrValue(wordIndex, ngram.CRFAttribute_0);
                    *(_attributeBufferPtr++) = VERTICAL_SLASH;
                    AppendAttrValue(wordIndex, ngram.CRFAttribute_1);
                    *(_attributeBufferPtr++) = VERTICAL_SLASH;
                    AppendAttrValue(wordIndex, ngram.CRFAttribute_2);
                    break;

                default:
                    for (var j = 0; j < ngram.CRFAttributesLength; j++)
                    {
                        var crfAttr = ngram.CRFAttributes[j];
                        AppendAttrValue(wordIndex, crfAttr);
                        *(_attributeBufferPtr++) = VERTICAL_SLASH;
                    }
                    // Remove the trailing '|'.
                    _attributeBufferPtr--;
                    break;
            }

            // Terminate the attribute and hand it to the tagger as UTF-8.
            *(_attributeBufferPtr++) = '\0';
            var attr_len_with_zero = (int)(_attributeBufferPtr - _attributeBufferPtrBase);
            if (attr_len_with_zero > ATTRIBUTE_MAX_LENGTH)
            {
                // Clamping the length alone would cut off the terminator written above and
                // leave the UTF-8 buffer without a trailing NUL (possibly followed by bytes
                // of a previous, longer attribute). Re-terminate inside the clamped range.
                attr_len_with_zero = ATTRIBUTE_MAX_LENGTH;
                *(_attributeBufferPtrBase + attr_len_with_zero - 1) = '\0';
            }
            UTF8_ENCODING.GetBytes(_attributeBufferPtrBase, attr_len_with_zero, _UTF8BufferPtrBase, UTF8_BUFFER_SIZE);
            native.crf_tagger_addItemAttributeNameOnly(_tagger, _UTF8BufferPtrBase);
        }

        // The first word gets the begin-of-sentence marker, otherwise the last word gets
        // the end-of-sentence marker (a single-word sequence therefore only gets BOS).
        if (wordIndex == 0)
        {
            native.crf_tagger_addItemAttributeNameOnly(_tagger, XlatUnsafe.Inst.BeginOfSentencePtrBase);
        }
        else if (wordIndex == wordsCount_Minus1)
        {
            native.crf_tagger_addItemAttributeNameOnly(_tagger, XlatUnsafe.Inst.EndOfSentencePtrBase);
        }

        native.crf_tagger_endAddItemAttribute(_tagger);
    }

    native.crf_tagger_endAddItemSequence(_tagger);

    native.crf_tagger_tag(_tagger);

    // Read the tagging result back into the words.
    System.Diagnostics.Debug.Assert(native.crf_tagger_getResultLength(_tagger) == wordsCount, "(native.crf_tagger_getResultLength( _Tagger ) != _WordsCount)");
    for (var i = 0; i < wordsCount; i++)
    {
        var ptr = native.crf_tagger_getResultValue(_tagger, (uint)i);
        var value = (byte*)ptr.ToPointer();
        words[i].posTaggerOutputType = PosTaggerExtensions.ToPosTaggerOutputType(value);

        // Free the pinned GC handle of this word.
        (_pinnedWordsBufferPtrBase + i)->gcHandle.Free();
    }
}
/// <summary>
/// Sends the CRF attributes of every word to the native tagger, runs the tagging and
/// stores the resulting tag in each word's <c>posTaggerOutputType</c>.
/// </summary>
/// <param name="words">
/// Words to tag. Nothing is done when <c>Init(words)</c> returns <c>false</c>.
/// </param>
/// <remarks>
/// In DEBUG builds the attributes sent to the tagger are additionally collected as text
/// (one line per word, tab-separated) into a local string for inspection in the debugger.
/// </remarks>
public void Run(List<word_t> words)
{
    // Init decides whether there is anything to process.
    // NOTE(review): presumably it also pins the words into _PinnedWordsBufferPtrBase
    // (the handles are released at the end of this method) - confirm against Init.
    if (!Init(words))
    {
        return;
    }

    var wordsCount = words.Count;
    var wordsCount_Minus1 = wordsCount - 1;
#if DEBUG
    var sb_attr_debug = new StringBuilder();
#endif

    native.crf_tagger_beginAddItemSequence(_Tagger);

    // Put the attribute values of every word into the CRF tagger.
    for (var wordIndex = 0; wordIndex < wordsCount; wordIndex++)
    {
        native.crf_tagger_beginAddItemAttribute(_Tagger);

        // Every item starts with the fixed "other" input-type attribute.
        native.crf_tagger_addItemAttributeNameOnly(_Tagger, xlat_Unsafe.Inst._PosInputtypeOtherPtrBase);
#if DEBUG
        sb_attr_debug.Append(PosTaggerInputType.O.ToText()).Append('\t');
#endif

        var ngrams = _CrfTemplateFile.GetCRFNgramsWhichCanTemplateBeApplied(wordIndex, wordsCount);
        for (int i = 0, ngramsLength = ngrams.Length; i < ngramsLength; i++)
        {
            var ngram = ngrams[i];

            // The header is copied to the start of the shared buffer; the returned pointer
            // becomes the write cursor used for the values appended below.
            _AttributeBufferPtr = ngram.CopyAttributesHeaderChars(_AttributeBufferPtrBase);

            // Build the attribute value: the n-gram's values joined by VERTICAL_SLASH.
            switch (ngram.CRFAttributesLength)
            {
                case 1:
                    AppendAttrValue(wordIndex, ngram.CRFAttribute_0);
                    break;

                case 2:
                    AppendAttrValue(wordIndex, ngram.CRFAttribute_0);
                    *(_AttributeBufferPtr++) = VERTICAL_SLASH;
                    AppendAttrValue(wordIndex, ngram.CRFAttribute_1);
                    break;

                case 3:
                    AppendAttrValue(wordIndex, ngram.CRFAttribute_0);
                    *(_AttributeBufferPtr++) = VERTICAL_SLASH;
                    AppendAttrValue(wordIndex, ngram.CRFAttribute_1);
                    *(_AttributeBufferPtr++) = VERTICAL_SLASH;
                    AppendAttrValue(wordIndex, ngram.CRFAttribute_2);
                    break;

                default:
                    for (var j = 0; j < ngram.CRFAttributesLength; j++)
                    {
                        var crfAttr = ngram.CRFAttributes[j];
                        AppendAttrValue(wordIndex, crfAttr);
                        *(_AttributeBufferPtr++) = VERTICAL_SLASH;
                    }
                    // Remove the trailing '|'.
                    _AttributeBufferPtr--;
                    break;
            }

            // Terminate the attribute and hand it to the tagger as UTF-8.
            *(_AttributeBufferPtr++) = '\0';
            var attr_len_with_zero = (int)(_AttributeBufferPtr - _AttributeBufferPtrBase);
            if (attr_len_with_zero > ATTRIBUTE_MAX_LENGTH)
            {
                // Clamping the length alone would cut off the terminator written above and
                // leave the UTF-8 buffer without a trailing NUL (possibly followed by bytes
                // of a previous, longer attribute). Re-terminate inside the clamped range.
                attr_len_with_zero = ATTRIBUTE_MAX_LENGTH;
                *(_AttributeBufferPtrBase + attr_len_with_zero - 1) = '\0';
            }
            UTF8_ENCODING.GetBytes(_AttributeBufferPtrBase, attr_len_with_zero, _UTF8BufferPtrBase, UTF8_BUFFER_SIZE);
            native.crf_tagger_addItemAttributeNameOnly(_Tagger, _UTF8BufferPtrBase);
#if DEBUG
            // Same text that was just sent, without the terminator.
            var s_debug = new string(_AttributeBufferPtrBase, 0, attr_len_with_zero - 1);
            sb_attr_debug.Append(s_debug).Append('\t');
#endif
        }

        // The first word gets the begin-of-sentence marker, otherwise the last word gets
        // the end-of-sentence marker (a single-word sequence therefore only gets BOS).
        if (wordIndex == 0)
        {
            native.crf_tagger_addItemAttributeNameOnly(_Tagger, xlat_Unsafe.Inst._BeginOfSentencePtrBase);
#if DEBUG
            sb_attr_debug.Append(xlat_Unsafe.BEGIN_OF_SENTENCE).Append('\t');
#endif
        }
        else if (wordIndex == wordsCount_Minus1)
        {
            native.crf_tagger_addItemAttributeNameOnly(_Tagger, xlat_Unsafe.Inst._EndOfSentencePtrBase);
#if DEBUG
            sb_attr_debug.Append(xlat_Unsafe.END_OF_SENTENCE).Append('\t');
#endif
        }

        native.crf_tagger_endAddItemAttribute(_Tagger);
#if DEBUG
        sb_attr_debug.Append('\n');
#endif
    }

    native.crf_tagger_endAddItemSequence(_Tagger);
#if DEBUG
    // Not used by the code; kept so the full attribute dump can be inspected while debugging.
    var attr_debug = sb_attr_debug.ToString();
#endif

    // Run the CRF tagging.
    native.crf_tagger_tag(_Tagger);

    // Read the tagging result back into the words.
    System.Diagnostics.Debug.Assert(native.crf_tagger_getResultLength(_Tagger) == wordsCount, "(native.crf_tagger_getResultLength( _Tagger ) != _WordsCount)");
    for (var i = 0; i < wordsCount; i++)
    {
        var ptr = native.crf_tagger_getResultValue(_Tagger, (uint)i);
        var value = (byte*)ptr.ToPointer();
        words[i].posTaggerOutputType = PosTaggerExtensions.ToPosTaggerOutputType(value);

        // Free the pinned GC handle of this word.
        (_PinnedWordsBufferPtrBase + i)->gcHandle.Free();
    }
}
/// <summary>
/// Passes the CRF attributes of each word to the native tagger, performs the tagging and
/// writes the resulting value into each word's <c>syntaxRoleType</c>.
/// </summary>
/// <param name="words">
/// Words to tag. The method returns immediately when <c>Init(words)</c> returns <c>false</c>.
/// </param>
/// <remarks>
/// In DEBUG builds the attributes sent to the tagger are also gathered as text
/// (one line per word, tab-separated) into a local string for inspection in the debugger.
/// </remarks>
public void Run(IList<word_t> words)
{
    if (!Init(words))
    {
        return;
    }

    var wordsCount = words.Count;
    var lastWordIndex = wordsCount - 1;
#if DEBUG
    var debugAttributes = new StringBuilder();
#endif

    Native.crf_tagger_beginAddItemSequence(_Tagger);

    // One tagger item per word.
    for (int wordIndex = 0; wordIndex < wordsCount; wordIndex++)
    {
        Native.crf_tagger_beginAddItemAttribute(_Tagger);

        // Fixed "other" input-type attribute goes first.
        Native.crf_tagger_addItemAttributeNameOnly(_Tagger, xlat_Unsafe.Inst._PosInputtypeOtherPtrBase);
#if DEBUG
        debugAttributes.Append(PosTaggerInputType.O.ToText()).Append('\t');
#endif

        var applicableNgrams = _CrfTemplateFile.GetCRFNgramsWhichCanTemplateBeApplied(wordIndex, wordsCount);
        for (int ngramIndex = 0, ngramCount = applicableNgrams.Length; ngramIndex < ngramCount; ngramIndex++)
        {
            var ngram = applicableNgrams[ngramIndex];

            // Header first; the pointer that comes back is where the values continue.
            _AttributeBufferPtr = ngram.CopyAttributesHeaderChars(_AttributeBufferPtrBase);

            // Values of the n-gram, separated by VERTICAL_SLASH.
            switch (ngram.CRFAttributesLength)
            {
                case 1:
                {
                    AppendAttrValue(wordIndex, ngram.CRFAttribute_0);
                    break;
                }
                case 2:
                {
                    AppendAttrValue(wordIndex, ngram.CRFAttribute_0);
                    *(_AttributeBufferPtr++) = VERTICAL_SLASH;
                    AppendAttrValue(wordIndex, ngram.CRFAttribute_1);
                    break;
                }
                case 3:
                {
                    AppendAttrValue(wordIndex, ngram.CRFAttribute_0);
                    *(_AttributeBufferPtr++) = VERTICAL_SLASH;
                    AppendAttrValue(wordIndex, ngram.CRFAttribute_1);
                    *(_AttributeBufferPtr++) = VERTICAL_SLASH;
                    AppendAttrValue(wordIndex, ngram.CRFAttribute_2);
                    break;
                }
                default:
                {
                    for (var attrIndex = 0; attrIndex < ngram.CRFAttributesLength; attrIndex++)
                    {
                        AppendAttrValue(wordIndex, ngram.CRFAttributes[attrIndex]);
                        *(_AttributeBufferPtr++) = VERTICAL_SLASH;
                    }
                    // Step back over the trailing '|'.
                    _AttributeBufferPtr--;
                    break;
                }
            }

            // Terminate the attribute and pass the buffer straight to the tagger.
            *(_AttributeBufferPtr++) = ZERO;
            Native.crf_tagger_addItemAttributeNameOnly(_Tagger, _AttributeBufferPtrBase);
#if DEBUG
            var lengthWithTerminator = (int)(_AttributeBufferPtr - _AttributeBufferPtrBase);
            var attributeText = new string((sbyte*)_AttributeBufferPtrBase, 0, lengthWithTerminator - 1);
            debugAttributes.Append(attributeText).Append('\t');
#endif
        }

        // Sentence boundary markers: BOS for the first word, else EOS for the last one.
        if (wordIndex == 0)
        {
            Native.crf_tagger_addItemAttributeNameOnly(_Tagger, xlat_Unsafe.Inst._BeginOfSentencePtrBase);
#if DEBUG
            debugAttributes.Append(xlat_Unsafe.BEGIN_OF_SENTENCE).Append('\t');
#endif
        }
        else if (wordIndex == lastWordIndex)
        {
            Native.crf_tagger_addItemAttributeNameOnly(_Tagger, xlat_Unsafe.Inst._EndOfSentencePtrBase);
#if DEBUG
            debugAttributes.Append(xlat_Unsafe.END_OF_SENTENCE).Append('\t');
#endif
        }

        Native.crf_tagger_endAddItemAttribute(_Tagger);
#if DEBUG
        debugAttributes.Append('\n');
#endif
    }

    Native.crf_tagger_endAddItemSequence(_Tagger);
#if DEBUG
    // Only for looking at the complete attribute dump in the debugger.
    var attr_debug = debugAttributes.ToString();
#endif

    // Tag the sequence.
    Native.crf_tagger_tag(_Tagger);

    // Copy the results back; the count is taken from the tagger itself.
    uint resultCount = Native.crf_tagger_getResultLength(_Tagger);
    for (uint resultIndex = 0; resultIndex < resultCount; resultIndex++)
    {
        var resultPtr = Native.crf_tagger_getResultValue(_Tagger, resultIndex);
        words[(int)resultIndex].syntaxRoleType = SyntaxExtensions.ToSyntaxRoleType((byte*)resultPtr.ToPointer());
    }
}