コード例 #1
0
        /// <summary>
        /// Tags every word of <paramref name="words"/> with a part-of-speech output type using the native CRF tagger.
        /// </summary>
        /// <remarks>
        /// For each word the method sends to the tagger: the "other" input-type attribute, one attribute per
        /// n-gram returned by the template file for that word position, and a begin- or end-of-sentence marker.
        /// After tagging, the result for word <c>i</c> is stored in <c>words[i].posTaggerOutputType</c>.
        /// The method returns immediately when <c>Init(words)</c> returns <c>false</c>.
        /// </remarks>
        /// <param name="words">Words of one sequence; each word's <c>posTaggerOutputType</c> is overwritten.</param>
        public void Run(List<Word> words)
        {
            #region init
            if (!Init(words))
            {
                return;
            }
            var wordsCount        = words.Count;
            var wordsCount_Minus1 = wordsCount - 1;
            #endregion

            try
            {
                native.crf_tagger_beginAddItemSequence(_tagger);

                #region put attr values to crf
                for (var wordIndex = 0; wordIndex < wordsCount; wordIndex++)
                {
                    native.crf_tagger_beginAddItemAttribute(_tagger);

                    #region process crf attributes by word
                    native.crf_tagger_addItemAttributeNameOnly(_tagger, XlatUnsafe.Inst.PosInputtypeOtherPtrBase);

                    var ngrams = _crfTemplateFile.GetCRFNgramsWhichCanTemplateBeApplied(wordIndex, wordsCount);
                    for (int i = 0, ngramsLength = ngrams.Length; i < ngramsLength; i++)
                    {
                        var ngram = ngrams[i];

                        // Every attribute starts with the n-gram's header chars; the values are appended after them.
                        _attributeBufferPtr = ngram.CopyAttributesHeaderChars(_attributeBufferPtrBase);

                        #region build attr values
                        switch (ngram.CRFAttributesLength)
                        {
                            case 1:
                                AppendAttrValue(wordIndex, ngram.CRFAttribute_0);
                                break;

                            case 2:
                                AppendAttrValue(wordIndex, ngram.CRFAttribute_0); *(_attributeBufferPtr++) = VERTICAL_SLASH;
                                AppendAttrValue(wordIndex, ngram.CRFAttribute_1);
                                break;

                            case 3:
                                AppendAttrValue(wordIndex, ngram.CRFAttribute_0); *(_attributeBufferPtr++) = VERTICAL_SLASH;
                                AppendAttrValue(wordIndex, ngram.CRFAttribute_1); *(_attributeBufferPtr++) = VERTICAL_SLASH;
                                AppendAttrValue(wordIndex, ngram.CRFAttribute_2);
                                break;

                            default:
                                // The separator is written *between* values instead of being appended after each
                                // one and removed at the end, so a zero-attribute n-gram (which also lands in this
                                // branch) no longer moves the pointer back into, or before, the header.
                                for (var j = 0; j < ngram.CRFAttributesLength; j++)
                                {
                                    if (j != 0)
                                    {
                                        *(_attributeBufferPtr++) = VERTICAL_SLASH;
                                    }
                                    AppendAttrValue(wordIndex, ngram.CRFAttributes[j]);
                                }
                                break;
                        }
                        #endregion

                        #region add attr values
                        *(_attributeBufferPtr++) = '\0';
                        var attrLengthWithZero = (int)(_attributeBufferPtr - _attributeBufferPtrBase);
                        if (attrLengthWithZero > ATTRIBUTE_MAX_LENGTH)
                        {
                            // The native call receives only a pointer (no length), so the encoded string must keep
                            // its terminating zero: when truncating, put the terminator on the last kept char.
                            attrLengthWithZero = ATTRIBUTE_MAX_LENGTH;
                            _attributeBufferPtrBase[attrLengthWithZero - 1] = '\0';
                        }
                        UTF8_ENCODING.GetBytes(_attributeBufferPtrBase, attrLengthWithZero, _UTF8BufferPtrBase, UTF8_BUFFER_SIZE);
                        native.crf_tagger_addItemAttributeNameOnly(_tagger, _UTF8BufferPtrBase);
                        #endregion
                    }

                    #region BOS & EOS
                    // The first word gets the begin marker, the last word the end marker. Because of the
                    // 'else', a one-word sequence receives only the begin marker.
                    if (wordIndex == 0)
                    {
                        native.crf_tagger_addItemAttributeNameOnly(_tagger, XlatUnsafe.Inst.BeginOfSentencePtrBase);
                    }
                    else if (wordIndex == wordsCount_Minus1)
                    {
                        native.crf_tagger_addItemAttributeNameOnly(_tagger, XlatUnsafe.Inst.EndOfSentencePtrBase);
                    }
                    #endregion
                    #endregion

                    native.crf_tagger_endAddItemAttribute(_tagger);
                }
                #endregion

                native.crf_tagger_endAddItemSequence(_tagger);

                native.crf_tagger_tag(_tagger);

                #region get crf tagging data
                System.Diagnostics.Debug.Assert(native.crf_tagger_getResultLength(_tagger) == wordsCount, "(native.crf_tagger_getResultLength( _Tagger ) != _WordsCount)");
                for (var i = 0; i < wordsCount; i++)
                {
                    var ptr = native.crf_tagger_getResultValue(_tagger, (uint)i);

                    var value = (byte *)ptr.ToPointer();
                    words[i].posTaggerOutputType = PosTaggerExtensions.ToPosTaggerOutputType(value);
                }
                #endregion
            }
            finally
            {
                // Free the per-word pinned GC handles (presumably allocated by Init - confirm) even when
                // building the attributes, encoding or tagging throws; otherwise they would stay pinned.
                for (var i = 0; i < wordsCount; i++)
                {
                    (_pinnedWordsBufferPtrBase + i)->gcHandle.Free();
                }
            }
        }
コード例 #2
0
        /// <summary>
        /// Tags every word of <paramref name="words"/> with a part-of-speech output type using the native CRF tagger.
        /// </summary>
        /// <remarks>
        /// For each word the method sends to the tagger: the "other" input-type attribute, one attribute per
        /// n-gram returned by the template file for that word position, and a begin- or end-of-sentence marker.
        /// After tagging, the result for word <c>i</c> is stored in <c>words[i].posTaggerOutputType</c>.
        /// The method returns immediately when <c>Init(words)</c> returns <c>false</c>.
        /// In DEBUG builds a tab/newline separated trace of the sent attributes is collected in a local string
        /// for inspection in the debugger.
        /// </remarks>
        /// <param name="words">Words of one sequence; each word's <c>posTaggerOutputType</c> is overwritten.</param>
        public void Run(List<word_t> words)
        {
            #region [.init.]
            if (!Init(words))
            {
                return;
            }
            var wordsCount        = words.Count;
            var wordsCount_Minus1 = wordsCount - 1;
            #if DEBUG
            var sb_attr_debug = new StringBuilder();
            #endif
            #endregion

            try
            {
                native.crf_tagger_beginAddItemSequence(_Tagger);

                #region [.put-attr-values-to-crf.]
                for (var wordIndex = 0; wordIndex < wordsCount; wordIndex++)
                {
                    native.crf_tagger_beginAddItemAttribute(_Tagger);

                    #region [.process-crf-attributes-by-word.]
                    native.crf_tagger_addItemAttributeNameOnly(_Tagger, xlat_Unsafe.Inst._PosInputtypeOtherPtrBase);
                    #if DEBUG
                    sb_attr_debug.Append(PosTaggerInputType.O.ToText()).Append('\t');
                    #endif

                    var ngrams = _CrfTemplateFile.GetCRFNgramsWhichCanTemplateBeApplied(wordIndex, wordsCount);
                    for (int i = 0, ngramsLength = ngrams.Length; i < ngramsLength; i++)
                    {
                        var ngram = ngrams[i];

                        // Every attribute starts with the n-gram's header chars; the values are appended after them.
                        _AttributeBufferPtr = ngram.CopyAttributesHeaderChars(_AttributeBufferPtrBase);

                        #region [.build attr-values.]
                        switch (ngram.CRFAttributesLength)
                        {
                            case 1:
                                AppendAttrValue(wordIndex, ngram.CRFAttribute_0);
                                break;

                            case 2:
                                AppendAttrValue(wordIndex, ngram.CRFAttribute_0); *(_AttributeBufferPtr++) = VERTICAL_SLASH;
                                AppendAttrValue(wordIndex, ngram.CRFAttribute_1);
                                break;

                            case 3:
                                AppendAttrValue(wordIndex, ngram.CRFAttribute_0); *(_AttributeBufferPtr++) = VERTICAL_SLASH;
                                AppendAttrValue(wordIndex, ngram.CRFAttribute_1); *(_AttributeBufferPtr++) = VERTICAL_SLASH;
                                AppendAttrValue(wordIndex, ngram.CRFAttribute_2);
                                break;

                            default:
                                // The separator is written *between* values instead of being appended after each
                                // one and removed at the end, so a zero-attribute n-gram (which also lands in this
                                // branch) no longer moves the pointer back into, or before, the header.
                                for (var j = 0; j < ngram.CRFAttributesLength; j++)
                                {
                                    if (j != 0)
                                    {
                                        *(_AttributeBufferPtr++) = VERTICAL_SLASH;
                                    }
                                    AppendAttrValue(wordIndex, ngram.CRFAttributes[j]);
                                }
                                break;
                        }
                        #endregion

                        #region [.add-attr-values.]
                        *(_AttributeBufferPtr++) = '\0';
                        var attr_len_with_zero = (int)(_AttributeBufferPtr - _AttributeBufferPtrBase);
                        if (attr_len_with_zero > ATTRIBUTE_MAX_LENGTH)
                        {
                            // The native call receives only a pointer (no length), so the encoded string must keep
                            // its terminating zero: when truncating, put the terminator on the last kept char.
                            attr_len_with_zero = ATTRIBUTE_MAX_LENGTH;
                            _AttributeBufferPtrBase[attr_len_with_zero - 1] = '\0';
                        }
                        UTF8_ENCODING.GetBytes(_AttributeBufferPtrBase, attr_len_with_zero, _UTF8BufferPtrBase, UTF8_BUFFER_SIZE);
                        native.crf_tagger_addItemAttributeNameOnly(_Tagger, _UTF8BufferPtrBase);
                        #if DEBUG
                        var s_debug = new string( _AttributeBufferPtrBase, 0, attr_len_with_zero - 1 );
                        sb_attr_debug.Append(s_debug).Append('\t');
                        #endif
                        #endregion
                    }

                    #region [.BOS-&-EOS.]
                    // The first word gets the begin marker, the last word the end marker. Because of the
                    // 'else', a one-word sequence receives only the begin marker.
                    if (wordIndex == 0)
                    {
                        native.crf_tagger_addItemAttributeNameOnly(_Tagger, xlat_Unsafe.Inst._BeginOfSentencePtrBase);
                        #if DEBUG
                        sb_attr_debug.Append(xlat_Unsafe.BEGIN_OF_SENTENCE).Append('\t');
                        #endif
                    }
                    else if (wordIndex == wordsCount_Minus1)
                    {
                        native.crf_tagger_addItemAttributeNameOnly(_Tagger, xlat_Unsafe.Inst._EndOfSentencePtrBase);
                        #if DEBUG
                        sb_attr_debug.Append(xlat_Unsafe.END_OF_SENTENCE).Append('\t');
                        #endif
                    }
                    #endregion
                    #endregion

                    native.crf_tagger_endAddItemAttribute(_Tagger);
                    #if DEBUG
                    sb_attr_debug.Append('\n');
                    #endif
                }
                #endregion

                native.crf_tagger_endAddItemSequence(_Tagger);
                #if DEBUG
                // Materialised only so the full attribute trace can be inspected in the debugger.
                var attr_debug = sb_attr_debug.ToString();
                #endif

                #region [.run-crf-tagging-words.]
                native.crf_tagger_tag(_Tagger);
                #endregion

                #region [.get-crf-tagging-data.]
                System.Diagnostics.Debug.Assert(native.crf_tagger_getResultLength(_Tagger) == wordsCount, "(native.crf_tagger_getResultLength( _Tagger ) != _WordsCount)");
                for (var i = 0; i < wordsCount; i++)
                {
                    var ptr = native.crf_tagger_getResultValue(_Tagger, (uint)i);

                    var value = (byte *)ptr.ToPointer();
                    words[i].posTaggerOutputType = PosTaggerExtensions.ToPosTaggerOutputType(value);
                }
                #endregion
            }
            finally
            {
                // Free the per-word pinned GC handles (presumably allocated by Init - confirm) even when
                // building the attributes, encoding or tagging throws; otherwise they would stay pinned.
                for (var i = 0; i < wordsCount; i++)
                {
                    (_PinnedWordsBufferPtrBase + i)->gcHandle.Free();
                }
            }
        }
コード例 #3
0
        /// <summary>
        /// Assigns a syntax role type to every word of <paramref name="words"/> using the native CRF tagger.
        /// </summary>
        /// <remarks>
        /// For each word the method sends to the tagger: the "other" input-type attribute, one attribute per
        /// n-gram returned by the template file for that word position, and a begin- or end-of-sentence marker.
        /// After tagging, result <c>i</c> is stored in <c>words[i].syntaxRoleType</c>.
        /// The method returns immediately when <c>Init(words)</c> returns <c>false</c>.
        /// In DEBUG builds a tab/newline separated trace of the sent attributes is collected in a local string
        /// for inspection in the debugger.
        /// </remarks>
        /// <param name="words">Words of one sequence; each word's <c>syntaxRoleType</c> is overwritten.</param>
        public void Run(IList<word_t> words)
        {
            #region [.init.]
            if (!Init(words))
            {
                return;
            }
            var wordsCount        = words.Count;
            var wordsCount_Minus1 = wordsCount - 1;
            #if DEBUG
            var sb_attr_debug = new StringBuilder();
            #endif
            #endregion

            Native.crf_tagger_beginAddItemSequence(_Tagger);

            #region [.put-attr-values-to-crf.]
            for (int wordIndex = 0; wordIndex < wordsCount; wordIndex++)
            {
                Native.crf_tagger_beginAddItemAttribute(_Tagger);

                #region [.process-crf-attributes-by-word.]
                Native.crf_tagger_addItemAttributeNameOnly(_Tagger, xlat_Unsafe.Inst._PosInputtypeOtherPtrBase);
                #if DEBUG
                sb_attr_debug.Append(PosTaggerInputType.O.ToText()).Append('\t');
                #endif

                var ngrams = _CrfTemplateFile.GetCRFNgramsWhichCanTemplateBeApplied(wordIndex, wordsCount);
                for (int i = 0, ngramsLength = ngrams.Length; i < ngramsLength; i++)
                {
                    var ngram = ngrams[i];

                    // Every attribute starts with the n-gram's header; the values are appended after it.
                    _AttributeBufferPtr = ngram.CopyAttributesHeaderChars(_AttributeBufferPtrBase);

                    #region [.build attr-values.]
                    switch (ngram.CRFAttributesLength)
                    {
                        case 1:
                            AppendAttrValue(wordIndex, ngram.CRFAttribute_0);
                            break;

                        case 2:
                            AppendAttrValue(wordIndex, ngram.CRFAttribute_0); *(_AttributeBufferPtr++) = VERTICAL_SLASH;
                            AppendAttrValue(wordIndex, ngram.CRFAttribute_1);
                            break;

                        case 3:
                            AppendAttrValue(wordIndex, ngram.CRFAttribute_0); *(_AttributeBufferPtr++) = VERTICAL_SLASH;
                            AppendAttrValue(wordIndex, ngram.CRFAttribute_1); *(_AttributeBufferPtr++) = VERTICAL_SLASH;
                            AppendAttrValue(wordIndex, ngram.CRFAttribute_2);
                            break;

                        default:
                            // The separator is written *between* values instead of being appended after each
                            // one and removed at the end, so a zero-attribute n-gram (which also lands in this
                            // branch) no longer moves the pointer back into, or before, the header.
                            for (var j = 0; j < ngram.CRFAttributesLength; j++)
                            {
                                if (j != 0)
                                {
                                    *(_AttributeBufferPtr++) = VERTICAL_SLASH;
                                }
                                AppendAttrValue(wordIndex, ngram.CRFAttributes[j]);
                            }
                            break;
                    }
                    #endregion

                    #region [.add-attr-values.]
                    // The native call receives only a pointer (no length), hence the explicit terminator.
                    *(_AttributeBufferPtr++) = ZERO;
                    Native.crf_tagger_addItemAttributeNameOnly(_Tagger, _AttributeBufferPtrBase);
                    #if DEBUG
                    var attr_len_with_zero = (int)(_AttributeBufferPtr - _AttributeBufferPtrBase);
                    var s_debug            = new string((sbyte *)_AttributeBufferPtrBase, 0, attr_len_with_zero - 1 );
                    sb_attr_debug.Append(s_debug).Append('\t');
                    #endif
                    #endregion
                }

                #region [.BOS-&-EOS.]
                // The first word gets the begin marker, the last word the end marker. Because of the
                // 'else', a one-word sequence receives only the begin marker.
                if (wordIndex == 0)
                {
                    Native.crf_tagger_addItemAttributeNameOnly(_Tagger, xlat_Unsafe.Inst._BeginOfSentencePtrBase);
                    #if DEBUG
                    sb_attr_debug.Append(xlat_Unsafe.BEGIN_OF_SENTENCE).Append('\t');
                    #endif
                }
                else if (wordIndex == wordsCount_Minus1)
                {
                    Native.crf_tagger_addItemAttributeNameOnly(_Tagger, xlat_Unsafe.Inst._EndOfSentencePtrBase);
                    #if DEBUG
                    sb_attr_debug.Append(xlat_Unsafe.END_OF_SENTENCE).Append('\t');
                    #endif
                }
                #endregion
                #endregion

                Native.crf_tagger_endAddItemAttribute(_Tagger);
                #if DEBUG
                sb_attr_debug.Append('\n');
                #endif
            }
            #endregion

            Native.crf_tagger_endAddItemSequence(_Tagger);
            #if DEBUG
            // Materialised only so the full attribute trace can be inspected in the debugger.
            var attr_debug = sb_attr_debug.ToString();
            #endif

            #region [.run-crf-tagging-words.]
            Native.crf_tagger_tag(_Tagger);
            #endregion

            #region [.get-crf-tagging-data.]
            uint resultLength = Native.crf_tagger_getResultLength(_Tagger);
            // 'words' is indexed by the native result count below, so flag a mismatch in debug builds.
            System.Diagnostics.Debug.Assert(resultLength == wordsCount, "(Native.crf_tagger_getResultLength( _Tagger ) != wordsCount)");
            for (uint i = 0; i < resultLength; i++)
            {
                var ptr = Native.crf_tagger_getResultValue(_Tagger, i);

                var value = (byte *)ptr.ToPointer();
                words[(int)i].syntaxRoleType = SyntaxExtensions.ToSyntaxRoleType(value);
            }
            #endregion
        }