コード例 #1
0
        /// <summary>
        /// Repeatedly calls <c>ReadNextSent</c> on <paramref name="textReader"/> until it reports
        /// <c>false</c>, and after every successful read writes the current contents of
        /// <c>_words</c> (presumably filled by <c>ReadNextSent</c>) to
        /// <paramref name="textWriter"/> when that list is not empty.
        /// </summary>
        /// <param name="textWriter">Destination handed to <c>WriteCrfAttributesWords4ModelBuilder</c>.</param>
        /// <param name="textReader">Source handed to <c>ReadNextSent</c>.</param>
        /// <param name="processSentCallback">
        /// Progress callback. Invoked with the sentence counter whenever the counter is a multiple of
        /// <paramref name="sentNumberCallbackStep"/>, and once more after the loop when the final
        /// counter value is not such a multiple.
        /// </param>
        /// <param name="sentNumberCallbackStep">
        /// Divisor used for the progress check; a value of zero makes the modulo operation throw.
        /// </param>
        /// <returns>
        /// The sentence counter after the loop. It starts at 1 and is incremented once per
        /// successful read, so it is one greater than the number of successful reads.
        /// </returns>
        unsafe public int CreateCrfInputFormatFile(TextWriter textWriter,
                                                   TextReader textReader,
                                                   ProcessSentCallbackDelegate processSentCallback,
                                                   int sentNumberCallbackStep)
        {
            var lineNumber = 1;
            int sentNumber;

            for (sentNumber = 1; ReadNextSent(textReader, ref lineNumber); sentNumber++)
            {
                if (_words.Count > 0)
                {
                    _posTaggerScriber.WriteCrfAttributesWords4ModelBuilder(textWriter, _words);
                }

                var isOnStepBoundary = (sentNumber % sentNumberCallbackStep) == 0;
                if (isOnStepBoundary)
                {
                    processSentCallback(sentNumber);
                }
            }

            // Report the final counter unless it sits exactly on a step boundary.
            var finalIsOffBoundary = (sentNumber % sentNumberCallbackStep) != 0;
            if (finalIsOffBoundary)
            {
                processSentCallback(sentNumber);
            }

            return sentNumber;
        }
コード例 #2
0
 /// <summary>
 /// Placeholder that performs no work: every call throws.
 /// </summary>
 /// <param name="textReader">Unused.</param>
 /// <param name="processSentCallback">Unused.</param>
 /// <param name="sentNumberCallbackStep">Unused.</param>
 /// <param name="processXmlErrorSentCallback">Unused.</param>
 /// <param name="startBuildCallback">Unused.</param>
 /// <returns>Never returns normally.</returns>
 /// <exception cref="NotImplementedException">Always thrown, with the message "learner. not used".</exception>
 public int Build(TextReader textReader,
                  ProcessSentCallbackDelegate processSentCallback,
                  int sentNumberCallbackStep,
                  ProcessXmlErrorSentCallbackDelegate processXmlErrorSentCallback,
                  StartBuildCallbackDelegate startBuildCallback)
 {
     throw new NotImplementedException("learner. not used");
 }
コード例 #3
0
        /// <summary>
        /// Splits <paramref name="text"/> into sentences via <c>_SentSplitter.AllocateSents</c>,
        /// with <paramref name="processSentCallback"/> stored in
        /// <c>_OuterProcessSentCallback_Delegate</c> for the duration of the call.
        /// </summary>
        /// <param name="text">Text to process; it is pinned while the splitter runs.</param>
        /// <param name="splitBySmiles">Value assigned to <c>_SentSplitter.SplitBySmiles</c> before splitting.</param>
        /// <param name="processSentCallback">
        /// Caller's callback; presumably invoked from <c>_SentSplitterProcessSentCallback_Delegate</c>.
        /// </param>
        /// <remarks>
        /// The stored callback is cleared in a <c>finally</c> block, so an exception raised by the
        /// splitter or by a callback does not leave the caller's delegate referenced by this instance.
        /// </remarks>
        public void Run(string text, bool splitBySmiles, ProcessSentCallbackDelegate processSentCallback)
        {
            _OuterProcessSentCallback_Delegate = processSentCallback;
            try
            {
                fixed (char* basePtr = text)
                {
                    // The pointer is only valid while the string is pinned by this fixed block.
                    _BASE = basePtr;

                    _SentSplitter.SplitBySmiles = splitBySmiles;
                    _SentSplitter.AllocateSents(text, _SentSplitterProcessSentCallback_Delegate);
                }
            }
            finally
            {
                // Always drop the caller's delegate, including on the exception path.
                _OuterProcessSentCallback_Delegate = null;
            }
        }
コード例 #4
0
        /// <summary>
        /// Splits <paramref name="text"/> into sentences via <c>_SentSplitter.AllocateSents</c>,
        /// routing each sentence to <c>ProcessSentSplitterCallback</c>, with
        /// <paramref name="processSentCallback"/> stored in <c>_ProcessSentCallback</c> for the
        /// duration of the call.
        /// </summary>
        /// <param name="text">Text to process; it is pinned while the splitter runs.</param>
        /// <param name="splitBySmiles">Value assigned to <c>_SentSplitter.SplitBySmiles</c> before splitting.</param>
        /// <param name="processSentCallback">
        /// Caller's callback; presumably invoked from <c>ProcessSentSplitterCallback</c>.
        /// </param>
        /// <remarks>
        /// The stored callback is cleared in a <c>finally</c> block, so an exception raised by the
        /// splitter or by a callback does not leave the caller's delegate referenced by this instance.
        /// </remarks>
        public void run(string text, bool splitBySmiles, ProcessSentCallbackDelegate processSentCallback)
        {
            _ProcessSentCallback = processSentCallback;
            try
            {
                fixed (char* basePtr = text)
                fixed (CharType* charTypeMapPtr = xlat.CHARTYPE_MAP)
                fixed (NERCharType* nerCharTypeMapPtr = NER_CHARTYPE_MAP)
                {
                    // These pointers are only valid while the fixed block keeps the data pinned.
                    _BASE = basePtr;
                    _CTM  = charTypeMapPtr;
                    _NCTM = nerCharTypeMapPtr;

                    _SentSplitter.SplitBySmiles = splitBySmiles;
                    _SentSplitter.AllocateSents(text, ProcessSentSplitterCallback);
                }
            }
            finally
            {
                // Always drop the caller's delegate, including on the exception path.
                _ProcessSentCallback = null;
            }
        }
コード例 #5
0
        /*
         * public int Build( TextReader                          textReader,
         *                ProcessSentCallbackDelegate         processSentCallback,
         *                int                                 sentNumberCallbackStep,
         *                ProcessXmlErrorSentCallbackDelegate processXmlErrorSentCallback,
         *                StartBuildCallbackDelegate          startBuildCallback )
         * {
         *  var sentNumber = 1;
         *  for ( var line = textReader.ReadLine(); line != null; line = textReader.ReadLine() )
         *  {
         *      var xe = ToXElement( line, sentNumber );
         *      if ( xe != null )
         *      {
         *          var words = from n in xe.Nodes()
         *                      from word in CreateWords( n )
         *                      select word;
         *
         *          _Words.Clear();
         *          _Words.AddRange( words );
         *
         *          _NerCRFSuiteModelBuilderAdapter.AppendWords( _Words );
         *      }
         *      else
         *      {
         *          processXmlErrorSentCallback( line, sentNumber );
         *      }
         *
         *
         *      if ( (sentNumber % sentNumberCallbackStep) == 0 )
         *      {
         *          processSentCallback( sentNumber );
         *      }
         *      sentNumber++;
         *  }
         *  if ( (sentNumber % sentNumberCallbackStep) != 0 )
         *  {
         *      processSentCallback( sentNumber );
         *  }
         *
         *  startBuildCallback();
         *
         *  _NerCRFSuiteModelBuilderAdapter.Build();
         *
         *  return (sentNumber);
         * }
         */
        #endregion

        /// <summary>
        /// Reads <paramref name="textReader"/> line by line, converts each line with
        /// <c>ToXElement</c>, and writes the CRF attributes of the words produced from the element's
        /// nodes to <paramref name="textWriter"/>.
        /// </summary>
        /// <param name="textWriter">Destination handed to <c>WriteCrfAttributesWords4ModelBuilder</c>.</param>
        /// <param name="textReader">Source of input lines; reading stops when <c>ReadLine</c> returns <c>null</c>.</param>
        /// <param name="processSentCallback">
        /// Progress callback. Invoked with the sentence counter whenever the counter is a multiple of
        /// <paramref name="sentNumberCallbackStep"/>, and once more after the loop when the final
        /// counter value is not such a multiple.
        /// </param>
        /// <param name="sentNumberCallbackStep">
        /// Divisor used for the progress check; a value of zero makes the modulo operation throw.
        /// </param>
        /// <param name="processXmlErrorSentCallback">
        /// Invoked with the line and its counter when <c>ToXElement</c> returns <c>null</c> for that line.
        /// </param>
        /// <returns>
        /// The sentence counter after the loop. It starts at 1 and is incremented once per line read,
        /// so it is one greater than the number of lines read.
        /// </returns>
        public int CreateCrfInputFormatFile(TextWriter textWriter,
                                            TextReader textReader,
                                            ProcessSentCallbackDelegate processSentCallback,
                                            int sentNumberCallbackStep,
                                            ProcessXmlErrorSentCallbackDelegate processXmlErrorSentCallback)
        {
            var sentNumber = 1;

            string line;
            while ((line = textReader.ReadLine()) != null)
            {
                var xe = ToXElement(line, sentNumber);
                if (xe == null)
                {
                    // The line could not be turned into an element: report it and move on.
                    processXmlErrorSentCallback(line, sentNumber);
                }
                else
                {
                    // Lazy sequence; it is enumerated by AddRange below.
                    var words = xe.Nodes().SelectMany(node => CreateWords(node));

                    _Words.Clear();
                    _Words.AddRange(words);

                    if (_Words.Count > 0)
                    {
                        _NerScriber.WriteCrfAttributesWords4ModelBuilder(textWriter, _Words);
                    }
                }

                var isOnStepBoundary = (sentNumber % sentNumberCallbackStep) == 0;
                if (isOnStepBoundary)
                {
                    processSentCallback(sentNumber);
                }
                sentNumber++;
            }

            // Report the final counter unless it sits exactly on a step boundary.
            var finalIsOffBoundary = (sentNumber % sentNumberCallbackStep) != 0;
            if (finalIsOffBoundary)
            {
                processSentCallback(sentNumber);
            }

            return sentNumber;
        }
コード例 #6
0
        /// <summary>
        /// Processes one fragment of a sentence in model-builder mode: the whole fragment is treated
        /// as a single sentence, passed to <c>ProcessSentSplitterCallback</c>, and the shared
        /// <c>_BuildModelWords</c> list (presumably filled by <c>ProcessSentCallbackModelBuilder</c>)
        /// is returned.
        /// </summary>
        /// <param name="partOfSentText">Fragment text; it is pinned while it is processed.</param>
        /// <param name="isLastPartOfSentText">
        /// <c>_NotSkipNonLetterAndNonDigitToTheEnd</c> is set to the negation of this value for the
        /// duration of the call.
        /// </param>
        /// <param name="nerOutputType">Source of the "I" and "B" build-model input types.</param>
        /// <param name="prevPartOfSentTextSameNerOutputType">
        /// When <c>true</c>, the "B" input type is set to the same value as the "I" input type;
        /// otherwise it comes from <c>ToBuildModelNerInputTypeB()</c>.
        /// </param>
        /// <returns>
        /// The instance's own <c>_BuildModelWords</c> list. It is cleared at the start of every call,
        /// so callers that need the result after a later call must copy it.
        /// </returns>
        /// <remarks>
        /// The per-call state (<c>_NotSkipNonLetterAndNonDigitToTheEnd</c> and
        /// <c>_ProcessSentCallback</c>) is reset in a <c>finally</c> block, so an exception thrown
        /// while processing does not leave this instance in model-builder mode for later calls.
        /// </remarks>
        public List <buildmodel_word_t> run4ModelBuilder(
            string partOfSentText,
            bool isLastPartOfSentText,
            NerOutputType nerOutputType,
            bool prevPartOfSentTextSameNerOutputType)
        {
            _BuildModelWords.Clear();

            // The "I" type is the same in both cases; only the "B" type depends on the previous fragment.
            _BuildModelNerInputTypeI = nerOutputType.ToBuildModelNerInputTypeI();
            if (prevPartOfSentTextSameNerOutputType)
            {
                _BuildModelNerInputTypeB = _BuildModelNerInputTypeI;
            }
            else
            {
                _BuildModelNerInputTypeB = nerOutputType.ToBuildModelNerInputTypeB();
            }

            _ProcessSentCallback = ProcessSentCallbackModelBuilder;
            try
            {
                fixed (char* basePtr = partOfSentText)
                fixed (CharType* charTypeMapPtr = xlat.CHARTYPE_MAP)
                fixed (NERCharType* nerCharTypeMapPtr = NER_CHARTYPE_MAP)
                {
                    // These pointers are only valid while the fixed block keeps the data pinned.
                    _BASE = basePtr;
                    _CTM  = charTypeMapPtr;
                    _NCTM = nerCharTypeMapPtr;
                    _NotSkipNonLetterAndNonDigitToTheEnd = !isLastPartOfSentText;

                    var urls = _UrlDetector.AllocateUrls(partOfSentText);

                    // The sentence spans the whole fragment; urls are passed only when some were found.
                    _BuildModelSent.Set4ModelBuilder(0, partOfSentText.Length, (0 < urls.Count) ? urls : null);

                    ProcessSentSplitterCallback(_BuildModelSent);
                }
            }
            finally
            {
                // Restore per-call state on every exit path, including exceptions.
                _NotSkipNonLetterAndNonDigitToTheEnd = false;
                _ProcessSentCallback = null;
            }

            return _BuildModelWords;
        }
コード例 #7
0
ファイル: Tokenizer.cs プロジェクト: lavrenkova/sw-auto
        /// <summary>
        /// Processes one fragment of a sentence in model-builder mode: the whole fragment is treated
        /// as a single sentence, passed to <c>SentSplitterProcessSentCallback</c>, and the shared
        /// <c>_buildModelWords</c> list (presumably filled by <c>ProcessSentCallbackModelBuilder</c>)
        /// is returned.
        /// </summary>
        /// <param name="partOfSentText">Fragment text; it is pinned while it is processed.</param>
        /// <param name="isLastPartOfSentText">
        /// <c>_dontSkipNonLetterAndNonDigitToTheEnd</c> is set to the negation of this value for the
        /// duration of the call.
        /// </param>
        /// <param name="nerOutputType">Source of the "I" and "B" build-model input types.</param>
        /// <param name="prevPartOfSentTextSameNerOutputType">
        /// When <c>true</c>, the "B" input type is set to the same value as the "I" input type;
        /// otherwise it comes from <c>ToBuildModelNerInputTypeB()</c>.
        /// </param>
        /// <returns>
        /// The instance's own <c>_buildModelWords</c> list. It is cleared at the start of every call,
        /// so callers that need the result after a later call must copy it.
        /// </returns>
        /// <remarks>
        /// The per-call state (<c>_dontSkipNonLetterAndNonDigitToTheEnd</c> and
        /// <c>_outerProcessSentCallbackDelegate</c>) is reset in a <c>finally</c> block, so an
        /// exception thrown while processing does not leave this instance in model-builder mode for
        /// later calls.
        /// </remarks>
        public List <Buildmodel_word_t> Run4ModelBuilder(
            string partOfSentText,
            bool isLastPartOfSentText,
            NerOutputType nerOutputType,
            bool prevPartOfSentTextSameNerOutputType)
        {
            _buildModelWords.Clear();

            // The "I" type is the same in both cases; only the "B" type depends on the previous fragment.
            _buildModelNerInputTypeI = nerOutputType.ToBuildModelNerInputTypeI();
            if (prevPartOfSentTextSameNerOutputType)
            {
                _buildModelNerInputTypeB = _buildModelNerInputTypeI;
            }
            else
            {
                _buildModelNerInputTypeB = nerOutputType.ToBuildModelNerInputTypeB();
            }

            _outerProcessSentCallbackDelegate = ProcessSentCallbackModelBuilder;
            try
            {
                fixed (char* basePtr = partOfSentText)
                {
                    // The pointer is only valid while the string is pinned by this fixed block.
                    _BASE = basePtr;
                    _dontSkipNonLetterAndNonDigitToTheEnd = !isLastPartOfSentText;

                    var urls = _urlDetector.AllocateUrls(partOfSentText);

                    // The sentence spans the whole fragment; urls are passed only when some were found.
                    _buildModelSentence.Set4ModelBuilder(0, partOfSentText.Length, (0 < urls.Count) ? urls : null);

                    SentSplitterProcessSentCallback(_buildModelSentence);
                }
            }
            finally
            {
                // Restore per-call state on every exit path, including exceptions.
                _dontSkipNonLetterAndNonDigitToTheEnd = false;
                _outerProcessSentCallbackDelegate = null;
            }

            return _buildModelWords;
        }