Beispiel #1
0
 /// <summary>
 /// Creates a sentence-splitter configuration from two resource XML files.
 /// </summary>
 /// <param name="sentSplitterResourcesXmlFilename">
 /// File name handed to the <c>SentSplitterModel</c> constructor.
 /// </param>
 /// <param name="urlDetectorResourcesXmlFilename">
 /// File name handed to the <c>UrlDetectorConfig</c> constructor.
 /// </param>
 public SentSplitterConfig(string sentSplitterResourcesXmlFilename,
                           string urlDetectorResourcesXmlFilename)
 {
     // The splitter model is built first, then the URL-detector configuration.
     this.Model = new SentSplitterModel(sentSplitterResourcesXmlFilename);
     this.UrlDetectorConfig = new UrlDetectorConfig(urlDetectorResourcesXmlFilename);

     // Splitting by smiles is enabled by default for this constructor.
     this.SplitBySmiles = true;
 }
        /// <summary>
        /// Creates the tokenizer: builds its URL detector and allocates the working buffers.
        /// </summary>
        /// <param name="config">
        /// URL-detector configuration. NOTE: the passed instance is modified — its
        /// <c>UrlExtractMode</c> is overwritten with <c>Position</c> before the detector is created.
        /// </param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="config"/> is <c>null</c>.
        /// </exception>
        public ner_tokenizer(UrlDetectorConfig config)
        {
            // Fail with a descriptive exception instead of a NullReferenceException
            // on the property assignment below.
            if (config == null)
            {
                throw new System.ArgumentNullException("config");
            }

            // The extract mode is always forced to Position, whatever the caller configured.
            config.UrlExtractMode = UrlDetector.UrlExtractModeEnum.Position;

            _UrlDetector     = new UrlDetector(config);
            _BuildModelSent  = sent_t.CreateEmpty();

            // Both word lists start with the same default capacity.
            _Words           = new List<word_t>(DEFAULT_WORDSLIST_CAPACITY);
            _BuildModelWords = new List<buildmodel_word_t>(DEFAULT_WORDSLIST_CAPACITY);
        }
Beispiel #3
0
 /// <summary>
 /// Creates the model-builder configuration by assembling the tokenizer
 /// configuration it wraps.
 /// </summary>
 /// <param name="tokenizerResourcesXmlFilename">
 /// File name handed to the <c>TokenizerConfig4NerModelBuilder</c> constructor.
 /// </param>
 /// <param name="languageType">
 /// Language stored on the tokenizer configuration and passed to the
 /// <c>NerInputTypeProcessorFactory</c> constructor.
 /// </param>
 /// <param name="urlDetectorConfig">
 /// URL-detector configuration stored on the tokenizer configuration as-is.
 /// </param>
 public NerModelBuilderConfig(string tokenizerResourcesXmlFilename,
                              LanguageTypeEnum languageType,
                              UrlDetectorConfig urlDetectorConfig)
 {
     // Build the tokenizer configuration completely before publishing it on the property.
     var tokenizerConfig = new TokenizerConfig4NerModelBuilder(tokenizerResourcesXmlFilename)
     {
         UrlDetectorConfig            = urlDetectorConfig,
         NerInputTypeProcessorFactory = new NerInputTypeProcessorFactory(languageType),
         LanguageType                 = languageType,
     };

     TokenizerConfig4NerModelBuilder = tokenizerConfig;
 }
Beispiel #4
0
        /// <summary>
        /// Creates a model builder: validates the arguments, then sets up the scriber,
        /// the input-type processor, the URL detector and an empty word list.
        /// </summary>
        /// <param name="templateFilename">
        /// Template file name; checked with <c>ThrowIfNullOrWhiteSpace</c> and passed to
        /// <c>PosTaggerScriber.Create4ModelBuilder</c>.
        /// </param>
        /// <param name="languageType">
        /// Language passed to <c>CreatePosTaggerInputTypeProcessor</c>.
        /// </param>
        /// <param name="urlDetectorConfig">
        /// Configuration for the URL detector; checked with <c>ThrowIfNull</c>.
        /// </param>
        public PosTaggerModelBuilder(string templateFilename,
                                     LanguageTypeEnum languageType,
                                     UrlDetectorConfig urlDetectorConfig)
        {
            // Argument validation comes first so nothing is created for bad input.
            templateFilename.ThrowIfNullOrWhiteSpace("templateFilename");
            urlDetectorConfig.ThrowIfNull("urlDetectorConfig");

            // Collaborators, created in the same order as before.
            _posTaggerScriber = PosTaggerScriber.Create4ModelBuilder(templateFilename);
            _posTaggerInputTypeProcessor = CreatePosTaggerInputTypeProcessor(languageType);
            _urlDetector = new UrlDetector(urlDetectorConfig);

            // Word buffer starts empty with the default list capacity.
            _words = new List<Word>();
        }
        /// <summary>
        /// Creates a factory holding a fixed number of <c>UrlDetector</c> instances.
        /// </summary>
        /// <param name="config">
        /// Configuration passed to every <c>UrlDetector</c> created here; all instances
        /// receive the same configuration object.
        /// </param>
        /// <param name="instanceCount">
        /// Number of detectors to create; also used as both the initial and the maximum
        /// count of the semaphore. Must be greater than zero.
        /// </param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="instanceCount"/> is zero or negative.
        /// </exception>
        public ConcurrentFactory(UrlDetectorConfig config, int instanceCount)
        {
            if (instanceCount <= 0)
            {
                // ArgumentException(string) treats its single argument as the message, so the
                // previous code never set ParamName. ArgumentOutOfRangeException derives from
                // ArgumentException, so existing catch blocks keep working.
                throw new ArgumentOutOfRangeException("instanceCount", instanceCount, "instanceCount must be greater than zero.");
            }

            _Semaphore = new Semaphore(instanceCount, instanceCount);
            _Stack     = new ConcurrentStack<UrlDetector>();

            // Pre-create every detector up front so the stack starts full.
            for (int i = 0; i < instanceCount; i++)
            {
                _Stack.Push(new UrlDetector(config));
            }
        }
Beispiel #6
0
        /// <summary>
        /// Creates the tokenizer: builds a URL detector around the given model and
        /// allocates the word list, n-gram builder and upper-casing buffer.
        /// </summary>
        /// <param name="urlModel">
        /// Model assigned to the <c>UrlDetectorConfig</c> used for the internal detector.
        /// </param>
        /// <param name="wordCapacity">
        /// Requested initial capacity of the word list; the larger of this value and
        /// <c>DEFAULT_WORDCAPACITY</c> is used.
        /// </param>
        public mld_tokenizer(UrlDetectorModel urlModel, int wordCapacity)
        {
            // Never start with fewer slots than the default capacity.
            int initialWordCapacity = Math.Max(DEFAULT_WORDCAPACITY, wordCapacity);

            // The detector always runs in Position extract mode.
            _UrlDetector = new UrlDetector(new UrlDetectorConfig
            {
                Model          = urlModel,
                UrlExtractMode = UrlDetector.UrlExtractModeEnum.Position,
            });

            _Words               = new List<string>(initialWordCapacity);
            _NgramsSB            = new StringBuilder();
            _AddWordToListAction = new Action<string>(AddWordToList);

            // Keep local copies of the lookup members exposed through
            // xlat_Unsafe.Inst and UnsafeConst.Inst.
            _UIM = xlat_Unsafe.Inst._UPPER_INVARIANT_MAP;
            _CTM = xlat_Unsafe.Inst._CHARTYPE_MAP;
            _IAW = UnsafeConst.Inst._INTERPRETE_AS_WHITESPACE;
            _DWC = UnsafeConst.Inst._DIGIT_WORD_CHARS;

            // Allocate the upper-casing buffer at its default size.
            ReAllocWordToUpperBuffer(DEFAULT_WORDTOUPPERBUFFER);
        }
Beispiel #7
0
            /// <summary>
            /// Returns the cached <c>ConcurrentFactory</c>, creating it on first use.
            /// </summary>
            /// <remarks>
            /// The cached field is read once without the lock; only when it is still
            /// <c>null</c> is <c>_SyncLock</c> taken and the field checked again before
            /// the factory is built.
            /// NOTE(review): whether <c>_ConcurrentFactory</c> is declared <c>volatile</c>
            /// is not visible from this method — confirm at the field declaration.
            /// </remarks>
            /// <returns>The factory instance stored in <c>_ConcurrentFactory</c>.</returns>
            public ConcurrentFactory GetConcurrentFactory()
            {
                // Fast path: already created, no locking needed.
                var factory = _ConcurrentFactory;
                if (factory != null)
                {
                    return factory;
                }

                lock (_SyncLock)
                {
                    // Re-read under the lock: another caller may have created it meanwhile.
                    factory = _ConcurrentFactory;
                    if (factory == null)
                    {
                        var detectorConfig = new UrlDetectorConfig(Config.URL_DETECTOR_RESOURCES_XML_FILENAME)
                        {
                            UrlExtractMode = UrlDetector.UrlExtractModeEnum.Position,
                        };

                        factory = new ConcurrentFactory(detectorConfig, Config.CONCURRENT_FACTORY_INSTANCE_COUNT);

                        // Publish only after the factory is fully constructed.
                        _ConcurrentFactory = factory;
                    }
                }

                return factory;
            }