Esempio n. 1
0
        /// <summary>
        /// Tokenizes the text obtained from <paramref name="textReader"/> (via its <c>ReadAll()</c>
        /// extension) into sentences using <paramref name="tokenizer"/>.
        /// </summary>
        /// <param name="tokenizer">The sentence tokenizer that receives the text.</param>
        /// <param name="textReader">
        /// The reader that supplies the text. This method does not dispose it.
        /// </param>
        /// <returns>
        /// Whatever <paramref name="tokenizer"/> returns for the text read from
        /// <paramref name="textReader"/>.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="tokenizer"/> or <paramref name="textReader"/> is <c>null</c>.
        /// </exception>
        public static IEnumerable<Sentence> TokenizeSentences(this ISentenceTokenizer tokenizer, TextReader textReader)
        {
            // nameof keeps the reported parameter names in sync with the signature on rename.
            if (tokenizer == null)
            {
                throw new ArgumentNullException(nameof(tokenizer));
            }

            if (textReader == null)
            {
                throw new ArgumentNullException(nameof(textReader));
            }

            return tokenizer.TokenizeSentences(textReader.ReadAll());
        }
 /// <summary>
 /// Initializes the splitter: stores its dependencies, creates the word tokenizer and the
 /// sentence splitter, and loads the models via <c>LoadModels()</c>.
 /// </summary>
 /// <param name="log">Logger; also forwarded to the base constructor. Must not be null.</param>
 /// <param name="configuration">Lexicon configuration; its <c>Resources</c> value is logged. Must not be null.</param>
 /// <param name="cache">Cached documents source, forwarded to the base constructor.</param>
 /// <param name="tokenizerFactory">Factory used to create the sentence splitter. Must not be null.</param>
 /// <param name="repairHandler">Sentence repair handler. Must not be null.</param>
 /// <param name="neResolver">
 /// Named-entity recognizers. Stored as-is without a null check.
 /// NOTE(review): confirm whether <c>null</c> is intentionally allowed here.
 /// </param>
 /// <exception cref="ArgumentNullException">
 /// <paramref name="log"/>, <paramref name="configuration"/>, <paramref name="repairHandler"/>
 /// or <paramref name="tokenizerFactory"/> is <c>null</c>.
 /// </exception>
 public OpenNLPTextSplitter(ILogger<OpenNLPTextSplitter> log,
                            ILexiconConfig configuration,
                            ICachedDocumentsSource cache,
                            ISentenceTokenizerFactory tokenizerFactory,
                            ISentenceRepairHandler repairHandler,
                            IEnumerable<INamedEntityRecognition> neResolver)
     : base(log, cache)
 {
     this.log           = log ?? throw new ArgumentNullException(nameof(log));
     this.configuration = configuration ?? throw new ArgumentNullException(nameof(configuration));
     this.repairHandler = repairHandler ?? throw new ArgumentNullException(nameof(repairHandler));

     // Guard the factory like the other dependencies so a missing registration surfaces as an
     // ArgumentNullException naming the parameter instead of a NullReferenceException below.
     if (tokenizerFactory == null)
     {
         throw new ArgumentNullException(nameof(tokenizerFactory));
     }

     this.neResolver = neResolver;
     log.LogDebug("Creating with resource path: {0}", configuration.Resources);
     tokenizer = TreebankWordTokenizer.Tokenizer;

     // NOTE(review): the meaning of the two boolean flags is defined by
     // ISentenceTokenizerFactory.Create, which is not visible from here.
     sentenceSplitter = tokenizerFactory.Create(true, false);
     LoadModels();
 }
Esempio n. 3
0
        /// <summary>
        /// Tokenizes the text read from <paramref name="stream"/> into sentences using
        /// <paramref name="tokenizer"/>.
        /// </summary>
        /// <remarks>
        /// Arguments are validated immediately when this method is called. The stream itself is
        /// only read when the returned sequence is enumerated. The <see cref="StreamReader"/>
        /// wrapping the stream is disposed when enumeration completes or the enumerator is
        /// disposed, which also closes <paramref name="stream"/>.
        /// </remarks>
        /// <param name="tokenizer">The sentence tokenizer that receives the text.</param>
        /// <param name="stream">The stream that supplies the text.</param>
        /// <returns>A lazily evaluated sequence of the sentences produced by the tokenizer.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="tokenizer"/> or <paramref name="stream"/> is <c>null</c>.
        /// </exception>
        public static IEnumerable<Sentence> TokenizeSentences(this ISentenceTokenizer tokenizer, Stream stream)
        {
            // Validation lives in this non-iterator wrapper: inside an iterator body the checks
            // would be deferred until the first MoveNext(), far away from the faulty call site.
            if (tokenizer == null)
            {
                throw new ArgumentNullException(nameof(tokenizer));
            }

            if (stream == null)
            {
                throw new ArgumentNullException(nameof(stream));
            }

            return TokenizeSentencesIterator(tokenizer, stream);
        }

        // Iterator half of TokenizeSentences(ISentenceTokenizer, Stream): reads the stream on
        // enumeration and yields each sentence, disposing the reader when enumeration ends.
        private static IEnumerable<Sentence> TokenizeSentencesIterator(ISentenceTokenizer tokenizer, Stream stream)
        {
            using (TextReader reader = new StreamReader(stream))
            {
                foreach (Sentence sentence in tokenizer.TokenizeSentences(reader.ReadAll()))
                {
                    yield return sentence;
                }
            }
        }
        /// <summary>
        /// Initializes the splitter: stores its dependencies, creates the word tokenizer and the
        /// sentence splitter, and loads the models from the configured resource path.
        /// </summary>
        /// <param name="log">Logger; also forwarded to the base constructor. Must not be null.</param>
        /// <param name="configuration">
        /// Lexicon configuration; its <c>ResourcePath</c> is logged and passed to <c>LoadModels</c>.
        /// Must not be null.
        /// </param>
        /// <param name="cache">Cached documents source, forwarded to the base constructor.</param>
        /// <param name="tokenizerFactory">Factory used to create the sentence splitter. Must not be null.</param>
        /// <param name="repairHandler">Sentence repair handler. Must not be null.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="configuration"/>, <paramref name="log"/>, <paramref name="repairHandler"/>
        /// or <paramref name="tokenizerFactory"/> is <c>null</c>.
        /// </exception>
        public OpenNLPTextSplitter(ILogger<OpenNLPTextSplitter> log,
                                   ILexiconConfiguration configuration,
                                   ICachedDocumentsSource cache,
                                   ISentenceTokenizerFactory tokenizerFactory,
                                   ISentenceRepairHandler repairHandler)
            : base(log, cache)
        {
            if (configuration == null)
            {
                throw new ArgumentNullException(nameof(configuration));
            }

            this.log           = log ?? throw new ArgumentNullException(nameof(log));
            this.repairHandler = repairHandler ?? throw new ArgumentNullException(nameof(repairHandler));

            // Guard the factory like the other dependencies so a missing registration surfaces as
            // an ArgumentNullException naming the parameter instead of a NullReferenceException below.
            if (tokenizerFactory == null)
            {
                throw new ArgumentNullException(nameof(tokenizerFactory));
            }

            log.LogDebug("Creating with resource path: {0}", configuration.ResourcePath);
            tokenizer = TreebankWordTokenizer.Tokenizer;

            // NOTE(review): the meaning of the two boolean flags is defined by
            // ISentenceTokenizerFactory.Create, which is not visible from here.
            sentenceSplitter = tokenizerFactory.Create(true, false);
            LoadModels(configuration.ResourcePath);
        }
 /// <summary>
 /// Initializes a new instance that keeps a reference to the supplied sentence tokenizer.
 /// </summary>
 /// <param name="tokenizer">The sentence tokenizer to store. Must not be null.</param>
 /// <exception cref="System.ArgumentNullException">
 /// <paramref name="tokenizer"/> is <c>null</c>.
 /// </exception>
 public SimpleWordsExtraction(ISentenceTokenizer tokenizer)
 {
     // Reject a missing dependency up front, before touching any instance state.
     if (tokenizer == null)
     {
         throw new System.ArgumentNullException(nameof(tokenizer));
     }

     this.tokenizer = tokenizer;
 }