/// <summary>
/// Tokenizes the text obtained from <paramref name="textReader"/> into sentences.
/// </summary>
/// <param name="tokenizer">The sentence tokenizer that performs the split.</param>
/// <param name="textReader">The reader supplying the text; its content is fetched with <c>ReadAll()</c>.</param>
/// <returns>Whatever <c>tokenizer.TokenizeSentences</c> returns for the text that was read.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="tokenizer"/> or <paramref name="textReader"/> is <c>null</c>.
/// </exception>
public static IEnumerable<Sentence> TokenizeSentences(this ISentenceTokenizer tokenizer, TextReader textReader)
{
    if (tokenizer == null)
    {
        throw new ArgumentNullException(nameof(tokenizer));
    }

    if (textReader == null)
    {
        throw new ArgumentNullException(nameof(textReader));
    }

    // The reader is consumed here, before the tokenizer is invoked.
    // The reader is not disposed by this method; the caller keeps ownership.
    var text = textReader.ReadAll();
    return tokenizer.TokenizeSentences(text);
}
/// <summary>
/// Creates the splitter, wires up its tokenizer and sentence splitter, and loads its models.
/// </summary>
/// <param name="log">Logger; also forwarded to the base constructor.</param>
/// <param name="configuration">Lexicon configuration; its <c>Resources</c> value is written to the debug log.</param>
/// <param name="cache">Cached documents source; forwarded to the base constructor.</param>
/// <param name="tokenizerFactory">Factory used to create the sentence splitter.</param>
/// <param name="repairHandler">Sentence repair handler stored for later use.</param>
/// <param name="neResolver">Named entity recognizers; stored as supplied, without a null check.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="log"/>, <paramref name="configuration"/>, <paramref name="repairHandler"/>
/// or <paramref name="tokenizerFactory"/> is <c>null</c>.
/// </exception>
public OpenNLPTextSplitter(
    ILogger<OpenNLPTextSplitter> log,
    ILexiconConfig configuration,
    ICachedDocumentsSource cache,
    ISentenceTokenizerFactory tokenizerFactory,
    ISentenceRepairHandler repairHandler,
    IEnumerable<INamedEntityRecognition> neResolver)
    : base(log, cache)
{
    this.log = log ?? throw new ArgumentNullException(nameof(log));
    this.configuration = configuration ?? throw new ArgumentNullException(nameof(configuration));
    this.repairHandler = repairHandler ?? throw new ArgumentNullException(nameof(repairHandler));

    // Fail fast with a descriptive exception instead of a NullReferenceException
    // when the factory is dereferenced further down.
    if (tokenizerFactory == null)
    {
        throw new ArgumentNullException(nameof(tokenizerFactory));
    }

    // NOTE(review): neResolver is accepted as-is; confirm whether null is a supported value.
    this.neResolver = neResolver;

    log.LogDebug("Creating with resource path: {0}", configuration.Resources);
    tokenizer = TreebankWordTokenizer.Tokenizer;
    sentenceSplitter = tokenizerFactory.Create(true, false);
    LoadModels();
}
/// <summary>
/// Tokenizes the text read from <paramref name="stream"/> into sentences.
/// </summary>
/// <remarks>
/// Arguments are validated immediately when the method is called. The stream itself is only
/// read once the returned sequence is enumerated, and the <see cref="StreamReader"/> wrapping
/// it is disposed when that enumeration finishes or is disposed.
/// </remarks>
/// <param name="tokenizer">The sentence tokenizer that performs the split.</param>
/// <param name="stream">The stream supplying the text.</param>
/// <returns>A lazily evaluated sequence of the sentences produced by <paramref name="tokenizer"/>.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="tokenizer"/> or <paramref name="stream"/> is <c>null</c>.
/// </exception>
public static IEnumerable<Sentence> TokenizeSentences(this ISentenceTokenizer tokenizer, Stream stream)
{
    if (tokenizer == null)
    {
        throw new ArgumentNullException(nameof(tokenizer));
    }

    if (stream == null)
    {
        throw new ArgumentNullException(nameof(stream));
    }

    // The yielding part lives in a separate method: an iterator body does not run until
    // the first MoveNext(), which would otherwise postpone the checks above.
    return TokenizeSentencesIterator(tokenizer, stream);
}

// Lazily reads the stream and yields the tokenizer's sentences; arguments are pre-validated by the caller.
private static IEnumerable<Sentence> TokenizeSentencesIterator(ISentenceTokenizer tokenizer, Stream stream)
{
    using (TextReader reader = new StreamReader(stream))
    {
        foreach (Sentence sentence in tokenizer.TokenizeSentences(reader.ReadAll()))
        {
            yield return sentence;
        }
    }
}
/// <summary>
/// Creates the splitter, wires up its tokenizer and sentence splitter, and loads its models
/// from the configured resource path.
/// </summary>
/// <param name="log">Logger; also forwarded to the base constructor.</param>
/// <param name="configuration">
/// Lexicon configuration; its <c>ResourcePath</c> is logged and passed to <c>LoadModels</c>.
/// The configuration object itself is not stored.
/// </param>
/// <param name="cache">Cached documents source; forwarded to the base constructor.</param>
/// <param name="tokenizerFactory">Factory used to create the sentence splitter.</param>
/// <param name="repairHandler">Sentence repair handler stored for later use.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="configuration"/>, <paramref name="log"/>, <paramref name="repairHandler"/>
/// or <paramref name="tokenizerFactory"/> is <c>null</c>.
/// </exception>
public OpenNLPTextSplitter(
    ILogger<OpenNLPTextSplitter> log,
    ILexiconConfiguration configuration,
    ICachedDocumentsSource cache,
    ISentenceTokenizerFactory tokenizerFactory,
    ISentenceRepairHandler repairHandler)
    : base(log, cache)
{
    if (configuration == null)
    {
        throw new ArgumentNullException(nameof(configuration));
    }

    this.log = log ?? throw new ArgumentNullException(nameof(log));
    this.repairHandler = repairHandler ?? throw new ArgumentNullException(nameof(repairHandler));

    // Fail fast with a descriptive exception instead of a NullReferenceException
    // when the factory is dereferenced further down.
    if (tokenizerFactory == null)
    {
        throw new ArgumentNullException(nameof(tokenizerFactory));
    }

    log.LogDebug("Creating with resource path: {0}", configuration.ResourcePath);
    tokenizer = TreebankWordTokenizer.Tokenizer;
    sentenceSplitter = tokenizerFactory.Create(true, false);
    LoadModels(configuration.ResourcePath);
}
/// <summary>
/// Creates the extractor around the supplied sentence tokenizer.
/// </summary>
/// <param name="tokenizer">The sentence tokenizer to store; must not be <c>null</c>.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="tokenizer"/> is <c>null</c>.</exception>
public SimpleWordsExtraction(ISentenceTokenizer tokenizer)
{
    if (tokenizer == null)
    {
        throw new System.ArgumentNullException(nameof(tokenizer));
    }

    this.tokenizer = tokenizer;
}