/// <summary>
/// Creates a new <see cref="OpenNLPTokenizer"/> that reads from <paramref name="reader"/>.
/// The base class is initialized with an <c>OpenNLPSentenceBreakIterator</c> built from
/// <paramref name="sentenceOp"/>.
/// </summary>
/// <param name="factory">Attribute factory forwarded to the base constructor.</param>
/// <param name="reader">Input reader forwarded to the base constructor.</param>
/// <param name="sentenceOp">Sentence detector op; must not be <c>null</c>.</param>
/// <param name="tokenizerOp">Tokenizer op; must not be <c>null</c>.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="sentenceOp"/> or <paramref name="tokenizerOp"/> is <c>null</c>.
/// </exception>
public OpenNLPTokenizer(AttributeFactory factory, TextReader reader, NLPSentenceDetectorOp sentenceOp, NLPTokenizerOp tokenizerOp) // LUCENENET: Added reader param for compatibility with 4.8 - remove when upgrading
    : base(factory, reader, new OpenNLPSentenceBreakIterator(sentenceOp))
{
    // Each argument is validated separately so the exception names the offending parameter.
    // ArgumentNullException derives from ArgumentException, so existing catch blocks keep working.
    // Note: the base initializer above runs before these checks.
    if (sentenceOp is null)
    {
        throw new ArgumentNullException(nameof(sentenceOp), "OpenNLPTokenizer: both a Sentence Detector and a Tokenizer are required");
    }

    if (tokenizerOp is null)
    {
        throw new ArgumentNullException(nameof(tokenizerOp), "OpenNLPTokenizer: both a Sentence Detector and a Tokenizer are required");
    }

    this.sentenceOp = sentenceOp;
    this.tokenizerOp = tokenizerOp;

    this.termAtt = AddAttribute<ICharTermAttribute>();
    this.flagsAtt = AddAttribute<IFlagsAttribute>();
    this.offsetAtt = AddAttribute<IOffsetAttribute>();
}
/// <summary>
/// Builds an <see cref="OpenNLPTokenizer"/> over <paramref name="reader"/>, using the sentence
/// detector and tokenizer ops that <c>OpenNLPOpsFactory</c> returns for <c>sentenceModelFile</c>
/// and <c>tokenizerModelFile</c>.
/// </summary>
/// <param name="factory">Attribute factory passed through to the tokenizer.</param>
/// <param name="reader">Input reader passed through to the tokenizer.</param>
/// <returns>The newly constructed tokenizer.</returns>
/// <exception cref="Exception">
/// An <see cref="IOException"/> was raised while obtaining the ops or constructing the tokenizer;
/// the original exception is attached as the inner exception.
/// </exception>
public override Tokenizer Create(AttributeFactory factory, TextReader reader)
{
    try
    {
        // The sentence detector is requested first, then the tokenizer.
        NLPSentenceDetectorOp detector = OpenNLPOpsFactory.GetSentenceDetector(sentenceModelFile);
        NLPTokenizerOp tokenizer = OpenNLPOpsFactory.GetTokenizer(tokenizerModelFile);

        Tokenizer created = new OpenNLPTokenizer(factory, reader, detector, tokenizer);
        return created;
    }
    catch (IOException ioFailure)
    {
        throw new Exception(ioFailure.ToString(), ioFailure);
    }
}
/// <summary>
/// Builds an <see cref="OpenNLPTokenizer"/> over <paramref name="reader"/>, using the sentence
/// detector and tokenizer ops that <c>OpenNLPOpsFactory</c> returns for <c>sentenceModelFile</c>
/// and <c>tokenizerModelFile</c>.
/// </summary>
/// <param name="factory">Attribute factory passed through to the tokenizer.</param>
/// <param name="reader">Input reader passed through to the tokenizer.</param>
/// <returns>The newly constructed tokenizer.</returns>
/// <remarks>
/// Any exception for which <c>IsIOException()</c> returns <c>true</c> is converted with
/// <c>RuntimeException.Create</c> and the result is thrown; other exceptions propagate unchanged.
/// </remarks>
public override Tokenizer Create(AttributeFactory factory, TextReader reader)
{
    try
    {
        // The sentence detector is requested first, then the tokenizer.
        NLPSentenceDetectorOp detector = OpenNLPOpsFactory.GetSentenceDetector(sentenceModelFile);
        NLPTokenizerOp tokenizer = OpenNLPOpsFactory.GetTokenizer(tokenizerModelFile);

        Tokenizer created = new OpenNLPTokenizer(factory, reader, detector, tokenizer);
        return created;
    }
    catch (Exception failure) when (failure.IsIOException())
    {
        throw RuntimeException.Create(failure);
    }
}
/// <summary>
/// Creates a new <see cref="OpenNLPTokenizer"/> that reads from <paramref name="reader"/>.
/// The base class is initialized with an <c>OpenNLPSentenceBreakIterator</c> built from
/// <paramref name="sentenceOp"/>.
/// </summary>
/// <param name="factory">Attribute factory forwarded to the base constructor.</param>
/// <param name="reader">Input reader forwarded to the base constructor.</param>
/// <param name="sentenceOp">Sentence detector op; must not be <c>null</c>.</param>
/// <param name="tokenizerOp">Tokenizer op; must not be <c>null</c>.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="sentenceOp"/> or <paramref name="tokenizerOp"/> is <c>null</c>.
/// </exception>
public OpenNLPTokenizer(AttributeFactory factory, TextReader reader, NLPSentenceDetectorOp sentenceOp, NLPTokenizerOp tokenizerOp) // LUCENENET: Added reader param for compatibility with 4.8 - remove when upgrading
    : base(factory, reader, new OpenNLPSentenceBreakIterator(sentenceOp))
{
    // LUCENENET specific - changed from IllegalArgumentException to ArgumentNullException
    // (.NET convention) and refactored to throw on each one separately.
    const string bothRequired = "OpenNLPTokenizer: both a Sentence Detector and a Tokenizer are required";

    if (sentenceOp is null)
    {
        throw new ArgumentNullException(nameof(sentenceOp), bothRequired);
    }

    if (tokenizerOp is null)
    {
        throw new ArgumentNullException(nameof(tokenizerOp), bothRequired);
    }

    // LUCENENET: sentenceOp is not stored in a field because it is never read.
    this.tokenizerOp = tokenizerOp;

    termAtt = AddAttribute<ICharTermAttribute>();
    flagsAtt = AddAttribute<IFlagsAttribute>();
    offsetAtt = AddAttribute<IOffsetAttribute>();
}
/// <summary>
/// Creates a new <see cref="OpenNLPTokenizer"/> by delegating to the four-argument constructor
/// with <c>AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY</c> as the attribute factory.
/// </summary>
/// <param name="reader">Input reader forwarded to the four-argument constructor.</param>
/// <param name="sentenceOp">Sentence detector op forwarded to the four-argument constructor.</param>
/// <param name="tokenizerOp">Tokenizer op forwarded to the four-argument constructor.</param>
public OpenNLPTokenizer(TextReader reader, NLPSentenceDetectorOp sentenceOp, NLPTokenizerOp tokenizerOp) // LUCENENET 4.8.0 specific overload to default AttributeFactory
    : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, reader, sentenceOp, tokenizerOp)
{
}