Ejemplo n.º 1
0
 /// <summary>
 /// Creates a new <see cref="OpenNLPTokenizer"/> from a sentence detector and a tokenizer operation.
 /// </summary>
 /// <param name="factory">Attribute factory, passed through to the base constructor.</param>
 /// <param name="reader">Input reader, passed through to the base constructor.</param>
 /// <param name="sentenceOp">Sentence detector; wrapped in an <see cref="OpenNLPSentenceBreakIterator"/> for the base constructor and stored on this instance. Must not be <c>null</c>.</param>
 /// <param name="tokenizerOp">Tokenizer operation stored on this instance. Must not be <c>null</c>.</param>
 /// <exception cref="ArgumentNullException"><paramref name="sentenceOp"/> or <paramref name="tokenizerOp"/> is <c>null</c>.</exception>
 public OpenNLPTokenizer(AttributeFactory factory, TextReader reader, NLPSentenceDetectorOp sentenceOp, NLPTokenizerOp tokenizerOp) // LUCENENET: Added reader param for compatibility with 4.8 - remove when upgrading
     : base(factory, reader, new OpenNLPSentenceBreakIterator(sentenceOp))
 {
     // Each argument is validated separately so the exception names the offending parameter.
     // ArgumentNullException derives from ArgumentException, so callers that catch
     // ArgumentException keep working.
     // NOTE(review): the base-constructor argument above is evaluated before these checks run;
     // whether OpenNLPSentenceBreakIterator tolerates a null sentenceOp is not visible here - confirm.
     if (sentenceOp == null)
     {
         throw new ArgumentNullException(nameof(sentenceOp), "OpenNLPTokenizer: both a Sentence Detector and a Tokenizer are required");
     }
     if (tokenizerOp == null)
     {
         throw new ArgumentNullException(nameof(tokenizerOp), "OpenNLPTokenizer: both a Sentence Detector and a Tokenizer are required");
     }

     this.sentenceOp  = sentenceOp;
     this.tokenizerOp = tokenizerOp;

     // Register the attributes this tokenizer holds references to.
     this.termAtt     = AddAttribute <ICharTermAttribute>();
     this.flagsAtt    = AddAttribute <IFlagsAttribute>();
     this.offsetAtt   = AddAttribute <IOffsetAttribute>();
 }
Ejemplo n.º 2
0
 /// <summary>
 /// Creates an <see cref="OpenNLPTokenizer"/> over <paramref name="reader"/>, using the sentence
 /// detector and tokenizer that <see cref="OpenNLPOpsFactory"/> returns for
 /// <c>sentenceModelFile</c> and <c>tokenizerModelFile</c>.
 /// </summary>
 /// <param name="factory">Attribute factory handed to the new tokenizer.</param>
 /// <param name="reader">Input reader handed to the new tokenizer.</param>
 /// <returns>The newly constructed tokenizer.</returns>
 /// <exception cref="Exception">An <see cref="IOException"/> was thrown while obtaining the ops or
 /// constructing the tokenizer; it is attached as the inner exception.</exception>
 public override Tokenizer Create(AttributeFactory factory, TextReader reader)
 {
     try
     {
         NLPSentenceDetectorOp detector = OpenNLPOpsFactory.GetSentenceDetector(sentenceModelFile);
         NLPTokenizerOp splitter = OpenNLPOpsFactory.GetTokenizer(tokenizerModelFile);

         return new OpenNLPTokenizer(factory, reader, detector, splitter);
     }
     catch (IOException ioFailure)
     {
         // Wrap the I/O failure; the wrapper's message is the full text of the original exception.
         string description = ioFailure.ToString();
         throw new Exception(description, ioFailure);
     }
 }
Ejemplo n.º 3
0
 /// <summary>
 /// Creates an <see cref="OpenNLPTokenizer"/> over <paramref name="reader"/>, using the sentence
 /// detector and tokenizer that <see cref="OpenNLPOpsFactory"/> returns for
 /// <c>sentenceModelFile</c> and <c>tokenizerModelFile</c>.
 /// </summary>
 /// <param name="factory">Attribute factory handed to the new tokenizer.</param>
 /// <param name="reader">Input reader handed to the new tokenizer.</param>
 /// <returns>The newly constructed tokenizer.</returns>
 /// <remarks>
 /// Exceptions for which <c>IsIOException()</c> returns <c>true</c> are rethrown as the result of
 /// <c>RuntimeException.Create</c>; all other exceptions propagate unchanged.
 /// </remarks>
 public override Tokenizer Create(AttributeFactory factory, TextReader reader)
 {
     try
     {
         NLPSentenceDetectorOp detector = OpenNLPOpsFactory.GetSentenceDetector(sentenceModelFile);
         NLPTokenizerOp splitter = OpenNLPOpsFactory.GetTokenizer(tokenizerModelFile);

         return new OpenNLPTokenizer(factory, reader, detector, splitter);
     }
     catch (Exception ioFailure) when (ioFailure.IsIOException())
     {
         // Only I/O-classified failures are converted; the filter lets everything else pass through.
         throw RuntimeException.Create(ioFailure);
     }
 }
Ejemplo n.º 4
0
 /// <summary>
 /// Creates a new <see cref="OpenNLPTokenizer"/> from a sentence detector and a tokenizer operation.
 /// </summary>
 /// <param name="factory">Attribute factory, passed through to the base constructor.</param>
 /// <param name="reader">Input reader, passed through to the base constructor.</param>
 /// <param name="sentenceOp">Sentence detector; wrapped in an <see cref="OpenNLPSentenceBreakIterator"/> for the base constructor. Must not be <c>null</c>.</param>
 /// <param name="tokenizerOp">Tokenizer operation stored on this instance. Must not be <c>null</c>.</param>
 /// <exception cref="ArgumentNullException"><paramref name="sentenceOp"/> or <paramref name="tokenizerOp"/> is <c>null</c>.</exception>
 public OpenNLPTokenizer(AttributeFactory factory, TextReader reader, NLPSentenceDetectorOp sentenceOp, NLPTokenizerOp tokenizerOp) // LUCENENET: Added reader param for compatibility with 4.8 - remove when upgrading
     : base(factory, reader, new OpenNLPSentenceBreakIterator(sentenceOp))
 {
     // LUCENENET specific - ArgumentNullException (.NET convention) replaces IllegalArgumentException,
     // and each argument is checked on its own. Both checks share the same message text.
     const string requiredMessage = "OpenNLPTokenizer: both a Sentence Detector and a Tokenizer are required";

     if (sentenceOp is null)
     {
         throw new ArgumentNullException(nameof(sentenceOp), requiredMessage);
     }

     // LUCENENET: sentenceOp is not kept in a field because it is never read.
     this.tokenizerOp = tokenizerOp ?? throw new ArgumentNullException(nameof(tokenizerOp), requiredMessage);

     // Register the attributes this tokenizer holds references to.
     termAtt = AddAttribute<ICharTermAttribute>();
     flagsAtt = AddAttribute<IFlagsAttribute>();
     offsetAtt = AddAttribute<IOffsetAttribute>();
 }
Ejemplo n.º 5
0
 /// <summary>
 /// Creates a new <see cref="OpenNLPTokenizer"/> that uses
 /// <see cref="AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY"/>.
 /// </summary>
 /// <param name="reader">Input reader, forwarded to the four-argument constructor.</param>
 /// <param name="sentenceOp">Sentence detector, forwarded to the four-argument constructor.</param>
 /// <param name="tokenizerOp">Tokenizer operation, forwarded to the four-argument constructor.</param>
 /// <remarks>LUCENENET 4.8.0 specific overload to default AttributeFactory.</remarks>
 public OpenNLPTokenizer(TextReader reader, NLPSentenceDetectorOp sentenceOp, NLPTokenizerOp tokenizerOp)
     : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, reader, sentenceOp, tokenizerOp) { }