Beispiel #1
0
 /// <summary>
 /// Creates a new <see cref="SentenceDetectorEvaluator"/> that stores the given sentence detector
 /// and assigns a new <c>FMeasure&lt;Span&gt;</c> instance to <c>FMeasure</c>.
 /// </summary>
 /// <param name="sentenceDetector">The sentence detector stored by this evaluator. It is not validated here.</param>
 /// <param name="listeners">The evaluation monitors, forwarded unchanged to the base constructor.</param>
 public SentenceDetectorEvaluator(ISentenceDetector sentenceDetector, params IEvaluationMonitor<SentenceSample>[] listeners)
     : base(listeners)
 {
     // The reference is kept as given; no null check is performed at construction time.
     this.sentenceDetector = sentenceDetector;

     // Each evaluator gets its own FMeasure<Span> instance.
     FMeasure = new FMeasure<Span>();
 }
Beispiel #2
0
        /// <summary>
        /// Produces a summary of <paramref name="input"/> after analyzing it with the supplied
        /// <paramref name="sentenceDetector"/> and <paramref name="tokenizer"/>.
        /// </summary>
        /// <param name="input">The text to summarize.</param>
        /// <param name="sentenceDetector">The sentence detector wrapped in a <c>SentenceDetectorAnalyzer</c>.</param>
        /// <param name="tokenizer">The tokenizer wrapped in a <c>TokenizerAnalyzer</c>.</param>
        /// <returns>
        /// An empty string when <paramref name="input"/> is <c>null</c> or empty; otherwise the value
        /// returned by <c>ProcessSummarization</c> for the analyzed document.
        /// </returns>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="sentenceDetector"/> or <paramref name="tokenizer"/> is <c>null</c>
        /// (only checked when <paramref name="input"/> is not empty).
        /// </exception>
        public string Summarize(string input, ISentenceDetector sentenceDetector, ITokenizer tokenizer)
        {
            // Empty input short-circuits before the argument checks below.
            if (string.IsNullOrEmpty(input))
            {
                return string.Empty;
            }

            if (sentenceDetector == null)
            {
                throw new ArgumentNullException(nameof(sentenceDetector));
            }

            if (tokenizer == null)
            {
                throw new ArgumentNullException(nameof(tokenizer));
            }

            var document = new Document("x-unspecified", input);

            // Sentence detection is registered before tokenization, in the same order as before.
            var analyzers = new AggregateAnalyzer();
            analyzers.Add(new SentenceDetectorAnalyzer(sentenceDetector));
            analyzers.Add(new TokenizerAnalyzer(tokenizer));

            analyzers.Analyze(document);

            return ProcessSummarization(document);
        }
        /// <summary>
        /// Initializes a new instance of the <see cref="SentenceDetectorAnalyzer" /> with the specified weight.
        /// </summary>
        /// <param name="sentenceDetector">The sentence detector assigned to <c>SentenceDetector</c>.</param>
        /// <param name="weight">The analyzer weight, passed to the base constructor.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="sentenceDetector"/> is <c>null</c>.
        /// </exception>
        public SentenceDetectorAnalyzer(ISentenceDetector sentenceDetector, float weight)
            : base(weight) {
            // nameof keeps the reported parameter name in sync with the signature on rename.
            if (sentenceDetector == null) {
                throw new ArgumentNullException(nameof(sentenceDetector));
            }

            SentenceDetector = sentenceDetector;
        }
 /// <summary>
 /// Initializes a new instance of the <see cref="WikimediaSentencesReader"/> class.
 /// </summary>
 /// <param name="localDumpFilePaths">Paths of the local dump files; one <c>XmlDumpFileReader</c> is created per path.</param>
 /// <param name="pageFilterer">The page predicate stored by this reader. It is not validated here.</param>
 /// <param name="wikiMarkupCleaner">The wiki markup cleaner stored by this reader. It is not validated here.</param>
 /// <param name="sentenceDetector">The sentence detector stored by this reader. It is not validated here.</param>
 /// <exception cref="ArgumentNullException"><paramref name="localDumpFilePaths"/> is <c>null</c>.</exception>
 public WikimediaSentencesReader(List<string> localDumpFilePaths, Predicate<string> pageFilterer, WikiMarkupCleaner wikiMarkupCleaner,
                                 ISentenceDetector sentenceDetector)
 {
     // Report the caller-facing parameter name rather than letting the LINQ call below
     // fail on its own internal argument name.
     if (localDumpFilePaths == null)
     {
         throw new ArgumentNullException(nameof(localDumpFilePaths));
     }

     this.xmlDumpFileReaders = localDumpFilePaths.Select(dp => new XmlDumpFileReader(dp)).ToList();

     // Reading starts at the first dump file reader.
     currentReaderIndex      = 0;
     this.pageFilterer       = pageFilterer;
     this.sentenceDetector   = sentenceDetector;
     this.wikiMarkupCleaner  = wikiMarkupCleaner;
 }
Beispiel #5
0
        /// <summary>
        /// Initializes a new instance of the <see cref="NLPToolsController"/> class, creating the sentence
        /// detector, tokenizer, POS tagger and phrase chunker from <c>.nbin</c> model files in a fixed directory.
        /// </summary>
        public NLPToolsController()
        {
            // NOTE(review): absolute, machine-specific model directory - consider moving it to configuration.
            const string modelDirectory = @"C:\Users\Garrett\Documents\Visual Studio 2015\Projects\MindysTermExtractionLibrary\src\sharpnlp-nbin-files\";

            // The directory already ends with a separator, so plain concatenation yields the full file paths.
            const string sentenceModelFile = modelDirectory + "EnglishSD.nbin";
            const string tokenizerModelFile = modelDirectory + "EnglishTok.nbin";
            const string posModelFile = modelDirectory + "EnglishPOS.nbin";
            const string chunkerModelFile = modelDirectory + "EnglishChunk.nbin";

            sentenceDetector = new EnglishMaximumEntropySentenceDetector(sentenceModelFile);
            tokenizer = new EnglishMaximumEntropyTokenizer(tokenizerModelFile);
            posTagger = new EnglishMaximumEntropyPosTagger(posModelFile);
            phraseChunker = new EnglishTreebankChunker(chunkerModelFile);
        }
        /// <summary>
        /// Initializes a new instance of the <see cref="SentenceDetectorAnalyzer" /> with the specified weight.
        /// </summary>
        /// <param name="sentenceDetector">The sentence detector assigned to <c>SentenceDetector</c>.</param>
        /// <param name="weight">The analyzer weight, passed to the base constructor.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="sentenceDetector"/> is <c>null</c>.
        /// </exception>
        public SentenceDetectorAnalyzer(ISentenceDetector sentenceDetector, float weight)
            : base(weight)
        {
            // nameof keeps the reported parameter name in sync with the signature on rename.
            if (sentenceDetector == null)
            {
                throw new ArgumentNullException(nameof(sentenceDetector));
            }

            SentenceDetector = sentenceDetector;
        }
Beispiel #7
0
 /// <summary>
 /// Initializes a new instance of the <see cref="Pipeline"/> class from its component services
 /// and sets the language to English.
 /// </summary>
 /// <param name="normalizerManager">The normalizer manager.</param>
 /// <param name="pOSTagger">The POS tagger.</param>
 /// <param name="sentenceDetector">The sentence detector.</param>
 /// <param name="stemmer">The stemmer.</param>
 /// <param name="stopWordsManager">The stop words manager.</param>
 /// <param name="tokenizer">The tokenizer.</param>
 /// <param name="featureExtractor">The feature extractor.</param>
 /// <param name="textSummarizer">The text summarizer.</param>
 /// <param name="entityFinder">The entity finder.</param>
 /// <exception cref="ArgumentNullException">
 /// Any of <paramref name="normalizerManager"/>, <paramref name="pOSTagger"/>,
 /// <paramref name="sentenceDetector"/>, <paramref name="stemmer"/>,
 /// <paramref name="stopWordsManager"/>, <paramref name="tokenizer"/>,
 /// <paramref name="featureExtractor"/>, <paramref name="textSummarizer"/> or
 /// <paramref name="entityFinder"/> is <c>null</c>.
 /// </exception>
 public Pipeline(
     INormalizerManager normalizerManager,
     IPOSTagger pOSTagger,
     ISentenceDetector sentenceDetector,
     IStemmer stemmer,
     IStopWordsManager stopWordsManager,
     ITokenizer tokenizer,
     IFeatureExtractor featureExtractor,
     ITextSummarizer textSummarizer,
     IEntityFinder entityFinder)
 {
     // Each argument is checked immediately before it is stored, in declaration order,
     // so earlier properties are already assigned when a later argument is rejected.
     if (normalizerManager is null)
     {
         throw new ArgumentNullException(nameof(normalizerManager));
     }
     NormalizerManager = normalizerManager;

     if (pOSTagger is null)
     {
         throw new ArgumentNullException(nameof(pOSTagger));
     }
     POSTagger = pOSTagger;

     if (sentenceDetector is null)
     {
         throw new ArgumentNullException(nameof(sentenceDetector));
     }
     SentenceDetector = sentenceDetector;

     if (stemmer is null)
     {
         throw new ArgumentNullException(nameof(stemmer));
     }
     Stemmer = stemmer;

     if (stopWordsManager is null)
     {
         throw new ArgumentNullException(nameof(stopWordsManager));
     }
     StopWordsManager = stopWordsManager;

     if (tokenizer is null)
     {
         throw new ArgumentNullException(nameof(tokenizer));
     }
     Tokenizer = tokenizer;

     if (featureExtractor is null)
     {
         throw new ArgumentNullException(nameof(featureExtractor));
     }
     FeatureExtractor = featureExtractor;

     if (textSummarizer is null)
     {
         throw new ArgumentNullException(nameof(textSummarizer));
     }
     TextSummarizer = textSummarizer;

     if (entityFinder is null)
     {
         throw new ArgumentNullException(nameof(entityFinder));
     }
     EntityFinder = entityFinder;

     // English is selected once all components are in place.
     SetLanguage(Languages.English);
 }
 /// <summary>
 /// Creates a <see cref="SentenceDetectorAnalyzer" /> with a weight of zero by delegating to the
 /// constructor overload that takes an explicit weight.
 /// </summary>
 /// <param name="sentenceDetector">The sentence detector used to analyze the sentences.</param>
 /// <exception cref="System.ArgumentNullException">
 /// <paramref name="sentenceDetector"/>
 /// </exception>
 public SentenceDetectorAnalyzer(ISentenceDetector sentenceDetector)
     : this(sentenceDetector, 0.0f)
 {
     // All initialization happens in the delegated overload.
 }
        /// <summary>
        /// Summarizes the specified input using the specified <paramref name="sentenceDetector"/> and <paramref name="tokenizer"/>.
        /// </summary>
        /// <param name="input">The input string to be summarized.</param>
        /// <param name="sentenceDetector">The sentence detector.</param>
        /// <param name="tokenizer">The tokenizer.</param>
        /// <returns>
        /// An empty string when <paramref name="input"/> is <c>null</c> or empty; otherwise the value
        /// returned by <c>ProcessSummarization</c> for the analyzed document.
        /// </returns>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="sentenceDetector"/> or <paramref name="tokenizer"/> is <c>null</c>
        /// (only checked when <paramref name="input"/> is not empty).
        /// </exception>
        public string Summarize(string input, ISentenceDetector sentenceDetector, ITokenizer tokenizer) {
            // Empty input returns early, before the argument checks below.
            if (string.IsNullOrEmpty(input)) {
                return string.Empty;
            }

            // nameof keeps the reported parameter names in sync with the signature on rename.
            if (sentenceDetector == null) {
                throw new ArgumentNullException(nameof(sentenceDetector));
            }

            if (tokenizer == null) {
                throw new ArgumentNullException(nameof(tokenizer));
            }

            var doc = new Document("x-unspecified", input);

            // Sentence detection is registered ahead of tokenization.
            var anl = new AggregateAnalyzer {
                new SentenceDetectorAnalyzer(sentenceDetector),
                new TokenizerAnalyzer(tokenizer)
            };

            anl.Analyze(doc);

            return ProcessSummarization(doc);
        }
Beispiel #10
0
 /// <summary>
 /// Initializes a new instance of the <see cref="EnglishTextProcessor"/> class, obtaining the sentence
 /// detector, tokenizer and stemmer for this processor's language from the supplied factories.
 /// </summary>
 /// <param name="sdFact">Factory queried for the sentence detector.</param>
 /// <param name="tokenizerFact">Factory queried for the tokenizer.</param>
 /// <param name="stemmerFact">Factory queried for the stemmer.</param>
 /// <exception cref="System.ArgumentNullException">
 /// <paramref name="sdFact"/>, <paramref name="tokenizerFact"/> or <paramref name="stemmerFact"/> is <c>null</c>.
 /// </exception>
 public EnglishTextProcessor(ISentenceDetectorFactory sdFact, ITokenizerFactory tokenizerFact, IStemmerFactory stemmerFact) : base(Language.English)
 {
     // Validate every factory up front so a null argument fails with its parameter name
     // instead of a NullReferenceException part-way through initialization.
     if (sdFact == null)
     {
         throw new System.ArgumentNullException(nameof(sdFact));
     }

     if (tokenizerFact == null)
     {
         throw new System.ArgumentNullException(nameof(tokenizerFact));
     }

     if (stemmerFact == null)
     {
         throw new System.ArgumentNullException(nameof(stemmerFact));
     }

     sd        = sdFact.GetSentenceDetector(Language);
     tokenizer = tokenizerFact.GetTokenizer(Language);
     stemmer   = stemmerFact.GetStemmer(Language);
 }
 /// <summary>
 /// Creates a <see cref="SentenceDetectorAnalyzer" /> with a weight of zero by delegating to the
 /// constructor overload that takes an explicit weight.
 /// </summary>
 /// <param name="sentenceDetector">The sentence detector used to analyze the sentences.</param>
 /// <exception cref="System.ArgumentNullException">
 /// <paramref name="sentenceDetector"/>
 /// </exception>
 public SentenceDetectorAnalyzer(ISentenceDetector sentenceDetector)
     : this(sentenceDetector, 0.0f)
 {
     // All initialization happens in the delegated overload.
 }
 /// <summary>
 /// Creates a new <see cref="SentenceDetectorEvaluator"/> that stores the given sentence detector.
 /// </summary>
 /// <param name="sentenceDetector">The sentence detector stored by this evaluator. It is not validated here.</param>
 /// <param name="listeners">The evaluation monitors, forwarded unchanged to the base constructor.</param>
 public SentenceDetectorEvaluator(ISentenceDetector sentenceDetector, params IEvaluationMonitor<SentenceSample>[] listeners)
     : base(listeners)
 {
     // The reference is kept as given; no null check is performed at construction time.
     this.sentenceDetector = sentenceDetector;
 }