Example #1
        /// <summary>
        /// Summarizes the specified input using the specified <paramref name="sentenceDetector"/> and <paramref name="tokenizer"/>.
        /// </summary>
        /// <param name="input">The input string to be summarized.</param>
        /// <param name="sentenceDetector">The sentence detector.</param>
        /// <param name="tokenizer">The tokenizer.</param>
        /// <returns>The summarized string.</returns>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="sentenceDetector"/>
        /// or
        /// <paramref name="tokenizer"/>
        /// </exception>
        public string Summarize(string input, ISentenceDetector sentenceDetector, ITokenizer tokenizer)
        {
            if (string.IsNullOrEmpty(input))
            {
                return string.Empty;
            }

            if (sentenceDetector == null)
            {
                throw new ArgumentNullException(nameof(sentenceDetector));
            }

            if (tokenizer == null)
            {
                throw new ArgumentNullException(nameof(tokenizer));
            }

            var doc = new Document("x-unspecified", input);
            var anl = new AggregateAnalyzer {
                new SentenceDetectorAnalyzer(sentenceDetector),
                new TokenizerAnalyzer(tokenizer)
            };

            anl.Analyze(doc);

            return ProcessSummarization(doc);
        }
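A hypothetical call site for the method above. It assumes SharpNL mirrors OpenNLP's model loading (SentenceDetectorME/SentenceModel and TokenizerME/TokenizerModel built from a model stream); the model file paths, the summarizer instance, and the article variable are placeholders, not part of the original example.

        // Usage sketch (assumptions noted above): build model-backed
        // implementations of ISentenceDetector and ITokenizer, then summarize.
        using (var sentStream = File.OpenRead("en-sent.bin"))    // placeholder path
        using (var tokenStream = File.OpenRead("en-token.bin"))  // placeholder path
        {
            var sentenceDetector = new SentenceDetectorME(new SentenceModel(sentStream));
            var tokenizer        = new TokenizerME(new TokenizerModel(tokenStream));

            string summary = summarizer.Summarize(article, sentenceDetector, tokenizer);
        }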
Example #2
        public static NaturalLanguageData AnalyzeMessage(Chat chat)
        {
            var document  = new Document("en", chat.message);
            var sentences = new List<Sentence>();

            try
            {
                analyzer.Analyze(document);
                foreach (var s in document.Sentences)
                {
                    var sentence = new Sentence();
                    sentence.tokens = new List<Token>();
                    foreach (var t in s.Tokens)
                    {
                        var token = new Token();
                        token.POSTag = t.POSTag;
                        token.Lexeme = t.Lexeme;
                        token.Stem   = wordStemmer.GetSteamWord(t.Lexeme);
                        sentence.tokens.Add(token);
                    }

                    sentence.chunks = new List<Chunk>();
                    foreach (var c in s.Chunks)
                    {
                        var chunk = new Chunk();
                        chunk.tag    = c.Tag;
                        chunk.tokens = new List<Token>();
                        foreach (var t in c.Tokens)
                        {
                            var token = new Token();
                            token.POSTag = t.POSTag;
                            token.Lexeme = t.Lexeme;
                            token.Stem   = wordStemmer.GetSteamWord(t.Lexeme);
                            chunk.tokens.Add(token);
                        }
                        sentence.chunks.Add(chunk);
                    }

                    sentence.interrogative = isInterrogative(s);

                    sentence.triplets = tripletService.GetSentenceTriplets(sentence);

                    sentences.Add(sentence);
                }
            }
            catch (AnalyzerException)
            {
                // Swallow analyzer failures: any sentences collected before
                // the exception are still returned to the caller.
            }

            var naturalLanguageData = new NaturalLanguageData();

            naturalLanguageData.sentences = sentences;

            return naturalLanguageData;
        }
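The Sentence, Token, and Chunk types populated above are the application's own DTOs, not SharpNL types. A minimal sketch of their shape, inferred from how this method uses them (the real project may add serialization attributes, and the Triplet type is assumed):

        public class Token
        {
            public string POSTag;
            public string Lexeme;
            public string Stem;
        }

        public class Chunk
        {
            public string tag;
            public List<Token> tokens;
        }

        public class Sentence
        {
            public List<Token> tokens;
            public List<Chunk> chunks;
            public bool interrogative;
            public List<Triplet> triplets;  // Triplet is assumed; produced by tripletService
        }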
Example #3
        public void TestEverything()
        {
            var doc = new Document("en",
                                   "Bart, with $10,000, we'd be millionaires! We could buy all kinds of useful things like... love!");

            analyzer.Analyze(doc);

            Assert.NotNull(doc);
            Assert.AreEqual(2, doc.Sentences.Count);
            Assert.IsTrue(doc.IsTokenized);
            Assert.IsTrue(doc.IsTagged);
            Assert.IsTrue(doc.IsChunked);
            Assert.IsTrue(doc.IsParsed);
            Assert.AreEqual(1, doc.Sentences[0].Entities.Count);
        }
Example #4
        /// <summary>
        /// Summarizes the specified input using the specified <paramref name="sentenceDetector"/> and <paramref name="tokenizer"/>.
        /// </summary>
        /// <param name="input">The input string to be summarized.</param>
        /// <param name="sentenceDetector">The sentence detector.</param>
        /// <param name="tokenizer">The tokenizer.</param>
        /// <returns>The summarized string.</returns>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="sentenceDetector"/>
        /// or
        /// <paramref name="tokenizer"/>
        /// </exception>
        public string Summarize(string input, ISentenceDetector sentenceDetector, ITokenizer tokenizer) {
            if (string.IsNullOrEmpty(input))
                return string.Empty;

            if (sentenceDetector == null)
                throw new ArgumentNullException("sentenceDetector");

            if (tokenizer == null)
                throw new ArgumentNullException("tokenizer");

            var doc = new Document("x-unspecified", input);
            var anl = new AggregateAnalyzer {
                new SentenceDetectorAnalyzer(sentenceDetector),
                new TokenizerAnalyzer(tokenizer)
            };

            anl.Analyze(doc);

            return ProcessSummarization(doc);
        }
Example #5
        public IList<string> ScrapeMovieTitles(string actorWikipediaUrl, string[] ignoreWords)
        {
            var result = new List<string>();

            var web  = new HtmlWeb();
            var page = web.Load(actorWikipediaUrl);

            // Read the content of the "Career" section of the article.
            var careerNode = page.DocumentNode.SelectSingleNode("//*/h2[starts-with(.,'Career')]");

            if (careerNode != null)
            {
                var text = new StringBuilder();
                var node = careerNode.NextSibling;
                while (node != null && node.Name != "h2")
                {
                    text.Append(node.InnerText);
                    node = node.NextSibling;
                }

                var doc = new SharpNL.Document("en", text.ToString());

                _analyzer.Analyze(doc);

                foreach (var sentence in doc.Sentences)
                {
                    Token  prevToken   = null;
                    string title       = "";
                    bool   withinTitle = false;
                    foreach (var token in sentence.Tokens)
                    {
                        if (!withinTitle && IsTitleStart(token, prevToken))
                        {
                            withinTitle = true;
                            title       = "";
                        }
                        else if (withinTitle)
                        {
                            if (IsTitleEnd(token, prevToken))
                            {
                                if (prevToken != null && prevToken.POSTag.IsNamePosTag())
                                {
                                    title = title.AddWord(prevToken.Lexeme);
                                }
                                if (IsMovieTitle(title))
                                {
                                    result.Add(title);
                                }
                                title       = "";
                                withinTitle = false;
                            }
                            else
                            {
                                title = title.AddWord(prevToken.Lexeme);
                            }
                        }
                        prevToken = token;
                    }
                }
            }

            return result;
        }
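AddWord is a project-specific string extension, not a SharpNL or HtmlAgilityPack API. A minimal sketch consistent with how the title is accumulated above, joining words with single spaces:

        public static class StringExtensions
        {
            // Appends a word to the running title, avoiding a leading
            // space when the title is still empty.
            public static string AddWord(this string text, string word)
            {
                return string.IsNullOrEmpty(text) ? word : text + " " + word;
            }
        }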