/// <summary>
/// Summarizes the specified input using the specified <paramref name="sentenceDetector"/> and <paramref name="tokenizer"/>.
/// </summary>
/// <param name="input">The input string to be summarized.</param>
/// <param name="sentenceDetector">The sentence detector.</param>
/// <param name="tokenizer">The tokenizer.</param>
/// <returns>The summarized string.</returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="sentenceDetector"/> or <paramref name="tokenizer"/> is <c>null</c>.
/// </exception>
public string Summarize(string input, ISentenceDetector sentenceDetector, ITokenizer tokenizer) {
    if (string.IsNullOrEmpty(input))
        return string.Empty;

    if (sentenceDetector == null)
        throw new ArgumentNullException(nameof(sentenceDetector));

    if (tokenizer == null)
        throw new ArgumentNullException(nameof(tokenizer));

    var doc = new Document("x-unspecified", input);
    var anl = new AggregateAnalyzer {
        new SentenceDetectorAnalyzer(sentenceDetector),
        new TokenizerAnalyzer(tokenizer)
    };

    anl.Analyze(doc);

    return ProcessSummarization(doc);
}
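A minimal usage sketch for Summarize. It assumes OpenNLP-compatible model files on disk ("en-sent.bin" and "en-token.bin" are placeholder paths) and SharpNL's maximum-entropy SentenceDetectorME/TokenizerME, which implement ISentenceDetector/ITokenizer; `summarizer` and `articleText` stand in for an instance of the declaring class and the input text.

using System.IO;
using SharpNL.SentenceDetector;
using SharpNL.Tokenize;

// Model file names, "summarizer", and "articleText" are placeholders;
// adjust to however your project loads its OpenNLP-style models.
using (var sentStream = new FileStream("en-sent.bin", FileMode.Open))
using (var tokenStream = new FileStream("en-token.bin", FileMode.Open)) {
    var sentenceDetector = new SentenceDetectorME(new SentenceModel(sentStream));
    var tokenizer = new TokenizerME(new TokenizerModel(tokenStream));

    var summary = summarizer.Summarize(articleText, sentenceDetector, tokenizer);
}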
public static NaturalLanguageData AnalyzeMessage(Chat chat) {
    var document = new Document("en", chat.message);
    var sentences = new List<Sentence>();

    try {
        analyzer.Analyze(document);

        foreach (var s in document.Sentences) {
            var sentence = new Sentence {
                tokens = new List<Token>(),
                chunks = new List<Chunk>()
            };

            // Copy the analyzer's tokens, attaching a stem to each one.
            foreach (var t in s.Tokens) {
                sentence.tokens.Add(new Token {
                    POSTag = t.POSTag,
                    Lexeme = t.Lexeme,
                    Stem = wordStemmer.GetSteamWord(t.Lexeme)
                });
            }

            // Copy the chunks, repeating the same token mapping per chunk.
            foreach (var c in s.Chunks) {
                var chunk = new Chunk { tag = c.Tag, tokens = new List<Token>() };
                foreach (var t in c.Tokens) {
                    chunk.tokens.Add(new Token {
                        POSTag = t.POSTag,
                        Lexeme = t.Lexeme,
                        Stem = wordStemmer.GetSteamWord(t.Lexeme)
                    });
                }
                sentence.chunks.Add(chunk);
            }

            sentence.interrogative = isInterrogative(s);
            sentence.triplets = tripletService.GetSentenceTriplets(sentence);
            sentences.Add(sentence);
        }
    } catch (AnalyzerException) {
        // Analyzer failures are swallowed: the caller receives whatever
        // sentences were completed before the exception was thrown.
    }

    return new NaturalLanguageData { sentences = sentences };
}
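The token-copying logic above is written out twice, once for sentence tokens and once for chunk tokens. A small helper would remove the repetition; the one below is hypothetical (not part of the original code) and uses only the fields the snippet already touches.

// Hypothetical helper, not in the original project: centralizes the Token
// mapping used for both sentence tokens and chunk tokens above.
private static Token MakeToken(string posTag, string lexeme) {
    return new Token {
        POSTag = posTag,
        Lexeme = lexeme,
        Stem = wordStemmer.GetSteamWord(lexeme)
    };
}

// Usage inside either loop: sentence.tokens.Add(MakeToken(t.POSTag, t.Lexeme));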
public void TestEverything() {
    var doc = new Document("en",
        "Bart, with $10,000, we'd be millionaires! We could buy all kinds of useful things like... love!");

    analyzer.Analyze(doc);

    Assert.NotNull(doc);
    Assert.AreEqual(2, doc.Sentences.Count);
    Assert.IsTrue(doc.IsTokenized);
    Assert.IsTrue(doc.IsTagged);
    Assert.IsTrue(doc.IsChunked);
    Assert.IsTrue(doc.IsParsed);
    Assert.AreEqual(1, doc.Sentences[0].Entities.Count);
}
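After a full pipeline run like the one this test assumes, the analyzed document can be walked directly. This inspection sketch uses only members the surrounding examples already exercise (Sentences, Tokens, Chunks, POSTag, Lexeme, Tag):

// Inspection sketch; all members shown appear elsewhere in these examples.
foreach (var sentence in doc.Sentences) {
    foreach (var token in sentence.Tokens)
        Console.Write($"{token.Lexeme}/{token.POSTag} ");
    Console.WriteLine();

    foreach (var chunk in sentence.Chunks)
        Console.Write($"[{chunk.Tag}] ");
    Console.WriteLine();
}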
public IList<string> ScrapeMovieTitles(string actorWikipediaUrl, string[] ignoreWords) {
    var result = new List<string>();

    var web = new HtmlWeb();
    var page = web.Load(actorWikipediaUrl);

    // Read the content of the "Career" section of the article: everything
    // between the "Career" <h2> and the next <h2>.
    var careerNode = page.DocumentNode.SelectSingleNode("//*/h2[starts-with(.,'Career')]");
    if (careerNode != null) {
        var text = "";
        var node = careerNode.NextSibling;
        while (node != null && node.Name != "h2") {
            text += node.InnerText;
            node = node.NextSibling;
        }

        var doc = new SharpNL.Document("en", text);
        _analyzer.Analyze(doc);

        foreach (var sentence in doc.Sentences) {
            Token prevToken = null;
            var title = "";
            var withinTitle = false;

            // Simple state machine over the token stream: open a candidate
            // title at a start marker, accumulate words one token behind
            // (via prevToken), and flush the candidate at an end marker.
            foreach (var token in sentence.Tokens) {
                if (!withinTitle && IsTitleStart(token, prevToken)) {
                    withinTitle = true;
                    title = "";
                } else if (withinTitle) {
                    if (IsTitleEnd(token, prevToken)) {
                        // Include the final word if it is tagged as a proper noun.
                        if (prevToken != null && prevToken.POSTag.IsNamePosTag()) {
                            title = title.AddWord(prevToken.Lexeme);
                        }
                        if (IsMovieTitle(title)) {
                            result.Add(title);
                        }
                        title = "";
                        withinTitle = false;
                    } else {
                        title = title.AddWord(prevToken.Lexeme);
                    }
                }
                prevToken = token;
            }
        }
    }

    return result;
}
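The scraper leans on project-specific helpers (IsTitleStart, IsTitleEnd, IsMovieTitle, AddWord, IsNamePosTag) whose implementations are not shown. For orientation only, here is a hypothetical sketch of the two string extensions, assuming Penn Treebank part-of-speech tags; the real project defines its own versions.

// Hypothetical sketches; not taken from the project above.
static class ScraperExtensions {
    // Appends a word to a space-separated title under construction.
    public static string AddWord(this string title, string word) =>
        title.Length == 0 ? word : title + " " + word;

    // Assumes Penn Treebank tags, where NNP/NNPS mark proper nouns.
    public static bool IsNamePosTag(this string posTag) =>
        posTag == "NNP" || posTag == "NNPS";
}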