/// <summary>
/// Summarizes a fixed sample paragraph and checks that a non-null, non-empty
/// string comes back. The summary is echoed to the console for inspection.
/// </summary>
public void TestSummarizeExampleText()
{
    // Arrange
    const string exampleText = @"Automatic text summarization is the technique which automatically creates an abstract or summary of a text. The technique has been developed for many years. According to Hovy and Lin there are two ways to view text summarization either as text extraction or text abstraction. Text extraction means to extract pieces of an original text on a statistical basis or with heuristic methods and put them together into a shorter text with the same information content. Sometimes, the extracted fragments are post-edited, for example by deleting subordinate clauses or joining incomplete clauses to form complete clauses. Text abstraction is to parse the original text in a linguistic way, interpret the text and find new concepts to describe the text and then generate a new shorter text with the same information content. This is in many aspects similar to what human abstractors do when writing an abstract, using surface level information like headings, key phrases, positions and so on.";

    // Act
    string result = Summarizer.Summarize(exampleText);

    // Assert
    Assert.NotNull(result);
    Assert.IsInstanceOf<string>(result);
    Assert.IsNotEmpty(result);

    Console.WriteLine(result);
}
/// <summary>
/// Summarizes the specified text down to a percentage of its sentences.
/// </summary>
/// <param name="text">The text.</param>
/// <param name="percentage">The percentage of sentences to have in the final text.</param>
/// <param name="language">The language used to look up the summarizer.</param>
/// <returns>
/// The summarized text, or <paramref name="text"/> unchanged when no summarizer
/// is registered for <paramref name="language"/>.
/// </returns>
public Document Summarize(Document text, float percentage, TextSummarizationLanguage language)
{
    return Summarizers.TryGetValue(language, out var languageSummarizer)
        ? languageSummarizer.Summarize(text, percentage)
        : text;
}
/// <summary>
/// Summarizes the specified text down to a fixed number of sentences.
/// </summary>
/// <param name="text">The text.</param>
/// <param name="sentenceCount">The number of sentences to have in the final text.</param>
/// <param name="language">The language used to look up the summarizer.</param>
/// <returns>
/// The summarized text, or <paramref name="text"/> unchanged when no summarizer
/// is registered for <paramref name="language"/>.
/// </returns>
public Document Summarize(Document text, int sentenceCount, TextSummarizationLanguage language)
{
    return Summarizers.TryGetValue(language, out var languageSummarizer)
        ? languageSummarizer.Summarize(text, sentenceCount)
        : text;
}
/// <summary>
/// Summarizes the sample document located in the "TextualData" folder next to the
/// executing assembly (English, at most five sentences) and writes the resulting
/// sentences to the console, one per line.
/// </summary>
private static void Main()
{
    // Pass every path segment separately so Path.Combine inserts the platform's
    // directory separator instead of relying on a hard-coded backslash.
    string samplePath = Path.Combine(
        Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location),
        "TextualData",
        "AutomaticSummarization.txt");

    SummarizedDocument summarizedDocument = Summarizer.Summarize(
        new FileContentProvider(samplePath),
        new SummarizerArguments
        {
            Language = "en",
            MaxSummarySentences = 5
        });

    string summary = string.Join(Environment.NewLine, summarizedDocument.Sentences);
    Console.Write(summary);

    // Block until a line is read from standard input before returning.
    Console.ReadLine();
}
/// <summary>
/// Summarizes "TextualData/AutomaticSummarization.txt" (resolved relative to the
/// executing assembly's directory) and writes the summary sentences to the console,
/// one per line, then waits for a line of input.
/// </summary>
private static void Main()
{
    string assemblyDirectory = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);
    string inputPath = Path.Combine(assemblyDirectory, "TextualData", "AutomaticSummarization.txt");

    var arguments = new SummarizerArguments { InputFile = inputPath };
    SummarizedDocument document = Summarizer.Summarize(arguments);

    Console.Write(string.Join(Environment.NewLine, document.Sentences));
    Console.ReadLine();
}
/// <summary>
/// Summarizes <paramref name="text"/> with the English dictionary, requesting
/// five display lines, and returns the sentences separated by blank lines.
/// </summary>
/// <param name="text">The text to summarize.</param>
/// <returns>The summary sentences joined with "\r\n\r\n".</returns>
private string SummarizeText(string text)
{
    var arguments = new SummarizerArguments
    {
        DictionaryLanguage = "en",
        DisplayLines = 5,
        DisplayPercent = 0,
        InputFile = "",
        InputString = text
    };

    SummarizedDocument summarized = Summarizer.Summarize(arguments);
    var sentences = summarized.Sentences.ToArray();

    return string.Join("\r\n\r\n", sentences);
}
/// <summary>
/// Summarizes <paramref name="inputText"/> and renders a plain-text report holding
/// the elapsed time, the summary sentences and the extracted keywords.
/// </summary>
/// <param name="inputText">The raw text to summarize.</param>
/// <param name="maxSummSentences">Value assigned to <c>MaxSummarySentences</c>.</param>
/// <returns>The formatted report.</returns>
public static string OpenText(string inputText, int maxSummSentences)
{
    var stopwatch = Stopwatch.StartNew();

    var summarizerArguments = new SummarizerArguments
    {
        MaxSummarySentences = maxSummSentences,
        MaxSummarySizeInPercent = 100
    };
    var document = Summarizer.Summarize(new DirectTextContentProvider(inputText), summarizerArguments);

    var report = new StringBuilder();
    report.AppendLine($"Summarised content in {stopwatch.ElapsedMilliseconds} ms");

    report.AppendLine(" ===== Summary =============================== ");
    foreach (var sentence in document.Sentences)
    {
        // Each sentence is followed by an extra blank line.
        report.AppendLine($"{sentence}\r\n");
    }

    report.AppendLine(" ===== Keywords =============================== ");
    foreach (var concept in document.Concepts)
    {
        report.AppendLine($"\t{concept}");
    }

    return report.ToString();
}
/// <summary>
/// Click handler: summarizes the text in <c>OriginalTextBox</c> to the number of
/// sentences entered in <c>numericUpDown1</c> and shows the result in
/// <c>SummaryTextBox</c>, with sentences separated by blank lines.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void SummarizeButton_Click(object sender, EventArgs e)
{
    // int.TryParse overwrites its out argument with 0 when parsing fails, so the
    // fallback of one sentence has to be applied explicitly after a failed parse.
    int sentCount;
    if (!int.TryParse(numericUpDown1.Text, out sentCount))
    {
        sentCount = 1;
    }

    SummarizerArguments sumargs = new SummarizerArguments
    {
        DictionaryLanguage = "en",
        DisplayLines = sentCount,
        DisplayPercent = 0,
        InputFile = "",
        InputString = OriginalTextBox.Text
    };

    SummarizedDocument doc = Summarizer.Summarize(sumargs);
    SummaryTextBox.Text = string.Join("\r\n\r\n", doc.Sentences.ToArray());
}
/// <summary>
/// Runs the pipeline (extract statements, translate to SUnits, generate text,
/// summarize) on the first method extracted from the sample C++ source file and
/// checks that every stage produces output.
/// </summary>
public void TestPipelineWithSourceFile()
{
    // TODO: rewrite unit test to handle all the methods in a file
    // For now, just pull the first method from the file and proceed as TestPipelineXMLSnippet().
    var srcmlMethod = MethodExtractor.ExtractAllMethodsFromFile("../Sample Methods/sample methods.cpp").First().Item3;

    // Verify the method definition
    Assert.IsInstanceOf<MethodDefinition>(srcmlMethod, "MethodDefinition found.");
    Console.WriteLine(srcmlMethod.ToString());

    // Extract SUnit Statements from MethodDefinition
    var statements = SUnitExtractor.ExtractAll(srcmlMethod).ToList();

    // Verify the statements selected
    Assert.IsNotEmpty(statements, "statements selected from method definition");
    // List<T>.ToString() only yields the type name, so print the elements instead.
    Console.WriteLine(string.Join(Environment.NewLine, statements));

    // Translate Statements into SUnits
    List<SUnit> sunits = statements.ConvertAll(
        new Converter<Statement, SUnit>(SUnitTranslator.Translate));

    // Verify sunits have been translated
    Assert.That(sunits.TrueForAll(s => s.action != null), "All SUnits initialized.");
    Console.WriteLine(string.Join(Environment.NewLine, sunits));

    // Generate text from SUnits
    List<string> sentences = sunits.ConvertAll(
        new Converter<SUnit, string>(TextGenerator.GenerateText));

    // Verify a non-empty string was generated for every SUnit
    Assert.That(sentences.TrueForAll(s => s.Length > 0));
    Console.WriteLine(string.Join(Environment.NewLine, sentences));

    // Collect text and summarize
    var methodDocument = String.Join<string>(" ", sentences);
    var summary = Summarizer.Summarize(methodDocument);

    // Verify summary
    Assert.That(!summary.Equals(""));
    Console.WriteLine(summary);
}
/// <summary>
/// Click handler: loads the selected Reuters data set into <c>docCollection</c> and
/// <c>mainCLusterNodeList</c>. When both cache files for the current
/// <c>maxNoDoc</c> exist they are deserialized; otherwise the source XML is parsed,
/// long article bodies are replaced by a five-sentence summary, and the result is
/// serialized to the cache files. Finally the document count is shown.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button3_Click(object sender, EventArgs e)
{
    // NOTE(review): machine-specific absolute path; consider moving it to configuration.
    const string reutersRoot = "E:\\Dropbox\\Masters\\myMSc\\PracticalPart\\Sematic_K-MEANSClustering\\MScDataSets\\Reuters21578\\";
    const int summarySentenceCount = 5;
    const int summarizeAboveLength = 400;

    // Default: the test data set. "Re0" has no dedicated files here and therefore
    // also uses these defaults.
    string path = reutersRoot + "00\\000.xml";
    string cachePrefix = "00_";
    if (cboxDataSet.Text == "Reu_01")
    {
        path = reutersRoot + "Reu_01\\reut2-000_small.xml";
        cachePrefix = "01_";
    }

    string path_data_docCollection = reutersRoot + "data\\" + cachePrefix + maxNoDoc + "_docCollection.xml";
    string path_data_mainCLusterNodeList = reutersRoot + "data\\" + cachePrefix + maxNoDoc + "_mainCLusterNodeList.xml";

    if (File.Exists(path_data_docCollection) && File.Exists(path_data_mainCLusterNodeList))
    {
        // Both cache files are present: reuse the previously processed data.
        docCollection.DocumentList = DeSerializeObject<List<string>>(path_data_docCollection);
        mainCLusterNodeList = DeSerializeObject<List<ClusterNode>>(path_data_mainCLusterNodeList);
    }
    else
    {
        XmlDocument sourceDocument = new XmlDocument();
        sourceDocument.Load(path);
        XmlNodeList articles = sourceDocument.DocumentElement.SelectNodes("/main/REUTERS");

        int nextId = 1;
        foreach (XmlNode article in articles)
        {
            XmlNode body = article.SelectSingleNode("TEXT/BODY");
            if (body == null)
            {
                // Articles without a body are skipped and do not consume an id.
                continue;
            }

            string text = body.InnerText;
            if (text.Length > summarizeAboveLength)
            {
                SummarizerArguments sumargs = new SummarizerArguments
                {
                    DictionaryLanguage = "en",
                    DisplayLines = summarySentenceCount,
                    DisplayPercent = 0,
                    InputFile = "",
                    InputString = text
                };
                SummarizedDocument doc = Summarizer.Summarize(sumargs);
                text = string.Join("\r\n\r\n", doc.Sentences.ToArray());
            }

            ClusterNode item = new ClusterNode();
            item.cLusters = new List<string>();

            // Only PLACES/D entries are used as cluster labels; TOPICS/D entries are not read.
            foreach (XmlNode place in article.SelectNodes("PLACES/D"))
            {
                item.cLusters.Add(place.InnerText);
            }

            docCollection.DocumentList.Add(text);
            item.id = nextId;
            item.content = text;
            mainCLusterNodeList.Add(item);

            nextId++;
            if (nextId > maxNoDoc)
            {
                // Stop once maxNoDoc documents have been collected.
                break;
            }
        }

        SerializeObject(docCollection.DocumentList, path_data_docCollection);
        SerializeObject(mainCLusterNodeList, path_data_mainCLusterNodeList);
    }

    lblTotalDoc.Text = docCollection.DocumentList.Count.ToString();
    MessageBox.Show("Done");
}
// TODO: Make the summarizer send completion data; pass the background worker as an argument.
/// <summary>
/// Background worker entry point: unpacks the <c>ThreadObjectHelper</c> carried in
/// <paramref name="e"/> and runs the summarizer over its files.
/// </summary>
/// <param name="sender">The object that raised the event (unused).</param>
/// <param name="e">Event data whose <c>Argument</c> is cast to <c>ThreadObjectHelper</c>.</param>
private void BackgroundWorker_DoWork(object sender, DoWorkEventArgs e)
{
    var request = (ThreadObjectHelper)e.Argument;

    var files = request._files.ToList();
    string identifierColourHtml = ColorTranslator.ToHtml(request._identifierColour);
    string dataTypeColourHtml = ColorTranslator.ToHtml(request._dataTypeColour);

    Summarizer.Summarize(files, request._outputFolderPath, bgw, identifierColourHtml, dataTypeColourHtml);
}
/// <summary>
/// Runs the pipeline (extract statements, translate to SUnits, generate text,
/// summarize) on a method parsed from an inline srcML snippet and checks that
/// every stage produces output.
/// </summary>
public void TestPipelineXMLSnippet()
{
    // SrcML sample method
    string srcmlOutput = @"<function><type><name> bool </name></type> <name> findInFiles </name><parameter_list> () </parameter_list> <block>{ <decl_stmt><decl><type><specifier>const</specifier> <name>TCHAR</name> <modifier>*</modifier></type><name>dir2Search</name> <init>= <expr><call><name><name>_findReplaceDlg</name><operator>.</operator><name>getDir2Search</name></name><argument_list>()</argument_list></call></expr></init></decl>;</decl_stmt> <expr_stmt><expr><call><name>findFilesInOut</name><argument_list>()</argument_list></call></expr>;</expr_stmt> <if>if <condition>(<expr><operator>!</operator><name><name>dir2Search</name><index>[<expr><literal type=""number"">0</literal></expr>]</index></name> <operator>||</operator> <operator>!</operator><call><name><operator>::</operator><name>PathFileExists</name></name><argument_list>(<argument><expr><name>dir2Search</name></expr></argument>)</argument_list></call></expr>)</condition><then> <block>{ <return>return <expr><literal type = ""boolean"" > false </literal></expr>;</return> }</block></then></if> <decl_stmt><decl><type><name>string</name></type> <name>findString</name> <init>= <expr><literal type = ""string"" > """" </literal ></expr ></init ></decl >;</decl_stmt> <expr_stmt><expr><call><name>gethurry</name><argument_list>()</argument_list></call></expr>;</expr_stmt> <macro><name>findInOne</name><argument_list>(<argument>int a</argument>, <argument>findString</argument>)</argument_list></macro><empty_stmt>;</empty_stmt> <decl_stmt><decl><type><name>bool</name></type> <name>isRecursive</name> <init>= <expr><call><name><name>_findReplaceDlg</name><operator >.</operator><name>isRecursive</name></name><argument_list>()</argument_list></call></expr></init></decl>;</decl_stmt> <decl_stmt><decl><type><name>bool</name></type> <name>isInHiddenDir</name> <init>= <expr><call><name><name>_findReplaceDlg</name><operator 
>.</operator><name>isInHiddenDir</name></name><argument_list>()</argument_list></call></expr></init></decl>;</decl_stmt> <if>if <condition>(<expr><call><name><name>a</name><operator >.</operator><name>size</name></name><argument_list>()</argument_list></call> <operator >==</operator> <literal type = ""number"" > 0 </literal></expr>)</condition><then> <block>{ <expr_stmt><expr><call><name><name>a</name><operator >.</operator><name>setFindInFilesDirFilter</name></name><argument_list>(<argument><expr><literal type = ""string""> ""dddd"" </literal ></expr ></argument>, <argument><expr><call><name>TEXT</name><argument_list>(<argument><expr><literal type = ""string"" > ""*.*"" </literal ></expr></argument>)</argument_list></call></expr></argument>)</argument_list></call></expr>;</expr_stmt> <expr_stmt><expr><call><name><name>a</name><operator >.</operator><name>getPatterns</name></name><argument_list>(<argument><expr><name>findString</name></expr></argument>)</argument_list></call></expr>;</expr_stmt> }</block></then></if> <return>return <expr><literal type = ""boolean"" > true </literal ></expr>;</return> }</block></function>";

    // Convert raw string to MethodDefinition
    var fileSetup = new SrcMLFileUnitSetup(Language.CPlusPlus);
    var parser = new CPlusPlusCodeParser();
    var fileUnit = fileSetup.GetFileUnitForXmlSnippet(srcmlOutput, "sampletestmethods.cpp");
    var scope = parser.ParseFileUnit(fileUnit);
    var srcmlMethod = scope.GetDescendants<MethodDefinition>().First();

    // Verify the method definition
    Assert.IsInstanceOf<MethodDefinition>(srcmlMethod, "MethodDefinition found.");
    Console.WriteLine(srcmlMethod.ToString());

    // Extract SUnit Statements from MethodDefinition
    var statements = new List<Statement>();
    statements.AddRange(SUnitExtractor.ExtractEnding(srcmlMethod));
    statements.AddRange(SUnitExtractor.ExtractSameAction(srcmlMethod));
    statements.AddRange(SUnitExtractor.ExtractVoidReturn(srcmlMethod));

    // Verify the statements selected
    Assert.IsNotEmpty(statements, "statements selected from method definition");
    // List<T>.ToString() only yields the type name, so print the elements instead.
    Console.WriteLine(string.Join(Environment.NewLine, statements));

    // Translate Statements into SUnits
    List<SUnit> sunits = statements.ConvertAll(
        new Converter<Statement, SUnit>(SUnitTranslator.Translate));

    // Verify sunits have been translated
    Assert.That(sunits.TrueForAll(s => s.action != null), "All SUnits initialized.");
    Console.WriteLine(string.Join(Environment.NewLine, sunits));

    // Generate text from SUnits
    List<string> sentences = sunits.ConvertAll(
        new Converter<SUnit, string>(TextGenerator.GenerateText));

    // Verify a non-empty string was generated for every SUnit
    Assert.That(sentences.TrueForAll(s => s.Length > 0));
    Console.WriteLine(string.Join(Environment.NewLine, sentences));

    // Collect text and summarize
    var methodDocument = String.Join<string>(" ", sentences);
    var summary = Summarizer.Summarize(methodDocument);

    // Verify summary
    Assert.That(!summary.Equals(""));
    Console.WriteLine(summary);
}