Summarize() — публичный статический метод

public static Summarize ( String text ) : String
text String
Результат String
Пример #1
0
    /// <summary>
    /// Smoke test: feeds a multi-sentence English paragraph to Summarizer.Summarize and
    /// checks that the result is a non-null, non-empty string, then prints it.
    /// </summary>
    public void TestSummarizeExampleText()
    {
        // Arrange: the paragraph to be summarized (line breaks and indentation are part of the input).
        const string sourceText = @"Automatic text summarization is the technique which 
                    automatically creates an abstract or summary of a text. The
                    technique has been developed for many years.
                    According to Hovy and Lin there are two ways to view
                    text summarization either as text extraction or text abstraction.
                    Text extraction means to extract pieces of an original text
                    on a statistical basis or with heuristic methods and put them
                    together into a shorter text with the same information content.
                    Sometimes, the extracted fragments are post-edited, for example
                    by deleting subordinate clauses or joining incomplete clauses
                    to form complete clauses. Text abstraction is to parse
                    the original text in a linguistic way, interpret the text and find
                    new concepts to describe the text and then generate a new
                    shorter text with the same information content. This is in many
                    aspects similar to what human abstractors do when writing
                    an abstract, using surface level information like headings, key
                    phrases, positions and so on.";

        // Act
        string result = Summarizer.Summarize(sourceText);

        // Assert: only the basic shape of the result is checked, not its content.
        Assert.NotNull(result);
        Assert.IsInstanceOf<string>(result);
        Assert.IsNotEmpty(result);

        // Show the summary in the test output for manual inspection.
        Console.WriteLine(result);
    }
Пример #2
0
 /// <summary>
 /// Summarizes <paramref name="text"/> with the summarizer found in <c>Summarizers</c>
 /// for <paramref name="language"/>.
 /// </summary>
 /// <param name="text">The document to summarize.</param>
 /// <param name="percentage">The percentage of sentences to have in the final text.</param>
 /// <param name="language">The language used to look up a summarizer.</param>
 /// <returns>
 /// The result of the language-specific summarizer, or <paramref name="text"/> itself
 /// when no summarizer is found for <paramref name="language"/>.
 /// </returns>
 public Document Summarize(Document text, float percentage, TextSummarizationLanguage language)
 {
     if (Summarizers.TryGetValue(language, out var summarizer))
     {
         return summarizer.Summarize(text, percentage);
     }

     // No summarizer for this language: hand the input back as-is.
     return text;
 }
Пример #3
0
 /// <summary>
 /// Summarizes <paramref name="text"/> with the summarizer found in <c>Summarizers</c>
 /// for <paramref name="language"/>.
 /// </summary>
 /// <param name="text">The document to summarize.</param>
 /// <param name="sentenceCount">The number of sentences to have in the final text.</param>
 /// <param name="language">The language used to look up a summarizer.</param>
 /// <returns>
 /// The result of the language-specific summarizer, or <paramref name="text"/> itself
 /// when no summarizer is found for <paramref name="language"/>.
 /// </returns>
 public Document Summarize(Document text, int sentenceCount, TextSummarizationLanguage language)
 {
     if (Summarizers.TryGetValue(language, out var summarizer))
     {
         return summarizer.Summarize(text, sentenceCount);
     }

     // No summarizer for this language: hand the input back as-is.
     return text;
 }
Пример #4
0
        /// <summary>
        /// Console entry point: summarizes the file <c>TextualData/AutomaticSummarization.txt</c>
        /// located next to the executing assembly (English, at most 5 sentences) and writes the
        /// summary sentences to the console, one per line.
        /// </summary>
        private static void Main()
        {
            // Pass each path segment separately so Path.Combine inserts the platform's
            // directory separator instead of a hard-coded backslash.
            string inputPath = Path.Combine(
                Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location),
                "TextualData",
                "AutomaticSummarization.txt");

            SummarizedDocument summarizedDocument = Summarizer.Summarize(
                new FileContentProvider(inputPath),
                new SummarizerArguments
                {
                    Language            = "en",
                    MaxSummarySentences = 5
                });

            string summary = string.Join(Environment.NewLine, summarizedDocument.Sentences);

            Console.Write(summary);

            // Wait for Enter so the output stays visible.
            Console.ReadLine();
        }
Пример #5
0
        /// <summary>
        /// Console entry point: summarizes the file <c>TextualData/AutomaticSummarization.txt</c>
        /// located next to the executing assembly and writes the summary sentences to the
        /// console, one per line.
        /// </summary>
        private static void Main()
        {
            // Locate the input file relative to the running assembly.
            string assemblyDirectory = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location);
            string inputPath         = Path.Combine(assemblyDirectory, "TextualData", "AutomaticSummarization.txt");

            var arguments = new SummarizerArguments { InputFile = inputPath };
            SummarizedDocument result = Summarizer.Summarize(arguments);

            Console.Write(string.Join(Environment.NewLine, result.Sentences));

            // Wait for Enter so the output stays visible.
            Console.ReadLine();
        }
Пример #6
0
        /// <summary>
        /// Summarizes <paramref name="text"/> with the "en" dictionary and a display limit of 5 lines.
        /// </summary>
        /// <param name="text">The text passed to the summarizer as its input string.</param>
        /// <returns>The summary sentences joined with a blank line ("\r\n\r\n") between them.</returns>
        private string SummarizeText(string text)
        {
            // Configure the summarizer: string input (no file), limited by line count rather than percent.
            var arguments = new SummarizerArguments();
            arguments.DictionaryLanguage = "en";
            arguments.DisplayLines       = 5;
            arguments.DisplayPercent     = 0;
            arguments.InputFile          = "";
            arguments.InputString        = text;

            SummarizedDocument summarized = Summarizer.Summarize(arguments);

            string[] sentences = summarized.Sentences.ToArray();
            return string.Join("\r\n\r\n", sentences);
        }
Пример #7
0
        /// <summary>
        /// Summarizes <paramref name="inputText"/> and renders a plain-text report containing the
        /// elapsed time, the summary sentences and the extracted concepts ("keywords").
        /// </summary>
        /// <param name="inputText">The text to summarize.</param>
        /// <param name="maxSummSentences">Value assigned to <c>MaxSummarySentences</c>.</param>
        /// <returns>The formatted report.</returns>
        public static string OpenText(string inputText, int maxSummSentences)
        {
            // Started before the arguments are built, so the reported time covers setup + summarization.
            Stopwatch timer = Stopwatch.StartNew();

            var arguments = new SummarizerArguments
            {
                MaxSummarySentences     = maxSummSentences,
                MaxSummarySizeInPercent = 100
            };
            var contentProvider = new DirectTextContentProvider(inputText);
            var document        = Summarizer.Summarize(contentProvider, arguments);

            var report = new StringBuilder();
            report.AppendLine($"Summarised content in {timer.ElapsedMilliseconds} ms");

            report.AppendLine(" ===== Summary =============================== ");
            foreach (var sentence in document.Sentences)
            {
                // Each sentence is followed by an extra line break.
                report.AppendLine(string.Format("{0}\r\n", sentence));
            }

            report.AppendLine(" ===== Keywords =============================== ");
            foreach (var concept in document.Concepts)
            {
                report.AppendLine(string.Format("\t{0}", concept));
            }

            return report.ToString();
        }
        /// <summary>
        /// Click handler: summarizes the text of <c>OriginalTextBox</c> using the sentence count
        /// entered in <c>numericUpDown1</c> and shows the result in <c>SummaryTextBox</c>.
        /// </summary>
        /// <param name="sender">The control that raised the event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void SummarizeButton_Click(object sender, EventArgs e)
        {
            // int.TryParse overwrites its out argument with 0 when parsing fails, so the
            // one-sentence fallback has to be applied after the call, not before it.
            int sentCount;
            if (!int.TryParse(numericUpDown1.Text, out sentCount))
            {
                sentCount = 1;
            }

            SummarizerArguments sumargs = new SummarizerArguments
            {
                DictionaryLanguage = "en",
                DisplayLines       = sentCount,
                DisplayPercent     = 0,
                InputFile          = "",
                InputString        = OriginalTextBox.Text
            };
            SummarizedDocument doc = Summarizer.Summarize(sumargs);

            // Sentences are separated by a blank line in the output box.
            SummaryTextBox.Text = string.Join("\r\n\r\n", doc.Sentences.ToArray());
        }
Пример #9
0
        /// <summary>
        /// End-to-end pipeline test on the first method extracted from the C++ sample file:
        /// method definition -> SUnit statements -> SUnits -> generated sentences -> summary.
        /// Each stage is checked and its output is written to the console.
        /// </summary>
        public void TestPipelineWithSourceFile()
        {
            // TODO: rewrite unit test to handle all the methods in a file
            // For now, just pull the first method from the file and proceed as TestPipelineXMLSnippet().
            var srcmlMethod = MethodExtractor.ExtractAllMethodsFromFile("../Sample Methods/sample methods.cpp").First().Item3;

            // Verify the method definition
            Assert.IsInstanceOf<MethodDefinition>(srcmlMethod, "MethodDefinition found.");
            Console.WriteLine(srcmlMethod.ToString());

            // Extract SUnit Statements from MethodDefinition
            var statements = SUnitExtractor.ExtractAll(srcmlMethod).ToList();

            // Verify the statements selected
            Assert.IsNotEmpty(statements, "statements selected from method definition");
            // Print the elements; List<T>.ToString() would only print the list's type name.
            Console.WriteLine(string.Join(Environment.NewLine, statements));

            // Translate Statements into SUnits
            List<SUnit> sunits = statements.ConvertAll(
                new Converter<Statement, SUnit>(SUnitTranslator.Translate));

            // Verify sunits have been translated
            Assert.That(sunits.TrueForAll(s => s.action != null), "All SUnits initialized.");
            Console.WriteLine(string.Join(Environment.NewLine, sunits));

            // Generate text from SUnits
            List<string> sentences = sunits.ConvertAll(
                new Converter<SUnit, string>(TextGenerator.GenerateText));

            // Verify a non-empty string was generated for every SUnit
            Assert.That(sentences.TrueForAll(s => s.Length > 0));
            Console.WriteLine(string.Join(Environment.NewLine, sentences));

            // Collect text and summarize
            var methodDocument = String.Join<string>(" ", sentences);
            var summary        = Summarizer.Summarize(methodDocument);

            // Verify summary
            Assert.That(!summary.Equals(""));
            Console.WriteLine(summary);
        }
Пример #10
0
        /// <summary>
        /// Click handler: fills <c>docCollection.DocumentList</c> and <c>mainCLusterNodeList</c> for the
        /// selected Reuters data set. If both cache files exist they are deserialized; otherwise the
        /// source XML is parsed (bodies longer than 400 characters are replaced by a 5-line summary),
        /// and the results are serialized to the cache files. Finally the document count is shown.
        /// </summary>
        /// <param name="sender">The control that raised the event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button3_Click(object sender, EventArgs e)
        {
            // Common root of every data-set path used below.
            const string reutersRoot = "E:\\Dropbox\\Masters\\myMSc\\PracticalPart\\Sematic_K-MEANSClustering\\MScDataSets\\Reuters21578\\";

            // Defaults describe the test data set; "Re0" and any other selection keep them.
            string path        = reutersRoot + "00\\000.xml";
            string cachePrefix = "00";

            if (cboxDataSet.Text == "Reu_01")
            {
                path        = reutersRoot + "Reu_01\\reut2-000_small.xml";
                cachePrefix = "01";
            }

            // Cache file names depend on the data set and on the document limit.
            string path_data_docCollection       = reutersRoot + "data\\" + cachePrefix + "_" + maxNoDoc + "_docCollection.xml";
            string path_data_mainCLusterNodeList = reutersRoot + "data\\" + cachePrefix + "_" + maxNoDoc + "_mainCLusterNodeList.xml";

            if (File.Exists(path_data_docCollection) && File.Exists(path_data_mainCLusterNodeList))
            {
                // Both cache files are present: reuse the previously serialized results.
                docCollection.DocumentList = DeSerializeObject<List<string>>(path_data_docCollection);
                mainCLusterNodeList        = DeSerializeObject<List<ClusterNode>>(path_data_mainCLusterNodeList);
            }
            else
            {
                XmlDocument doc2 = new XmlDocument();
                doc2.Load(path);

                XmlNodeList nodelist  = doc2.DocumentElement.SelectNodes("/main/REUTERS");
                const int   sentCount = 5;
                int         n         = 1;   // 1-based id of the next accepted document

                foreach (XmlNode node in nodelist)
                {
                    ClusterNode item = new ClusterNode();
                    item.cLusters = new List<string>();

                    // Articles without a body are skipped and do not consume an id.
                    XmlNode body = node.SelectSingleNode("TEXT/BODY");
                    if (body == null)
                    {
                        continue;
                    }

                    string text = body.InnerText;
                    if (text.Length > 400)
                    {
                        // Long bodies are replaced by their summary.
                        SummarizerArguments sumargs = new SummarizerArguments
                        {
                            DictionaryLanguage = "en",
                            DisplayLines       = sentCount,
                            DisplayPercent     = 0,
                            InputFile          = "",
                            InputString        = text
                        };
                        SummarizedDocument doc = Summarizer.Summarize(sumargs);
                        text = string.Join("\r\n\r\n", doc.Sentences.ToArray());
                    }

                    // Only PLACES entries are used as cluster labels; TOPICS are not collected.
                    XmlNodeList nodelistPLACES = node.SelectNodes("PLACES/D");
                    foreach (XmlNode place in nodelistPLACES)
                    {
                        item.cLusters.Add(place.InnerText);
                    }

                    docCollection.DocumentList.Add(text);
                    item.id      = n;
                    item.content = text;
                    mainCLusterNodeList.Add(item);

                    // Stop once maxNoDoc documents have been accepted.
                    n++;
                    if (n > maxNoDoc)
                    {
                        break;
                    }
                }

                SerializeObject(docCollection.DocumentList, path_data_docCollection);
                SerializeObject(mainCLusterNodeList, path_data_mainCLusterNodeList);
            }

            lblTotalDoc.Text = docCollection.DocumentList.Count.ToString();
            MessageBox.Show("Done");
        }
Пример #11
0
        // TODO: Make summarizer send completion data, pass Bg worker as argument
        /// <summary>
        /// DoWork handler: unpacks the <c>ThreadObjectHelper</c> passed as the work argument and
        /// runs <c>Summarizer.Summarize</c> on its files, output folder and HTML-formatted colours.
        /// </summary>
        /// <param name="sender">The event source (unused).</param>
        /// <param name="e">Carries the <c>ThreadObjectHelper</c> in <c>Argument</c>.</param>
        private void BackgroundWorker_DoWork(object sender, DoWorkEventArgs e)
        {
            var job = (ThreadObjectHelper)e.Argument;

            // Gather the call arguments in the same order they are passed.
            var files            = job._files.ToList();
            var outputFolder     = job._outputFolderPath;
            var identifierColour = ColorTranslator.ToHtml(job._identifierColour);
            var dataTypeColour   = ColorTranslator.ToHtml(job._dataTypeColour);

            Summarizer.Summarize(files, outputFolder, bgw, identifierColour, dataTypeColour);
        }
Пример #12
0
        /// <summary>
        /// End-to-end pipeline test on an inline SrcML snippet of a C++ method:
        /// XML snippet -> method definition -> SUnit statements -> SUnits -> generated sentences -> summary.
        /// Each stage is checked and its output is written to the console.
        /// </summary>
        public void TestPipelineXMLSnippet()
        {
            // SrcML sample method
            string srcmlOutput = @"<function><type><name> bool </name></type> <name> findInFiles </name><parameter_list> () </parameter_list>
                            <block>{
	                            <decl_stmt><decl><type><specifier>const</specifier> <name>TCHAR</name> <modifier>*</modifier></type><name>dir2Search</name> <init>= <expr><call><name><name>_findReplaceDlg</name><operator>.</operator><name>getDir2Search</name></name><argument_list>()</argument_list></call></expr></init></decl>;</decl_stmt>

	                            <expr_stmt><expr><call><name>findFilesInOut</name><argument_list>()</argument_list></call></expr>;</expr_stmt>
	                            <if>if <condition>(<expr><operator>!</operator><name><name>dir2Search</name><index>[<expr><literal type=""number"">0</literal></expr>]</index></name> <operator>||</operator> <operator>!</operator><call><name><operator>::</operator><name>PathFileExists</name></name><argument_list>(<argument><expr><name>dir2Search</name></expr></argument>)</argument_list></call></expr>)</condition><then>
	                            <block>{
		                            <return>return <expr><literal type = ""boolean"" > false </literal></expr>;</return>
	                            }</block></then></if>
	                            <decl_stmt><decl><type><name>string</name></type> <name>findString</name> <init>= <expr><literal type = ""string"" > """" </literal ></expr ></init ></decl >;</decl_stmt>

	                            <expr_stmt><expr><call><name>gethurry</name><argument_list>()</argument_list></call></expr>;</expr_stmt>
	
	                            <macro><name>findInOne</name><argument_list>(<argument>int a</argument>, <argument>findString</argument>)</argument_list></macro><empty_stmt>;</empty_stmt>

	                            <decl_stmt><decl><type><name>bool</name></type> <name>isRecursive</name> <init>= <expr><call><name><name>_findReplaceDlg</name><operator >.</operator><name>isRecursive</name></name><argument_list>()</argument_list></call></expr></init></decl>;</decl_stmt>
	                            <decl_stmt><decl><type><name>bool</name></type> <name>isInHiddenDir</name> <init>= <expr><call><name><name>_findReplaceDlg</name><operator >.</operator><name>isInHiddenDir</name></name><argument_list>()</argument_list></call></expr></init></decl>;</decl_stmt>

	                            <if>if <condition>(<expr><call><name><name>a</name><operator >.</operator><name>size</name></name><argument_list>()</argument_list></call> <operator >==</operator> <literal type = ""number"" > 0 </literal></expr>)</condition><then>
	                            <block>{
		                            <expr_stmt><expr><call><name><name>a</name><operator >.</operator><name>setFindInFilesDirFilter</name></name><argument_list>(<argument><expr><literal type = ""string""> ""dddd"" </literal ></expr ></argument>, <argument><expr><call><name>TEXT</name><argument_list>(<argument><expr><literal type = ""string"" > ""*.*"" </literal ></expr></argument>)</argument_list></call></expr></argument>)</argument_list></call></expr>;</expr_stmt>
		                            <expr_stmt><expr><call><name><name>a</name><operator >.</operator><name>getPatterns</name></name><argument_list>(<argument><expr><name>findString</name></expr></argument>)</argument_list></call></expr>;</expr_stmt>
	                            }</block></then></if>
	                            <return>return <expr><literal type = ""boolean"" > true </literal ></expr>;</return>
                            }</block></function>";

            // Convert raw string to MethodDefinition
            var fileSetup = new SrcMLFileUnitSetup(Language.CPlusPlus);
            var parser    = new CPlusPlusCodeParser();

            var fileUnit = fileSetup.GetFileUnitForXmlSnippet(srcmlOutput, "sampletestmethods.cpp");
            var scope    = parser.ParseFileUnit(fileUnit);

            var srcmlMethod = scope.GetDescendants<MethodDefinition>().First();

            // Verify the method definition
            Assert.IsInstanceOf<MethodDefinition>(srcmlMethod, "MethodDefinition found.");
            Console.WriteLine(srcmlMethod.ToString());

            // Extract SUnit Statements from MethodDefinition
            var statements = new List<Statement>();

            statements.AddRange(SUnitExtractor.ExtractEnding(srcmlMethod));
            statements.AddRange(SUnitExtractor.ExtractSameAction(srcmlMethod));
            statements.AddRange(SUnitExtractor.ExtractVoidReturn(srcmlMethod));

            // Verify the statements selected
            Assert.IsNotEmpty(statements, "statements selected from method definition");
            // Print the elements; List<T>.ToString() would only print the list's type name.
            Console.WriteLine(string.Join(Environment.NewLine, statements));

            // Translate Statements into SUnits
            List<SUnit> sunits = statements.ConvertAll(
                new Converter<Statement, SUnit>(SUnitTranslator.Translate));

            // Verify sunits have been translated
            Assert.That(sunits.TrueForAll(s => s.action != null), "All SUnits initialized.");
            Console.WriteLine(string.Join(Environment.NewLine, sunits));

            // Generate text from SUnits
            List<string> sentences = sunits.ConvertAll(
                new Converter<SUnit, string>(TextGenerator.GenerateText));

            // Verify a non-empty string was generated for every SUnit
            Assert.That(sentences.TrueForAll(s => s.Length > 0));
            Console.WriteLine(string.Join(Environment.NewLine, sentences));

            // Collect text and summarize
            var methodDocument = String.Join<string>(" ", sentences);
            var summary        = Summarizer.Summarize(methodDocument);

            // Verify summary
            Assert.That(!summary.Equals(""));
            Console.WriteLine(summary);
        }