/// <summary>
/// Builds an article from raw text, using the dictionary loaded for the
/// language named in <paramref name="args"/>.
/// </summary>
/// <param name="text">Text handed to the article's ParseText method.</param>
/// <param name="args">Supplies DictionaryLanguage, which selects the dictionary to load.</param>
/// <returns>The article after ParseText has been run on it.</returns>
private static Article ParseDocument(string text, SummarizerArguments args)
{
    // The dictionary is loaded first because the article is constructed around it.
    Article parsed = new Article(Dictionary.LoadFromFile(args.DictionaryLanguage));
    parsed.ParseText(text);
    return parsed;
}
/// <summary>
/// Copies the article's concepts and the original text of every selected
/// sentence into a new summarized document.
/// </summary>
/// <param name="article">Source of the concepts and sentences.</param>
/// <param name="args">Not read by this method.</param>
/// <returns>The populated summarized document.</returns>
private static SummarizedDocument CreateSummarizedDocument(Article article, SummarizerArguments args)
{
    SummarizedDocument result = new SummarizedDocument { Concepts = article.Concepts };

    foreach (Sentence candidate in article.Sentences)
    {
        // Only sentences flagged as selected make it into the summary.
        if (!candidate.Selected)
        {
            continue;
        }

        result.Sentences.Add(candidate.OriginalSentence);
    }

    return result;
}
/// <summary>
/// Chooses which selection routine to run on the article, based on the
/// display settings in <paramref name="args"/>.
/// </summary>
/// <remarks>
/// A non-zero DisplayPercent takes precedence over DisplayLines. When both
/// are zero, no selection routine is called.
/// </remarks>
/// <param name="article">Article passed through to the selection routine.</param>
/// <param name="args">Supplies DisplayPercent and DisplayLines.</param>
internal static void Highlight(Article article, SummarizerArguments args)
{
    if (args.DisplayPercent != 0)
    {
        SelectSentencesByPercent(article, args.DisplayPercent);
        return;
    }

    if (args.DisplayLines != 0)
    {
        // get the highest scored n lines, without reordering the list.
        SelectNumberOfSentences(article, args.DisplayLines);
    }
}
/// <summary>
/// Looks up the query from textBox2 on Wikipedia: opens the page with
/// Process.Start, downloads it, writes every HTML text node into a PDF at
/// a fixed path, then asks the summarizer to summarize that file and shows
/// the result in richTextBox1.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button2_Click(object sender, EventArgs e)
{
    // Single definition of the scratch file so the writer and the summarizer
    // arguments cannot drift apart.
    const string pdfPath = @"D:\htmltext.pdf";

    // Validate before doing any network or disk work. TextBox.Text is never
    // null, so an empty/blank check is what actually detects a missing query.
    string word = textBox2.Text;
    if (string.IsNullOrWhiteSpace(word))
    {
        richTextBox1.Text = "Please give the query value!!!";
        return;
    }

    // Escape the query so spaces and reserved characters form a valid URL.
    string articleUrl = "https://en.wikipedia.org/wiki/" + Uri.EscapeDataString(word.Trim());
    Process.Start(articleUrl);

    HtmlAgilityPack.HtmlDocument Htmldoc = new HtmlAgilityPack.HtmlDocument();
    using (WebClient web = new WebClient())
    {
        byte[] byteArray = web.DownloadData(new Uri(articleUrl));
        using (Stream stream = new MemoryStream(byteArray))
        {
            Htmldoc.Load(stream);
        }
    }

    // The using block releases the file handle even if PDF generation throws,
    // so the file is not left locked before the summarizer is pointed at it.
    using (FileStream fs = new FileStream(pdfPath, FileMode.Create, FileAccess.Write, FileShare.None))
    {
        Document pdfDoc = new Document();
        PdfWriter.GetInstance(pdfDoc, fs);
        pdfDoc.Open();

        // SelectNodes yields null (not an empty collection) when nothing matches.
        var textNodes = Htmldoc.DocumentNode.SelectNodes("//text()");
        if (textNodes != null)
        {
            foreach (HtmlNode node in textNodes)
            {
                pdfDoc.Add(new Paragraph(node.InnerText.Trim()));
            }
        }

        pdfDoc.Close();
    }

    // int.TryParse writes 0 to its out argument on failure, so the default of
    // one sentence has to be applied after the call, not before it.
    int sentCount;
    if (!int.TryParse(textBox3.Text, out sentCount) || sentCount < 1)
    {
        sentCount = 1;
    }

    SummarizerArguments sumargs = new SummarizerArguments
    {
        DictionaryLanguage = "en",
        DisplayLines = sentCount,
        DisplayPercent = 0,
        InputFile = pdfPath,
    };

    SummarizedDocument doc = Summarizer.Summarize(sumargs);
    richTextBox1.Text = string.Join("\r\n\r\n", doc.Sentences.ToArray());
}
/// <summary>
/// Parses the input described by <paramref name="args"/>, grades it, runs
/// the highlighter over it and packages the result as a summarized document.
/// </summary>
/// <remarks>
/// InputString is used only when it is non-empty and InputFile is empty or
/// unset; in every other case InputFile is passed to ParseFile. Unset (null)
/// values are treated the same as empty strings.
/// </remarks>
/// <param name="args">Summarizer settings and input selection.</param>
/// <returns>
/// The summarized document, or null when <paramref name="args"/> is null.
/// </returns>
public static SummarizedDocument Summarize(SummarizerArguments args)
{
    if (args == null)
    {
        return null;
    }

    // IsNullOrEmpty instead of .Length: callers that set only one of the two
    // inputs may leave the other null, which would otherwise throw here.
    bool hasInputString = !string.IsNullOrEmpty(args.InputString);
    bool hasInputFile = !string.IsNullOrEmpty(args.InputFile);

    Article article = (hasInputString && !hasInputFile)
        ? ParseDocument(args.InputString, args)
        : ParseFile(args.InputFile, args);

    Grader.Grade(article);
    Highlighter.Highlight(article, args);

    return CreateSummarizedDocument(article, args);
}
/// <summary>
/// Summarizes the file named in textBox1 and shows the selected sentences
/// in richTextBox1, separated by blank lines.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button3_Click(object sender, EventArgs e)
{
    // TextBox.Text is never null, so a blank check is what actually detects
    // that no file was supplied.
    string inputFile = textBox1.Text;
    if (string.IsNullOrWhiteSpace(inputFile))
    {
        richTextBox1.Text = "Sorry there is no file!!!";
        return;
    }

    // int.TryParse writes 0 to its out argument on failure, so the default of
    // one sentence has to be applied after the call, not before it.
    int sentCount;
    if (!int.TryParse(SentenceCountTextBox.Text, out sentCount) || sentCount < 1)
    {
        sentCount = 1;
    }

    SummarizerArguments sumargs = new SummarizerArguments
    {
        DictionaryLanguage = "en",
        DisplayLines = sentCount,
        DisplayPercent = 0,
        InputFile = inputFile
    };

    SummarizedDocument doc = Summarizer.Summarize(sumargs);
    richTextBox1.Text = string.Join("\r\n\r\n", doc.Sentences.ToArray());
}
/// <summary>
/// Loads the contents of a file via LoadFile and parses them with ParseDocument.
/// </summary>
/// <param name="fileName">Value passed to LoadFile.</param>
/// <param name="args">Forwarded unchanged to ParseDocument.</param>
/// <returns>The article produced by ParseDocument.</returns>
private static Article ParseFile(string fileName, SummarizerArguments args)
{
    return ParseDocument(LoadFile(fileName), args);
}