/// <summary>
/// Ribbon handler: tabulates how often each word occurs in the active document and
/// writes the result, most frequent first, into a table in a newly created document.
/// </summary>
/// <remarks>
/// Counting is case sensitive. Tokens consisting only of digits are ignored.
/// A progress dialog is shown while counting and while filling the table, and is
/// hidden again even if a Word call throws.
/// </remarks>
/// <param name="sender">The ribbon control that raised the event (unused).</param>
/// <param name="e">Ribbon event data (unused).</param>
private void btn_WordFreq_Click(object sender, RibbonControlEventArgs e)
{
    ProgressDialog d = new ProgressDialog();
    d.Show();
    try
    {
        Stopwatch watch = Stopwatch.StartNew();
        Word.Document doc = Globals.ThisAddIn.Application.ActiveDocument;
        Dictionary<string, uint> wordlist = new Dictionary<string, uint>();
        Regex re_allnums = new Regex(@"^\d+$");

        // Materialize once: the sequence is needed both for its size and for the
        // loop, and enumerating it twice would repeat whatever work GetText does.
        IEnumerable<Word.Range> textranges = TextHelpers.GetText(doc);
        List<Word.Range> rangeList = textranges.ToList();
        d.pbMax = rangeList.Count;
        d.pbVal = 0;

        foreach (Word.Range rng in rangeList)
        {
            d.pbVal++;

            string txt = rng.Text;
            if (string.IsNullOrEmpty(txt))
            {
                // Nothing to count; also keeps a null Text away from the helpers below.
                continue;
            }

            //strip punctuation
            txt = TextHelpers.StripPunctuation(txt);

            foreach (string word in Regex.Split(txt, @"\s+"))
            {
                if (string.IsNullOrWhiteSpace(word) || re_allnums.IsMatch(word))
                {
                    continue;
                }

                // Single lookup; count stays 0 when the word has not been seen yet.
                uint count;
                wordlist.TryGetValue(word, out count);
                wordlist[word] = count + 1;
            }
        }

        Debug.WriteLine("Counts tabulated. Time elapsed: " + watch.Elapsed.ToString());
        watch.Restart();

        //Create new document
        Word.Document newdoc = Globals.ThisAddIn.Application.Documents.Add();

        //Intro text
        AppendStyledParagraph(newdoc, Word.WdBuiltinStyle.wdStyleHeading1, "Word Frequency List\n");
        AppendStyledParagraph(newdoc, Word.WdBuiltinStyle.wdStyleNormal, "Capitalization is retained as is. That means that words that appear at the beginning of a sentence will appear capitalized. Don't forget that you can sort the table!\n");
        AppendStyledParagraph(newdoc, Word.WdBuiltinStyle.wdStyleNormal, "Total words found (case sensitive): " + wordlist.Count.ToString() + "\n");

        // Everything after this continuous break forms section 2, laid out in 3 columns.
        Word.Paragraph pgraph = newdoc.Content.Paragraphs.Add();
        pgraph.Range.InsertBreak(Word.WdBreakType.wdSectionBreakContinuous);
        Word.Section sec = newdoc.Sections[2];
        sec.PageSetup.TextColumns.SetCount(3);

        // Highest count first; ties are ordered by word so the output is repeatable.
        List<KeyValuePair<string, uint>> words = wordlist.ToList();
        words.Sort((pair1, pair2) =>
        {
            int byCount = pair2.Value.CompareTo(pair1.Value);
            return byCount != 0 ? byCount : string.CompareOrdinal(pair1.Key, pair2.Key);
        });

        d.pbMax = words.Count;
        d.pbVal = 0;

        // Only build the table when there is something to show.
        // NOTE(review): a zero-row Tables.Add is presumably rejected by Word - confirm.
        if (words.Count > 0)
        {
            // Keep the table returned by Add instead of re-fetching Tables[1] per cell.
            Word.Table table = newdoc.Tables.Add(pgraph.Range, words.Count, 2);
            table.AutoFitBehavior(Word.WdAutoFitBehavior.wdAutoFitContent);
            table.AllowAutoFit = true;

            int row = 1;
            foreach (KeyValuePair<string, uint> pair in words)
            {
                d.pbVal++;
                table.Cell(row, 1).Range.Text = pair.Key;
                table.Cell(row, 2).Range.Text = pair.Value.ToString();
                row++;
            }
        }

        pgraph = newdoc.Content.Paragraphs.Add();
        pgraph.Range.InsertBreak(Word.WdBreakType.wdSectionBreakContinuous);

        // Mark the generated list as already grammar-checked.
        newdoc.GrammarChecked = true;

        Debug.WriteLine("All done. Time elapsed: " + watch.Elapsed.ToString());
        watch.Stop();
    }
    finally
    {
        // Always take the progress dialog off screen, even when a Word call failed.
        // NOTE(review): if ProgressDialog is a Form it should probably be disposed too - confirm.
        d.Hide();
    }
}

/// <summary>
/// Appends a paragraph to <paramref name="target"/>, applies a built-in style and sets its text.
/// </summary>
/// <remarks>
/// The style is addressed through <c>WdBuiltinStyle</c> rather than its English display
/// name, because built-in style names are localized by Word.
/// </remarks>
private static void AppendStyledParagraph(Word.Document target, Word.WdBuiltinStyle style, string text)
{
    Word.Paragraph paragraph = target.Content.Paragraphs.Add();
    paragraph.set_Style(target.Styles[style]);
    paragraph.Range.Text = text;
}
/// <summary>
/// Ribbon handler: highlights (in grey) every sentence that appears to use "data"
/// as a singular noun.
/// </summary>
/// <remarks>
/// Works on the paragraphs of the current selection when more than one character is
/// selected, otherwise on every paragraph of the active document. Each Word sentence is
/// tokenized and part-of-speech tagged with the class-level <c>tagger</c>; the sentence is
/// highlighted when any of its tokenized sentences is flagged by <see cref="UsesSingularData"/>.
/// The progress dialog is hidden even if tagging or a Word call throws.
/// Earlier commented-out experiments (paragraph-level tagging, typed dependencies) were removed.
/// </remarks>
/// <param name="sender">The ribbon control that raised the event (unused).</param>
/// <param name="e">Ribbon event data (unused).</param>
private void btn_SingData_Click(object sender, RibbonControlEventArgs e)
{
    Word.Document doc = Globals.ThisAddIn.Application.ActiveDocument;
    Word.Selection sel = Globals.ThisAddIn.Application.Selection;

    // A selection of a single character (or none) is treated as "no selection".
    bool fromSelection = sel != null && sel.Range != null && sel.Characters.Count > 1;
    Word.Paragraphs pgraphs = fromSelection ? sel.Paragraphs : doc.Paragraphs;
    Debug.WriteLine("From selection: " + fromSelection.ToString());

    ProgressDialog d = new ProgressDialog();
    d.pbMax = pgraphs.Count;
    d.pbVal = 0;
    d.Show();
    try
    {
        foreach (Word.Paragraph pgraph in pgraphs)
        {
            d.pbVal++;

            foreach (Word.Range sentence in pgraph.Range.Sentences)
            {
                string text = sentence.Text;
                if (string.IsNullOrWhiteSpace(text))
                {
                    // Blank sentences have nothing to tag.
                    continue;
                }

                // POS
                // The tokenizer may yield zero, one or several sentences for one Word
                // sentence, so inspect all of them instead of indexing element 0.
                var tokenized = MaxentTagger.tokenizeText(new java.io.StringReader(text)).toArray();
                bool problematic = false;
                foreach (object tokens in tokenized)
                {
                    var taglist = tagger.tagSentence((ArrayList)tokens).toArray();
                    if (UsesSingularData(taglist))
                    {
                        problematic = true;
                        break;
                    }
                }

                //Highlight problematic sentences
                if (problematic)
                {
                    sentence.HighlightColorIndex = Word.WdColorIndex.wdGray50;
                }
            }
        }
    }
    finally
    {
        // Always take the progress dialog off screen, even when tagging failed.
        // NOTE(review): if ProgressDialog is a Form it should probably be disposed too - confirm.
        d.Hide();
    }

    MessageBox.Show("Possible uses of 'data' as a singular noun have been highlighted in grey.");
}

/// <summary>
/// Decides whether a tagged sentence looks like it treats "data" as singular.
/// </summary>
/// <param name="taggedWords">Sequence of <c>TaggedWord</c> items for one sentence.</param>
/// <returns>
/// <c>true</c> when a token "data" (any casing) carries the tag NN or NNP, or when the
/// sentence contains "data" together with at least one token tagged VBZ; otherwise <c>false</c>.
/// </returns>
private static bool UsesSingularData(System.Collections.IEnumerable taggedWords)
{
    bool hasData = false;
    bool hasSingularVerb = false;

    foreach (TaggedWord entry in taggedWords)
    {
        string tag = entry.tag();

        if (string.Equals(entry.word(), "data", StringComparison.OrdinalIgnoreCase))
        {
            //First find obviously singular "data"
            if (tag == "NN" || tag == "NNP")
            {
                return true;
            }
            hasData = true;
        }

        //Now look for plural tags with singular verbs
        if (tag == "VBZ")
        {
            hasSingularVerb = true;
        }
    }

    return hasData && hasSingularVerb;
}