/// <summary>
/// Saves the add-in settings, then assigns the language stored in
/// <c>Options_ProofLanguageID</c> to every range returned by
/// <c>TextHelpers.GetText</c> for the active document and sets each range's
/// <c>NoProofing</c> to 0. If <c>Options_DisplayLanguageDialog</c> is set,
/// the "SetLanguage" ribbon command is executed afterwards.
/// </summary>
public void ApplyLanguage()
{
    var app = Globals.ThisAddIn.Application;
    var settings = Properties.Settings.Default;

    Word.Document activeDoc = app.ActiveDocument;
    settings.Save();

    foreach (Word.Range textRange in TextHelpers.GetText(activeDoc))
    {
        textRange.LanguageID = settings.Options_ProofLanguageID;
        textRange.NoProofing = 0;
    }

    if (!settings.Options_DisplayLanguageDialog)
    {
        return;
    }

    app.CommandBars.ExecuteMso("SetLanguage");
}
/// <summary>
/// Ribbon handler: gathers the distinct words of the active document
/// (punctuation stripped, all-number words removed) and writes them, sorted,
/// into a newly created document under a short explanatory introduction.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Ribbon event data (unused).</param>
private void btn_WordList_Click(object sender, RibbonControlEventArgs e)
{
    Word.Document source = Globals.ThisAddIn.Application.ActiveDocument;

    // Collect every distinct word from every text range of the document.
    HashSet<string> collected = new HashSet<string>();
    foreach (Word.Range textRange in TextHelpers.GetText(source))
    {
        string stripped = TextHelpers.StripPunctuation(textRange.Text);
        collected.UnionWith(TextHelpers.ToWords(stripped));
    }

    // Drop entries that consist only of digits, then sort for display.
    collected = TextHelpers.RemoveNumbers(collected);
    string[] sorted = collected.ToArray();
    Array.Sort(sorted);

    // Build the report document: heading and intro first.
    Word.Document report = Globals.ThisAddIn.Application.Documents.Add();

    Word.Paragraph para = report.Content.Paragraphs.Add();
    para.set_Style(report.Styles["Heading 1"]);
    para.Range.Text = "Word List\n";

    para = report.Content.Paragraphs.Add();
    para.set_Style(report.Styles["Normal"]);
    para.Range.Text = "This is a proofreading tool. It takes every word in the document, strips the punctuation, removes words that consist only of numbers, and then presents them all in alphabetical order. This is a great way to find typos and inconsistencies.\n";

    para = report.Content.Paragraphs.Add();
    para.Range.Text = "Capitalization is retained as is. That means that words that appear at the beginning of a sentence will appear capitalized.\n";

    // The word list itself lives in its own three-column section.
    para = report.Content.Paragraphs.Add();
    para.Range.InsertBreak(Word.WdBreakType.wdSectionBreakContinuous);
    Word.Section listSection = report.Sections[2];
    listSection.PageSetup.TextColumns.SetCount(3);

    para = report.Content.Paragraphs.Add();
    para.Range.Text = string.Join("\n", sorted) + "\n";

    para = report.Content.Paragraphs.Add();
    para.Range.InsertBreak(Word.WdBreakType.wdSectionBreakContinuous);

    report.GrammarChecked = true;
}
/// <summary>
/// Ribbon handler: remembers the label of the language chosen in
/// <c>dd_Langs</c> as the <c>lastlang</c> setting, applies the language held
/// in the item's <c>Tag</c> to every text range of the active document
/// (setting <c>NoProofing</c> to 0 on each), and reports completion in a
/// message box.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Ribbon event data (unused).</param>
private void btn_FixLang_Click(object sender, RibbonControlEventArgs e)
{
    Word.Document activeDoc = Globals.ThisAddIn.Application.ActiveDocument;
    RibbonDropDownItem chosen = dd_Langs.SelectedItem;

    var settings = Properties.Settings.Default;
    settings.lastlang = chosen.Label;
    settings.Save();

    foreach (Word.Range textRange in TextHelpers.GetText(activeDoc))
    {
        // Tag carries the language as a boxed Word.WdLanguageID.
        textRange.LanguageID = (Word.WdLanguageID)chosen.Tag;
        textRange.NoProofing = 0;
    }

    MessageBox.Show(string.Format("All text marked as '{0}'.", chosen.Label));
}
/// <summary>
/// Counts how often each word occurs in the active document.
/// </summary>
/// <remarks>
/// Each text range is stripped of punctuation and split on runs of
/// whitespace. Tokens made up solely of digits, and tokens that are empty
/// after trimming, are not counted. Keys are case-sensitive.
/// </remarks>
/// <returns>A map from word to its number of occurrences.</returns>
public static Dictionary<string, uint> GetWordFrequencyList()
{
    Word.Document activeDoc = Globals.ThisAddIn.Application.ActiveDocument;
    Dictionary<string, uint> counts = new Dictionary<string, uint>();
    Regex digitsOnly = new Regex(@"^\d+$");

    foreach (Word.Range textRange in TextHelpers.GetText(activeDoc))
    {
        string stripped = TextHelpers.StripPunctuation(textRange.Text);

        foreach (string token in Regex.Split(stripped, @"\s+"))
        {
            if (digitsOnly.IsMatch(token))
            {
                continue;
            }

            if (token.Trim() == "")
            {
                continue;
            }

            // A missing key leaves 'seen' at 0, so the first hit stores 1.
            uint seen;
            counts.TryGetValue(token, out seen);
            counts[token] = seen + 1;
        }
    }

    return counts;
}
/// <summary>
/// Builds the set of distinct words found in the active document.
/// </summary>
/// <remarks>
/// Punctuation is stripped from each text range before it is broken into
/// words; the combined set is then passed through
/// <c>TextHelpers.RemoveNumbers</c>.
/// </remarks>
/// <returns>The distinct words of the active document.</returns>
public static HashSet<string> GetWordList()
{
    Word.Document activeDoc = Globals.ThisAddIn.Application.ActiveDocument;
    HashSet<string> collected = new HashSet<string>();

    foreach (Word.Range textRange in TextHelpers.GetText(activeDoc))
    {
        string stripped = TextHelpers.StripPunctuation(textRange.Text);
        collected.UnionWith(TextHelpers.ToWords(stripped));
    }

    return TextHelpers.RemoveNumbers(collected);
}
/// <summary>
/// Ribbon handler: counts every word sequence ("phrase") whose length lies
/// between the minimum and maximum entered in the ribbon edit boxes, sentence
/// by sentence, and lists the phrases that occur more than once in a new
/// two-column document, most frequent first.
/// </summary>
/// <remarks>
/// The two lengths must parse as unsigned integers greater than zero with
/// minimum &lt;= maximum; otherwise a message box explains the requirement and
/// nothing else happens. Valid lengths are saved to the settings
/// (<c>minphraselen</c>, <c>maxphraselen</c>). Phrases are lowercased before
/// counting.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Ribbon event data (unused).</param>
private void btn_PhraseFrequency_Click(object sender, RibbonControlEventArgs e)
{
    Word.Document doc = Globals.ThisAddIn.Application.ActiveDocument;

    // A failed parse leaves 0 in the out value, which the check below rejects.
    uint newminlen;
    uint newmaxlen;
    UInt32.TryParse(edit_MinPhraseLen.Text, out newminlen);
    UInt32.TryParse(edit_MaxPhraseLen.Text, out newmaxlen);

    if ((newminlen == 0) || (newmaxlen == 0) || (newminlen > newmaxlen))
    {
        MessageBox.Show("The phrase length fields must contain numbers greater than zero, and the minimum length must be less than or equal to the maximum length.");
        return;
    }

    Properties.Settings.Default.minphraselen = newminlen;
    Properties.Settings.Default.maxphraselen = newmaxlen;
    Properties.Settings.Default.Save();

    Dictionary<string, uint> phrases = new Dictionary<string, uint>();

    // Iterate through all text, one sentence at a time.
    foreach (Word.Range rng in TextHelpers.GetText(doc))
    {
        foreach (Word.Range sentence in rng.Sentences)
        {
            string nopunc = TextHelpers.StripPunctuation(sentence.Text);

            // Split on any whitespace and drop empty entries so that doubled
            // spaces, trailing spaces and the paragraph mark never become
            // (part of) a "word".
            string[] words = nopunc.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);

            CountPhrases(words, newminlen, newmaxlen, phrases);
        }
    }

    // Display results in a new document.
    Word.Document newdoc = Globals.ThisAddIn.Application.Documents.Add();
    Word.Paragraph pgraph;

    // Intro text
    pgraph = newdoc.Content.Paragraphs.Add();
    pgraph.set_Style(newdoc.Styles["Heading 1"]);
    pgraph.Range.Text = "Phrase Frequency List\n";

    pgraph = newdoc.Content.Paragraphs.Add();
    pgraph.set_Style(newdoc.Styles["Normal"]);
    pgraph.Range.Text = "Punctuation (other than apostrophes) has been removed. All words have been lowercased for comparison.\n";

    // The list goes into its own two-column section.
    pgraph = newdoc.Content.Paragraphs.Add();
    pgraph.Range.InsertBreak(Word.WdBreakType.wdSectionBreakContinuous);
    Word.Section sec = newdoc.Sections[2];
    sec.PageSetup.TextColumns.SetCount(2);

    // Only repeated phrases are interesting; most frequent first.
    var phraselist = phrases.Where(x => x.Value > 1).ToList();
    phraselist.Sort((pair1, pair2) => pair2.Value.CompareTo(pair1.Value));

    foreach (var pair in phraselist)
    {
        pgraph = newdoc.Content.Paragraphs.Add();
        pgraph.set_Style(newdoc.Styles["Normal"]);
        pgraph.Range.Text = pair.Key + "\t" + pair.Value.ToString() + "\n";
    }

    pgraph = newdoc.Content.Paragraphs.Add();
    pgraph.Range.InsertBreak(Word.WdBreakType.wdSectionBreakContinuous);

    newdoc.GrammarChecked = true;
}

// Adds every run of minLen..maxLen consecutive words (lowercased, joined by a
// single space) to the running phrase counts.
private static void CountPhrases(string[] words, uint minLen, uint maxLen, Dictionary<string, uint> phrases)
{
    // A phrase cannot be longer than the sentence; capping here also keeps the
    // uint counter from spinning (or wrapping) on a very large maximum.
    uint longest = Math.Min(maxLen, (uint)words.Length);

    for (uint len = minLen; len <= longest; len++)
    {
        int count = (int)len;

        // Inclusive bound: the phrase that ends on the last word is counted too.
        for (int start = 0; start + count <= words.Length; start++)
        {
            string phrase = string.Join(" ", words, start, count).ToLower();

            // A missing key leaves 'seen' at 0, so the first hit stores 1.
            uint seen;
            phrases.TryGetValue(phrase, out seen);
            phrases[phrase] = seen + 1;
        }
    }
}
/// <summary>
/// Ribbon handler: counts how often each word occurs in the active document
/// and writes the result, most frequent first, as a two-column table
/// (word, count) in a newly created document.
/// </summary>
/// <remarks>
/// Punctuation is stripped and text is split on runs of whitespace; tokens
/// consisting only of digits are ignored. Counting is case-sensitive. A
/// <c>ProgressDialog</c> is shown while the handler runs and is hidden again
/// even if an exception escapes. Elapsed times for the two phases are written
/// to the debug output.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Ribbon event data (unused).</param>
private void btn_WordFreq_Click(object sender, RibbonControlEventArgs e)
{
    ProgressDialog d = new ProgressDialog();
    d.Show();

    try
    {
        Stopwatch watch = new Stopwatch();
        watch.Start();

        Word.Document doc = Globals.ThisAddIn.Application.ActiveDocument;
        Dictionary<string, uint> wordlist = new Dictionary<string, uint>();
        Regex re_allnums = new Regex(@"^\d+$");

        // Materialize once: the sequence is needed both for its size and for
        // the loop, and enumerating it twice would walk the document twice.
        List<Word.Range> textranges = TextHelpers.GetText(doc).ToList();
        d.pbMax = textranges.Count;
        d.pbVal = 0;

        foreach (Word.Range rng in textranges)
        {
            d.pbVal++;

            string txt = TextHelpers.StripPunctuation(rng.Text);

            foreach (string word in Regex.Split(txt, @"\s+"))
            {
                if (re_allnums.IsMatch(word) || word.Trim() == "")
                {
                    continue;
                }

                // A missing key leaves 'seen' at 0, so the first hit stores 1.
                uint seen;
                wordlist.TryGetValue(word, out seen);
                wordlist[word] = seen + 1;
            }
        }

        Debug.WriteLine("Counts tabulated. Time elapsed: " + watch.Elapsed.ToString());
        watch.Restart();

        // Create new document
        Word.Document newdoc = Globals.ThisAddIn.Application.Documents.Add();
        Word.Paragraph pgraph;

        // Intro text
        pgraph = newdoc.Content.Paragraphs.Add();
        pgraph.set_Style(newdoc.Styles["Heading 1"]);
        pgraph.Range.Text = "Word Frequency List\n";

        pgraph = newdoc.Content.Paragraphs.Add();
        pgraph.set_Style(newdoc.Styles["Normal"]);
        pgraph.Range.Text = "Capitalization is retained as is. That means that words that appear at the beginning of a sentence will appear capitalized. Don't forget that you can sort the table!\n";

        pgraph = newdoc.Content.Paragraphs.Add();
        pgraph.set_Style(newdoc.Styles["Normal"]);
        pgraph.Range.Text = "Total words found (case sensitive): " + wordlist.Count.ToString() + "\n";

        // The table goes into its own three-column section.
        pgraph = newdoc.Content.Paragraphs.Add();
        pgraph.Range.InsertBreak(Word.WdBreakType.wdSectionBreakContinuous);
        Word.Section sec = newdoc.Sections[2];
        sec.PageSetup.TextColumns.SetCount(3);

        var words = wordlist.ToList();
        words.Sort((pair1, pair2) => pair2.Value.CompareTo(pair1.Value));

        // A table needs at least one row, so skip it when nothing was counted.
        if (words.Count > 0)
        {
            Word.Table table = newdoc.Tables.Add(pgraph.Range, words.Count, 2);
            table.AutoFitBehavior(Word.WdAutoFitBehavior.wdAutoFitContent);
            table.AllowAutoFit = true;

            d.pbMax = words.Count;
            d.pbVal = 0;

            int row = 1;
            foreach (var pair in words)
            {
                d.pbVal++;
                table.Cell(row, 1).Range.Text = pair.Key;
                table.Cell(row, 2).Range.Text = pair.Value.ToString();
                row++;
            }
        }

        pgraph = newdoc.Content.Paragraphs.Add();
        pgraph.Range.InsertBreak(Word.WdBreakType.wdSectionBreakContinuous);

        newdoc.GrammarChecked = true;

        Debug.WriteLine("All done. Time elapsed: " + watch.Elapsed.ToString());
        watch.Stop();
    }
    finally
    {
        // Never leave the progress dialog on screen, even after a failure.
        d.Hide();
    }
}
/// <summary>
/// Ribbon handler: collects the proper nouns of the active document and
/// reports, in a new two-column document, groups of them that look alike,
/// first by edit distance, then by Double Metaphone key.
/// </summary>
/// <remarks>
/// Ranges whose style name contains "heading", "title", "date" or "toc"
/// (case-insensitive) are skipped. The edit-distance threshold comes from the
/// <c>mindist</c> setting; a value that does not parse, or is not positive,
/// falls back to 2. Words no longer than the threshold are excluded from the
/// edit-distance comparison.
/// </remarks>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Ribbon event data (unused).</param>
private void btn_ProperNouns_Click(object sender, RibbonControlEventArgs e)
{
    Word.Document doc = Globals.ThisAddIn.Application.ActiveDocument;
    HashSet<string> wordlist = new HashSet<string>();

    // Built once; it is the same pattern for every range.
    Regex re_heading = new Regex(@"(?i)(heading|title|date|toc)");

    foreach (Word.Range rng in TextHelpers.GetText(doc))
    {
        string txt = rng.Text;
        Word.Style style = rng.get_Style();
        if (style != null && re_heading.IsMatch(style.NameLocal))
        {
            continue;
        }

        wordlist.UnionWith(TextHelpers.ProperNouns(txt));
    }

    // Produce the groupings
    HashSet<string> capped = TextHelpers.KeepCaps(wordlist);

    // DoubleMetaphone: file each word under both its primary and its
    // alternate short key.
    Dictionary<ushort, List<string>> mpgroups = new Dictionary<ushort, List<string>>();
    ShortDoubleMetaphone sdm = new ShortDoubleMetaphone();
    foreach (string word in capped)
    {
        sdm.computeKeys(word);
        ushort[] keys = { sdm.PrimaryShortKey, sdm.AlternateShortKey };
        foreach (ushort key in keys)
        {
            List<string> bucket;
            if (!mpgroups.TryGetValue(key, out bucket))
            {
                bucket = new List<string>();
                mpgroups[key] = bucket;
            }
            bucket.Add(word);
        }
    }

    // Edit Distance
    int mindist;
    int.TryParse(Properties.Settings.Default.mindist, out mindist);
    if (mindist <= 0)
    {
        // Unparsable, zero or negative: a non-positive threshold can never
        // match anything, so use the default.
        mindist = 2;
    }

    // Words already used as a group head or placed in a group; a set keeps
    // the membership test cheap inside the pairwise loop.
    HashSet<string> dtested = new HashSet<string>();
    Dictionary<string, List<string>> distgroups = new Dictionary<string, List<string>>();
    foreach (string word1 in capped)
    {
        if (!dtested.Add(word1))
        {
            continue;
        }
        if (word1.Length <= mindist)
        {
            continue;
        }

        foreach (string word2 in capped)
        {
            if (word2.Length <= mindist)
            {
                continue;
            }

            int dist = TextHelpers.EditDistance(word1, word2);
            if ((dist > 0) && (dist <= mindist))
            {
                dtested.Add(word2);

                List<string> near;
                if (!distgroups.TryGetValue(word1, out near))
                {
                    near = new List<string>();
                    distgroups[word1] = near;
                }
                near.Add(word2);
            }
        }
    }

    // Create new document
    Word.Document newdoc = Globals.ThisAddIn.Application.Documents.Add();
    Word.View view = Globals.ThisAddIn.Application.ActiveWindow.View;
    view.DisplayPageBoundaries = false;
    Word.Paragraph pgraph;

    // Intro text
    pgraph = newdoc.Content.Paragraphs.Add();
    pgraph.set_Style(newdoc.Styles["Heading 1"]);
    pgraph.Range.Text = "Proper Noun Checker\n";

    pgraph = newdoc.Content.Paragraphs.Add();
    pgraph.set_Style(newdoc.Styles["Normal"]);
    pgraph.Range.Text = "This tool only looks at words that start with a capital letter. It then uses phonetic comparison and edit distance to find other proper nouns that are similar. Words in all caps (acronyms) are not included.\n";

    pgraph = newdoc.Content.Paragraphs.Add();
    pgraph.Range.Text = "The system tries to ignore words at the beginning of sentences and in headers. This means some errors may go unseen, so use multiple tools!\n";

    pgraph = newdoc.Content.Paragraphs.Add();
    pgraph.Range.Text = "Most of what you see here are false positives! That's unavoidable. But it still catches certain otherwise-hard-to-find misspellings.\n";

    // The results go into their own two-column section with a divider line.
    pgraph = newdoc.Content.Paragraphs.Add();
    pgraph.Range.InsertBreak(Word.WdBreakType.wdSectionBreakContinuous);
    Word.Section sec = newdoc.Sections[2];
    sec.PageSetup.TextColumns.SetCount(2);
    sec.PageSetup.TextColumns.LineBetween = -1;

    // Distance
    pgraph = newdoc.Content.Paragraphs.Add();
    pgraph.set_Style(newdoc.Styles["Heading 2"]);
    pgraph.Range.Text = "Edit Distance (" + mindist + ")\n";
    foreach (KeyValuePair<string, List<string>> entry in distgroups)
    {
        pgraph = newdoc.Content.Paragraphs.Add();
        pgraph.set_Style(newdoc.Styles["Normal"]);
        pgraph.Range.Text = entry.Key + ", " + string.Join(", ", entry.Value) + "\n";
    }

    // NOTE(review): the Word docs say a page break replaces the range it is
    // inserted on - confirm this does not swallow the last entry above.
    pgraph.Range.InsertBreak(Word.WdBreakType.wdPageBreak);

    // Metaphone
    pgraph = newdoc.Content.Paragraphs.Add();
    pgraph.set_Style(newdoc.Styles["Heading 2"]);
    pgraph.Range.Text = "Phonetic Comparisons\n";
    foreach (KeyValuePair<ushort, List<string>> entry in mpgroups)
    {
        // Key 65535 is never reported.
        // NOTE(review): presumably the "no key" marker of ShortDoubleMetaphone - confirm.
        if (entry.Key == 65535)
        {
            continue;
        }

        // A group of one has nothing to be compared with.
        if (entry.Value.Count > 1)
        {
            pgraph = newdoc.Content.Paragraphs.Add();
            pgraph.set_Style(newdoc.Styles["Normal"]);
            pgraph.Range.Text = string.Join(", ", entry.Value) + "\n";
        }
    }

    newdoc.GrammarChecked = true;
}