예제 #1
0
        /// <summary>
        /// Ribbon handler: tallies how often each word occurs in the active document and
        /// writes the result, sorted by descending count, into a table in a new document.
        /// </summary>
        /// <remarks>
        /// Words are counted case-sensitively after punctuation has been stripped; tokens made
        /// up solely of digits are ignored. Progress is reported through a
        /// <c>ProgressDialog</c>, which is hidden again even if a call fails part-way.
        /// </remarks>
        /// <param name="sender">The ribbon control that raised the event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void btn_WordFreq_Click(object sender, RibbonControlEventArgs e)
        {
            ProgressDialog d = new ProgressDialog();
            d.Show();

            try
            {
                Stopwatch watch = Stopwatch.StartNew();
                Word.Document doc = Globals.ThisAddIn.Application.ActiveDocument;
                Dictionary<string, uint> wordlist = new Dictionary<string, uint>();
                Regex re_allnums = new Regex(@"^\d+$");

                // Materialize once: the sequence is needed both for its size (progress bar
                // maximum) and for the loop, and enumerating it twice would repeat the work.
                List<Word.Range> textranges = TextHelpers.GetText(doc).ToList();

                d.pbMax = textranges.Count;
                d.pbVal = 0;
                foreach (Word.Range rng in textranges)
                {
                    d.pbVal++;

                    string txt = rng.Text;
                    if (string.IsNullOrEmpty(txt))
                    {
                        // Nothing to count in this range.
                        continue;
                    }

                    //strip punctuation
                    txt = TextHelpers.StripPunctuation(txt);

                    foreach (string word in Regex.Split(txt, @"\s+"))
                    {
                        // Skip blank fragments produced by leading/trailing whitespace
                        // and tokens that are purely numeric.
                        if (word.Trim().Length == 0 || re_allnums.IsMatch(word))
                        {
                            continue;
                        }

                        // Single lookup; count stays 0 when the word has not been seen yet.
                        uint count;
                        wordlist.TryGetValue(word, out count);
                        wordlist[word] = count + 1;
                    }
                }
                Debug.WriteLine("Counts tabulated. Time elapsed: " + watch.Elapsed.ToString());
                watch.Restart();

                //Create new document
                Word.Document newdoc = Globals.ThisAddIn.Application.Documents.Add();
                Word.Paragraph pgraph;

                //Intro text
                pgraph = newdoc.Content.Paragraphs.Add();
                pgraph.set_Style(newdoc.Styles["Heading 1"]);
                pgraph.Range.Text = "Word Frequency List\n";
                pgraph = newdoc.Content.Paragraphs.Add();
                pgraph.set_Style(newdoc.Styles["Normal"]);
                pgraph.Range.Text = "Capitalization is retained as is. That means that words that appear at the beginning of a sentence will appear capitalized. Don't forget that you can sort the table!\n";
                pgraph = newdoc.Content.Paragraphs.Add();
                pgraph.set_Style(newdoc.Styles["Normal"]);
                pgraph.Range.Text = "Total words found (case sensitive): " + wordlist.Count.ToString() + "\n";

                // Put the table in its own three-column section.
                pgraph = newdoc.Content.Paragraphs.Add();
                pgraph.Range.InsertBreak(Word.WdBreakType.wdSectionBreakContinuous);
                Word.Section sec = newdoc.Sections[2];
                sec.PageSetup.TextColumns.SetCount(3);

                // Most frequent words first.
                var words = wordlist.ToList();
                words.Sort((pair1, pair2) => pair2.Value.CompareTo(pair1.Value));

                d.pbMax = words.Count;
                d.pbVal = 0;

                // Only build the table when there is something to show, so that
                // Tables.Add is never asked for zero rows.
                if (words.Count > 0)
                {
                    // Keep the table reference instead of re-resolving Tables[1] for every cell.
                    Word.Table table = newdoc.Tables.Add(pgraph.Range, words.Count, 2);
                    table.AutoFitBehavior(Word.WdAutoFitBehavior.wdAutoFitContent);
                    table.AllowAutoFit = true;

                    int row = 1; // Word table rows are addressed starting at 1
                    foreach (var pair in words)
                    {
                        d.pbVal++;
                        table.Cell(row, 1).Range.Text = pair.Key;
                        table.Cell(row, 2).Range.Text = pair.Value.ToString();
                        row++;
                    }
                }

                pgraph = newdoc.Content.Paragraphs.Add();
                pgraph.Range.InsertBreak(Word.WdBreakType.wdSectionBreakContinuous);
                newdoc.GrammarChecked = true;
                Debug.WriteLine("All done. Time elapsed: " + watch.Elapsed.ToString());
                watch.Stop();
            }
            finally
            {
                // Never leave the progress dialog on screen, even when a call above throws.
                d.Hide();
            }
        }
예제 #2
0
        /// <summary>
        /// Ribbon handler: highlights sentences that appear to use "data" as a singular noun.
        /// </summary>
        /// <remarks>
        /// Operates on the paragraphs of the current selection when more than one character is
        /// selected, otherwise on every paragraph of the active document. Each sentence is
        /// part-of-speech tagged and highlighted in grey when it contains the word "data" and
        /// either that word is tagged NN/NNP or the sentence contains a VBZ-tagged token.
        /// Only the first sentence returned by the tokenizer for each Word sentence is tagged.
        /// </remarks>
        /// <param name="sender">The ribbon control that raised the event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void btn_SingData_Click(object sender, RibbonControlEventArgs e)
        {
            Word.Document doc = Globals.ThisAddIn.Application.ActiveDocument;
            Word.Selection sel = Globals.ThisAddIn.Application.Selection;
            Word.Paragraphs pgraphs;
            bool fromSelection = false;

            if (sel != null && sel.Range != null && sel.Characters.Count > 1)
            {
                pgraphs = sel.Paragraphs;
                fromSelection = true;
            }
            else
            {
                pgraphs = doc.Paragraphs;
            }
            Debug.WriteLine("From selection: " + fromSelection.ToString());

            ProgressDialog d = new ProgressDialog();

            d.pbMax = pgraphs.Count;
            d.pbVal = 0;
            d.Show();

            try
            {
                foreach (Word.Paragraph pgraph in pgraphs)
                {
                    d.pbVal++;
                    Word.Range rng = pgraph.Range;
                    foreach (Word.Range sentence in rng.Sentences)
                    {
                        string text = sentence.Text;
                        if (string.IsNullOrEmpty(text))
                        {
                            continue;
                        }

                        // POS
                        var tsentence = MaxentTagger.tokenizeText(new java.io.StringReader(text)).toArray();
                        if (tsentence.Length == 0)
                        {
                            // The tokenizer found no sentence (e.g. whitespace-only text),
                            // so there is nothing to tag and tsentence[0] would be out of range.
                            continue;
                        }

                        var taggedSentence = tagger.tagSentence((ArrayList)tsentence[0]);
                        var taglist = taggedSentence.toArray();

                        bool hasData = false;         // sentence contains the word "data"
                        bool singularTag = false;     // "data" itself is tagged NN or NNP
                        bool hasSingularVerb = false; // some token is tagged VBZ

                        // One pass collects everything the decision below needs.
                        foreach (TaggedWord entry in taglist)
                        {
                            string tag = entry.tag();
                            if (tag == "VBZ")
                            {
                                hasSingularVerb = true;
                            }

                            if (string.Equals(entry.word(), "data", StringComparison.OrdinalIgnoreCase))
                            {
                                hasData = true;
                                if (tag == "NN" || tag == "NNP")
                                {
                                    // Obviously singular "data"; no need to look further.
                                    singularTag = true;
                                    break;
                                }
                            }
                        }

                        //Highlight problematic sentences
                        if (hasData && (singularTag || hasSingularVerb))
                        {
                            sentence.HighlightColorIndex = Word.WdColorIndex.wdGray50;
                        }
                    }
                }
            }
            finally
            {
                // Never leave the progress dialog on screen, even when tagging throws.
                d.Hide();
            }

            MessageBox.Show("Possible uses of 'data' as a singular noun have been highlighted in grey.");
        }