C# (CSharp) EditTools TextHelpers.StripPunctuation примеры использования

Язык программирования: C# (CSharp)

Пространство имен/Пакет: EditTools

Класс/Тип: TextHelpers

Метод/Функция: StripPunctuation

Примеров на hotexamples.com: 5

C# (CSharp) EditTools TextHelpers.StripPunctuation — найдено 5 примеров. Это лучшие примеры кода на C# (CSharp) для EditTools.TextHelpers.StripPunctuation, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

GetText(8)

StripPunctuation(5)

RemoveNumbers(2)

ToWords(2)

EditDistance(1)

KeepCaps(1)

ProperNouns(1)

Пример #1

Показать файл

Файл: Ribbon1.cs Проект: ashwanikaggarwal/editing-tools

        /// <summary>
        /// Ribbon click handler that writes an alphabetical list of the distinct words found in the
        /// active document into a newly created three-column document.
        /// </summary>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Ribbon event data; not used.</param>
        private void btn_WordList_Click(object sender, RibbonControlEventArgs e)
        {
            Word.Document    source = Globals.ThisAddIn.Application.ActiveDocument;
            HashSet <string> unique = new HashSet <string>();

            foreach (Word.Range range in TextHelpers.GetText(source))
            {
                // Punctuation is removed first, then the remaining text is broken into words.
                string cleaned = TextHelpers.StripPunctuation(range.Text);
                unique.UnionWith(TextHelpers.ToWords(cleaned));
            }

            // Discard the words that are all numbers.
            unique = TextHelpers.RemoveNumbers(unique);

            // The report goes into a brand-new document.
            Word.Document report = Globals.ThisAddIn.Application.Documents.Add();

            // Heading.
            Word.Paragraph para = report.Content.Paragraphs.Add();
            para.set_Style(report.Styles["Heading 1"]);
            para.Range.Text = "Word List\n";

            // Explanatory text.
            para = report.Content.Paragraphs.Add();
            para.set_Style(report.Styles["Normal"]);
            para.Range.Text = "This is a proofreading tool. It takes every word in the document, strips the punctuation, removes words that consist only of numbers, and then presents them all in alphabetical order. This is a great way to find typos and inconsistencies.\n";

            para = report.Content.Paragraphs.Add();
            para.Range.Text = "Capitalization is retained as is. That means that words that appear at the beginning of a sentence will appear capitalized.\n";

            // A continuous section break starts section 2, which is laid out in three columns.
            para = report.Content.Paragraphs.Add();
            para.Range.InsertBreak(Word.WdBreakType.wdSectionBreakContinuous);
            report.Sections[2].PageSetup.TextColumns.SetCount(3);

            // One word per line, sorted with the default string ordering.
            string[] sorted = unique.ToArray();
            Array.Sort(sorted);
            para = report.Content.Paragraphs.Add();
            para.Range.Text = string.Join("\n", sorted) + "\n";

            // Close the column section and mark the report as already grammar-checked.
            para = report.Content.Paragraphs.Add();
            para.Range.InsertBreak(Word.WdBreakType.wdSectionBreakContinuous);
            report.GrammarChecked = true;
        }

Пример #2

Показать файл

Файл: Ribbon.cs Проект: yawarkhan15/Word-Editing-Tools

        /// <summary>
        /// Counts how often each word occurs in the active Word document.
        /// </summary>
        /// <remarks>
        /// The text of every range returned by <c>TextHelpers.GetText</c> is passed through
        /// <c>TextHelpers.StripPunctuation</c> and split on runs of whitespace. Tokens that are blank
        /// or consist solely of digits are not counted. The dictionary uses the default string
        /// comparer, so keys are case-sensitive.
        /// </remarks>
        /// <returns>A dictionary mapping each distinct word to its number of occurrences.</returns>
        public static Dictionary <string, uint> GetWordFrequencyList()
        {
            Word.Document             doc      = Globals.ThisAddIn.Application.ActiveDocument;
            Dictionary <string, uint> wordlist = new Dictionary <string, uint>();
            Regex re_allnums = new Regex(@"^\d+$");

            foreach (Word.Range rng in TextHelpers.GetText(doc))
            {
                string txt = TextHelpers.StripPunctuation(rng.Text);

                foreach (string word in Regex.Split(txt, @"\s+"))
                {
                    // Regex.Split yields empty strings at the edges when the text starts or ends
                    // with whitespace; those and all-digit tokens are not words.
                    if (word.Trim() == "" || re_allnums.IsMatch(word))
                    {
                        continue;
                    }

                    // A single lookup: a missing key leaves 'count' at 0.
                    uint count;
                    wordlist.TryGetValue(word, out count);
                    wordlist[word] = count + 1;
                }
            }
            return wordlist;
        }

Пример #3

Показать файл

Файл: Ribbon.cs Проект: yawarkhan15/Word-Editing-Tools

        /// <summary>
        /// Collects the distinct words of the active Word document.
        /// </summary>
        /// <returns>
        /// The union of the word sets produced for every text range, after
        /// <c>TextHelpers.RemoveNumbers</c> has been applied to it.
        /// </returns>
        static public HashSet <string> GetWordList()
        {
            Word.Document    activeDoc = Globals.ThisAddIn.Application.ActiveDocument;
            HashSet <string> collected = new HashSet <string>();

            foreach (Word.Range range in TextHelpers.GetText(activeDoc))
            {
                // Remove punctuation, split into words, and merge them into the running set.
                string cleaned = TextHelpers.StripPunctuation(range.Text);
                collected.UnionWith(TextHelpers.ToWords(cleaned));
            }

            // Words that are all numbers are filtered out of the final result.
            return TextHelpers.RemoveNumbers(collected);
        }

Пример #4

Показать файл

Файл: Ribbon1.cs Проект: ashwanikaggarwal/editing-tools

        /// <summary>
        /// Ribbon click handler that builds a phrase-frequency report for the active document.
        /// </summary>
        /// <remarks>
        /// Reads the minimum and maximum phrase length (in words) from the ribbon edit boxes and
        /// saves them to the user settings. Within each sentence it counts every lower-cased run of
        /// that many consecutive words, then writes the phrases that occur more than once, most
        /// frequent first, into a new document with a two-column section. If the lengths are
        /// missing, zero, or the minimum exceeds the maximum, a message box is shown and nothing
        /// else happens.
        /// </remarks>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Ribbon event data; not used.</param>
        private void btn_PhraseFrequency_Click(object sender, RibbonControlEventArgs e)
        {
            Word.Document doc = Globals.ThisAddIn.Application.ActiveDocument;
            uint          newminlen;
            uint          newmaxlen;

            // A failed parse leaves the value at 0, which the validation below rejects.
            UInt32.TryParse(edit_MinPhraseLen.Text, out newminlen);
            UInt32.TryParse(edit_MaxPhraseLen.Text, out newmaxlen);
            if ((newminlen == 0) || (newmaxlen == 0) || (newminlen > newmaxlen))
            {
                MessageBox.Show("The phrase length fields must contain numbers greater than zero, and the minimum length must be less than or equal to the maximum length.");
                return;
            }

            Properties.Settings.Default.minphraselen = newminlen;
            Properties.Settings.Default.maxphraselen = newmaxlen;
            Properties.Settings.Default.Save();

            Dictionary <string, uint> phrases = new Dictionary <string, uint>();
            //Iterate through all text
            foreach (Word.Range rng in TextHelpers.GetText(doc))
            {
                //Break into sentences
                foreach (Word.Range sentence in rng.Sentences)
                {
                    //Strip punctuation
                    string nopunc = TextHelpers.StripPunctuation(sentence.Text);

                    // Split on any whitespace and drop empty tokens, so runs of spaces and
                    // leading/trailing whitespace never produce empty "words".
                    string[] words = nopunc.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);

                    // A phrase cannot be longer than the sentence. Capping the length here also
                    // keeps the counter from wrapping around when the maximum is uint.MaxValue.
                    uint longest = Math.Min(newmaxlen, (uint)words.Length);

                    //Extract phrases
                    for (uint len = newminlen; len <= longest; len++)
                    {
                        int count = (int)len;

                        // "<=" so that the window ending on the last word is included.
                        for (int start = 0; start + count <= words.Length; start++)
                        {
                            string phrase = string.Join(" ", words, start, count).ToLower();

                            //Add to data structure (a missing key leaves 'seen' at 0)
                            uint seen;
                            phrases.TryGetValue(phrase, out seen);
                            phrases[phrase] = seen + 1;
                        }
                    }
                }
            }

            //Display results

            //Create new document
            Word.Document  newdoc = Globals.ThisAddIn.Application.Documents.Add();
            Word.Paragraph pgraph;

            //Intro text
            pgraph = newdoc.Content.Paragraphs.Add();
            pgraph.set_Style(newdoc.Styles["Heading 1"]);
            pgraph.Range.Text = "Phrase Frequency List\n";
            pgraph            = newdoc.Content.Paragraphs.Add();
            pgraph.set_Style(newdoc.Styles["Normal"]);
            pgraph.Range.Text = "Punctuation (other than apostrophes) has been removed. All words have been lowercased for comparison.\n";

            // A continuous section break starts section 2, which is laid out in two columns.
            pgraph = newdoc.Content.Paragraphs.Add();
            pgraph.Range.InsertBreak(Word.WdBreakType.wdSectionBreakContinuous);
            Word.Section sec = newdoc.Sections[2];
            sec.PageSetup.TextColumns.SetCount(2);

            // Only repeated phrases are reported, in descending order of frequency.
            var phraselist = phrases.Where(x => x.Value > 1).ToList();
            phraselist.Sort((pair1, pair2) => pair2.Value.CompareTo(pair1.Value));
            foreach (var pair in phraselist)
            {
                pgraph = newdoc.Content.Paragraphs.Add();
                pgraph.set_Style(newdoc.Styles["Normal"]);
                pgraph.Range.Text = pair.Key + "\t" + pair.Value.ToString() + "\n";
            }

            pgraph = newdoc.Content.Paragraphs.Add();
            pgraph.Range.InsertBreak(Word.WdBreakType.wdSectionBreakContinuous);
            newdoc.GrammarChecked = true;
        }

Пример #5

Показать файл

Файл: Ribbon1.cs Проект: ashwanikaggarwal/editing-tools

        /// <summary>
        /// Ribbon click handler that builds a word-frequency report for the active document.
        /// </summary>
        /// <remarks>
        /// Counts every whitespace-separated token of the punctuation-stripped document text,
        /// skipping blank and all-digit tokens, and writes the result, sorted by descending count,
        /// as a two-column table into a new document. A progress dialog is shown while the handler
        /// runs and is hidden again when it finishes, including when an exception is thrown.
        /// Elapsed times for the two phases are written to the debug output.
        /// </remarks>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Ribbon event data; not used.</param>
        private void btn_WordFreq_Click(object sender, RibbonControlEventArgs e)
        {
            ProgressDialog d = new ProgressDialog();

            d.Show();
            try
            {
                Stopwatch watch = Stopwatch.StartNew();

                Word.Document             doc      = Globals.ThisAddIn.Application.ActiveDocument;
                Dictionary <string, uint> wordlist = new Dictionary <string, uint>();
                Regex re_allnums = new Regex(@"^\d+$");

                // Materialise the ranges once: they are needed both for the progress maximum and
                // for the loop, and the sequence should not be enumerated twice.
                List <Word.Range> textranges = new List <Word.Range>(TextHelpers.GetText(doc));

                d.pbMax = textranges.Count;
                d.pbVal = 0;
                foreach (Word.Range rng in textranges)
                {
                    d.pbVal++;

                    //strip punctuation
                    string txt = TextHelpers.StripPunctuation(rng.Text);

                    foreach (string word in Regex.Split(txt, @"\s+"))
                    {
                        // Regex.Split yields empty strings at the edges when the text starts or
                        // ends with whitespace; those and all-digit tokens are not words.
                        if (word.Trim() == "" || re_allnums.IsMatch(word))
                        {
                            continue;
                        }

                        // A single lookup: a missing key leaves 'count' at 0.
                        uint count;
                        wordlist.TryGetValue(word, out count);
                        wordlist[word] = count + 1;
                    }
                }
                Debug.WriteLine("Counts tabulated. Time elapsed: " + watch.Elapsed.ToString());
                watch.Restart();

                //Create new document
                Word.Document  newdoc = Globals.ThisAddIn.Application.Documents.Add();
                Word.Paragraph pgraph;

                //Intro text
                pgraph = newdoc.Content.Paragraphs.Add();
                pgraph.set_Style(newdoc.Styles["Heading 1"]);
                pgraph.Range.Text = "Word Frequency List\n";
                pgraph            = newdoc.Content.Paragraphs.Add();
                pgraph.set_Style(newdoc.Styles["Normal"]);
                pgraph.Range.Text = "Capitalization is retained as is. That means that words that appear at the beginning of a sentence will appear capitalized. Don't forget that you can sort the table!\n";
                pgraph            = newdoc.Content.Paragraphs.Add();
                pgraph.set_Style(newdoc.Styles["Normal"]);
                pgraph.Range.Text = "Total words found (case sensitive): " + wordlist.Count.ToString() + "\n";

                // A continuous section break starts section 2, which is laid out in three columns.
                pgraph = newdoc.Content.Paragraphs.Add();
                pgraph.Range.InsertBreak(Word.WdBreakType.wdSectionBreakContinuous);
                Word.Section sec = newdoc.Sections[2];
                sec.PageSetup.TextColumns.SetCount(3);

                // Most frequent words first.
                var words = wordlist.ToList();
                words.Sort((pair1, pair2) => pair2.Value.CompareTo(pair1.Value));

                // Word cannot create a table with zero rows, so an empty result gets no table.
                if (words.Count > 0)
                {
                    // Keep the table returned by Add instead of re-fetching Tables[1] per cell.
                    Word.Table table = newdoc.Tables.Add(pgraph.Range, words.Count, 2);
                    table.AutoFitBehavior(Word.WdAutoFitBehavior.wdAutoFitContent);
                    table.AllowAutoFit = true;

                    d.pbMax = words.Count;
                    d.pbVal = 0;
                    int row = 1;

                    foreach (var pair in words)
                    {
                        d.pbVal++;
                        table.Cell(row, 1).Range.Text = pair.Key;
                        table.Cell(row, 2).Range.Text = pair.Value.ToString();
                        row++;
                    }
                }

                pgraph = newdoc.Content.Paragraphs.Add();
                pgraph.Range.InsertBreak(Word.WdBreakType.wdSectionBreakContinuous);
                newdoc.GrammarChecked = true;
                Debug.WriteLine("All done. Time elapsed: " + watch.Elapsed.ToString());
                watch.Stop();
            }
            finally
            {
                // Never leave the progress dialog on screen, even when a Word call fails.
                d.Hide();
            }
        }