Beispiel #1
0
        // A Scrub built from an empty string must expose each of its four
        // translation collections as a non-null object.
        public void Create_Constructor_InstantiatesObjects()
        {
            var scrubber = new Scrub("");

            // character map
            Assert.IsNotNull(scrubber.CharTransDict);

            // ordered regex (match, replace) pairs
            Assert.IsNotNull(scrubber.RegxTuples);

            // word map
            Assert.IsNotNull(scrubber.StringTransDict);

            // named, pre-defined regex patterns
            Assert.IsNotNull(scrubber.RegxMatchesDefined);
        }
Beispiel #2
0
        // Switching ignore-case off must remove RegexOptions.IgnoreCase from
        // RegxOptions and leave every other option bit exactly as it was.
        public void SetRegxOptions_SetIgnoreCaseFalse_Matches()
        {
            Scrub        st       = new Scrub("");
            RegexOptions expected = st.RegxOptions & ~RegexOptions.IgnoreCase;

            // Turn the flag on first so the test exercises a real on -> off
            // transition instead of depending on whatever the constructor chose.
            st.RegxIgnoreCase();
            st.RegxIgnoreCase(false);

            // Compare the whole value. Masking IgnoreCase out of both sides (as
            // this test used to do) hides the very bit under test, so the
            // assertion could never fail on it.
            Assert.AreEqual(expected, st.RegxOptions);
        }
        // Asking RegxDefined for a pattern name that has not been registered
        // must raise KeyNotFoundException.
        public void Predefined_InvalidName_Untouched()
        {
            const string input = "¿¡Señor, the Chevrolet guys don't like     Dodge     guys, and and no one like MaZdA, Ola Senor?!    ";

            var scrubber = new Scrub(input);

            // Invalid pre-defined pattern name, should throw
            Assert.ThrowsException <KeyNotFoundException>(
                () => scrubber.RegxDefined("NotInTheListOfDefined"));
        }
Beispiel #4
0
        // After RegxIgnoreCase() is called with no argument, the
        // RegexOptions.IgnoreCase bit must be present in RegxOptions.
        public void SetRegxOptions_SetIgnoreCaseTrue_Matches()
        {
            var scrubber = new Scrub("");

            // No argument supplied: the original author notes the parameter
            // defaults to `true`.
            scrubber.RegxIgnoreCase();

            // Isolate the single bit under test before comparing.
            RegexOptions ignoreCaseBit = scrubber.RegxOptions & RegexOptions.IgnoreCase;

            Assert.AreEqual(RegexOptions.IgnoreCase, ignoreCaseBit);
        }
Beispiel #5
0
        // Constructing a Scrub from a string must leave that string readable,
        // unchanged, through ToString(), and must create the collections.
        public void Create_Constructor_ExpectSameString()
        {
            const string original = "Randy Butternubs";

            var scrubber = new Scrub(original);

            // collections created by the constructor
            Assert.IsNotNull(scrubber.CharTransDict);
            Assert.IsNotNull(scrubber.RegxTuples);
            Assert.IsNotNull(scrubber.StringTransDict);

            // the working string is exactly what was passed in
            string current = scrubber.ToString();
            Assert.AreEqual(original, current);

            Assert.IsNotNull(scrubber.RegxMatchesDefined);
        }
Beispiel #6
0
        // Running the whole fluent pipeline with nothing configured (empty strip
        // patterns, no maps, no regex pairs) must leave the string untouched.
        public void Translate_EmptyEverything_ExpectSameString()
        {
            const string original = "Randy Butternubs";

            var scrubber = new Scrub(original);

            scrubber.Strip("")
                    .MapChars()
                    .MapWords()
                    .RegxTranslate()
                    .Strip("");

            Assert.IsNotNull(scrubber.CharTransDict);
            Assert.IsNotNull(scrubber.RegxTuples);
            Assert.IsNotNull(scrubber.StringTransDict);

            string current = scrubber.ToString();
            Assert.AreEqual(original, current);
        }
Beispiel #7
0
        // TkoSeconds is assigned twice; reading it back must yield the value
        // from the second assignment.
        public void SetRegxTimeOut_MatchTimeOut_Matches()
        {
            const double firstTimeout  = 1.25;
            const double secondTimeout = 3.76;

            var scrubber = new Scrub("");

            // set, then overwrite
            scrubber.TkoSeconds = firstTimeout;
            scrubber.TkoSeconds = secondTimeout;

            double actualTimeout = scrubber.TkoSeconds;

            Assert.AreEqual(secondTimeout, actualTimeout);
        }
Beispiel #8
0
        // CacheSize is assigned twice; reading it back must yield the value
        // from the second assignment.
        public void SetRegxCache_CacheCount_Matches()
        {
            const int firstSize  = 1;
            const int secondSize = 39;

            var scrubber = new Scrub("");

            // set, then overwrite
            scrubber.CacheSize = firstSize;
            scrubber.CacheSize = secondSize;

            int actualSize = scrubber.CacheSize;

            Assert.AreEqual(secondSize, actualSize);
        }
Beispiel #9
0
        // Loads all three translators (words, characters, regex pairs) and then
        // calls each setter again with no arguments; afterwards every backing
        // collection must be empty.
        public void TestAll()
        {
            // Accented characters and their non-accented equivalents. The two
            // strings are paired position by position, so they must be the same
            // length. The match string previously held encoding-damaged text with
            // many repeated replacement characters, which no longer lined up
            // one-to-one with the replacement string; this is the intact literal.

            string matchChar   = "ŠŒŽšœžŸ¥µÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýÿ¡¿";
            string replaceChar = "SOZsozYYuAAAAAAACEEEEIIIIDNOOOOOOUUUUYsaaaaaaaceeeeiiiionoooooouuuuyy  ";

            // set up a dictionary, if ignore case, set the dict up with a new comparer

            StringComparer comparer = StringComparer.OrdinalIgnoreCase; // default is just Ordinal
            Dictionary <string, string> wordDictionary = new Dictionary <string, string>(comparer)
            {
                { "chevrolet", "Ford" },
                { "mAzDa", "BMW" },
                { "and and", "and" }  // will never match
            };

            // Need `System.ValueTuple` package to do this style of init
            // on v4.6 and below

            List <(string, string)> regxList = new List <(string, string)>
            {                      // Match, Replace
                ("BMW", "Fiat"),
                (@"\s+", " "),     // multi whitespace to 1 space
                (@"^\s*|\s*$", "") // trims leading/ending spaces
            };

            string expect = "Randy Butternubs";
            Scrub  st     = new Scrub(expect);

            // Set dictionary up, case insensitive match

            st.SetStringTranslator(wordDictionary, true);

            // set up character translators

            st.SetCharTranslator(matchChar, replaceChar);

            // set up list of regx replaces

            st.SetRegxTranslator(regxList);

            // call every setter again without arguments; the assertions below
            // expect this to leave each collection empty

            st.SetStringTranslator();
            st.SetRegxTranslator();
            st.SetCharTranslator();

            Assert.AreEqual(0, st.CharTransDict.Count);
            Assert.AreEqual(0, st.StringTransDict.Count);
            Assert.AreEqual(0, st.RegxTuples.Count);
        }
Beispiel #10
0
        // Set() must replace the string given to the constructor, and running
        // the unconfigured pipeline afterwards must leave the new string as is.
        public void SetString_AddStringAfter_ExpectNewString()
        {
            const string replacement = "Randy Butternubs";

            var scrubber = new Scrub("Haystack Calhoon");

            // swap the working string, then run every (unconfigured) stage
            scrubber.Set(replacement);
            scrubber.Strip("")
                    .MapChars()
                    .MapWords()
                    .RegxTranslate()
                    .Strip("");

            Assert.IsNotNull(scrubber.CharTransDict);
            Assert.IsNotNull(scrubber.RegxTuples);
            Assert.IsNotNull(scrubber.StringTransDict);

            string current = scrubber.ToString();
            Assert.AreEqual(replacement, current);
        }
        // Registers a custom named pattern and applies it with "XXX" as the
        // replacement. Only the lower-case "wtf" is expected to be replaced; the
        // mixed-case "WTF" / "WtF" occurrences must survive.
        public void MatchCase_RegxDefined_Matches()
        {
            string sentence         = "wtf does RemoveWTF do? Is WtF Case SeNsItIvE?";
            string expectedSentance = "XXX does RemoveWTF do? Is WtF Case SeNsItIvE?";

            Scrub st = new Scrub(sentence);

            // explicitly (re)seat the working string; same value the constructor received
            st.Set(sentence);

            // The `$` must be escaped: unescaped it is the end-of-input anchor, so
            // the `$hit` alternative could never match the literal text.
            st.RegxMatchesDefined.Add("RemoveWTF", @"(wtf)|(what the)\s+(hell|\$hit)");
            st.RegxDefined("RemoveWTF", "XXX");

            Assert.AreEqual(expectedSentance, st.ToString());
        }
        // Applies the pre-defined "WhitespaceCompact" pattern with a single
        // space as the replacement: each run of spaces becomes one space.
        public void Predefined_CompactWhitespace_Compacted()
        {
            const string input     = "¿¡Señor, the Chevrolet guys don't like     Dodge     guys, and and no one like MaZdA, Ola Senor?!    ";
            const string compacted = "¿¡Señor, the Chevrolet guys don't like Dodge guys, and and no one like MaZdA, Ola Senor?! ";

            var scrubber = new Scrub(input);

            // Compacting is not trimming: the expected text still ends with one
            // space. The explicit " " argument overrides the default
            // empty-string replacement.
            scrubber.RegxDefined("WhitespaceCompact", " ");

            string actual = scrubber.ToString();

            Assert.AreEqual(compacted, actual);
        }
Beispiel #13
0
        // Passing a ready-made char -> char dictionary to SetCharTranslator
        // must make CharTransDict hold the same entries.
        public void SetDict_CharTransDict_Matches()
        {
            Dictionary <char, char> upperCaseMap = new Dictionary <char, char>
            {
                ['a'] = 'A',
                ['b'] = 'B',
                ['c'] = 'C'
            };

            var scrubber = new Scrub("");

            scrubber.SetCharTranslator(upperCaseMap);

            CollectionAssert.AreEqual(upperCaseMap, scrubber.CharTransDict);
        }
Beispiel #14
0
        // Passing a word dictionary to SetStringTranslator must fill
        // StringTransDict with the same entries and leave the character map
        // and regex list empty.
        public void Set_StringTransDict_Matches()
        {
            var wordMap = new Dictionary <string, string>
            {
                ["Haystack"] = "Calhoon",
                ["Randy"]    = "Butternubs"
            };

            var scrubber = new Scrub("");

            scrubber.SetStringTranslator(wordMap);

            // the other two translators were never configured
            Assert.AreEqual(0, scrubber.CharTransDict.Count);
            Assert.AreEqual(0, scrubber.RegxTuples.Count);

            CollectionAssert.AreEqual(wordMap, scrubber.StringTransDict);
        }
Beispiel #15
0
        // Builds the character map from two parallel strings and checks that
        // CharTransDict ends up with one entry per character, with keys and
        // values matching the two input strings.
        public void SetString_CharTransDict_Matches()
        {
            Scrub st = new Scrub("");

            // The two strings are paired position by position. The match string
            // previously held encoding-damaged text with many repeated
            // replacement characters; a dictionary keyed by character cannot hold
            // repeats, so Count could never equal Length. This is the intact
            // literal, with every character distinct.

            string expectedMatchChar   = "ŠŒŽšœžŸ¥µÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýÿ¡¿";
            string expectedReplaceChar = "SOZsozYYuAAAAAAACEEEEIIIIDNOOOOOOUUUUYsaaaaaaaceeeeiiiionoooooouuuuyy  ";

            st.SetCharTranslator(expectedMatchChar, expectedReplaceChar);

            // flatten keys and values back into strings for comparison
            var match   = new string(st.CharTransDict.Keys.ToArray());
            var replace = new string(st.CharTransDict.Values.ToArray());

            Assert.AreEqual(expectedMatchChar.Length, st.CharTransDict.Count);
            Assert.AreEqual(expectedMatchChar, match);
            Assert.AreEqual(expectedReplaceChar, replace);
        }
Beispiel #16
0
        // Passing a list of (match, replace) pairs to SetRegxTranslator must
        // fill RegxTuples with the same pairs in the same order and leave the
        // character and word maps empty.
        public void Set_RegxTuples_Matches()
        {
            var regexPairs = new List <(string, string)>();
            regexPairs.Add(("Haystack", "Calhoon"));
            regexPairs.Add(("Randy", "Butternubs"));

            var scrubber = new Scrub("");

            scrubber.SetRegxTranslator(regexPairs);

            // the other two translators were never configured
            Assert.AreEqual(0, scrubber.CharTransDict.Count);
            Assert.AreEqual(0, scrubber.StringTransDict.Count);

            CollectionAssert.AreEqual(regexPairs, scrubber.RegxTuples);
        }
Beispiel #17
0
        // Console walkthrough of the Scrub API: character mapping, word mapping,
        // ordered regex replacement, stripping, and the pre-defined / user-defined
        // named patterns. Each stage prints its result to standard output.
        // ReSharper disable once UnusedParameter.Local
        private static void Main(string[] args)
        {
            // Map any character to any other character. The match string MUST
            // contain only unique characters. The replace string holds the
            // translated character at the same position.

            // The example below maps accented characters to their non-accented
            // equivalents. Both strings must be the same length (a 1 to 1 mapping).
            // Strings are used to make it easier to deal with lots of characters.

            string matchChar   = "ŠŒŽšœžŸ¥µÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýÿ¡¿";
            string replaceChar = "SOZsozYYuAAAAAAACEEEEIIIIDNOOOOOOUUUUYsaaaaaaaceeeeiiiionoooooouuuuyy  ";

            // Set up a dictionary, if ignore case, set the dict up with a new comparer
            // These words are mapped to any instances of other words. See comments
            // on how this works vs regx, basically each word from a sentence is passed
            // to the dictionary for translation. Current or past changes are not candidates
            // for any further changes

            StringComparer comparer = StringComparer.OrdinalIgnoreCase; // default is just Ordinal
            Dictionary <string, string> wordDictionary = new Dictionary <string, string>(comparer)
            {
                { "chevrolet", "Ford" },
                { "mAzDa", "BMW" },
                { "and and", "and" }  // will never match
            };

            // NOTE : Need `System.ValueTuple` package to do this style of init on v4.6 and below.

            // Regx list each item is executed in order of the list.
            // First element is the Regx match string (C# style) and the second
            // is the replacement string if the pattern matches. Matches can affect the entire
            // string, and each subsequent match can as well.

            List <(string, string)> regxList = new List <(string, string)>
            {                      // Match, Replace
                ("BMW", "Fiat"),   // swaps 'BMW' (case dependent) with 'Fiat'
                (@"\s+", " "),     // multi whitespace to 1 space
                (@"^\s*|\s*$", "") // trims leading/ending spaces
            };

            // Test sentence with odd characters, spaces and other things needing scrubbing

            string sentence = "¿¡Señor, the Chevrolet guys don't like     Dodge     guys, and and no one like MaZdA, Ola Senor?!    ";

            // Dump the orig string

            Console.WriteLine("The Sentence : >{0}<", sentence);

            Scrub st = new Scrub(sentence);

            // Set dictionary up, case insensitive match

            st.SetStringTranslator(wordDictionary, true);

            // set up character translators

            st.SetCharTranslator(matchChar, replaceChar);

            // set up list of regx replaces

            st.SetRegxTranslator(regxList);

            // add a string translation after the fact

            st.StringTransDict.Add("dodge", "Mercedes");

            // add a Regx translation after the fact

            st.RegxTuples.Add(("Senor", "Mr.Magoo"));

            // add a character translation after the fact

            st.CharTransDict.Add('\'', '#');

            // do all sorts of stuff!

            string translated = st.Strip("[,]").MapChars().MapWords().RegxTranslate().Strip(@"Mr\.").ToString();

            // Should be something like the string below -
            // Magoo the Ford guys don#t like Mercedes guys and and no one like Fiat Ola Magoo?!

            Console.WriteLine("Translated   : >{0}<", translated);

            // Reset the string with some emails. Reserved example.com / example.org
            // addresses are used; the text here previously held a web
            // e-mail-obfuscation placeholder instead of real addresses, so there
            // was nothing for the "Email" pattern to mask.
            st.Set("alice@example.com is sending an email to bob@example.org");

            translated = st.RegxDefined("Email", "**Email Removed**").ToString();

            Console.WriteLine("Masked   : >{0}<", translated);

            // reset the string with mixed Japanese / English text
            st.Set(" 前に来た時は北側からで、当時の光景はいまでも思い出せる。 Even now I remember the scene I saw approaching the city from the north.  青竜山脈から流れる川が湖へと流れこむ様、湖の中央には純白のホ");
            translated = st.RegxDefined("NonAscii", string.Empty).ToString();

            Console.WriteLine("To all ASCII : >{0}<", translated);

            // reset the string with some HTML containing a script block
            st.Set(@"<h1>Title</h1><script>var a=1; \\comment</script> Not In Script Tags");

            translated = st.RegxDefined("ScriptTags", string.Empty).RegxDefined("TagsSimple", string.Empty).ToString();

            Console.WriteLine("Strip Script and Tags   : >{0}<", translated);

            // Reset and set up a user-defined match pattern and set regx case
            // insensitivity. The `$` is escaped: unescaped it is the end-of-input
            // anchor, so the `$hit` alternative could never match the literal text.
            st.Set("wtf does RemoveWTF do? Is WtF Case SeNsItIvE?");
            st.RegxMatchesDefined.Add("RemoveWTF", @"(wtf)|(what the)\s+(hell|\$hit)");

            translated = st.RegxIgnoreCase().RegxDefined("RemoveWTF", "XXX").ToString();
            Console.WriteLine("New Pre-defined Match   : >{0}<", translated);
        }
        // End-to-end check: configure the word, character and regex translators,
        // add one extra entry to each after the fact, run the full fluent
        // pipeline and compare the result with the expected sentence.
        public void TestAll()
        {
            // Accented characters and the plain characters they map to. The two
            // strings are paired position by position, so they must have the
            // same length.
            const string accented = "ŠŒŽšœžŸ¥µÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýÿ¡¿";
            const string plain    = "SOZsozYYuAAAAAAACEEEEIIIIDNOOOOOOUUUUYsaaaaaaaceeeeiiiionoooooouuuuyy  ";

            // Word map keyed case-insensitively (the default comparer would be Ordinal).
            var wordMap = new Dictionary <string, string>(StringComparer.OrdinalIgnoreCase)
            {
                ["chevrolet"] = "Ford",
                ["mAzDa"]     = "BMW",
                ["and and"]   = "and"   // will never match
            };

            // Regex steps as (match, replace) pairs.
            // Tuple syntax needs the `System.ValueTuple` package on v4.6 and below.
            var regexSteps = new List <(string, string)>();
            regexSteps.Add(("BMW", "Fiat"));
            regexSteps.Add((@"\s+", " "));      // multi whitespace to 1 space
            regexSteps.Add((@"^\s*|\s*$", "")); // trims leading/ending spaces

            const string input    = "¿¡Señor, the Chevrolet guys don't like     Dodge     guys, and and no one like MaZdA, Ola Senor?!    ";
            const string expected = "Magoo the Ford guys don#t like Mercedes guys and and no one like Fiat Ola Magoo?!";

            var scrubber = new Scrub(input);

            // word map, case insensitive match
            scrubber.SetStringTranslator(wordMap, true);

            // character map
            scrubber.SetCharTranslator(accented, plain);

            // ordered regex replacements
            scrubber.SetRegxTranslator(regexSteps);

            // one late addition to each translator
            scrubber.StringTransDict.Add("dodge", "Mercedes");
            scrubber.RegxTuples.Add(("Senor", "Mr.Magoo"));
            scrubber.CharTransDict.Add('\'', '#');

            // run every stage
            scrubber.Strip("[,]")
                    .MapChars()
                    .MapWords()
                    .RegxTranslate()
                    .Strip(@"Mr\.");

            string actual = scrubber.ToString();

            Assert.AreEqual(expected, actual);
        }
Beispiel #19
0
        // Passing null to Set() must raise ArgumentNullException.
        public void Set_Null_Throws()
        {
            var scrubber = new Scrub("abc");

            Assert.ThrowsException <ArgumentNullException>(
                () => scrubber.Set(null));
        }