Example #1
0
 public void TestSoundexUtilsNullBehaviour()
 {
     // Clean: a null input is expected to come back as null.
     string cleanedNull = SoundexUtils.Clean(null);
     Assert.AreEqual(null, cleanedNull);

     // Clean: an empty input is expected to come back as an empty string.
     string cleanedEmpty = SoundexUtils.Clean("");
     Assert.AreEqual("", cleanedEmpty);

     // DifferenceEncoded: a null on either side is expected to yield 0.
     var nullVersusEmpty = SoundexUtils.DifferenceEncoded(null, "");
     Assert.AreEqual(0, nullVersusEmpty);

     var emptyVersusNull = SoundexUtils.DifferenceEncoded("", null);
     Assert.AreEqual(0, emptyVersusNull);
 }
Example #2
0
        /// <summary>
        /// Encodes a string with the Soundex algorithm, producing a four-character code:
        /// the first character of the cleaned input followed by up to three mapped digits,
        /// with unused positions left as <c>'0'</c>.
        /// </summary>
        /// <param name="str">String to encode using the Soundex algorithm; may be <c>null</c>.</param>
        /// <returns>
        /// <c>null</c> when <paramref name="str"/> is <c>null</c>; the cleaned string when it is
        /// empty after cleaning; otherwise the four-character Soundex code.
        /// </returns>
        /// <exception cref="ArgumentException">If a character is not mapped.</exception>
        public virtual string GetSoundex(string str)
        {
            if (str == null)
            {
                return null;
            }

            string cleaned = SoundexUtils.Clean(str);
            if (cleaned.Length == 0)
            {
                return cleaned;
            }

            // Every slot starts as '0' so a short code is already padded.
            char[] code = { '0', '0', '0', '0' };
            char leading = cleaned[0];
            code[0] = leading;
            int filled = 1;

            // Digit of the most recently mapped character, used to collapse repeats.
            char previous = Map(leading);

            for (int pos = 1; pos < cleaned.Length && filled < code.Length; pos++)
            {
                char letter = cleaned[pos];

                // With the H/W special case enabled these letters are skipped entirely:
                // they are neither mapped nor allowed to reset the previous digit.
                bool ignoredHW = this.specialCaseHW && (letter == 'H' || letter == 'W');
                if (!ignoredHW)
                {
                    char mapped = Map(letter);
                    if (mapped != SILENT_MARKER)
                    {
                        // Digit '0' and immediate repeats are not stored,
                        // but both still become the new "previous" digit.
                        if (mapped != '0' && mapped != previous)
                        {
                            code[filled++] = mapped;
                        }
                        previous = mapped;
                    }
                }
            }

            return new string(code);
        }
Example #3
0
        /// <summary>
        /// Encodes a string with the Refined Soundex algorithm: the first character of the
        /// cleaned input followed by the mapping codes of all its characters (the first one
        /// included), with consecutive duplicate codes collapsed.
        /// </summary>
        /// <param name="str">String to encode using the Refined Soundex algorithm; may be <c>null</c>.</param>
        /// <returns>
        /// <c>null</c> when <paramref name="str"/> is <c>null</c>; the cleaned string when it is
        /// empty after cleaning; otherwise the Refined Soundex code.
        /// </returns>
        public virtual string GetSoundex(string str)
        {
            if (str == null)
            {
                return null;
            }

            string cleaned = SoundexUtils.Clean(str);
            if (cleaned.Length == 0)
            {
                return cleaned;
            }

            StringBuilder encoded = new StringBuilder();
            encoded.Append(cleaned[0]);

            // '*' is the initial "previous code" sentinel used before any character is mapped.
            char previous = '*';

            // The scan intentionally includes the first character, so its code
            // is emitted right after the literal first letter.
            foreach (char letter in cleaned)
            {
                char code = GetMappingCode(letter);
                if (code != previous)
                {
                    // A zero code is not emitted, but it still becomes the previous code.
                    if (code != 0)
                    {
                        encoded.Append(code);
                    }
                    previous = code;
                }
            }

            return encoded.ToString();
        }
Example #4
0
        /// <summary>
        /// Retrieves the NYSIIS code for a given string.
        /// <para/>
        /// The input is cleaned, its leading and trailing character groups are translated,
        /// the remaining characters are transcoded one position at a time, and finally the
        /// tail of the key is trimmed (trailing S, AY -> Y, trailing A). When
        /// <c>IsStrict</c> is set the result is truncated to at most <c>TRUE_LENGTH</c> characters.
        /// </summary>
        /// <param name="str">String to encode using the NYSIIS algorithm; may be <c>null</c>.</param>
        /// <returns>
        /// A NYSIIS code for the string supplied; <c>null</c> when <paramref name="str"/> is
        /// <c>null</c>, and the cleaned string itself when it is empty after cleaning.
        /// </returns>
        public virtual string GetNysiis(string str)
        {
            if (str == null)
            {
                return(null);
            }

            // Use the same clean rules as Soundex
            str = SoundexUtils.Clean(str);

            if (str.Length == 0)
            {
                return(str);
            }

            // Translate first characters of name:
            // MAC -> MCC, KN -> NN, K -> C, PH | PF -> FF, SCH -> SSS
            // NOTE(review): the PAT_* patterns are defined outside this block; presumably they are
            // anchored to the start of the string -- confirm. Each call passes a count of 1.
            str = PAT_MAC.Replace(str, "MCC", 1);
            str = PAT_KN.Replace(str, "NN", 1);
            str = PAT_K.Replace(str, "C", 1);
            str = PAT_PH_PF.Replace(str, "FF", 1);
            str = PAT_SCH.Replace(str, "SSS", 1);

            // Translate last characters of name:
            // EE -> Y, IE -> Y, DT | RT | RD | NT | ND -> D
            // NOTE(review): presumably anchored to the end of the string -- confirm against the pattern definitions.
            str = PAT_EE_IE.Replace(str, "Y", 1);
            str = PAT_DT_ETC.Replace(str, "D", 1);

            // First character of key = first character of name (as it stands after the translations above).
            StringBuilder key = new StringBuilder(str.Length);

            key.Append(str[0]);

            // Transcode remaining characters, incrementing by one character each time
            char[] chars = str.ToCharArray();
            int    len   = chars.Length;

            for (int i = 1; i < len; i++)
            {
                // Look ahead up to two characters; SPACE stands in for positions past the end.
                char   next       = i < len - 1 ? chars[i + 1] : SPACE;
                char   aNext      = i < len - 2 ? chars[i + 2] : SPACE;
                char[] transcoded = TranscodeRemaining(chars[i - 1], chars[i], next, aNext);
                // The result is written back into the working array starting at position i, so later
                // iterations see already-transcoded characters both as "previous" and, when the result
                // holds more than one character, as the upcoming ones.
                System.Array.Copy(transcoded, 0, chars, i, transcoded.Length);

                // only append the current char to the key if it is different from the last one
                // (the comparison uses the transcoded values held in the working array)
                if (chars[i] != chars[i - 1])
                {
                    key.Append(chars[i]);
                }
            }

            if (key.Length > 1)
            {
                char lastChar = key[key.Length - 1];

                // If last character is S, remove it.
                if (lastChar == 'S')
                {
                    // key held more than one character, so at least one remains to re-read as the new last character.
                    key.Remove(key.Length - 1, 1);
                    lastChar = key[key.Length - 1];
                }

                if (key.Length > 2)
                {
                    char last2Char = key[key.Length - 2];
                    // If last characters are AY, replace with Y.
                    if (last2Char == 'A' && lastChar == 'Y')
                    {
                        // Drop the 'A'; lastChar is left as 'Y', so the trailing-A check below does not fire.
                        key.Remove(key.Length - 2, 1);
                    }
                }

                // If last character is A, remove it.
                // Note: when only a single 'A' is left after the S removal, this empties the key.
                if (lastChar == 'A')
                {
                    key.Remove(key.Length - 1, 1);
                }
            }

            string result = key.ToString();

            // Strict mode truncates the key to at most TRUE_LENGTH characters; the trailing "- 0" has no effect.
            return(this.IsStrict ? result.Substring(0, Math.Min(TRUE_LENGTH, result.Length) - 0) : result);
        }
Example #5
0
 /// <summary>
 /// Encodes both strings with this encoder and reports how many characters of the
 /// two encoded strings are the same. The value ranges from 0 up to the length of
 /// the shorter encoded string: 0 indicates little or no similarity, while a full
 /// score (for example 4 out of 4) indicates strong similarity or identical values.
 /// For refined Soundex, the return value can be greater than 4.
 /// <para/>
 /// See: <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp">
 ///     MS T-SQL DIFFERENCE</a>
 /// <para/>
 /// since 1.3
 /// </summary>
 /// <param name="s1">The first string to be encoded and compared.</param>
 /// <param name="s2">The second string to be encoded and compared.</param>
 /// <returns>The number of matching characters in the two encoded strings, from 0 to the length of the shortest encoded string.</returns>
 /// <seealso cref="SoundexUtils.Difference(IStringEncoder, string, string)"/>
 public virtual int Difference(string s1, string s2)
 {
     // The comparison itself is delegated to the shared helper, with this instance as the encoder.
     int matching = SoundexUtils.Difference(this, s1, s2);
     return matching;
 }