/// <summary>
/// Checks the results <c>SoundexUtils</c> produces for <c>null</c> and
/// empty arguments: <c>Clean</c> is expected to return its input as-is, and
/// <c>DifferenceEncoded</c> is expected to report zero matching characters.
/// </summary>
public void TestSoundexUtilsNullBehaviour()
{
    // Cleaning: null and empty input should come back unchanged.
    var cleanedNull = SoundexUtils.Clean(null);
    Assert.AreEqual(null, cleanedNull);

    var cleanedEmpty = SoundexUtils.Clean("");
    Assert.AreEqual("", cleanedEmpty);

    // Difference: a null operand on either side should yield no matches.
    var nullVersusEmpty = SoundexUtils.DifferenceEncoded(null, "");
    Assert.AreEqual(0, nullVersusEmpty);

    var emptyVersusNull = SoundexUtils.DifferenceEncoded("", null);
    Assert.AreEqual(0, emptyVersusNull);
}
/// <summary>
/// Computes the four-character Soundex code of a string: the first cleaned
/// character followed by up to three mapped digits, padded with <c>'0'</c>.
/// </summary>
/// <param name="str">The string to encode with the Soundex algorithm.</param>
/// <returns>
/// The Soundex code for <paramref name="str"/>; <c>null</c> when
/// <paramref name="str"/> is <c>null</c>, or the cleaned string itself when
/// nothing is left after cleaning.
/// </returns>
/// <exception cref="ArgumentException">If a character is not mapped.</exception>
public virtual string GetSoundex(string str)
{
    if (str == null)
    {
        return null;
    }

    string cleaned = SoundexUtils.Clean(str);
    if (cleaned.Length == 0)
    {
        return cleaned;
    }

    // Slots that are never filled keep their '0' padding.
    char[] code = { '0', '0', '0', '0' };
    char initial = cleaned[0];
    code[0] = initial;
    int filled = 1;

    // Digit of the previously considered character, used to drop repeats.
    char previousDigit = Map(initial);

    for (int pos = 1; pos < cleaned.Length && filled < code.Length; pos++)
    {
        char letter = cleaned[pos];

        // With the H/W special case enabled these letters are ignored
        // completely: they are not mapped and do not reset the previous digit.
        bool ignoredHW = this.specialCaseHW && (letter == 'H' || letter == 'W');
        if (!ignoredHW)
        {
            char digit = Map(letter);

            // Silent characters leave the previous digit untouched as well.
            if (digit != SILENT_MARKER)
            {
                // Don't store vowels ('0') or repeats of the previous digit.
                if (digit != '0' && digit != previousDigit)
                {
                    code[filled] = digit;
                    filled++;
                }

                previousDigit = digit;
            }
        }
    }

    return new string(code);
}
/// <summary>
/// Computes the Refined Soundex code of a string: the first cleaned
/// character followed by the mapping code of every character (the first one
/// included), with consecutive duplicate codes collapsed.
/// </summary>
/// <param name="str">The string to encode with the Refined Soundex algorithm.</param>
/// <returns>
/// The Refined Soundex code for <paramref name="str"/>; <c>null</c> when
/// <paramref name="str"/> is <c>null</c>, or the cleaned string itself when
/// nothing is left after cleaning.
/// </returns>
public virtual string GetSoundex(string str)
{
    if (str == null)
    {
        return null;
    }

    string cleaned = SoundexUtils.Clean(str);
    if (cleaned.Length == 0)
    {
        return cleaned;
    }

    StringBuilder code = new StringBuilder();
    code.Append(cleaned[0]);

    // '*' is a placeholder for "no previous code yet".
    char previousCode = '*';

    // The walk deliberately starts at the first character, so its mapping
    // code follows the leading letter in the result.
    foreach (char letter in cleaned)
    {
        char mapped = GetMappingCode(letter);
        if (mapped != previousCode)
        {
            // A zero code is not emitted, but it still breaks a run of repeats.
            if (mapped != '\0')
            {
                code.Append(mapped);
            }

            previousCode = mapped;
        }
    }

    return code.ToString();
}
/// <summary>
/// Computes the NYSIIS code of a string.
/// </summary>
/// <param name="str">The string to encode with the NYSIIS algorithm.</param>
/// <returns>
/// The NYSIIS code for <paramref name="str"/>; <c>null</c> when
/// <paramref name="str"/> is <c>null</c>, or the cleaned string itself when
/// nothing is left after cleaning. When <c>IsStrict</c> is set the code is
/// cut to at most <c>TRUE_LENGTH</c> characters.
/// </returns>
public virtual string GetNysiis(string str)
{
    if (str == null)
    {
        return null;
    }

    // Use the same clean rules as Soundex
    str = SoundexUtils.Clean(str);
    if (str.Length == 0)
    {
        return str;
    }

    // Translate first characters of name (one replacement per pattern):
    // MAC -> MCC, KN -> NN, K -> C, PH | PF -> FF, SCH -> SSS
    str = PAT_MAC.Replace(str, "MCC", 1);
    str = PAT_KN.Replace(str, "NN", 1);
    str = PAT_K.Replace(str, "C", 1);
    str = PAT_PH_PF.Replace(str, "FF", 1);
    str = PAT_SCH.Replace(str, "SSS", 1);

    // Translate last characters of name:
    // EE -> Y, IE -> Y, DT | RT | RD | NT | ND -> D
    str = PAT_EE_IE.Replace(str, "Y", 1);
    str = PAT_DT_ETC.Replace(str, "D", 1);

    // First character of key = first character of name.
    StringBuilder key = new StringBuilder(str.Length);
    key.Append(str[0]);

    // Transcode the remaining characters one position at a time. The working
    // array is rewritten in place, so later positions see earlier results.
    char[] letters = str.ToCharArray();
    int total = letters.Length;
    for (int pos = 1; pos < total; pos++)
    {
        // Look ahead one and two characters; SPACE stands in past the end.
        char following = SPACE;
        if (pos < total - 1)
        {
            following = letters[pos + 1];
        }

        char afterFollowing = SPACE;
        if (pos < total - 2)
        {
            afterFollowing = letters[pos + 2];
        }

        char[] replacement = TranscodeRemaining(letters[pos - 1], letters[pos], following, afterFollowing);
        System.Array.Copy(replacement, 0, letters, pos, replacement.Length);

        // Only append the current char to the key if it differs from the previous one.
        if (letters[pos] != letters[pos - 1])
        {
            key.Append(letters[pos]);
        }
    }

    if (key.Length > 1)
    {
        char tail = key[key.Length - 1];

        // If last character is S, remove it and look at the new last character.
        if (tail == 'S')
        {
            key.Remove(key.Length - 1, 1);
            tail = key[key.Length - 1];
        }

        // If last characters are AY, replace with Y (drop the A).
        if (key.Length > 2 && key[key.Length - 2] == 'A' && tail == 'Y')
        {
            key.Remove(key.Length - 2, 1);
        }

        // If last character is A, remove it.
        if (tail == 'A')
        {
            key.Remove(key.Length - 1, 1);
        }
    }

    string encoded = key.ToString();
    if (!this.IsStrict)
    {
        return encoded;
    }

    // Strict mode caps the code length.
    int limit = Math.Min(TRUE_LENGTH, encoded.Length);
    return encoded.Substring(0, limit);
}
/// <summary>
/// Encodes both strings with this encoder and returns how many characters
/// of the two encoded strings are the same. The result ranges from 0 to the
/// length of the shortest encoded string: 0 indicates little or no
/// similarity, and 4 out of 4 (for example) indicates strong similarity or
/// identical values. For refined Soundex, the return value can be greater
/// than 4.
/// <para/>
/// See: <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp">
/// MS T-SQL DIFFERENCE</a>
/// <para/>
/// since 1.3
/// </summary>
/// <param name="s1">A string that will be encoded and compared.</param>
/// <param name="s2">A string that will be encoded and compared.</param>
/// <returns>The number of characters in the two encoded strings that are the same, from 0 to the length of the shortest encoded string.</returns>
/// <seealso cref="SoundexUtils.Difference(IStringEncoder, string, string)"/>
public virtual int Difference(string s1, string s2)
    => SoundexUtils.Difference(this, s1, s2); // this instance is the encoder applied to both strings