/// <summary>
    /// Removes repeated words from a product name, keeping the first occurrence of each word.
    /// </summary>
    /// <remarks>
    /// The input is first passed through <c>UserDefinedFunctions.CleanUpProductName</c> and then split
    /// on single spaces. Words are compared through their <c>ToLower()</c> form, so "Foo" and "foo"
    /// count as the same word, but the casing of the first occurrence is what appears in the result.
    /// </remarks>
    /// <param name="str">The text to de-duplicate.</param>
    /// <returns>
    /// The surviving words joined by single spaces, or an empty string when the cleaned input is
    /// null or contains no words.
    /// </returns>
    public static string RemoveDuplicateString(string str)
    {
        string cleaned = UserDefinedFunctions.CleanUpProductName(str);

        // Other methods in this file (e.g. GetNumbers) check the clean-up result for null,
        // so guard here as well instead of failing inside Split.
        if (cleaned == null)
        {
            return string.Empty;
        }

        // Lower-cased forms of the words already emitted.
        var seen = new HashSet<string>();

        // Accumulates the de-duplicated output.
        var result = new StringBuilder(cleaned.Length);

        string[] words = cleaned.Split(new char[] { ' ' },
                                       StringSplitOptions.RemoveEmptyEntries);

        foreach (string word in words)
        {
            // HashSet.Add returns false when the key is already present, which gives us
            // the "first time seen" test with a single lookup.
            if (seen.Add(word.ToLower()))
            {
                result.Append(word).Append(' ');
            }
        }

        // Trim removes the trailing separator added after the last word.
        return(result.ToString().Trim());
    }
    /// <summary>
    /// Extracts the ASCII digits ('0'-'9') and spaces from a product name.
    /// </summary>
    /// <remarks>
    /// The input is passed through <c>UserDefinedFunctions.CleanUpProductName</c> before filtering,
    /// and the filtered text is passed through it again before being returned.
    /// </remarks>
    /// <param name="str">The text to filter; a null clean-up result is treated as empty text.</param>
    /// <returns>
    /// The result of cleaning the filtered text, converted to <see cref="SqlString"/>.
    /// </returns>
    public static SqlString GetNumbers(string str)
    {
        string cleaned = UserDefinedFunctions.CleanUpProductName(str) ?? "";

        // A StringBuilder avoids re-allocating the whole result on every kept character.
        var kept = new StringBuilder(cleaned.Length);

        foreach (char c in cleaned)
        {
            // Deliberately an ASCII range test rather than char.IsDigit, which would also
            // accept non-ASCII Unicode digits that the original keep-list did not contain.
            if ((c >= '0' && c <= '9') || c == ' ')
            {
                kept.Append(c);
            }
        }

        return((SqlString)(UserDefinedFunctions.CleanUpProductName(kept.ToString())));
    }
Beispiel #3
0
    /// <summary>
    /// Returns the non-digit portions of a product name after its words have been sorted.
    /// </summary>
    /// <remarks>
    /// The input is cleaned with <c>UserDefinedFunctions.CleanUpProductName</c>, sorted with
    /// <c>UserDefinedFunctions.SortString</c> (space-delimited, passing
    /// <paramref name="numRemoveLetters"/> through) and cleaned again. Every match of the pattern
    /// <c>[\D']*</c> is then trimmed and appended, followed by a single space. The pattern also
    /// yields empty matches (for instance at digit positions), and each of those contributes
    /// only the separator space.
    /// </remarks>
    /// <param name="str">The text to process.</param>
    /// <param name="numRemoveLetters">Length threshold forwarded to <c>SortString</c>.</param>
    /// <returns>The trimmed concatenation of the matches; an empty string when nothing remains.</returns>
    public static SqlString GetString(string str, int numRemoveLetters)
    {
        string cleaned = UserDefinedFunctions.CleanUpProductName(str);

        string sorted = UserDefinedFunctions.CleanUpProductName(
            UserDefinedFunctions.SortString(cleaned, " ", numRemoveLetters));

        // Fall back to a digit-only placeholder so the regex below has a non-null input;
        // it produces no non-digit text, so the final result is empty in that case.
        if (sorted == null)
        {
            sorted = "0";
        }

        var result = new StringBuilder();

        foreach (Match match in Regex.Matches(sorted, @"[\D']*"))
        {
            result.Append(match.Value.Trim()).Append(' ');
        }

        // Trimming an all-whitespace buffer already yields "", so no separate empty case is needed.
        return(result.ToString().Trim());
    }
    /// <summary>
    /// Computes the Levenshtein (edit) distance between two product names.
    /// </summary>
    /// <remarks>
    /// Both inputs are passed through <c>UserDefinedFunctions.CleanUpProductName</c> first. A null
    /// clean-up result is treated as empty text, so when only one argument is null the distance
    /// is the length of the other cleaned string instead of a NullReferenceException.
    /// </remarks>
    /// <param name="firstString">First text to compare; may be null.</param>
    /// <param name="secondString">Second text to compare; may be null.</param>
    /// <param name="ignoreCase">When true, both texts are lower-cased with <c>ToLower()</c> before comparing.</param>
    /// <returns>
    /// The minimum number of single-character insertions, deletions and substitutions needed to
    /// turn one cleaned string into the other; 0 when both arguments are null.
    /// </returns>
    public static int LevenshteinDistance(string firstString, string secondString, Boolean ignoreCase)
    {
        // Two missing values are reported as distance 0 without invoking the clean-up.
        if ((firstString == null) && (secondString == null))
        {
            return(0);
        }

        string strF = UserDefinedFunctions.CleanUpProductName(firstString) ?? "";
        string strS = UserDefinedFunctions.CleanUpProductName(secondString) ?? "";

        if (ignoreCase)
        {
            strF = strF.ToLower();
            strS = strS.ToLower();
        }

        int lenF = strF.Length;
        int lenS = strS.Length;

        // d[i, j] holds the distance between the first i chars of strF and the first j chars of strS.
        int[,] d = new int[lenF + 1, lenS + 1];

        // Turning a prefix into the empty string costs one deletion per character...
        for (int i = 0; i <= lenF; i++)
        {
            d[i, 0] = i;
        }
        // ...and building a prefix from the empty string costs one insertion per character.
        for (int j = 0; j <= lenS; j++)
        {
            d[0, j] = j;
        }

        for (int j = 1; j <= lenS; j++)
        {
            for (int i = 1; i <= lenF; i++)
            {
                if (strF[i - 1] == strS[j - 1])
                {
                    // Matching characters carry the diagonal cost forward unchanged.
                    d[i, j] = d[i - 1, j - 1];
                }
                else
                {
                    d[i, j] = Math.Min(Math.Min(
                                           d[i - 1, j] + 1,  // a deletion
                                           d[i, j - 1] + 1), // an insertion
                                       d[i - 1, j - 1] + 1); // a substitution
                }
            }
        }

        return(d[lenF, lenS]);
    }
    /// <summary>
    /// Computes a Jaro-Winkler score that, for short inputs, is taken as the maximum over
    /// every word ordering of both inputs.
    /// </summary>
    /// <remarks>
    /// Both inputs are upper-cased with <c>ToUpperInvariant</c> and cleaned with
    /// <c>UserDefinedFunctions.CleanUpProductName</c>. The word limit comes from
    /// <c>UserDefinedFunctions.CountStringSize</c>; permutations are only tried when that limit
    /// is at most 4 and neither input has more words than the limit. Otherwise the plain
    /// <c>JaroWinklerDistance</c> of the cleaned inputs is returned.
    /// </remarks>
    /// <param name="firstWord">First text to compare.</param>
    /// <param name="secondWord">Second text to compare.</param>
    /// <returns>The best score found; 0.0 when either argument is null.</returns>
    public static Double PermutedJaroWinklerDistance(String firstWord, String secondWord)
    {
        // A missing operand is scored as a complete mismatch.
        if ((firstWord == null) || (secondWord == null))
        {
            return(0.0);
        }

        // Normalise both inputs before counting and splitting them.
        String left      = UserDefinedFunctions.CleanUpProductName(firstWord.ToUpperInvariant());
        String right     = UserDefinedFunctions.CleanUpProductName(secondWord.ToUpperInvariant());
        int    wordLimit = UserDefinedFunctions.CountStringSize(left, right);

        List <string> leftWords  = left.Split(' ').ToList();
        List <string> rightWords = right.Split(' ').ToList();

        // Only permute inputs whose word count does not exceed the limit (at most 4).
        bool smallEnough = (rightWords.Count <= wordLimit) &&
                           (leftWords.Count <= wordLimit) &&
                           (wordLimit <= 4);

        if (!smallEnough)
        {
            return(UserDefinedFunctions.JaroWinklerDistance(left, right));
        }

        IList <IList <string> > leftOrderings  = UserDefinedFunctions.Permutations(leftWords);
        IList <IList <string> > rightOrderings = UserDefinedFunctions.Permutations(rightWords);

        double best = 0;

        foreach (IList <string> leftOrdering in leftOrderings)
        {
            string leftText = string.Join(" ", leftOrdering.ToArray());

            foreach (IList <string> rightOrdering in rightOrderings)
            {
                string rightText = string.Join(" ", rightOrdering.ToArray());
                double score     = UserDefinedFunctions.JaroWinklerDistance(leftText, rightText);

                best = Math.Max(best, score);
            }
        }

        // Highest score over all pairs of orderings.
        return(best);
    }
    /// <summary>
    /// Counts how many words of one product name occur as whole words in another.
    /// </summary>
    /// <remarks>
    /// Both arguments are cleaned with <c>UserDefinedFunctions.CleanUpProductName</c>; null
    /// arguments and null clean-up results are treated as empty text. Each space-separated word
    /// of the cleaned <paramref name="strInput"/> is regex-escaped, wrapped in <c>\b</c> word
    /// boundaries and searched for in the cleaned <paramref name="strPatternValue"/>. A word that
    /// appears several times in <paramref name="strInput"/> is counted once per appearance.
    /// </remarks>
    /// <param name="strInput">Text whose words are looked up.</param>
    /// <param name="strPatternValue">Text that is searched.</param>
    /// <param name="ignoreCase">When true, both texts are lower-cased with <c>ToLower()</c> first.</param>
    /// <returns>The number of words of the input found in the searched text.</returns>
    public static int CleanMatchCountString(String strInput, String strPatternValue, Boolean ignoreCase)
    {
        string words    = UserDefinedFunctions.CleanUpProductName(strInput ?? "") ?? "";
        string haystack = UserDefinedFunctions.CleanUpProductName(strPatternValue ?? "") ?? "";

        if (ignoreCase)
        {
            words    = words.ToLower();
            haystack = haystack.ToLower();
        }

        int count = 0;

        foreach (string word in words.Split(new string[] { " " }, StringSplitOptions.RemoveEmptyEntries))
        {
            // \b on both sides restricts the hit to whole-word occurrences; Regex.Escape keeps
            // any regex metacharacters inside the word literal.
            if (Regex.IsMatch(haystack, string.Format(@"\b{0}\b", Regex.Escape(word))))
            {
                count++;
            }
        }

        return(count);
    }
Beispiel #7
0
    /// <summary>
    /// Splits a product name on a delimiter, sorts the pieces and re-joins the pieces that
    /// are long enough with single spaces.
    /// </summary>
    /// <remarks>
    /// The input is cleaned with <c>UserDefinedFunctions.CleanUpProductName</c> first. Empty pieces
    /// are discarded by the split. Sorting uses the default string ordering of <c>List.Sort()</c>.
    /// The output is always space-separated, regardless of <paramref name="delimiter"/>.
    /// </remarks>
    /// <param name="str">The text to process.</param>
    /// <param name="delimiter">Separator used to split the cleaned text.</param>
    /// <param name="numRemoveLetters">Pieces whose length is not greater than this value are dropped.</param>
    /// <returns>The kept pieces in sorted order, joined by spaces, with trailing whitespace removed.</returns>
    public static string SortString(string str, string delimiter, int numRemoveLetters)
    {
        string cleaned = UserDefinedFunctions.CleanUpProductName(str);

        string[] separators = new string[] { delimiter };
        List <string> pieces = new List <string>(cleaned.Split(separators, StringSplitOptions.RemoveEmptyEntries));

        // Sort first, then filter, so the relative order of the kept pieces is exactly
        // what the sort produced.
        pieces.Sort();
        pieces.RemoveAll(piece => piece.Length <= numRemoveLetters);

        return(string.Join(" ", pieces.ToArray()).TrimEnd());
    }
Beispiel #8
0
    /// <summary>
    /// Splits a cleaned SQL string into pieces on the first character of a delimiter.
    /// </summary>
    /// <remarks>
    /// A SQL NULL <paramref name="str"/> is treated as empty text. The value is cleaned with
    /// <c>UserDefinedFunctions.CleanUpProductName</c>; a null clean-up result is treated as empty
    /// text too. Only the first character of <paramref name="delimiter"/> is used.
    /// </remarks>
    /// <param name="str">The text to split; may be SQL NULL.</param>
    /// <param name="delimiter">Delimiter characters; null, SQL NULL or empty means "do not split".</param>
    /// <returns>
    /// A string array: the single cleaned value when no delimiter is given, otherwise the
    /// pieces produced by splitting on the delimiter's first character.
    /// </returns>
    public static IEnumerable SqlArray(SqlString str, SqlChars delimiter)
    {
        // Comparing a SqlString with null through == yields SqlBoolean.Null, which is never
        // "true"; IsNull is the reliable test and avoids Value throwing on SQL NULL.
        string raw = str.IsNull ? "" : str.Value;

        string stringValue = UserDefinedFunctions.CleanUpProductName(raw) ?? "";

        // Return a single-element array if no delimiter is specified. IsNull is checked
        // before Length because Length is not available on a SQL NULL SqlChars.
        if (delimiter == null || delimiter.IsNull || delimiter.Length == 0)
        {
            return new string[] { stringValue };
        }

        // Split the string and return a string array.
        return(stringValue.Split(delimiter[0]));
    }
    /// <summary>
    /// Computes the Jaro-Winkler similarity of two product names.
    /// </summary>
    /// <remarks>
    /// Both inputs are upper-cased with <c>ToUpperInvariant</c> and cleaned with
    /// <c>UserDefinedFunctions.CleanUpProductName</c>. Identical cleaned strings score 1.0.
    /// Otherwise the score is <c>Jd + (l * p * (1 - Jd))</c>, where <c>Jd</c> comes from
    /// <c>JaroDistance</c>, <c>l</c> from <c>CommonPrefix</c> and <c>p</c> is 0.1.
    /// NOTE(review): whether <c>CommonPrefix</c> caps the prefix length is not visible here;
    /// confirm before assuming the score is bounded by 1.0.
    /// </remarks>
    /// <param name="firstWord">First text to compare.</param>
    /// <param name="secondWord">Second text to compare.</param>
    /// <returns>
    /// The similarity score; 0.0 when either argument, or either cleaned string, is null.
    /// </returns>
    [Microsoft.SqlServer.Server.SqlFunction]
    public static Double JaroWinklerDistance(String firstWord, String secondWord)
    {
        const double NoMatch     = 0.0;
        const double ExactMatch  = 1.0;

        // Bonus weighting for strings that start with the same characters.
        const double PrefixScale = 0.1;

        if ((firstWord == null) || (secondWord == null))
        {
            return(NoMatch);
        }

        // Normalise both inputs before comparing them.
        String left  = UserDefinedFunctions.CleanUpProductName(firstWord.ToUpperInvariant());
        String right = UserDefinedFunctions.CleanUpProductName(secondWord.ToUpperInvariant());

        if ((left == null) || (right == null))
        {
            return(NoMatch);
        }

        if (left == right)
        {
            return(ExactMatch);
        }

        double jaro         = JaroDistance(left, right);
        int    sharedPrefix = CommonPrefix(left, right);

        // Jaro-Winkler: Jd + (l * p * (1 - Jd))
        return(jaro + ((sharedPrefix * PrefixScale) * (1 - jaro)));
    }