/// <summary>
/// Normalizes both input strings with <see cref="Normalize(string, NormalizationOptions)"/>
/// and returns the result of the span-based <c>LevenshteinDistance</c> overload
/// applied to the normalized values.
/// </summary>
/// <param name="a">First string to compare; normalized before comparison.</param>
/// <param name="b">Second string to compare; normalized before comparison.</param>
/// <param name="flags">Normalization options applied to both strings.</param>
/// <returns>The value returned by the span-based overload for the normalized inputs.</returns>
public static int LevenshteinDistance(string a, string b, NormalizationOptions flags = NormalizationOptions.Default)
{
    // Normalize both operands first (in argument order), then compare them as spans.
    string normalizedA = Normalize(a, flags);
    string normalizedB = Normalize(b, flags);

    var left = normalizedA.AsSpan();
    var right = normalizedB.AsSpan();

    return LevenshteinDistance(left, right);
}
/// <summary>
/// Builds a score matrix for every pairing of the strings in <paramref name="a"/>
/// with the strings in <paramref name="b"/>.
/// </summary>
/// <param name="a">Strings that index the first dimension of the result.</param>
/// <param name="b">Strings that index the second dimension of the result.</param>
/// <param name="flags">Normalization options applied to every string before scoring.</param>
/// <returns>
/// A matrix where element <c>[i, j]</c> is the span-based
/// <c>LevenshteinDistanceScore</c> of the normalized <c>i</c>-th item of
/// <paramref name="a"/> and the normalized <c>j</c>-th item of <paramref name="b"/>.
/// </returns>
public static double[,] LevenshteinDistanceScore(IEnumerable<string> a, IEnumerable<string> b, NormalizationOptions flags = NormalizationOptions.Default)
{
    // Normalize each sequence exactly once up front so the nested loop only scores.
    string[] rows = a.Select(item => Normalize(item, flags)).ToArray();
    string[] columns = b.Select(item => Normalize(item, flags)).ToArray();

    int rowCount = rows.Length;
    int columnCount = columns.Length;
    var result = new double[rowCount, columnCount];

    for (int row = 0; row < rowCount; row++)
    {
        var rowSpan = rows[row].AsSpan();

        for (int column = 0; column < columnCount; column++)
        {
            result[row, column] = LevenshteinDistanceScore(rowSpan, columns[column].AsSpan());
        }
    }

    return result;
}
/// <summary>
/// Produces a normalized copy of <paramref name="a"/>.
/// </summary>
/// <remarks>
/// The text is always converted to Unicode normalization form D; a <c>null</c>
/// input is treated as an empty string. Depending on <paramref name="flags"/>:
/// <list type="bullet">
/// <item><description><c>Trim</c>: leading and trailing white space is removed and every
/// inner run of white-space characters is replaced by a single space character.</description></item>
/// <item><description><c>Lower</c>: the text is lower-cased with the invariant culture.</description></item>
/// <item><description><c>Upper</c>: the text is upper-cased with the invariant culture. This is
/// applied after <c>Lower</c>, so it wins when both flags are set.</description></item>
/// </list>
/// </remarks>
/// <param name="a">Text to normalize; may be <c>null</c>.</param>
/// <param name="flags">Normalization steps to apply.</param>
/// <returns>The normalized text, never <c>null</c>.</returns>
public static string Normalize(string a, NormalizationOptions flags = NormalizationOptions.Default)
{
    a = a?.Normalize(NormalizationForm.FormD) ?? string.Empty;

    if (flags.HasFlag(NormalizationOptions.Trim))
    {
        // Drop leading and trailing white space in one step.
        var trimmed = a.AsSpan().Trim();

        var builder = new StringBuilder(trimmed.Length);
        bool lastWasWhiteSpace = false;

        // Collapse every run of white space into a single space character.
        foreach (char ch in trimmed)
        {
            bool isWhiteSpace = char.IsWhiteSpace(ch);

            if (!isWhiteSpace)
            {
                builder.Append(ch);
            }
            else if (!lastWasWhiteSpace)
            {
                builder.Append(' ');
            }

            lastWasWhiteSpace = isWhiteSpace;
        }

        a = builder.ToString();
    }

    if (flags.HasFlag(NormalizationOptions.Lower))
    {
        a = a.ToLowerInvariant();
    }

    if (flags.HasFlag(NormalizationOptions.Upper))
    {
        a = a.ToUpperInvariant();
    }

    return a;
}
/// <summary>
/// Writes the values of a naming options object to <see cref="Console.Out"/> as a
/// JSON-like fragment: a header line made of <paramref name="indent"/> and
/// <paramref name="mode"/> followed by an opening brace, one property per option
/// (emitted through <c>printJsonProperty</c>), and a closing brace line.
/// </summary>
/// <remarks>
/// Properties are emitted in this order: <c>case</c>, <c>pluralization</c>,
/// <c>prefix</c>, <c>suffix</c>, <c>transformation</c>,
/// <c>pluralize_if_ends_with_word_only</c>, <c>ignore_all_caps</c> and, only when
/// <c>MaxUpperCaseWordLength</c> is greater than 1, <c>max_uppercase_word_length</c>.
/// A <c>null</c> prefix/suffix is written as the literal <c>null</c>; otherwise the
/// value is wrapped in double quotes without any further escaping.
/// NOTE(review): <c>printJsonProperty</c> and the closing brace of this method are
/// not visible in this part of the file - presumably they follow immediately; confirm.
/// </remarks>
/// <param name="indent">Text written at the start of the header and closing lines and passed to every <c>printJsonProperty</c> call.</param>
/// <param name="mode">Name written on the header line before the opening brace.</param>
/// <param name="options">Options whose values are printed.</param>
/// <exception cref="InvalidOperationException">
/// The casing, pluralization or transformation value of <paramref name="options"/>
/// is not one of the values handled by the corresponding switch.
/// </exception>
private static void PrintNamingOptionDefaults(string indent, string mode, NormalizationOptions options) { Console.Out.WriteLine("{0} {1}: {{", indent, mode); string value; switch (options.Casing) { case NameCasing.None: value = "\"none\""; break; case NameCasing.Pascal: value = "\"pascal_case\""; break; case NameCasing.CamelCase: value = "\"camel_case\""; break; case NameCasing.SnakeCase: value = "\"snake_case\""; break; case NameCasing.LowerCase: value = "\"lower_case\""; break; case NameCasing.UpperCase: value = "\"upper_case\""; break; case NameCasing.T4CompatPluralized: value = "\"t4_pluralized\""; break; case NameCasing.T4CompatNonPluralized: value = "\"t4\""; break; default: throw new InvalidOperationException($"Unknown casing option: {options.Casing}"); } printJsonProperty(indent, "case", value); switch (options.Pluralization) { case Pluralization.None: value = "\"none\""; break; case Pluralization.Singular: value = "\"singular\""; break; case Pluralization.Plural: value = "\"plural\""; break; case Pluralization.PluralIfLongerThanOne: value = "\"plural_multiple_characters\""; break; default: throw new InvalidOperationException($"Unknown pluralization option: {options.Pluralization}"); } printJsonProperty(indent, "pluralization", value); printJsonProperty(indent, "prefix", options.Prefix == null ? "null" : $"\"{options.Prefix}\""); printJsonProperty(indent, "suffix", options.Suffix == null ? "null" : $"\"{options.Suffix}\""); switch (options.Transformation) { case NameTransformation.SplitByUnderscore: value = "\"split_by_underscore\""; break; case NameTransformation.Association: value = "\"association\""; break; default: throw new InvalidOperationException($"Unknown transformation option: {options.Transformation}"); } printJsonProperty(indent, "transformation", value); printJsonProperty(indent, "pluralize_if_ends_with_word_only", options.PluralizeOnlyIfLastWordIsText ? "true" : "false"); printJsonProperty(indent, "ignore_all_caps", options.DontCaseAllCaps ? 
"true" : "false"); if (options.MaxUpperCaseWordLength > 1) { printJsonProperty(indent, "max_uppercase_word_length", options.MaxUpperCaseWordLength.ToString(CultureInfo.InvariantCulture)); } Console.Out.WriteLine("{0} }}", indent);
/// <summary>
/// Returns a new string whose textual value is the normalized form of
/// <paramref name="source"/>.
/// </summary>
/// <remarks>
/// <para>
/// When <see cref="NormalizationOptions.Whitespace"/> is set, leading and trailing
/// white space is removed and every inner run of white-space characters is
/// replaced by a single space character.
/// </para>
/// <para>
/// When <see cref="NormalizationOptions.ControlCharacters"/> is set, every control
/// character is removed, including control characters at the very end of the
/// string.
/// </para>
/// <para>
/// When both options are set, control characters that are also white space (for
/// example tab or line feed) are treated as white space, and white space on
/// either side of a removed control character is collapsed into one space.
/// </para>
/// </remarks>
/// <param name="source">The <see cref="String"/> to normalize.</param>
/// <param name="options">A bitwise combination of the
/// <see cref="NormalizationOptions"/> values.</param>
/// <returns>A new, normalized string.</returns>
public static string Clean(this string source, NormalizationOptions options)
{
    Contracts.Requires.NotNull(source, "source");

    // Reject negative values and any bit outside the two supported flags.
    if ((int)options < 0 || ((int)options & (int)~(NormalizationOptions.ControlCharacters | NormalizationOptions.Whitespace)) != 0)
    {
        throw ExceptionBuilder.CreateArgumentException(
            "options",
            String.Format(CultureInfo.CurrentUICulture, Resources.Argument_EnumIllegalVal, (int)options));
    }

    bool normalizeWhitespace = (options & NormalizationOptions.Whitespace) == NormalizationOptions.Whitespace;
    bool removeControlCharacters = (options & NormalizationOptions.ControlCharacters) == NormalizationOptions.ControlCharacters;

    StringBuilder builder = new StringBuilder(source.Length);

    // Set while inside a white-space run. The single replacement space is only
    // written once a following character is kept, which trims trailing white
    // space; the builder-length check below trims leading white space.
    bool pendingSpace = false;

    foreach (char c in source)
    {
        // White space is tested first so that tab/line feed become a space
        // rather than being dropped as control characters when both flags are set.
        if (normalizeWhitespace && Char.IsWhiteSpace(c))
        {
            pendingSpace = true;
            continue;
        }

        if (removeControlCharacters && Char.IsControl(c))
        {
            continue;
        }

        if (pendingSpace)
        {
            if (builder.Length > 0)
            {
                builder.Append(' ');
            }

            pendingSpace = false;
        }

        builder.Append(c);
    }

    return builder.ToString();
}
/// <summary>
/// Generates association property/method name.
/// </summary>
/// <remarks>
/// The logic is deliberately convoluted: in T4 compatibility mode
/// (<see cref="NameTransformation.Association"/>) it approximates the naming rules
/// of the old T4 templates to reduce migration effort. All string comparisons
/// are ordinal so the generated name does not depend on the current culture.
/// </remarks>
/// <param name="thisTable">This table database name. Source table for direct relation and target for backreference.</param>
/// <param name="otherTable">Other table database name. Target table for direct relation and source for backreference.</param>
/// <param name="firstFromColumnName">Foreign key column name. Specified only for non-composite FK for from/source association.</param>
/// <param name="fkName">Foreign key constraint name.</param>
/// <param name="settings">Name generation/normalization rules.</param>
/// <param name="defaultSchemas">List of default database schema names.</param>
/// <returns>Property/method name for association.</returns>
private string GenerateAssociationName(
    SqlObjectName thisTable,
    SqlObjectName otherTable,
    string? firstFromColumnName,
    string fkName,
    NormalizationOptions settings,
    ISet<string> defaultSchemas)
{
    var name = otherTable.Name;

    // T4 compatibility mode uses logic similar to the one used by old T4 templates
    if (settings.Transformation == NameTransformation.Association)
    {
        // approximate port of SetForeignKeyMemberName T4 method.
        // Approximate, because not all logic could be converted due to difference in generation pipeline

        // TODO: customization/interceptors not implemented yet
        //if (schemaOptions.GetAssociationMemberName != null)
        //{
        //    newName = schemaOptions.GetAssociationMemberName(key);
        //    if (newName != null)
        //        name = ToValidName(newName);
        //}

        string newName;

        if (firstFromColumnName != null && firstFromColumnName.EndsWith("id", System.StringComparison.OrdinalIgnoreCase))
        {
            // column name provided and ends with ID suffix (any casing):
            // trim the ID part and possible _ connectors before it.
            // Here name could become empty if column name was just ID
            newName = firstFromColumnName
                .Substring(0, firstFromColumnName.Length - "id".Length)
                .TrimEnd('_');
        }
        else
        {
            // column name not usable - derive association name from FK name
            newName = fkName;

            // remove FK_ prefix
            if (newName.StartsWith("FK_", System.StringComparison.Ordinal))
            {
                newName = newName.Substring(3);
            }

            // - split name into words using _ as separator
            // - drop empty words and words equal to the target table name
            // - drop words equal to the target schema, but only when that schema is set
            //   and is not one of the default schemas
            // - concat remaining words back into single name
            newName = string.Concat(newName
                .Split('_')
                .Where(word =>
                    word.Length > 0
                    && word != otherTable.Name
                    && (otherTable.Schema == null
                        || defaultSchemas.Contains(otherTable.Schema)
                        || word != otherTable.Schema)));

            // remove trailing digits
            // note that new implementation matches all decimal digits, not just 0-9 as it was in T4
            newName = string.Concat(newName
                .EnumerateCharacters()
                .Reverse()
                .SkipWhile(ch => ch.category == UnicodeCategory.DecimalDigitNumber)
                .Select(ch => ch.codePoint)
                .Reverse());
        }

        // if resulting name is empty - just use:
        // - for self-reference relation (to same table): table name
        // - otherwise: foreign key name without changes
        if (string.IsNullOrEmpty(newName))
        {
            newName = thisTable == otherTable ? thisTable.Name : fkName;
        }

        name = newName;
    }

    return _namingServices.NormalizeIdentifier(settings, name);
}