/// <summary>
/// Rewrites number words in <paramref name="value"/> as digits by applying, in order,
/// the pattern tables DIRECT_NUMS, ORDINALS, TEN_PREFIXES and BIG_PREFIXES, then the
/// "a half" handling. Intermediate numbers are tagged with a "&lt;num&gt;" marker that is
/// stripped before returning.
/// </summary>
/// <param name="value">Text to convert.</param>
/// <returns>The text with the recognised number words replaced by digits.</returns>
public static string Numerize(string value)
{
    var result = value;

    // preprocess: collapse runs of spaces and turn a hyphen between two non-digits into a space.
    // will mutilate hyphenated-words but shouldn't matter for date extraction
    result = @" +|([^\d])-([^\d])".Compile().Replace(result, "$1 $2");

    // take the 'a' out so it doesn't turn into a 1, save the half for the end
    result = result.Replace("a half", "haAlf");

    // easy/direct replacements
    DIRECT_NUMS.ForEach<string, string>(
        (p, r) => result = Regex.Replace(result, p, "<num>" + r));

    // ordinals keep the last two characters of their pattern as a suffix
    ORDINALS.ForEach<string, string>(
        (p, r) => result = Regex.Replace(result, p, "<num>" + r + p.LastCharacters(2)));

    // ten, twenty, etc. followed by an already-numerized single digit: add the two together.
    TEN_PREFIXES.ForEach<string, int>(
        (p, r) => result = Regex.Replace(
            result,
            "(?:" + p + @") *<num>(\d(?=[^\d]|$))*",
            match =>
            {
                // The digit group is optional (it sits under '*'); when nothing was captured,
                // treat it as 0 instead of letting int.Parse("") throw FormatException.
                var unitDigits = match.Groups[1].Value;
                var units = unitDigits.Length == 0 ? 0 : int.Parse(unitDigits);
                return "<num>" + (r + units);
            }));

    // remaining standalone tens
    TEN_PREFIXES.ForEach<string, int>(
        (p, r) => result = Regex.Replace(result, p, "<num>" + r.ToString()));

    // hundreds, thousands, millions, etc.
    BIG_PREFIXES.ForEach<string, long>(
        (p, r) =>
        {
            result = Regex.Replace(
                result,
                @"(?:<num>)?(\d*) *" + p,
                match =>
                {
                    // A bare prefix with no leading digits counts as one unit, matching
                    // NumerizeFrench; previously int.Parse("") threw FormatException here.
                    var digits = match.Groups[1].Value;
                    var multiplier = digits.Length == 0 ? 1 : int.Parse(digits);
                    return "<num>" + (r * multiplier).ToString();
                });

            // Andition is defined elsewhere in this file; presumably it folds adjacent
            // <num> tokens together -- TODO confirm.
            result = Andition(result);
        });

    // fractional addition
    // I'm not combining this with the previous block as using float addition complicates the strings
    // (with extraneous .0"s and such )
    // NOTE(review): ToString() here uses the current culture's decimal separator -- confirm that is intended.
    result = Regex.Replace(
        result,
        @"(\d +)(?: |and | -)*haAlf",
        match => (float.Parse(match.Groups[1].Value) + 0.5).ToString());

    result = result.Replace("<num>", "");
    return result;
}
/// <summary>
/// French counterpart of Numerize: rewrites number words in <paramref name="result"/> as digits
/// using the FRENCH_DIRECT_NUMS, FRENCH_ORDINALS, FRENCH_TEN_PREFIXES and FRENCH_BIG_PREFIXES
/// pattern tables, then handles "et demi".
/// Limited to 69 (presumably the largest tens-plus-units value handled -- TODO confirm).
/// </summary>
/// <param name="result">Text to convert.</param>
/// <returns>The text with the recognised number words replaced by digits.</returns>
private static string NumerizeFrench(string result)
{
    // preprocess: collapse runs of spaces and turn a hyphen between two non-digits into a space.
    result = @" +|([^\d])-([^\d])".Compile().Replace(result, "$1 $2");

    // protect "et demi" from the replacements below; it is resolved at the end
    result = result.Replace("et demi", "etDemi");

    // easy/direct replacements
    FRENCH_DIRECT_NUMS.ForEach<string, string>(
        (p, r) => result = Regex.Replace(result, p, "<num>" + r));

    // ordinals keep the last two characters of their pattern as a suffix
    FRENCH_ORDINALS.ForEach<string, string>(
        (p, r) => result = Regex.Replace(result, p, "<num>" + r + p.LastCharacters(2)));

    // ten, twenty, etc. (optionally joined with " et") followed by an already-numerized digit
    FRENCH_TEN_PREFIXES.ForEach<string, int>(
        (p, r) => result = Regex.Replace(
            result,
            "(?:" + p + @")(?: et)? *<num>(\d(?=[^\d]|$))*",
            match =>
            {
                // The digit group is optional (it sits under '*'); when nothing was captured,
                // treat it as 0 instead of letting int.Parse("") throw FormatException.
                var unitDigits = match.Groups[1].Value;
                var units = unitDigits.Length == 0 ? 0 : int.Parse(unitDigits);
                return "<num>" + (r + units);
            }));

    // remaining standalone tens
    FRENCH_TEN_PREFIXES.ForEach<string, int>(
        (p, r) => result = Regex.Replace(result, p, "<num>" + r.ToString()));

    // hundreds, thousands, millions, etc.
    FRENCH_BIG_PREFIXES.ForEach<string, long>(
        (p, r) =>
        {
            result = Regex.Replace(
                result,
                @"(?:<num>)?(\d*) *" + p,
                match =>
                {
                    // a bare prefix with no leading digits counts as one unit
                    var digits = match.Groups[1].Value;
                    var multiplier = digits.Length == 0 ? 1 : int.Parse(digits);
                    return "<num>" + (r * multiplier).ToString();
                });

            // Andition is defined elsewhere in this file; presumably it folds adjacent
            // <num> tokens together -- TODO confirm.
            result = Andition(result);
        });

    // fractional addition
    // I'm not combining this with the previous block as using float addition complicates the strings
    // (with extraneous .0"s and such )
    // NOTE(review): ToString() here uses the current culture's decimal separator -- confirm that is intended.
    result = Regex.Replace(
        result,
        @"(\d +)(?: |et | -)*etDemi",
        match => (float.Parse(match.Groups[1].Value) + 0.5).ToString());

    result = result.Replace("<num>", "");
    return result;
}
/// <summary>
/// Rewrites number words in <paramref name="value"/> as digits, like the single-argument
/// overload, with an optional time mode that joins a number and a following tens value
/// with a colon.
/// </summary>
/// <param name="value">Text to convert.</param>
/// <param name="intendTime">
/// When true, standalone tens are tagged with "&lt;ten&gt;" and every numerized one- or
/// two-digit number after the first is rewritten as ":NN" and attached to the preceding text.
/// When false, the "&lt;ten&gt;" handling is skipped.
/// </param>
/// <returns>The text with the recognised number words replaced by digits.</returns>
public static string Numerize(string value, bool intendTime)
{
    var result = value;

    // preprocess: collapse runs of spaces and turn a hyphen between two non-digits into a space.
    // will mutilate hyphenated-words but shouldn't matter for date extraction
    result = @" +|([^\d])-([^\d])".Compile().Replace(result, "$1 $2");

    // take the 'a' out so it doesn't turn into a 1, save the half for the end
    result = result.Replace("a half", "haAlf");

    // easy/direct replacements
    DIRECT_NUMS.ForEach<string, string>(
        (p, r) => result = Regex.Replace(result, p, "<num>" + r));

    // ordinals keep the last two characters of their pattern as a suffix
    ORDINALS.ForEach<string, string>(
        (p, r) => result = Regex.Replace(result, p, "<num>" + r + p.LastCharacters(2)));

    // ten, twenty, etc. followed by an already-numerized single digit: add the two together.
    TEN_PREFIXES.ForEach<string, int>(
        (p, r) => result = Regex.Replace(
            result,
            "(?:" + p + @") *<num>(\d(?=[^\d]|$))*",
            match =>
            {
                // The digit group is optional (it sits under '*'); when nothing was captured,
                // treat it as 0 instead of letting int.Parse("") throw FormatException.
                var unitDigits = match.Groups[1].Value;
                var units = unitDigits.Length == 0 ? 0 : int.Parse(unitDigits);
                return "<num>" + (r + units);
            }));

    // remaining standalone tens; in time mode they are additionally tagged with <ten>
    var prefix = intendTime ? "<ten><num>" : "<num>";
    TEN_PREFIXES.ForEach<string, int>(
        (p, r) => result = Regex.Replace(result, p, prefix + r.ToString()));

    if (intendTime)
    {
        // Split at each <ten> marker; every piece after the first is cleaned of "- <num>"
        // and passed through Andition (defined elsewhere in this file) before re-joining.
        var tens = result.Split(new string[] { "<ten>" }, StringSplitOptions.None);
        var newResult = tens[0];
        for (int i = 1; i < tens.Length; i++)
        {
            var clean = tens[i].Replace("- <num>", "<num>");
            newResult += Andition(clean);
        }

        result = newResult;

        var pattern = @"<num>\b(0*(?:[0-9][0-9]?))\b";
        var matches = Regex.Matches(result, pattern);
        var length = " <num>".Length;

        // Walk backwards so edits never shift the indices of matches still to be processed.
        // The first match (index 0) is deliberately left untouched.
        for (int i = matches.Count - 1; i > 0; i--)
        {
            Match match = matches[i];
            var index = match.Groups[1].Index;

            // pad a single digit to two digits
            if (match.Groups[1].Value.Length == 1)
            {
                result = result.Insert(index, "0");
            }

            result = result.Insert(index, ":");

            // drop the " <num>"-sized run of characters that precedes the digits
            result = result.Remove(index - length, length);
        }
    }
    else
    {
        result = result.Replace("<ten>", "");
    }

    // hundreds, thousands, millions, etc.
    BIG_PREFIXES.ForEach<string, long>(
        (p, r) =>
        {
            result = Regex.Replace(
                result,
                @"(?:<num>)?(\d*) *" + p,
                match =>
                {
                    // A bare prefix with no leading digits counts as one unit, matching
                    // NumerizeFrench; previously int.Parse("") threw FormatException here.
                    var digits = match.Groups[1].Value;
                    var multiplier = digits.Length == 0 ? 1 : int.Parse(digits);
                    return "<num>" + (r * multiplier).ToString();
                });

            // Andition is defined elsewhere in this file; presumably it folds adjacent
            // <num> tokens together -- TODO confirm.
            result = Andition(result);
        });

    // fractional addition
    // I'm not combining this with the previous block as using float addition complicates the strings
    // (with extraneous .0"s and such )
    // NOTE(review): ToString() here uses the current culture's decimal separator -- confirm that is intended.
    result = Regex.Replace(
        result,
        @"(\d +)(?: |and | -)*haAlf",
        match => (float.Parse(match.Groups[1].Value) + 0.5).ToString());

    result = result.Replace("<num>", "");
    return result;
}