/// <summary>
/// Writes a textual summary of name suffix and prefix frequencies to
/// <paramref name="builder"/>: exact name matches, a fixed set of well-known
/// suffixes, numeric suffixes, a fixed set of prefixes, and finally every
/// remaining suffix ordered by descending frequency.
/// </summary>
/// <param name="builder">Receives the generated text.</param>
private void AppendNameInfo(StringBuilder builder)
{
    builder.AppendFormat("Name equal: {0} times", SuffixFrequency(String.Empty)).AppendLine();

    List<String> standardSuffices = new List<String>() { "เหนือ", "ใต้", "พัฒนา", "ใหม่", "ทอง", "น้อย", "ใน" };
    List<String> standardPrefixes = new List<String>() { "ใหม่" };

    foreach (String suffix in standardSuffices)
    {
        builder.AppendFormat("Suffix {0}: {1} times", suffix, SuffixFrequency(suffix)).AppendLine();
    }

    // Evaluate the count before touching the builder so a failure leaves no partial line behind.
    String numberedCount = SuffixFrequencyNumbers().ToString();
    builder.Append("Suffix with number:").Append(numberedCount).AppendLine(" times");

    foreach (String prefix in standardPrefixes)
    {
        builder.AppendFormat("Prefix {0}: {1} times", prefix, PrefixFrequency(prefix)).AppendLine();
    }

    builder.AppendLine();
    builder.Append("Other suffices: ");

    // Collect every suffix that was not already reported above.
    var otherSuffices = new List<KeyValuePair<String, Int32>>();
    foreach (var entry in _newNameSuffix)
    {
        String normalized = ThaiNumeralHelper.ReplaceThaiNumerals(entry.Key);
        Boolean alreadyReported =
            standardSuffices.Contains(normalized)
            || String.IsNullOrEmpty(entry.Key)
            || normalized.IsNumeric();
        if (!alreadyReported)
        {
            otherSuffices.Add(entry);
        }
    }

    // Most frequent first.
    otherSuffices.Sort((left, right) => right.Value.CompareTo(left.Value));

    foreach (var entry in otherSuffices)
    {
        builder.Append(entry.Key).Append(" (").Append(entry.Value.ToString()).Append(") ");
    }

    builder.AppendLine();
}
/// <summary>
/// Sums the counts of all entries in <c>_newNameSuffix</c> whose key, after
/// Thai numerals have been replaced, is non-empty and numeric.
/// </summary>
/// <returns>The accumulated count of numeric suffixes.</returns>
protected Int32 SuffixFrequencyNumbers()
{
    Int32 total = 0;

    foreach (var entry in _newNameSuffix)
    {
        String normalized = ThaiNumeralHelper.ReplaceThaiNumerals(entry.Key);

        // Only non-empty, purely numeric suffixes contribute.
        if (String.IsNullOrEmpty(normalized) || !normalized.IsNumeric())
        {
            continue;
        }

        total += entry.Value;
    }

    return total;
}
/// <summary>
/// Parses one result row into a <see cref="GazetteEntry"/> by walking through
/// the text marker by marker: URI, title, volume, issue, publication date and
/// page. A trailing "[...]" part of the title is split off into the subtitle.
/// </summary>
/// <param name="value">Raw text of a single entry; tab characters are removed before parsing.</param>
/// <returns>
/// The parsed entry, or <c>null</c> when <paramref name="value"/> does not
/// contain the <c>EntryURL</c> marker.
/// </returns>
/// <remarks>
/// Marker searches are ordinal, because the markers are fixed text and must
/// not be subject to culture-specific matching rules. If a marker after
/// <c>EntryURL</c> is missing, the underlying <c>Substring</c> call still
/// throws, as before.
/// </remarks>
private GazetteEntry ParseSingeItem(String value)
{
    value = value.Replace("\t", "");

    Int32 position = value.IndexOf(EntryURL, StringComparison.Ordinal);
    if (position < 0)
    {
        return null;
    }

    GazetteEntry retval = new GazetteEntry();

    // URI: text between EntryURL and EntryURLend.
    position = position + EntryURL.Length;
    Int32 position2 = value.IndexOf(EntryURLend, StringComparison.Ordinal);
    retval.uri = value.Substring(position, position2 - position);
    value = value.Substring(position2);

    // Title: text between EntryTitle and EntryTitleEnd.
    position = value.IndexOf(EntryTitle, StringComparison.Ordinal) + EntryTitle.Length;
    position2 = value.IndexOf(EntryTitleEnd, StringComparison.Ordinal);
    retval.title = value.Substring(position, position2 - position).Trim();
    value = value.Substring(position2);

    // Volume: first EntryVolumeorPage column.
    position = value.IndexOf(EntryVolumeorPage, StringComparison.Ordinal) + EntryVolumeorPage.Length;
    position2 = value.IndexOf(ColumnEnd, position, StringComparison.Ordinal);
    String volume = value.Substring(position, position2 - position);
    retval.volume = Convert.ToByte(ThaiNumeralHelper.ReplaceThaiNumerals(volume));
    value = value.Substring(position2);

    // Issue.
    position = value.IndexOf(EntryIssue, StringComparison.Ordinal) + EntryIssue.Length;
    position2 = value.IndexOf(ColumnEnd, position, StringComparison.Ordinal);
    retval.issue = ThaiNumeralHelper.ReplaceThaiNumerals(value.Substring(position, position2 - position).Trim());
    value = value.Substring(position2);

    // Publication date.
    position = value.IndexOf(EntryDate, StringComparison.Ordinal) + EntryDate.Length;
    position2 = value.IndexOf(ColumnEnd, position, StringComparison.Ordinal);
    String date = value.Substring(position, position2 - position);
    retval.publication = ThaiDateHelper.ParseThaiDate(date);
    value = value.Substring(position2);

    // Page: the second EntryVolumeorPage column.
    position = value.IndexOf(EntryVolumeorPage, StringComparison.Ordinal) + EntryVolumeorPage.Length;
    position2 = value.IndexOf(ColumnEnd, position, StringComparison.Ordinal);
    String page = value.Substring(position, position2 - position);
    retval.page = ThaiNumeralHelper.ReplaceThaiNumerals(page);

    // Split "title [subtitle]" into its two parts.
    Int32 beginSubTitle = retval.title.LastIndexOf('[');
    if ((beginSubTitle >= 0) && retval.title.EndsWith("]", StringComparison.Ordinal))
    {
        retval.subtitle = retval.title.Substring(beginSubTitle + 1, retval.title.Length - beginSubTitle - 2).Trim();

        // Cut exactly at the bracket and let Trim() remove any separating
        // whitespace. Cutting at beginSubTitle - 1 threw when the title
        // started with '[' and dropped a real character when no space
        // preceded the bracket.
        retval.title = retval.title.Substring(0, beginSubTitle).Trim();
    }

    return retval;
}
/// <summary>
/// Parses a Thai date of the form "day month-name ... era-marker year" into a
/// <see cref="DateTime"/>. Thai numerals are replaced first, the month name is
/// looked up in <c>ThaiMonthNames</c>, and the year following the
/// <c>BuddhistEra</c> marker is converted by subtracting 543 (or 542, see below).
/// </summary>
/// <param name="value">The date text; surrounding whitespace and repeated spaces after the day are tolerated.</param>
/// <returns>The parsed date.</returns>
/// <remarks>
/// Malformed input still surfaces as an exception from the underlying call
/// (<c>Substring</c>, <c>Convert.ToInt32</c>, the <c>ThaiMonthNames</c> lookup
/// or the <see cref="DateTime"/> constructor).
/// </remarks>
public static DateTime ParseThaiDate(String value)
{
    // Trim first: a leading space would otherwise make the day substring
    // empty and fail in Convert.ToInt32.
    String date = ThaiNumeralHelper.ReplaceThaiNumerals(value).Trim();

    Int32 position = date.IndexOf(' ');
    Int32 day = Convert.ToInt32(date.Substring(0, position), CultureInfo.InvariantCulture);

    // TrimStart so that more than one space between day and month does not
    // produce an empty month name.
    date = date.Substring(position + 1).TrimStart();

    position = date.IndexOf(' ');
    String monthString = date.Substring(0, position).Trim();
    Int32 month = ThaiMonthNames[monthString];

    // TODO: Weren't there some very old ones with KhoSo as well?
    position = date.IndexOf(BuddhistEra, StringComparison.Ordinal) + BuddhistEra.Length;
    String yearString = date.Substring(position);
    Int32 year = Convert.ToInt32(yearString, CultureInfo.InvariantCulture);

    if (year < 2100)
    {
        // There are entries in KhoSo but with "พ.ศ." in the returned info.
        // NOTE(review): such a year then also passes through the 542 branch
        // below for months 1-3; this order is kept as-is, confirm it is intended.
        year = year + 543;
    }

    // Years below 2484 use an offset of 542 for months 1-3, otherwise 543.
    // NOTE(review): presumably because the Thai year started in April before
    // 2484 B.E. - confirm.
    if ((year < 2484) && (month < 4))
    {
        year = year - 542;
    }
    else
    {
        year = year - 543;
    }

    return new DateTime(year, month, day);
}
/// <summary>
/// Proposes romanizations for entities that have a name but no English name
/// yet, based on the known <c>Romanizations</c>.
/// </summary>
/// <remarks>
/// For each candidate entity the lookup proceeds in this order:
/// <list type="number">
/// <item>The name itself, then the name after one and after two
/// <c>StripBanOrChumchon</c> passes, looked up directly.</item>
/// <item>The stripped name with numerals removed, split into
/// "known base name + known suffix".</item>
/// <item>If that found nothing, the same name split into
/// "known prefix (or suffix used as prefix) + known base name".</item>
/// </list>
/// Steps 2 and 3 may add more than one suggestion for the same entity.
/// </remarks>
/// <param name="romanizationMissing">Receives an entry for every candidate entity for which no suggestion was found.</param>
/// <param name="entitiesToCheck">Entities to inspect; those with an empty name or an existing English name are skipped.</param>
/// <returns>The list of suggested romanizations.</returns>
public List<RomanizationEntry> FindRomanizationSuggestions(out List<RomanizationEntry> romanizationMissing, IEnumerable<Entity> entitiesToCheck)
{
    var result = new List<RomanizationEntry>();
    romanizationMissing = new List<RomanizationEntry>();

    // Loop-invariant data, built once instead of once per entity.
    Char[] digits = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' };
    var prefixes = ThaiLanguageHelper.NamePrefixRomanizations.Union(ThaiLanguageHelper.NameSuffixRomanizations).ToList();

    foreach (var entityToCheck in entitiesToCheck)
    {
        if (String.IsNullOrEmpty(entityToCheck.name) || !String.IsNullOrEmpty(entityToCheck.english))
        {
            continue;
        }

        // Step 1: direct lookup, stripping the Ban/Chumchon prefix up to
        // twice (a Chumchon may have the name "Chumchon Ban ...").
        String romanization = null;
        String searchName = entityToCheck.name;
        Boolean foundDirect = Romanizations.TryGetValue(searchName, out romanization);
        for (Int32 round = 0; !foundDirect && (round < 2); round++)
        {
            searchName = searchName.StripBanOrChumchon();
            foundDirect = !String.IsNullOrEmpty(searchName) && Romanizations.TryGetValue(searchName, out romanization);
        }

        if (foundDirect)
        {
            result.Add(CreateRomanizationEntry(entityToCheck, romanization));
            continue;
        }

        // Normalise for the composed lookups: strip the prefix, convert
        // Thai numerals and blank out every digit.
        Boolean found = false;
        String name = ThaiNumeralHelper.ReplaceThaiNumerals(entityToCheck.name.StripBanOrChumchon());
        foreach (Char digit in digits)
        {
            name = name.Replace(digit, ' ');
        }
        name = name.Trim();

        // Step 2: "<base name><suffix>". The test is made on the same
        // normalised string that is sliced afterwards, so a trailing number
        // does not hide the suffix and the slice length cannot go negative.
        // Ordinal matching guarantees the match length equals Key.Length.
        foreach (var keyValuePair in ThaiLanguageHelper.NameSuffixRomanizations)
        {
            if (!name.EndsWith(keyValuePair.Key, StringComparison.Ordinal))
            {
                continue;
            }

            String searchString = name.Substring(0, name.Length - keyValuePair.Key.Length).StripBanOrChumchon();
            String baseRomanization;
            if (String.IsNullOrEmpty(searchString))
            {
                result.Add(CreateRomanizationEntry(entityToCheck, keyValuePair.Value));
                found = true;
            }
            else if (Romanizations.TryGetValue(searchString, out baseRomanization))
            {
                result.Add(CreateRomanizationEntry(entityToCheck, baseRomanization + " " + keyValuePair.Value));
                found = true;
            }
        }

        // Step 3: "<prefix><base name>", where suffix words are also
        // accepted in prefix position.
        if (!found)
        {
            foreach (var keyValuePair in prefixes)
            {
                if (!name.StartsWith(keyValuePair.Key, StringComparison.Ordinal))
                {
                    continue;
                }

                String searchString = name.Substring(keyValuePair.Key.Length);
                String baseRomanization;
                if (String.IsNullOrEmpty(searchString))
                {
                    result.Add(CreateRomanizationEntry(entityToCheck, keyValuePair.Value));
                    found = true;
                }
                else if (Romanizations.TryGetValue(searchString, out baseRomanization))
                {
                    result.Add(CreateRomanizationEntry(entityToCheck, keyValuePair.Value + " " + baseRomanization));
                    found = true;
                }
            }
        }

        if (!found)
        {
            romanizationMissing.Add(new RomanizationEntry(entityToCheck.geocode, entityToCheck.name));
        }
    }

    return result;
}