/// <summary>
/// Get the phonetic value of a word, falling back on a word with a similar ending
/// </summary>
/// <param name="word">word for which we need phonetic value</param>
/// <param name="phoneticTable">phonetic table</param>
/// <returns>value from the table if it has one, otherwise the result of the similar-ending lookup (may be null)</returns>
private string GetPhoneticValue(string word, PhoneticTable phoneticTable)
{
    // The similar-ending lookup only runs when the direct lookup yields null.
    return phoneticTable.GetPhoneticValueOf(word)
        ?? GetPhoneticValueOfWordUsingSimilarWordEnding(word, phoneticTable);
}
/// <summary>
/// Expand phonetic table with words of the frequent word list whose
/// phonetic value can be derived by concatenation or by splitting
/// </summary>
/// <param name="phoneticTable">phonetic table receiving the new words</param>
/// <param name="frequentWordListFile">words to try to add</param>
public void Expand(PhoneticTable phoneticTable, WordListFile frequentWordListFile)
{
    bool tableGrew;
    do
    {
        int sizeAtPassStart = phoneticTable.Count;

        foreach (string candidate in frequentWordListFile)
        {
            if (phoneticTable.Contains(candidate))
            {
                continue;
            }

            // Concatenation is attempted first; splitting only when it yields null.
            string derivedValue = phoneticConcatenator.TryConcatenate(candidate, phoneticTable)
                ?? phoneticSplitter.TrySplit(candidate, phoneticTable);

            if (derivedValue != null)
            {
                phoneticTable.Add(candidate, derivedValue);
            }
        }

        // Another pass is made whenever the table count changed during this one.
        tableGrew = sizeAtPassStart != phoneticTable.Count;
    } while (tableGrew);
}
/// <summary>
/// Get phonetic value of a word from two specific parts or null if the parts do not form the word
/// </summary>
/// <param name="word">word for which we need phonetic value</param>
/// <param name="wordStart">candidate first part</param>
/// <param name="wordEnd">candidate second part</param>
/// <param name="phoneticTable">phonetic table</param>
/// <returns>both parts' phonetic values joined by " [dash] ", or null when wordStart + wordEnd is not word</returns>
private string TryConcatenate(string word, string wordStart, string wordEnd, PhoneticTable phoneticTable)
{
    if (word != wordStart + wordEnd)
    {
        return null;
    }

    string startPhonetic = phoneticTable.GetPhoneticValueOf(wordStart);
    string endPhonetic = phoneticTable.GetPhoneticValueOf(wordEnd);
    return startPhonetic + " [dash] " + endPhonetic;
}
/// <summary>
/// Get the words of the phonetic table that the given word starts with
/// </summary>
/// <param name="word">word we are trying to build by concatenation</param>
/// <param name="phoneticTable">phonetic table</param>
/// <returns>table words that are a prefix of word (empty list if none)</returns>
private List<string> GetListStartsWith(string word, PhoneticTable phoneticTable)
{
    List<string> listStartsWith = new List<string>();

    foreach (string currentWord in phoneticTable.EnglishWordList)
    {
        // The caller keeps a pair only when word == wordStart + wordEnd, so a
        // usable start must be a prefix of word. The previous test
        // (currentWord.StartsWith(word)) selected words longer than the target,
        // which can never satisfy that equality.
        // Ordinal comparison matches the ordinal == used by the caller.
        if (word.StartsWith(currentWord, System.StringComparison.Ordinal))
        {
            listStartsWith.Add(currentWord);
        }
    }

    return listStartsWith;
}
/// <summary>
/// Get a phonetic value for a word by looking for a table word sharing its last letters
/// </summary>
/// <param name="word">word for which we need phonetic value</param>
/// <param name="phoneticTable">phonetic table</param>
/// <returns>
/// null when GetWordEndsWith finds nothing, otherwise the table's phonetic
/// value for what GetWordEndsWith returned
/// </returns>
private string GetPhoneticValueOfWordUsingSimilarWordEnding(string word, PhoneticTable phoneticTable)
{
    // Use the last howManyEnglishLetterForEnding letters, or the whole word when it is shorter.
    string englishEnding = word.Length >= howManyEnglishLetterForEnding
        ? word.Substring(word.Length - howManyEnglishLetterForEnding)
        : word;

    string similarWord = GetWordEndsWith(englishEnding, phoneticTable);

    return similarWord == null
        ? null
        : phoneticTable.GetPhoneticValueOf(similarWord);
}
/// <summary>
/// Build the rhyme chart file: one "word : phonetic ending" line per
/// frequent word for which a phonetic value is available
/// </summary>
/// <param name="phoneticTable">phonetic table</param>
/// <param name="frequentWordListFile">words to write to the chart</param>
/// <param name="rhymeChartFile">path of the file to write</param>
public void Build(PhoneticTable phoneticTable, WordListFile frequentWordListFile, string rhymeChartFile)
{
    using (StreamWriter chartWriter = new StreamWriter(rhymeChartFile))
    {
        foreach (string entry in frequentWordListFile)
        {
            string entryPhonetic = GetPhoneticValue(entry, phoneticTable);
            if (entryPhonetic == null)
            {
                // No phonetic value could be obtained: the word is left out of the chart.
                continue;
            }

            chartWriter.WriteLine(entry + " : " + GetPhoneticEnding(entryPhonetic));
        }
    }
}
/// <summary>
/// Get phonetic value by concatenating two words of the table, or null if fails
/// </summary>
/// <param name="word">word for which we need phonetic value</param>
/// <param name="phoneticTable">phonetic table</param>
/// <returns>phonetic value of the first start/end pair that forms the word, or null if no pair does</returns>
public string TryConcatenate(string word, PhoneticTable phoneticTable)
{
    List<string> startCandidates = GetListStartsWith(word, phoneticTable);
    List<string> endCandidates = GetListEndsWith(word, phoneticTable);

    // Try every (start, end) pair and stop at the first one that succeeds.
    foreach (string startCandidate in startCandidates)
    {
        foreach (string endCandidate in endCandidates)
        {
            string joined = TryConcatenate(word, startCandidate, endCandidate, phoneticTable);
            if (joined != null)
            {
                return joined;
            }
        }
    }

    return null;
}
/// <summary>
/// Repair phonetic table: every word variant matching this repairer's ending
/// is taken out of its homophone group and added back with a rebuilt phonetic value
/// </summary>
/// <param name="phoneticTable">phonetic table</param>
public void Repair(PhoneticTable phoneticTable)
{
    // Iterate over copies because groups and table are modified inside the loops.
    List<HomophoneGroup> groupSnapshot = new List<HomophoneGroup>(phoneticTable);
    foreach (HomophoneGroup group in groupSnapshot)
    {
        HashSet<string> variantSnapshot = new HashSet<string>(group);
        foreach (string variant in variantSnapshot)
        {
            if (!IsMatchEndingType(variant, group) || !IsMatchWordVariantEnding(variant))
            {
                continue;
            }

            // Both parts are computed from the group's value before the variant is removed.
            string stemPhonetic = RemoveUndesiredEnding(group.PhoneticValue);
            string endingPhonetic = BuildPhoneticEnding(group.PhoneticValue);

            group.Remove(variant);
            phoneticTable.Add(variant, stemPhonetic + " " + endingPhonetic);
        }
    }
}
/// <summary>
/// Give a new phonetic value to word variants ending with toEnglish when they
/// share a homophone group with a shorter variant ending with fromEnglish
/// </summary>
/// <param name="phoneticTable">phonetic table</param>
/// <param name="fromEnglish">English ending expected on the group's shortest variant</param>
/// <param name="fromPhonetic">phonetic ending expected on the group's phonetic value</param>
/// <param name="toEnglish">English ending of the variants to move</param>
/// <param name="toPhonetic">phonetic ending passed to the string-level ReplaceEnding overload</param>
public void ReplaceEnding(PhoneticTable phoneticTable, string fromEnglish, string fromPhonetic, string toEnglish, string toPhonetic)
{
    // Iterate over copies because groups and table are modified inside the loops.
    List<HomophoneGroup> groupSnapshot = new List<HomophoneGroup>(phoneticTable);
    foreach (HomophoneGroup group in groupSnapshot)
    {
        HashSet<string> variantSnapshot = new HashSet<string>(group);
        foreach (string variant in variantSnapshot)
        {
            if (!variant.EndsWith(toEnglish))
            {
                continue;
            }

            if (!group.GetShortestVariant(variant).EndsWith(fromEnglish))
            {
                continue;
            }

            // The shortest variant itself keeps the group's phonetic value.
            if (variant == group.GetShortestVariant(variant))
            {
                continue;
            }

            if (!group.PhoneticValue.EndsWith(fromPhonetic))
            {
                continue;
            }

            string replacedPhonetic = ReplaceEnding(group.PhoneticValue, fromPhonetic, toPhonetic);
            group.Remove(variant);
            phoneticTable.Add(variant, replacedPhonetic);
        }
    }
}
/// <summary>
/// Get the first word of the phonetic table that ends with the given letters
/// </summary>
/// <param name="englishEnding">letters the word must end with</param>
/// <param name="phoneticTable">phonetic table</param>
/// <returns>first matching word in enumeration order, or null if no word matches</returns>
private string GetWordEndsWith(string englishEnding, PhoneticTable phoneticTable)
{
    foreach (HomophoneGroup homophoneGroup in phoneticTable)
    {
        foreach (string currentWord in homophoneGroup)
        {
            if (currentWord.EndsWith(englishEnding))
            {
                // Return the word itself, not its phonetic value: the caller
                // looks the phonetic value up from the returned word, so
                // returning the phonetic value made that second lookup
                // query the table with a phonetic string.
                return currentWord;
            }
        }
    }

    return null;
}
/// <summary>
/// Repair phonetic table by running every ending rule, pass after pass,
/// until a full pass leaves the table's count unchanged
/// </summary>
/// <param name="phoneticTable">phonetic table handed to every rule</param>
public void Repair(PhoneticTable phoneticTable)
{
    // One row per endingReplacer.ReplaceEnding call, in call order:
    // { fromEnglish, fromPhonetic, toEnglish, toPhonetic }
    string[,] endingReplacements =
    {
        { "y", "[emacr]", "ically", "[ibreve] [dash] [kreg] [schwa] [lreg] [emacr]" },
        { "ic", "[ibreve] [kreg]", "ically", "[ibreve] [dash] [kreg] [schwa] [lreg] [emacr]" },
        { "ate", "[amacr] [treg] [lprime]", "ations", "[amacr] [prime] [sreg] [hreg] [schwa] [nreg] [zreg]" },
        { "ate", "[amacr] [treg] [lprime]", "ators", "[amacr] [lprime] [treg] [schwa] [rreg] [zreg]" },
        { "ize", "[imacr] [zreg] [lprime]", "izing", "[imacr] [lprime] [zreg] [ibreve] [nreg] [greg]" },
        { "ize", "[imacr] [zreg] [lprime]", "ising", "[imacr] [lprime] [zreg] [ibreve] [nreg] [greg]" },
        { "ize", "[imacr] [zreg] [lprime]", "ised", "[imacr] [zreg] [lprime] [dreg]" },
        { "ism", "[ibreve] [zreg] [lprime] [schwa] [mreg]", "istic", "[ibreve] [sreg] [treg] [ibreve] [kreg]" },
        { "ism", "[ibreve] [zreg] [lprime] [schwa] [mreg]", "istically", "[ibreve] [sreg] [treg] [ibreve] [kreg] [schwa] [dash] [lreg] [emacr]" },
        { "p", "[preg] [prime]", "pped", "[preg] [treg]" },
        { "p", "[preg] [prime]", "pper", "[preg] [prime] [schwa] [rreg]" },
        { "p", "[preg] [prime]", "ppers", "[preg] [prime] [schwa] [rreg] [zreg]" },
        { "p", "[preg] [prime]", "pping", "[preg] [ibreve] [nreg] [greg]" },
        { "p", "[preg]", "pped", "[preg] [treg]" },
        { "p", "[preg]", "pper", "[preg] [prime] [schwa] [rreg]" },
        { "p", "[preg]", "ppers", "[preg] [prime] [schwa] [rreg] [zreg]" },
        { "p", "[preg]", "pping", "[preg] [ibreve] [nreg] [greg]" },
        { "fy", "[freg] [imacr] [lprime]", "fied", "[freg] [imacr] [dreg] [lprime]" },
        { "fy", "[freg] [imacr] [lprime]", "fication", "[freg] [ibreve] [dash] [kreg] [amacr] [prime] [sreg] [hreg] [schwa] [nreg]" },
        { "ist", "[ibreve] [dash] [sreg] [treg]", "istical", "[ibreve] [sreg] [prime] [treg] [ibreve] [dash] [kreg] [schwa] [lreg]" },
        { "ist", "[ibreve] [sreg] [treg]", "istical", "[ibreve] [sreg] [prime] [treg] [ibreve] [dash] [kreg] [schwa] [lreg]" },
        { "er", "[schwa] [rreg]", "erous", "[schwa] [rreg] [dash] [schwa] [sreg]" },
        { "er", "[schwa] [rreg]", "ering", "[schwa] [rreg] [ibreve] [nreg] [greg]" },
        { "er", "[schwa] [rreg]", "erings", "[schwa] [rreg] [ibreve] [nreg] [greg] [zreg]" },
        { "y", "[emacr]", "iness", "[emacr] [nreg] [ebreve] [sreg]" },
        { "y", "[emacr]", "ier", "[imacr] [lprime] [schwa] [rreg]" },
        { "ant", "[schwa] [nreg] [treg]", "ance", "[schwa] [nreg] [sreg]" },
        { "ent", "[schwa] [nreg] [treg]", "ence", "[schwa] [nreg] [sreg]" },
        { "ant", "[schwa] [nreg] [treg]", "ancy", "[schwa] [nreg] [dash] [sreg] [emacr]" },
        { "ent", "[schwa] [nreg] [treg]", "ency", "[schwa] [nreg] [dash] [sreg] [emacr]" },
        { "ance", "[schwa] [nreg] [sreg]", "ant", "[schwa] [nreg] [treg]" },
        { "ence", "[schwa] [nreg] [sreg]", "ent", "[schwa] [nreg] [treg]" },
        { "ancy", "[schwa] [nreg] [dash] [sreg] [emacr]", "ant", "[schwa] [nreg] [treg]" },
        { "ency", "[schwa] [nreg] [dash] [sreg] [emacr]", "ent", "[schwa] [nreg] [treg]" },
        { "tion", "[sreg] [hreg] [schwa] [nreg]", "tive", "[treg] [ibreve] [vreg]" },
        { "tive", "[treg] [ibreve] [vreg]", "tion", "[sreg] [hreg] [schwa] [nreg]" },
        { "ia", "[emacr] [dash] [schwa]", "ic", "[ibreve] [kreg]" },
        { "ic", "[ibreve] [kreg]", "ia", "[emacr] [dash] [schwa]" },
        { "ists", "[ibreve] [sreg] [treg] [sreg]", "isms", "[ibreve] [zreg] [lprime] [schwa] [mreg] [zreg]" },
        { "isms", "[ibreve] [zreg] [lprime] [schwa] [mreg] [zreg]", "ists", "[ibreve] [sreg] [treg] [sreg]" },
        { "ist", "[ibreve] [sreg] [treg]", "isms", "[ibreve] [zreg] [lprime] [schwa] [mreg] [zreg]" },
        { "ism", "[ibreve] [zreg] [lprime] [schwa] [mreg]", "ists", "[ibreve] [sreg] [treg] [sreg]" },
        { "ble", "[breg] [schwa] [lreg]", "bility", "[breg] [ibreve] [lreg] [prime] [ibreve] [dash] [treg] [emacr]" },
        { "ize", "[imacr] [zreg] [lprime]", "ization", "[ibreve] [dash] [zreg] [amacr] [prime] [sreg] [hreg] [schwa] [nreg]" },
        { "ia", "[emacr] [dash] [schwa]", "ian", "[emacr] [dash] [schwa] [nreg]" },
        { "ian", "[emacr] [dash] [schwa] [nreg]", "ia", "[emacr] [dash] [schwa]" }
    };

    // One row per RepairEnding call, in call order: { englishEnding, phoneticEnding }
    string[,] endingRepairs =
    {
        { "ing", "[ibreve] [nreg] [greg]" },
        { "er", "[schwa] [rreg]" },
        { "ers", "[schwa] [rreg] [zreg]" },
        { "r", "[schwa] [rreg]" },
        { "or", "[schwa] [rreg]" },
        { "ic", "[ibreve] [kreg]" },
        { "ity", "[ibreve] [dash] [treg] [emacr]" },
        { "ism", "[ibreve] [zreg] [lprime] [schwa] [mreg]" },
        { "ist", "[ibreve] [dash] [sreg] [treg]" },
        { "ness", "[nreg] [ebreve] [sreg]" },
        { "less", "[lreg] [ebreve] [sreg]" },
        { "est", "[ebreve] [sreg] [treg]" },
        { "st", "[ebreve] [sreg] [treg]" },
        { "istic", "[ibreve] [sreg] [treg] [ibreve] [kreg]" },
        { "ically", "[ibreve] [dash] [kreg] [schwa] [lreg] [emacr]" },
        { "ation", "[amacr] [prime] [sreg] [hreg] [schwa] [nreg]" },
        { "ations", "[amacr] [prime] [sreg] [hreg] [schwa] [nreg] [zreg]" },
        { "al", "[lreg]" },
        { "ally", "[schwa] [dash] [lreg] [emacr]" },
        { "ment", "[mreg] [ebreve] [nreg] [prime] [treg]" }
    };

    int countBeforeRepair;
    do
    {
        countBeforeRepair = phoneticTable.Count;

        // Dedicated ending repairers run first, in this fixed order.
        endingRepairerS.Repair(phoneticTable);
        endingRepairerEd.Repair(phoneticTable);
        endingRepairerLy.Repair(phoneticTable);
        endingRepairerIsmToIst.Repair(phoneticTable);
        endingRepairerIstToIsm.Repair(phoneticTable);
        endingRepairerYtoIes.Repair(phoneticTable);
        endingRepairerAteToAtion.Repair(phoneticTable);
        endingRepairerAteToAting.Repair(phoneticTable);
        endingRepairerAteToAtive.Repair(phoneticTable);
        endingRepairerAteToAtor.Repair(phoneticTable);
        endingRepairerYToIc.Repair(phoneticTable);

        // Then the ending replacements, row by row.
        for (int row = 0; row < endingReplacements.GetLength(0); row++)
        {
            endingReplacer.ReplaceEnding(
                phoneticTable,
                endingReplacements[row, 0],
                endingReplacements[row, 1],
                endingReplacements[row, 2],
                endingReplacements[row, 3]);
        }

        // Finally the plain "shortest variant + ending" repairs, row by row.
        for (int row = 0; row < endingRepairs.GetLength(0); row++)
        {
            RepairEnding(phoneticTable, endingRepairs[row, 0], endingRepairs[row, 1]);
        }
    } while (phoneticTable.Count != countBeforeRepair);

#warning Implement Repair() for other cases
    // Cases not handled yet:
    //  - endingRepairerLy (l or no l at the end)
    //  - ist to ism and vice versa even when no ist or ism counterpart is available
    //  - involuntary -> involuntarily
    //  - invulnerable -> invulnerability
    //  - evict -> eviction
    //  - militant -> militance
    //  - militant -> militancy
    // RepairEnding candidates that were sketched but never enabled:
    //  "ator", "ly", "ion", "es", and "ence" / "ent"
}
/// <summary>
/// Re-add every word variant that equals its group's shortest variant plus
/// englishEnding, using the group's phonetic value followed by phoneticEnding
/// </summary>
/// <param name="phoneticTable">phonetic table</param>
/// <param name="englishEnding">English ending appended to the shortest variant</param>
/// <param name="phoneticEnding">phonetic ending to append (trimmed before use)</param>
private void RepairEnding(PhoneticTable phoneticTable, string englishEnding, string phoneticEnding)
{
    phoneticEnding = phoneticEnding.Trim();

    // Iterate over copies because groups and table are modified inside the loops.
    List<HomophoneGroup> groupSnapshot = new List<HomophoneGroup>(phoneticTable);
    foreach (HomophoneGroup group in groupSnapshot)
    {
        HashSet<string> variantSnapshot = new HashSet<string>(group);
        foreach (string variant in variantSnapshot)
        {
            if (variant != group.GetShortestVariant(variant) + englishEnding)
            {
                continue;
            }

            // The shortest variant itself is never moved.
            if (variant == group.GetShortestVariant(variant))
            {
                continue;
            }

            // The group's phonetic value is read after the removal, as before.
            group.Remove(variant);
            string repairedPhonetic = group.PhoneticValue + " " + phoneticEnding;
            phoneticTable.Add(variant, repairedPhonetic);
        }
    }
}
/// <summary>
/// Trim every homophone group of the phonetic table
/// </summary>
/// <param name="phoneticTable">phonetic table whose groups are passed to the per-group Trim</param>
public void Trim(PhoneticTable phoneticTable)
{
    foreach (HomophoneGroup group in phoneticTable)
    {
        Trim(group);
    }
}
/// <summary>
/// Get phonetic value by splitting the word, or null if fails.
/// Not implemented yet: currently always returns null.
/// </summary>
/// <param name="word">word for which we need phonetic value (currently unused)</param>
/// <param name="phoneticTable">phonetic table (currently unused)</param>
/// <returns>always null for now</returns>
public string TrySplit(string word, PhoneticTable phoneticTable)
{
#warning Implement TrySplit() (return null only if fails)
    return null;
}