/// <summary>
/// Builds correction candidates for <paramref name="error"/> by re-attaching every
/// known prefix/ending combination to the root word returned by <c>GetRootWord</c>,
/// keeping the results whose edit distance to <paramref name="error"/> is at most 2.
/// </summary>
/// <param name="key">Identifier copied unchanged into every returned candidate.</param>
/// <param name="error">The misspelled word to find candidates for.</param>
/// <param name="log">
/// Receives "[candidate,distance,frequency;...]" for the returned candidates, or an
/// empty string when no candidate is produced.
/// </param>
/// <returns>
/// One <c>CorrectionCandidate</c> per distinct candidate (in new spelling); an empty
/// list when the root word is missing, shorter than 3 characters, or nothing is close enough.
/// </returns>
public List<CorrectionCandidate> GetCandidatesFromAffixCorrection(int key, string error, out string log)
{
    // Word shape: [prefix-1] + [prefix-2] + root + [suffix] + [possessive] + [particle]
    // 1. Particles:   -lah, -kah, -pun, -tah.
    // 2. Possessives: -ku, -mu, -nya.
    // 3. Suffixes:    -i, -an, -kan.
    // 4. Prefixes:    meN-, beN-, peN-, teN-, di-, ke-, se-.
    log = "";
    List<CorrectionCandidate> candidates = new List<CorrectionCandidate>();

    Correction correct = new Correction();

    // Converted once and reused below; assumes the spelling conversion is deterministic.
    string newSpellError = correct.ChangeOldToNewSpell(error);
    string rootWord = GetRootWord(newSpellError);
    if (string.IsNullOrEmpty(rootWord) || rootWord.Length < 3)
    {
        return candidates;
    }

    List<string> prefixes = new List<string>()
    {
        "", "di", "ke", "se", "ber", "bel", "be", "te", "ter",
        "me", "mem", "men", "meng", "menge", "meny",
        "pe", "per", "pem", "pen", "peng", "penge", "peny", "pel",
        "memper", "mempel", "menter", "member",
        "diper", "diter", "dipel", "diber", "keber", "keter"
    };

    // Each slot is optional, hence the leading "" entry in every list.
    List<string> baseSuffixes = new List<string>() { "", "i", "an", "kan" };
    List<string> possessives = new List<string>() { "", "ku", "mu", "nya" };
    List<string> particles = new List<string>() { "", "lah", "kah", "pun", "tah" };

    // All 4 x 4 x 5 = 80 endings, e.g. "" (none), "inya" (diperbaikinya),
    // "kanlah" (pertahankanlah), "kanmulah" (dipelukanmulah).
    List<string> endings = new List<string>(baseSuffixes.Count * possessives.Count * particles.Count);
    foreach (string baseSuffix in baseSuffixes)
    {
        foreach (string possessive in possessives)
        {
            foreach (string particle in particles)
            {
                endings.Add(baseSuffix + possessive + particle);
            }
        }
    }

    string oldSpellRoot = correct.ChangeNewToOldSpell(rootWord);

    // Candidate (new spelling) -> smallest edit distance seen for it.
    Dictionary<string, int> dicCandidates = new Dictionary<string, int>();
    foreach (string prefix in prefixes)
    {
        foreach (string ending in endings)
        {
            Affixer affixer = new Affixer();
            string candidate = correct.ChangeNewToOldSpell(affixer.Affixing(oldSpellRoot, prefix, ending));

            // -1 is treated as "no usable distance" (presumably: above the limit of 2 passed in).
            int levenshtein = EditDistance.LevenshteinDistance(candidate, error, 2);
            if (levenshtein == -1 || levenshtein > 2)
            {
                continue;
            }

            string newSpellCandidate = correct.ChangeOldToNewSpell(candidate);
            int knownDistance;
            if (!dicCandidates.TryGetValue(newSpellCandidate, out knownDistance) || levenshtein < knownDistance)
            {
                // Keep the best distance so the result does not depend on affix order.
                dicCandidates[newSpellCandidate] = levenshtein;
            }
        }
    }

    if (dicCandidates.Count == 0)
    {
        return candidates;
    }

    Dictionary<string, int> dicCandidateAndFreq = GetFrequencies(dicCandidates.Keys.ToArray());
    System.Text.StringBuilder logBuilder = new System.Text.StringBuilder();
    foreach (KeyValuePair<string, int> can in dicCandidates)
    {
        // Candidates without a frequency entry default to 0.
        int frequency;
        if (!dicCandidateAndFreq.TryGetValue(can.Key, out frequency))
        {
            frequency = 0;
        }

        candidates.Add(new CorrectionCandidate
        {
            Key = key,
            Error = error,
            Candidate = can.Key,
            SameBigramAmount = -1, // not computed on this path
            Frequency = frequency,
            LengthDifference = Math.Abs(can.Key.Length - newSpellError.Length),
            Levensthein = can.Value
        });

        logBuilder.Append(can.Key).Append(",")
                  .Append(can.Value).Append(",")
                  .Append(frequency.ToString()).Append(";");
    }

    if (logBuilder.Length > 0)
    {
        log = "[" + logBuilder.ToString() + "]";
    }
    return candidates;
}
/// <summary>
/// Runs the stored procedure <paramref name="spName"/> for <paramref name="root"/>, attaches
/// <paramref name="prefix"/> and <paramref name="suffix"/> to every returned "Unigram" value,
/// and returns the affixed words whose edit distance to <paramref name="error"/> is at most 2,
/// with their frequencies filled in from <c>GetFrequencies</c>.
/// </summary>
/// <param name="spName">Name of the stored procedure to execute.</param>
/// <param name="key">Identifier copied unchanged into every returned candidate.</param>
/// <param name="error">The misspelled word the candidates are compared against.</param>
/// <param name="root">Value passed to the procedure as <c>@Word</c>.</param>
/// <param name="prefix">Prefix attached to each stem returned by the procedure.</param>
/// <param name="suffix">Suffix attached to each stem returned by the procedure.</param>
/// <param name="minSameBigramAmount">Passed to the procedure as <c>@MinSameBigramAmount</c>.</param>
/// <param name="minLengthVariant">Passed to the procedure as <c>@MinLengthVariant</c>.</param>
/// <param name="maxLevensthein">Passed to the procedure as <c>@MaxLevensthein</c>.</param>
/// <param name="log">
/// Receives "[minSameBigramAmount,minLengthVariant,maxLevensthein][candidate,distance,frequency;...]".
/// </param>
/// <returns>The accepted candidates; empty when none qualifies.</returns>
/// <exception cref="Exception">
/// Thrown when the database call fails; carries the MySQL error message and the original
/// exception as <c>InnerException</c>.
/// </exception>
public List<CorrectionCandidate> GetCandidates(string spName, int key, string error, string root, string prefix, string suffix, int minSameBigramAmount, int minLengthVariant, int maxLevensthein, out string log)
{
    // sample: call getCandidates('depat',2,0,1);
    log = "";
    List<CorrectionCandidate> lsCandidates = new List<CorrectionCandidate>();

    try
    {
        // using blocks release connection, command and reader even when a call throws.
        using (MySqlConnection conn = new MySqlConnection())
        {
            conn.ConnectionString = MariaDBConn;
            conn.Open();

            using (MySqlCommand cmd = new MySqlCommand())
            {
                cmd.Connection = conn;
                cmd.CommandText = spName;
                cmd.CommandType = CommandType.StoredProcedure;
                cmd.Parameters.AddWithValue("@Word", root).Direction = ParameterDirection.Input;
                cmd.Parameters.AddWithValue("@MinSameBigramAmount", minSameBigramAmount).Direction = ParameterDirection.Input;
                cmd.Parameters.AddWithValue("@MinLengthVariant", minLengthVariant).Direction = ParameterDirection.Input;
                cmd.Parameters.AddWithValue("@MaxLevensthein", maxLevensthein).Direction = ParameterDirection.Input;
                cmd.Parameters.AddWithValue("@IsLemma", 1).Direction = ParameterDirection.Input;

                using (MySqlDataReader dataReader = cmd.ExecuteReader())
                {
                    while (dataReader.Read())
                    {
                        string stem = dataReader["Unigram"].ToString();
                        Affixer affixer = new Affixer();
                        string sCandidate = affixer.Affixing(stem, prefix, suffix);

                        // NOTE(review): the limit here is a fixed 2, not maxLevensthein (which is
                        // only forwarded to the stored procedure) - confirm this is intended.
                        int levensthein = EditDistance.LevenshteinDistance(sCandidate, error, 2);
                        if (levensthein == -1 || levensthein > 2)
                        {
                            continue;
                        }

                        lsCandidates.Add(new CorrectionCandidate
                        {
                            Key = key,
                            Error = error,
                            Candidate = sCandidate,
                            SameBigramAmount = Convert.ToInt32(dataReader["SameBigramAmount"]),
                            Frequency = 0, // filled in below
                            LengthDifference = Convert.ToInt32(dataReader["LengthDifference"]),
                            Levensthein = levensthein
                        });
                    }
                }
            }
        }
    }
    catch (MySql.Data.MySqlClient.MySqlException ex)
    {
        // Same exception type and message as before; the original is kept as InnerException.
        throw new Exception(ex.Message, ex);
    }

    // Look up frequencies for all candidates at once, then copy them into the list.
    string[] candidateWords = lsCandidates.Select(cc => cc.Candidate).ToArray();
    Dictionary<string, int> dicCandidateAndFreq = GetFrequencies(candidateWords);

    System.Text.StringBuilder logBuilder = new System.Text.StringBuilder();
    foreach (CorrectionCandidate candidate in lsCandidates)
    {
        int frequency;
        if (dicCandidateAndFreq.TryGetValue(candidate.Candidate, out frequency))
        {
            candidate.Frequency = frequency;
        }

        logBuilder.Append(candidate.Candidate).Append(",")
                  .Append(candidate.Levensthein).Append(",")
                  .Append(candidate.Frequency.ToString()).Append(";");
    }

    log = string.Format("[{0},{1},{2}][{3}]", minSameBigramAmount, minLengthVariant, maxLevensthein, logBuilder.ToString());
    return lsCandidates;
}