/// <summary>
/// Decides whether <paramref name="query"/> may be added as a new word by the caller
/// of the current request.
/// </summary>
/// <param name="query">Candidate word taken from the request; may be null.</param>
/// <returns>
/// True only when the session check yields a non-negative user ID, the query is not null
/// and not longer than six characters, and every character of the query has Unihan info
/// whose <c>CanBeSimp</c> flag is set. False in every other case.
/// </returns>
private bool canAddWord(string query)
{
    // Longest query that is still accepted.
    const int maxQueryLength = 6;

    int userId;
    string userName;
    auth.CheckSession(HttpContext.Request.Headers, out userId, out userName);

    // Negative ID: presumably no authenticated user behind this request -- verify against auth.CheckSession.
    if (userId < 0)
    {
        return false;
    }

    // A missing query can never be added; without this guard the length check below throws.
    if (query == null)
    {
        return false;
    }
    if (query.Length > maxQueryLength)
    {
        return false;
    }

    UniHanziInfo[] uhis = langRepo.GetUnihanInfo(query);
    foreach (UniHanziInfo uhi in uhis)
    {
        // Reject when the lookup has no entry for a character, or the character cannot be simplified.
        if (uhi == null || !uhi.CanBeSimp)
        {
            return false;
        }
    }
    return true;
}
/// <summary>
/// Validates the three parts of a headword (simplified, traditional, pinyin) and appends
/// every problem found to the matching output list.
/// </summary>
/// <param name="lang">Language code handed to the text provider when fetching message texts.</param>
/// <param name="simp">Simplified form of the headword.</param>
/// <param name="trad">Traditional form of the headword.</param>
/// <param name="pinyin">Pinyin as entered, before normalization.</param>
/// <param name="errorsSimp">Receives problems found with the simplified form.</param>
/// <param name="errorsTrad">Receives problems found with the traditional form.</param>
/// <param name="errorsPinyin">Receives problems found with the pinyin.</param>
/// <returns>
/// True if the line assembled from traditional, simplified and normalized pinyin was parsed
/// into an entry by <c>CedictParser</c>; false otherwise. Problems added to the lists do not
/// change the return value on their own.
/// </returns>
private bool validateHeadword(string lang, string simp, string trad, string pinyin,
    List<HeadwordProblem> errorsSimp, List<HeadwordProblem> errorsTrad, List<HeadwordProblem> errorsPinyin)
{
    var tprov = TextProvider.Instance;
    string msg;

    // Check each simplified: is it really simplified?
    UniHanziInfo[] uhiSimp = langRepo.GetUnihanInfo(simp);
    for (int i = 0; i != uhiSimp.Length; ++i)
    {
        var uhi = uhiSimp[i];
        // A missing entry is reported like a non-simplified character instead of being dereferenced.
        if (uhi == null || !uhi.CanBeSimp)
        {
            msg = tprov.GetString(lang, "editEntry.hwProblemNotSimplified");
            msg = string.Format(msg, simp[i]);
            errorsSimp.Add(new HeadwordProblem(false, msg));
        }
    }

    // Check each traditional: is it really traditional?
    UniHanziInfo[] uhiTrad = langRepo.GetUnihanInfo(trad);
    for (int i = 0; i != uhiTrad.Length; ++i)
    {
        var uhi = uhiTrad[i];
        // Traditional chars are listed as their own traditional variant.
        // A missing entry cannot confirm that, so it is reported as well.
        if (uhi == null || Array.IndexOf(uhi.TradVariants, trad[i]) < 0)
        {
            msg = tprov.GetString(lang, "editEntry.hwProblemNotTraditional");
            msg = string.Format(msg, trad[i]);
            errorsTrad.Add(new HeadwordProblem(false, msg));
        }
    }

    // Check each traditional against its simplified friend
    if (trad.Length != simp.Length)
    {
        errorsTrad.Add(new HeadwordProblem(true, tprov.GetString(lang, "editEntry.hwProblemSimpTradCounts")));
    }
    else
    {
        for (int i = 0; i != uhiSimp.Length; ++i)
        {
            var uhi = uhiSimp[i];
            // No info for the simplified character: it was already reported above,
            // and there are no variants to compare the traditional character with.
            if (uhi == null)
            {
                continue;
            }
            if (Array.IndexOf(uhi.TradVariants, trad[i]) < 0)
            {
                msg = tprov.GetString(lang, "editEntry.hwProblemNotTradForSimp");
                msg = string.Format(msg, simp[i], trad[i]);
                errorsTrad.Add(new HeadwordProblem(false, msg));
            }
        }
    }

    // Normalize pinyin (multiple spaces, leading/trailing spaces).
    // Dropping the empty pieces collapses every run of spaces into a single one;
    // Trim then removes whatever other whitespace is left at either end.
    string[] pinyinParts = pinyin.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    string pyNorm = string.Join(" ", pinyinParts).Trim();
    if (pyNorm != pinyin)
    {
        errorsPinyin.Add(new HeadwordProblem(true, tprov.GetString(lang, "editEntry.hwProblemExtraSpacesPinyin")));
    }

    // Try to match up normalized pinyin with simplified Hanzi
    CedictParser parser = new CedictParser();
    CedictEntry ee = null;
    try
    {
        ee = parser.ParseEntry(trad + " " + simp + " [" + pyNorm + "] /x/", 0, null);
    }
    catch
    {
        // Deliberately ignored: a failed parse leaves ee null, which is reported as invalid pinyin below.
    }

    if (ee == null)
    {
        errorsPinyin.Add(new HeadwordProblem(true, tprov.GetString(lang, "editEntry.hwProblemInvalidPinyin")));
    }
    else if (simp.Length == ee.ChSimpl.Length)
    {
        for (int i = 0; i != uhiSimp.Length; ++i)
        {
            var uhi = uhiSimp[i];
            // Without Unihan info there are no known readings to check the syllable against.
            if (uhi == null)
            {
                continue;
            }
            var py = ee.GetPinyinAt(i);
            string pyDisplay = py.GetDisplayString(false);
            bool isKnownReading = uhi.Pinyin.Any(x => x.GetDisplayString(false) == pyDisplay);
            if (!isKnownReading)
            {
                msg = tprov.GetString(lang, "editEntry.hwProblemWrongPinyin");
                msg = string.Format(msg, pyDisplay, simp[i]);
                errorsPinyin.Add(new HeadwordProblem(false, msg));
            }
        }
    }
    return ee != null;
}
/// <summary>
/// Retrieves information about (simplified) hanzi.
/// </summary>
/// <param name="simp">Simplified headword from the query string; null is treated as an empty string.</param>
/// <returns>
/// An <c>ObjectResult</c> wrapping a <c>NewEntryProcessSimpResult</c> that holds, for every
/// character of the input, a list of traditional candidates and a list of pinyin candidates.
/// No list is left empty: the input character itself is used where nothing was found.
/// </returns>
public IActionResult ProcessSimp([FromQuery] string simp)
{
    if (simp == null)
    {
        simp = "";
    }
    NewEntryProcessSimpResult res = new NewEntryProcessSimpResult();

    // Prepare result: as long as input; empty list for each position
    for (int i = 0; i != simp.Length; ++i)
    {
        res.Trad.Add(new List<string>());
        res.Pinyin.Add(new List<string>());
    }

    // Do we have CEDICT headwords for this simplified HW?
    // If yes, put first headword's traditional and pinyin into first layer of result
    // Fill rest of the alternatives with input from additional results
    HeadwordSyll[][] chHeads = langRepo.GetPossibleHeadwords(simp, false);
    for (int i = 0; i != chHeads.Length; ++i)
    {
        HeadwordSyll[] sylls = chHeads[i];
        for (int j = 0; j != simp.Length; ++j)
        {
            addIfNew(res.Trad[j], sylls[j].Trad.ToString());
            addIfNew(res.Pinyin[j], sylls[j].Pinyin);
        }
    }

    // Unihan lookup
    UniHanziInfo[] uhis = langRepo.GetUnihanInfo(simp);

    if (chHeads.Length == 0)
    {
        // We had no headword: build from Unihan data, but with a twist
        // Make sure first traditional matches most common pinyin
        for (int i = 0; i != uhis.Length; ++i)
        {
            UniHanziInfo uhi = uhis[i];
            if (uhi == null)
            {
                continue;
            }

            // Add pinyin readings first
            foreach (PinyinSyllable syll in uhi.Pinyin)
            {
                addIfNew(res.Pinyin[i], syll.GetDisplayString(true));
            }

            // Look up traditional chars for this position
            UniHanziInfo[] tradUhis = langRepo.GetUnihanInfo(uhi.TradVariants);

            // Find "best" traditional character: the first one whose pinyin readings include our first pinyin.
            // A character without any reading has no first pinyin; indexing it would throw,
            // so in that case no variant is preferred.
            char firstTrad = (char)0;
            if (tradUhis != null && uhi.Pinyin.Any())
            {
                string favoritePinyin = uhi.Pinyin[0].GetDisplayString(true);
                for (int tx = 0; tx != uhi.TradVariants.Length; ++tx)
                {
                    UniHanziInfo tradUhi = tradUhis[tx];
                    if (tradUhi == null)
                    {
                        continue;
                    }
                    if (tradUhi.Pinyin.Any(reading => reading.GetDisplayString(true) == favoritePinyin))
                    {
                        firstTrad = uhi.TradVariants[tx];
                        break;
                    }
                }
            }

            // Add first traditional, if found
            if (firstTrad != (char)0)
            {
                addIfNew(res.Trad[i], firstTrad.ToString());
            }
            // Add all the remaining traditional variants
            foreach (char c in uhi.TradVariants)
            {
                addIfNew(res.Trad[i], c.ToString());
            }
        }
    }
    else
    {
        // We had a headword: fill remaining slots with traditional and pinyin items from Unihan
        res.IsKnownHeadword = true;
        for (int i = 0; i != uhis.Length; ++i)
        {
            UniHanziInfo uhi = uhis[i];
            if (uhi == null)
            {
                continue;
            }
            foreach (char c in uhi.TradVariants)
            {
                addIfNew(res.Trad[i], c.ToString());
            }
            foreach (PinyinSyllable syll in uhi.Pinyin)
            {
                addIfNew(res.Pinyin[i], syll.GetDisplayString(true));
            }
        }
    }

    // Filter pinyin: only keep those that work with traditional on the first spot
    // Unless intersection is empty - can also happen in this weird world
    for (int i = 0; i != simp.Length; ++i)
    {
        List<string> pyList = res.Pinyin[i];
        if (pyList.Count < 2)
        {
            continue;
        }
        List<string> tradList = res.Trad[i];
        if (tradList.Count == 0)
        {
            continue;
        }

        UniHanziInfo[] tradUhis = langRepo.GetUnihanInfo(new char[] { tradList[0][0] });
        // Nothing to filter with if the lookup produced no usable entry.
        if (tradUhis == null || tradUhis.Length == 0 || tradUhis[0] == null)
        {
            continue;
        }

        List<string> pinyinsOfTrad = new List<string>();
        foreach (var tradReading in tradUhis[0].Pinyin)
        {
            pinyinsOfTrad.Add(tradReading.GetDisplayString(true));
        }

        // If we had a match, start from second: don't want to remove what just came from CEDICT
        List<string> toRem = new List<string>();
        int firstCandidate = res.IsKnownHeadword ? 1 : 0;
        for (int j = firstCandidate; j < pyList.Count; ++j)
        {
            string py = pyList[j];
            if (!pinyinsOfTrad.Contains(py))
            {
                toRem.Add(py);
            }
        }

        // Every reading would go: keep the list untouched rather than emptying it.
        if (toRem.Count == pyList.Count)
        {
            continue;
        }
        foreach (string py in toRem)
        {
            pyList.Remove(py);
        }
    }

    // Check if there are positions where we have no traditional or pinyin
    // For the purposes of this lookup, we just inject character from input there
    for (int i = 0; i != simp.Length; ++i)
    {
        string inputChar = simp[i].ToString();
        if (res.Trad[i].Count == 0)
        {
            res.Trad[i].Add(inputChar);
        }
        if (res.Pinyin[i].Count == 0)
        {
            res.Pinyin[i].Add(inputChar);
        }
    }

    // Tell our caller
    return new ObjectResult(res);
}