/// <summary>
/// Serializes an entry into one line: traditional, simplified, pinyin in square brackets,
/// then every sense delimited by slashes.
/// </summary>
/// <param name="entry">The entry to serialize.</param>
/// <returns>The single-line text form of the entry.</returns>
public static string Write(CedictEntry entry)
{
    StringBuilder line = new StringBuilder();

    // Headword: traditional and simplified, then space-separated pinyin in brackets
    line.Append(entry.ChTrad).Append(' ').Append(entry.ChSimpl).Append(" [");
    for (int p = 0; p < entry.PinyinCount; p++)
    {
        if (p > 0)
        {
            line.Append(' ');
        }
        line.Append(entry.GetPinyinAt(p).GetDisplayString(false));
    }
    line.Append("] /");

    // Senses: a slash inside a sense would clash with the delimiter, so it is written as a backslash
    for (int s = 0; s < entry.SenseCount; s++)
    {
        string plain = entry.GetSenseAt(s).GetPlainText();
        line.Append(plain.Replace('/', '\\')).Append('/');
    }

    return line.ToString();
}
/// <summary>
/// Serializes an entry into two parts: the headword and the slash-delimited senses.
/// </summary>
/// <param name="entry">The entry to serialize.</param>
/// <param name="head">Receives traditional, simplified and bracketed pinyin.</param>
/// <param name="trg">Receives the senses, starting and ending with a slash.</param>
public static void Write(CedictEntry entry, out string head, out string trg)
{
    // Headword part
    StringBuilder headText = new StringBuilder();
    headText.Append(entry.ChTrad).Append(' ').Append(entry.ChSimpl).Append(" [");
    for (int p = 0; p < entry.PinyinCount; p++)
    {
        if (p > 0)
        {
            headText.Append(' ');
        }
        headText.Append(entry.GetPinyinAt(p).GetDisplayString(false));
    }
    headText.Append("]");
    head = headText.ToString();

    // Senses part; a slash inside a sense is written as a backslash to keep the delimiter unambiguous
    StringBuilder senseText = new StringBuilder();
    senseText.Append('/');
    for (int s = 0; s < entry.SenseCount; s++)
    {
        string plain = entry.GetSenseAt(s).GetPlainText();
        senseText.Append(plain.Replace('/', '\\')).Append('/');
    }
    trg = senseText.ToString();
}
/// <summary>
/// Loads each candidate entry from the reader and keeps those whose pinyin actually contains
/// the query syllables as a consecutive run.
/// </summary>
/// <param name="br">Reader positioned at each candidate before the entry is deserialized.</param>
/// <param name="poss">Positions of the candidate entries.</param>
/// <param name="sylls">Query syllables; a query tone of -1 is not compared.</param>
/// <returns>Results paired with their entries, in candidate order.</returns>
List<ResWithEntry> doLoadVerifyPinyin(BinReader br, IEnumerable<int> poss, List<PinyinSyllable> sylls)
{
    List<ResWithEntry> kept = new List<ResWithEntry>();

    foreach (int filePos in poss)
    {
        // Deserialize the candidate
        br.Position = filePos;
        CedictEntry loaded = new CedictEntry(br);

        // Slide the query over the entry's pinyin; stop at the first full match
        int matchStart = -1;
        int lastStart = loaded.PinyinCount - sylls.Count;
        for (int start = 0; start <= lastStart && matchStart == -1; ++start)
        {
            int matched = 0;
            while (matched < sylls.Count)
            {
                PinyinSyllable have = loaded.GetPinyinAt(start + matched);
                PinyinSyllable want = sylls[matched];
                // Entry text is lower-cased before comparing; tone only matters if the query has one
                if (have.Text.ToLowerInvariant() != want.Text
                    || (want.Tone != -1 && have.Tone != want.Tone))
                {
                    break;
                }
                ++matched;
            }
            if (matched == sylls.Count)
            {
                matchStart = start;
            }
        }

        // Keep only if the query was found and all Hanzi pass the coverage check
        if (matchStart != -1 && areHanziCovered(loaded))
        {
            CedictResult hit = new CedictResult(filePos, loaded.HanziPinyinMap, matchStart, sylls.Count);
            kept.Add(new ResWithEntry(hit, loaded));
        }
    }

    return kept;
}
/// <summary>
/// Returns the entry's pinyin syllables as display strings joined by single spaces.
/// </summary>
/// <param name="entry">The entry whose pinyin is written.</param>
/// <returns>Space-separated pinyin; empty if the entry has no pinyin syllables.</returns>
public static string WritePinyin(CedictEntry entry)
{
    string[] syllables = new string[entry.PinyinCount];
    for (int p = 0; p < syllables.Length; p++)
    {
        syllables[p] = entry.GetPinyinAt(p).GetDisplayString(false);
    }
    return string.Join(" ", syllables);
}
/// <summary>
/// Loads each candidate entry by its blob ID and keeps those whose pinyin contains
/// the query syllables as a consecutive run.
/// </summary>
/// <param name="cands">Blob IDs of the candidate entries.</param>
/// <param name="qsylls">Query syllables; a query tone of -1 is not compared.</param>
/// <returns>Results paired with their entries, in candidate order.</returns>
private List<ResWithEntry> retrieveVerifyPinyin(List<int> cands, List<PinyinSyllable> qsylls)
{
    List<ResWithEntry> kept = new List<ResWithEntry>();

    foreach (int candidateId in cands)
    {
        CedictEntry loaded = loadFromBlob(candidateId);

        // Try every start offset where the whole query still fits; first full match wins
        int matchStart = -1;
        int start = 0;
        while (matchStart == -1 && start <= loaded.PinyinCount - qsylls.Count)
        {
            bool allMatch = true;
            for (int k = 0; k < qsylls.Count; ++k)
            {
                PinyinSyllable have = loaded.GetPinyinAt(start + k);
                PinyinSyllable want = qsylls[k];
                // Entry text is lower-cased before comparing; tone only matters if the query has one
                if (have.Text.ToLowerInvariant() != want.Text
                    || (want.Tone != -1 && have.Tone != want.Tone))
                {
                    allMatch = false;
                    break;
                }
            }
            if (allMatch)
            {
                matchStart = start;
            }
            ++start;
        }

        // Query not present in this entry: drop it
        if (matchStart == -1)
        {
            continue;
        }

        CedictResult hit = new CedictResult(candidateId, loaded.HanziPinyinMap, matchStart, qsylls.Count);
        kept.Add(new ResWithEntry(hit, loaded));
    }

    return kept;
}
/// <summary>
/// Adds a result for the entry to <paramref name="res"/> if the entry's pinyin contains
/// the query syllables as a consecutive run; otherwise does nothing.
/// </summary>
/// <param name="entry">The entry to check.</param>
/// <param name="entryId">Not used by this method.</param>
/// <param name="sylls">Query syllables; a query tone of -1 is not compared.</param>
/// <param name="res">List that receives the result on a match.</param>
private void verifyPinyin(CedictEntry entry, int entryId, List<PinyinSyllable> sylls, List<CedictResult> res)
{
    int queryLength = sylls.Count;
    int lastStart = entry.PinyinCount - queryLength;

    for (int start = 0; start <= lastStart; ++start)
    {
        // Count how many query syllables match consecutively from this offset
        int matched = 0;
        while (matched != queryLength)
        {
            PinyinSyllable have = entry.GetPinyinAt(start + matched);
            PinyinSyllable want = sylls[matched];
            // Entry text is lower-cased before comparing
            if (have.Text.ToLowerInvariant() != want.Text)
            {
                break;
            }
            // Tone only matters if the query specifies one
            if (want.Tone != -1 && have.Tone != want.Tone)
            {
                break;
            }
            ++matched;
        }

        // First full match wins: record it and stop
        if (matched == queryLength)
        {
            res.Add(new CedictResult(entry, entry.HanziPinyinMap, start, queryLength));
            return;
        }
    }
}
/// <summary>
/// <para>Builds a headword (one syllable item per simplified character) from the entry,
/// and appends it to the list unless an identical headword is already there.</para>
/// <para>With the Unihan filter on, the headword is also dropped if any traditional character
/// is not among the traditional variants listed for its simplified counterpart.</para>
/// </summary>
/// <param name="cdHeads">Headwords collected so far; receives the new headword if it is kept.</param>
/// <param name="entry">Entry supplying simplified, traditional and pinyin.</param>
/// <param name="unihanFilter">If true, traditional characters are checked against Unihan info.</param>
private void addHeadIfNew(List<HeadwordSyll[]> cdHeads, CedictEntry entry, bool unihanFilter)
{
    // The new headword: with pinyin lower-cased
    HeadwordSyll[] res = new HeadwordSyll[entry.ChSimpl.Length];
    for (int i = 0; i != res.Length; ++i)
    {
        string pyLower = entry.GetPinyinAt(i).GetDisplayString(true);
        // Do not lower-case single latin letter
        bool singleUpperLatin = pyLower.Length == 1 && pyLower[0] >= 'A' && pyLower[0] <= 'Z';
        if (!singleUpperLatin)
        {
            pyLower = pyLower.ToLowerInvariant();
        }
        res[i] = new HeadwordSyll(entry.ChSimpl[i], entry.ChTrad[i], pyLower);
    }

    // Unihan info for the simplified characters, only needed when filtering
    UniHanziInfo[] uhis = null;
    if (unihanFilter)
    {
        uhis = GetUnihanInfo(entry.ChSimpl.ToCharArray());
    }

    // Is it already on list?
    bool toSkip = false;
    foreach (HeadwordSyll[] x in cdHeads)
    {
        // A headword of different length can never be the same headword.
        // Without this check a shorter item is indexed past its end, and a longer item
        // sharing the same prefix is mistaken for a duplicate.
        if (x.Length != res.Length)
        {
            continue;
        }
        bool different = false;
        for (int i = 0; i != res.Length; ++i)
        {
            if (x[i].Simp != res[i].Simp || x[i].Trad != res[i].Trad || x[i].Pinyin != res[i].Pinyin)
            {
                different = true;
                break;
            }
        }
        if (!different)
        {
            toSkip = true;
            break;
        }
    }

    // Drop those where traditional character is odd (no need to check if already rejected)
    if (unihanFilter && !toSkip)
    {
        for (int i = 0; i != res.Length; ++i)
        {
            // No Unihan info for this character: nothing to check against
            if (uhis[i] == null)
            {
                continue;
            }
            if (Array.IndexOf(uhis[i].TradVariants, res[i].Trad) < 0)
            {
                toSkip = true;
                break;
            }
        }
    }

    // If traditional chars are OK and headword is new, add
    if (!toSkip)
    {
        cdHeads.Add(res);
    }
}
/// <summary>
/// Returns the data for editing an entry: status, headword parts, target text,
/// rendered entry HTML and rendered change history.
/// </summary>
/// <param name="entryId">String form of the entry's ID.</param>
/// <param name="lang">Language passed on to the renderers.</param>
/// <returns>Status 400 if a parameter is missing; otherwise an object result with the data.</returns>
public IActionResult GetEditEntryData([FromQuery] string entryId, [FromQuery] string lang)
{
    if (entryId == null || lang == null)
    {
        return StatusCode(400, "Missing parameter(s).");
    }

    EditEntryData data = new EditEntryData();

    // Session check; a user ID of -1 is treated as "no approval lookup needed"
    int sessionUserId;
    string sessionUserName;
    auth.CheckSession(HttpContext.Request.Headers, out sessionUserId, out sessionUserName);
    if (sessionUserId != -1)
    {
        data.CanApprove = auth.CanApprove(sessionUserId);
    }

    // Fetch the stored entry and parse it
    int numericId = EntryId.StringToId(entryId);
    string headword, target;
    EntryStatus entryStatus;
    SqlDict.GetEntryById(numericId, out headword, out target, out entryStatus);
    CedictParser entryParser = new CedictParser();
    CedictEntry parsed = entryParser.ParseEntry(headword + " " + target, 0, null);

    data.Status = entryStatus.ToString().ToLowerInvariant();
    data.HeadSimp = parsed.ChSimpl;
    data.HeadTrad = parsed.ChTrad;

    // Pinyin display strings, separated by a space once something has been written
    StringBuilder pinyinText = new StringBuilder();
    for (int p = 0; p < parsed.PinyinCount; p++)
    {
        if (pinyinText.Length > 0)
        {
            pinyinText.Append(" ");
        }
        pinyinText.Append(parsed.GetPinyinAt(p).GetDisplayString(false));
    }
    data.HeadPinyin = pinyinText.ToString();

    // Senses one per line; backslashes in the stored text become slashes again
    string sensesOnly = target.Trim('/');
    string onePerLine = sensesOnly.Replace('/', '\n');
    data.TrgTxt = onePerLine.Replace('\\', '/');

    // Entry HTML
    parsed.Status = entryStatus;
    EntryRenderer entryRenderer = new EntryRenderer(lang, parsed, true, "mainEntry");
    entryRenderer.OneLineHanziLimit = 12;
    StringBuilder html = new StringBuilder();
    entryRenderer.Render(html, null);
    data.EntryHtml = html.ToString();

    // Entry history, rendered into the same (emptied) buffer
    List<ChangeItem> history = SqlDict.GetEntryChanges(numericId);
    html.Clear();
    HistoryRenderer.RenderEntryChanges(html, headword, target, entryStatus, history, lang);
    data.HistoryHtml = html.ToString();

    return new ObjectResult(data);
}
/// <summary>
/// Validates a headword's simplified, traditional and pinyin parts, appending any problems
/// found to the corresponding list.
/// </summary>
/// <param name="lang">Language used to look up the problem messages.</param>
/// <param name="simp">Simplified headword.</param>
/// <param name="trad">Traditional headword.</param>
/// <param name="pinyin">Pinyin as entered.</param>
/// <param name="errorsSimp">Receives problems with the simplified headword.</param>
/// <param name="errorsTrad">Receives problems with the traditional headword.</param>
/// <param name="errorsPinyin">Receives problems with the pinyin.</param>
/// <returns>True if the headword with normalized pinyin could be parsed into an entry; false otherwise.</returns>
private bool validateHeadword(string lang, string simp, string trad, string pinyin,
    List<HeadwordProblem> errorsSimp, List<HeadwordProblem> errorsTrad, List<HeadwordProblem> errorsPinyin)
{
    var tprov = TextProvider.Instance;
    string msg;

    // Check each simplified: is it really simplified?
    UniHanziInfo[] uhiSimp = langRepo.GetUnihanInfo(simp);
    for (int i = 0; i != uhiSimp.Length; ++i)
    {
        var uhi = uhiSimp[i];
        if (!uhi.CanBeSimp)
        {
            msg = tprov.GetString(lang, "editEntry.hwProblemNotSimplified");
            msg = string.Format(msg, simp[i]);
            errorsSimp.Add(new HeadwordProblem(false, msg));
        }
    }

    // Check each traditional: is it really traditional?
    UniHanziInfo[] uhiTrad = langRepo.GetUnihanInfo(trad);
    for (int i = 0; i != uhiTrad.Length; ++i)
    {
        var uhi = uhiTrad[i];
        // Traditional chars are listed as their own traditional variant
        if (Array.IndexOf(uhi.TradVariants, trad[i]) < 0)
        {
            msg = tprov.GetString(lang, "editEntry.hwProblemNotTraditional");
            msg = string.Format(msg, trad[i]);
            errorsTrad.Add(new HeadwordProblem(false, msg));
        }
    }

    // Check each traditional against its simplified friend
    if (trad.Length != simp.Length)
    {
        errorsTrad.Add(new HeadwordProblem(true, tprov.GetString(lang, "editEntry.hwProblemSimpTradCounts")));
    }
    else
    {
        for (int i = 0; i != uhiSimp.Length; ++i)
        {
            var uhi = uhiSimp[i];
            if (Array.IndexOf(uhi.TradVariants, trad[i]) < 0)
            {
                msg = tprov.GetString(lang, "editEntry.hwProblemNotTradForSimp");
                msg = string.Format(msg, simp[i], trad[i]);
                errorsTrad.Add(new HeadwordProblem(false, msg));
            }
        }
    }

    // Normalize pinyin: collapse runs of spaces into one, then strip leading/trailing whitespace.
    // The search string must be TWO spaces; replacing one space with one space changes nothing,
    // and repeated spaces would never be collapsed or reported.
    string pyNorm = pinyin;
    while (pyNorm.Contains("  "))
    {
        pyNorm = pyNorm.Replace("  ", " ");
    }
    pyNorm = pyNorm.Trim();
    if (pyNorm != pinyin)
    {
        errorsPinyin.Add(new HeadwordProblem(true, tprov.GetString(lang, "editEntry.hwProblemExtraSpacesPinyin")));
    }

    // Try to match up normalized pinyin with simplified Hanzi
    CedictParser parser = new CedictParser();
    CedictEntry ee = null;
    try
    {
        ee = parser.ParseEntry(trad + " " + simp + " [" + pyNorm + "] /x/", 0, null);
    }
    catch
    {
        // Deliberately swallowed: a failed parse leaves ee null, which is reported as invalid pinyin below
    }

    if (ee == null)
    {
        errorsPinyin.Add(new HeadwordProblem(true, tprov.GetString(lang, "editEntry.hwProblemInvalidPinyin")));
    }
    else if (simp.Length == ee.ChSimpl.Length)
    {
        // Each parsed syllable must be among the readings listed for its simplified character
        for (int i = 0; i != uhiSimp.Length; ++i)
        {
            var uhi = uhiSimp[i];
            string pyDisplay = ee.GetPinyinAt(i).GetDisplayString(false);
            if (!uhi.Pinyin.Any(x => x.GetDisplayString(false) == pyDisplay))
            {
                msg = tprov.GetString(lang, "editEntry.hwProblemWrongPinyin");
                msg = string.Format(msg, pyDisplay, simp[i]);
                errorsPinyin.Add(new HeadwordProblem(false, msg));
            }
        }
    }

    return ee != null;
}