/// <summary>
/// Adds one new entry to the dictionary and records the change, all inside one transaction.
/// </summary>
/// <param name="entry">The entry to store.</param>
/// <param name="note">Note saved with the change record.</param>
/// <exception cref="Exception">Thrown when the entry's headword already exists.</exception>
public void NewEntry(CedictEntry entry, string note)
{
    // The transaction is kept in a field; it is only cleared after a successful commit below.
    tr = conn.BeginTransaction();

    string hw, target;
    entry.GetCedict(out hw, out target);

    // Restrictions check (per the original note, this "can end up dropped entry")
    checkRestrictions(entry.ChSimpl, target);

    // Refuse duplicates
    bool duplicate = SqlDict.DoesHeadExist(hw);
    if (duplicate)
    {
        throw new Exception("Headword already exists: " + hw);
    }

    // Serialize + index first, then insert the row into the entries table
    int blobId = indexEntry(entry);
    int newEntryId = storeEntry(entry.ChSimpl, hw, target, blobId);

    // Insert the change record
    var modifParams = cmdInsModifNew.Parameters;
    modifParams["@timestamp"].Value = DateTime.UtcNow;
    modifParams["@user_id"].Value = userId;
    modifParams["@note"].Value = note;
    modifParams["@entry_id"].Value = newEntryId;
    cmdInsModifNew.ExecuteNonQuery();
    int newModifId = (int)cmdInsModifNew.LastInsertedId;

    // Point the entry at the change record just created
    var linkParams = cmdUpdLastModif.Parameters;
    linkParams["@entry_id"].Value = newEntryId;
    linkParams["@last_modif_id"].Value = newModifId;
    cmdUpdLastModif.ExecuteNonQuery();

    // Commit and clear the field. Per the original note, Dispose rolls everything back
    // if it still finds a non-null transaction.
    tr.Commit();
    tr.Dispose();
    tr = null;
}
/// <summary>
/// Returns true if display font covers all Hanzi in entry; false otherwise.
/// </summary>
/// <param name="entry">Entry whose headwords and senses are checked.</param>
private bool areHanziCovered(CedictEntry entry)
{
    // Headword: simplified and traditional together
    bool headwordOk = areHanziCovered(entry.ChSimpl, entry.ChTrad);
    if (!headwordOk)
    {
        return false;
    }

    // Each sense: domain, equivalent and note are checked in that order; first failure wins
    for (int senseIx = 0; senseIx < entry.SenseCount; ++senseIx)
    {
        CedictSense sense = entry.GetSenseAt(senseIx);
        if (!areHanziCovered(sense.Domain) || !areHanziCovered(sense.Equiv) || !areHanziCovered(sense.Note))
        {
            return false;
        }
    }

    // Nothing failed
    return true;
}
/// <summary>
/// Parse a single entry. Return null if rejected for whatever reason.
/// </summary>
/// <param name="line">Line to parse.</param>
/// <param name="lineNum">Line number in input.</param>
/// <param name="swLog">Stream to log warnings and errors. Can be null.</param>
/// <returns>The parsed entry, or null if the line was rejected.</returns>
public CedictEntry ParseEntry(string line, int lineNum, StreamWriter swLog)
{
    // Cannot handle code points above 0xffff
    if (!surrogateCheck(line, swLog, lineNum))
    {
        return null;
    }

    // Initial split: header vs body. The header ends one character before the first slash.
    int firstSlash = line.IndexOf('/');
    if (firstSlash < 1)
    {
        // No slash at all, or nothing in front of it: Substring below would throw
        // ArgumentOutOfRangeException. Reject the line like any other malformed input.
        if (swLog != null)
        {
            string noSplitMsg = "Line {0}: ERROR: Cannot split header from body: no '/' found after a headword";
            swLog.WriteLine(string.Format(noSplitMsg, lineNum));
        }
        return null;
    }
    string strHead = line.Substring(0, firstSlash - 1);
    string strBody = line.Substring(firstSlash);

    // Parse entry. Any exception is logged (if a log is given) and results in null.
    CedictEntry entry = null;
    try
    {
        entry = parseEntry(strHead, strBody, swLog, lineNum);
    }
    catch (Exception ex)
    {
        string msg = "Line {0}: ERROR: {1}: {2}";
        msg = string.Format(msg, lineNum, ex.GetType().Name, ex.Message);
        if (swLog != null)
        {
            swLog.WriteLine(msg);
        }
    }
    return entry;
}
/// <summary>
/// Displays the received results, discarding existing data.
/// </summary>
/// <param name="lookupId">ID of lookup whose results are shown. If ID is smaller than last seen value, we don't show results.</param>
/// <param name="entryProvider">The entry provider; ownership passed by caller to me. Disposed before this method returns or throws.</param>
/// <param name="results">Cedict lookup results to show.</param>
/// <param name="script">Defines which script(s) to show.</param>
/// <returns>True if results got shown; false if they're discarded because newer results are already on display.</returns>
public bool SetResults(int lookupId, ICedictEntryProvider entryProvider, ReadOnlyCollection<CedictResult> results, SearchScript script)
{
    // Everything that touches the provider runs inside the try, so the provider we own
    // is disposed even if the debug-only probe below throws.
    try
    {
#if DEBUG
        // Make us crash at bottom if first result "柏林" (comes up for "bolin")
        if (results.Count > 0)
        {
            CedictEntry entry = entryProvider.GetEntry(results[0].EntryId);
            crashForTest = entry.ChSimpl == "柏林";
        }
#endif
        return doSetResults(lookupId, entryProvider, results, script);
    }
    finally
    {
        if (entryProvider != null)
        {
            entryProvider.Dispose();
        }
    }
}
/// <summary>
/// Reads the 'simp', 'trad', 'pinyin', 'trg' and 'note' request parameters, builds an entry
/// from them and stores it as a new entry. A missing parameter raises a 400 ApiException;
/// any failure while building or storing is logged and reported as Success = false.
/// </summary>
public override void Process()
{
    // All five parameters are mandatory; they are checked in this order.
    string simp = Req.Params["simp"];
    if (simp == null)
    {
        throw new ApiException(400, "Missing 'simp' parameter.");
    }
    string trad = Req.Params["trad"];
    if (trad == null)
    {
        throw new ApiException(400, "Missing 'trad' parameter.");
    }
    string pinyin = Req.Params["pinyin"];
    if (pinyin == null)
    {
        throw new ApiException(400, "Missing 'pinyin' parameter.");
    }
    string trg = Req.Params["trg"];
    if (trg == null)
    {
        throw new ApiException(400, "Missing 'trg' parameter.");
    }
    string note = Req.Params["note"];
    if (note == null)
    {
        throw new ApiException(400, "Missing 'note' parameter.");
    }

    // Optimistic result; flipped to failure if anything below throws
    Result outcome = new Result();
    outcome.Success = true;

    SqlDict.SimpleBuilder entryBuilder = null;
    try
    {
        entryBuilder = new SqlDict.SimpleBuilder(0);
        CedictEntry newEntry = SqlDict.BuildEntry(simp, trad, pinyin, trg);
        entryBuilder.NewEntry(newEntry, note);
    }
    catch (Exception ex)
    {
        // Failure is logged and reported to the caller, not rethrown
        DiagLogger.LogError(ex);
        outcome.Success = false;
    }
    finally
    {
        if (entryBuilder != null)
        {
            entryBuilder.Dispose();
        }
    }

    // Hand the result to our caller
    Res = outcome;
}
/// <summary>
/// Gets the entry formatted as a single CEDICT plain text line:
/// traditional, simplified, pinyin in square brackets, then slash-separated senses.
/// </summary>
/// <param name="entry">The entry to format.</param>
/// <returns>The formatted line.</returns>
public static string GetCedict(CedictEntry entry)
{
    StringBuilder line = new StringBuilder();

    // Headword part
    line.Append(entry.ChTrad).Append(' ').Append(entry.ChSimpl);
    line.Append(" [").Append(GetPinyinCedict(entry.Pinyin)).Append("] /");

    // Senses: domain, equivalent, note; each sense closed by a slash
    foreach (var sense in entry.Senses)
    {
        string domain = HybridToCedict(sense.Domain);
        string equiv = HybridToCedict(sense.Equiv);
        string note = HybridToCedict(sense.Note);
        bool hasEquiv = equiv != string.Empty;
        bool hasNote = note != string.Empty;

        line.Append(domain);
        // A space follows the domain only if it is non-empty, is not the bare "CL:" marker,
        // and something comes after it
        if (domain != string.Empty && domain != "CL:" && (hasEquiv || hasNote))
        {
            line.Append(' ');
        }
        line.Append(equiv);
        // Space between equivalent and note only when both are present
        if (hasEquiv && hasNote)
        {
            line.Append(' ');
        }
        line.Append(note);
        line.Append('/');
    }

    return line.ToString();
}
/// <summary>
/// Writes the entry as one line: traditional, simplified, space-separated pinyin in
/// square brackets, then the plain text of each sense, each followed by a slash.
/// </summary>
/// <param name="entry">The entry to write.</param>
/// <returns>The resulting line.</returns>
public static string Write(CedictEntry entry)
{
    StringBuilder line = new StringBuilder();
    line.Append(entry.ChTrad);
    line.Append(' ');
    line.Append(entry.ChSimpl);

    // Pinyin syllables, separated by single spaces
    line.Append(" [");
    for (int syllIx = 0; syllIx < entry.PinyinCount; ++syllIx)
    {
        if (syllIx > 0)
        {
            line.Append(' ');
        }
        line.Append(entry.GetPinyinAt(syllIx).GetDisplayString(false));
    }
    line.Append("] /");

    // Senses. A slash inside a sense would clash with the separator, so it becomes a backslash.
    // Replace leaves the text untouched when there is no slash.
    for (int senseIx = 0; senseIx < entry.SenseCount; ++senseIx)
    {
        string text = entry.GetSenseAt(senseIx).GetPlainText().Replace('/', '\\');
        line.Append(text);
        line.Append('/');
    }
    return line.ToString();
}
/// <summary>
/// Loads the entries for up to ten IDs in one query and places each deserialized entry
/// at the index its ID has in <paramref name="batch"/>.
/// </summary>
/// <param name="batch">IDs to retrieve; only the first ten are sent to the query.</param>
/// <param name="buf">Scratch buffer that receives each row's binary data.</param>
/// <param name="entries">Cleared, then filled with one slot per ID in batch; slots with no matching row stay null.</param>
/// <param name="cmdSelBinary10">Command with parameters @id0 through @id9; column 0 is the binary data, column 1 the ID.</param>
private void retrieveBatch(List<int> batch, byte[] buf, List<CedictEntry> entries, MySqlCommand cmdSelBinary10)
{
    entries.Clear();

    // Fill all ten ID parameters; slots beyond the batch size get -1
    for (int slot = 0; slot < 10; ++slot)
    {
        cmdSelBinary10.Parameters["@id" + slot].Value = slot < batch.Count ? batch[slot] : -1;
    }

    // One null placeholder per requested ID
    foreach (int unused in batch)
    {
        entries.Add(null);
    }

    using (MySqlDataReader rdr = cmdSelBinary10.ExecuteReader())
    {
        while (rdr.Read())
        {
            int byteCount = (int)rdr.GetBytes(0, 0, buf, 0, buf.Length);
            int id = rdr.GetInt32(1);
            using (BinReader reader = new BinReader(buf, byteCount))
            {
                CedictEntry loaded = new CedictEntry(reader);
                // Position in the result list mirrors the ID's position in the batch
                entries[batch.IndexOf(id)] = loaded;
            }
        }
    }
}
/// <summary>
/// Parses one line into an entry. Yields null for blank lines, comment lines, lines that
/// fail the surrogate check, and lines whose parsing throws.
/// </summary>
/// <param name="line">Line to parse.</param>
/// <param name="lineNum">Line number in input.</param>
/// <param name="swLog">Stream to log warnings. Can be null.</param>
/// <param name="swDrop">Stream to record dropped entries (failed to parse). Can be null.</param>
/// <returns>The parsed entry, or null.</returns>
public static CedictEntry ParseEntry(string line, int lineNum, StreamWriter swLog, StreamWriter swDrop)
{
    // Nothing to do for blank lines and comments
    bool isBlank = line.Trim() == "";
    if (isBlank || line.StartsWith("#"))
    {
        return null;
    }
    // Cannot handle code points above 0xffff
    if (!surrogateCheck(line, swLog, lineNum))
    {
        return null;
    }

    // Sanitize and split into header and body
    string headPart, bodyPart;
    sanitizeAndSplit(line, out headPart, out bodyPart);

    try
    {
        return parseEntry(headPart, bodyPart, swLog, lineNum);
    }
    catch
    {
        // Failed to parse: record the raw line among the dropped ones
        if (swDrop != null)
        {
            swDrop.WriteLine(line);
        }
        return null;
    }
}
/// <summary>
/// Writes the entry as two separate strings: headword part and target part.
/// </summary>
/// <param name="entry">The entry to write.</param>
/// <param name="head">Receives traditional, simplified and the space-separated pinyin in square brackets.</param>
/// <param name="trg">Receives a leading slash followed by each sense's plain text, each closed by a slash.</param>
public static void Write(CedictEntry entry, out string head, out string trg)
{
    // Headword part
    StringBuilder headBuf = new StringBuilder();
    headBuf.Append(entry.ChTrad).Append(' ').Append(entry.ChSimpl).Append(" [");
    for (int syllIx = 0; syllIx < entry.PinyinCount; ++syllIx)
    {
        if (syllIx > 0)
        {
            headBuf.Append(' ');
        }
        headBuf.Append(entry.GetPinyinAt(syllIx).GetDisplayString(false));
    }
    headBuf.Append("]");
    head = headBuf.ToString();

    // Target part. Slashes inside a sense are turned into backslashes so they cannot be
    // mistaken for the separator; Replace is a no-op when there is none.
    StringBuilder trgBuf = new StringBuilder("/");
    for (int senseIx = 0; senseIx < entry.SenseCount; ++senseIx)
    {
        trgBuf.Append(entry.GetSenseAt(senseIx).GetPlainText().Replace('/', '\\'));
        trgBuf.Append('/');
    }
    trg = trgBuf.ToString();
}
/// <summary>
/// Keeps the entry as a result if the query string occurs in its simplified or,
/// failing that, its traditional headword.
/// </summary>
/// <param name="entry">Candidate entry; null is ignored.</param>
/// <param name="entryId">Not used by this method.</param>
/// <param name="query">Hanzi string to look for.</param>
/// <param name="res">List that receives a new result when the entry matches.</param>
private void verifyHanzi(CedictEntry entry, int entryId, string query, List<CedictResult> res)
{
    if (entry == null)
    {
        return;
    }

    // Position/length of query in the simplified headword.
    // Ordinal search: Hanzi must match code unit by code unit; the culture-sensitive default
    // can ignore characters or depend on the current culture.
    int hiliteStart = entry.ChSimpl.IndexOf(query, System.StringComparison.Ordinal);
    int hiliteLength = hiliteStart != -1 ? query.Length : 0;

    // If not found in simplified, check in traditional
    if (hiliteLength == 0)
    {
        hiliteStart = entry.ChTrad.IndexOf(query, System.StringComparison.Ordinal);
        if (hiliteStart != -1)
        {
            hiliteLength = query.Length;
        }
    }

    // Entry is a keeper if either the simplified or the traditional headword contains the query.
    // A zero-length match (empty query) is not kept.
    if (hiliteLength != 0)
    {
        CedictResult cr = new CedictResult(CedictResult.SimpTradWarning.None, entry, entry.HanziPinyinMap, hiliteStart, hiliteLength);
        res.Add(cr);
    }
}
/// <summary>
/// Renders the query's characters in annotation mode. Characters inside the annotated
/// range may get a tone class from the entry's pinyin; characters outside it are faint.
/// </summary>
/// <param name="query">Text whose characters are rendered one by one.</param>
/// <param name="entry">Entry supplying the Hanzi-to-pinyin map and pinyin for the annotated range.</param>
/// <param name="annStart">Index in query where the annotated range starts.</param>
/// <param name="annLength">Length of the annotated range.</param>
/// <param name="writer">Receives the markup.</param>
private void renderHanzi(string query, CedictEntry entry, int annStart, int annLength, HtmlTextWriter writer)
{
    for (int pos = 0; pos < query.Length; ++pos)
    {
        bool annotated = pos >= annStart && pos < annStart + annLength;

        // Pinyin syllable for this character, if it is annotated and mapped
        PinyinSyllable syll = null;
        if (annotated)
        {
            int syllIx = entry.HanziPinyinMap[pos - annStart];
            if (syllIx != -1)
            {
                syll = entry.Pinyin[syllIx];
            }
        }

        // Tone class, if tones are shown. -1 (unknown) and 0 (neutral) get no markup.
        string cls = "";
        if (tones != UiTones.None && syll != null)
        {
            switch (syll.Tone)
            {
                case 1: cls = "tone1"; break;
                case 2: cls = "tone2"; break;
                case 3: cls = "tone3"; break;
                case 4: cls = "tone4"; break;
            }
        }
        // Outside the annotation: faint
        if (!annotated)
        {
            cls += " faint";
        }
        // Stroke order animation marker
        if (hanim)
        {
            cls += " hanim";
        }

        // Wrap in a span only when there is a class to carry
        bool wrap = !string.IsNullOrEmpty(cls);
        if (wrap)
        {
            writer.AddAttribute("class", cls);
            writer.RenderBeginTag(HtmlTextWriterTag.Span);
        }
        writer.WriteEncodedText(query[pos].ToString());
        if (wrap)
        {
            writer.RenderEndTag();
        }
    }
}
/// <summary>
/// Builds the entry's headword string (traditional, simplified, pinyin in square brackets)
/// and files it in <c>heads</c> under its lower-cased form.
/// </summary>
/// <param name="entry">Entry whose headword is filed.</param>
private void fileHead(CedictEntry entry)
{
    // Pinyin syllables, later joined by single spaces
    List<string> syllables = new List<string>();
    foreach (var ps in entry.Pinyin)
    {
        syllables.Add(ps.GetDisplayString(false));
    }
    string headStr = entry.ChTrad + " " + entry.ChSimpl + " [" + string.Join(" ", syllables) + "]";

    // Headwords differing only in case share one bucket
    string key = headStr.ToLowerInvariant();
    if (!heads.ContainsKey(key))
    {
        heads[key] = new List<string>();
    }
    heads[key].Add(headStr);
}
/// <summary>
/// Adds one entry's data to the stats: every distinct token found in the entry's
/// equivalents increases that token's count by one.
/// </summary>
/// <param name="entry">Entry to take tokens from.</param>
public void CalculateEntryStats(CedictEntry entry)
{
    // Collect the entry's tokens; the set makes each token count once per entry
    HashSet<string> entryTokens = new HashSet<string>();
    foreach (CedictSense sense in entry.Senses)
    {
        foreach (TextRun run in sense.Equiv.Runs)
        {
            // Chinese runs contribute nothing
            if (!(run is TextRunZho))
            {
                getTokens(run as TextRunLatin, entryTokens);
            }
        }
    }

    // Bump the counters
    foreach (string token in entryTokens)
    {
        if (!tokenCounts.ContainsKey(token))
        {
            tokenCounts[token] = 1;
        }
        else
        {
            ++tokenCounts[token];
        }
    }
}
/// <summary>
/// Feeds the parser a comment line followed by one entry line and checks that the
/// first thing read is the entry, with all its fields.
/// </summary>
public async Task ReadAsync_ShouldSkipComments()
{
    string input =
        "# This is a comment\n" +
        "再見 再见 [zai4 jian4] /goodbye/see you again later/";
    byte[] utf8 = Encoding.UTF8.GetBytes(input);

    using (var reader = new StreamReader(new MemoryStream(utf8)))
    {
        var parser = new CedictParser(reader);

        CedictEntry parsed = await parser.ReadAsync();

        Assert.IsNotNull(parsed);
        // Headword
        Assert.AreEqual("再見", parsed.Traditional);
        Assert.AreEqual("再见", parsed.Simplified);
        Assert.AreEqual("zai4 jian4", parsed.Pinyin);
        // Definitions
        Assert.IsNotNull(parsed.Definitions);
        Assert.AreEqual(2, parsed.Definitions.Length);
        Assert.AreEqual("goodbye", parsed.Definitions[0]);
        Assert.AreEqual("see you again later", parsed.Definitions[1]);
    }
}
/// <summary>
/// Adds the entry's pinyin to the DB's index/instance tables: one row per distinct
/// (lower-cased text, tone) syllable.
/// </summary>
/// <param name="entry">Entry whose pinyin is indexed.</param>
/// <param name="entryId">Value stored in the @blob_id parameter of every inserted row.</param>
private void indexPinyin(CedictEntry entry, int entryId)
{
    // Distinct syllables, in order of first occurrence
    List<PinyinSyllable> distinct = new List<PinyinSyllable>();
    foreach (PinyinSyllable ps in entry.Pinyin)
    {
        // Lower-cased copy, so case differences do not create separate items
        PinyinSyllable lowered = new PinyinSyllable(ps.Text.ToLowerInvariant(), ps.Tone);
        bool seen = distinct.Exists(x => x.Text == lowered.Text && x.Tone == lowered.Tone);
        if (!seen)
        {
            distinct.Add(lowered);
        }
    }

    // Values shared by every row of this entry
    cmdInsPinyinInstance.Parameters["@syll_count"].Value = distinct.Count;
    cmdInsPinyinInstance.Parameters["@blob_id"].Value = entryId;

    // One insert per distinct syllable
    foreach (PinyinSyllable syll in distinct)
    {
        cmdInsPinyinInstance.Parameters["@pinyin_hash"].Value = CedictEntry.Hash(syll.Text);
        cmdInsPinyinInstance.Parameters["@tone"].Value = syll.Tone;
        cmdInsPinyinInstance.ExecuteNonQuery();
    }
}
/// <summary>
/// Renders the query's characters in annotation mode into a string builder. Characters
/// inside the annotated range may get a tone class from the entry's pinyin; characters
/// outside it are faint.
/// </summary>
/// <param name="query">Text whose characters are rendered one by one.</param>
/// <param name="entry">Entry supplying the Hanzi-to-pinyin map and pinyin for the annotated range.</param>
/// <param name="annStart">Index in query where the annotated range starts.</param>
/// <param name="annLength">Length of the annotated range.</param>
/// <param name="sb">Receives the markup.</param>
private void renderHanzi(string query, CedictEntry entry, int annStart, int annLength, StringBuilder sb)
{
    for (int pos = 0; pos < query.Length; ++pos)
    {
        bool annotated = pos >= annStart && pos < annStart + annLength;

        // Pinyin syllable for this character, if it is annotated and mapped
        PinyinSyllable syll = null;
        if (annotated)
        {
            int syllIx = entry.HanziPinyinMap[pos - annStart];
            if (syllIx != -1)
            {
                syll = entry.Pinyin[syllIx];
            }
        }

        // Tone class, if tones are shown. -1 (unknown) and 0 (neutral) get no markup.
        string cls = "";
        if (tones != UiTones.None && syll != null)
        {
            switch (syll.Tone)
            {
                case 1: cls = "tone1"; break;
                case 2: cls = "tone2"; break;
                case 3: cls = "tone3"; break;
                case 4: cls = "tone4"; break;
            }
        }
        // Outside the annotation: faint
        if (!annotated)
        {
            cls += " faint";
        }
        // Stroke order animation marker
        if (hanim)
        {
            cls += " hanim";
        }

        // Wrap in a span only when there is a class to carry
        bool wrap = !string.IsNullOrEmpty(cls);
        if (wrap)
        {
            sb.Append("<span class='" + cls + "'>");
        }
        sb.Append(HtmlEncoder.Default.Encode(query[pos].ToString()));
        if (wrap)
        {
            sb.Append("</span>");
        }
    }
}
/// <summary>
/// Renders the headword's Hanzi for a normal lookup result, one character at a time,
/// wrapping a character in a span whenever it carries a class.
/// </summary>
/// <param name="entry">Entry whose headword is rendered.</param>
/// <param name="simp">True to render the simplified headword; false for traditional.</param>
/// <param name="faintIdentTrad">If true, characters equal to the simplified character at the same index are marked faint.</param>
/// <param name="writer">Receives the markup.</param>
private void renderHanzi(CedictEntry entry, bool simp, bool faintIdentTrad, HtmlTextWriter writer)
{
    string hanzi = simp ? entry.ChSimpl : entry.ChTrad;
    for (int pos = 0; pos < hanzi.Length; ++pos)
    {
        char ch = hanzi[pos];

        // Pinyin syllable mapped to this character, if any
        int syllIx = entry.HanziPinyinMap[pos];
        PinyinSyllable syll = syllIx != -1 ? entry.Pinyin[syllIx] : null;

        // Tone class, if tones are shown. -1 (unknown) and 0 (neutral) get no markup.
        string cls = "";
        if (tones != UiTones.None && syll != null)
        {
            switch (syll.Tone)
            {
                case 1: cls = "tone1"; break;
                case 2: cls = "tone2"; break;
                case 3: cls = "tone3"; break;
                case 4: cls = "tone4"; break;
            }
        }
        // When both scripts are shown, traditional characters identical to simplified are faint
        if (faintIdentTrad && ch == entry.ChSimpl[pos])
        {
            cls += " faint";
        }
        // Stroke order animation marker
        if (hanim)
        {
            cls += " hanim";
        }

        // Wrap in a span only when there is a class to carry
        bool wrap = !string.IsNullOrEmpty(cls);
        if (wrap)
        {
            writer.AddAttribute("class", cls);
            writer.RenderBeginTag(HtmlTextWriterTag.Span);
        }
        writer.WriteEncodedText(ch.ToString());
        if (wrap)
        {
            writer.RenderEndTag();
        }
    }
}
/// <summary>
/// Ctor: renders a dictionary entry in change history. Both scripts are shown, with
/// no tone markup, no stroke order animation and no dimming of identical traditional characters.
/// </summary>
/// <param name="entry">The entry to render.</param>
public EntryRenderer(CedictEntry entry)
{
    entryToRender = entry;

    // Fixed display settings for this mode
    script = UiScript.Both;
    tones = UiTones.None;
    hanim = false;
    dimIdenticalTrad = false;
}
/// <summary>
/// Renders the headword's Hanzi for a normal lookup result into a string builder, one
/// character at a time, wrapping a character in a span whenever it carries a class.
/// </summary>
/// <param name="entry">Entry whose headword is rendered.</param>
/// <param name="simp">True to render the simplified headword; false for traditional.</param>
/// <param name="faintIdentTrad">If true, characters equal to the simplified character at the same index are marked faint.</param>
/// <param name="sb">Receives the markup.</param>
private void renderHanzi(CedictEntry entry, bool simp, bool faintIdentTrad, StringBuilder sb)
{
    string hanzi = simp ? entry.ChSimpl : entry.ChTrad;
    for (int pos = 0; pos < hanzi.Length; ++pos)
    {
        char ch = hanzi[pos];

        // Pinyin syllable mapped to this character, if any
        int syllIx = entry.HanziPinyinMap[pos];
        PinyinSyllable syll = syllIx != -1 ? entry.Pinyin[syllIx] : null;

        // Tone class, if tones are shown. -1 (unknown) and 0 (neutral) get no markup.
        string cls = "";
        if (tones != UiTones.None && syll != null)
        {
            switch (syll.Tone)
            {
                case 1: cls = "tone1"; break;
                case 2: cls = "tone2"; break;
                case 3: cls = "tone3"; break;
                case 4: cls = "tone4"; break;
            }
        }
        // When both scripts are shown, traditional characters identical to simplified are faint
        if (faintIdentTrad && ch == entry.ChSimpl[pos])
        {
            cls += " faint";
        }
        // Stroke order animation marker
        if (hanim)
        {
            cls += " hanim";
        }

        // Wrap in a span only when there is a class to carry
        bool wrap = !string.IsNullOrEmpty(cls);
        if (wrap)
        {
            sb.Append("<span class='" + cls + "'>");
        }
        sb.Append(HtmlEncoder.Default.Encode(ch.ToString()));
        if (wrap)
        {
            sb.Append("</span>");
        }
    }
}
/// <summary>
/// Adds the entry's Hanzi to the DB's index/instance tables. Each distinct character is
/// inserted once, flagged 1 if it occurs only in the simplified headword, 2 if only in
/// the traditional one, and 3 if in both.
/// </summary>
/// <param name="entry">Entry whose headwords are indexed.</param>
/// <param name="entryId">Value stored in the @blob_id parameter of every inserted row.</param>
private void indexHanzi(CedictEntry entry, int entryId)
{
    // Distinct characters per headword; counts are taken before the sets are split up
    HashSet<char> onlySimp = new HashSet<char>(entry.ChSimpl);
    int simpCount = onlySimp.Count;
    HashSet<char> onlyTrad = new HashSet<char>(entry.ChTrad);
    int tradCount = onlyTrad.Count;

    // Characters present in both headwords
    HashSet<char> shared = new HashSet<char>();
    foreach (char c in onlySimp)
    {
        if (onlyTrad.Contains(c))
        {
            shared.Add(c);
        }
    }
    // Take the shared ones out, leaving each set with its exclusive characters
    foreach (char c in shared)
    {
        onlySimp.Remove(c);
        onlyTrad.Remove(c);
    }

    // Values common to all rows of this entry
    cmdInsHanziInstance.Parameters["@simp_count"].Value = simpCount;
    cmdInsHanziInstance.Parameters["@trad_count"].Value = tradCount;
    cmdInsHanziInstance.Parameters["@blob_id"].Value = entryId;

    // Simplified-only characters
    foreach (char c in onlySimp)
    {
        cmdInsHanziInstance.Parameters["@hanzi"].Value = (int)c;
        cmdInsHanziInstance.Parameters["@simptrad"].Value = (byte)1;
        cmdInsHanziInstance.ExecuteNonQuery();
    }
    // Traditional-only characters
    foreach (char c in onlyTrad)
    {
        cmdInsHanziInstance.Parameters["@hanzi"].Value = (int)c;
        cmdInsHanziInstance.Parameters["@simptrad"].Value = (byte)2;
        cmdInsHanziInstance.ExecuteNonQuery();
    }
    // Characters in both
    foreach (char c in shared)
    {
        cmdInsHanziInstance.Parameters["@hanzi"].Value = (int)c;
        cmdInsHanziInstance.Parameters["@simptrad"].Value = (byte)3;
        cmdInsHanziInstance.ExecuteNonQuery();
    }
}
/// <summary>
/// Saves an edited headword and target for an existing entry on behalf of the
/// authenticated user. Responds 400 if a parameter is missing and 401 if there is no
/// valid session; otherwise returns an EditEntryResult carrying either Success or an error text.
/// </summary>
/// <param name="entryId">String form of the entry's ID.</param>
/// <param name="hw">New headword.</param>
/// <param name="trg">New target, one sense per line.</param>
/// <param name="note">Note stored with the change.</param>
/// <param name="lang">Language used to look up error texts.</param>
public IActionResult SaveFullEntry([FromForm] string entryId, [FromForm] string hw, [FromForm] string trg, [FromForm] string note, [FromForm] string lang)
{
    bool anyMissing = entryId == null || hw == null || trg == null || note == null || lang == null;
    if (anyMissing)
    {
        return StatusCode(400, "Missing parameter(s).");
    }

    // Must be authenticated user
    int userId;
    string userName;
    auth.CheckSession(HttpContext.Request.Headers, out userId, out userName);
    if (userId < 0)
    {
        return StatusCode(401, "Request must contain authentication token.");
    }

    EditEntryResult result = new EditEntryResult();
    int numericId = EntryId.StringToId(entryId);

    // Turn the line-per-sense text into slash-delimited form: normalize line breaks,
    // neutralize slashes inside senses, then make every line break a slash and wrap in slashes
    trg = "/" + trg.Replace("\r\n", "\n").Replace('/', '\\').Replace('\n', '/') + "/";

    // Trial parse. Any exception is treated the same as a null result: bad data.
    CedictParser parser = new CedictParser();
    CedictEntry parsed = null;
    try
    {
        parsed = parser.ParseEntry(hw + " " + trg, 0, null);
    }
    catch
    {
    }
    if (parsed == null)
    {
        result.Error = TextProvider.Instance.GetString(lang, "editEntry.badDataOnSave");
        return new ObjectResult(result);
    }

    bool saved;
    using (SqlDict.SimpleBuilder builder = dict.GetSimpleBuilder(userId))
    {
        saved = builder.ChangeHeadAndTarget(userId, numericId, hw, trg, note);
    }
    // Not persisted: violates uniqueness constraint
    if (!saved)
    {
        result.Error = TextProvider.Instance.GetString(lang, "editEntry.duplicateOnSave");
        return new ObjectResult(result);
    }

    // Refresh cached contrib score, then report success
    auth.RefreshUserInfo(userId);
    result.Success = true;
    return new ObjectResult(result);
}
/// <summary>
/// Loads each pinyin lookup candidate and keeps it only if the query syllables really
/// occur consecutively in the entry's pinyin and all of the entry's Hanzi are covered.
/// </summary>
/// <param name="br">Reader positioned per candidate to deserialize the entry.</param>
/// <param name="poss">Positions of the candidate entries.</param>
/// <param name="sylls">Query syllables; a tone of -1 matches any tone.</param>
/// <returns>The kept results, each paired with its entry.</returns>
List<ResWithEntry> doLoadVerifyPinyin(BinReader br, IEnumerable<int> poss, List<PinyinSyllable> sylls)
{
    List<ResWithEntry> kept = new List<ResWithEntry>();

    foreach (int pos in poss)
    {
        // Load the candidate
        br.Position = pos;
        CedictEntry entry = new CedictEntry(br);

        // Look for the first window of the entry's pinyin that matches the whole query
        int matchAt = -1;
        int lastStart = entry.PinyinCount - sylls.Count;
        for (int start = 0; start <= lastStart && matchAt == -1; ++start)
        {
            bool windowMatches = true;
            for (int k = 0; k < sylls.Count && windowMatches; ++k)
            {
                PinyinSyllable inEntry = entry.GetPinyinAt(start + k);
                PinyinSyllable inQuery = sylls[k];
                // Text compared against the entry's lower-cased text; tone only if the query specifies one
                windowMatches = inEntry.Text.ToLowerInvariant() == inQuery.Text
                    && (inQuery.Tone == -1 || inEntry.Tone == inQuery.Tone);
            }
            if (windowMatches)
            {
                matchAt = start;
            }
        }

        // Drop if the query is not there, or if any Hanzi is unprintable
        if (matchAt == -1 || !areHanziCovered(entry))
        {
            continue;
        }

        // Keeper
        CedictResult res = new CedictResult(pos, entry.HanziPinyinMap, matchAt, sylls.Count);
        kept.Add(new ResWithEntry(res, entry));
    }

    return kept;
}
/// <summary>
/// Ctor: renders a dictionary entry in change history. Both scripts are shown, with
/// no tone markup and no stroke order animation.
/// </summary>
/// <param name="lang">Language stored for this renderer.</param>
/// <param name="entry">The entry to render.</param>
/// <param name="dimIdenticalTrad">Stored as the renderer's dim-identical-traditional setting.</param>
/// <param name="extraEntryClass">Extra class stored for the entry; empty by default.</param>
public EntryRenderer(string lang, CedictEntry entry, bool dimIdenticalTrad, string extraEntryClass = "")
{
    // Caller-supplied values
    this.lang = lang;
    this.dimIdenticalTrad = dimIdenticalTrad;
    this.extraEntryClass = extraEntryClass;
    entryToRender = entry;

    // Fixed display settings for this mode
    script = UiScript.Both;
    tones = UiTones.None;
    hanim = false;
}
/// <summary>
/// See <see cref="IHeadwordInfo.GetEntries"/>.
/// Looks up the simplified headword's hash and walks the CEDICT and HanDeDict chains
/// stored for it, keeping only entries whose simplified headword equals the query.
/// </summary>
/// <param name="simp">Simplified headword to look up.</param>
/// <param name="ced">Receives the matching CEDICT entries; empty if none.</param>
/// <param name="hdd">Receives the matching HanDeDict entries; empty if none.</param>
public void GetEntries(string simp, out CedictEntry[] ced, out CedictEntry[] hdd)
{
    List<CedictEntry> cedList = new List<CedictEntry>();
    List<CedictEntry> hddList = new List<CedictEntry>();

    // Do we have this hash?
    HashChainPointer wanted = new HashChainPointer(CedictEntry.Hash(simp));
    int pos = Array.BinarySearch(hashPtrs, wanted, new HashComp());

    using (BinReader br = new BinReader(dataFileName))
    {
        if (pos >= 0)
        {
            // Two chains to walk, CEDICT first; a start position of 0 means an empty chain
            int[] chainStarts = { hashPtrs[pos].CedictPos, hashPtrs[pos].HanDeDictPos };
            List<CedictEntry>[] targets = { cedList, hddList };

            for (int chain = 0; chain < chainStarts.Length; ++chain)
            {
                int binPos = chainStarts[chain];
                while (binPos != 0)
                {
                    br.Position = binPos;
                    // Stored first: position of next item in chain. Then the entry itself.
                    binPos = br.ReadInt();
                    CedictEntry entry = new CedictEntry(br);
                    // Same hash does not guarantee same headword: could be a collision
                    if (entry.ChSimpl == simp)
                    {
                        targets[chain].Add(entry);
                    }
                }
            }
        }
    }

    // Our results
    ced = cedList.ToArray();
    hdd = hddList.ToArray();
}
/// <summary>
/// Inserts one row through the entry insert command and returns the ID it was given.
/// </summary>
/// <param name="simp">Simplified headword; only its hash is stored.</param>
/// <param name="head">Headword text stored in @hw.</param>
/// <param name="trg">Target text stored in @trg.</param>
/// <param name="binId">Value stored in @bin_id.</param>
/// <returns>The command's last inserted ID, cast to int.</returns>
protected int storeEntry(string simp, string head, string trg, int binId)
{
    var p = cmdInsEntry.Parameters;
    p["@hw"].Value = head;
    p["@trg"].Value = trg;
    p["@simp_hash"].Value = CedictEntry.Hash(simp);
    // New rows start with status 0 and deleted 0
    p["@status"].Value = 0;
    p["@deleted"].Value = 0;
    p["@bin_id"].Value = binId;

    cmdInsEntry.ExecuteNonQuery();
    int newId = (int)cmdInsEntry.LastInsertedId;
    return newId;
}
/// <summary>
/// Processes one dictionary line: parses it, counts it as dropped if unusable, otherwise
/// serializes it and remembers its file position under the hash of its simplified headword.
/// </summary>
/// <param name="line">The input line; empty lines and lines starting with "#" are skipped.</param>
/// <param name="cedict">True to use the CEDICT counters and hash map; false for the HanDeDict ones.</param>
/// <param name="bw">Writer that receives the serialized entry.</param>
private void dictLine(string line, bool cedict, BinWriter bw)
{
    if (line == "" || line.StartsWith("#"))
    {
        return;
    }

    // Parse; an entry is only usable if simplified, traditional and pinyin have equal length
    CedictEntry entry = parser.ParseEntry(line, 0, null);
    bool usable = entry != null
        && entry.ChSimpl.Length == entry.ChTrad.Length
        && entry.ChSimpl.Length == entry.PinyinCount;

    // Unusable lines are only counted
    if (!usable)
    {
        if (cedict)
        {
            ++cedictDropped;
        }
        else
        {
            ++hddDropped;
        }
        return;
    }

    // Serialize. First comes a placeholder for the hash chain link (next entry in file
    // with same hash), to be filled later; then the entry itself.
    int filePos = bw.Position;
    bw.WriteInt(0);
    entry.Serialize(bw);

    // Remember the file position under the simplified headword's hash
    Dictionary<int, List<int>> hashPoss = cedict ? cedictHashPoss : hddHashPoss;
    int hash = CedictEntry.Hash(entry.ChSimpl);
    List<int> positions;
    if (!hashPoss.TryGetValue(hash, out positions))
    {
        positions = new List<int>();
        hashPoss[hash] = positions;
    }
    positions.Add(filePos);
}
/// <summary>
/// Import new entries from file as a single bulk change.
/// </summary>
/// <param name="dictPath">
/// Input file. Its first two lines are commented metainfo: the first holds the user name,
/// the second the bulk change's comment. Every further non-empty, non-comment line is one entry.
/// </param>
/// <param name="workingFolder">Folder where the timestamped import log file is created.</param>
/// <exception cref="InvalidDataException">Thrown if the file lacks the two header lines, or one of them is empty.</exception>
public void BulkAdd(string dictPath, string workingFolder)
{
    CedictParser parser = new CedictParser();
    Startup.InitDB(config, null, false);
    SqlDict dict = new SqlDict(null, mut);

    // Log file name carries the local time the import started
    DateTime dt = DateTime.Now;
    string fnLog = "importlog-" + dt.Year + "-" + dt.Month.ToString("00") + "-" + dt.Day.ToString("00") +
        "!" + dt.Hour.ToString("00") + "-" + dt.Minute.ToString("00") + "-" + dt.Second.ToString("00") + ".txt";
    fnLog = Path.Combine(workingFolder, fnLog);

    using (FileStream fs = new FileStream(dictPath, FileMode.Open, FileAccess.Read))
    using (StreamReader sr = new StreamReader(fs))
    using (FileStream fsLog = new FileStream(fnLog, FileMode.Create, FileAccess.ReadWrite))
    using (StreamWriter swLog = new StreamWriter(fsLog))
    {
        // First two lines are commented and have metainfo
        // First line: user name
        // Second line: bulk change's comment
        string userLine = sr.ReadLine();
        string noteLine = sr.ReadLine();
        // ReadLine returns null at end of input, and Substring(1) fails on an empty line:
        // report a malformed header explicitly instead of crashing with an unrelated exception.
        if (string.IsNullOrEmpty(userLine) || string.IsNullOrEmpty(noteLine))
        {
            throw new InvalidDataException("Input file must start with two comment lines: user name, then change note.");
        }
        // Drop the leading comment character
        string user = userLine.Substring(1).Trim();
        string note = noteLine.Substring(1).Trim();
        int lineNum = 2;

        using (SqlDict.ImportBuilder builder = dict.GetBulkBuilder(user, note))
        {
            string line;
            while ((line = sr.ReadLine()) != null)
            {
                ++lineNum;
                if (line == "" || line.StartsWith("#"))
                {
                    continue;
                }
                CedictEntry entry = parser.ParseEntry(line, lineNum, swLog);
                if (entry == null)
                {
                    // Parser rejected the line: keep the raw line in the log
                    swLog.WriteLine(line);
                    continue;
                }
                entry.Status = EntryStatus.Approved;
                bool ok = builder.AddNewEntry(entry);
                if (!ok)
                {
                    swLog.WriteLine("Line " + lineNum + ": Entry rejected by importer.");
                    swLog.WriteLine(line);
                }
            }
            builder.CommitRest();
        }
    }
}
/// <summary>
/// Returns true if none of the entry's senses contains "(u.E.)" in its plain text;
/// false as soon as one does.
/// </summary>
/// <param name="entry">Entry whose senses are inspected.</param>
private static bool isVerified(CedictEntry entry)
{
    foreach (var sense in entry.Senses)
    {
        // One marked sense is enough to make the entry unverified
        if (sense.GetPlainText().Contains("(u.E.)"))
        {
            return false;
        }
    }
    return true;
}
/// <summary>
/// Renders the headword's Hanzi: simplified and/or traditional, depending on the script
/// setting. When both are shown they are separated by a bullet if the simplified headword
/// is within the one-line limit, and by a break marker otherwise.
/// </summary>
/// <param name="sb">Receives the markup.</param>
/// <param name="extraSimpClass">Appended to the simplified span's class attribute.</param>
/// <param name="extraTradClass">If not empty, appended to the traditional span's classes.</param>
public void XRenderHanzi(StringBuilder sb, string extraSimpClass = "", string extraTradClass = "")
{
    // Render the explicitly given entry, or else the one from the lookup result
    CedictEntry entry = entryToRender;
    if (entry == null)
    {
        entry = res.Entry;
    }

    // Simplified
    if (script != UiScript.Trad)
    {
        sb.Append("<span class='hw-simp " + extraSimpClass + "' lang='zh-CN'>");
        renderHanzi(entry, true, false, sb);
        sb.Append("</span>");
    }

    // Separator, only when both scripts are shown
    bool both = script == UiScript.Both;
    bool tooLongForOneLine = both && entry.ChSimpl.Length > OneLineHanziLimit;
    if (both)
    {
        if (!tooLongForOneLine)
        {
            // Up to N hanzi: on a single line, with a bullet in between
            string clsSep = tones != UiTones.None ? "hw-sep faint" : "hw-sep";
            sb.Append("<span class='" + clsSep + "'>");
            sb.Append("•");
            sb.Append("</span>");
        }
        else
        {
            // Otherwise, line break
            // NOTE(review): "<bs/>" is not a standard HTML element; confirm whether "<br/>" was intended.
            sb.Append("<bs/>");
        }
    }

    // Traditional
    if (script != UiScript.Simp)
    {
        // Special class so traditional floats left after line break
        string clsTrad = tooLongForOneLine ? "hw-trad break" : "hw-trad";
        if (extraTradClass != "")
        {
            clsTrad += " " + extraTradClass;
        }
        sb.Append("<span class='" + clsTrad + "' lang='zh-TW'>");
        renderHanzi(entry, false, dimIdenticalTrad && both, sb);
        sb.Append("</span>");
    }
}