/// <summary>
/// Gets the entry formatted as a single CEDICT plain text line.
/// </summary>
/// <param name="entry">Entry whose headwords, pinyin and senses are written out.</param>
/// <returns>
/// One line made of the traditional headword, a space, the simplified headword,
/// the pinyin in square brackets, and the senses delimited by slashes.
/// </returns>
public static string GetCedict(CedictEntry entry)
{
    // Head of the line: "ChTrad ChSimpl [pinyin] /"
    StringBuilder line = new StringBuilder();
    line.Append(entry.ChTrad).Append(' ').Append(entry.ChSimpl);
    line.Append(" [").Append(GetPinyinCedict(entry.Pinyin)).Append("] /");

    foreach (var sense in entry.Senses)
    {
        string domain = HybridToCedict(sense.Domain);
        string equiv = HybridToCedict(sense.Equiv);
        string note = HybridToCedict(sense.Note);

        bool hasEquiv = equiv != string.Empty;
        bool hasNote = note != string.Empty;
        // A separating space follows the domain only if something comes after it;
        // a domain that is exactly "CL:" is never followed by a space.
        bool spaceAfterDomain = domain != string.Empty && domain != "CL:" && (hasEquiv || hasNote);

        line.Append(domain);
        if (spaceAfterDomain)
        {
            line.Append(' ');
        }
        line.Append(equiv);
        if (hasEquiv && hasNote)
        {
            line.Append(' ');
        }
        line.Append(note);
        // Every sense is closed by a slash
        line.Append('/');
    }

    return line.ToString();
}
/// <summary>
/// Ctor: init immutable instance.
/// </summary>
/// <param name="entryId">Value stored in <c>EntryId</c>.</param>
/// <param name="entry">Value stored in <c>Entry</c>.</param>
/// <param name="script">Value stored in <c>Script</c>.</param>
/// <param name="start">Value stored in <c>StartInQuery</c>.</param>
/// <param name="length">Value stored in <c>LengthInQuery</c>.</param>
public CedictAnnotation(int entryId, CedictEntry entry, SearchScript script, int start, int length)
{
    // Plain member-wise initialization; no validation is performed here
    this.EntryId = entryId;
    this.Entry = entry;
    this.Script = script;
    this.StartInQuery = start;
    this.LengthInQuery = length;
}
/// <summary>
/// Ctor: init immutable instance - result of pinyin lookup.
/// </summary>
/// <param name="entry">Value stored in <c>Entry</c>.</param>
/// <param name="hanziPinyinMap">Map handed to <c>calculateHanziHighlights</c> to derive the hanzi highlight.</param>
/// <param name="pinyinHiliteStart">Value stored in <c>PinyinHiliteStart</c>.</param>
/// <param name="pinyinHiliteLength">Value stored in <c>PinyinHiliteLength</c>.</param>
public CedictResult(CedictEntry entry, ReadOnlyCollection<short> hanziPinyinMap, int pinyinHiliteStart, int pinyinHiliteLength)
{
    this.Entry = entry;
    // A pinyin lookup carries no hanzi warning and no target highlights
    this.HanziWarning = SimpTradWarning.None;
    this.targetHilites = new CedictTargetHighlight[] { };
    this.PinyinHiliteStart = pinyinHiliteStart;
    this.PinyinHiliteLength = pinyinHiliteLength;
    // Hanzi highlight is not passed in: it is filled in from the map, after the pinyin range is set
    calculateHanziHighlights(hanziPinyinMap, out HanziHiliteStart, out HanziHiliteLength);
}
/// <summary>
/// Ctor: init immutable instance - result of hanzi lookup.
/// </summary>
/// <param name="hanziWarning">Value stored in <c>HanziWarning</c>.</param>
/// <param name="entry">Value stored in <c>Entry</c>.</param>
/// <param name="hanziPinyinMap">Map handed to <c>calculatePinyinHighlights</c> to derive the pinyin highlight.</param>
/// <param name="hanziHiliteStart">Value stored in <c>HanziHiliteStart</c>.</param>
/// <param name="hanziHiliteLength">Value stored in <c>HanziHiliteLength</c>.</param>
public CedictResult(SimpTradWarning hanziWarning, CedictEntry entry, ReadOnlyCollection<short> hanziPinyinMap, int hanziHiliteStart, int hanziHiliteLength)
{
    this.Entry = entry;
    this.HanziWarning = hanziWarning;
    // A hanzi lookup has no target highlights
    this.targetHilites = new CedictTargetHighlight[] { };
    this.HanziHiliteStart = hanziHiliteStart;
    this.HanziHiliteLength = hanziHiliteLength;
    // Pinyin highlight is not passed in: it is filled in from the map, after the hanzi range is set
    calculatePinyinHighlights(hanziPinyinMap, out PinyinHiliteStart, out PinyinHiliteLength);
}
/// <summary>
/// Compares this headword's pinyin to another headword's to get lexicographical ordering.
/// </summary>
/// <param name="other">Entry whose pinyin is compared against this one's.</param>
/// <returns>
/// The result of the first non-equal syllable comparison; if all shared positions
/// are equal, the comparison of the two pinyin lengths (shorter sorts first).
/// </returns>
public int PinyinCompare(CedictEntry other)
{
    int ownLength = pinyin.Length;
    int otherLength = other.pinyin.Length;
    int shared = Math.Min(ownLength, otherLength);

    // Walk the positions both headwords have; the first difference decides
    int ix = 0;
    while (ix < shared)
    {
        int diff = pinyin[ix].CompareTo(other.pinyin[ix]);
        if (diff != 0)
        {
            return diff;
        }
        ++ix;
    }

    // One is a prefix of the other, or they are identical: length decides
    return ownLength.CompareTo(otherLength);
}
/// <summary>
/// Add data from one entry to stats.
/// </summary>
/// <param name="entry">Entry whose senses' equivalents are scanned for tokens.</param>
/// <remarks>
/// Tokens are first gathered into a set, so each distinct token raises its
/// count in <c>tokenCounts</c> by one per call, however often it occurs in the entry.
/// </remarks>
public void CalculateEntryStats(CedictEntry entry)
{
    // Get tokens from entry
    HashSet<string> tokens = new HashSet<string>();
    foreach (CedictSense sense in entry.Senses)
    {
        foreach (TextRun tr in sense.Equiv.Runs)
        {
            // Chinese runs contribute no tokens
            if (tr is TextRunZho)
            {
                continue;
            }
            // Only Latin runs are tokenized; a run of any other kind is skipped
            // rather than handed to getTokens as null.
            TextRunLatin latin = tr as TextRunLatin;
            if (latin == null)
            {
                continue;
            }
            getTokens(latin, tokens);
        }
    }

    // Increase counts of tokens. TryGetValue leaves count at 0 for an unseen
    // token, so a single lookup covers both the "new" and the "known" case.
    foreach (string token in tokens)
    {
        int count;
        tokenCounts.TryGetValue(token, out count);
        tokenCounts[token] = count + 1;
    }
}
/// <summary>
/// Ctor: init immutable instance - result of target lookup.
/// </summary>
/// <param name="entry">Value stored in <c>Entry</c>.</param>
/// <param name="targetHilites">Highlights; copied element by element into a private array.</param>
/// <exception cref="ArgumentNullException"><paramref name="targetHilites"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="targetHilites"/> contains a null element.</exception>
public CedictResult(CedictEntry entry, ReadOnlyCollection<CedictTargetHighlight> targetHilites)
{
    if (targetHilites == null)
    {
        throw new ArgumentNullException("targetHilites");
    }

    // Take a private copy, rejecting null elements along the way
    CedictTargetHighlight[] copy = new CedictTargetHighlight[targetHilites.Count];
    int ix = 0;
    foreach (CedictTargetHighlight hilite in targetHilites)
    {
        if (hilite == null)
        {
            throw new ArgumentException("Null element in highlights array.");
        }
        copy[ix] = hilite;
        ++ix;
    }
    this.targetHilites = copy;

    Entry = entry;
    HanziWarning = SimpTradWarning.None;
    // A target lookup has no hanzi or pinyin highlight: start -1, length 0
    HanziHiliteStart = -1;
    HanziHiliteLength = 0;
    PinyinHiliteStart = -1;
    PinyinHiliteLength = 0;
}
/// <summary>
/// Ctor: takes data to display.
/// </summary>
/// <param name="owner">Zen control that owns me.</param>
/// <param name="scale">Scaling factor; stored, and multiplied into the padding values.</param>
/// <param name="tprov">Localized display text provider.</param>
/// <param name="lookupThroughLink">Delegate to call when user initiates lookup by clicking on a link.</param>
/// <param name="parentPaint">Delegate stored in the <c>parentPaint</c> field; not invoked by the constructor.</param>
/// <param name="getEntry">Delegate to call when an entry must be retrieved (for "copy" context menu).</param>
/// <param name="entryProvider">Dictionary entry provider; queried once for the entry identified by <c>cr.EntryId</c>.</param>
/// <param name="cr">The lookup result this control will show.</param>
/// <param name="script">Scripts to show in headword.</param>
/// <param name="last">Stored in the <c>last</c> field. NOTE(review): presumably marks the final item of the results list - confirm.</param>
public OneResultControl(ZenControlBase owner, float scale, ITextProvider tprov, LookupThroughLinkDelegate lookupThroughLink, ParentPaintDelegate parentPaint, GetEntryDelegate getEntry, ICedictEntryProvider entryProvider, CedictResult cr, SearchScript script, bool last)
    : base(owner)
{
    // Paddings: fixed base sizes multiplied by the scale, truncated by the int cast
    padLeft = (int)(scale * 5.0F);
    padTop = (int)(scale * 4.0F);
    padBottom = (int)(scale * 8.0F);
    padMid = (int)(scale * 20.0F);
    padRight = (int)(scale * 10.0F);

    // Collaborators and display parameters
    this.scale = scale;
    this.tprov = tprov;
    this.lookupThroughLink = lookupThroughLink;
    this.parentPaint = parentPaint;
    this.getEntry = getEntry;
    this.analyzedScript = script;
    this.last = last;

    // The result only identifies its entry; fetch the entry itself up front
    this.res = cr;
    this.entry = entryProvider.GetEntry(cr.EntryId);
}
/// <summary>
/// Gets the entry formatted in HTML.
/// </summary>
/// <param name="tprov">Text provider, passed through to the sense formatter.</param>
/// <param name="entry">Entry to render.</param>
/// <param name="script">Decides which headword(s) are shown.</param>
/// <returns>The outer template with the entry's body filled in, stripped of "\r\n" sequences and tabs.</returns>
public static string GetHtml(ITextProvider tprov, CedictEntry entry, SearchScript script)
{
    // First headword follows the requested script. A second (traditional) headword
    // is shown only if both scripts are requested and the two forms differ.
    string mainHanzi;
    if (script == SearchScript.Traditional)
    {
        mainHanzi = entry.ChTrad;
    }
    else
    {
        mainHanzi = entry.ChSimpl;
    }
    string extraHanzi = null;
    if (script == SearchScript.Both && entry.ChSimpl != entry.ChTrad)
    {
        extraHanzi = entry.ChTrad;
    }

    // Pick the simplest template that fits and fill in the headword(s)
    StringBuilder body = new StringBuilder();
    bool senseInlined = false;
    if (extraHanzi != null)
    {
        // Two headwords: the full-fledged template
        body.Append(template4);
        body.Replace("{hanzi1}", escape(mainHanzi));
        body.Replace("{hanzi2}", escape(extraHanzi));
    }
    else
    {
        // One headword: template depends on its length, and the very simplest
        // one (at most 2 chars, exactly one sense) embeds the sense directly.
        senseInlined = mainHanzi.Length <= 2 && entry.SenseCount == 1;
        string template;
        if (senseInlined)
        {
            template = template1;
        }
        else if (mainHanzi.Length <= 6)
        {
            template = template2;
        }
        else
        {
            template = template3;
        }
        body.Append(template);
        body.Replace("{hanzi}", escape(mainHanzi));
    }

    // Pinyin is filled in the same way for every template
    body.Replace("{pinyin}", escape(GetPinyinString(entry.GetPinyinForDisplay(true))));

    if (senseInlined)
    {
        body.Replace("{sense}", getSenseHtmlPure(tprov, entry.GetSenseAt(0), script));
    }
    else
    {
        // All other templates take a list of senses
        StringBuilder allSenses = new StringBuilder();
        foreach (CedictSense sense in entry.Senses)
        {
            // Senses get a leading diamond and space, unless the domain is the plain text "CL:"
            string lead = sense.Domain.EqualsPlainText("CL:") ? "" : templateDiamond + " ";
            string oneSense = lead + getSenseHtmlPure(tprov, sense, script);
            allSenses.Append(templateSense.Replace("{sense}", oneSense));
        }
        body.Replace("{senses}", allSenses.ToString());
    }

    // Assemble the whole HTML
    StringBuilder page = new StringBuilder();
    page.Append(templateOuter);
    page.Replace("{body}", body.ToString());
    // Purge new lines and tabs: this avoids extra spaces e.g. when pasting into Word
    page.Replace("\r\n", "").Replace("\t", "");
    return page.ToString();
}
/// <summary>
/// Compares this headword's pinyin to another headword's to get lexicographical ordering.
/// </summary>
/// <param name="other">Entry whose pinyin is compared against this one's.</param>
/// <returns>
/// The first non-zero syllable comparison over the positions both have;
/// otherwise the comparison of the two pinyin lengths, so the shorter one wins.
/// </returns>
public int PinyinCompare(CedictEntry other)
{
    int common = Math.Min(pinyin.Length, other.pinyin.Length);

    // Compare syllable by syllable, stopping at the first difference
    int result = 0;
    for (int pos = 0; pos < common && result == 0; ++pos)
    {
        result = pinyin[pos].CompareTo(other.pinyin[pos]);
    }

    // No difference in the shared part: shorter is a prefix of longer, or both are identical
    if (result == 0)
    {
        result = pinyin.Length.CompareTo(other.pinyin.Length);
    }
    return result;
}
/// <summary>
/// Indexes one parsed Cedict entry (hanzi, pinyin and target-language indexes).
/// </summary>
/// <param name="entry">Entry whose headwords, pinyin syllables and sense equivalents are indexed.</param>
/// <param name="id">Identifier recorded in the index lists for this entry.</param>
/// <exception cref="Exception">A pinyin syllable has a tone outside -1..4.</exception>
private void indexEntry(CedictEntry entry, int id)
{
    // Index characters of simplified headword
    foreach (char c in entry.ChSimpl)
    {
        // Single lookup: fetch the char's index item, creating it on first sight
        IdeoIndexItem ii;
        if (!index.IdeoIndex.TryGetValue(c, out ii))
        {
            ii = new IdeoIndexItem();
            index.IdeoIndex[c] = ii;
        }
        // Avoid indexing same entry twice if a char occurs multiple times:
        // only the list's last element is checked against the ID being added.
        if (ii.EntriesHeadwordSimp.Count == 0 ||
            ii.EntriesHeadwordSimp[ii.EntriesHeadwordSimp.Count - 1] != id)
        {
            ii.EntriesHeadwordSimp.Add(id);
        }
    }

    // Index characters of traditional headword
    foreach (char c in entry.ChTrad)
    {
        IdeoIndexItem ii;
        if (!index.IdeoIndex.TryGetValue(c, out ii))
        {
            ii = new IdeoIndexItem();
            index.IdeoIndex[c] = ii;
        }
        // Avoid indexing same entry twice if a char occurs multiple times
        if (ii.EntriesHeadwordTrad.Count == 0 ||
            ii.EntriesHeadwordTrad[ii.EntriesHeadwordTrad.Count - 1] != id)
        {
            ii.EntriesHeadwordTrad.Add(id);
        }
    }

    // Index pinyin syllables
    foreach (PinyinSyllable pys in entry.Pinyin)
    {
        // Index contains lower-case syllables
        string textLo = pys.Text.ToLowerInvariant();
        PinyinIndexItem pi;
        if (!index.PinyinIndex.TryGetValue(textLo, out pi))
        {
            pi = new PinyinIndexItem();
            index.PinyinIndex[textLo] = pi;
        }

        // Figure out which list in index item - by tone
        List<int> entryList;
        switch (pys.Tone)
        {
            case -1: entryList = pi.EntriesNT; break;
            case 0: entryList = pi.Entries0; break;
            case 1: entryList = pi.Entries1; break;
            case 2: entryList = pi.Entries2; break;
            case 3: entryList = pi.Entries3; break;
            case 4: entryList = pi.Entries4; break;
            default: throw new Exception("Invalid tone: " + pys.Tone.ToString());
        }

        // Avoid indexing same entry twice if a syllable occurs multiple times
        if (entryList.Count == 0 || entryList[entryList.Count - 1] != id)
        {
            entryList.Add(id);
        }
    }

    // Index equiv of each sense; the sense index counts every sense, including skipped ones
    int senseIx = -1;
    foreach (CedictSense sense in entry.Senses)
    {
        ++senseIx;
        // Empty equiv: nothing to index
        if (sense.Equiv.IsEmpty)
        {
            continue;
        }
        // Tokenize
        ReadOnlyCollection<EquivToken> tokens = tokenizer.Tokenize(sense.Equiv);
        // Index sense
        indexSense(tokens, id, senseIx);
    }
}
/// <summary>
/// Ctor: pairs a lookup result with an entry.
/// </summary>
/// <param name="res">Value stored in <c>Res</c>.</param>
/// <param name="entry">Value stored in <c>Entry</c>.</param>
public ResWithEntry(CedictResult res, CedictEntry entry)
{
    this.Entry = entry;
    this.Res = res;
}
/// <summary>
/// Retrieves pinyin lookup candidates, verifies actual presence of search expression in headword.
/// </summary>
/// <param name="br">Reader the candidate entries are deserialized from; repositioned for every candidate.</param>
/// <param name="poss">Reader positions of the candidate entries.</param>
/// <param name="sylls">Query syllables; their text is compared to the lower-cased entry syllables, and a tone of -1 matches any tone.</param>
/// <returns>One item per candidate that contains the query syllables consecutively and passes the hanzi coverage check.</returns>
List<ResWithEntry> doLoadVerifyPinyin(BinReader br, IEnumerable<int> poss, List<PinyinSyllable> sylls)
{
    List<ResWithEntry> keepers = new List<ResWithEntry>();

    // Look at each candidate: load, verify, keep or drop
    foreach (int pos in poss)
    {
        // Load up entry from its position
        br.Position = pos;
        CedictEntry entry = new CedictEntry(br);

        // Slide the query over the entry's syllables; first full match wins
        int matchStart = -1;
        for (int start = 0; matchStart == -1 && start <= entry.PinyinCount - sylls.Count; ++start)
        {
            bool allMatch = true;
            for (int offset = 0; allMatch && offset < sylls.Count; ++offset)
            {
                PinyinSyllable inEntry = entry.GetPinyinAt(start + offset);
                PinyinSyllable inQuery = sylls[offset];
                // Text must agree (entry side lower-cased); tone only matters if the query specifies one
                bool sameText = inEntry.Text.ToLowerInvariant() == inQuery.Text;
                allMatch = sameText && (inQuery.Tone == -1 || inEntry.Tone == inQuery.Tone);
            }
            if (allMatch)
            {
                matchStart = start;
            }
        }

        // Drop if the query syllables are not in the entry
        if (matchStart == -1)
        {
            continue;
        }
        // Drop if there's any unprintable Hanzi
        if (!areHanziCovered(entry))
        {
            continue;
        }

        // Keeper!
        CedictResult res = new CedictResult(pos, entry.HanziPinyinMap, matchStart, sylls.Count);
        keepers.Add(new ResWithEntry(res, entry));
    }

    return keepers;
}
/// <summary>
/// Retrieves hanzi lookup candidates, verifies actual presence of search expression in headword.
/// </summary>
/// <param name="br">Reader the candidate entries are deserialized from; repositioned for every candidate.</param>
/// <param name="poss">Reader positions of the candidate entries.</param>
/// <param name="query">String searched for in the simplified headword first, then in the traditional one.</param>
/// <param name="script">Not used by this method at present.</param>
/// <returns>One item per candidate whose headword contains the query and that passes the hanzi coverage check.</returns>
List<ResWithEntry> doLoadVerifyHanzi(BinReader br, IEnumerable<int> poss, string query, SearchScript script)
{
    List<ResWithEntry> resList = new List<ResWithEntry>();

    // Look at each entry: load, verify, keep or drop
    foreach (int pos in poss)
    {
        // Load up entry from its position
        br.Position = pos;
        CedictEntry entry = new CedictEntry(br);

        // Find the query in the simplified headword; if not there, in the traditional one.
        // The search is ordinal on purpose: the highlight length below is taken to be
        // query.Length, which only holds when the match is an exact char-by-char one.
        // A culture-sensitive IndexOf can match a span of a different length.
        int hiliteStart = entry.ChSimpl.IndexOf(query, System.StringComparison.Ordinal);
        if (hiliteStart == -1)
        {
            hiliteStart = entry.ChTrad.IndexOf(query, System.StringComparison.Ordinal);
        }

        // Not a keeper if neither headword contains the query. An empty query
        // yields a zero-length highlight and is treated as "not found".
        if (hiliteStart == -1 || query.Length == 0)
        {
            continue;
        }
        int hiliteLength = query.Length;

        // Drop if there's any unprintable hanzi
        if (!areHanziCovered(entry))
        {
            continue;
        }

        // TO-DO: indicate wrong script in result
        CedictResult res = new CedictResult(CedictResult.SimpTradWarning.None, pos, entry.HanziPinyinMap, hiliteStart, hiliteLength);
        ResWithEntry resWE = new ResWithEntry(res, entry);
        resList.Add(resWE);
    }

    return resList;
}
/// <summary>
/// Returns true if display font covers all Hanzi in entry; false otherwise.
/// </summary>
/// <param name="entry">Entry whose headwords and senses are checked.</param>
/// <returns>
/// False as soon as the headword pair, or the domain, equiv or note of any sense,
/// fails the corresponding coverage overload; true if none fails.
/// </returns>
private bool areHanziCovered(CedictEntry entry)
{
    // Simplified and traditional headword
    bool headwordsCovered = areHanziCovered(entry.ChSimpl, entry.ChTrad);
    if (!headwordsCovered)
    {
        return false;
    }

    // Hanzi in hybrid text of senses: domain, then equiv, then note
    for (int senseIx = 0; senseIx < entry.SenseCount; ++senseIx)
    {
        CedictSense sense = entry.GetSenseAt(senseIx);
        bool senseCovered = areHanziCovered(sense.Domain)
            && areHanziCovered(sense.Equiv)
            && areHanziCovered(sense.Note);
        if (!senseCovered)
        {
            return false;
        }
    }

    // Nothing failed
    return true;
}
/// <summary>
/// Ctor: init.
/// </summary>
/// <param name="cmdTriggered">Delegate that will be called when a command is issued.</param>
/// <param name="tprov">Localized display text provider.</param>
/// <param name="entry">Cedict entry to fetch clipboard data from.</param>
/// <param name="senseIx">Index of sense over which user right-clicked, or -1.</param>
/// <param name="script">Search script (so two Hanzi items are shown if needed).</param>
public ResultsCtxtControl(CommandTriggeredDelegate cmdTriggered, ITextProvider tprov, CedictEntry entry, int senseIx, SearchScript script)
{
    this.cmdTriggered = cmdTriggered;
    this.tprov = tprov;
    this.entry = entry;
    this.senseIx = senseIx;
    this.script = script;
    InitializeComponent();

    BackColor = ZenParams.BorderColor;
    pnlTop.BackColor = ZenParams.WindowColor;
    tblFull.BackColor = ZenParams.WindowColor;
    tblZho.BackColor = ZenParams.WindowColor;
    tblSense.BackColor = ZenParams.WindowColor;

    // Display strings
    // NOTE(review): title is fetched but never used in this constructor - confirm whether a label should show it.
    string title = tprov.GetString("CtxtCopyTitle");
    string fullFormatted, fullCedict, hanzi1, hanzi2, pinyin, sense;
    getDisplayStrings(tprov, senseIx, out fullFormatted, out fullCedict, out hanzi1, out hanzi2, out pinyin, out sense);
    lblFullFormatted.Text = fullFormatted;
    lblFullCedict.Text = fullCedict;
    lblHanzi1.Text = hanzi1;
    lblHanzi2.Text = hanzi2;
    lblPinyin.Text = pinyin;
    lblSense.Text = sense;

    // Margin/border tweaks: 1px also at higher DPIs.
    // Four stacked parts, one layout row each: pnlTop (row 0), tblFull (1), tblZho (2), tblSense (3).
    tblLayout.Location = new Point(1, 1);
    pnlTop.Margin = new Padding(0, 0, 0, 1);
    // Row 0 belongs to pnlTop. Writing this height to row 1 would be overwritten right below.
    tblLayout.RowStyles[0].Height = pnlTop.Height + 1;
    tblFull.Margin = new Padding(0, 0, 0, 1);
    tblLayout.RowStyles[1].Height = tblFull.Height + 1;
    tblZho.Margin = new Padding(0, 0, 0, 1);
    tblLayout.RowStyles[2].Height = tblZho.Height + 1;
    tblSense.Margin = new Padding(0, 0, 0, 0);
    tblLayout.RowStyles[3].Height = tblSense.Height;
    tblLayout.Height = tblSense.Bottom;

    // Hide rows we don't need: second hanzi
    if (hanzi2 == null)
    {
        int hHanzi2 = lblHanzi2.Height;
        tblZho.Controls.Remove(lblHanzi2);
        // Pinyin label takes over the position of the removed label
        lblPinyin.Top = lblHanzi2.Top;
        lblHanzi2.Dispose();
        lblHanzi2 = null;
        tblZho.Controls.Remove(lblPinyin);
        tblZho.Controls.Add(lblPinyin, 0, 2);
        tblZho.RowCount -= 1;
        tblZho.RowStyles.RemoveAt(2);
        // Everything below shrinks by the removed label's height
        tblZho.Height -= hHanzi2;
        tblLayout.RowStyles[2].Height -= hHanzi2;
        tblLayout.Height -= hHanzi2;
    }

    // Sense
    if (sense == null)
    {
        int hSense = tblSense.Height;
        tblLayout.Controls.Remove(tblSense);
        tblSense.Dispose();
        tblSense = null;
        // The sense label goes away with its table. Clearing the reference is what makes
        // the null checks below leave it out of the hover collection.
        lblSense = null;
        tblLayout.RowStyles.RemoveAt(tblLayout.RowStyles.Count - 1);
        tblLayout.RowCount -= 1;
        tblLayout.Height -= hSense + 1;
    }

    // Label collection for hover: only the labels that are still present, in display order
    int lblCount = 6;
    if (lblHanzi2 == null) --lblCount;
    if (lblSense == null) --lblCount;
    lblColl = new Label[lblCount];
    lblColl[0] = lblFullFormatted;
    lblColl[1] = lblFullCedict;
    lblColl[2] = lblHanzi1;
    int ix = 3;
    if (lblHanzi2 != null)
    {
        lblColl[ix] = lblHanzi2;
        ++ix;
    }
    lblColl[ix] = lblPinyin;
    ++ix;
    if (lblSense != null)
    {
        lblColl[ix] = lblSense;
        ++ix;
    }

    // Event handling for hover
    tblFull.CellPaint += onTblLayoutCellPaint;
    tblZho.CellPaint += onTblLayoutCellPaint;
    if (tblSense != null) tblSense.CellPaint += onTblLayoutCellPaint;
}
/// <summary>
/// Parses an entry (line) that has been separated into headword and rest.
/// </summary>
/// <param name="strHead">Head part of the line; matched against <c>reHead</c>.</param>
/// <param name="strBody">Rest of the line; split into senses at slashes.</param>
/// <param name="logStream">Receives one line for every error or warning found.</param>
/// <returns>
/// The parsed entry, or null if the head does not match, the two headword groups differ
/// in length, the body has no non-empty sense, or no sense could be converted.
/// </returns>
private CedictEntry parseEntry(string strHead, string strBody, StreamWriter logStream)
{
    // Decompose head
    Match headMatch = reHead.Match(strHead);
    if (!headMatch.Success)
    {
        logStream.WriteLine(string.Format("Line {0}: ERROR: Invalid header syntax: {1}", lineNum, strHead));
        return null;
    }
    // The two headword groups. NOTE(review): group 1 is presumably traditional and
    // group 2 simplified - confirm against reHead.
    string headGroup1 = headMatch.Groups[1].Value;
    string headGroup2 = headMatch.Groups[2].Value;

    // Split pinyin by spaces and convert it to our normalized format
    string[] rawSyllables = headMatch.Groups[3].Value.Split(' ');
    PinyinSyllable[] syllables;
    List<int> syllableMap;
    normalizePinyin(rawSyllables, out syllables, out syllableMap);

    // Weird syllables (tone -1) found > warning
    if (Array.Exists(syllables, syll => syll.Tone == -1))
    {
        logStream.WriteLine(string.Format("Line {0}: Warning: Weird pinyin syllable: {1}", lineNum, strHead));
    }

    // Trad and simp MUST have same # of chars, always
    if (headGroup1.Length != headGroup2.Length)
    {
        logStream.WriteLine(string.Format("Line {0}: ERROR: Trad/simp char count mismatch: {1}", lineNum, strHead));
        return null;
    }

    // Transform map so it says, for each hanzi, which pinyin syllable it corresponds to.
    // A failed mapping (null) is only warned about; parsing goes on and null is stored in the entry.
    short[] hanziToPinyin = transformPinyinMap(headGroup1, syllableMap);
    if (hanziToPinyin == null)
    {
        logStream.WriteLine(string.Format("Line {0}: Warning: Failed to match hanzi to pinyin: {1}", lineNum, strHead));
    }

    // Split meanings by slash, keeping the trimmed non-empty ones
    string[] rawSenses = strBody.Split('/');
    List<string> senseTexts = new List<string>();
    foreach (string raw in rawSenses)
    {
        string trimmed = raw.Trim();
        if (trimmed != "")
        {
            senseTexts.Add(trimmed);
        }
    }
    // Anything dropped above means there was an empty sense
    if (rawSenses.Length != senseTexts.Count)
    {
        logStream.WriteLine(string.Format("Line {0}: Warning: Empty sense in entry: {1}", lineNum, strBody));
    }
    // At least one meaning!
    if (senseTexts.Count == 0)
    {
        logStream.WriteLine(string.Format("Line {0}: ERROR: No sense: {1}", lineNum, strBody));
        return null;
    }

    // Separate domain, equiv and note in each sense
    List<CedictSense> parsedSenses = new List<CedictSense>();
    foreach (string senseText in senseTexts)
    {
        string domain, equiv, note;
        trimSense(senseText, out domain, out equiv, out note);
        // Equiv is empty: merits at least a warning
        if (equiv == "")
        {
            logStream.WriteLine(string.Format("Line {0}: Warning: No equivalent in sense, only domain/notes: {1}", lineNum, senseText));
        }

        // Convert all parts of sense to hybrid text
        HybridText hybridDomain = plainTextToHybrid(domain, lineNum, logStream);
        HybridText hybridEquiv = plainTextToHybrid(equiv, lineNum, logStream);
        HybridText hybridNote = plainTextToHybrid(note, lineNum, logStream);

        // Skip the sense if any part failed to convert
        if (hybridDomain == null || hybridEquiv == null || hybridNote == null)
        {
            continue;
        }
        parsedSenses.Add(new CedictSense(hybridDomain, hybridEquiv, hybridNote));
    }

    // If there are no senses, we failed. But that will have been logged before, so just return null.
    if (parsedSenses.Count == 0)
    {
        return null;
    }

    // Done with entry: note that group 2 is passed first, group 1 second
    return new CedictEntry(
        headGroup2,
        headGroup1,
        new ReadOnlyCollection<PinyinSyllable>(syllables),
        new ReadOnlyCollection<CedictSense>(parsedSenses),
        hanziToPinyin);
}