/// <summary>
/// Checks whether an entry's simplified or traditional headword contains the query string;
/// if it does, appends a result carrying the highlight range to <paramref name="res"/>.
/// </summary>
/// <param name="entry">Entry to verify. A null entry is skipped silently.</param>
/// <param name="entryId">ID of the entry. Not used by this method.</param>
/// <param name="query">Hanzi string to look for in the headwords.</param>
/// <param name="res">List that receives the result if the entry is a match.</param>
private void verifyHanzi(CedictEntry entry, int entryId, string query, List<CedictResult> res)
{
    if (entry == null)
    {
        return;
    }

    // Ordinal search: the highlight length is taken from query.Length, so the hit must be an
    // exact code-unit match. The culture-sensitive default of IndexOf(string) can report
    // matches of a different length and depends on the current thread culture.
    int hiliteStart = entry.ChSimpl.IndexOf(query, System.StringComparison.Ordinal);

    // If not found in simplified, check in traditional
    if (hiliteStart == -1)
    {
        hiliteStart = entry.ChTrad.IndexOf(query, System.StringComparison.Ordinal);
    }

    // An empty query "matches" at 0 with zero length; that was never a keeper.
    if (hiliteStart == -1 || query.Length == 0)
    {
        return;
    }

    // Entry is a keeper: one of the headwords contains the query
    CedictResult cr = new CedictResult(CedictResult.SimpTradWarning.None, entry, entry.HanziPinyinMap, hiliteStart, query.Length);
    res.Add(cr);
}
/// <summary>
/// Ctor for a regular lookup result. Only stores the received values; no other work is done here.
/// </summary>
/// <param name="res">Lookup result this control belongs to.</param>
/// <param name="prov">Dictionary entry provider, stored for later use.</param>
/// <param name="script">Script setting, stored as received.</param>
/// <param name="tones">Tone setting, stored as received.</param>
/// <param name="isMobile">Mobile flag, stored as received.</param>
public OneResultCtrl(CedictResult res, ICedictEntryProvider prov, UiScript script, UiTones tones, bool isMobile)
{
    // Display settings
    this.isMobile = isMobile;
    this.tones = tones;
    this.script = script;

    // Data
    this.prov = prov;
    this.res = res;
}
/// <summary>
/// Loads every pinyin lookup candidate and keeps those whose pinyin really contains
/// the query syllables as a contiguous run.
/// </summary>
/// <param name="br">Reader that is positioned onto each candidate to deserialize its entry.</param>
/// <param name="poss">Candidate positions; each one is assigned to the reader's Position.</param>
/// <param name="sylls">Query syllables. A query tone of -1 matches any tone.</param>
/// <returns>Verified results paired with their loaded entries, in the order of the candidates.</returns>
List<ResWithEntry> doLoadVerifyPinyin(BinReader br, IEnumerable<int> poss, List<PinyinSyllable> sylls)
{
    List<ResWithEntry> verified = new List<ResWithEntry>();

    foreach (int entryPos in poss)
    {
        // Deserialize the candidate
        br.Position = entryPos;
        CedictEntry entry = new CedictEntry(br);

        // Slide the query over the entry's pinyin; first full match wins
        int matchAt = -1;
        for (int start = 0; start <= entry.PinyinCount - sylls.Count; ++start)
        {
            bool allMatch = true;
            for (int k = 0; k != sylls.Count; ++k)
            {
                PinyinSyllable inEntry = entry.GetPinyinAt(start + k);
                PinyinSyllable inQuery = sylls[k];
                // Entry text is lower-cased; query text is compared as received
                bool sameText = inEntry.Text.ToLowerInvariant() == inQuery.Text;
                if (!sameText || (inQuery.Tone != -1 && inEntry.Tone != inQuery.Tone))
                {
                    allMatch = false;
                    break;
                }
            }
            if (allMatch)
            {
                matchAt = start;
                break;
            }
        }

        // Keep only if syllables were found AND all hanzi are covered
        if (matchAt != -1 && areHanziCovered(entry))
        {
            CedictResult res = new CedictResult(entryPos, entry.HanziPinyinMap, matchAt, sylls.Count);
            verified.Add(new ResWithEntry(res, entry));
        }
    }

    return verified;
}
/// <summary>
/// Ctor for a regular lookup result, with an entry provider.
/// Stores the received values and switches on both <c>hanim</c> and <c>dimIdenticalTrad</c>.
/// </summary>
/// <param name="res">Lookup result to render.</param>
/// <param name="prov">Dictionary entry provider, stored for later use.</param>
/// <param name="script">Script setting, stored as received.</param>
/// <param name="tones">Tone setting, stored as received.</param>
public EntryRenderer(CedictResult res, ICedictEntryProvider prov, UiScript script, UiTones tones)
{
    // Fixed defaults for this overload
    this.dimIdenticalTrad = true;
    this.hanim = true;

    // Values from caller
    this.tones = tones;
    this.script = script;
    this.prov = prov;
    this.res = res;
}
/// <summary>
/// Ctor for a regular lookup result, with UI language and entry ID.
/// Stores the received values, switches on both <c>hanim</c> and <c>dimIdenticalTrad</c>,
/// and starts with an empty extra entry class.
/// </summary>
/// <param name="lang">Language value, stored as received.</param>
/// <param name="res">Lookup result to render.</param>
/// <param name="script">Script setting, stored as received.</param>
/// <param name="tones">Tone setting, stored as received.</param>
/// <param name="entryId">Entry ID string, stored as received.</param>
public EntryRenderer(string lang, CedictResult res, UiScript script, UiTones tones, string entryId)
{
    // Fixed defaults for this overload
    this.extraEntryClass = "";
    this.dimIdenticalTrad = true;
    this.hanim = true;

    // Values from caller
    this.entryId = entryId;
    this.tones = tones;
    this.script = script;
    this.res = res;
    this.lang = lang;
}
/// <summary>
/// Retrieves hanzi lookup candidates and verifies that the search expression is actually
/// present in the simplified or traditional headword.
/// </summary>
/// <param name="br">Reader that is positioned onto each candidate to deserialize its entry.</param>
/// <param name="poss">Candidate positions; each one is assigned to the reader's Position.</param>
/// <param name="query">Hanzi string to look for in the headwords.</param>
/// <param name="script">Search script. Not used by this method yet (see TO-DO below).</param>
/// <returns>Verified results paired with their loaded entries, in the order of the candidates.</returns>
List<ResWithEntry> doLoadVerifyHanzi(BinReader br, IEnumerable<int> poss, string query, SearchScript script)
{
    List<ResWithEntry> resList = new List<ResWithEntry>();

    // Look at each entry: load, verify, keep or drop
    foreach (int pos in poss)
    {
        // Load up entry from file
        br.Position = pos;
        CedictEntry entry = new CedictEntry(br);

        // Ordinal search: the highlight length is taken from query.Length, so the hit must be
        // an exact code-unit match. The culture-sensitive default of IndexOf(string) can report
        // matches of a different length and depends on the current thread culture.
        int hiliteStart = entry.ChSimpl.IndexOf(query, System.StringComparison.Ordinal);

        // If not found in simplified, check in traditional
        if (hiliteStart == -1)
        {
            hiliteStart = entry.ChTrad.IndexOf(query, System.StringComparison.Ordinal);
        }

        // Not in either headword; an empty query was never a keeper either
        if (hiliteStart == -1 || query.Length == 0)
        {
            continue;
        }

        // Drop if there's any unprintable hanzi
        if (!areHanziCovered(entry))
        {
            continue;
        }

        // TO-DO: indicate wrong script in result
        CedictResult res = new CedictResult(CedictResult.SimpTradWarning.None, pos, entry.HanziPinyinMap, hiliteStart, query.Length);
        ResWithEntry resWE = new ResWithEntry(res, entry);
        resList.Add(resWE);
    }

    return resList;
}
/// <summary>
/// Loads every candidate entry by blob ID and keeps those whose pinyin really contains
/// the query syllables as a contiguous run.
/// </summary>
/// <param name="cands">Blob IDs of the candidate entries.</param>
/// <param name="qsylls">Query syllables. A query tone of -1 matches any tone.</param>
/// <returns>Verified results paired with their loaded entries, in the order of the candidates.</returns>
private List<ResWithEntry> retrieveVerifyPinyin(List<int> cands, List<PinyinSyllable> qsylls)
{
    List<ResWithEntry> verified = new List<ResWithEntry>();

    foreach (int blobId in cands)
    {
        CedictEntry entry = loadFromBlob(blobId);

        // Try each possible start position until the whole query lines up
        int hitAt = -1;
        int first = 0;
        while (hitAt == -1 && first <= entry.PinyinCount - qsylls.Count)
        {
            int matched = 0;
            while (matched != qsylls.Count)
            {
                PinyinSyllable inEntry = entry.GetPinyinAt(first + matched);
                PinyinSyllable inQuery = qsylls[matched];
                // Entry text is lower-cased; query text is compared as received
                if (inEntry.Text.ToLowerInvariant() != inQuery.Text)
                {
                    break;
                }
                // Tone only matters if the query specifies one
                if (inQuery.Tone != -1 && inEntry.Tone != inQuery.Tone)
                {
                    break;
                }
                ++matched;
            }
            if (matched == qsylls.Count)
            {
                hitAt = first;
            }
            ++first;
        }

        // Keeper only if the query syllables were found
        if (hitAt != -1)
        {
            CedictResult cres = new CedictResult(blobId, entry.HanziPinyinMap, hitAt, qsylls.Count);
            verified.Add(new ResWithEntry(cres, entry));
        }
    }

    return verified;
}
/// <summary>
/// Sort comparison for results of a pinyin lookup.
/// Order: fewer pinyin syllables first; then earlier pinyin highlight start;
/// then whatever the entries' <c>PinyinCompare</c> returns.
/// </summary>
/// <param name="a">Left-hand result.</param>
/// <param name="b">Right-hand result.</param>
/// <returns>Negative, zero or positive, as usual for comparisons.</returns>
private static int pyComp(CedictResult a, CedictResult b)
{
    int bySyllableCount = a.Entry.PinyinCount.CompareTo(b.Entry.PinyinCount);
    if (bySyllableCount == 0)
    {
        // Same length: the one where the match starts sooner wins, then pinyin order decides
        int byMatchStart = a.PinyinHiliteStart.CompareTo(b.PinyinHiliteStart);
        return byMatchStart != 0 ? byMatchStart : a.Entry.PinyinCompare(b.Entry);
    }
    return bySyllableCount;
}
/// <summary>
/// Sort comparison for results of a hanzi lookup.
/// Order: earlier hanzi highlight start first; then whatever the entries'
/// <c>PinyinCompare</c> returns; then shorter simplified headword first.
/// </summary>
/// <param name="a">Left-hand result.</param>
/// <param name="b">Right-hand result.</param>
/// <returns>Negative, zero or positive, as usual for comparisons.</returns>
private static int hrComp(CedictResult a, CedictResult b)
{
    // Each criterion is consulted only if all previous ones were a tie
    int order = a.HanziHiliteStart.CompareTo(b.HanziHiliteStart);
    if (order == 0)
    {
        order = a.Entry.PinyinCompare(b.Entry);
    }
    if (order == 0)
    {
        order = a.Entry.ChSimpl.Length.CompareTo(b.Entry.ChSimpl.Length);
    }
    return order;
}
/// <summary>
/// Loads every candidate entry by blob ID and keeps those whose simplified or traditional
/// headword actually contains the query string.
/// </summary>
/// <param name="cands">Blob IDs of the candidate entries.</param>
/// <param name="query">Hanzi string to look for in the headwords.</param>
/// <returns>Verified results paired with their loaded entries, in the set's enumeration order.</returns>
private List<ResWithEntry> retrieveVerifyHanzi(HashSet<int> cands, string query)
{
    List<ResWithEntry> resList = new List<ResWithEntry>();

    foreach (int blobId in cands)
    {
        // Load entry from DB
        CedictEntry entry = loadFromBlob(blobId);

        // Ordinal search: the highlight length is taken from query.Length, so the hit must be
        // an exact code-unit match. The culture-sensitive default of IndexOf(string) can report
        // matches of a different length and depends on the current thread culture.
        int hiliteStart = entry.ChSimpl.IndexOf(query, System.StringComparison.Ordinal);

        // If not found in simplified, check in traditional
        if (hiliteStart == -1)
        {
            hiliteStart = entry.ChTrad.IndexOf(query, System.StringComparison.Ordinal);
        }

        // Not in either headword; an empty query was never a keeper either
        if (hiliteStart == -1 || query.Length == 0)
        {
            continue;
        }

        CedictResult res = new CedictResult(CedictResult.SimpTradWarning.None, blobId, entry.HanziPinyinMap, hiliteStart, query.Length);
        ResWithEntry resWE = new ResWithEntry(res, entry);
        resList.Add(resWE);
    }

    return resList;
}
/// <summary>
/// Ctor: takes data to display, resolves the result's entry and derives paddings from the scale.
/// </summary>
/// <param name="owner">Zen control that owns me; passed on to the base constructor.</param>
/// <param name="scale">Scale factor; stored, and used to compute the pixel paddings.</param>
/// <param name="tprov">Localized display text provider.</param>
/// <param name="lookupThroughLink">Delegate to call when user initiates lookup by clicking on a link.</param>
/// <param name="parentPaint">Paint-related delegate of the parent; only stored here.</param>
/// <param name="getEntry">Delegate to call when an entry must be retrieved (for "copy" context menu).</param>
/// <param name="entryProvider">Dictionary entry provider; queried once for the result's entry.</param>
/// <param name="cr">The lookup result this control will show.</param>
/// <param name="script">Scripts to show in headword.</param>
/// <param name="last">Flag telling if this is the last result in the list; only stored here.</param>
public OneResultControl(ZenControlBase owner, float scale, ITextProvider tprov, LookupThroughLinkDelegate lookupThroughLink, ParentPaintDelegate parentPaint, GetEntryDelegate getEntry, ICedictEntryProvider entryProvider, CedictResult cr, SearchScript script, bool last)
    : base(owner)
{
    // Collaborators and callbacks
    this.tprov = tprov;
    this.lookupThroughLink = lookupThroughLink;
    this.parentPaint = parentPaint;
    this.getEntry = getEntry;

    // What to show
    res = cr;
    entry = entryProvider.GetEntry(cr.EntryId);
    analyzedScript = script;
    this.last = last;

    // Geometry: paddings are fixed base values multiplied by the scale, truncated to int
    this.scale = scale;
    padLeft = (int)(5.0F * scale);
    padRight = (int)(10.0F * scale);
    padTop = (int)(4.0F * scale);
    padBottom = (int)(8.0F * scale);
    padMid = (int)(20.0F * scale);
}
/// <summary>
/// Checks whether an entry's pinyin contains the query syllables as a contiguous run;
/// if it does, appends a result carrying the highlight range to <paramref name="res"/>.
/// </summary>
/// <param name="entry">Entry to verify. A null entry is skipped silently, like in the sibling verifiers.</param>
/// <param name="entryId">ID of the entry. Not used by this method.</param>
/// <param name="sylls">Query syllables. A query tone of -1 matches any tone.</param>
/// <param name="res">List that receives the result if the entry is a match.</param>
private void verifyPinyin(CedictEntry entry, int entryId, List<PinyinSyllable> sylls, List<CedictResult> res)
{
    // Same guard as verifyHanzi/verifyTrg: nothing to verify without an entry
    if (entry == null)
    {
        return;
    }

    // Find query syllables in entry
    int syllStart = -1;
    for (int i = 0; i <= entry.PinyinCount - sylls.Count; ++i)
    {
        int j;
        for (j = 0; j != sylls.Count; ++j)
        {
            PinyinSyllable syllEntry = entry.GetPinyinAt(i + j);
            PinyinSyllable syllQuery = sylls[j];
            // Entry text is lower-cased; query text is compared as received
            if (syllEntry.Text.ToLowerInvariant() != syllQuery.Text)
            {
                break;
            }
            // Tone only matters if the query specifies one
            if (syllQuery.Tone != -1 && syllEntry.Tone != syllQuery.Tone)
            {
                break;
            }
        }
        // Inner loop ran to completion: every query syllable matched from position i
        if (j == sylls.Count)
        {
            syllStart = i;
            break;
        }
    }

    // Entry is a keeper if query syllables found
    if (syllStart == -1)
    {
        return;
    }

    // Keeper!
    CedictResult cr = new CedictResult(entry, entry.HanziPinyinMap, syllStart, sylls.Count);
    res.Add(cr);
}
/// <summary>
/// Ctor: pairs a lookup result with the entry it refers to.
/// </summary>
/// <param name="res">The lookup result.</param>
/// <param name="entry">The entry that goes with the result.</param>
public ResWithEntry(CedictResult res, CedictEntry entry)
{
    this.Entry = entry;
    this.Res = res;
}
/// <summary>
/// Verifies whether one sense of an entry contains the query tokens as a contiguous run;
/// on the first match, appends a result with a single target highlight to the results list.
/// </summary>
/// <param name="tokenizer">Tokenizer used to tokenize the sense's equivalent text.</param>
/// <param name="entry">Entry to verify. A null entry yields false.</param>
/// <param name="entryId">ID of the entry. Not used by this method.</param>
/// <param name="senseIx">Index of the sense within the entry to check.</param>
/// <param name="qtoks">Tokens of the query. NOTE(review): an empty list would make the
/// highlight code index rtoks[-1] when the sense has any token - callers presumably never pass it; confirm.</param>
/// <param name="res">List that receives the result if the sense is a match.</param>
/// <returns>True if a match was found and a result was added; false otherwise.</returns>
private bool verifyTrg(Tokenizer tokenizer, CedictEntry entry, int entryId, int senseIx, List <Token> qtoks, List <CedictResult> res)
{
    if (entry == null)
    {
        return(false);
    }
    // Tokenize indicated sense's equiv; see if it matches query
    string equiv = entry.GetSenseAt(senseIx).Equiv;
    List <Token> rtoks = tokenizer.Tokenize(equiv);
    // Try every token of the sense as the start of the match
    for (int i = 0; i != rtoks.Count; ++i)
    {
        // j counts the query tokens matched so far from position i
        int j = 0;
        // Set if the first/last query token matched only one half of a split token
        bool startSplit = false;
        bool endSplit = false;
        for (; j != qtoks.Count; ++j)
        {
            // Ran out of sense tokens before matching all of the query
            if (i + j >= rtoks.Count)
            {
                break;
            }
            bool ok = false;
            Token rtok = rtoks[i + j];
            Token qtok = qtoks[j];
            if (rtok.Norm == qtok.Norm)
            {
                // Stopwords: only OK if token is an entire sub-sense in retrieved sense
                if (!trgStopWords.Contains(rtok.Norm))
                {
                    ok = true;
                }
                else
                {
                    // NOTE(review): the neighbor check uses i (match start), not i + j (current token).
                    // For j > 0 this looks at a different token than rtok - confirm this is intended.
                    ok = rtok.SubSeq == 0 && (i == rtoks.Count - 1 || rtoks[i + 1].SubSeq == 0);
                }
            }
            // First query word: can be second half of word in retrieved sense
            // (a non-zero SplitPosNorm is treated as "this token has a split point")
            if (j == 0 && rtok.SplitPosNorm != 0)
            {
                string rTwo = rtok.Norm.Substring(rtok.SplitPosNorm);
                if (rTwo == qtok.Norm)
                {
                    // Stopwords: only if query is min 2 tokens, or result is one token only
                    // NOTE(review): the code only checks the query's token count; the
                    // "result is one token only" case is not tested here.
                    if (!trgStopWords.Contains(rTwo) || qtoks.Count > 1)
                    {
                        ok = true;
                    }
                    // Flag is set even when ok stays false; in that case we break below anyway
                    startSplit = true;
                }
            }
            // Last query word: can be first half of word in retrieved sense
            if (j == qtoks.Count - 1 && rtok.SplitPosNorm != 0)
            {
                string rOne = rtok.Norm.Substring(0, rtok.SplitPosNorm);
                if (rOne == qtok.Norm)
                {
                    // Stopwords: only if query is min 2 tokens, or result is one token only
                    // NOTE(review): same as above - only the query's token count is checked.
                    if (!trgStopWords.Contains(rOne) || qtoks.Count > 1)
                    {
                        ok = true;
                    }
                    endSplit = true;
                }
            }
            if (!ok)
            {
                break;
            }
        }
        // Inner loop stopped early: no full match from position i
        if (j != qtoks.Count)
        {
            continue;
        }
        // We got a match starting at i!
        CedictTargetHighlight[] hlarr = new CedictTargetHighlight[1];
        // Highlight starts at the first matched token, moved forward if only its second half matched
        int start = rtoks[i].Start;
        if (startSplit)
        {
            start += rtoks[i].SplitPosSurf;
        }
        // Highlight ends after the last matched token, pulled back if only its first half matched
        int end = rtoks[i + j - 1].Start + rtoks[i + j - 1].Surf.Length;
        if (endSplit)
        {
            end -= (rtoks[i + j - 1].Surf.Length - rtoks[i + j - 1].SplitPosSurf);
        }
        hlarr[0] = new CedictTargetHighlight(senseIx, start, end - start);
        ReadOnlyCollection <CedictTargetHighlight> hlcoll = new ReadOnlyCollection <CedictTargetHighlight>(hlarr);
        CedictResult cr = new CedictResult(entry, hlcoll);
        // Stop right here: only the first match in the sense is reported
        res.Add(cr);
        return(true);
    }
    // Not a match
    return(false);
}
/// <summary>
/// See <see cref="SetResults"/>. Builds result controls for a lookup and swaps them in,
/// unless a lookup with a higher ID has taken over the display in the meantime.
/// </summary>
/// <param name="lookupId">ID of the lookup these results belong to; compared against the current display ID.</param>
/// <param name="entryProvider">Dictionary entry provider handed to each result control.</param>
/// <param name="results">Results to show; an empty collection clears the display.</param>
/// <param name="script">Search script handed to each result control.</param>
/// <returns>True if the results were accepted for display; false if a later lookup superseded them.</returns>
private bool doSetResults(int lookupId, ICedictEntryProvider entryProvider, ReadOnlyCollection<CedictResult> results, SearchScript script)
{
    lock (displayIdLO)
    {
        // If we're already too late, don't bother changing display.
        if (displayId > lookupId)
        {
            return false;
        }
        displayId = lookupId;

        // Empty result set - special handling
        if (results.Count == 0)
        {
            lock (resCtrlsLO)
            {
                doDisposeResultControls();
                txtResCount = tprov.GetString("ResultsCountNone");
                setScrollbarVisibility(false);
            }
            // Render
            doFade(false);
            MakeMePaint(false, RenderMode.Invalidate);
            return true;
        }
    }

    // Decide if we first try with scrollbar visible or not
    // This is a very rough heuristics (10 results or more), but doesn't matter
    // Recalc costs much if there are many results, and the number covers that safely
    bool sbarVisible = results.Count > 10;

    // Content rectangle height and width
    int cw, ch;
    getContentSize(sbarVisible, out cw, out ch);

    // Create new result controls. At this point, not overwriting old ones!
    // This is the cycle that takes *long*.
    List<OneResultControl> newCtrls = new List<OneResultControl>(results.Count);
    int y = 0;
    using (Bitmap bmp = new Bitmap(1, 1))
    using (Graphics g = Graphics.FromImage(bmp))
    {
        bool canceled = false;
        for (int rix = 0; rix != results.Count; ++rix)
        {
            CedictResult cr = results[rix];
            OneResultControl orc = new OneResultControl(null, Scale, tprov,
                onLookupFromCtrl, onPaintFromCtrl, onGetEntry,
                entryProvider, cr, script, rix == results.Count - 1);
            orc.Analyze(g, cw);
            // Cannot use RelLocation b/c control has no parent yet
            orc.AbsLocation = new Point(AbsLeft + 1, AbsTop + y + 1);
            y += orc.Height;
            newCtrls.Add(orc);

            // At any point, if we realize lookup ID has changed, we stop
            // This can happen if a later, quick lookup completes and shows results before us
            // Checking integers is atomic, no locking
            if (displayId > lookupId)
            {
                canceled = true;
                break;
            }
        }
        if (canceled)
        {
            foreach (OneResultControl orc in newCtrls)
            {
                orc.Dispose();
            }
            return false;
        }
    }

    // OK, last chance to change our mind about showing results.
    // The rest is synchronized - but it's also fast
    lock (displayIdLO)
    {
        if (displayId > lookupId)
        {
            // Superseded at the last moment: the new controls will never be shown or owned
            // by anyone, so release them here, just like the in-loop cancel path does.
            foreach (OneResultControl abandoned in newCtrls)
            {
                abandoned.Dispose();
            }
            return false;
        }
        displayId = lookupId;

        // Rest must be invoked on GUI. Otherwise, as we're adding children,
        // Collections are modified that are also accessed by paint in a resize event handler etc.
        InvokeOnForm((MethodInvoker)delegate
        {
            // Stop any scrolling that may be going on. Cannot scroll what's being replaced.
            if (sbar.Parent == this)
            {
                sbar.StopAnyScrolling();
            }
            // Prevent any painting from worker threads - also accesses collection we're changing
            lock (resCtrlsLO)
            {
                // Get rid of old result controls, remember/own new ones
                doDisposeResultControls();
                resCtrls = newCtrls;
                foreach (OneResultControl orc in resCtrls)
                {
                    AddChild(orc);
                }
                // Actually show or hide scrollbar as per original decision
                setScrollbarVisibility(sbarVisible);
                // Now, by the time we're here, size may have changed
                // That is unlikely, but then we got to re-layout stuff
                int cwNew, chNew;
                getContentSize(sbarVisible, out cwNew, out chNew);
                if (cwNew != cw || chNew != ch)
                {
                    reAnalyzeResultsDisplay();
                }
                else
                {
                    // Everything as big as it used to be...
                    // Change our mind about scrollbar?
                    cw = showOrHideScrollbar();
                }
            }
            // Results count text
            if (resCtrls.Count == 1)
            {
                txtResCount = tprov.GetString("ResultsCountOne");
            }
            else
            {
                txtResCount = tprov.GetString("ResultsCountN");
                txtResCount = string.Format(txtResCount, resCtrls.Count);
            }
            // Update first visible control's index
            updateFirstVisibleIdx();
            // Render
            doFade(false);
            MakeMePaint(false, RenderMode.Invalidate);
        });
        // Done.
        return true;
    }
}
/// <summary>
/// Retrieves matching entries for a target-language search expression.
/// </summary>
/// <param name="br">Reader used to load index instances and to read entry headwords.</param>
/// <param name="query">Search expression; surrounding whitespace is trimmed first.</param>
/// <returns>
/// Results ordered by descending best sense score. Empty if the query is blank, or if any
/// query token is unknown or has the Chinese token ID.
/// </returns>
private List<CedictResult> doTargetLookup(BinReader br, string query)
{
    // Blank query: nothing to find
    query = query.Trim();
    if (query.Length == 0)
    {
        return new List<CedictResult>();
    }

    // Tokenize query and gather the distinct token IDs
    ReadOnlyCollection<EquivToken> queryTokens = tokenizer.Tokenize(new HybridText(query));
    HashSet<int> queryIds = new HashSet<int>();
    foreach (EquivToken tok in queryTokens)
    {
        // A single unusable token means there can be no match at all
        bool unusable = tok.TokenId == WordHolder.IdUnknown || tok.TokenId == index.WordHolder.IdZho;
        if (unusable)
        {
            return new List<CedictResult>();
        }
        queryIds.Add(tok.TokenId);
    }

    // Per tokenized sense: how many index instances of our query tokens point at it.
    // New senses are only recorded while processing the first token: a sense that
    // lacks the first token cannot contain *all* tokens anyway.
    Dictionary<int, SenseLookupInfo> hitsPerSense = new Dictionary<int, SenseLookupInfo>();
    bool seedingPass = true;
    foreach (int tokenId in queryIds)
    {
        foreach (SenseInfo occurrence in index.SenseIndex[tokenId].GetOrLoadInstances(br))
        {
            SenseLookupInfo known;
            if (hitsPerSense.TryGetValue(occurrence.TokenizedSenseId, out known))
            {
                ++known.NumOfQueryTokensInSense;
            }
            else if (seedingPass)
            {
                hitsPerSense[occurrence.TokenizedSenseId] = new SenseLookupInfo
                {
                    NumOfQueryTokensInSense = 1,
                    TokensInSense = occurrence.TokensInSense
                };
            }
        }
        seedingPass = false;
    }

    // Candidates: senses whose count equals the number of distinct query tokens
    List<int> candidateSenses = new List<int>();
    foreach (KeyValuePair<int, SenseLookupInfo> pair in hitsPerSense)
    {
        if (pair.Value.NumOfQueryTokensInSense == queryIds.Count)
        {
            candidateSenses.Add(pair.Key);
        }
    }

    // Verify each candidate sense; verification fills in per-entry match info
    Dictionary<int, EntryMatchInfo> matchByEntry = new Dictionary<int, EntryMatchInfo>();
    foreach (int senseId in candidateSenses)
    {
        doVerifyTarget(queryTokens, senseId, matchByEntry, br);
    }

    // Keep entries whose headword hanzi are covered. Only the hanzi are read here,
    // not the full entry, so the senses' own text is not checked.
    List<EntryMatchInfo> ranked = new List<EntryMatchInfo>();
    foreach (EntryMatchInfo info in matchByEntry.Values)
    {
        string simp, trad;
        br.Position = info.EntryId;
        CedictEntry.DeserializeHanzi(br, out simp, out trad);
        if (areHanziCovered(simp, trad))
        {
            ranked.Add(info);
        }
    }

    // Best score first
    ranked.Sort((x, y) => y.BestSenseScore.CompareTo(x.BestSenseScore));

    // Wrap into results
    List<CedictResult> res = new List<CedictResult>();
    foreach (EntryMatchInfo info in ranked)
    {
        res.Add(new CedictResult(info.EntryId, new ReadOnlyCollection<CedictTargetHighlight>(info.TargetHilites)));
    }
    return res;
}