Beispiel #1
0
        /// <summary>
        /// Checks whether the simplified or traditional headword of an entry contains the query string;
        /// if it does, appends a result carrying the highlight range to <paramref name="res"/>.
        /// </summary>
        /// <param name="entry">Entry to verify. A null entry is silently ignored.</param>
        /// <param name="entryId">ID of the entry. Not used by this method.</param>
        /// <param name="query">String to search for in the headwords.</param>
        /// <param name="res">List that receives the result when the entry matches.</param>
        private void verifyHanzi(CedictEntry entry, int entryId, string query, List <CedictResult> res)
        {
            if (entry == null)
            {
                return;
            }

            // Ordinal search: the highlight length is taken from query.Length, so the match must be
            // an exact char-for-char match. A culture-sensitive IndexOf can match on ignorable
            // characters or with a different length, which would produce a wrong highlight.
            int hiliteStart = entry.ChSimpl.IndexOf(query, System.StringComparison.Ordinal);
            if (hiliteStart == -1)
            {
                // Not found in simplified headword: try traditional
                hiliteStart = entry.ChTrad.IndexOf(query, System.StringComparison.Ordinal);
            }

            // An empty query is "found" at position 0 but highlights nothing: never a keeper
            if (hiliteStart == -1 || query.Length == 0)
            {
                return;
            }

            // Entry is a keeper: one of the headwords contains the query
            CedictResult cr = new CedictResult(CedictResult.SimpTradWarning.None,
                                               entry, entry.HanziPinyinMap,
                                               hiliteStart, query.Length);
            res.Add(cr);
        }
Beispiel #2
0
 /// <summary>
 /// Ctor: creates a control for a regular lookup result by storing the supplied values.
 /// </summary>
 /// <param name="res">The lookup result kept by this control.</param>
 /// <param name="prov">The entry provider kept by this control.</param>
 /// <param name="script">Script setting kept by this control.</param>
 /// <param name="tones">Tone setting kept by this control.</param>
 /// <param name="isMobile">Mobile flag kept by this control.</param>
 public OneResultCtrl(CedictResult res, ICedictEntryProvider prov,
                      UiScript script, UiTones tones, bool isMobile)
 {
     // Display settings
     this.isMobile = isMobile;
     this.tones    = tones;
     this.script   = script;
     // Data to show
     this.prov = prov;
     this.res  = res;
 }
Beispiel #3
0
        /// <summary>
        /// Loads each pinyin lookup candidate from the reader and keeps those whose pinyin
        /// actually contains the query syllables as a consecutive run.
        /// </summary>
        /// <param name="br">Reader the entries are deserialized from; its position is moved to each candidate.</param>
        /// <param name="poss">Positions of the candidate entries in the reader.</param>
        /// <param name="sylls">Query syllables; a tone of -1 matches any tone.</param>
        /// <returns>Verified results, each paired with its loaded entry.</returns>
        List <ResWithEntry> doLoadVerifyPinyin(BinReader br, IEnumerable <int> poss, List <PinyinSyllable> sylls)
        {
            List<ResWithEntry> keepers = new List<ResWithEntry>();

            foreach (int pos in poss)
            {
                // Deserialize the candidate from its position
                br.Position = pos;
                CedictEntry entry = new CedictEntry(br);

                // Slide the query over the entry's syllables; remember the first full match
                int matchAt   = -1;
                int lastStart = entry.PinyinCount - sylls.Count;
                for (int start = 0; start <= lastStart && matchAt == -1; ++start)
                {
                    bool allMatch = true;
                    for (int k = 0; allMatch && k < sylls.Count; ++k)
                    {
                        PinyinSyllable inEntry = entry.GetPinyinAt(start + k);
                        PinyinSyllable inQuery = sylls[k];
                        // Text is compared against the entry's lower-cased form;
                        // tone only counts when the query specifies one
                        allMatch = inEntry.Text.ToLowerInvariant() == inQuery.Text
                                   && (inQuery.Tone == -1 || inEntry.Tone == inQuery.Tone);
                    }
                    if (allMatch)
                    {
                        matchAt = start;
                    }
                }

                // No match, or headword has hanzi that are not covered: drop
                if (matchAt == -1 || !areHanziCovered(entry))
                {
                    continue;
                }

                CedictResult verified = new CedictResult(pos, entry.HanziPinyinMap, matchAt, sylls.Count);
                keepers.Add(new ResWithEntry(verified, entry));
            }
            return keepers;
        }
Beispiel #4
0
 /// <summary>
 /// Ctor: creates a renderer for a regular lookup result.
 /// Hanzi animation and dimming of identical traditional characters are both switched on.
 /// </summary>
 /// <param name="res">The lookup result kept by this renderer.</param>
 /// <param name="prov">The entry provider kept by this renderer.</param>
 /// <param name="script">Script setting kept by this renderer.</param>
 /// <param name="tones">Tone setting kept by this renderer.</param>
 public EntryRenderer(CedictResult res, ICedictEntryProvider prov,
                      UiScript script, UiTones tones)
 {
     // Fixed defaults for this overload
     this.dimIdenticalTrad = true;
     this.hanim            = true;
     // Values supplied by the caller
     this.tones  = tones;
     this.script = script;
     this.prov   = prov;
     this.res    = res;
 }
Beispiel #5
0
 /// <summary>
 /// Ctor: creates a renderer for a regular lookup result.
 /// Hanzi animation and dimming of identical traditional characters are both switched on,
 /// and the extra entry class starts out as an empty string.
 /// </summary>
 /// <param name="lang">Language value kept by this renderer.</param>
 /// <param name="res">The lookup result kept by this renderer.</param>
 /// <param name="script">Script setting kept by this renderer.</param>
 /// <param name="tones">Tone setting kept by this renderer.</param>
 /// <param name="entryId">Entry ID string kept by this renderer.</param>
 public EntryRenderer(string lang, CedictResult res, UiScript script, UiTones tones, string entryId)
 {
     // Fixed defaults for this overload
     this.extraEntryClass  = "";
     this.dimIdenticalTrad = true;
     this.hanim            = true;
     // Values supplied by the caller
     this.entryId = entryId;
     this.tones   = tones;
     this.script  = script;
     this.res     = res;
     this.lang    = lang;
 }
Beispiel #6
0
        /// <summary>
        /// Loads each hanzi lookup candidate from the reader and keeps those whose simplified or
        /// traditional headword actually contains the search expression.
        /// </summary>
        /// <param name="br">Reader the entries are deserialized from; its position is moved to each candidate.</param>
        /// <param name="poss">Positions of the candidate entries in the reader.</param>
        /// <param name="query">String to search for in the headwords.</param>
        /// <param name="script">Search script. Not used by this method yet (see TO-DO below).</param>
        /// <returns>Verified results, each paired with its loaded entry.</returns>
        List <ResWithEntry> doLoadVerifyHanzi(BinReader br, IEnumerable <int> poss, string query, SearchScript script)
        {
            List <ResWithEntry> resList = new List <ResWithEntry>();

            // Look at each entry: load, verify, keep or drop
            foreach (int pos in poss)
            {
                // Load up entry from file
                br.Position = pos;
                CedictEntry entry = new CedictEntry(br);

                // Ordinal search: the highlight length is taken from query.Length, so the match must
                // be exact char-for-char. A culture-sensitive IndexOf can match on ignorable
                // characters or with a different length, which would produce a wrong highlight.
                int hiliteStart = entry.ChSimpl.IndexOf(query, System.StringComparison.Ordinal);
                if (hiliteStart == -1)
                {
                    // Not found in simplified headword: try traditional
                    hiliteStart = entry.ChTrad.IndexOf(query, System.StringComparison.Ordinal);
                }

                // An empty query is "found" at position 0 but highlights nothing: never a keeper
                if (hiliteStart == -1 || query.Length == 0)
                {
                    continue;
                }

                // Drop if there's any unprintable hanzi
                if (!areHanziCovered(entry))
                {
                    continue;
                }

                // TO-DO: indicate wrong script in result
                CedictResult res = new CedictResult(CedictResult.SimpTradWarning.None,
                                                    pos, entry.HanziPinyinMap,
                                                    hiliteStart, query.Length);
                resList.Add(new ResWithEntry(res, entry));
            }
            return(resList);
        }
Beispiel #7
0
            /// <summary>
            /// Loads each pinyin lookup candidate and keeps those whose pinyin actually contains
            /// the query syllables as a consecutive run.
            /// </summary>
            /// <param name="cands">Blob IDs of the candidate entries.</param>
            /// <param name="qsylls">Query syllables; a tone of -1 matches any tone.</param>
            /// <returns>Verified results, each paired with its loaded entry.</returns>
            private List <ResWithEntry> retrieveVerifyPinyin(List <int> cands, List <PinyinSyllable> qsylls)
            {
                List<ResWithEntry> keepers = new List<ResWithEntry>();

                foreach (int blobId in cands)
                {
                    CedictEntry entry = loadFromBlob(blobId);

                    // Slide the query over the entry's syllables; remember the first full match
                    int matchAt   = -1;
                    int lastStart = entry.PinyinCount - qsylls.Count;
                    for (int start = 0; start <= lastStart && matchAt == -1; ++start)
                    {
                        bool allMatch = true;
                        for (int k = 0; allMatch && k < qsylls.Count; ++k)
                        {
                            PinyinSyllable inEntry = entry.GetPinyinAt(start + k);
                            PinyinSyllable inQuery = qsylls[k];
                            // Text is compared against the entry's lower-cased form;
                            // tone only counts when the query specifies one
                            allMatch = inEntry.Text.ToLowerInvariant() == inQuery.Text
                                       && (inQuery.Tone == -1 || inEntry.Tone == inQuery.Tone);
                        }
                        if (allMatch)
                        {
                            matchAt = start;
                        }
                    }

                    // Only entries that contain the query syllables are kept
                    if (matchAt != -1)
                    {
                        CedictResult verified = new CedictResult(blobId, entry.HanziPinyinMap, matchAt, qsylls.Count);
                        keepers.Add(new ResWithEntry(verified, entry));
                    }
                }
                return keepers;
            }
Beispiel #8
0
        /// <summary>
        /// Comparison used to sort results of a pinyin lookup for presentation.
        /// Keys, in order: pinyin syllable count (fewer first), start of the pinyin highlight
        /// (earlier first), then the entries' own pinyin comparison.
        /// </summary>
        /// <param name="a">First result.</param>
        /// <param name="b">Second result.</param>
        /// <returns>Negative, zero or positive, as for <c>CompareTo</c>.</returns>
        private static int pyComp(CedictResult a, CedictResult b)
        {
            // Key 1: number of pinyin syllables
            int byCount = a.Entry.PinyinCount.CompareTo(b.Entry.PinyinCount);
            if (byCount != 0)
            {
                return byCount;
            }

            // Key 2: where the match starts; key 3: pinyin comparison of the entries
            int byStart = a.PinyinHiliteStart.CompareTo(b.PinyinHiliteStart);
            return byStart != 0 ? byStart : a.Entry.PinyinCompare(b.Entry);
        }
Beispiel #9
0
        /// <summary>
        /// Comparison used to sort results by their hanzi match.
        /// Keys, in order: start of the hanzi highlight (earlier first), the entries' own pinyin
        /// comparison, then length of the simplified headword (shorter first).
        /// </summary>
        /// <param name="a">First result.</param>
        /// <param name="b">Second result.</param>
        /// <returns>Negative, zero or positive, as for <c>CompareTo</c>.</returns>
        private static int hrComp(CedictResult a, CedictResult b)
        {
            // Key 1: where the match starts in the headword
            int byStart = a.HanziHiliteStart.CompareTo(b.HanziHiliteStart);
            if (byStart != 0)
            {
                return byStart;
            }

            // Key 2: pinyin comparison as defined by the entry
            int byPinyin = a.Entry.PinyinCompare(b.Entry);
            if (byPinyin != 0)
            {
                return byPinyin;
            }

            // Key 3: pinyin compares equal, so the shorter simplified headword wins
            return a.Entry.ChSimpl.Length.CompareTo(b.Entry.ChSimpl.Length);
        }
Beispiel #10
0
            /// <summary>
            /// Loads each hanzi lookup candidate and keeps those whose simplified or traditional
            /// headword actually contains the query string.
            /// </summary>
            /// <param name="cands">Blob IDs of the candidate entries.</param>
            /// <param name="query">String to search for in the headwords.</param>
            /// <returns>Verified results, each paired with its loaded entry.</returns>
            private List <ResWithEntry> retrieveVerifyHanzi(HashSet <int> cands, string query)
            {
                List <ResWithEntry> resList = new List <ResWithEntry>();

                foreach (int blobId in cands)
                {
                    // Load entry from DB
                    CedictEntry entry = loadFromBlob(blobId);

                    // Ordinal search: the highlight length is taken from query.Length, so the match
                    // must be exact char-for-char. A culture-sensitive IndexOf can match on ignorable
                    // characters or with a different length, which would produce a wrong highlight.
                    int hiliteStart = entry.ChSimpl.IndexOf(query, System.StringComparison.Ordinal);
                    if (hiliteStart == -1)
                    {
                        // Not found in simplified headword: try traditional
                        hiliteStart = entry.ChTrad.IndexOf(query, System.StringComparison.Ordinal);
                    }

                    // An empty query is "found" at position 0 but highlights nothing: never a keeper
                    if (hiliteStart == -1 || query.Length == 0)
                    {
                        continue;
                    }

                    // Entry is a keeper: one of the headwords contains the query
                    CedictResult res = new CedictResult(CedictResult.SimpTradWarning.None,
                                                        blobId, entry.HanziPinyinMap,
                                                        hiliteStart, query.Length);
                    resList.Add(new ResWithEntry(res, entry));
                }
                return(resList);
            }
Beispiel #11
0
        /// <summary>
        /// Ctor: takes data to display, retrieves the entry, and derives scaled paddings.
        /// </summary>
        /// <param name="owner">Zen control that owns me; passed on to the base constructor.</param>
        /// <param name="scale">Scale factor applied to the padding values.</param>
        /// <param name="tprov">Localized display text provider.</param>
        /// <param name="lookupThroughLink">Delegate to call when user initiates lookup by clicking on a link.</param>
        /// <param name="parentPaint">Paint delegate stored by this control.</param>
        /// <param name="getEntry">Delegate to call when an entry must be retrieved (for "copy" context menu).</param>
        /// <param name="entryProvider">Dictionary entry provider; queried once here for the result's entry.</param>
        /// <param name="cr">The lookup result this control will show.</param>
        /// <param name="script">Scripts to show in headword.</param>
        /// <param name="last">Flag stored by this control; callers in this file pass true for the final item of the results list.</param>
        public OneResultControl(ZenControlBase owner, float scale, ITextProvider tprov,
                                LookupThroughLinkDelegate lookupThroughLink,
                                ParentPaintDelegate parentPaint, GetEntryDelegate getEntry,
                                ICedictEntryProvider entryProvider, CedictResult cr,
                                SearchScript script, bool last)
            : base(owner)
        {
            // Result and the entry it refers to
            this.res   = cr;
            this.entry = entryProvider.GetEntry(cr.EntryId);

            // Display parameters
            this.scale          = scale;
            this.analyzedScript = script;
            this.last           = last;

            // Collaborators
            this.tprov             = tprov;
            this.getEntry          = getEntry;
            this.parentPaint       = parentPaint;
            this.lookupThroughLink = lookupThroughLink;

            // Paddings are fixed base values multiplied by the scale, truncated to int
            padTop    = (int)(4.0F * scale);
            padBottom = (int)(8.0F * scale);
            padLeft   = (int)(5.0F * scale);
            padRight  = (int)(10.0F * scale);
            padMid    = (int)(20.0F * scale);
        }
Beispiel #12
0
        /// <summary>
        /// Checks whether an entry's pinyin contains the query syllables as a consecutive run;
        /// if it does, appends a result carrying the highlight range to <paramref name="res"/>.
        /// </summary>
        /// <param name="entry">Entry to verify. A null entry is silently ignored, as in the sibling verify methods.</param>
        /// <param name="entryId">ID of the entry. Not used by this method.</param>
        /// <param name="sylls">Query syllables; a tone of -1 matches any tone.</param>
        /// <param name="res">List that receives the result when the entry matches.</param>
        private void verifyPinyin(CedictEntry entry, int entryId, List <PinyinSyllable> sylls, List <CedictResult> res)
        {
            // Same guard as verifyHanzi and verifyTrg: nothing to verify without an entry
            if (entry == null)
            {
                return;
            }

            // Find query syllables in entry: first start position where all of them match
            int syllStart = -1;

            for (int i = 0; i <= entry.PinyinCount - sylls.Count; ++i)
            {
                int j;
                for (j = 0; j != sylls.Count; ++j)
                {
                    PinyinSyllable syllEntry = entry.GetPinyinAt(i + j);
                    PinyinSyllable syllQuery = sylls[j];
                    // Text is compared against the entry's lower-cased form
                    if (syllEntry.Text.ToLowerInvariant() != syllQuery.Text)
                    {
                        break;
                    }
                    // Tone only counts when the query specifies one
                    if (syllQuery.Tone != -1 && syllEntry.Tone != syllQuery.Tone)
                    {
                        break;
                    }
                }
                // Inner loop ran to the end: every query syllable matched
                if (j == sylls.Count)
                {
                    syllStart = i;
                    break;
                }
            }
            // Entry is a keeper only if query syllables were found
            if (syllStart == -1)
            {
                return;
            }
            // Keeper!
            CedictResult cr = new CedictResult(entry, entry.HanziPinyinMap, syllStart, sylls.Count);

            res.Add(cr);
        }
Beispiel #13
0
 /// <summary>
 /// Ctor: pairs a lookup result with its entry.
 /// </summary>
 /// <param name="res">The lookup result.</param>
 /// <param name="entry">The entry that goes with the result.</param>
 public ResWithEntry(CedictResult res, CedictEntry entry)
 {
     Entry = entry;
     Res   = res;
 }
Beispiel #14
0
        /// <summary>
        /// Checks whether the query tokens occur as a consecutive run in the tokenized equivalent
        /// of one sense of an entry. On the first match, appends a result with a single target
        /// highlight to <paramref name="res"/> and stops.
        /// </summary>
        /// <param name="tokenizer">Tokenizer used to tokenize the sense's equivalent.</param>
        /// <param name="entry">Entry to verify. A null entry yields false.</param>
        /// <param name="entryId">ID of the entry. Not used by this method.</param>
        /// <param name="senseIx">Index of the sense to check within the entry.</param>
        /// <param name="qtoks">Tokens of the query. A null or empty list yields false.</param>
        /// <param name="res">List that receives the result when the sense matches.</param>
        /// <returns>True if a match was found and a result was added; false otherwise.</returns>
        private bool verifyTrg(Tokenizer tokenizer, CedictEntry entry, int entryId, int senseIx,
                               List <Token> qtoks, List <CedictResult> res)
        {
            if (entry == null)
            {
                return(false);
            }
            // Without query tokens the matching loop below would "match" immediately and then
            // index rtoks[i + j - 1] with -1. Nothing to look for means no match.
            if (qtoks == null || qtoks.Count == 0)
            {
                return(false);
            }

            // Tokenize indicated sense's equiv; see if it matches query
            string       equiv = entry.GetSenseAt(senseIx).Equiv;
            List <Token> rtoks = tokenizer.Tokenize(equiv);

            // Try every start position i in the retrieved sense
            for (int i = 0; i != rtoks.Count; ++i)
            {
                int  j          = 0;
                bool startSplit = false;
                bool endSplit   = false;
                for (; j != qtoks.Count; ++j)
                {
                    // Ran out of sense tokens before query was consumed
                    if (i + j >= rtoks.Count)
                    {
                        break;
                    }
                    bool  ok   = false;
                    Token rtok = rtoks[i + j];
                    Token qtok = qtoks[j];
                    if (rtok.Norm == qtok.Norm)
                    {
                        // Stopwords: only OK if token is an entire sub-sense in retrieved sense
                        if (!trgStopWords.Contains(rtok.Norm))
                        {
                            ok = true;
                        }
                        else
                        {
                            // NOTE(review): the neighbor check uses i, not i + j, so for j > 0 it
                            // inspects the token after the match start rather than after rtok - confirm intent.
                            ok = rtok.SubSeq == 0 && (i == rtoks.Count - 1 || rtoks[i + 1].SubSeq == 0);
                        }
                    }
                    // First query word: can be second half of word in retrieved sense
                    if (j == 0 && rtok.SplitPosNorm != 0)
                    {
                        string rTwo = rtok.Norm.Substring(rtok.SplitPosNorm);
                        if (rTwo == qtok.Norm)
                        {
                            // Stopwords: accepted only if query has at least 2 tokens.
                            // NOTE(review): an earlier comment also mentioned "result is one token only";
                            // the condition below does not test that.
                            if (!trgStopWords.Contains(rTwo) || qtoks.Count > 1)
                            {
                                ok = true;
                            }
                            // Set whenever the second half equals the query token, even if not accepted
                            startSplit = true;
                        }
                    }
                    // Last query word: can be first half of word in retrieved sense
                    if (j == qtoks.Count - 1 && rtok.SplitPosNorm != 0)
                    {
                        string rOne = rtok.Norm.Substring(0, rtok.SplitPosNorm);
                        if (rOne == qtok.Norm)
                        {
                            // Stopwords: accepted only if query has at least 2 tokens (see note above)
                            if (!trgStopWords.Contains(rOne) || qtoks.Count > 1)
                            {
                                ok = true;
                            }
                            // Set whenever the first half equals the query token, even if not accepted
                            endSplit = true;
                        }
                    }
                    if (!ok)
                    {
                        break;
                    }
                }
                // Inner loop ended early: no match at this start position
                if (j != qtoks.Count)
                {
                    continue;
                }
                // We got a match starting at i!
                CedictTargetHighlight[] hlarr = new CedictTargetHighlight[1];
                // Highlight starts at first matched token, or at its split position for a half-word match
                int start = rtoks[i].Start;
                if (startSplit)
                {
                    start += rtoks[i].SplitPosSurf;
                }
                // Highlight ends after last matched token, or at its split position for a half-word match
                int end = rtoks[i + j - 1].Start + rtoks[i + j - 1].Surf.Length;
                if (endSplit)
                {
                    end -= (rtoks[i + j - 1].Surf.Length - rtoks[i + j - 1].SplitPosSurf);
                }
                hlarr[0] = new CedictTargetHighlight(senseIx, start, end - start);
                ReadOnlyCollection <CedictTargetHighlight> hlcoll = new ReadOnlyCollection <CedictTargetHighlight>(hlarr);
                CedictResult cr = new CedictResult(entry, hlcoll);
                // Stop right here: only the first match in the sense is reported
                res.Add(cr);
                return(true);
            }
            // Not a match
            return(false);
        }
Beispiel #15
0
        /// <summary>
        /// See <see cref="SetResults"/>. Builds one result control per lookup result and, unless
        /// a lookup with a higher ID has taken over the display in the meantime, swaps them in.
        /// </summary>
        /// <param name="lookupId">ID of the lookup these results belong to; compared against the current display ID.</param>
        /// <param name="entryProvider">Entry provider passed on to each new result control.</param>
        /// <param name="results">The lookup results to show.</param>
        /// <param name="script">Search script passed on to each new result control.</param>
        /// <returns>
        /// False if the display ID was found to be greater than <paramref name="lookupId"/>
        /// at any of the checks (results are then not shown); true otherwise.
        /// </returns>
        private bool doSetResults(int lookupId,
                                  ICedictEntryProvider entryProvider,
                                  ReadOnlyCollection <CedictResult> results,
                                  SearchScript script)
        {
            lock (displayIdLO)
            {
                // If we're already too late, don't bother changing display.
                if (displayId > lookupId)
                {
                    return(false);
                }
                displayId = lookupId;
                // Empty result set - special handling
                if (results.Count == 0)
                {
                    lock (resCtrlsLO)
                    {
                        doDisposeResultControls();
                        txtResCount = tprov.GetString("ResultsCountNone");
                        setScrollbarVisibility(false);
                    }
                    // Render
                    doFade(false);
                    MakeMePaint(false, RenderMode.Invalidate);
                    return(true);
                }
            }

            // Decide if we first try with scrollbar visible or not
            // This is a very rough heuristics (more than 10 results), but doesn't matter
            // Recalc costs much if there are many results, and the number covers that safely
            bool sbarVisible = results.Count > 10;

            // Content rectangle height and width
            int cw, ch;

            getContentSize(sbarVisible, out cw, out ch);

            // Create new result controls. At this point, not overwriting old ones!
            // This is the cycle that takes *long*.
            List <OneResultControl> newCtrls = new List <OneResultControl>(results.Count);
            int y = 0;

            // Throw-away 1x1 bitmap: only there to obtain a Graphics object for Analyze
            using (Bitmap bmp = new Bitmap(1, 1))
            using (Graphics g = Graphics.FromImage(bmp))
            {
                bool canceled = false;
                for (int rix = 0; rix != results.Count; ++rix)
                {
                    CedictResult     cr  = results[rix];
                    OneResultControl orc = new OneResultControl(null, Scale, tprov,
                                                                onLookupFromCtrl, onPaintFromCtrl, onGetEntry,
                                                                entryProvider, cr, script, rix == results.Count - 1);
                    orc.Analyze(g, cw);
                    // Cannot use RelLocation b/c control has no parent yet
                    orc.AbsLocation = new Point(AbsLeft + 1, AbsTop + y + 1);
                    y += orc.Height;
                    newCtrls.Add(orc);
                    // At any point, if we realize lookup ID has changed, we stop
                    // This can happen if a later, quick lookup completes and shows results before us
                    // Checking integers is atomic, no locking
                    if (displayId > lookupId)
                    {
                        canceled = true; break;
                    }
                }
                if (canceled)
                {
                    // Controls never got a parent; we own them, so we clean them up
                    foreach (OneResultControl orc in newCtrls)
                    {
                        orc.Dispose();
                    }
                    return(false);
                }
            }
            // OK, last chance to change our mind about showing results.
            // The rest is synchronized - but it's also fast
            lock (displayIdLO)
            {
                if (displayId > lookupId)
                {
                    // Same cleanup as the cancel path above: the new controls were never handed
                    // over to anyone, so returning without disposing would leave them orphaned.
                    foreach (OneResultControl orc in newCtrls)
                    {
                        orc.Dispose();
                    }
                    return(false);
                }
                displayId = lookupId;
                // Rest must be invoked on GUI. Otherwise, as we're adding children,
                // Collections are modified that are also accessed by paint in a resize event handler etc.
                InvokeOnForm((MethodInvoker) delegate
                {
                    // Stop any scrolling that may be going on. Cannot scroll what's being replaced.
                    if (sbar.Parent == this)
                    {
                        sbar.StopAnyScrolling();
                    }
                    // Prevent any painting from worker threads - also accesses collection we're changing
                    lock (resCtrlsLO)
                    {
                        // Get rid of old result controls, remember/own new ones
                        doDisposeResultControls();
                        resCtrls = newCtrls;
                        foreach (OneResultControl orc in resCtrls)
                        {
                            AddChild(orc);
                        }
                        // Actually show or hide scrollbar as per original decision
                        setScrollbarVisibility(sbarVisible);
                        // Now, by the time we're here, size may have changed
                        // That is unlikely, but then we got to re-layout stuff
                        int cwNew, chNew;
                        getContentSize(sbarVisible, out cwNew, out chNew);
                        if (cwNew != cw || chNew != ch)
                        {
                            reAnalyzeResultsDisplay();
                        }
                        else
                        {
                            // Everything as big as it used to be...
                            // Change our mind about scrollbar?
                            cw = showOrHideScrollbar();
                        }
                    }
                    // Results count text: separate string for exactly one result,
                    // otherwise a format string that takes the count
                    if (resCtrls.Count == 1)
                    {
                        txtResCount = tprov.GetString("ResultsCountOne");
                    }
                    else
                    {
                        txtResCount = tprov.GetString("ResultsCountN");
                        txtResCount = string.Format(txtResCount, resCtrls.Count);
                    }
                    // Update first visible control's index
                    updateFirstVisibleIdx();
                    // Render
                    doFade(false);
                    MakeMePaint(false, RenderMode.Invalidate);
                });
                // Done.
                return(true);
            }
        }
Beispiel #16
0
        /// <summary>
        /// Retrieves matching entries for a target-language search expression.
        /// </summary>
        /// <param name="br">Reader used to load index instances and headwords; its position is changed.</param>
        /// <param name="query">Search expression; leading and trailing whitespace is trimmed first.</param>
        /// <returns>
        /// Results ordered by descending best sense score. Empty if the trimmed query is empty,
        /// or if any query token is unknown or is the Chinese placeholder token.
        /// </returns>
        private List <CedictResult> doTargetLookup(BinReader br, string query)
        {
            // Empty query string: no results
            query = query.Trim();
            if (query == string.Empty)
            {
                return(new List <CedictResult>());
            }

            // Tokenize query string
            HybridText txtQuery = new HybridText(query);
            ReadOnlyCollection <EquivToken> txtTokenized = tokenizer.Tokenize(txtQuery);
            // Get query string's token IDs
            bool          anyUnknown = false;
            HashSet <int> idSet      = new HashSet <int>();

            foreach (EquivToken eqt in txtTokenized)
            {
                if (eqt.TokenId == WordHolder.IdUnknown || eqt.TokenId == index.WordHolder.IdZho)
                {
                    anyUnknown = true; break;
                }
                idSet.Add(eqt.TokenId);
            }
            // Any unknown tokens - no match, we know that immediately
            List <CedictResult> res = new List <CedictResult>();

            if (anyUnknown)
            {
                return(res);
            }
            // Collect IDs of tokenized senses that contain one or more of our query IDs
            Dictionary <int, SenseLookupInfo> senseTokenCounts = new Dictionary <int, SenseLookupInfo>();
            bool firstToken = true;

            // For each token...
            foreach (int tokenId in idSet)
            {
                // Get sense instances where it occurs
                List <SenseInfo> instances = index.SenseIndex[tokenId].GetOrLoadInstances(br);
                foreach (SenseInfo si in instances)
                {
                    SenseLookupInfo sli;
                    // We already have a count for this sense: bump it.
                    // Single TryGetValue instead of ContainsKey + indexer (one hash lookup, not two).
                    if (senseTokenCounts.TryGetValue(si.TokenizedSenseId, out sli))
                    {
                        ++sli.NumOfQueryTokensInSense;
                    }
                    // Or this is the first time we're seeing it
                    // We only start counts while processing the first token:
                    // we're looking for senses that contain *all* query tokens
                    else if (firstToken)
                    {
                        sli = new SenseLookupInfo
                        {
                            NumOfQueryTokensInSense = 1,
                            TokensInSense           = si.TokensInSense
                        };
                        senseTokenCounts[si.TokenizedSenseId] = sli;
                    }
                }
                firstToken = false;
            }
            // Keep those sense IDs (positions) that contain all of our query tokens
            // We already eliminated some candidates through "firstToken" trick before, but not all
            // NOTE(review): the exact-equality test assumes a sense appears at most once in a
            // token's instance list - confirm against the index format.
            List <int> sensePosList = new List <int>();

            foreach (var x in senseTokenCounts)
            {
                if (x.Value.NumOfQueryTokensInSense == idSet.Count)
                {
                    sensePosList.Add(x.Key);
                }
            }
            // Load each tokenized sense to find out:
            // - whether entry is a real match
            // - entry ID
            // - best score for entry (multiple senses may hold query string)
            // - highlights
            // The map is handed to doVerifyTarget, which is the only place it can get filled here.
            Dictionary <int, EntryMatchInfo> entryIdToInfo = new Dictionary <int, EntryMatchInfo>();

            foreach (int senseId in sensePosList)
            {
                doVerifyTarget(txtTokenized, senseId, entryIdToInfo, br);
            }

            // Sort entry IDs by their best score
            // Drop entries with unprintable hanzi in HW now
            List <EntryMatchInfo> entryInfoList = new List <EntryMatchInfo>();

            foreach (var x in entryIdToInfo)
            {
                // Check coverage. Because we don't load full entry, it's possible
                // that some unsupported chars in hybrid text of senses slip through.
                // There's a limit to perfectionism.
                string simp, trad;
                // Entry ID is used directly as the reader position of the entry
                br.Position = x.Value.EntryId;
                CedictEntry.DeserializeHanzi(br, out simp, out trad);
                if (!areHanziCovered(simp, trad))
                {
                    continue;
                }
                // Queue up for sorting.
                entryInfoList.Add(x.Value);
            }
            // Descending: higher best sense score comes first
            entryInfoList.Sort((a, b) => b.BestSenseScore.CompareTo(a.BestSenseScore));
            // Wrap entry IDs and their highlights into results
            foreach (EntryMatchInfo emi in entryInfoList)
            {
                CedictResult cr = new CedictResult(emi.EntryId,
                                                   new ReadOnlyCollection <CedictTargetHighlight>(emi.TargetHilites));
                res.Add(cr);
            }
            return(res);
        }