Exemple #1
0
        /// <summary>
        /// Decides whether the caller of the current request may add <paramref name="query"/> as a new word.
        /// </summary>
        /// <param name="query">The candidate word.</param>
        /// <returns>
        /// False if the session check yields a negative user ID, if the query is longer than six
        /// characters, or if any character's Unihan entry is missing or has CanBeSimp unset;
        /// true otherwise.
        /// </returns>
        private bool canAddWord(string query)
        {
            int    sessionUserId;
            string sessionUserName;

            // Session check comes first; the user name is required by the signature but not used here
            auth.CheckSession(HttpContext.Request.Headers, out sessionUserId, out sessionUserName);

            // Short-circuit keeps the original order: the query is only inspected for a valid session
            bool hasSession = sessionUserId >= 0;
            if (!hasSession || query.Length > 6)
            {
                return false;
            }

            // Every character must have a Unihan entry that is flagged CanBeSimp
            UniHanziInfo[] infos = langRepo.GetUnihanInfo(query);
            for (int pos = 0; pos < infos.Length; ++pos)
            {
                UniHanziInfo info = infos[pos];
                bool acceptable = info != null && info.CanBeSimp;
                if (!acceptable)
                {
                    return false;
                }
            }
            return true;
        }
        /// <summary>
        /// Validates a proposed headword (simplified, traditional, pinyin) and collects the problems found.
        /// </summary>
        /// <param name="lang">Language identifier handed to the text provider when resolving messages.</param>
        /// <param name="simp">Simplified form of the headword.</param>
        /// <param name="trad">Traditional form of the headword.</param>
        /// <param name="pinyin">Pinyin as entered; it is compared against its space-normalized form.</param>
        /// <param name="errorsSimp">Receives problems found with the simplified form.</param>
        /// <param name="errorsTrad">Receives problems found with the traditional form.</param>
        /// <param name="errorsPinyin">Receives problems found with the pinyin.</param>
        /// <returns>True if the headword could be parsed as a CEDICT entry; false otherwise.</returns>
        private bool validateHeadword(string lang, string simp, string trad, string pinyin,
                                      List<HeadwordProblem> errorsSimp, List<HeadwordProblem> errorsTrad, List<HeadwordProblem> errorsPinyin)
        {
            var    tprov = TextProvider.Instance;
            string msg;

            // Check each simplified: is it really simplified?
            // The Unihan lookup can contain null entries (the other callers in this file guard
            // against them). Such a character cannot be confirmed as simplified, so it is
            // reported as a problem instead of being dereferenced.
            UniHanziInfo[] uhiSimp = langRepo.GetUnihanInfo(simp);
            for (int i = 0; i != uhiSimp.Length; ++i)
            {
                var uhi = uhiSimp[i];
                if (uhi == null || !uhi.CanBeSimp)
                {
                    msg = tprov.GetString(lang, "editEntry.hwProblemNotSimplified");
                    msg = string.Format(msg, simp[i]);
                    errorsSimp.Add(new HeadwordProblem(false, msg));
                }
            }
            // Check each traditional: is it really traditional?
            UniHanziInfo[] uhiTrad = langRepo.GetUnihanInfo(trad);
            for (int i = 0; i != uhiTrad.Length; ++i)
            {
                var uhi = uhiTrad[i];
                // Traditional chars are listed as their own traditional variant.
                // A missing entry cannot confirm that, so it is reported as well.
                if (uhi == null || Array.IndexOf(uhi.TradVariants, trad[i]) < 0)
                {
                    msg = tprov.GetString(lang, "editEntry.hwProblemNotTraditional");
                    msg = string.Format(msg, trad[i]);
                    errorsTrad.Add(new HeadwordProblem(false, msg));
                }
            }
            // Check each traditional against its simplified friend
            if (trad.Length != simp.Length)
            {
                errorsTrad.Add(new HeadwordProblem(true, tprov.GetString(lang, "editEntry.hwProblemSimpTradCounts")));
            }
            else
            {
                for (int i = 0; i != uhiSimp.Length; ++i)
                {
                    var uhi = uhiSimp[i];
                    // No data for this simplified character: it was already reported above,
                    // and there are no variants to compare the traditional character against.
                    if (uhi == null)
                    {
                        continue;
                    }
                    if (Array.IndexOf(uhi.TradVariants, trad[i]) < 0)
                    {
                        msg = tprov.GetString(lang, "editEntry.hwProblemNotTradForSimp");
                        msg = string.Format(msg, simp[i], trad[i]);
                        errorsTrad.Add(new HeadwordProblem(false, msg));
                    }
                }
            }
            // Normalize pinyin (multiple spaces, leading/trailing spaces)
            string pyNorm = pinyin;

            // Collapse runs of spaces; repeated until a pass changes nothing
            while (true)
            {
                string collapsed = pyNorm.Replace("  ", " ");
                if (collapsed == pyNorm)
                {
                    break;
                }
                pyNorm = collapsed;
            }
            pyNorm = pyNorm.Trim();
            if (pyNorm != pinyin)
            {
                errorsPinyin.Add(new HeadwordProblem(true, tprov.GetString(lang, "editEntry.hwProblemExtraSpacesPinyin")));
            }
            // Try to match up normalized pinyin with simplified Hanzi
            CedictParser parser = new CedictParser();
            CedictEntry  ee     = null;

            try
            {
                ee = parser.ParseEntry(trad + " " + simp + " [" + pyNorm + "] /x/", 0, null);
            }
            catch (Exception)
            {
                // Deliberately swallowed: a parser failure here is treated like an unparsable
                // headword and is surfaced to the caller through the "invalid pinyin" problem below.
                ee = null;
            }
            if (ee == null)
            {
                errorsPinyin.Add(new HeadwordProblem(true, tprov.GetString(lang, "editEntry.hwProblemInvalidPinyin")));
            }
            else if (simp.Length == ee.ChSimpl.Length)
            {
                for (int i = 0; i != uhiSimp.Length; ++i)
                {
                    var uhi = uhiSimp[i];
                    // Without Unihan data there are no known readings to check the pinyin against
                    if (uhi == null)
                    {
                        continue;
                    }
                    var    py        = ee.GetPinyinAt(i);
                    string pyDisplay = py.GetDisplayString(false);
                    // The entered syllable must be one of the readings listed for the character
                    if (!uhi.Pinyin.Any(x => x.GetDisplayString(false) == pyDisplay))
                    {
                        msg = tprov.GetString(lang, "editEntry.hwProblemWrongPinyin");
                        msg = string.Format(msg, pyDisplay, simp[i]);
                        errorsPinyin.Add(new HeadwordProblem(false, msg));
                    }
                }
            }
            return ee != null;
        }
        /// <summary>
        /// Retrieves information about (simplified) hanzi.
        /// </summary>
        /// <remarks>
        /// For each character of the input, the result holds a list of candidate traditional forms
        /// and a list of candidate pinyin readings. Candidates from known headwords come first,
        /// followed by candidates from the Unihan lookup. Positions that end up with no candidate
        /// receive the input character itself.
        /// </remarks>
        /// <param name="simp">Simplified headword from the query string; null is treated as empty.</param>
        /// <returns>An ObjectResult wrapping the populated NewEntryProcessSimpResult.</returns>
        public IActionResult ProcessSimp([FromQuery] string simp)
        {
            if (simp == null)
            {
                simp = "";
            }
            NewEntryProcessSimpResult res = new NewEntryProcessSimpResult();

            // Prepare result: as long as input; empty list for each position
            for (int i = 0; i != simp.Length; ++i)
            {
                res.Trad.Add(new List<string>());
                res.Pinyin.Add(new List<string>());
            }

            // Do we have CEDICT headwords for this simplified HW?
            // If yes, put first headword's traditional and pinyin into first layer of result
            // Fill rest of the alternatives with input from additional results
            HeadwordSyll[][] chHeads = langRepo.GetPossibleHeadwords(simp, false);
            for (int i = 0; i != chHeads.Length; ++i)
            {
                HeadwordSyll[] sylls = chHeads[i];
                for (int j = 0; j != simp.Length; ++j)
                {
                    addIfNew(res.Trad[j], sylls[j].Trad.ToString());
                    addIfNew(res.Pinyin[j], sylls[j].Pinyin);
                }
            }
            // Unihan lookup
            UniHanziInfo[] uhis = langRepo.GetUnihanInfo(simp);
            // We had no headword: build from Unihan data, but with a twist
            // Make sure first traditional matches most common pinyin
            if (chHeads.Length == 0)
            {
                for (int i = 0; i != uhis.Length; ++i)
                {
                    UniHanziInfo uhi = uhis[i];
                    if (uhi == null)
                    {
                        continue;
                    }
                    // Add pinyin readings first; the first reading listed becomes the "favorite".
                    // Capturing it in this loop, rather than indexing Pinyin[0] afterwards,
                    // keeps a character without any reading from throwing.
                    bool   hasFavoritePinyinReading = false;
                    string favoritePinyin           = null;
                    foreach (PinyinSyllable syll in uhi.Pinyin)
                    {
                        string display = syll.GetDisplayString(true);
                        if (!hasFavoritePinyinReading)
                        {
                            favoritePinyin           = display;
                            hasFavoritePinyinReading = true;
                        }
                        addIfNew(res.Pinyin[i], display);
                    }
                    // Look up traditional chars for this position
                    UniHanziInfo[] tradUhis = langRepo.GetUnihanInfo(uhi.TradVariants);
                    // Find "best" traditional character: the first one whose pinyin readings include our first pinyin
                    char firstTrad = (char)0;
                    if (hasFavoritePinyinReading && tradUhis != null)
                    {
                        for (int tx = 0; tx != uhi.TradVariants.Length; ++tx)
                        {
                            UniHanziInfo tradUhi = tradUhis[tx];
                            if (tradUhi == null)
                            {
                                continue;
                            }
                            bool hasFavoritePinyin = false;
                            foreach (PinyinSyllable py in tradUhi.Pinyin)
                            {
                                if (py.GetDisplayString(true) == favoritePinyin)
                                {
                                    hasFavoritePinyin = true;
                                    break;
                                }
                            }
                            if (hasFavoritePinyin)
                            {
                                firstTrad = uhi.TradVariants[tx];
                                break;
                            }
                        }
                    }
                    // Add first traditional, if found
                    if (firstTrad != (char)0)
                    {
                        addIfNew(res.Trad[i], firstTrad.ToString());
                    }
                    // Add all the remaining traditional variants
                    foreach (char c in uhi.TradVariants)
                    {
                        addIfNew(res.Trad[i], c.ToString());
                    }
                }
            }
            // We had a headword: fill remaining slots with traditional and pinyin items from Unihan
            else
            {
                res.IsKnownHeadword = true;
                for (int i = 0; i != uhis.Length; ++i)
                {
                    UniHanziInfo uhi = uhis[i];
                    if (uhi == null)
                    {
                        continue;
                    }
                    foreach (char c in uhi.TradVariants)
                    {
                        addIfNew(res.Trad[i], c.ToString());
                    }
                    foreach (PinyinSyllable syll in uhi.Pinyin)
                    {
                        addIfNew(res.Pinyin[i], syll.GetDisplayString(true));
                    }
                }
            }
            // Filter pinyin: only keep those that work with traditional on the first spot
            // Unless intersection is empty - can also happen in this weird world
            for (int i = 0; i != simp.Length; ++i)
            {
                List<string> pyList = res.Pinyin[i];
                if (pyList.Count < 2)
                {
                    continue;
                }
                List<string> tradList = res.Trad[i];
                if (tradList.Count == 0)
                {
                    continue;
                }
                UniHanziInfo[] tradUhis = langRepo.GetUnihanInfo(new char[] { tradList[0][0] });
                if (tradUhis == null || tradUhis[0] == null)
                {
                    continue;
                }
                // Readings of the first traditional candidate; a set, since it is only used for membership tests
                HashSet<string> pinyinsOfTrad = new HashSet<string>();
                foreach (var x in tradUhis[0].Pinyin)
                {
                    pinyinsOfTrad.Add(x.GetDisplayString(true));
                }
                // If we had a match, start from second: don't want to remove what just came from CEDICT
                List<string> toRem = new List<string>();
                for (int j = res.IsKnownHeadword ? 1 : 0; j < pyList.Count; ++j)
                {
                    string py = pyList[j];
                    if (!pinyinsOfTrad.Contains(py))
                    {
                        toRem.Add(py);
                    }
                }
                // Nothing would survive the filter: keep the unfiltered list instead
                if (toRem.Count == pyList.Count)
                {
                    continue;
                }
                foreach (string py in toRem)
                {
                    pyList.Remove(py);
                }
            }

            // Check if there are positions where we have no traditional or pinyin
            // For the purposes of this lookup, we just inject character from input there
            for (int i = 0; i != simp.Length; ++i)
            {
                string inputChar = simp[i].ToString();
                if (res.Trad[i].Count == 0)
                {
                    res.Trad[i].Add(inputChar);
                }
                if (res.Pinyin[i].Count == 0)
                {
                    res.Pinyin[i].Add(inputChar);
                }
            }

            // Tell our caller
            return new ObjectResult(res);
        }