Ejemplo n.º 1
0
        /// <summary>
        /// Serializes an entry into one line of the form
        /// <c>TRAD SIMP [py1 py2 ...] /sense1/sense2/.../</c>.
        /// </summary>
        /// <param name="entry">Entry whose headword, pinyin and senses are written.</param>
        /// <returns>The serialized line. A '/' inside a sense is written as '\'.</returns>
        public static string Write(CedictEntry entry)
        {
            StringBuilder line = new StringBuilder();

            // Headword: traditional first, then simplified
            line.Append(entry.ChTrad).Append(' ').Append(entry.ChSimpl);

            // Pinyin syllables in brackets, separated by single spaces
            line.Append(" [");
            string separator = "";
            for (int idx = 0; idx < entry.PinyinCount; idx++)
            {
                line.Append(separator);
                line.Append(entry.GetPinyinAt(idx).GetDisplayString(false));
                separator = " ";
            }
            line.Append("] /");

            // Senses, each terminated by a slash
            for (int idx = 0; idx < entry.SenseCount; idx++)
            {
                string raw = entry.GetSenseAt(idx).GetPlainText();
                // '/' delimits senses in the output, so one inside a sense becomes a backslash
                string escaped = raw.Contains('/') ? raw.Replace('/', '\\') : raw;
                line.Append(escaped).Append('/');
            }

            return line.ToString();
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Serializes an entry into two separate strings: the headword part and the senses part.
        /// </summary>
        /// <param name="entry">Entry whose headword, pinyin and senses are written.</param>
        /// <param name="head">Receives <c>TRAD SIMP [py1 py2 ...]</c>.</param>
        /// <param name="trg">
        /// Receives <c>/sense1/sense2/.../</c>; a '/' inside a sense is written as '\'.
        /// For an entry without senses this is a single "/".
        /// </param>
        public static void Write(CedictEntry entry, out string head, out string trg)
        {
            // Headword part: both Hanzi forms followed by bracketed pinyin
            StringBuilder headPart = new StringBuilder();
            headPart.Append(entry.ChTrad).Append(' ').Append(entry.ChSimpl).Append(" [");
            string separator = "";
            for (int idx = 0; idx < entry.PinyinCount; idx++)
            {
                headPart.Append(separator);
                headPart.Append(entry.GetPinyinAt(idx).GetDisplayString(false));
                separator = " ";
            }
            headPart.Append("]");
            head = headPart.ToString();

            // Senses part: leading slash, then every sense followed by a slash
            StringBuilder sensePart = new StringBuilder("/");
            for (int idx = 0; idx < entry.SenseCount; idx++)
            {
                string raw = entry.GetSenseAt(idx).GetPlainText();
                // '/' delimits senses in the output, so one inside a sense becomes a backslash
                string escaped = raw.Contains('/') ? raw.Replace('/', '\\') : raw;
                sensePart.Append(escaped).Append('/');
            }
            trg = sensePart.ToString();
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Loads every pinyin lookup candidate and keeps those whose pinyin really contains
        /// the queried syllable sequence and whose Hanzi pass <c>areHanziCovered</c>.
        /// </summary>
        /// <param name="br">Reader the entries are deserialized from; its position is set to each candidate in turn.</param>
        /// <param name="poss">Candidate positions to examine.</param>
        /// <param name="sylls">Query syllables, matched as a contiguous run; a query tone of -1 matches any tone.</param>
        /// <returns>One result per kept entry, holding the first matching syllable index and the query length.</returns>
        List<ResWithEntry> doLoadVerifyPinyin(BinReader br, IEnumerable<int> poss, List<PinyinSyllable> sylls)
        {
            List<ResWithEntry> kept = new List<ResWithEntry>();
            int queryLen = sylls.Count;

            foreach (int pos in poss)
            {
                // Deserialize the candidate
                br.Position = pos;
                CedictEntry entry = new CedictEntry(br);

                // Slide the query over the entry's pinyin; stop at the first full match
                int matchAt = -1;
                int lastStart = entry.PinyinCount - queryLen;
                for (int start = 0; start <= lastStart && matchAt == -1; ++start)
                {
                    bool allMatch = true;
                    for (int k = 0; k < queryLen; ++k)
                    {
                        PinyinSyllable inEntry = entry.GetPinyinAt(start + k);
                        PinyinSyllable inQuery = sylls[k];
                        // Entry text is lower-cased before comparing; tone only counts when the query names one
                        if (inEntry.Text.ToLowerInvariant() != inQuery.Text ||
                            (inQuery.Tone != -1 && inEntry.Tone != inQuery.Tone))
                        {
                            allMatch = false;
                            break;
                        }
                    }
                    if (allMatch)
                    {
                        matchAt = start;
                    }
                }

                // Keep only entries that matched and whose Hanzi are all covered
                if (matchAt != -1 && areHanziCovered(entry))
                {
                    CedictResult hit = new CedictResult(pos, entry.HanziPinyinMap, matchAt, queryLen);
                    kept.Add(new ResWithEntry(hit, entry));
                }
            }

            return kept;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Joins the display strings of an entry's pinyin syllables with single spaces.
        /// </summary>
        /// <param name="entry">Entry whose pinyin is written.</param>
        /// <returns>The space-separated pinyin; empty when the entry has no syllables.</returns>
        public static string WritePinyin(CedictEntry entry)
        {
            StringBuilder joined = new StringBuilder();
            string separator = "";

            for (int idx = 0; idx < entry.PinyinCount; idx++)
            {
                // Separator is empty before the first syllable, a space afterwards
                joined.Append(separator);
                joined.Append(entry.GetPinyinAt(idx).GetDisplayString(false));
                separator = " ";
            }

            return joined.ToString();
        }
Ejemplo n.º 5
0
            /// <summary>
            /// Loads each candidate entry and keeps those whose pinyin contains the queried
            /// syllables as a contiguous run.
            /// </summary>
            /// <param name="cands">Blob IDs of the candidate entries.</param>
            /// <param name="qsylls">Query syllables; a query tone of -1 matches any tone.</param>
            /// <returns>One result per kept entry, holding the first matching syllable index and the query length.</returns>
            private List<ResWithEntry> retrieveVerifyPinyin(List<int> cands, List<PinyinSyllable> qsylls)
            {
                List<ResWithEntry> kept = new List<ResWithEntry>();
                int queryLen = qsylls.Count;

                foreach (int blobId in cands)
                {
                    CedictEntry entry = loadFromBlob(blobId);

                    // Try every possible start offset until the whole query lines up
                    int matchAt = -1;
                    int lastStart = entry.PinyinCount - queryLen;
                    for (int start = 0; start <= lastStart && matchAt == -1; ++start)
                    {
                        int matched = 0;
                        while (matched < queryLen)
                        {
                            PinyinSyllable inEntry = entry.GetPinyinAt(start + matched);
                            PinyinSyllable inQuery = qsylls[matched];
                            // Entry text is lower-cased before comparing; tone only counts when the query names one
                            if (inEntry.Text.ToLowerInvariant() != inQuery.Text ||
                                (inQuery.Tone != -1 && inEntry.Tone != inQuery.Tone))
                            {
                                break;
                            }
                            ++matched;
                        }
                        if (matched == queryLen)
                        {
                            matchAt = start;
                        }
                    }

                    // Only entries containing the query become results
                    if (matchAt != -1)
                    {
                        CedictResult hit = new CedictResult(blobId, entry.HanziPinyinMap, matchAt, queryLen);
                        kept.Add(new ResWithEntry(hit, entry));
                    }
                }

                return kept;
            }
Ejemplo n.º 6
0
        /// <summary>
        /// Adds a result for <paramref name="entry"/> to <paramref name="res"/> if the entry's pinyin
        /// contains the queried syllables as a contiguous run; otherwise does nothing.
        /// </summary>
        /// <param name="entry">Entry to check.</param>
        /// <param name="entryId">Not used by this method.</param>
        /// <param name="sylls">Query syllables; a query tone of -1 matches any tone.</param>
        /// <param name="res">List that receives the result for the first match found.</param>
        private void verifyPinyin(CedictEntry entry, int entryId, List<PinyinSyllable> sylls, List<CedictResult> res)
        {
            int wanted = sylls.Count;
            int lastStart = entry.PinyinCount - wanted;

            for (int start = 0; start <= lastStart; ++start)
            {
                // Count how many consecutive query syllables line up at this offset
                int matched = 0;
                while (matched < wanted)
                {
                    PinyinSyllable have = entry.GetPinyinAt(start + matched);
                    PinyinSyllable want = sylls[matched];
                    // Entry text is lower-cased before comparing; tone only counts when the query names one
                    if (have.Text.ToLowerInvariant() != want.Text ||
                        (want.Tone != -1 && have.Tone != want.Tone))
                    {
                        break;
                    }
                    ++matched;
                }

                if (matched == wanted)
                {
                    // First full match wins; record it and stop looking
                    res.Add(new CedictResult(entry, entry.HanziPinyinMap, start, wanted));
                    return;
                }
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// <para>Builds a headword (one <c>HeadwordSyll</c> per simplified character) from the entry and
        /// appends it to <paramref name="cdHeads"/> unless an identical headword is already on the list.</para>
        /// <para>When <paramref name="unihanFilter"/> is set, the headword is also dropped if any traditional
        /// character is not among the traditional variants reported by <c>GetUnihanInfo</c> for the
        /// simplified character at the same position.</para>
        /// </summary>
        /// <param name="cdHeads">List of headwords collected so far; may receive one new item.</param>
        /// <param name="entry">Entry supplying the simplified/traditional Hanzi and the pinyin.</param>
        /// <param name="unihanFilter">True to apply the traditional-variant check.</param>
        private void addHeadIfNew(List<HeadwordSyll[]> cdHeads, CedictEntry entry, bool unihanFilter)
        {
            // The new headword: with pinyin lower-cased
            HeadwordSyll[] res = new HeadwordSyll[entry.ChSimpl.Length];
            for (int i = 0; i != res.Length; ++i)
            {
                string py = entry.GetPinyinAt(i).GetDisplayString(true);
                // A single upper-case Latin letter is kept as is; everything else is lower-cased
                bool singleUpperLetter = py.Length == 1 && py[0] >= 'A' && py[0] <= 'Z';
                if (!singleUpperLetter)
                {
                    py = py.ToLowerInvariant();
                }
                res[i] = new HeadwordSyll(entry.ChSimpl[i], entry.ChTrad[i], py);
            }

            // Unihan info for the simplified characters, only needed when filtering
            UniHanziInfo[] uhis = null;
            if (unihanFilter)
            {
                uhis = GetUnihanInfo(entry.ChSimpl.ToCharArray());
            }

            // Is it already on the list?
            foreach (HeadwordSyll[] known in cdHeads)
            {
                // Headwords of a different length can never be equal. Without this check a shorter
                // stored headword would be indexed out of range, and a longer one sharing the same
                // prefix would be mistaken for a duplicate.
                if (known.Length != res.Length)
                {
                    continue;
                }
                bool same = true;
                for (int i = 0; i != res.Length; ++i)
                {
                    if (known[i].Simp != res[i].Simp ||
                        known[i].Trad != res[i].Trad ||
                        known[i].Pinyin != res[i].Pinyin)
                    {
                        same = false;
                        break;
                    }
                }
                if (same)
                {
                    return;
                }
            }

            // Drop those where a traditional character is odd
            if (unihanFilter)
            {
                for (int i = 0; i != res.Length; ++i)
                {
                    // Positions without Unihan info are not checked
                    if (uhis[i] == null)
                    {
                        continue;
                    }
                    if (Array.IndexOf(uhis[i].TradVariants, res[i].Trad) < 0)
                    {
                        return;
                    }
                }
            }

            // Traditional chars are OK and the headword is new: add
            cdHeads.Add(res);
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Returns everything needed to edit one entry: the caller's approval right, the entry's status,
        /// headword parts and target text, the entry rendered as HTML, and its change history as HTML.
        /// </summary>
        /// <param name="entryId">Entry ID in string form, converted with <c>EntryId.StringToId</c>.</param>
        /// <param name="lang">Language passed on to the entry and history renderers.</param>
        /// <returns>
        /// Status 400 with a message when either parameter is missing; otherwise an
        /// <c>ObjectResult</c> wrapping the populated <c>EditEntryData</c>.
        /// </returns>
        public IActionResult GetEditEntryData([FromQuery] string entryId, [FromQuery] string lang)
        {
            if (entryId == null || lang == null)
            {
                return StatusCode(400, "Missing parameter(s).");
            }

            EditEntryData data = new EditEntryData();

            // Approval right is only looked up for a recognized session (user ID other than -1)
            int userId;
            string userName;
            auth.CheckSession(HttpContext.Request.Headers, out userId, out userName);
            if (userId != -1)
            {
                data.CanApprove = auth.CanApprove(userId);
            }

            // Load the stored headword/target and parse them back into an entry
            int numericId = EntryId.StringToId(entryId);
            string hw;
            string trg;
            EntryStatus status;
            SqlDict.GetEntryById(numericId, out hw, out trg, out status);
            CedictParser parser = new CedictParser();
            // NOTE(review): entry is dereferenced below without a null check - confirm ParseEntry
            // cannot return null for stored entries.
            CedictEntry entry = parser.ParseEntry(hw + " " + trg, 0, null);

            data.Status = status.ToString().ToLowerInvariant();
            data.HeadSimp = entry.ChSimpl;
            data.HeadTrad = entry.ChTrad;

            // Pinyin syllables joined by single spaces
            StringBuilder pinyin = new StringBuilder();
            for (int idx = 0; idx < entry.PinyinCount; idx++)
            {
                if (pinyin.Length > 0)
                {
                    pinyin.Append(" ");
                }
                pinyin.Append(entry.GetPinyinAt(idx).GetDisplayString(false));
            }
            data.HeadPinyin = pinyin.ToString();

            // Target text for editing: one sense per line, with stored backslashes shown as slashes
            string sensesOnly = trg.Trim('/');
            string onePerLine = sensesOnly.Replace('/', '\n');
            data.TrgTxt = onePerLine.Replace('\\', '/');

            // Entry HTML
            entry.Status = status;
            EntryRenderer renderer = new EntryRenderer(lang, entry, true, "mainEntry");
            renderer.OneLineHanziLimit = 12;
            StringBuilder entryHtml = new StringBuilder();
            renderer.Render(entryHtml, null);
            data.EntryHtml = entryHtml.ToString();

            // Entry history
            List<ChangeItem> changes = SqlDict.GetEntryChanges(numericId);
            StringBuilder historyHtml = new StringBuilder();
            HistoryRenderer.RenderEntryChanges(historyHtml, hw, trg, status, changes, lang);
            data.HistoryHtml = historyHtml.ToString();

            return new ObjectResult(data);
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Checks a proposed headword (simplified, traditional, pinyin) and collects the problems found.
        /// </summary>
        /// <param name="lang">Language used to look up the problem messages.</param>
        /// <param name="simp">Simplified Hanzi.</param>
        /// <param name="trad">Traditional Hanzi.</param>
        /// <param name="pinyin">Pinyin as entered; flagged if it has leading, trailing or repeated spaces.</param>
        /// <param name="errorsSimp">Receives problems about the simplified characters.</param>
        /// <param name="errorsTrad">Receives problems about the traditional characters, including a count mismatch with simplified.</param>
        /// <param name="errorsPinyin">Receives problems about the pinyin.</param>
        /// <returns>True if the headword with normalized pinyin could be parsed into an entry; false otherwise.</returns>
        private bool validateHeadword(string lang, string simp, string trad, string pinyin,
                                      List<HeadwordProblem> errorsSimp, List<HeadwordProblem> errorsTrad, List<HeadwordProblem> errorsPinyin)
        {
            var texts = TextProvider.Instance;

            // Each simplified character must be usable as simplified
            UniHanziInfo[] simpInfos = langRepo.GetUnihanInfo(simp);
            for (int i = 0; i < simpInfos.Length; i++)
            {
                if (simpInfos[i].CanBeSimp)
                {
                    continue;
                }
                string text = string.Format(texts.GetString(lang, "editEntry.hwProblemNotSimplified"), simp[i]);
                errorsSimp.Add(new HeadwordProblem(false, text));
            }

            // Each traditional character must be listed among its own traditional variants
            UniHanziInfo[] tradInfos = langRepo.GetUnihanInfo(trad);
            for (int i = 0; i < tradInfos.Length; i++)
            {
                if (Array.IndexOf(tradInfos[i].TradVariants, trad[i]) >= 0)
                {
                    continue;
                }
                string text = string.Format(texts.GetString(lang, "editEntry.hwProblemNotTraditional"), trad[i]);
                errorsTrad.Add(new HeadwordProblem(false, text));
            }

            // Each traditional character must be a variant of the simplified one at the same position
            if (trad.Length == simp.Length)
            {
                for (int i = 0; i < simpInfos.Length; i++)
                {
                    if (Array.IndexOf(simpInfos[i].TradVariants, trad[i]) >= 0)
                    {
                        continue;
                    }
                    string text = string.Format(texts.GetString(lang, "editEntry.hwProblemNotTradForSimp"), simp[i], trad[i]);
                    errorsTrad.Add(new HeadwordProblem(false, text));
                }
            }
            else
            {
                errorsTrad.Add(new HeadwordProblem(true, texts.GetString(lang, "editEntry.hwProblemSimpTradCounts")));
            }

            // Normalize pinyin: collapse runs of spaces, then strip leading/trailing whitespace
            string normalized = pinyin;
            while (normalized.Contains("  "))
            {
                normalized = normalized.Replace("  ", " ");
            }
            normalized = normalized.Trim();
            if (normalized != pinyin)
            {
                errorsPinyin.Add(new HeadwordProblem(true, texts.GetString(lang, "editEntry.hwProblemExtraSpacesPinyin")));
            }

            // Parse a synthetic entry line built from the headword to see whether the pinyin is usable
            CedictParser parser = new CedictParser();
            CedictEntry parsed = null;
            try
            {
                parsed = parser.ParseEntry(trad + " " + simp + " [" + normalized + "] /x/", 0, null);
            }
            catch
            {
                // Deliberately ignored: an exception leaves parsed null, which is reported as invalid pinyin below
            }
            if (parsed == null)
            {
                errorsPinyin.Add(new HeadwordProblem(true, texts.GetString(lang, "editEntry.hwProblemInvalidPinyin")));
                return false;
            }

            // Each syllable must be a known reading of the simplified character at the same position.
            // NOTE(review): the guard compares Hanzi lengths only; GetPinyinAt(i) assumes the parsed entry
            // has a syllable for every character - confirm the parser guarantees this.
            if (simp.Length == parsed.ChSimpl.Length)
            {
                for (int i = 0; i < simpInfos.Length; i++)
                {
                    string shown = parsed.GetPinyinAt(i).GetDisplayString(false);
                    bool known = simpInfos[i].Pinyin.Any(reading => reading.GetDisplayString(false) == shown);
                    if (!known)
                    {
                        string text = string.Format(texts.GetString(lang, "editEntry.hwProblemWrongPinyin"), shown, simp[i]);
                        errorsPinyin.Add(new HeadwordProblem(false, text));
                    }
                }
            }

            return true;
        }