Example #1
0
 /// <summary>
 /// Renders the query text in annotation mode, one character at a time. A character is
 /// wrapped in a span only when at least one CSS class applies to it.
 /// </summary>
 /// <param name="query">Text whose characters are written out.</param>
 /// <param name="entry">Entry whose Hanzi-to-pinyin map and syllables describe the annotated range.</param>
 /// <param name="annStart">Index in <paramref name="query"/> where the annotated range starts.</param>
 /// <param name="annLength">Number of characters in the annotated range.</param>
 /// <param name="writer">Writer that receives the markup.</param>
 private void renderHanzi(string query, CedictEntry entry, int annStart, int annLength, HtmlTextWriter writer)
 {
     int annEnd = annStart + annLength;

     for (int pos = 0; pos < query.Length; ++pos)
     {
         bool annotated = pos >= annStart && pos < annEnd;

         // Only characters inside the annotated range can map to a syllable of the entry
         PinyinSyllable syll = null;
         if (annotated)
         {
             int syllIx = entry.HanziPinyinMap[pos - annStart];
             syll = syllIx == -1 ? null : entry.Pinyin[syllIx];
         }

         // Tone class, if tone markup is on and the tone is 1 thru 4.
         // Other values (-1 unknown, 0 neutral) get no tone class.
         string cssClass = "";
         if (tones != UiTones.None && syll != null)
         {
             switch (syll.Tone)
             {
                 case 1: cssClass = "tone1"; break;
                 case 2: cssClass = "tone2"; break;
                 case 3: cssClass = "tone3"; break;
                 case 4: cssClass = "tone4"; break;
             }
         }
         // Text around the annotation is shown faint
         if (!annotated)
         {
             cssClass += " faint";
         }
         // Marker class for stroke order animation
         if (hanim)
         {
             cssClass += " hanim";
         }

         // Enclosing span only if there is a class to carry
         bool wrap = cssClass.Length != 0;
         if (wrap)
         {
             writer.AddAttribute("class", cssClass);
             writer.RenderBeginTag(HtmlTextWriterTag.Span);
         }
         writer.WriteEncodedText(query[pos].ToString());
         if (wrap)
         {
             writer.RenderEndTag();
         }
     }
 }
Example #2
0
 /// <summary>
 /// See <see cref="ZD.Common.IHeadwordInfo.GetUnihanInfo"/>.
 /// </summary>
 /// <remarks>
 /// ASCII digits and upper-case letters get an info object built from the character itself,
 /// with a single syllable of tone -1. Every other character is looked up in the data file;
 /// a stored position of 0 leaves the corresponding slot of the result null.
 /// </remarks>
 /// <param name="chars">Characters to look up.</param>
 /// <returns>Array parallel to <paramref name="chars"/>.</returns>
 public UniHanziInfo[] GetUnihanInfo(char[] chars)
 {
     UniHanziInfo[] infos = new UniHanziInfo[chars.Length];

     using (BinReader reader = new BinReader(dataFileName))
     {
         for (int ix = 0; ix < chars.Length; ++ix)
         {
             char ch = chars[ix];
             bool isDigit = ch >= '0' && ch <= '9';
             bool isUpper = ch >= 'A' && ch <= 'Z';

             if (!isDigit && !isUpper)
             {
                 // Genuine Hanzi info, if the data file has any for this character
                 int filePos = chrPoss[(int)ch];
                 if (filePos != 0)
                 {
                     reader.Position = filePos;
                     infos[ix]       = new UniHanziInfo(reader);
                 }
                 continue;
             }

             // Upper-case letter or digit: the character stands for itself
             PinyinSyllable selfSyll = new PinyinSyllable(ch.ToString(), -1);
             infos[ix] = new UniHanziInfo(true, new char[] { ch }, new PinyinSyllable[] { selfSyll });
         }
     }
     return infos;
 }
Example #3
0
        /// <summary>
        /// Ctor: deserializes from binary.
        /// </summary>
        /// <remarks>
        /// Reads, in this order: one flag byte (non-zero means <c>CanBeSimp</c>); one count byte
        /// followed by that many characters (<c>TradVariants</c>); one count byte followed by
        /// that many serialized syllables (<c>Pinyin</c>).
        /// </remarks>
        /// <param name="br">Reader positioned at the start of the serialized data.</param>
        public UniHanziInfo(BinReader br)
        {
            CanBeSimp = br.ReadByte() != 0;

            int variantCount = br.ReadByte();
            TradVariants = new char[variantCount];
            for (int k = 0; k < variantCount; ++k)
            {
                TradVariants[k] = br.ReadChar();
            }

            int syllCount = br.ReadByte();
            Pinyin = new PinyinSyllable[syllCount];
            for (int k = 0; k < syllCount; ++k)
            {
                Pinyin[k] = new PinyinSyllable(br);
            }
        }
Example #4
0
            /// <summary>
            /// Adds the entry's pinyin to the DB's index/instance tables.
            /// </summary>
            /// <remarks>
            /// Syllables are lower-cased first. A syllable that occurs several times with the
            /// same text and tone is indexed only once. One insert command is executed per
            /// distinct syllable.
            /// </remarks>
            /// <param name="entry">Entry whose pinyin is indexed.</param>
            /// <param name="entryId">Value stored in the command's @blob_id parameter.</param>
            private void indexPinyin(CedictEntry entry, int entryId)
            {
                // Distinct (text, tone) pairs, in order of first occurrence
                List<PinyinSyllable> distinct = new List<PinyinSyllable>();

                foreach (PinyinSyllable original in entry.Pinyin)
                {
                    // Lower-cased copy is what gets compared and indexed
                    PinyinSyllable candidate = new PinyinSyllable(original.Text.ToLowerInvariant(), original.Tone);
                    bool seen = distinct.Exists(x => x.Text == candidate.Text && x.Tone == candidate.Tone);
                    if (!seen)
                    {
                        distinct.Add(candidate);
                    }
                }

                // These two parameters are the same for every row of this entry
                cmdInsPinyinInstance.Parameters["@syll_count"].Value = distinct.Count;
                cmdInsPinyinInstance.Parameters["@blob_id"].Value    = entryId;

                // One row per distinct syllable
                foreach (PinyinSyllable syll in distinct)
                {
                    cmdInsPinyinInstance.Parameters["@pinyin_hash"].Value = CedictEntry.Hash(syll.Text);
                    cmdInsPinyinInstance.Parameters["@tone"].Value        = syll.Tone;
                    cmdInsPinyinInstance.ExecuteNonQuery();
                }
            }
Example #5
0
        /// <summary>
        /// Renders the headword's Hanzi for a normal lookup result, one character at a time.
        /// A character is wrapped in a span only when at least one CSS class applies to it.
        /// </summary>
        /// <param name="entry">Entry whose headword is rendered.</param>
        /// <param name="simp">True to render the simplified headword; false for traditional.</param>
        /// <param name="faintIdentTrad">If true, characters equal to the simplified character at the same index get the "faint" class.</param>
        /// <param name="writer">Writer that receives the markup.</param>
        private void renderHanzi(CedictEntry entry, bool simp, bool faintIdentTrad, HtmlTextWriter writer)
        {
            string hanzi = simp ? entry.ChSimpl : entry.ChTrad;

            for (int pos = 0; pos < hanzi.Length; ++pos)
            {
                char ch = hanzi[pos];

                // Syllable belonging to this character; -1 in the map means there is none
                int            syllIx = entry.HanziPinyinMap[pos];
                PinyinSyllable syll   = syllIx == -1 ? null : entry.Pinyin[syllIx];

                // Tone class, if tone markup is on and the tone is 1 thru 4.
                // Other values (-1 unknown, 0 neutral) get no tone class.
                string cssClass = "";
                if (tones != UiTones.None && syll != null)
                {
                    switch (syll.Tone)
                    {
                        case 1: cssClass = "tone1"; break;
                        case 2: cssClass = "tone2"; break;
                        case 3: cssClass = "tone3"; break;
                        case 4: cssClass = "tone4"; break;
                    }
                }
                // When both scripts are shown, traditional characters identical to simplified are faint
                if (faintIdentTrad && ch == entry.ChSimpl[pos])
                {
                    cssClass += " faint";
                }
                // Marker class for stroke order animation
                if (hanim)
                {
                    cssClass += " hanim";
                }

                // Enclosing span only if there is a class to carry
                bool wrap = cssClass.Length != 0;
                if (wrap)
                {
                    writer.AddAttribute("class", cssClass);
                    writer.RenderBeginTag(HtmlTextWriterTag.Span);
                }
                writer.WriteEncodedText(ch.ToString());
                if (wrap)
                {
                    writer.RenderEndTag();
                }
            }
        }
Example #6
0
 /// <summary>
 /// Renders the query text in annotation mode into a string builder, one character at a
 /// time. A character is wrapped in a span only when at least one CSS class applies to it.
 /// </summary>
 /// <param name="query">Text whose characters are written out.</param>
 /// <param name="entry">Entry whose Hanzi-to-pinyin map and syllables describe the annotated range.</param>
 /// <param name="annStart">Index in <paramref name="query"/> where the annotated range starts.</param>
 /// <param name="annLength">Number of characters in the annotated range.</param>
 /// <param name="sb">Builder that receives the HTML.</param>
 private void renderHanzi(string query, CedictEntry entry, int annStart, int annLength, StringBuilder sb)
 {
     int annEnd = annStart + annLength;

     for (int pos = 0; pos < query.Length; ++pos)
     {
         bool outside = pos < annStart || pos >= annEnd;

         // Only characters inside the annotated range can map to a syllable of the entry
         PinyinSyllable syll = null;
         if (!outside)
         {
             int syllIx = entry.HanziPinyinMap[pos - annStart];
             if (syllIx != -1)
             {
                 syll = entry.Pinyin[syllIx];
             }
         }

         // Tone class, if tone markup is on and the tone is 1 thru 4.
         // Other values (-1 unknown, 0 neutral) get no tone class.
         string cssClass = "";
         if (tones != UiTones.None && syll != null)
         {
             switch (syll.Tone)
             {
                 case 1: cssClass = "tone1"; break;
                 case 2: cssClass = "tone2"; break;
                 case 3: cssClass = "tone3"; break;
                 case 4: cssClass = "tone4"; break;
             }
         }
         // Text around the annotation is shown faint
         if (outside)
         {
             cssClass += " faint";
         }
         // Marker class for stroke order animation
         if (hanim)
         {
             cssClass += " hanim";
         }

         // Enclosing span only if there is a class to carry
         string encoded = HtmlEncoder.Default.Encode(query[pos].ToString());
         if (cssClass.Length == 0)
         {
             sb.Append(encoded);
         }
         else
         {
             sb.Append("<span class='").Append(cssClass).Append("'>");
             sb.Append(encoded);
             sb.Append("</span>");
         }
     }
 }
Example #7
0
        /// <summary>
        /// Renders the headword's Hanzi for a normal lookup result into a string builder, one
        /// character at a time. A character is wrapped in a span only when at least one CSS
        /// class applies to it.
        /// </summary>
        /// <param name="entry">Entry whose headword is rendered.</param>
        /// <param name="simp">True to render the simplified headword; false for traditional.</param>
        /// <param name="faintIdentTrad">If true, characters equal to the simplified character at the same index get the "faint" class.</param>
        /// <param name="sb">Builder that receives the HTML.</param>
        private void renderHanzi(CedictEntry entry, bool simp, bool faintIdentTrad, StringBuilder sb)
        {
            string hanzi = simp ? entry.ChSimpl : entry.ChTrad;

            for (int pos = 0; pos < hanzi.Length; ++pos)
            {
                char ch = hanzi[pos];

                // Syllable belonging to this character; -1 in the map means there is none
                int            syllIx = entry.HanziPinyinMap[pos];
                PinyinSyllable syll   = null;
                if (syllIx != -1)
                {
                    syll = entry.Pinyin[syllIx];
                }

                // Tone class, if tone markup is on and the tone is 1 thru 4.
                // Other values (-1 unknown, 0 neutral) get no tone class.
                string cssClass = "";
                if (tones != UiTones.None && syll != null)
                {
                    switch (syll.Tone)
                    {
                        case 1: cssClass = "tone1"; break;
                        case 2: cssClass = "tone2"; break;
                        case 3: cssClass = "tone3"; break;
                        case 4: cssClass = "tone4"; break;
                    }
                }
                // When both scripts are shown, traditional characters identical to simplified are faint
                if (faintIdentTrad && ch == entry.ChSimpl[pos])
                {
                    cssClass += " faint";
                }
                // Marker class for stroke order animation
                if (hanim)
                {
                    cssClass += " hanim";
                }

                // Enclosing span only if there is a class to carry
                string encoded = HtmlEncoder.Default.Encode(ch.ToString());
                if (cssClass.Length == 0)
                {
                    sb.Append(encoded);
                }
                else
                {
                    sb.Append("<span class='").Append(cssClass).Append("'>");
                    sb.Append(encoded);
                    sb.Append("</span>");
                }
            }
        }
Example #8
0
        /// <summary>
        /// Loads pinyin lookup candidates and keeps those whose pinyin really contains the
        /// query syllables as a consecutive run.
        /// </summary>
        /// <remarks>
        /// Entry text is lower-cased before comparison; query text is compared as given.
        /// A query tone of -1 matches any tone. Entries for which <c>areHanziCovered</c>
        /// returns false are dropped.
        /// </remarks>
        /// <param name="br">Reader the entries are loaded from.</param>
        /// <param name="poss">Positions of the candidate entries in the reader.</param>
        /// <param name="sylls">Query syllables.</param>
        /// <returns>Verified results, each paired with its loaded entry.</returns>
        List<ResWithEntry> doLoadVerifyPinyin(BinReader br, IEnumerable<int> poss, List<PinyinSyllable> sylls)
        {
            List<ResWithEntry> verified = new List<ResWithEntry>();

            foreach (int filePos in poss)
            {
                // Load candidate from file
                br.Position = filePos;
                CedictEntry entry = new CedictEntry(br);

                // Slide the query over the entry's syllables; stop at the first full match
                int lastStart = entry.PinyinCount - sylls.Count;
                int matchAt   = -1;
                for (int start = 0; start <= lastStart && matchAt == -1; ++start)
                {
                    bool allMatch = true;
                    for (int k = 0; k < sylls.Count && allMatch; ++k)
                    {
                        PinyinSyllable inEntry = entry.GetPinyinAt(start + k);
                        PinyinSyllable inQuery = sylls[k];
                        if (inEntry.Text.ToLowerInvariant() != inQuery.Text)
                        {
                            allMatch = false;
                        }
                        else if (inQuery.Tone != -1 && inEntry.Tone != inQuery.Tone)
                        {
                            allMatch = false;
                        }
                    }
                    if (allMatch)
                    {
                        matchAt = start;
                    }
                }

                // Keep only entries that contain the query and have no unprintable Hanzi
                if (matchAt != -1 && areHanziCovered(entry))
                {
                    CedictResult result = new CedictResult(filePos, entry.HanziPinyinMap, matchAt, sylls.Count);
                    verified.Add(new ResWithEntry(result, entry));
                }
            }
            return verified;
        }
Example #9
0
            /// <summary>
            /// Returns syllable's ID, or 0 if not a standard syllable.
            /// </summary>
            /// <param name="syll">Syllable to look up; its text is lower-cased before the lookup.</param>
            /// <returns>The ID stored for the lower-cased text, or 0 when there is none.</returns>
            public int GetId(PinyinSyllable syll)
            {
                string lo = syll.Text.ToLowerInvariant();

                // Single lookup instead of ContainsKey followed by the indexer
                int id;
                if (pyToId.TryGetValue(lo, out id))
                {
                    return id;
                }
                return 0;
            }
Example #10
0
            /// <summary>
            /// Loads candidate entries by blob ID and keeps those whose pinyin really contains
            /// the query syllables as a consecutive run.
            /// </summary>
            /// <remarks>
            /// Entry text is lower-cased before comparison; query text is compared as given.
            /// A query tone of -1 matches any tone.
            /// </remarks>
            /// <param name="cands">Blob IDs of the candidate entries.</param>
            /// <param name="qsylls">Query syllables.</param>
            /// <returns>Verified results, each paired with its loaded entry.</returns>
            private List<ResWithEntry> retrieveVerifyPinyin(List<int> cands, List<PinyinSyllable> qsylls)
            {
                List<ResWithEntry> verified = new List<ResWithEntry>();

                foreach (int blobId in cands)
                {
                    // Load entry from DB
                    CedictEntry entry = loadFromBlob(blobId);

                    // Try every start position until the whole query matches
                    int syllStart = -1;
                    int lastStart = entry.PinyinCount - qsylls.Count;
                    for (int start = 0; syllStart == -1 && start <= lastStart; ++start)
                    {
                        int matched = 0;
                        while (matched < qsylls.Count)
                        {
                            PinyinSyllable inEntry = entry.GetPinyinAt(start + matched);
                            PinyinSyllable inQuery = qsylls[matched];

                            bool sameText = inEntry.Text.ToLowerInvariant() == inQuery.Text;
                            if (!sameText)
                            {
                                break;
                            }
                            bool toneOk = inQuery.Tone == -1 || inEntry.Tone == inQuery.Tone;
                            if (!toneOk)
                            {
                                break;
                            }
                            ++matched;
                        }
                        if (matched == qsylls.Count)
                        {
                            syllStart = start;
                        }
                    }

                    // Query not found in this entry: drop it
                    if (syllStart == -1)
                    {
                        continue;
                    }

                    CedictResult result = new CedictResult(blobId, entry.HanziPinyinMap, syllStart, qsylls.Count);
                    verified.Add(new ResWithEntry(result, entry));
                }
                return verified;
            }
        /// <summary>
        /// Calculates pinyin layout.
        /// </summary>
        /// <remarks>
        /// Does nothing if <c>pinyinInfo</c> is already set. Otherwise creates it, measures
        /// each display syllable with the pinyin head font, and lays the blocks out left to
        /// right on one line, starting to the right of the headword.
        /// </remarks>
        /// <param name="g">Graphics used for measuring. Its text rendering hint is set to anti-alias.</param>
        private void doAnalyzePinyin(Graphics g)
        {
            // If already measured, nothing to do
            if (pinyinInfo != null)
            {
                return;
            }

            // This is how we measure
            StringFormat sf = StringFormat.GenericTypographic;

            g.TextRenderingHint = System.Drawing.Text.TextRenderingHint.AntiAlias;

            pinyinInfo = new PinyinInfo();
            // Measure each pinyin syllable
            bool diacritics = true;
            var  pcoll      = entry.GetPinyinForDisplay(diacritics,
                                                        res.PinyinHiliteStart, res.PinyinHiliteLength,
                                                        out pinyinInfo.HiliteStart, out pinyinInfo.HiliteLength);
            // Running X position of the next block; all blocks share the same top
            float cx   = headInfo.HeadwordRight + (float)padMid;
            float ctop = padTop;

            for (int i = 0; i != pcoll.Count; ++i)
            {
                PinyinSyllable ps = pcoll[i];
                // New pinyin block
                PinyinBlock pb = new PinyinBlock();
                // Text: syllable's display text
                string text = ps.GetDisplayString(true);
                pb.TextPos = textPool.PoolString(text);
                // If text is punctuation, glue it to previous syllable
                // by taking back the space that was added after that syllable
                if (text.Length == 1 && CedictFormatter.SticksLeft(text) && i > 0)
                {
                    cx -= pinyinSpaceWidth;
                }
                // Block's size and relative location
                SizeF sz = g.MeasureString(text, getFont(fntPinyinHead), 65535, sf);
                pb.Rect = new RectangleF(cx, ctop, sz.Width, sz.Height);
                cx     += sz.Width + pinyinSpaceWidth;
                // Add block
                pinyinInfo.Blocks.Add(pb);
            }
            // Height of whole pinyin area: height of the first block.
            // With no syllables no block was added, so there is nothing to read;
            // the height is then left at its initial value instead of failing on Blocks[0].
            if (pcoll.Count > 0)
            {
                pinyinInfo.PinyinHeight = pinyinInfo.Blocks[0].Rect.Height;
            }
        }
Example #12
0
        /// <summary>
        /// Builds an entry by assembling a line of the form
        /// "trad simp [py py ...] /sense/sense/" and handing it to the parser.
        /// </summary>
        /// <param name="simp">Simplified headword.</param>
        /// <param name="trad">Traditional headword.</param>
        /// <param name="pinyin">Pinyin syllables separated by single spaces.</param>
        /// <param name="trg">Senses, one per line; "\r\n" and "\n" are both accepted as line break.</param>
        /// <returns>Whatever the parser returns for the assembled line.</returns>
        public static CedictEntry BuildEntry(string simp, string trad, string pinyin, string trg)
        {
            // Pinyin as a list of syllables; text that does not parse is kept verbatim with tone -1
            List<PinyinSyllable> syllables = new List<PinyinSyllable>();
            foreach (string rawSyll in pinyin.Split(' '))
            {
                PinyinSyllable parsed = PinyinSyllable.FromDisplayString(rawSyll);
                syllables.Add(parsed ?? new PinyinSyllable(rawSyll, -1));
            }

            // One sense per line
            string[] senses = trg.Replace("\r\n", "\n").Split('\n');

            // Syllables in their display form
            string[] shown = new string[syllables.Count];
            for (int k = 0; k < shown.Length; ++k)
            {
                shown[k] = syllables[k].GetDisplayString(false);
            }
            // Slash is the sense separator of the assembled line, so it is replaced inside senses
            for (int k = 0; k < senses.Length; ++k)
            {
                senses[k] = senses[k].Replace('/', '\\');
            }

            // Assemble the line; every sense is followed by a slash
            string canonical = trad + " " + simp + " [" + string.Join(" ", shown) + "] /"
                               + string.Join("/", senses) + "/";

            return new CedictParser().ParseEntry(canonical, 0, null);
        }
Example #13
0
        /// <summary>
        /// Adds a result for the entry if its pinyin contains the query syllables as a
        /// consecutive run; otherwise does nothing.
        /// </summary>
        /// <remarks>
        /// Entry text is lower-cased before comparison; query text is compared as given.
        /// A query tone of -1 matches any tone. Only the first match is reported.
        /// </remarks>
        /// <param name="entry">Entry to verify.</param>
        /// <param name="entryId">Not used by this method.</param>
        /// <param name="sylls">Query syllables.</param>
        /// <param name="res">List that receives the result when the entry matches.</param>
        private void verifyPinyin(CedictEntry entry, int entryId, List<PinyinSyllable> sylls, List<CedictResult> res)
        {
            int queryLen  = sylls.Count;
            int lastStart = entry.PinyinCount - queryLen;

            for (int start = 0; start <= lastStart; ++start)
            {
                // Does the query match the entry's syllables beginning at this position?
                bool matches = true;
                for (int k = 0; matches && k < queryLen; ++k)
                {
                    PinyinSyllable inEntry = entry.GetPinyinAt(start + k);
                    PinyinSyllable inQuery = sylls[k];
                    matches = inEntry.Text.ToLowerInvariant() == inQuery.Text
                              && (inQuery.Tone == -1 || inEntry.Tone == inQuery.Tone);
                }
                if (!matches)
                {
                    continue;
                }

                // Keeper: report the first match and stop looking
                res.Add(new CedictResult(entry, entry.HanziPinyinMap, start, queryLen));
                return;
            }
        }
Example #14
0
        /// <summary>
        /// Checks a proposed headword. If an entry with the same head already exists, returns
        /// it rendered as HTML; otherwise returns the rendered reference entries found for
        /// the simplified form.
        /// </summary>
        /// <param name="lang">Language passed on to the entry renderer.</param>
        /// <param name="simp">Simplified headword; required.</param>
        /// <param name="trad">Traditional headword; required.</param>
        /// <param name="pinyin">Pinyin syllables separated by single spaces; required.</param>
        /// <returns>Status 400 with a message if a required parameter is missing; otherwise the verification result.</returns>
        public IActionResult VerifyHead([FromQuery] string lang, [FromQuery] string simp, [FromQuery] string trad, [FromQuery] string pinyin)
        {
            // All three headword parts are required
            if (simp == null) { return StatusCode(400, "Missing 'simp' parameter."); }
            if (trad == null) { return StatusCode(400, "Missing 'trad' parameter."); }
            if (pinyin == null) { return StatusCode(400, "Missing 'pinyin' parameter."); }

            NewEntryVerifyHeadResult result = new NewEntryVerifyHeadResult();
            StringBuilder            html   = new StringBuilder();

            // Pinyin as a list of syllables; text that does not parse is kept verbatim with tone -1
            List<PinyinSyllable> pyList = new List<PinyinSyllable>();
            foreach (string rawSyll in pinyin.Split(' '))
            {
                PinyinSyllable parsed = PinyinSyllable.FromDisplayString(rawSyll);
                pyList.Add(parsed ?? new PinyinSyllable(rawSyll, -1));
            }

            // Syllables in display form, joined by spaces.
            // A space is inserted only when something has been written already.
            StringBuilder pyJoined = new StringBuilder();
            foreach (PinyinSyllable syll in pyList)
            {
                if (pyJoined.Length > 0)
                {
                    pyJoined.Append(' ');
                }
                pyJoined.Append(syll.GetDisplayString(false));
            }

            // Is this a dupe?
            string      head     = trad + " " + simp + " [" + pyJoined.ToString() + "]";
            CedictEntry existing = SqlDict.GetEntryByHead(head);
            if (existing != null)
            {
                result.Duplicate       = true;
                result.ExistingEntryId = EntryId.IdToString(existing.StableId);
                new EntryRenderer(lang, existing, true).Render(html, null);
                result.ExistingEntry = html.ToString();
                return new ObjectResult(result);
            }

            // Not a dupe
            result.Duplicate = false;

            // Reference entries, CEDICT and HanDeDict, each group rendered into its own div
            CedictEntry[] ced, hdd;
            langRepo.GetEntries(simp, out ced, out hdd);

            string[]        divIds = { "newEntryRefCED", "newEntryRefHDD" };
            CedictEntry[][] groups = { ced, hdd };
            for (int g = 0; g < groups.Length; ++g)
            {
                html.Append("<div id='" + divIds[g] + "'>");
                foreach (CedictEntry refEntry in groups[g])
                {
                    new EntryRenderer(lang, refEntry, trad, pyList).Render(html, null);
                }
                html.Append("</div>");
            }
            result.RefEntries = html.ToString();

            // Tell our caller
            return new ObjectResult(result);
        }
Example #15
0
        /// <summary>
        /// Renders a classifier sense: a localized label followed by the classifiers listed
        /// in <paramref name="equiv"/>, separated by "; ".
        /// </summary>
        /// <remarks>
        /// Everything up to and including the first colon of <paramref name="equiv"/> is
        /// dropped; the rest is split at commas. Each item is expected to look like
        /// "X[pinyin]" or "X|Y[pinyin]", where the pinyin ends in a tone digit. Items that do
        /// not fit, or whose pinyin is shorter than two characters, are skipped without
        /// writing anything.
        /// </remarks>
        /// <param name="sb">Builder that receives the HTML.</param>
        /// <param name="equiv">Text of the sense holding the classifier list.</param>
        private void renderSpecialSense(StringBuilder sb, string equiv)
        {
            sb.Append("<span class='sense-meta'>");
            sb.Append(HtmlEncoder.Default.Encode(TextProvider.Instance.GetString(lang, "displayEntry.classifier")));
            sb.Append(' ');
            sb.Append("</span>");
            equiv = equiv.Substring(equiv.IndexOf(':') + 1);
            string[] items = equiv.Split(',');
            bool     first = true;

            foreach (string itm in items)
            {
                // ch1: first character; ch2: character after '|', if any; pinyin: text in brackets
                char   ch1    = '\0';
                char   ch2    = '\0';
                string pinyin = null;
                for (int i = 0; i != itm.Length; ++i)
                {
                    char c = itm[i];
                    if (i == 0)
                    {
                        ch1 = c;
                    }
                    else if (i == 1 && c == '|')
                    {
                        continue;
                    }
                    else if (i == 1 && c == '[')
                    {
                        pinyin = ""; continue;
                    }
                    else if (i == 1)
                    {
                        // Neither '|' nor '[' in second position: not an item we understand
                        break;
                    }
                    else if (i == 2 && pinyin == null)
                    {
                        ch2 = c; continue;
                    }
                    else if (i == 2)
                    {
                        pinyin += c; continue;
                    }
                    else if (i == 3 && c == '[')
                    {
                        pinyin = ""; continue;
                    }
                    else if (c != ']')
                    {
                        pinyin += c;
                    }
                }
                // Skip items without hanzi or without usable pinyin. The length check happens
                // here, before any output, so a skipped item cannot leave a stray separator or
                // a hanzi without pinyin behind.
                if (ch1 == '\0' || pinyin == null || pinyin.Length < 2)
                {
                    continue;
                }
                if (!first)
                {
                    sb.Append("; ");
                }
                // We have two forms: ch2 is shown for simplified, ch1 for traditional
                if (ch2 != '\0')
                {
                    if (script == UiScript.Simp)
                    {
                        sb.Append(HtmlEncoder.Default.Encode(ch2.ToString()));
                    }
                    else if (script == UiScript.Trad)
                    {
                        sb.Append(HtmlEncoder.Default.Encode(ch1.ToString()));
                    }
                    else
                    {
                        sb.Append(HtmlEncoder.Default.Encode(ch2.ToString()));
                        sb.Append("·");
                        sb.Append(HtmlEncoder.Default.Encode(ch1.ToString()));
                    }
                }
                // Simp=Trad
                else
                {
                    sb.Append(HtmlEncoder.Default.Encode(ch1.ToString()));
                }
                sb.Append("·");
                // Last character of pinyin is taken as the tone (0 if it is not a number),
                // the rest as the syllable's text
                PinyinSyllable psl = null;
                try
                {
                    int tone;
                    if (!int.TryParse(pinyin.Substring(pinyin.Length - 1), out tone))
                    {
                        tone = 0;
                    }
                    psl = new PinyinSyllable(pinyin.Substring(0, pinyin.Length - 1), tone);
                }
                catch
                {
                    // Deliberate best effort: if the syllable cannot be built,
                    // the raw pinyin text is shown below instead
                }
                if (psl != null)
                {
                    sb.Append(HtmlEncoder.Default.Encode(psl.GetDisplayString(true)));
                }
                else
                {
                    sb.Append(HtmlEncoder.Default.Encode(pinyin));
                }
                first = false;
            }
        }
Example #16
0
            /// <summary>
            /// Parses a pinyin query string into normalized syllables.
            /// </summary>
            /// <remarks>
            /// The query is trimmed and lower-cased, and "u:" and "ü" become "v". It is split at
            /// spaces and apostrophes, then at tone digits, then into known syllables. A trailing
            /// digit 1 thru 5 becomes the tone, with 5 stored as 0; syllables without one get
            /// tone -1. A syllable other than "er" that ends in "r" is split into the syllable
            /// without the "r", followed by a separate "r" with tone 0.
            /// </remarks>
            /// <param name="query">Query as typed by the user.</param>
            /// <returns>Parsed syllables; an empty list if the query is empty or whitespace only.</returns>
            public List<PinyinSyllable> ParsePinyinQuery(string query)
            {
                List<PinyinSyllable> syllables = new List<PinyinSyllable>();

                // Empty or whitespace-only query: no syllables
                string normalized = query.Trim();
                if (normalized.Length == 0)
                {
                    return syllables;
                }

                // Lower case only; "u:" and "ü" both become "v"
                normalized = normalized.ToLowerInvariant().Replace("u:", "v").Replace("ü", "v");

                // Explicit boundaries first: spaces and apostrophes
                char[] separators = { ' ', '\'', '’' };
                foreach (string part in normalized.Split(separators))
                {
                    // Tone digits end a syllable; this step also eliminates empty parts
                    foreach (string digitPart in doPinyinSplitDigits(part))
                    {
                        // The rest is split by matching known pinyin syllables
                        foreach (string rawSyll in doPinyinSplitSyllables(digitPart))
                        {
                            int lastIx = rawSyll.Length - 1;
                            int digit  = (int)(rawSyll[lastIx] - '0');

                            bool hasTone = digit >= 1 && digit <= 5 && rawSyll.Length > 1;
                            if (!hasTone)
                            {
                                // No tone mark: tone is unspecified
                                syllables.Add(new PinyinSyllable(rawSyll, -1));
                                continue;
                            }
                            // Tone mark is cut off the text; 5 is stored as 0
                            int tone = digit == 5 ? 0 : digit;
                            syllables.Add(new PinyinSyllable(rawSyll.Substring(0, lastIx), tone));
                        }
                    }
                }

                // Syllables ending in "r" get the "r" split off as a syllable of its own.
                // The list grows while we walk it; an inserted "r" is a single character and is left alone.
                for (int ix = 0; ix < syllables.Count; ++ix)
                {
                    PinyinSyllable current = syllables[ix];
                    string         text    = current.Text;

                    bool splitOffR = text != "er" && text.Length > 1 && text.EndsWith("r");
                    if (!splitOffR)
                    {
                        continue;
                    }
                    syllables[ix] = new PinyinSyllable(text.Substring(0, text.Length - 1), current.Tone);
                    syllables.Insert(ix + 1, new PinyinSyllable("r", 0));
                }
                return syllables;
            }
Example #17
0
        /// <summary>
        /// Handles the request: reads the 'simp', 'trad' and 'pinyin' parameters and returns
        /// the entries found for the simplified form, rendered as HTML in two divs
        /// (CEDICT and HanDeDict).
        /// </summary>
        /// <exception cref="ApiException">Status 400 if any of the three parameters is missing.</exception>
        public override void Process()
        {
            string simp = Req.Params["simp"];

            if (simp == null)
            {
                throw new ApiException(400, "Missing 'simp' parameter.");
            }
            string trad = Req.Params["trad"];

            if (trad == null)
            {
                throw new ApiException(400, "Missing 'trad' parameter.");
            }
            string pinyin = Req.Params["pinyin"];

            if (pinyin == null)
            {
                throw new ApiException(400, "Missing 'pinyin' parameter.");
            }

            Result res = new Result();

            res.Passed = true;

            // DBG
            // NOTE(review): hard-coded debug failure for two specific headwords; looks like
            // leftover test scaffolding - confirm whether it can be removed.
            if (simp == "大家" || simp == "污染")
            {
                res.Passed = false;
            }

            // Prepare pinyin as list of proper syllables
            List <PinyinSyllable> pyList = new List <PinyinSyllable>();

            string[] pyRawArr = pinyin.Split(' ');
            foreach (string pyRaw in pyRawArr)
            {
                PinyinSyllable ps = PinyinSyllable.FromDisplayString(pyRaw);
                // Text that does not parse as a syllable is kept verbatim with unknown tone,
                // so the list handed to the renderer never contains null
                if (ps == null)
                {
                    ps = new PinyinSyllable(pyRaw, -1);
                }
                pyList.Add(ps);
            }

            // Return all entries, CEDICT and HanDeDict, rendered as HTML
            CedictEntry[] ced, hdd;
            Global.HWInfo.GetEntries(simp, out ced, out hdd);
            StringBuilder sb = new StringBuilder();

            using (HtmlTextWriter writer = new HtmlTextWriter(new StringWriter(sb)))
            {
                // CEDICT entries in their own div
                writer.AddAttribute(HtmlTextWriterAttribute.Id, "newEntryRefCED");
                writer.RenderBeginTag(HtmlTextWriterTag.Div);
                foreach (CedictEntry entry in ced)
                {
                    EntryRenderer er = new EntryRenderer(entry, trad, pyList);
                    er.Render(writer);
                }
                writer.RenderEndTag();
                // HanDeDict entries in their own div
                writer.AddAttribute(HtmlTextWriterAttribute.Id, "newEntryRefHDD");
                writer.RenderBeginTag(HtmlTextWriterTag.Div);
                foreach (CedictEntry entry in hdd)
                {
                    EntryRenderer er = new EntryRenderer(entry, trad, pyList);
                    er.Render(writer);
                }
                writer.RenderEndTag();
            }
            res.RefEntries = sb.ToString();

            // Tell our caller
            Res = res;
        }
Example #18
0
        /// <summary>
        /// Builds the <c>UniHanziInfo</c> for one character: whether it can be used as a simplified
        /// character, its traditional variants, and its pinyin readings in ranked order.
        /// </summary>
        /// <param name="c">The character; used as its own traditional variant when <paramref name="ci"/> lists none.</param>
        /// <param name="ci">Character data: simplified/traditional variant lists and the reading sources (Mandarin, Pinlu, Pinyin, XHC).</param>
        /// <returns>The simplified flag, the traditional variants and the readings converted to typed syllables.</returns>
        private UniHanziInfo getInfo(char c, CharInfo ci)
        {
            bool canBeSimp;
            List<char> tradVariants = new List<char>();

            // Character can be used as simplified or not, and its traditional variants
            // As per http://www.unicode.org/reports/tr38/index.html#SCTC from Unihan report
            if (ci.TradVars == null)
            {
                // 1: No variants at all: simp and trad forms identical
                // 2: Only simplified variants listed: character is traditional only
                // Either way the character is its own traditional form
                canBeSimp = ci.SimpVars == null;
                tradVariants.Add(c);
            }
            else
            {
                // 3: Only simp
                // 4: Both simp and trad variants listed; whether or not the character is among
                //    its own traditional variants, the listed variants are taken as they are
                canBeSimp = true;
                tradVariants.AddRange(ci.TradVars);
            }

            List<string> pinyin = new List<string>();

            // Pinyin reading: use Mandarin only if no other source available
            // Otherwise, combine ranking of sources
            if (ci.Pinlu == null && ci.Pinyin == null && ci.XHC == null)
            {
                // NOTE(review): Mandarin is added without a null check; presumably it is always
                // present when the other sources are missing -- confirm
                pinyin.Add(ci.Mandarin);
            }
            else
            {
                // Sources are processed in this fixed order; missing ones are skipped
                var sources = new[] { ci.Pinlu, ci.Pinyin, ci.XHC };

                // Length of the longest source list: the score of a top-ranked reading
                int max = 0;
                foreach (var source in sources)
                {
                    if (source != null && source.Length > max)
                    {
                        max = source.Length;
                    }
                }

                // Every source listing a reading adds (max - position) to that reading's score
                Dictionary<string, PyCalc> cnts = new Dictionary<string, PyCalc>();
                foreach (var source in sources)
                {
                    if (source == null)
                    {
                        continue;
                    }
                    for (int i = 0; i != source.Length; ++i)
                    {
                        PyCalc pyCalc;
                        if (!cnts.TryGetValue(source[i], out pyCalc))
                        {
                            pyCalc = new PyCalc();
                            pyCalc.Py = source[i];
                            cnts[source[i]] = pyCalc;
                        }
                        pyCalc.Cnt++;
                        pyCalc.Sum += max - i;
                    }
                }

                // Highest combined score first
                List<PyCalc> ranked = new List<PyCalc>(cnts.Values);
                ranked.Sort((x, y) => y.Sum.CompareTo(x.Sum));
                foreach (PyCalc item in ranked)
                {
                    pinyin.Add(item.Py);
                }
            }

            // Convert to typed Pinyin syllables
            PinyinSyllable[] sylls = new PinyinSyllable[pinyin.Count];
            for (int i = 0; i != pinyin.Count; ++i)
            {
                sylls[i] = PinyinSyllable.FromDisplayString(pinyin[i]);
            }

            // Done.
            return new UniHanziInfo(canBeSimp, tradVariants.ToArray(), sylls);
        }