Ejemplo n.º 1
0
        /// <summary>
        /// Ctor: deserializes the instance from a binary reader.
        /// </summary>
        /// <param name="br">Binary reader to consume the data from.</param>
        /// <remarks>
        /// Data is consumed in this order: one flag byte (non-zero sets <c>CanBeSimp</c> to true),
        /// a one-byte count followed by that many characters for <c>TradVariants</c>,
        /// then a one-byte count followed by that many serialized syllables for <c>Pinyin</c>.
        /// </remarks>
        public UniHanziInfo(BinReader br)
        {
            // Any non-zero flag byte means "true"
            CanBeSimp = br.ReadByte() != 0;

            // Count-prefixed list of traditional variants
            int variantCount = br.ReadByte();
            char[] variants = new char[variantCount];
            for (int idx = 0; idx < variantCount; ++idx)
            {
                variants[idx] = br.ReadChar();
            }
            TradVariants = variants;

            // Count-prefixed list of pinyin syllables; each one deserializes itself
            int syllCount = br.ReadByte();
            PinyinSyllable[] sylls = new PinyinSyllable[syllCount];
            for (int idx = 0; idx < syllCount; ++idx)
            {
                sylls[idx] = new PinyinSyllable(br);
            }
            Pinyin = sylls;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// <para>Gets the entry's pinyin for display: every standalone "r" syllable with tone 0 is merged
        /// into the syllable before it, so the result may have fewer items than the raw syllables.</para>
        /// <para>Also translates a highlight range from raw syllable positions to merged positions.</para>
        /// </summary>
        /// <param name="diacritics">Not used by this method's body; kept for interface compatibility.</param>
        /// <param name="origHiliteStart">Start of pinyin highlight from result, or -1.</param>
        /// <param name="origHiliteLength">Length of pinyin highlight from result, or 0.</param>
        /// <param name="hiliteStart">Start of pinyin highlight in returned collection, or -1.</param>
        /// <param name="hiliteLength">Length of pinyin highlight in returned collection, or 0.</param>
        /// <returns>The syllables to show in the UI, with "r" syllables merged.</returns>
        public ReadOnlyCollection <PinyinSyllable> GetPinyinForDisplay(bool diacritics,
                                                                       int origHiliteStart, int origHiliteLength,
                                                                       out int hiliteStart, out int hiliteLength)
        {
            // If pinyin does not have an "r5", no transformation needed
            if (Array.FindIndex(pinyin, x => x.Text == "r" && x.Tone == 0) == -1)
            {
                hiliteStart  = origHiliteStart;
                hiliteLength = origHiliteLength;
                return(Pinyin);
            }
            // Map decomposed (raw) positions to merged positions; starts out as identity
            int[] posMap = new int[pinyin.Length];
            for (int i = 0; i != posMap.Length; ++i)
            {
                posMap[i] = i;
            }
            // Work on a copy where "r" gets merged into the previous syllable
            List <PinyinSyllable> res = new List <PinyinSyllable>(pinyin);

            // i walks the (shrinking) result list; mi is the matching index in the raw array
            int mi = 0;
            for (int i = 0; i < res.Count; ++i, ++mi)
            {
                PinyinSyllable ps = res[i];
                // A leading "r" has no previous syllable to merge into: leave it alone
                if (i > 0 && ps.Text == "r" && ps.Tone == 0)
                {
                    res[i - 1] = new PinyinSyllable(res[i - 1].Text + "r", res[i - 1].Tone);
                    res.RemoveAt(i);
                    // This raw position and everything after it shifts left by one
                    for (int j = mi; j != posMap.Length; ++j)
                    {
                        --posMap[j];
                    }
                    // The next item moved into slot i; step back so the loop increment
                    // revisits that slot and i stays in sync with mi
                    --i;
                }
            }
            // Translate the highlight, if there is one
            if (origHiliteStart == -1)
            {
                hiliteStart = -1; hiliteLength = 0;
            }
            else
            {
                hiliteStart = posMap[origHiliteStart];
                int hiliteEnd = origHiliteStart + origHiliteLength - 1;
                hiliteEnd    = posMap[hiliteEnd];
                hiliteLength = hiliteEnd - hiliteStart + 1;
            }
            return(new ReadOnlyCollection <PinyinSyllable>(res));
        }
Ejemplo n.º 3
0
        /// <summary>
        /// <para>Builds a typed Pinyin syllable from its display string.</para>
        /// <para>Returns null if the provided string matches no entry in the tone map and is not "r".</para>
        /// </summary>
        /// <param name="ds">Display string to look up.</param>
        /// <returns>The matching syllable, or null if the string is not recognized.</returns>
        public static PinyinSyllable FromDisplayString(string ds)
        {
            PinyinSyllable result = null;

            foreach (var entry in toneMap)
            {
                // Slots 0 through 4 are compared in order; the index of the matching slot is the tone
                int matchedTone = -1;
                for (int slot = 0; slot <= 4; ++slot)
                {
                    if (entry.Value[slot] == ds)
                    {
                        matchedTone = slot;
                        break;
                    }
                }
                if (matchedTone == -1)
                {
                    continue;
                }
                // Slot 0 supplies the syllable text, whichever slot matched
                result = new PinyinSyllable(entry.Value[0], matchedTone);
                break;
            }
            // A lone "r" always yields "r" with tone 0, overriding any match from the map
            if (ds == "r")
            {
                result = new PinyinSyllable("r", 0);
            }
            return result;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Ctor: deserializes the text run from a binary reader.
        /// </summary>
        /// <param name="br">Binary reader to consume the data from.</param>
        public TextRunZho(BinReader br)
        {
            // Flag bits as tested below:
            // 1: simplified string is stored
            // 2: traditional string is stored (otherwise it takes the value of Simp)
            // 4: pinyin is stored
            // NOTE(review): the previous comment described bit 1 as "traditional and simplified are
            // different" and bit 2 as "pinyin present"; confirm the layout against the serializer.
            byte flags = br.ReadByte();
            bool hasSimp   = (flags & 1) != 0;
            bool hasTrad   = (flags & 2) != 0;
            bool hasPinyin = (flags & 4) != 0;

            if (hasSimp)
            {
                Simp = br.ReadString();
            }
            Trad = hasTrad ? br.ReadString() : Simp;

            if (!hasPinyin)
            {
                Pinyin = null;
                return;
            }
            // One-byte count, then that many serialized syllables
            int syllCount = br.ReadByte();
            PinyinSyllable[] sylls = new PinyinSyllable[syllCount];
            for (int idx = 0; idx < syllCount; ++idx)
            {
                sylls[idx] = new PinyinSyllable(br);
            }
            Pinyin = sylls;
        }
Ejemplo n.º 5
0
 /// <summary>
 /// Normalizes array of Cedict-style pinyin syllables into our format.
 /// </summary>
 /// <param name="parts">Input syllables, each expected to end in a tone digit 1 thru 5.</param>
 /// <param name="syllsArr">Receives one syllable per input item, in input order.</param>
 /// <param name="pinyinMap">Receives one value per input item that ended in a tone digit:
 /// the index of that item's syllable in <paramref name="syllsArr"/>.</param>
 private void normalizePinyin(string[] parts, out PinyinSyllable[] syllsArr, out List<int> pinyinMap)
 {
     // Per input item:
     // - No trailing tone digit: stored unchanged with tone -1, and left out of the map
     // - Otherwise: tone digit is split off (5 becomes 0), "u:" becomes "v", "U:" becomes "V"
     // NOTE(review): the previous comment mentioned r5 merging; no merging is done in this method.
     pinyinMap = new List<int>();
     List<PinyinSyllable> collected = new List<PinyinSyllable>();
     foreach (string part in parts)
     {
         // NOTE(review): an empty item makes this index expression throw
         char toneChar = part[part.Length - 1];
         bool hasToneDigit = toneChar >= '1' && toneChar <= '5';
         if (!hasToneDigit)
         {
             collected.Add(new PinyinSyllable(part, -1));
             continue;
         }
         // Neutral tone for us is 0, not five
         int tone = toneChar - '0';
         if (tone == 5) tone = 0;
         // Drop the tone digit, then rewrite "u:" in both cases
         string text = part.Substring(0, part.Length - 1).Replace("u:", "v").Replace("U:", "V");
         PinyinSyllable syll = new PinyinSyllable(text, tone);
         // The syllable lands at the current end of the list; record that index in the map
         pinyinMap.Add(collected.Count);
         collected.Add(syll);
     }
     // Result: the syllables as an array.
     syllsArr = collected.ToArray();
 }
Ejemplo n.º 6
0
        /// <summary>
        /// Parses a pinyin query string into normalized syllables.
        /// </summary>
        /// <param name="query">Raw user query; may contain spaces, apostrophes, tone digits, "u:" or "ü".</param>
        /// <returns>
        /// The parsed syllables; tone is -1 where the query gave none. A syllable ending in "r"
        /// (other than "er") is split into the base syllable plus a separate "r" with tone 0.
        /// Empty list if the query is null, empty or whitespace only.
        /// </returns>
        private static List<PinyinSyllable> doParsePinyin(string query)
        {
            // Null, empty string or WS only: no syllables
            if (query == null) return new List<PinyinSyllable>();
            query = query.Trim();
            if (query == string.Empty) return new List<PinyinSyllable>();

            // Only deal with lower-case
            query = query.ToLowerInvariant();
            // Convert "u:" > "v" and "ü" > "v"
            query = query.Replace("u:", "v");
            query = query.Replace("ü", "v");

            // Split by spaces and apostrophes
            string[] explicitSplit = query.Split(new char[] { ' ', '\'', '’' });
            // Further split each part, in case input did not have spaces
            List<string> pinyinSplit = new List<string>();
            foreach (string str in explicitSplit)
            {
                // Find numbers 1 thru 5: tone marks always come at end of syllable
                // Important: this also eliminates empty syllables
                List<string> numSplit = doPinyinSplitDigits(str);
                // Split the rest by matching known pinyin syllables
                foreach (string str2 in numSplit)
                {
                    List<string> syllSplit = doPinyinSplitSyllables(str2);
                    pinyinSplit.AddRange(syllSplit);
                }
            }
            // Create normalized syllable by separating tone mark, if present
            List<PinyinSyllable> res = new List<PinyinSyllable>();
            foreach (string str in pinyinSplit)
            {
                char c = str[str.Length - 1];
                int val = (int)(c - '0');
                // Tone mark here (a lone digit is not a tone mark)
                if (val >= 1 && val <= 5 && str.Length > 1)
                {
                    // Neutral tone is stored as 0, not 5
                    if (val == 5) val = 0;
                    res.Add(new PinyinSyllable(str.Substring(0, str.Length - 1), val));
                }
                // No tone mark: add as unspecified
                else res.Add(new PinyinSyllable(str, -1));
            }
            // If we have syllables ending in "r", split that into separate "r5"
            for (int i = 0; i < res.Count; ++i)
            {
                PinyinSyllable ps = res[i];
                string text = ps.Text;
                // Compare the last character directly: EndsWith(string) is culture-sensitive and
                // could disagree with the ordinal Substring that removes the final character
                if (text != "er" && text.Length > 1 && text[text.Length - 1] == 'r')
                {
                    PinyinSyllable ps1 = new PinyinSyllable(text.Substring(0, text.Length - 1), ps.Tone);
                    PinyinSyllable ps2 = new PinyinSyllable("r", 0);
                    res[i] = ps1;
                    // The inserted "r" has length 1, so the next iteration skips it
                    res.Insert(i + 1, ps2);
                }
            }
            // Done
            return res;
        }
Ejemplo n.º 7
0
 /// <summary>
 /// <para>Gets the entry's pinyin for display: every standalone "r" syllable with tone 0 is merged
 /// into the syllable before it, so the result may have fewer items than the raw syllables.</para>
 /// <para>Also translates a highlight range from raw syllable positions to merged positions.</para>
 /// </summary>
 /// <param name="diacritics">Not used by this method's body; kept for interface compatibility.</param>
 /// <param name="origHiliteStart">Start of pinyin highlight from result, or -1.</param>
 /// <param name="origHiliteLength">Length of pinyin highlight from result, or 0.</param>
 /// <param name="hiliteStart">Start of pinyin highlight in returned collection, or -1.</param>
 /// <param name="hiliteLength">Length of pinyin highlight in returned collection, or 0.</param>
 /// <returns>The syllables to show in the UI, with "r" syllables merged.</returns>
 public ReadOnlyCollection<PinyinSyllable> GetPinyinForDisplay(bool diacritics,
     int origHiliteStart, int origHiliteLength,
     out int hiliteStart, out int hiliteLength)
 {
     // If pinyin does not have an "r5", no transformation needed
     if (Array.FindIndex(pinyin, x => x.Text == "r" && x.Tone == 0) == -1)
     {
         hiliteStart = origHiliteStart;
         hiliteLength = origHiliteLength;
         return Pinyin;
     }
     // Map decomposed (raw) positions to merged positions; starts out as identity
     int[] posMap = new int[pinyin.Length];
     for (int i = 0; i != posMap.Length; ++i) posMap[i] = i;
     // Work on a copy where "r" gets merged into the previous syllable
     List<PinyinSyllable> res = new List<PinyinSyllable>(pinyin);
     // i walks the (shrinking) result list; mi is the matching index in the raw array
     int mi = 0;
     for (int i = 0; i < res.Count; ++i, ++mi)
     {
         PinyinSyllable ps = res[i];
         // A leading "r" has no previous syllable to merge into: leave it alone
         if (i > 0 && ps.Text == "r" && ps.Tone == 0)
         {
             res[i - 1] = new PinyinSyllable(res[i - 1].Text + "r", res[i - 1].Tone);
             res.RemoveAt(i);
             // This raw position and everything after it shifts left by one
             for (int j = mi; j != posMap.Length; ++j) --posMap[j];
             // The next item moved into slot i; step back so the loop increment
             // revisits that slot and i stays in sync with mi
             --i;
         }
     }
     // Translate the highlight, if there is one
     if (origHiliteStart == -1){ hiliteStart = -1; hiliteLength = 0; }
     else
     {
         hiliteStart = posMap[origHiliteStart];
         int hiliteEnd = origHiliteStart + origHiliteLength - 1;
         hiliteEnd = posMap[hiliteEnd];
         hiliteLength = hiliteEnd - hiliteStart + 1;
     }
     return new ReadOnlyCollection<PinyinSyllable>(res);
 }