/// <summary>
/// Ctor: reads the instance's data from a binary reader.
/// </summary>
/// <param name="br">Reader to consume the serialized fields from, in order.</param>
public UniHanziInfo(BinReader br)
{
    // Leading byte is a flag: zero means false, any other value means true
    CanBeSimp = br.ReadByte() != 0;

    // One-byte count, followed by that many characters
    int variantCount = br.ReadByte();
    TradVariants = new char[variantCount];
    for (int ix = 0; ix < variantCount; ++ix)
    {
        TradVariants[ix] = br.ReadChar();
    }

    // One-byte count, followed by that many serialized syllables
    int syllCount = br.ReadByte();
    Pinyin = new PinyinSyllable[syllCount];
    for (int ix = 0; ix < syllCount; ++ix)
    {
        Pinyin[ix] = new PinyinSyllable(br);
    }
}
/// <summary>
/// <para>Gets the entry's pinyin for display: every "r" syllable with tone 0 is merged into the</para>
/// <para>syllable before it, so the result may have fewer items than the raw syllables.</para>
/// <para>Also translates a highlight range from raw syllable positions to merged positions.</para>
/// </summary>
/// <param name="diacritics">Not used by this method; kept for interface compatibility.</param>
/// <param name="origHiliteStart">Start of pinyin highlight in the raw syllables, or -1.</param>
/// <param name="origHiliteLength">Length of pinyin highlight in the raw syllables, or 0.</param>
/// <param name="hiliteStart">Start of pinyin highlight in returned collection, or -1.</param>
/// <param name="hiliteLength">Length of pinyin highlight in returned collection, or 0.</param>
/// <returns>The syllables to show in the UI.</returns>
public ReadOnlyCollection<PinyinSyllable> GetPinyinForDisplay(bool diacritics,
    int origHiliteStart, int origHiliteLength,
    out int hiliteStart, out int hiliteLength)
{
    // If pinyin does not have an "r5", no transformation needed
    if (Array.FindIndex(pinyin, x => x.Text == "r" && x.Tone == 0) == -1)
    {
        hiliteStart = origHiliteStart;
        hiliteLength = origHiliteLength;
        return Pinyin;
    }

    // Build the merged list and the raw-to-merged position map in one forward pass.
    // Walking the source array (instead of removing from a list while indexing it)
    // keeps the raw index and the merged index in step, also with several "r5" items.
    int[] posMap = new int[pinyin.Length];
    List<PinyinSyllable> res = new List<PinyinSyllable>(pinyin.Length);
    for (int i = 0; i != pinyin.Length; ++i)
    {
        PinyinSyllable ps = pinyin[i];
        // An "r5" can only be merged if there is a syllable before it;
        // a leading "r5" is kept as a syllable of its own.
        if (res.Count > 0 && ps.Text == "r" && ps.Tone == 0)
        {
            PinyinSyllable prev = res[res.Count - 1];
            res[res.Count - 1] = new PinyinSyllable(prev.Text + "r", prev.Tone);
        }
        else
        {
            res.Add(ps);
        }
        // Raw position i now lives in the last item of the merged list
        posMap[i] = res.Count - 1;
    }

    // Translate highlight through the position map
    if (origHiliteStart == -1)
    {
        hiliteStart = -1;
        hiliteLength = 0;
    }
    else
    {
        hiliteStart = posMap[origHiliteStart];
        int hiliteEnd = posMap[origHiliteStart + origHiliteLength - 1];
        hiliteLength = hiliteEnd - hiliteStart + 1;
    }
    return new ReadOnlyCollection<PinyinSyllable>(res);
}
/// <summary>
/// <para>Converts a display string (with diacritics) into a typed Pinyin syllable.</para>
/// <para>Returns null if the provided string is not recognized as a Pinyin syllable.</para>
/// </summary>
/// <param name="ds">Display form to look up among the tone map's values.</param>
/// <returns>The matching syllable; "r" with tone 0 for the bare string "r"; otherwise null.</returns>
public static PinyinSyllable FromDisplayString(string ds)
{
    PinyinSyllable result = null;
    foreach (var entry in toneMap)
    {
        var forms = entry.Value;

        // Positions 0 thru 4 are compared in order; the matching position is the tone
        int matchedTone = -1;
        for (int t = 0; t < 5; ++t)
        {
            if (forms[t] == ds)
            {
                matchedTone = t;
                break;
            }
        }
        if (matchedTone == -1)
        {
            continue;
        }

        // Syllable text is always the entry's position-0 form
        result = new PinyinSyllable(forms[0], matchedTone);
        break;
    }

    // A bare "r" always yields "r" with tone 0, whatever the lookup found
    if (ds == "r")
    {
        result = new PinyinSyllable("r", 0);
    }
    return result;
}
/// <summary>
/// Ctor: reads the run's data from a binary reader.
/// </summary>
/// <param name="br">Reader to consume the serialized fields from, in order.</param>
public TextRunZho(BinReader br)
{
    // Flags byte, as tested below:
    // bit 1: a simplified string follows
    // bit 2: a separate traditional string follows
    // bit 4: pinyin syllables follow
    byte flags = br.ReadByte();
    bool simpStored = (flags & 1) != 0;
    bool tradStored = (flags & 2) != 0;
    bool pinyinStored = (flags & 4) != 0;

    // Simplified is only assigned if it was stored
    if (simpStored)
    {
        Simp = br.ReadString();
    }

    // Traditional is either stored separately, or takes the same value as simplified
    Trad = tradStored ? br.ReadString() : Simp;

    // No pinyin stored: nothing more to read
    if (!pinyinStored)
    {
        Pinyin = null;
        return;
    }

    // One-byte count, followed by that many serialized syllables
    int syllCount = br.ReadByte();
    Pinyin = new PinyinSyllable[syllCount];
    for (int ix = 0; ix < syllCount; ++ix)
    {
        Pinyin[ix] = new PinyinSyllable(br);
    }
}
/// <summary>
/// Converts Cedict-style pinyin syllables (text plus trailing tone digit) into typed syllables.
/// </summary>
/// <param name="parts">Input syllables; each item must be a non-empty string.</param>
/// <param name="syllsArr">Receives one output syllable per input item, in input order.</param>
/// <param name="pinyinMap">
/// Receives one value per input item that ended in a tone digit: the index of that
/// item's syllable within <paramref name="syllsArr"/>.
/// </param>
private void normalizePinyin(string[] parts, out PinyinSyllable[] syllsArr, out List<int> pinyinMap)
{
    // Per input item:
    // - No trailing digit 1 thru 5: "weird" syllable; kept verbatim with tone -1, not mapped
    // - Otherwise: digit is split off as the tone (5 is stored as 0),
    //   "u:" becomes "v" and "U:" becomes "V", and the output index is recorded in the map
    pinyinMap = new List<int>();
    List<PinyinSyllable> collected = new List<PinyinSyllable>();
    for (int ix = 0; ix != parts.Length; ++ix)
    {
        string raw = parts[ix];
        char last = raw[raw.Length - 1];
        bool hasToneDigit = last >= '1' && last <= '5';
        if (!hasToneDigit)
        {
            collected.Add(new PinyinSyllable(raw, -1));
            continue;
        }

        // Neutral tone for us is 0, not five
        int tone = last == '5' ? 0 : last - '0';
        string text = raw.Substring(0, raw.Length - 1)
            .Replace("u:", "v")
            .Replace("U:", "V");

        collected.Add(new PinyinSyllable(text, tone));
        pinyinMap.Add(collected.Count - 1);
    }
    syllsArr = collected.ToArray();
}
/// <summary>
/// Turns a pinyin query string into a list of normalized syllables.
/// </summary>
/// <param name="query">Raw query as typed by the user.</param>
/// <returns>
/// Syllables in query order; tone is -1 where no tone digit was given. A syllable
/// ending in "r" (other than "er") is returned as the base syllable followed by "r" with tone 0.
/// </returns>
private static List<PinyinSyllable> doParsePinyin(string query)
{
    List<PinyinSyllable> res = new List<PinyinSyllable>();

    // Empty or whitespace-only query: no syllables
    query = query.Trim();
    if (query == string.Empty)
    {
        return res;
    }

    // Lower-case only; both "u:" and "ü" are written as "v"
    query = query.ToLowerInvariant().Replace("u:", "v").Replace("ü", "v");

    // First cut: explicit separators (space and apostrophes)
    string[] explicitSplit = query.Split(new char[] { ' ', '\'', '’' });

    // Second cut: tone digits, then known syllables, for input typed without spaces.
    // The digit splitter is relied upon to drop empty pieces, so every item below is non-empty.
    List<string> pieces = new List<string>();
    foreach (string chunk in explicitSplit)
    {
        foreach (string digitPart in doPinyinSplitDigits(chunk))
        {
            pieces.AddRange(doPinyinSplitSyllables(digitPart));
        }
    }

    // Build syllables: separate trailing tone digit, then detach a trailing "r"
    foreach (string piece in pieces)
    {
        char last = piece[piece.Length - 1];
        bool hasTone = piece.Length > 1 && last >= '1' && last <= '5';

        PinyinSyllable syll;
        if (hasTone)
        {
            // Neutral tone is stored as 0, not 5
            int tone = last == '5' ? 0 : last - '0';
            syll = new PinyinSyllable(piece.Substring(0, piece.Length - 1), tone);
        }
        else
        {
            // No tone mark: tone is unspecified
            syll = new PinyinSyllable(piece, -1);
        }

        string text = syll.Text;
        bool detachR = text != "er" && text.Length > 1 && text.EndsWith("r");
        if (!detachR)
        {
            res.Add(syll);
            continue;
        }

        // Base syllable keeps the tone; the detached "r" is always tone 0
        res.Add(new PinyinSyllable(text.Substring(0, text.Length - 1), syll.Tone));
        res.Add(new PinyinSyllable("r", 0));
    }
    return res;
}
/// <summary>
/// <para>Gets the entry's pinyin for display: every "r" syllable with tone 0 is merged into the</para>
/// <para>syllable before it, so the result may have fewer items than the raw syllables.</para>
/// <para>Also translates a highlight range from raw syllable positions to merged positions.</para>
/// </summary>
/// <param name="diacritics">Not used by this method; kept for interface compatibility.</param>
/// <param name="origHiliteStart">Start of pinyin highlight in the raw syllables, or -1.</param>
/// <param name="origHiliteLength">Length of pinyin highlight in the raw syllables, or 0.</param>
/// <param name="hiliteStart">Start of pinyin highlight in returned collection, or -1.</param>
/// <param name="hiliteLength">Length of pinyin highlight in returned collection, or 0.</param>
/// <returns>The syllables to show in the UI.</returns>
public ReadOnlyCollection<PinyinSyllable> GetPinyinForDisplay(bool diacritics,
    int origHiliteStart, int origHiliteLength,
    out int hiliteStart, out int hiliteLength)
{
    // If pinyin does not have an "r5", no transformation needed
    if (Array.FindIndex(pinyin, x => x.Text == "r" && x.Tone == 0) == -1)
    {
        hiliteStart = origHiliteStart;
        hiliteLength = origHiliteLength;
        return Pinyin;
    }

    // Merged syllables and the raw-to-merged position map are produced together,
    // reading from the source array so that raw and merged indexes cannot drift
    // apart when the entry has more than one "r5".
    List<PinyinSyllable> merged = new List<PinyinSyllable>(pinyin.Length);
    int[] posMap = new int[pinyin.Length];
    for (int rawIx = 0; rawIx != pinyin.Length; ++rawIx)
    {
        PinyinSyllable curr = pinyin[rawIx];
        bool isR5 = curr.Text == "r" && curr.Tone == 0;
        // Merging needs a preceding syllable; a leading "r5" stays as it is
        if (isR5 && merged.Count > 0)
        {
            int lastIx = merged.Count - 1;
            merged[lastIx] = new PinyinSyllable(merged[lastIx].Text + "r", merged[lastIx].Tone);
        }
        else
        {
            merged.Add(curr);
        }
        // This raw position is represented by the last merged item
        posMap[rawIx] = merged.Count - 1;
    }

    // Translate highlight through the position map
    if (origHiliteStart == -1)
    {
        hiliteStart = -1;
        hiliteLength = 0;
    }
    else
    {
        hiliteStart = posMap[origHiliteStart];
        int hiliteEnd = posMap[origHiliteStart + origHiliteLength - 1];
        hiliteLength = hiliteEnd - hiliteStart + 1;
    }
    return new ReadOnlyCollection<PinyinSyllable>(merged);
}