Esempio n. 1
0
        /// <summary>
        /// Ctor: builds a sense from its three hybrid-text parts; the values are stored as given.
        /// </summary>
        /// <param name="domain">Hybrid text assigned to Domain.</param>
        /// <param name="equiv">Hybrid text assigned to Equiv.</param>
        /// <param name="note">Hybrid text assigned to Note.</param>
        public CedictSense(HybridText domain, HybridText equiv, HybridText note)
        {
            this.Domain = domain;
            this.Equiv  = equiv;
            this.Note   = note;
        }
Esempio n. 2
0
        /// <summary>
        /// <para>Tokenizes a sense's equiv, given as hybrid text, run by run.</para>
        /// <para>A Chinese run becomes one zero-length token carrying the Chinese placeholder ID;
        /// every other run's plain text is handed to <c>tokenizeRun</c>.</para>
        /// </summary>
        /// <param name="txt">Hybrid text whose runs are tokenized in order.</param>
        /// <returns>Read-only view of the tokens, in the order they were produced.</returns>
        public ReadOnlyCollection<EquivToken> Tokenize(HybridText txt)
        {
            List<EquivToken> tokens = new List<EquivToken>();
            int runIndex = 0;

            foreach (TextRun run in txt.Runs)
            {
                if (run is TextRunZho)
                {
                    // Chinese runs are not split up: a single placeholder token stands for the whole run
                    int zhoId = wh.IdZho;
                    tokens.Add(new EquivToken
                    {
                        TokenId     = zhoId,
                        RunIx       = runIndex,
                        StartInRun  = 0,
                        LengthInRun = 0,
                    });
                }
                else
                {
                    tokenizeRun(run.GetPlainText(), runIndex, tokens);
                }
                ++runIndex;
            }
            return tokens.AsReadOnly();
        }
Esempio n. 3
0
        /// <summary>
        /// Renders a hybrid text as CEDICT-formatted plain text.
        /// </summary>
        /// <remarks>
        /// Latin runs are copied as they are. Every other run is written as simplified, then
        /// <c>|</c> plus traditional when that is non-empty and different, then pinyin in square brackets.
        /// Runs are separated by one space, except before a Latin run that is empty or starts with punctuation.
        /// </remarks>
        /// <param name="ht">Hybrid text to convert.</param>
        /// <returns>The CEDICT-formatted string.</returns>
        public static string HybridToCedict(HybridText ht)
        {
            StringBuilder output = new StringBuilder();

            for (int runIx = 0; runIx != ht.RunCount; ++runIx)
            {
                // Only runs after the very first one can be preceded by a separator space
                bool hasPredecessor = runIx != 0;
                TextRun run = ht.GetRunAt(runIx);

                if (!(run is TextRunLatin))
                {
                    if (hasPredecessor)
                    {
                        output.Append(' ');
                    }
                    TextRunZho zho = run as TextRunZho;
                    if (!string.IsNullOrEmpty(zho.Simp))
                    {
                        output.Append(zho.Simp);
                    }
                    // Traditional is only written when it adds information
                    if (!string.IsNullOrEmpty(zho.Trad) && zho.Trad != zho.Simp)
                    {
                        output.Append('|');
                        output.Append(zho.Trad);
                    }
                    if (zho.Pinyin != null)
                    {
                        output.Append('[');
                        output.Append(GetPinyinCedict(zho.Pinyin));
                        output.Append(']');
                    }
                    continue;
                }

                string latin = run.GetPlainText();
                bool needsSpace = hasPredecessor && latin != string.Empty && !char.IsPunctuation(latin[0]);
                if (needsSpace)
                {
                    output.Append(' ');
                }
                output.Append(latin);
            }
            return output.ToString();
        }
Esempio n. 4
0
 /// <summary>
 /// Returns true if display font covers all Hanzi in hybrid text; false otherwise.
 /// </summary>
 /// <remarks>
 /// Only Chinese runs are inspected. Each character of a run's simplified text must have the
 /// <c>FontCoverageFlags.Simp</c> flag, and each character of its traditional text the
 /// <c>FontCoverageFlags.Trad</c> flag, as reported by <c>cvr.GetCoverage</c>.
 /// The two texts are checked independently, so a run that has traditional text but no
 /// simplified text is still verified.
 /// </remarks>
 /// <param name="ht">Hybrid text whose Chinese runs are checked.</param>
 /// <returns>True if no uncovered character was found (also for an empty text).</returns>
 private bool areHanziCovered(HybridText ht)
 {
     if (ht.IsEmpty)
     {
         return true;
     }
     for (int i = 0; i != ht.RunCount; ++i)
     {
         TextRunZho zhoRun = ht.GetRunAt(i) as TextRunZho;
         // Non-Chinese runs contain no Hanzi to verify
         if (zhoRun == null)
         {
             continue;
         }
         if (zhoRun.Simp != null)
         {
             foreach (char c in zhoRun.Simp)
             {
                 if (!cvr.GetCoverage(c).HasFlag(FontCoverageFlags.Simp))
                 {
                     return false;
                 }
             }
         }
         // Checked even when Simp is missing: a null Simp must not hide uncovered traditional Hanzi
         if (zhoRun.Trad != null)
         {
             foreach (char c in zhoRun.Trad)
             {
                 if (!cvr.GetCoverage(c).HasFlag(FontCoverageFlags.Trad))
                 {
                     return false;
                 }
             }
         }
     }
     return true;
 }
Esempio n. 5
0
        /// <summary>
        /// Renders a hybrid text as HTML, wrapping Hanzi in the sense-Hanzi template and
        /// putting pinyin in square brackets.
        /// </summary>
        /// <param name="ht">Hybrid text to convert.</param>
        /// <param name="script">
        /// Selects the Hanzi shown: traditional only for <c>SearchScript.Traditional</c>, otherwise simplified;
        /// for <c>SearchScript.Both</c>, traditional is added after a bullet when simplified is present.
        /// </param>
        /// <returns>The HTML string.</returns>
        public static string HybridToHtml(HybridText ht, SearchScript script)
        {
            StringBuilder html = new StringBuilder();

            for (int runIx = 0; runIx != ht.RunCount; ++runIx)
            {
                bool hasPredecessor = runIx != 0;
                TextRun run = ht.GetRunAt(runIx);

                // Latin text goes out as it is; a space separates it from what came before,
                // unless it is empty or starts with punctuation.
                if (run is TextRunLatin)
                {
                    string latin = run.GetPlainText();
                    bool needsSpace = hasPredecessor && latin != string.Empty && !char.IsPunctuation(latin[0]);
                    if (needsSpace)
                    {
                        html.Append(' ');
                    }
                    html.Append(latin);
                    continue;
                }

                if (hasPredecessor)
                {
                    html.Append(' ');
                }
                TextRunZho zho = run as TextRunZho;

                // Pick the Hanzi to display; an empty string counts as "nothing to show"
                string primary = (script == SearchScript.Traditional) ? zho.Trad : zho.Simp;
                if (string.IsNullOrEmpty(primary))
                {
                    primary = null;
                }
                string secondary = null;
                if (primary != null)
                {
                    bool showTradToo = script == SearchScript.Both && !string.IsNullOrEmpty(zho.Trad);
                    primary = escape(primary);
                    if (showTradToo)
                    {
                        secondary = escape(zho.Trad);
                    }
                }

                bool anyHanzi = primary != null || secondary != null;
                if (anyHanzi)
                {
                    html.Append(templateSenseHanziOpen);
                }
                if (primary != null)
                {
                    html.Append(primary);
                }
                if (secondary != null)
                {
                    html.Append(' ');
                    html.Append(templateBullet);
                    html.Append(' ');
                    html.Append(secondary);
                }
                if (anyHanzi)
                {
                    html.Append(templateSenseHanziClose);
                }

                if (zho.Pinyin != null)
                {
                    if (primary != null)
                    {
                        html.Append(' ');
                    }
                    html.Append('[');
                    html.Append(escape(zho.GetPinyinInOne(true)));
                    html.Append(']');
                }
            }
            return html.ToString();
        }
Esempio n. 6
0
        /// <summary>
        /// Breaks down body content into typographic blocks and caches the size of these.
        /// </summary>
        /// <remarks>
        /// Does nothing if <c>measuredBlocks</c> is already set. Otherwise builds one list of blocks covering
        /// all senses of <c>entry</c>, fills in the width of each text block, and stores the result in
        /// <c>measuredBlocks</c>. Link areas gathered along the way are stored in <c>targetLinks</c> only
        /// if there is at least one. As a side effect, sets the text rendering hint of <paramref name="g"/>
        /// to anti-aliasing.
        /// </remarks>
        /// <param name="g">A Graphics object used for measurements.</param>
        private void doMeasureBlocks(Graphics g)
        {
            // Once measured, blocks don't change. Nothing to do then.
            if (measuredBlocks != null) return;

            // This is how we measure
            StringFormat sf = StringFormat.GenericTypographic;
            g.TextRenderingHint = System.Drawing.Text.TextRenderingHint.AntiAlias;

            // Decide about size of sense ID up front: that's always a square, letter-height
            // (the measured *height* of "x" is used as the block's width)
            SizeF xSize = g.MeasureString("x", getFont(fntSenseLatin), 65535, sf);
            ushort senseIdxWidth = (ushort)Math.Ceiling(xSize.Height);

            // Create array with as many items as senses
            // Each item is null, or highlight in sense's equiv
            CedictTargetHighlight[] hlArr = new CedictTargetHighlight[entry.SenseCount];
            foreach (CedictTargetHighlight hl in res.TargetHilites) hlArr[hl.SenseIx] = hl;

            // Recreate list of blocks
            List<Block> newBlocks = new List<Block>();
            // Collect links here. Will only keep at end if not empty.
            List<LinkArea> newLinks = new List<LinkArea>();

            // senseIdx counts every sense; displaySenseIdx only counts non-classifier senses,
            // and is what the sense ID shown to the user is derived from.
            int senseIdx = -1;
            int displaySenseIdx = -1;
            bool lastWasClassifier = false;
            foreach (CedictSense cm in entry.Senses)
            {
                ++senseIdx;
                // Is this sense a classifier?
                bool classifier = cm.Domain.EqualsPlainText("CL:");
                if (!classifier) ++displaySenseIdx;
                // Add one block for sense ID, unless this is a classifier "sense"
                if (!classifier)
                {
                    Block sidBlock = new Block
                    {
                        Width = senseIdxWidth,
                        StickRight = true,
                        TextPos = textPool.PoolString(getSenseIdString(displaySenseIdx)),
                        // A sense ID that follows a classifier requests a new line
                        NewLine = lastWasClassifier,
                        SenseId = true,
                        FirstInCedictSense = true,
                    };
                    newBlocks.Add(sidBlock);
                }
                // Split domain, equiv and note into typographic parts
                // Splits along spaces and dashes
                // Unpacks Chinese ranges
                // Domain is localized text for "Classifier:" if, well, this is a classifier sense
                // startIX is taken *after* the sense ID block, so that block keeps its preset width below
                int startIX = newBlocks.Count;
                if (!classifier) makeBlocks(cm.Domain, true, null, newBlocks, newLinks);
                else
                {
                    string strClassifier = tprov.GetString("ResultCtrlClassifier");
                    HybridText htClassifier = new HybridText(strClassifier);
                    int ix = newBlocks.Count;
                    makeBlocks(htClassifier, true, null, newBlocks, newLinks);
                    // First block of the classifier label requests a new line.
                    // Element is copied out, changed and stored back (presumably Block is a value type - confirm).
                    // NOTE(review): assumes makeBlocks added at least one block for the label text.
                    Block xb = newBlocks[ix];
                    xb.NewLine = true;
                    newBlocks[ix] = xb;
                }
                // Only the equiv can carry a highlight; domain and note are passed as meta text
                makeBlocks(cm.Equiv, false, hlArr[senseIdx], newBlocks, newLinks);
                makeBlocks(cm.Note, true, null, newBlocks, newLinks);
                // If sense is a classifier, mark first block as sense starter
                // (non-classifier senses have this flag on their sense ID block instead)
                if (classifier)
                {
                    Block sstart = newBlocks[startIX];
                    sstart.FirstInCedictSense = true;
                    newBlocks[startIX] = sstart;
                }
                // Measure each block created for this sense by makeBlocks
                for (int i = startIX; i != newBlocks.Count; ++i)
                {
                    Block tb = newBlocks[i];
                    // Any font index other than the two Latin ones is measured through HanziRenderer
                    bool isHanzi = !(tb.FontIdx == fntMetaLatin || tb.FontIdx == fntSenseLatin);
                    SizeF sz;
                    if (!isHanzi) sz = g.MeasureString(textPool.GetString(tb.TextPos), getFont(tb.FontIdx), 65535, sf);
                    else sz = HanziRenderer.MeasureString(g, Magic.ZhoContentFontFamily, textPool.GetString(tb.TextPos), Magic.LemmaHanziFontSize);
                    tb.Width = (ushort)Math.Round(sz.Width);
                    newBlocks[i] = tb;
                }
                lastWasClassifier = classifier;
            }
            // targetLinks is left untouched when no link was found
            if (newLinks.Count != 0) targetLinks = newLinks;
            measuredBlocks = newBlocks.ToArray();
        }
Esempio n. 7
0
        /// <summary>
        /// <para>Produces unmeasured display blocks from a single hybrid text. Marks highlights, if any.</para>
        /// <para>Does not fill in blocks' size, but fills in everything else.</para>
        /// </summary>
        /// <remarks>
        /// Latin runs are split at spaces and then by <c>splitByDash</c>, one block per part.
        /// Every other run is treated as Chinese and yields blocks for simplified, a bullet separator,
        /// traditional and each space-separated pinyin part (whichever apply), plus one link area.
        /// A following Latin run that is empty no longer causes an exception in the punctuation look-ahead.
        /// </remarks>
        /// <param name="htxt">Hybrid text to break down into blocks and measure.</param>
        /// <param name="isMeta">True if this is a domain or note (displayed in italics).</param>
        /// <param name="hl">Highlight to show in hybrid text, or null.</param>
        /// <param name="blocks">List of blocks to append to.</param>
        /// <param name="links">List to gather links (appending to list).</param>
        private void makeBlocks(HybridText htxt, bool isMeta, CedictTargetHighlight hl,
            List<Block> blocks, List<LinkArea> links)
        {
            byte fntIdxLatin = isMeta ? fntMetaLatin : fntSenseLatin;
            byte fntIdxZhoSimp = isMeta ? fntMetaHanziSimp : fntSenseHanziSimp;
            byte fntIdxZhoTrad = isMeta ? fntMetaHanziTrad : fntSenseHanziTrad;
            // Go run by run
            for (int runIX = 0; runIX != htxt.RunCount; ++runIX)
            {
                TextRun run = htxt.GetRunAt(runIX);
                // Latin run: split by spaces first
                if (run is TextRunLatin)
                {
                    string[] bySpaces = run.GetPlainText().Split(new char[] { ' ' });
                    // Each word: also by dash
                    // latnPos is the word's start offset within the run's plain text
                    int latnPos = 0;
                    foreach (string str in bySpaces)
                    {
                        string[] byDashes = splitByDash(str);
                        // Add block for each
                        // ofsPos is the part's offset within the word
                        // (assumes the parts concatenate back to the word - TODO confirm against splitByDash)
                        int ofsPos = 0;
                        foreach (string blockStr in byDashes)
                        {
                            Block tb = new Block
                            {
                                TextPos = textPool.PoolString(blockStr),
                                FontIdx = fntIdxLatin,
                                SpaceAfter = false, // will set this true for last block in "byDashes"
                            };
                            // Does block's text intersect with highlight?
                            if (hl != null && hl.RunIx == runIX)
                            {
                                int blockStart = latnPos + ofsPos;
                                int blockEnd = blockStart + blockStr.Length;
                                var hiliteEnd = hl.HiliteStart + hl.HiliteLength;
                                // Block starts inside the highlight, ends inside it, or spans all of it
                                bool startsInside = blockStart >= hl.HiliteStart && blockStart < hiliteEnd;
                                bool endsInside = blockEnd > hl.HiliteStart && blockEnd <= hiliteEnd;
                                bool spansAll = blockStart < hl.HiliteStart && blockEnd >= hiliteEnd;
                                if (startsInside || endsInside || spansAll) tb.Hilite = true;
                            }
                            blocks.Add(tb);
                            // Keep track of position for highlight
                            ofsPos += blockStr.Length;
                        }
                        // Make sure last one is followed by space
                        // (guarded: nothing to mark if no block exists at all yet)
                        if (blocks.Count != 0)
                        {
                            Block xb = blocks[blocks.Count - 1];
                            xb.SpaceAfter = true;
                            blocks[blocks.Count - 1] = xb;
                        }
                        // Keep track of position in text - for highlights; +1 accounts for the space split on
                        latnPos += str.Length + 1;
                    }
                }
                // Chinese: depends on T/S/Both display mode, and on available info
                else
                {
                    TextRunZho zhoRun = run as TextRunZho;
                    // Chinese range is made up of:
                    // Simplified (empty string if only traditional requested)
                    // Separator (if both simplified and traditional are requested)
                    // Traditional (empty string if only simplified requested)
                    // Pinyin with accents as tone marks, in brackets (if present)
                    string strSimp = string.Empty;
                    if (analyzedScript != SearchScript.Traditional && zhoRun.Simp != null) strSimp = zhoRun.Simp;
                    string strTrad = string.Empty;
                    if (analyzedScript != SearchScript.Simplified && zhoRun.Trad != null) strTrad = zhoRun.Trad;
                    string strPy = string.Empty;
                    // Convert pinyin to display format (tone marks as diacritics; r5 glued)
                    if (zhoRun.Pinyin != null) strPy = "[" + zhoRun.GetPinyinInOne(true) + "]";

                    // Create link area, with query string
                    string strPyNumbers = string.Empty; // Pinyin with numbers as tone marks
                    if (zhoRun.Pinyin != null) strPyNumbers = zhoRun.GetPinyinRaw();
                    LinkArea linkArea = new LinkArea(strSimp, strTrad, strPyNumbers, analyzedScript);

                    // Block for simplified, if present
                    if (strSimp != string.Empty)
                    {
                        Block tb = new Block
                        {
                            TextPos = textPool.PoolString(strSimp),
                            FontIdx = fntIdxZhoSimp,
                            SpaceAfter = true,
                        };
                        blocks.Add(tb);
                        linkArea.BlockIds.Add(blocks.Count - 1);
                    }
                    // Separator if both simplified and traditional are there
                    // AND they are different...
                    if (strSimp != string.Empty && strTrad != string.Empty && strSimp != strTrad)
                    {
                        // Last block here is the simplified one just added: mark it to stick right
                        Block xb = blocks[blocks.Count - 1];
                        xb.StickRight = true;
                        blocks[blocks.Count - 1] = xb;
                        Block tb = new Block
                        {
                            TextPos = textPool.PoolString("•"),
                            FontIdx = fntIdxLatin,
                            SpaceAfter = true,
                        };
                        blocks.Add(tb);
                        linkArea.BlockIds.Add(blocks.Count - 1);
                    }
                    // Traditional, if present
                    if (strTrad != string.Empty && strTrad != strSimp)
                    {
                        Block tb = new Block
                        {
                            TextPos = textPool.PoolString(strTrad),
                            FontIdx = fntIdxZhoTrad,
                            SpaceAfter = true,
                        };
                        blocks.Add(tb);
                        linkArea.BlockIds.Add(blocks.Count - 1);
                    }
                    // Pinyin, if present
                    if (strPy != string.Empty)
                    {
                        // Split by spaces
                        string[] pyParts = strPy.Split(new char[] { ' ' });
                        foreach (string pyPart in pyParts)
                        {
                            Block tb = new Block
                            {
                                TextPos = textPool.PoolString(pyPart),
                                FontIdx = fntIdxLatin,
                                SpaceAfter = true,
                            };
                            blocks.Add(tb);
                            linkArea.BlockIds.Add(blocks.Count - 1);
                        }
                    }
                    // Last part will have requested a space after.
                    // Look ahead and if next text run is Latin and starts with punctuation, make it stick
                    TextRunLatin nextLatinRun = null;
                    if (runIX + 1 < htxt.RunCount) nextLatinRun = htxt.GetRunAt(runIX + 1) as TextRunLatin;
                    string nextText = nextLatinRun == null ? null : nextLatinRun.GetPlainText();
                    // An empty Latin run has no first character to test; an empty list has no block to change
                    if (!string.IsNullOrEmpty(nextText) && char.IsPunctuation(nextText[0]) && blocks.Count != 0)
                    {
                        Block xb = blocks[blocks.Count - 1];
                        xb.SpaceAfter = false;
                        blocks[blocks.Count - 1] = xb;
                    }
                    // Collect link area
                    links.Add(linkArea);
                }
            }
        }
        /// <summary>
        /// <para>Produces unmeasured display blocks from a single hybrid text. Marks highlights, if any.</para>
        /// <para>Does not fill in blocks' size, but fills in everything else.</para>
        /// </summary>
        /// <remarks>
        /// Latin runs are split at spaces and then by <c>splitByDash</c>, one block per part.
        /// Every other run is treated as Chinese and yields blocks for simplified, a bullet separator,
        /// traditional and each space-separated pinyin part (whichever apply), plus one link area.
        /// The punctuation look-ahead tolerates an empty following Latin run instead of throwing.
        /// </remarks>
        /// <param name="htxt">Hybrid text to break down into blocks and measure.</param>
        /// <param name="isMeta">True if this is a domain or note (displayed in italics).</param>
        /// <param name="hl">Highlight to show in hybrid text, or null.</param>
        /// <param name="blocks">List of blocks to append to.</param>
        /// <param name="links">List to gather links (appending to list).</param>
        private void makeBlocks(HybridText htxt, bool isMeta, CedictTargetHighlight hl,
                                List <Block> blocks, List <LinkArea> links)
        {
            byte fntIdxLatin   = isMeta ? fntMetaLatin : fntSenseLatin;
            byte fntIdxZhoSimp = isMeta ? fntMetaHanziSimp : fntSenseHanziSimp;
            byte fntIdxZhoTrad = isMeta ? fntMetaHanziTrad : fntSenseHanziTrad;

            // Go run by run
            for (int runIX = 0; runIX != htxt.RunCount; ++runIX)
            {
                TextRun run = htxt.GetRunAt(runIX);
                // Latin run: split by spaces first
                if (run is TextRunLatin)
                {
                    string[] bySpaces = run.GetPlainText().Split(new char[] { ' ' });
                    // Each word: also by dash
                    // latnPos is the word's start offset within the run's plain text
                    int latnPos = 0;
                    foreach (string str in bySpaces)
                    {
                        string[] byDashes = splitByDash(str);
                        // Add block for each
                        // ofsPos is the part's offset within the word
                        // (assumes the parts concatenate back to the word - TODO confirm against splitByDash)
                        int ofsPos = 0;
                        foreach (string blockStr in byDashes)
                        {
                            Block tb = new Block
                            {
                                TextPos    = textPool.PoolString(blockStr),
                                FontIdx    = fntIdxLatin,
                                SpaceAfter = false, // will set this true for last block in "byDashes"
                            };
                            // Does block's text intersect with highlight?
                            if (hl != null && hl.RunIx == runIX)
                            {
                                int blockStart = latnPos + ofsPos;
                                int blockEnd   = blockStart + blockStr.Length;
                                var hiliteEnd  = hl.HiliteStart + hl.HiliteLength;
                                // Highlighted if the block starts inside the highlight, ends inside it,
                                // or stretches across all of it
                                if ((blockStart >= hl.HiliteStart && blockStart < hiliteEnd) ||
                                    (blockEnd > hl.HiliteStart && blockEnd <= hiliteEnd) ||
                                    (blockStart < hl.HiliteStart && blockEnd >= hiliteEnd))
                                {
                                    tb.Hilite = true;
                                }
                            }
                            blocks.Add(tb);
                            // Keep track of position for highlight
                            ofsPos += blockStr.Length;
                        }
                        // Make sure last one is followed by space
                        // (guarded: nothing to mark if no block exists at all yet)
                        if (blocks.Count != 0)
                        {
                            int lastIx = blocks.Count - 1;
                            Block xb = blocks[lastIx];
                            xb.SpaceAfter  = true;
                            blocks[lastIx] = xb;
                        }
                        // Keep track of position in text - for highlights; +1 accounts for the space split on
                        latnPos += str.Length + 1;
                    }
                }
                // Chinese: depends on T/S/Both display mode, and on available info
                else
                {
                    TextRunZho zhoRun = run as TextRunZho;
                    // Chinese range is made up of:
                    // Simplified (empty string if only traditional requested)
                    // Separator (if both simplified and traditional are requested)
                    // Traditional (empty string if only simplified requested)
                    // Pinyin with accents as tone marks, in brackets (if present)
                    string strSimp = string.Empty;
                    if (analyzedScript != SearchScript.Traditional && zhoRun.Simp != null)
                    {
                        strSimp = zhoRun.Simp;
                    }
                    string strTrad = string.Empty;
                    if (analyzedScript != SearchScript.Simplified && zhoRun.Trad != null)
                    {
                        strTrad = zhoRun.Trad;
                    }
                    string strPy = string.Empty;
                    // Convert pinyin to display format (tone marks as diacritics; r5 glued)
                    if (zhoRun.Pinyin != null)
                    {
                        strPy = "[" + zhoRun.GetPinyinInOne(true) + "]";
                    }

                    // Create link area, with query string
                    string strPyNumbers = string.Empty; // Pinyin with numbers as tone marks
                    if (zhoRun.Pinyin != null)
                    {
                        strPyNumbers = zhoRun.GetPinyinRaw();
                    }
                    LinkArea linkArea = new LinkArea(strSimp, strTrad, strPyNumbers, analyzedScript);

                    // Block for simplified, if present
                    if (strSimp != string.Empty)
                    {
                        Block tb = new Block
                        {
                            TextPos    = textPool.PoolString(strSimp),
                            FontIdx    = fntIdxZhoSimp,
                            SpaceAfter = true,
                        };
                        blocks.Add(tb);
                        linkArea.BlockIds.Add(blocks.Count - 1);
                    }
                    // Separator if both simplified and traditional are there
                    // AND they are different...
                    if (strSimp != string.Empty && strTrad != string.Empty && strSimp != strTrad)
                    {
                        // Last block here is the simplified one just added: mark it to stick right
                        Block xb = blocks[blocks.Count - 1];
                        xb.StickRight            = true;
                        blocks[blocks.Count - 1] = xb;
                        Block tb = new Block
                        {
                            TextPos    = textPool.PoolString("•"),
                            FontIdx    = fntIdxLatin,
                            SpaceAfter = true,
                        };
                        blocks.Add(tb);
                        linkArea.BlockIds.Add(blocks.Count - 1);
                    }
                    // Traditional, if present
                    if (strTrad != string.Empty && strTrad != strSimp)
                    {
                        Block tb = new Block
                        {
                            TextPos    = textPool.PoolString(strTrad),
                            FontIdx    = fntIdxZhoTrad,
                            SpaceAfter = true,
                        };
                        blocks.Add(tb);
                        linkArea.BlockIds.Add(blocks.Count - 1);
                    }
                    // Pinyin, if present
                    if (strPy != string.Empty)
                    {
                        // Split by spaces
                        string[] pyParts = strPy.Split(new char[] { ' ' });
                        foreach (string pyPart in pyParts)
                        {
                            Block tb = new Block
                            {
                                TextPos    = textPool.PoolString(pyPart),
                                FontIdx    = fntIdxLatin,
                                SpaceAfter = true,
                            };
                            blocks.Add(tb);
                            linkArea.BlockIds.Add(blocks.Count - 1);
                        }
                    }
                    // Last part will have requested a space after.
                    // Look ahead and if next text run is Latin and starts with punctuation, make it stick
                    TextRunLatin nextLatinRun = null;
                    if (runIX + 1 < htxt.RunCount)
                    {
                        nextLatinRun = htxt.GetRunAt(runIX + 1) as TextRunLatin;
                    }
                    string nextText = null;
                    if (nextLatinRun != null)
                    {
                        nextText = nextLatinRun.GetPlainText();
                    }
                    // An empty Latin run has no first character to test; an empty list has no block to change
                    if (!string.IsNullOrEmpty(nextText) && char.IsPunctuation(nextText[0]) && blocks.Count != 0)
                    {
                        Block xb = blocks[blocks.Count - 1];
                        xb.SpaceAfter            = false;
                        blocks[blocks.Count - 1] = xb;
                    }
                    // Collect link area
                    links.Add(linkArea);
                }
            }
        }
Esempio n. 9
0
        /// <summary>
        /// Builds the HTML form of a hybrid text: Latin runs verbatim, Hanzi wrapped in the
        /// sense-Hanzi template, pinyin in square brackets.
        /// </summary>
        /// <param name="ht">Hybrid text to convert.</param>
        /// <param name="script">
        /// Which Hanzi to show: traditional for <c>SearchScript.Traditional</c>, simplified otherwise;
        /// with <c>SearchScript.Both</c>, traditional follows simplified after a bullet.
        /// </param>
        /// <returns>The HTML string.</returns>
        public static string HybridToHtml(HybridText ht, SearchScript script)
        {
            StringBuilder buf = new StringBuilder();

            for (int pos = 0; pos != ht.RunCount; ++pos)
            {
                TextRun current = ht.GetRunAt(pos);
                bool separate = pos != 0; // every run but the first may need a leading space

                if (current is TextRunLatin)
                {
                    string text = current.GetPlainText();
                    // No space before an empty run or one that opens with punctuation
                    if (separate && text != string.Empty && !char.IsPunctuation(text[0])) buf.Append(' ');
                    buf.Append(text);
                    continue;
                }

                if (separate) buf.Append(' ');
                TextRunZho zho = current as TextRunZho;

                string chosen = (script == SearchScript.Traditional) ? zho.Trad : zho.Simp;
                bool hasChosen = !string.IsNullOrEmpty(chosen);
                // Traditional is shown as a second form only next to a non-empty first form
                bool wantSecond = hasChosen && script == SearchScript.Both && !string.IsNullOrEmpty(zho.Trad);
                string hanziA = hasChosen ? escape(chosen) : null;
                string hanziB = wantSecond ? escape(zho.Trad) : null;

                bool wrap = hanziA != null || hanziB != null;
                if (wrap) buf.Append(templateSenseHanziOpen);
                if (hanziA != null) buf.Append(hanziA);
                if (hanziB != null)
                {
                    buf.Append(' ');
                    buf.Append(templateBullet);
                    buf.Append(' ');
                    buf.Append(hanziB);
                }
                if (wrap) buf.Append(templateSenseHanziClose);

                if (zho.Pinyin == null) continue;
                if (hanziA != null) buf.Append(' ');
                buf.Append('[');
                buf.Append(escape(zho.GetPinyinInOne(true)));
                buf.Append(']');
            }
            return buf.ToString();
        }
Esempio n. 10
0
 /// <summary>
 /// Ctor: deserializes a sense from a binary stream.
 /// </summary>
 /// <param name="br">Reader to take the data from; domain, equiv and note are read in that order.</param>
 public CedictSense(BinReader br)
 {
     // Read order is fixed: each Deserialize call consumes the next part from the reader
     var domain = HybridText.Deserialize(br);
     var equiv = HybridText.Deserialize(br);
     var note = HybridText.Deserialize(br);

     this.Domain = domain;
     this.Equiv = equiv;
     this.Note = note;
 }
Esempio n. 11
0
        /// <summary>
        /// Retrieves matching entries for a target-language search expression.
        /// </summary>
        /// <param name="br">Binary reader used to load index instances, tokenized senses and headword hanzi.</param>
        /// <param name="query">The search expression; null or whitespace-only input yields no results.</param>
        /// <returns>Matching entries, best-scoring first; an empty list if nothing matches.</returns>
        private List<CedictResult> doTargetLookup(BinReader br, string query)
        {
            List<CedictResult> res = new List<CedictResult>();

            // Missing or empty query string: no results
            if (query == null) return res;
            query = query.Trim();
            if (query.Length == 0) return res;

            // Tokenize query string
            HybridText txtQuery = new HybridText(query);
            ReadOnlyCollection<EquivToken> txtTokenized = tokenizer.Tokenize(txtQuery);
            // Get query string's distinct token IDs
            // Any unknown or Chinese token - no match, we know that immediately
            HashSet<int> idSet = new HashSet<int>();
            foreach (EquivToken eqt in txtTokenized)
            {
                if (eqt.TokenId == WordHolder.IdUnknown || eqt.TokenId == index.WordHolder.IdZho)
                    return res;
                idSet.Add(eqt.TokenId);
            }
            // Collect IDs of tokenized senses that contain one or more of our query IDs
            Dictionary<int, SenseLookupInfo> senseTokenCounts = new Dictionary<int, SenseLookupInfo>();
            bool firstToken = true;
            // For each token...
            foreach (int tokenId in idSet)
            {
                // Get sense instances where it occurs
                List<SenseInfo> instances = index.SenseIndex[tokenId].GetOrLoadInstances(br);
                foreach (SenseInfo si in instances)
                {
                    SenseLookupInfo sli;
                    // We already have a count for this sense: bump it (single dictionary lookup)
                    if (senseTokenCounts.TryGetValue(si.TokenizedSenseId, out sli))
                        ++sli.NumOfQueryTokensInSense;
                    // Or this is the first time we're seeing it
                    // We only start new counts for the first token:
                    // we're looking for senses that contain *all* query tokens,
                    // so a sense that lacks the first token can never qualify
                    else if (firstToken)
                    {
                        sli = new SenseLookupInfo
                        {
                            NumOfQueryTokensInSense = 1,
                            TokensInSense = si.TokensInSense
                        };
                        senseTokenCounts[si.TokenizedSenseId] = sli;
                    }
                }
                firstToken = false;
            }
            // Keep those sense IDs (positions) that contain all of our query tokens
            // We already eliminated some candidates through "firstToken" trick before, but not all
            List<int> sensePosList = new List<int>();
            foreach (var x in senseTokenCounts)
            {
                if (x.Value.NumOfQueryTokensInSense == idSet.Count)
                    sensePosList.Add(x.Key);
            }
            // Load each tokenized sense to find out:
            // - whether entry is a real match
            // - entry ID
            // - best score for entry (multiple senses may hold query string)
            // - highlights
            Dictionary<int, EntryMatchInfo> entryIdToInfo = new Dictionary<int, EntryMatchInfo>();
            foreach (int senseId in sensePosList)
                doVerifyTarget(txtTokenized, senseId, entryIdToInfo, br);

            // Sort entry IDs by their best score
            // Drop entries with unprintable hanzi in HW now
            List<EntryMatchInfo> entryInfoList = new List<EntryMatchInfo>();
            foreach (var x in entryIdToInfo)
            {
                // Check coverage. Because we don't load full entry, it's possible
                // that some unsupported chars in hybrid text of senses slip through.
                // There's a limit to perfectionism.
                string simp, trad;
                // The entry ID is used directly as the reader position of the entry
                br.Position = x.Value.EntryId;
                CedictEntry.DeserializeHanzi(br, out simp, out trad);
                if (!areHanziCovered(simp, trad)) continue;
                // Queue up for sorting.
                entryInfoList.Add(x.Value);
            }
            // Descending by score: note the swapped operands
            entryInfoList.Sort((a, b) => b.BestSenseScore.CompareTo(a.BestSenseScore));
            // Wrap entry IDs and their highlights into results
            foreach (EntryMatchInfo emi in entryInfoList)
            {
                CedictResult cr = new CedictResult(emi.EntryId,
                    new ReadOnlyCollection<CedictTargetHighlight>(emi.TargetHilites));
                res.Add(cr);
            }
            return res;
        }
Esempio n. 12
0
        /// <summary>
        /// Retrieves matching entries for a target-language search expression.
        /// </summary>
        /// <param name="br">Binary reader used to load index instances, tokenized senses and headword hanzi.</param>
        /// <param name="query">The search expression; null or whitespace-only input yields no results.</param>
        /// <returns>Matching entries, best-scoring first; an empty list if nothing matches.</returns>
        private List<CedictResult> doTargetLookup(BinReader br, string query)
        {
            List<CedictResult> res = new List<CedictResult>();

            // Missing or empty query string: no results
            if (query == null)
            {
                return res;
            }
            query = query.Trim();
            if (query.Length == 0)
            {
                return res;
            }

            // Tokenize query string
            HybridText txtQuery = new HybridText(query);
            ReadOnlyCollection<EquivToken> txtTokenized = tokenizer.Tokenize(txtQuery);

            // Get query string's distinct token IDs
            // Any unknown or Chinese token - no match, we know that immediately
            HashSet<int> idSet = new HashSet<int>();
            foreach (EquivToken eqt in txtTokenized)
            {
                if (eqt.TokenId == WordHolder.IdUnknown || eqt.TokenId == index.WordHolder.IdZho)
                {
                    return res;
                }
                idSet.Add(eqt.TokenId);
            }

            // Collect IDs of tokenized senses that contain one or more of our query IDs
            Dictionary<int, SenseLookupInfo> senseTokenCounts = new Dictionary<int, SenseLookupInfo>();
            bool firstToken = true;

            // For each token...
            foreach (int tokenId in idSet)
            {
                // Get sense instances where it occurs
                List<SenseInfo> instances = index.SenseIndex[tokenId].GetOrLoadInstances(br);
                foreach (SenseInfo si in instances)
                {
                    SenseLookupInfo sli;
                    // We already have a count for this sense: bump it (single dictionary lookup)
                    if (senseTokenCounts.TryGetValue(si.TokenizedSenseId, out sli))
                    {
                        ++sli.NumOfQueryTokensInSense;
                    }
                    // Or this is the first time we're seeing it
                    // We only start new counts for the first token:
                    // we're looking for senses that contain *all* query tokens,
                    // so a sense that lacks the first token can never qualify
                    else if (firstToken)
                    {
                        sli = new SenseLookupInfo
                        {
                            NumOfQueryTokensInSense = 1,
                            TokensInSense = si.TokensInSense
                        };
                        senseTokenCounts[si.TokenizedSenseId] = sli;
                    }
                }
                firstToken = false;
            }

            // Keep those sense IDs (positions) that contain all of our query tokens
            // We already eliminated some candidates through "firstToken" trick before, but not all
            List<int> sensePosList = new List<int>();
            foreach (var x in senseTokenCounts)
            {
                if (x.Value.NumOfQueryTokensInSense == idSet.Count)
                {
                    sensePosList.Add(x.Key);
                }
            }

            // Load each tokenized sense to find out:
            // - whether entry is a real match
            // - entry ID
            // - best score for entry (multiple senses may hold query string)
            // - highlights
            Dictionary<int, EntryMatchInfo> entryIdToInfo = new Dictionary<int, EntryMatchInfo>();
            foreach (int senseId in sensePosList)
            {
                doVerifyTarget(txtTokenized, senseId, entryIdToInfo, br);
            }

            // Sort entry IDs by their best score
            // Drop entries with unprintable hanzi in HW now
            List<EntryMatchInfo> entryInfoList = new List<EntryMatchInfo>();
            foreach (var x in entryIdToInfo)
            {
                // Check coverage. Because we don't load full entry, it's possible
                // that some unsupported chars in hybrid text of senses slip through.
                // There's a limit to perfectionism.
                string simp, trad;
                // The entry ID is used directly as the reader position of the entry
                br.Position = x.Value.EntryId;
                CedictEntry.DeserializeHanzi(br, out simp, out trad);
                if (!areHanziCovered(simp, trad))
                {
                    continue;
                }
                // Queue up for sorting.
                entryInfoList.Add(x.Value);
            }
            // Descending by score: note the swapped operands
            entryInfoList.Sort((a, b) => b.BestSenseScore.CompareTo(a.BestSenseScore));

            // Wrap entry IDs and their highlights into results
            foreach (EntryMatchInfo emi in entryInfoList)
            {
                CedictResult cr = new CedictResult(emi.EntryId,
                                                   new ReadOnlyCollection<CedictTargetHighlight>(emi.TargetHilites));
                res.Add(cr);
            }
            return res;
        }
Esempio n. 13
0
        /// <summary>
        /// Breaks down body content into typographic blocks and caches the size of these.
        /// </summary>
        /// <remarks>
        /// The result is stored in <c>measuredBlocks</c>; link areas collected along the way
        /// are stored in <c>targetLinks</c> only if there are any. As a side effect, the
        /// text rendering hint of <paramref name="g"/> is set to anti-aliased.
        /// </remarks>
        /// <param name="g">A Graphics object used for measurements.</param>
        private void doMeasureBlocks(Graphics g)
        {
            // Once measured, blocks don't change. Nothing to do then.
            if (measuredBlocks != null)
            {
                return;
            }

            g.TextRenderingHint = System.Drawing.Text.TextRenderingHint.AntiAlias;

            // Recreate list of blocks
            List<Block> newBlocks = new List<Block>();
            // Collect links here. Will only keep at end if not empty.
            List<LinkArea> newLinks = new List<LinkArea>();

            // This is how we measure.
            // The StringFormat we get here is disposable: release it once measuring is done.
            using (StringFormat sf = StringFormat.GenericTypographic)
            {
                // Decide about size of sense ID up front: that's always a square, letter-height
                SizeF xSize = g.MeasureString("x", getFont(fntSenseLatin), 65535, sf);
                ushort senseIdxWidth = (ushort)Math.Ceiling(xSize.Height);

                // Create array with as many items as senses
                // Each item is null, or highlight in sense's equiv
                CedictTargetHighlight[] hlArr = new CedictTargetHighlight[entry.SenseCount];
                foreach (CedictTargetHighlight hl in res.TargetHilites)
                {
                    hlArr[hl.SenseIx] = hl;
                }

                int senseIdx = -1;
                int displaySenseIdx = -1;
                bool lastWasClassifier = false;

                foreach (CedictSense cm in entry.Senses)
                {
                    ++senseIdx;
                    // Is this sense a classifier?
                    bool classifier = cm.Domain.EqualsPlainText("CL:");
                    // Classifier "senses" get no displayed sense ID, and do not advance the displayed index
                    if (!classifier)
                    {
                        ++displaySenseIdx;
                        // Add one block for sense ID
                        Block sidBlock = new Block
                        {
                            Width = senseIdxWidth,
                            StickRight = true,
                            TextPos = textPool.PoolString(getSenseIdString(displaySenseIdx)),
                            NewLine = lastWasClassifier,
                            SenseId = true,
                            FirstInCedictSense = true,
                        };
                        newBlocks.Add(sidBlock);
                    }
                    // Split domain, equiv and note into typographic parts
                    // Splits along spaces and dashes
                    // Unpacks Chinese ranges
                    // Domain is localized text for "Classifier:" if, well, this is a classifier sense
                    // Note: startIX deliberately excludes the sense ID block, whose width is already set
                    int startIX = newBlocks.Count;
                    if (!classifier)
                    {
                        makeBlocks(cm.Domain, true, null, newBlocks, newLinks);
                    }
                    else
                    {
                        string strClassifier = tprov.GetString("ResultCtrlClassifier");
                        HybridText htClassifier = new HybridText(strClassifier);
                        int ix = newBlocks.Count;
                        makeBlocks(htClassifier, true, null, newBlocks, newLinks);
                        // Read-modify-write: the block is copied out, changed, and stored back
                        Block xb = newBlocks[ix];
                        xb.NewLine = true;
                        newBlocks[ix] = xb;
                    }
                    makeBlocks(cm.Equiv, false, hlArr[senseIdx], newBlocks, newLinks);
                    makeBlocks(cm.Note, true, null, newBlocks, newLinks);
                    // If sense is a classifier, mark first block as sense starter
                    if (classifier)
                    {
                        Block sstart = newBlocks[startIX];
                        sstart.FirstInCedictSense = true;
                        newBlocks[startIX] = sstart;
                    }
                    // Measure each block that was just added for this sense
                    for (int i = startIX; i != newBlocks.Count; ++i)
                    {
                        Block tb = newBlocks[i];
                        // Anything not set in one of the two Latin fonts is measured as Hanzi
                        bool isHanzi = !(tb.FontIdx == fntMetaLatin || tb.FontIdx == fntSenseLatin);
                        SizeF sz;
                        if (!isHanzi)
                        {
                            sz = g.MeasureString(textPool.GetString(tb.TextPos), getFont(tb.FontIdx), 65535, sf);
                        }
                        else
                        {
                            sz = HanziRenderer.MeasureString(g, Magic.ZhoContentFontFamily, textPool.GetString(tb.TextPos), Magic.LemmaHanziFontSize);
                        }
                        tb.Width = (ushort)Math.Round(sz.Width);
                        newBlocks[i] = tb;
                    }
                    lastWasClassifier = classifier;
                }
            }

            if (newLinks.Count != 0)
            {
                targetLinks = newLinks;
            }
            measuredBlocks = newBlocks.ToArray();
        }
Esempio n. 14
0
 /// <summary>
 /// Serializes a hybrid text as CEDICT-formatted plain text: Latin runs are written as-is,
 /// Chinese runs as hanzi (two forms separated by a pipe if they differ) plus bracketed pinyin.
 /// </summary>
 public static string HybridToCedict(HybridText ht)
 {
     StringBuilder output = new StringBuilder();
     for (int runIx = 0; runIx < ht.RunCount; runIx++)
     {
         // Separators are only ever inserted before the second and later runs
         bool notFirstRun = runIx > 0;
         TextRun run = ht.GetRunAt(runIx);

         if (!(run is TextRunLatin))
         {
             // Chinese run: always set off by a space from whatever came before
             if (notFirstRun)
             {
                 output.Append(' ');
             }
             TextRunZho zho = run as TextRunZho;
             string simp = zho.Simp;
             string trad = zho.Trad;
             if (!string.IsNullOrEmpty(simp))
             {
                 output.Append(simp);
             }
             // Second form only if present and different from the first
             if (!string.IsNullOrEmpty(trad) && trad != simp)
             {
                 output.Append('|').Append(trad);
             }
             if (zho.Pinyin != null)
             {
                 output.Append('[').Append(GetPinyinCedict(zho.Pinyin)).Append(']');
             }
             continue;
         }

         // Latin run: no separating space if it begins with punctuation
         string latin = run.GetPlainText();
         bool needsSpace = notFirstRun && latin != string.Empty && !char.IsPunctuation(latin[0]);
         if (needsSpace)
         {
             output.Append(' ');
         }
         output.Append(latin);
     }
     return output.ToString();
 }
Esempio n. 15
0
        /// <summary>
        /// Builds a dictionary entry from one input line that was already split into headword and body.
        /// </summary>
        /// <param name="strHead">The headword part of the line.</param>
        /// <param name="strBody">The rest of the line: senses separated by slashes.</param>
        /// <param name="logStream">Receives warnings and errors; may be null, in which case nothing is logged.</param>
        /// <param name="lineNum">Line number, used in log messages only.</param>
        /// <returns>The parsed entry, or null if the line could not be parsed.</returns>
        private static CedictEntry parseEntry(string strHead, string strBody, StreamWriter logStream, int lineNum)
        {
            // Writes one message about this line to the log, provided there is a log.
            Action<string, string> log = (format, detail) =>
            {
                if (logStream != null)
                {
                    logStream.WriteLine(string.Format(format, lineNum, detail));
                }
            };

            // Take the head apart
            Match headMatch = reHead.Match(strHead);
            if (!headMatch.Success)
            {
                log("Line {0}: ERROR: Invalid header syntax: {1}", strHead);
                return null;
            }
            string hanziFirst = headMatch.Groups[1].Value;
            string hanziSecond = headMatch.Groups[2].Value;

            // Pinyin is space-separated; bring it to our normalized form
            string[] pinyinParts = headMatch.Groups[3].Value.Split(new[] { ' ' });
            PinyinSyllable[] pinyinSylls;
            List<int> pinyinMap;
            normalizePinyin(pinyinParts, out pinyinSylls, out pinyinMap);

            // Syllables with a tone of -1 are weird: worth a warning, but we go on
            if (Array.Exists(pinyinSylls, syll => syll.Tone == -1))
            {
                log("Line {0}: Warning: Weird pinyin syllable: {1}", strHead);
            }

            // Both hanzi forms of the headword MUST have same # of chars, always
            if (hanziFirst.Length != hanziSecond.Length)
            {
                log("Line {0}: ERROR: Trad/simp char count mismatch: {1}", strHead);
                return null;
            }

            // Transform map so it says, for each hanzi, which pinyin syllable it corresponds to.
            // A null result is only a warning: the entry is still created, with a null map.
            short[] hanziToPinyin = transformPinyinMap(hanziFirst, pinyinMap);
            if (hanziToPinyin == null)
            {
                log("Line {0}: Warning: Failed to match hanzi to pinyin: {1}", strHead);
            }

            // Senses are separated by slashes; keep the non-empty ones, trimmed
            string[] rawSenses = strBody.Split(new[] { '/' });
            List<string> senses = new List<string>();
            foreach (string raw in rawSenses)
            {
                string trimmed = raw.Trim();
                if (trimmed != "")
                {
                    senses.Add(trimmed);
                }
            }
            if (senses.Count != rawSenses.Length)
            {
                log("Line {0}: Warning: Empty sense in entry: {1}", strBody);
            }
            // At least one sense is required
            if (senses.Count == 0)
            {
                log("Line {0}: ERROR: No sense: {1}", strBody);
                return null;
            }

            // Separate domain, equiv and note in each sense
            List<CedictSense> parsedSenses = new List<CedictSense>();
            foreach (string sense in senses)
            {
                string domain, equiv, note;
                trimSense(sense, out domain, out equiv, out note);
                // Nothing but domain/notes: merits at least a warning
                if (equiv == "")
                {
                    log("Line {0}: Warning: No equivalent in sense, only domain/notes: {1}", sense);
                }
                // Convert all three parts to hybrid text, in this order
                HybridText hDomain = plainTextToHybrid(domain, lineNum, logStream);
                HybridText hEquiv = plainTextToHybrid(equiv, lineNum, logStream);
                HybridText hNote = plainTextToHybrid(note, lineNum, logStream);
                // A sense is dropped if any of its parts failed to convert
                if (hDomain == null || hEquiv == null || hNote == null)
                {
                    continue;
                }
                parsedSenses.Add(new CedictSense(hDomain, hEquiv, hNote));
            }
            // No usable sense left: we failed. No log entry is written here.
            if (parsedSenses.Count == 0)
            {
                return null;
            }

            // Done with entry
            return new CedictEntry(hanziSecond, hanziFirst,
                                   new ReadOnlyCollection<PinyinSyllable>(pinyinSylls),
                                   new ReadOnlyCollection<CedictSense>(parsedSenses),
                                   hanziToPinyin);
        }
Esempio n. 16
0
 /// <summary>
 /// Returns true if display font covers all Hanzi in hybrid text; false otherwise.
 /// </summary>
 /// <remarks>
 /// Only Chinese runs are examined. The simplified and traditional forms of a run are
 /// checked independently of each other, so a missing simplified form does not exempt
 /// the traditional form from the check.
 /// </remarks>
 /// <param name="ht">The hybrid text to examine.</param>
 private bool areHanziCovered(HybridText ht)
 {
     if (ht.IsEmpty) return true;
     for (int i = 0; i != ht.RunCount; ++i)
     {
         // Runs that are not Chinese have nothing to verify
         TextRunZho trZho = ht.GetRunAt(i) as TextRunZho;
         if (trZho == null) continue;
         if (trZho.Simp != null)
         {
             foreach (char c in trZho.Simp)
                 if (!cvr.GetCoverage(c).HasFlag(FontCoverageFlags.Simp))
                     return false;
         }
         if (trZho.Trad != null)
         {
             foreach (char c in trZho.Trad)
                 if (!cvr.GetCoverage(c).HasFlag(FontCoverageFlags.Trad))
                     return false;
         }
     }
     return true;
 }
Esempio n. 17
0
        /// <summary>
        /// Writes a sense's equiv to the HTML writer character by character, wrapping
        /// highlighted stretches in spans with class "sense-hl", "sense-hl-start" or "sense-hl-end".
        /// </summary>
        /// <remarks>
        /// NOTE(review): in nobr mode this method closes one tag that it never opened (at the
        /// first space, or at the very end if there was no space). Presumably the caller opens
        /// a no-break span around the first word before calling - confirm against the caller.
        /// </remarks>
        /// <param name="writer">The writer to render into.</param>
        /// <param name="equiv">The hybrid text to render.</param>
        /// <param name="hl">Highlight passed on to the text consumer, which reports per character whether it is highlighted.</param>
        /// <param name="nobr">If true, the first word gets special treatment: see remarks.</param>
        private void renderEquiv(HtmlTextWriter writer, HybridText equiv, CedictTargetHighlight hl, bool nobr)
        {
            HybridTextConsumer htc = new HybridTextConsumer(equiv, hl);
            // Becomes true at the first space, and only in nobr mode
            bool firstWordOver     = false;
            // True while a highlight span opened by this method is still open
            bool hlOn = false;
            char c;
            bool inHL;

            while (true)
            {
                // Consumer signals end of text with a zero character
                htc.GetNext(out c, out inHL);
                if (c == (char)0)
                {
                    break;
                }
                // Highlight starts?
                if (inHL && !hlOn)
                {
                    // Very first word gets special highlight if hilite goes beyond first space, and we're in nobr mode
                    if (!firstWordOver && nobr && htc.IsNextSpaceInHilite())
                    {
                        writer.AddAttribute(HtmlTextWriterAttribute.Class, "sense-hl-start");
                        writer.RenderBeginTag(HtmlTextWriterTag.Span);
                    }
                    // Plain old hilite start everywhere else
                    else
                    {
                        writer.AddAttribute(HtmlTextWriterAttribute.Class, "sense-hl");
                        writer.RenderBeginTag(HtmlTextWriterTag.Span);
                    }
                    hlOn = true;
                }
                // Highlight ends?
                else if (!inHL && hlOn)
                {
                    writer.RenderEndTag();
                    hlOn = false;
                }
                // Space - close "nobr" span if first word's just over
                if (c == ' ' && !firstWordOver && nobr)
                {
                    firstWordOver = true;
                    // Closes the innermost open tag: the highlight span if one is open, otherwise the enclosing tag
                    writer.RenderEndTag();
                    if (hlOn)
                    {
                        // The highlight span was closed above, so this closes the enclosing tag;
                        // then highlight is reopened as "sense-hl-end" to continue after the first word
                        writer.RenderEndTag();
                        writer.AddAttribute(HtmlTextWriterAttribute.Class, "sense-hl-end");
                        writer.RenderBeginTag(HtmlTextWriterTag.Span);
                    }
                }
                // Render character
                writer.WriteEncodedText(c.ToString());
            }
            // Close hilite and nobr that we may have open
            // No space was ever seen in nobr mode: the enclosing tag is still open
            if (!firstWordOver && nobr)
            {
                writer.RenderEndTag();
            }
            // Text ended inside a highlight
            if (hlOn)
            {
                writer.RenderEndTag();
            }
        }
Esempio n. 18
0
 /// <summary>
 /// Ctor: remembers the text and highlight, and fetches the plain text of the first run.
 /// </summary>
 /// <param name="txt">The hybrid text to consume; its run at index 0 is accessed right away.</param>
 /// <param name="hl">The highlight that goes with the text.</param>
 public HybridTextConsumer(HybridText txt, CedictTargetHighlight hl)
 {
     this.txt = txt;
     this.hl = hl;

     // Start out with the very first run's plain text at hand
     TextRun firstRun = txt.GetRunAt(0);
     runTxt = firstRun.GetPlainText();
 }