Exemple #1
0
        /// <summary>
        /// <para>Turns a plain string with embedded Chinese ranges into hybrid text made of mixed runs.</para>
        /// </summary>
        /// <param name="str">String to parse. Can be null or empty.</param>
        /// <param name="lineNum">Line number in input file; only used in the logged error message.</param>
        /// <param name="logStream">Stream receiving the error message. If null, nothing is logged.</param>
        /// <returns>
        /// <c>HybridText.Empty</c> for null or empty input; otherwise the hybrid text built from the split runs,
        /// or null if a Latin run still contains "|" or hanzi (as reported by <c>hasIdeo</c>) after splitting.
        /// </returns>
        private static HybridText plainTextToHybrid(string str, int lineNum, StreamWriter logStream)
        {
            if (string.IsNullOrEmpty(str))
            {
                return HybridText.Empty;
            }

            // Seed: the whole input as one run, assumed to be pure Latin
            List<TextRun> runs = new List<TextRun> { new TextRunLatin(str) };

            // Each pass replaces runs with one or more runs, using increasingly narrow matchers
            runs = splitRuns(runs, reSTP, new MatchTranslatorSTP());
            runs = splitRuns(runs, reSP, new MatchTranslatorSP());
            runs = splitRuns(runs, reST, new MatchTranslatorST());
            runs = splitRuns(runs, reS, new MatchTranslatorS());
            runs = splitRuns(runs, reP, new MatchTranslatorP());

            // Sanity check: whatever is still Latin must not contain | or hanzi
            foreach (TextRun candidate in runs)
            {
                TextRunLatin latin = candidate as TextRunLatin;
                if (latin == null)
                {
                    continue;
                }

                string latinText = latin.GetPlainText();
                bool isClean = !latinText.Contains("|") && !hasIdeo(latinText);
                if (isClean)
                {
                    continue;
                }

                // Leftover markup: report the offending input line and give up
                if (logStream != null)
                {
                    logStream.WriteLine(string.Format("Line {0}: ERROR: Failed to convert sense to hybrid text: {1}", lineNum, str));
                }
                return null;
            }

            // All Latin runs are clean: wrap up the result
            return new HybridText(new ReadOnlyCollection<TextRun>(runs));
        }
Exemple #2
0
            /// <summary>
            /// Collects the normalized word tokens of a Latin text run into the given set.
            /// </summary>
            /// <param name="tr">Latin run whose plain text is tokenized.</param>
            /// <param name="tokens">Set that receives the tokens.</param>
            private void getTokens(TextRunLatin tr, HashSet<string> tokens)
            {
                // Words are delimited by spaces and dashes
                char[] separators = { ' ', '-' };
                foreach (string rawWord in tr.GetPlainText().Split(separators))
                {
                    // Strip the characters in trimPuncChars from both ends; nothing left means no token
                    string word = rawWord.Trim(trimPuncChars);
                    if (word.Length == 0)
                    {
                        continue;
                    }
                    // Anything matching reNumbers is stored as one shared placeholder; the rest is lower-cased
                    string token = reNumbers.IsMatch(word) ? "*num*" : word.ToLowerInvariant();
                    tokens.Add(token);
                }
            }
 /// <summary>
 /// Splits a Latin text run into normalized tokens and adds every one of them to the set.
 /// </summary>
 /// <param name="tr">Latin run to read the plain text from.</param>
 /// <param name="tokens">Set the tokens are added to.</param>
 private void getTokens(TextRunLatin tr, HashSet<string> tokens)
 {
     // Break the run's text at spaces and dashes
     string[] words = tr.GetPlainText().Split(' ', '-');
     for (int i = 0; i < words.Length; ++i)
     {
         string token = words[i].Trim(trimPuncChars);
         // Only words with something left after trimming become tokens
         if (token.Length > 0)
         {
             // Matches of reNumbers are replaced by a common placeholder
             if (reNumbers.IsMatch(token))
             {
                 token = "*num*";
             }
             tokens.Add(token.ToLowerInvariant());
         }
     }
 }
        /// <summary>
        /// <para>Produces unmeasured display blocks from a single hybrid text. Marks highlights, if any.</para>
        /// <para>Does not fill in blocks' size, but fills in everything else.</para>
        /// <para>Latin runs are split at spaces and then by <c>splitByDash</c>, one block per piece.
        /// Every other run is treated as Chinese and yields blocks for simplified, separator, traditional
        /// and pinyin (as available), plus exactly one link area.</para>
        /// </summary>
        /// <param name="htxt">Hybrid text to break down into blocks and measure.</param>
        /// <param name="isMeta">True if this is a domain or note (displayed in italics).</param>
        /// <param name="hl">Highlight to show in hybrid text, or null.</param>
        /// <param name="blocks">List of blocks to append to.</param>
        /// <param name="links">List to gather links (appending to list).</param>
        private void makeBlocks(HybridText htxt, bool isMeta, CedictTargetHighlight hl,
                                List<Block> blocks, List<LinkArea> links)
        {
            // Meta text (domain/note) and sense text use different font indexes
            byte fntIdxLatin   = isMeta ? fntMetaLatin : fntSenseLatin;
            byte fntIdxZhoSimp = isMeta ? fntMetaHanziSimp : fntSenseHanziSimp;
            byte fntIdxZhoTrad = isMeta ? fntMetaHanziTrad : fntSenseHanziTrad;

            // Go run by run
            for (int runIX = 0; runIX != htxt.RunCount; ++runIX)
            {
                TextRun run = htxt.GetRunAt(runIX);
                // Latin run: split by spaces first
                if (run is TextRunLatin)
                {
                    string[] bySpaces = run.GetPlainText().Split(new char[] { ' ' });
                    // Character offset of the current word within the run's text - for highlights
                    int latnPos = 0;
                    foreach (string str in bySpaces)
                    {
                        // Each word: also by dash
                        string[] byDashes = splitByDash(str);
                        // Offset of the current piece within the word - for highlights
                        int ofsPos = 0;
                        foreach (string blockStr in byDashes)
                        {
                            Block tb = new Block
                            {
                                TextPos    = textPool.PoolString(blockStr),
                                FontIdx    = fntIdxLatin,
                                SpaceAfter = false, // will set this true for last block in "byDashes"
                            };
                            // Does block's text intersect with highlight?
                            if (hl != null && hl.RunIx == runIX)
                            {
                                int blockStart = latnPos + ofsPos;
                                int blockEnd   = blockStart + blockStr.Length;
                                // Three ways to intersect: block starts inside the highlight,
                                // block ends inside the highlight, or block spans the whole highlight
                                if ((blockStart >= hl.HiliteStart && blockStart < hl.HiliteStart + hl.HiliteLength) ||
                                    (blockEnd > hl.HiliteStart && blockEnd <= hl.HiliteStart + hl.HiliteLength) ||
                                    (blockStart < hl.HiliteStart && blockEnd >= hl.HiliteStart + hl.HiliteLength))
                                {
                                    tb.Hilite = true;
                                }
                            }
                            blocks.Add(tb);
                            // Keep track of position for highlight
                            ofsPos += blockStr.Length;
                        }
                        // Make sure last one is followed by space.
                        // Guarded: if nothing has been emitted so far there is no block to update,
                        // and indexing an empty list would throw.
                        if (blocks.Count > 0)
                        {
                            // Read, modify, write back (presumably Block is a value type - TODO confirm)
                            Block xb = blocks[blocks.Count - 1];
                            xb.SpaceAfter            = true;
                            blocks[blocks.Count - 1] = xb;
                        }
                        // Keep track of position in text - for highlights (+1 for the space we split at)
                        latnPos += str.Length + 1;
                    }
                }
                // Chinese: depends on T/S/Both display mode, and on available info
                else
                {
                    // NOTE(review): a run that is neither Latin nor TextRunZho leaves zhoRun null and fails
                    // on first use below - presumably those are the only two run kinds; confirm.
                    TextRunZho zhoRun = run as TextRunZho;
                    // Chinese range is made up of:
                    // Simplified (empty string if only traditional requested)
                    // Separator (if both simplified and traditional are requested)
                    // Traditional (empty string if only simplified requested)
                    // Pinyin with accents as tone marks, in brackets (if present)
                    string strSimp = string.Empty;
                    if (analyzedScript != SearchScript.Traditional && zhoRun.Simp != null)
                    {
                        strSimp = zhoRun.Simp;
                    }
                    string strTrad = string.Empty;
                    if (analyzedScript != SearchScript.Simplified && zhoRun.Trad != null)
                    {
                        strTrad = zhoRun.Trad;
                    }
                    string strPy = string.Empty;
                    // Convert pinyin to display format (tone marks as diacritics; r5 glued)
                    if (zhoRun.Pinyin != null)
                    {
                        strPy = "[" + zhoRun.GetPinyinInOne(true) + "]";
                    }

                    // Create link area, with query string
                    string strPyNumbers = string.Empty; // Pinyin with numbers as tone marks
                    if (zhoRun.Pinyin != null)
                    {
                        strPyNumbers = zhoRun.GetPinyinRaw();
                    }
                    LinkArea linkArea = new LinkArea(strSimp, strTrad, strPyNumbers, analyzedScript);

                    // Block for simplified, if present
                    if (strSimp != string.Empty)
                    {
                        Block tb = new Block
                        {
                            TextPos    = textPool.PoolString(strSimp),
                            FontIdx    = fntIdxZhoSimp,
                            SpaceAfter = true,
                        };
                        blocks.Add(tb);
                        linkArea.BlockIds.Add(blocks.Count - 1);
                    }
                    // Separator if both simplified and traditional are there
                    // AND they are different...
                    if (strSimp != string.Empty && strTrad != string.Empty && strSimp != strTrad)
                    {
                        // Last block is the simplified one just added: flag it as sticking to the separator
                        Block xb = blocks[blocks.Count - 1];
                        xb.StickRight            = true;
                        blocks[blocks.Count - 1] = xb;
                        Block tb = new Block
                        {
                            TextPos    = textPool.PoolString("•"),
                            FontIdx    = fntIdxLatin,
                            SpaceAfter = true,
                        };
                        blocks.Add(tb);
                        linkArea.BlockIds.Add(blocks.Count - 1);
                    }
                    // Traditional, if present (skipped when identical to simplified)
                    if (strTrad != string.Empty && strTrad != strSimp)
                    {
                        Block tb = new Block
                        {
                            TextPos    = textPool.PoolString(strTrad),
                            FontIdx    = fntIdxZhoTrad,
                            SpaceAfter = true,
                        };
                        blocks.Add(tb);
                        linkArea.BlockIds.Add(blocks.Count - 1);
                    }
                    // Pinyin, if present
                    if (strPy != string.Empty)
                    {
                        // Split by spaces
                        string[] pyParts = strPy.Split(new char[] { ' ' });
                        foreach (string pyPart in pyParts)
                        {
                            Block tb = new Block
                            {
                                TextPos    = textPool.PoolString(pyPart),
                                FontIdx    = fntIdxLatin,
                                SpaceAfter = true,
                            };
                            blocks.Add(tb);
                            linkArea.BlockIds.Add(blocks.Count - 1);
                        }
                    }
                    // Last part will have requested a space after.
                    // Look ahead and if next text run is Latin and starts with punctuation, make it stick
                    TextRunLatin nextLatinRun = null;
                    if (runIX + 1 < htxt.RunCount)
                    {
                        nextLatinRun = htxt.GetRunAt(runIX + 1) as TextRunLatin;
                    }
                    // Guarded twice: there must be a block to update, and the next run must have
                    // at least one character before we look at its first one.
                    if (nextLatinRun != null && blocks.Count > 0)
                    {
                        string nextText = nextLatinRun.GetPlainText();
                        if (!string.IsNullOrEmpty(nextText) && char.IsPunctuation(nextText[0]))
                        {
                            Block xb = blocks[blocks.Count - 1];
                            xb.SpaceAfter            = false;
                            blocks[blocks.Count - 1] = xb;
                        }
                    }
                    // Collect link area
                    links.Add(linkArea);
                }
            }
        }