示例#1
0
        /// <summary>
        /// Parses the given <paramref name="rtfText"/> token by token and, when the RTF turns out to
        /// contain embedded HTML, stores the reconstructed HTML in <c>HtmlContent</c>
        /// </summary>
        /// <remarks>
        /// <c>HtmlContent</c> is reset to <c>null</c> before parsing starts and is only assigned when the
        /// <c>Consts.FromHtml</c> keyword was seen. Besides the HTML text the method also updates the default
        /// encoding, the font encodings, <c>Generator</c>, <c>FormatConverter</c> and the font table as the
        /// corresponding keywords are encountered.
        /// </remarks>
        /// <param name="rtfText">The RTF document as text</param>
        public void ParseRtfText(string rtfText)
        {
            HtmlContent = null;
            var stringBuilder            = new StringBuilder();
            var htmlExtraction           = false;
            var rtfContaintsEmbeddedHtml = false;

            // Holds the first byte (as hex text) of a pending double byte character
            var hexBuffer = string.Empty;

            using (var stringReader = new StringReader(rtfText))
                using (var reader = new Reader(stringReader))
                {
                    while (reader.ReadToken() != null)
                    {
                        // The previous token was a hex escape, the current one is not, and a byte is still
                        // waiting in the buffer of a multi byte encoding: flush that byte first
                        if (reader.LastToken?.Key == "'" &&
                            reader.Keyword != "'" &&
                            hexBuffer != string.Empty &&
                            !RuntimeEncoding.IsSingleByte)
                        {
                            var value = HexValueToChar(hexBuffer);
                            if (value != null)
                            {
                                stringBuilder.Append(value);
                            }
                            else
                            {
                                // Double byte charset was detected for the last token but only one byte was used so far.
                                // This token should carry the second byte but it doesn't.
                                // Workaround: To display it anyway, we treat it as a single byte char.
                                var buff = new[] { byte.Parse(hexBuffer, NumberStyles.HexNumber) };
                                stringBuilder.Append(RuntimeEncoding.GetString(buff));
                            }

                            hexBuffer = string.Empty;

                            if (reader.TokenType == RtfTokenType.Text)
                            {
                                stringBuilder.Append(reader.Keyword);
                                continue;
                            }
                        }

                        switch (reader.Keyword)
                        {
                        case Consts.Ansi:
                            break;

                        case Consts.Ansicpg:
                            // Read default encoding
                            _defaultEncoding = Encoding.GetEncoding(reader.Parameter);
                            break;

                        case Consts.Info:
                            // Read document information
                            ReadDocumentInfo(reader);

                            // NOTE(review): this leaves the whole method, so no token after the document
                            // information is processed and HtmlContent keeps the null assigned above;
                            // confirm that this is intended
                            return;

                        case Consts.FromHtml:
                            rtfContaintsEmbeddedHtml = true;
                            htmlExtraction           = true;
                            break;

                        case Consts.Generator:
                            // Read document generator
                            Generator = ReadInnerText(reader, true);
                            break;

                        case Consts.FormatConverter:
                            FormatConverter = ReadInnerText(reader, true);
                            break;

                        case Consts.Fonttbl:
                            // Read font table
                            ReadFontTable(reader);
                            break;

                        case Consts.F:
                            // A text token that happens to equal the keyword is ordinary text
                            if (reader.TokenType == RtfTokenType.Text)
                            {
                                goto default;
                            }

                            try
                            {
                                _fontChartset = FontTable[reader.Parameter].Encoding;
                            }
                            catch
                            {
                                // Best effort: fall back to the system default encoding when the font
                                // or its encoding cannot be resolved
                                _fontChartset = Encoding.Default;
                            }

                            break;

                        case Consts.Af:
                            // NOTE(review): unlike the Consts.F case above this lookup has no fallback
                            _associateFontChartset = FontTable[reader.Parameter].Encoding;
                            break;

                        case Consts.HtmlRtf:

                            // Without a parameter the flag is switched on, with parameter 0 it is switched
                            // off; any other parameter leaves the flag untouched
                            switch (reader.HasParam)
                            {
                            case false:
                                htmlExtraction = true;
                                break;

                            case true when reader.Parameter == 0:
                                htmlExtraction = false;
                                break;
                            }

                            break;

                        case Consts.MHtmlTag:
                            if (reader.HasParam && reader.Parameter == 0)
                            {
                                htmlExtraction = false;
                            }
                            else
                            {
                                if (hexBuffer != string.Empty)
                                {
                                    // Flush the pending byte as a single byte character
                                    var buff = new[] { byte.Parse(hexBuffer, NumberStyles.HexNumber) };
                                    hexBuffer = string.Empty;
                                    stringBuilder.Append(RuntimeEncoding.GetString(buff));
                                    htmlExtraction = true;
                                }
                                else
                                {
                                    htmlExtraction = false;
                                }
                            }

                            break;

                        case Consts.HtmlTag:
                        {
                            // Skip a single space that directly follows the keyword
                            if (reader.InnerReader.Peek() == ' ')
                            {
                                reader.InnerReader.Read();
                            }

                            var text = ReadInnerText(reader, null, true, false, true);

                            if (!string.IsNullOrEmpty(text))
                            {
                                stringBuilder.Append(text);
                            }

                            break;
                        }

                        case Consts.HtmlBase:
                        {
                            var text = ReadInnerText(reader, null, true, false, true);

                            if (!string.IsNullOrEmpty(text))
                            {
                                stringBuilder.Append(text);
                            }

                            break;
                        }

                        case Consts.Background:
                        case Consts.Fillcolor:
                        case Consts.Field:
                            // The inner text is read but deliberately not appended to the output
                            ReadInnerText(reader, null, false, true, false);
                            break;

                        //case Consts.Par:
                        //case Consts.Line:
                        //    stringBuilder.Append(Environment.NewLine);
                        //    break;

                        case Consts.Tab:
                            stringBuilder.Append("\t");
                            break;

                        case Consts.Lquote:
                            stringBuilder.Append("&lsquo;");
                            break;

                        case Consts.Rquote:
                            stringBuilder.Append("&rsquo;");
                            break;

                        case Consts.LdblQuote:
                            stringBuilder.Append("&ldquo;");
                            break;

                        case Consts.RdblQuote:
                            stringBuilder.Append("&rdquo;");
                            break;

                        case Consts.Bullet:
                            stringBuilder.Append("&bull;");
                            break;

                        case Consts.Endash:
                            stringBuilder.Append("&ndash;");
                            break;

                        case Consts.Emdash:
                            stringBuilder.Append("&mdash;");
                            break;

                        case Consts.Tilde:
                            stringBuilder.Append("&nbsp;");
                            break;

                        case Consts.Underscore:
                            stringBuilder.Append("&shy;");
                            break;

                        case Consts.Pntext:
                            reader.ReadToEndOfGroup();
                            break;

                        case Consts.U:

                            //if (reader.TokenType == RtfTokenType.Control && !htmlExtraction)
                            //{
                            //    stringBuilder.Append(HttpUtility.UrlDecode("*", _defaultEncoding));
                            //    continue;
                            //}

                            // The sign is tested arithmetically instead of looking for a leading "-" in the
                            // formatted number: the formatted text depends on the current culture, whose
                            // negative sign is not necessarily the ASCII hyphen
                            if (reader.Parameter < 0)
                            {
                                // The Unicode standard permanently reserves these code point values for
                                // UTF-16 encoding of the high and low surrogates
                                // U+D800 to U+DFFF
                                // 55296  -  57343

                                // Negative parameters are mapped back to an unsigned value by adding 65536
                                var value = 65536 + reader.Parameter;

                                if (value >= 0xD800 && value <= 0xDFFF)
                                {
                                    if (!reader.ParsingHighLowSurrogate)
                                    {
                                        // First half of a surrogate pair, remember it and wait for the second
                                        reader.ParsingHighLowSurrogate = true;
                                        reader.HighSurrogateValue      = value;
                                    }
                                    else
                                    {
                                        // Second half, combine both halves into one code point
                                        var combined = ((reader.HighSurrogateValue - 0xD800) << 10) + (value - 0xDC00) + 0x10000;
                                        stringBuilder.Append($"&#{combined};");
                                        reader.ParsingHighLowSurrogate = false;
                                        reader.HighSurrogateValue      = null;
                                    }
                                }
                                else
                                {
                                    reader.ParsingHighLowSurrogate = false;
                                    stringBuilder.Append($"&#{value};");
                                }
                            }
                            else
                            {
                                stringBuilder.Append($"&#{reader.Parameter};");
                            }

                            break;

                        case Consts.Apostrophe:
                            if (reader.TokenType != RtfTokenType.Control || htmlExtraction)
                            {
                                continue;
                            }

                            // Convert HEX value directly when we have a single byte charset
                            if (RuntimeEncoding.IsSingleByte)
                            {
                                if (string.IsNullOrEmpty(hexBuffer))
                                {
                                    hexBuffer = reader.CurrentToken.Hex;
                                }

                                var buff = new[] { byte.Parse(hexBuffer, NumberStyles.HexNumber) };
                                hexBuffer = string.Empty;
                                stringBuilder.Append(RuntimeEncoding.GetString(buff));
                            }
                            else
                            {
                                // If we have a double byte charset like Chinese then store the value and wait for the next HEX value
                                if (hexBuffer == string.Empty)
                                {
                                    hexBuffer = reader.CurrentToken.Hex;
                                }
                                else
                                {
                                    // Append the second HEX value and convert it
                                    var buff = new[]
                                    {
                                        byte.Parse(hexBuffer, NumberStyles.HexNumber),
                                        byte.Parse(reader.CurrentToken.Hex, NumberStyles.HexNumber)
                                    };

                                    stringBuilder.Append(RuntimeEncoding.GetString(buff));

                                    // Empty the HEX buffer
                                    hexBuffer = string.Empty;
                                }
                            }

                            break;

                        default:

                            switch (reader.TokenType)
                            {
                            //case RtfTokenType.GroupEnd:
                            //    htmlExtraction = false;
                            //    break;

                            case RtfTokenType.Text:
                                // Plain text is only copied while the extraction flag is off
                                if (!htmlExtraction)
                                {
                                    stringBuilder.Append(reader.Keyword);
                                }
                                break;
                            }

                            break;
                        }
                    }
                }

            // Only publish a result when the document declared that it carries embedded HTML
            if (rtfContaintsEmbeddedHtml)
            {
                HtmlContent = stringBuilder.ToString();
            }
        }
示例#2
0
        /// <summary>
        /// Reads the font table from the given <paramref name="reader"/> and rebuilds <c>FontTable</c>
        /// </summary>
        /// <remarks>
        /// The existing table is cleared first. Every group found inside the font table group is read as
        /// one font definition; definitions without a font index or without a name are skipped. The
        /// charset defaults to 1 when the definition does not specify one.
        /// </remarks>
        /// <param name="reader">The reader to take the font table tokens from</param>
        private void ReadFontTable(Reader reader)
        {
            FontTable.Clear();

            while (reader.ReadToken() != null)
            {
                // The end of the enclosing group ends the font table
                if (reader.TokenType == RtfTokenType.GroupEnd)
                {
                    break;
                }

                // Everything that does not open a font definition group is ignored
                if (reader.TokenType != RtfTokenType.GroupStart)
                {
                    continue;
                }

                var    index   = -1;
                string name    = null;
                var    charset = 1;
                var    nilFlag = false;

                // Read one font definition up to the end of its group
                while (reader.ReadToken() != null)
                {
                    if (reader.TokenType == RtfTokenType.GroupEnd)
                    {
                        break;
                    }

                    if (reader.TokenType == RtfTokenType.GroupStart)
                    {
                        // If we meet nested levels, then ignore
                        reader.ReadToken();
                        reader.ReadToEndOfGroup();
                        reader.ReadToken();
                    }
                    else
                    {
                        switch (reader.Keyword)
                        {
                        case "f" when reader.HasParam:
                            index = reader.Parameter;
                            break;

                        case "fnil":
                            // Start with the system default font name; a text token that follows
                            // replaces it again
                            name    = SystemFonts.DefaultFont.Name;
                            nilFlag = true;
                            break;

                        case Consts.Fcharset:
                            charset = reader.Parameter;
                            break;

                        default:
                            if (reader.CurrentToken.IsTextToken)
                            {
                                name = ReadInnerText(reader, reader.CurrentToken, false, false, false);

                                if (name != null)
                                {
                                    name = name.Trim();

                                    // The semicolon is a fixed delimiter, so it is compared ordinally
                                    // instead of with the rules of the current culture
                                    if (name.EndsWith(";", StringComparison.Ordinal))
                                    {
                                        name = name.Substring(0, name.Length - 1);
                                    }
                                }
                            }

                            break;
                        }
                    }
                }

                // A usable entry needs both a font index and a name
                if (index < 0 || name == null)
                {
                    continue;
                }

                // Final clean-up, also applied to names that did not pass through the text token branch
                if (name.EndsWith(";", StringComparison.Ordinal))
                {
                    name = name.Substring(0, name.Length - 1);
                }

                name = name.Trim();

                if (string.IsNullOrEmpty(name))
                {
                    name = SystemFonts.DefaultFont.Name;
                }

                var font = new Font(index, name)
                {
                    Charset = charset, NilFlag = nilFlag
                };
                FontTable.Add(font);
            }
        }