bool IgnoredDelimiterIsRequired(RtfTokenizer tokenizer, RtfToken token, RtfToken previous)
    {
        // Decides whether the delimiter that follows 'previous' has to be kept.
        // Word writes delimiters both where they are required and where they only aid
        // readability; only the required ones should survive normalization.
        //
        // A delimiter can only matter directly after a control word. In that case the tokens
        // that follow are inspected until CanMergeToControlWord gives a definite answer, and
        // the tokenizer is then moved back (MoveTo) to the token that was current on entry.
        // The 'token' parameter is not used by this method.
        if (previous.Type != RtfTokenType.ControlWord)
        {
            return(false);
        }

        var  savedPosition = tokenizer.Current;
        bool required      = false;

        try
        {
            while (tokenizer.MoveNext())
            {
                bool? verdict = CanMergeToControlWord(previous, tokenizer.Current);
                if (verdict.HasValue)
                {
                    // First definite answer wins; an undecided (null) answer keeps scanning.
                    required = verdict.Value;
                    break;
                }
            }
        }
        finally
        {
            // Always undo the look-ahead, even if it ended with an exception.
            tokenizer.MoveTo(savedPosition);
        }
        return(required);
    }
    public string GetNormalizedString()
    {
        // Tokenizes Rtf and rebuilds it token by token. Each token is handed to
        // AddCurrentToken together with the token returned for the previous step, so the
        // individual handlers can decide what to emit into the result.
        var      output   = new StringBuilder();
        var      reader   = new RtfTokenizer(Rtf);
        RtfToken lastSeen = RtfToken.None;

        while (reader.MoveNext())
        {
            lastSeen = AddCurrentToken(reader, output, lastSeen);
        }
        return(output.ToString());
    }
    private void AddControlWord(RtfTokenizer tokenizer, RtfToken token, StringBuilder sb, RtfToken previous)
    {
        // The control word itself is always emitted unchanged.
        AddContent(tokenizer, token, sb, previous);

        // When IsBinaryToken reports a binary length for this control word, the tokenizer is
        // asked to move over exactly that many characters and the resulting current token is
        // emitted as-is.
        // Rich Text Format (RTF) Specification, Version 1.9.1, p 151:
        // RTF writers should not use the carriage return/line feed (CR/LF) combination to break up pictures
        // in binary format. If they do, the CR/LF combination is treated as literal text and considered part of the picture data.
        int payloadLength;

        if (IsBinaryToken(token, out payloadLength) && tokenizer.MoveFixedLength(payloadLength))
        {
            AddContent(tokenizer, tokenizer.Current, sb, previous);
        }
    }
    private RtfToken AddCurrentToken(RtfTokenizer tokenizer, StringBuilder sb, RtfToken previous)
    {
        // Dispatches the tokenizer's current token to the handler for its type and returns
        // that token so the caller can pass it as 'previous' on the next step.
        // Note: the token returned is the one read on entry, even if a handler advanced
        // the tokenizer in the meantime.
        var current = tokenizer.Current;

        switch (current.Type)
        {
        // Nothing to emit.
        case RtfTokenType.None:
            break;

        // Group structure.
        case RtfTokenType.StartGroup:
            AddPushGroup(tokenizer, current, sb, previous);
            break;

        case RtfTokenType.EndGroup:
            AddPopGroup(tokenizer, current, sb, previous);
            break;

        // Controls.
        case RtfTokenType.ControlSymbol:
            AddControlSymbol(tokenizer, current, sb, previous);
            break;

        case RtfTokenType.ControlWord:
            AddControlWord(tokenizer, current, sb, previous);
            break;

        // Whitespace-like tokens.
        case RtfTokenType.CRLF:
            AddCarriageReturn(tokenizer, current, sb, previous);
            break;

        case RtfTokenType.IgnoredDelimiter:
            AddIgnoredDelimiter(tokenizer, current, sb, previous);
            break;

        // Plain text.
        case RtfTokenType.Content:
            AddContent(tokenizer, current, sb, previous);
            break;

        default:
            Debug.Assert(false, "Unknown token type " + current.ToString());
            break;
        }
        return(current);
    }
    private static bool? CanMergeToControlWord(RtfToken previous, RtfToken next)
    {
        // Answers: if 'next' were written directly after the control word 'previous' with no
        // delimiter in between, would its first character be read as part of that control word?
        //
        //   true  - yes, the two would merge, so a delimiter is required between them.
        //   false - no, they stay distinct without a delimiter.
        //   null  - cannot tell from this token (CRLF or empty content); look at the one after it.
        //
        // Throws ArgumentException when 'previous' is not a control word token.
        if (previous.Type != RtfTokenType.ControlWord)
        {
            throw new ArgumentException("Expected a control word token.", "previous");
        }
        if (next.Type == RtfTokenType.CRLF)
        {
            return(null);    // Can't tell
        }
        if (next.Type != RtfTokenType.Content)
        {
            return(false);
        }
        if (previous.Length < 2)
        {
            return(false);    // Internal error? Too short to inspect as a control word.
        }
        if (next.Length < 1)
        {
            return(null);    // Internal error? Empty content decides nothing.
        }
        var lastCh = previous.Rtf[previous.StartIndex + previous.Length - 1];
        var nextCh = next.Rtf[next.StartIndex];

        if (RtfTokenizer.IsAsciiLetter(lastCh))
        {
            // After the name, another letter extends the name; a minus or digit starts a parameter.
            return(RtfTokenizer.IsAsciiLetter(nextCh) || RtfTokenizer.IsAsciiMinus(nextCh) || RtfTokenizer.IsAsciiDigit(nextCh));
        }
        else if (RtfTokenizer.IsAsciiMinus(lastCh))
        {
            // After the sign only a digit can continue the parameter.
            return(RtfTokenizer.IsAsciiDigit(nextCh));
        }
        else if (RtfTokenizer.IsAsciiDigit(lastCh))
        {
            // Inside the numeric parameter only further digits continue it.
            return(RtfTokenizer.IsAsciiDigit(nextCh));
        }
        else
        {
            Debug.Assert(false, "unknown final character for control word token \"" + previous.ToString() + "\"");
            return(false);
        }
    }
 private void AddIgnoredDelimiter(RtfTokenizer tokenizer, RtfToken token, StringBuilder sb, RtfToken previous)
 {
     // Emits a single space in place of the delimiter, but only when IgnoredDelimiterIsRequired
     // says the delimiter is needed; otherwise the delimiter is dropped.
     //
     // Background, Rich Text Format (RTF) Specification, Version 1.9.1, p 151:
     //   - An RTF file does not have to contain any CRLFs, and readers should ignore them except
     //     that they can act as control word delimiters.
     //   - If a single space delimits the control word, the space does not appear in the document.
     //     Any characters following that single space, including subsequent spaces, appear as
     //     text, so spaces should be used only where necessary.
     //   - Paragraph marks (CR, LF, or CRLF) can break up lines without changing the meaning,
     //     except in destinations that contain \binN.
     //   - Toggle control words such as \b turn the property on when they have no parameter or a
     //     nonzero one, and off with a parameter of 0 (\b vs. \b0).
     bool delimiterNeeded = IgnoredDelimiterIsRequired(tokenizer, token, previous);

     if (!delimiterNeeded)
     {
         return;
     }

     // There *may* be a need for a delimiter, so keep one.
     AddContent(tokenizer, " ", sb, previous);
 }
 bool IsBinaryToken(RtfToken token, out int binaryLength)
 {
     // Reports whether 'token' starts with binPrefix followed by a numeric parameter, and if so
     // yields that parameter as the number of characters of binary data that follow.
     // Returns true with binaryLength >= 0 on success; otherwise returns false with
     // binaryLength == -1.
     // NOTE(review): binPrefix is declared outside this method; presumably it is the \bin
     // control word prefix - confirm.
     //
     // Rich Text Format (RTF) Specification, Version 1.9.1, p 209:
     //      Remember that binary data can occur when you're skipping RTF.
     //      A simple way to skip a group in RTF is to keep a running count of the opening braces the RTF reader
     //      has encountered in the RTF stream. When the RTF reader sees an opening brace, it increments the count.
     //      When the reader sees a closing brace, it decrements the count. When the count becomes negative, the end
     //      of the group was found. Unfortunately, this does not work when the RTF file contains a \binN control; the
     //      reader must explicitly check each control word found to see if it is a \binN control, and if found,
     //      skip that many bytes before resuming its scanning for braces.
     binaryLength = -1;

     // A token no longer than the prefix has no room for a parameter; this also keeps the
     // Substring length below from ever being negative.
     if (token.Length <= binPrefix.Length)
     {
         return(false);
     }
     if (string.CompareOrdinal(binPrefix, 0, token.Rtf, token.StartIndex, binPrefix.Length) != 0)
     {
         return(false);
     }

     int parameterIndex = token.StartIndex + binPrefix.Length;

     if (!RtfTokenizer.IsControlWordNumericParameter(token, parameterIndex))
     {
         return(false);
     }

     int    parsed;
     string parameterText = token.Rtf.Substring(parameterIndex, token.Length - binPrefix.Length);

     if (!int.TryParse(parameterText, NumberStyles.Integer, CultureInfo.InvariantCulture, out parsed))
     {
         return(false);
     }
     if (parsed < 0)
     {
         // A byte count cannot be negative; do not report such a token as binary.
         return(false);
     }
     binaryLength = parsed;
     return(true);
 }
 private void AddPopGroup(RtfTokenizer tokenizer, RtfToken token, StringBuilder sb, RtfToken previous)
 {
     // Handler for an end-of-group token: the token is emitted unchanged via AddContent.
     AddContent(tokenizer, token, sb, previous);
 }
 private void AddContent(RtfTokenizer tokenizer, string content, StringBuilder sb, RtfToken previous)
 {
     // Appends the given literal text to the output. 'tokenizer' and 'previous' are not used
     // here; they are only part of the common handler parameter list.
     sb.Append(content);
 }
 private void AddContent(RtfTokenizer tokenizer, RtfToken token, StringBuilder sb, RtfToken previous)
 {
     // Appends the token's string form to the output. 'tokenizer' and 'previous' are not used
     // here; they are only part of the common handler parameter list.
     string tokenText = token.ToString();

     sb.Append(tokenText);
 }
 private void AddCarriageReturn(RtfTokenizer tokenizer, RtfToken token, StringBuilder sb, RtfToken previous)
 {
     // Handler for a CRLF token: intentionally emits nothing, so line breaks are dropped
     // from the normalized output.
     // NOTE(review): per the RTF specification a CRLF can also act as a control word
     // delimiter. Confirm that the tokenizer reports such a CRLF in a way that still lets a
     // required delimiter be emitted; otherwise dropping it here could join a control word
     // with the text that follows.
     // DO NOTHING.
 }
 private void AddControlSymbol(RtfTokenizer tokenizer, RtfToken token, StringBuilder sb, RtfToken previous)
 {
     // Handler for a control symbol token: the token is emitted unchanged via AddContent.
     AddContent(tokenizer, token, sb, previous);
 }