Beispiel #1
0
        /// <summary>
        /// Returns the tag already registered for <paramref name="marker"/>, or creates,
        /// registers and returns a new one when the marker is not yet known.
        /// </summary>
        /// <param name="marker">marker to look up or create a tag for</param>
        /// <returns>the existing or newly created tag</returns>
        private ScrTag CreateTag(string marker)
        {
            // An existing tag is handed back as-is so the caller can update it with
            // additional info (normally from custom.sty).
            int knownIndex;
            bool alreadyRegistered = tagIndexDictionary.TryGetValue(marker, out knownIndex);
            if (alreadyRegistered)
            {
                return tags[knownIndex];
            }

            ScrTag created = new ScrTag(marker);

            // COM based stylesheet support assumes that every marker except c & v
            // is publishable by default.
            bool isChapterOrVerse = marker == "c" || marker == "v";
            if (!isChapterOrVerse)
            {
                created.TextProperties = TextProperties.scPublishable;
            }

            // Append and remember the position of the appended tag.
            tags.Add(created);
            int createdIndex = tags.Count - 1;
            tagIndexDictionary[created.Marker] = createdIndex;

            return created;
        }
Beispiel #2
0
        /// <summary>
        /// Create a style corresponding to the list of markers given. The first marker
        /// supplies the base style (cloned); the formatting of each following marker is
        /// layered on top of it in order.
        /// </summary>
        /// <param name="markers">markers to combine, outermost first</param>
        /// <returns>
        /// the combined tag, whose marker is the given markers joined by "X";
        /// null if <paramref name="markers"/> is null or empty, or if any marker has no tag
        /// </returns>
        public ScrTag NestedStyle(string[] markers)
        {
            if (markers == null || markers.Length == 0)
            {
                return null;
            }

            // Check the lookup result BEFORE cloning: calling Clone() on a missing tag
            // would throw instead of reaching the null check.
            ScrTag baseTag = GetTag(markers[0]);
            if (baseTag == null)
            {
                return null;
            }

            ScrTag tag = baseTag.Clone();
            if (tag == null)
            {
                return null;
            }

            for (int i = 1; i < markers.Length; ++i)
            {
                ScrTag nestedTag = GetTag(markers[i]);
                if (nestedTag == null)
                {
                    return null;
                }

                tag.Marker += "X" + markers[i];
                tag.AddNestedFormatting(nestedTag);
            }

            // The combined style gets its own end marker and name; the description of
            // the base style no longer applies.
            tag.Endmarker   = tag.Marker + "*";
            tag.Description = null;
            tag.Name        = tag.Marker;

            return tag;
        }
Beispiel #3
0
        /// <summary>
        /// Builds a tag for <paramref name="marker"/> whose style type is
        /// <c>ScrStyleType.scEndStyle</c>.
        /// </summary>
        /// <param name="marker">marker text of the end marker</param>
        /// <returns>the newly created end-style tag</returns>
        internal static ScrTag MakeEndMarker(string marker)
        {
            return new ScrTag(marker) { StyleType = ScrStyleType.scEndStyle };
        }
Beispiel #4
0
        /// <summary>
        /// Parses the lines of a stylesheet, creating, updating or removing tags for
        /// every "marker" entry found.
        /// </summary>
        /// <param name="fileLines">lines of the stylesheet file</param>
        /// <returns>the list of error messages, or null when there were none</returns>
        private List <string> Parse(IEnumerable <string> fileLines)
        {
            List <StylesheetEntry> allEntries = SplitStylesheet(fileLines);

            var  seenStyles    = new HashSet <string>();
            var  problems      = new List <string>();
            bool anyMarkerSeen = false;

            for (int index = 0; index < allEntries.Count; index++)
            {
                StylesheetEntry current = allEntries[index];

                // Only "marker" entries start a style; their attribute entries are
                // consumed by ParseSTYMarkerEntry below.
                if (current.Marker == "marker")
                {
                    string[] words     = current.Text.Split();
                    bool     isRemoval = words.Length > 1 && words[1] == "-";
                    anyMarkerSeen = true;

                    if (isRemoval)
                    {
                        // An entry that looks like "\marker xy -" removes the tag and its end tag, if any
                        RemoveTag(words[0]);
                        RemoveTag(words[0] + "*");
                    }
                    else
                    {
                        ScrTag        styleTag = CreateTag(current.Text);
                        List <string> styleProblems;
                        ScrTag        closingTag =
                            ScrTag.ParseSTYMarkerEntry(styleTag, allEntries, index + 1, out styleProblems);

                        problems.AddRange(styleProblems);

                        // Register the end marker only if it is not known already.
                        if (closingTag != null && !tagIndexDictionary.ContainsKey(closingTag.Marker))
                        {
                            tags.Add(closingTag);
                            tagIndexDictionary[closingTag.Marker] = tags.Count - 1;
                        }

                        // Add() reports false when the style text was recorded before.
                        if (!seenStyles.Add(current.Text))
                        {
                            string duplicateMessage =
                                string.Format(Localizer.Str("Duplicate style definition '{0}'"), current.Text);
                            problems.Add(ScrTag.GetMessage(current.LineNumber, duplicateMessage));
                        }
                    }
                }
            }

            if (anyMarkerSeen)
            {
                return problems.Count > 0 ? problems : null;
            }

            problems.Add(ScrTag.GetMessage(1, Localizer.Str("No styles defined")));
            return problems;
        }
Beispiel #5
0
        /// <summary>
        /// Overlays the formatting of a nested marker onto this marker: every formatting
        /// value the nested tag specifies replaces the corresponding value of this tag.
        /// </summary>
        /// <param name="nested">tag whose formatting is applied on top of this one</param>
        public void AddNestedFormatting(ScrTag nested)
        {
            ScrTag overlay = nested;

            // Font name and size
            if (overlay.Fontname != string.Empty) { Fontname = overlay.Fontname; }
            if (overlay.RawFontSize != null)      { FontSize = overlay.FontSize; }

            // On/off character formatting: copied only when the raw value is set
            if (overlay.RawBold != null)        { Bold        = overlay.Bold; }
            if (overlay.RawSmallCaps != null)   { SmallCaps   = overlay.SmallCaps; }
            if (overlay.RawSubscript != null)   { Subscript   = overlay.Subscript; }
            if (overlay.RawItalic != null)      { Italic      = overlay.Italic; }
            if (overlay.RawUnderline != null)   { Underline   = overlay.Underline; }
            if (overlay.RawSuperscript != null) { Superscript = overlay.Superscript; }

            // Color
            if (overlay.RawColor != null) { RawColor = (int)overlay.RawColor; }

            // A "regular" nested style switches bold, italic and superscript off,
            // overriding whatever was copied above.
            if (!overlay.Regular)
            {
                return;
            }

            Bold        = false;
            Italic      = false;
            Superscript = false;
        }
Beispiel #6
0
            /// <summary>
            /// Records an error when the tag for <paramref name="marker"/> restricts where it
            /// may occur and <paramref name="contextMarker"/> is not one of the allowed markers.
            /// </summary>
            /// <param name="state">parser state supplying the verse reference and offset for the error</param>
            /// <param name="marker">marker being validated</param>
            /// <param name="contextMarker">marker the validated marker occurs under; may be null</param>
            /// <param name="includeMarkerInError">true to report "\marker" as the error text, false to report an empty text</param>
            private void ValidateOccursUnder(UsfmParserState state, string marker, string contextMarker, bool includeMarkerInError)
            {
                var allowedContexts = scrStylesheet.GetTag(marker).OccursUnderList;

                // No restriction declared: the marker may occur anywhere.
                if (allowedContexts.Count == 0)
                {
                    return;
                }

                // The context is one of the allowed ones: nothing to report.
                if (contextMarker != null && allowedContexts.Contains(contextMarker))
                {
                    return;
                }

                string reportedText = includeMarkerInError ? "\\" + marker : "";
                recordError(new VerseRef(state.VerseRef), reportedText, state.VerseOffset,
                            GetErrorMessage(notHereMessage, marker));
            }
Beispiel #7
0
            /// <summary>
            /// Tells whether the attribute list contains anything other than the tag's
            /// single default attribute.
            /// </summary>
            /// <param name="tag">tag whose DefaultAttribute is compared against</param>
            /// <param name="namedAttributes">attributes found on the marker</param>
            /// <returns>
            /// false for no attributes; true for more than one attribute; for exactly one,
            /// true when its name differs from the tag's default attribute
            /// </returns>
            private bool HasNonDefaultAttributes(ScrTag tag, NamedAttribute[] namedAttributes)
            {
                switch (namedAttributes.Length)
                {
                case 0:
                    return false;

                case 1:
                    return namedAttributes[0].Name != tag.DefaultAttribute;

                default:
                    return true;
                }
            }
Beispiel #8
0
        /// <summary>
        /// Adds the specified tag to the stylesheet. A tag whose marker is already
        /// registered replaces the existing tag in place; otherwise the tag is appended.
        /// </summary>
        /// <param name="tag">tag to add; its Marker is the lookup key</param>
        protected void AddTagInternal(ScrTag tag)
        {
            int existingTagIndex;

            if (tagIndexDictionary.TryGetValue(tag.Marker, out existingTagIndex))
            {
                tags[existingTagIndex] = tag;
            }
            else
            {
                tags.Add(tag);
                // Index of the element just appended. Derived from the list itself (as the
                // other registration code does) rather than from the dictionary size, so the
                // stored index stays correct even if the two collections differ in count.
                tagIndexDictionary[tag.Marker] = tags.Count - 1;
            }
        }
Beispiel #9
0
        /// <summary>
        /// Applies a text type entry to the tag: chapter and verse number types also
        /// add the matching text property before the text type itself is set.
        /// </summary>
        /// <param name="qTag">tag to update</param>
        /// <param name="entry">stylesheet entry whose Text holds the text type</param>
        private static void ParseTextType(ScrTag qTag, StylesheetEntry entry)
        {
            // The special cases are matched case-insensitively.
            switch (entry.Text.ToLowerInvariant())
            {
            case "chapternumber":
                qTag.AddTextProperty(TextProperties.scChapter);
                break;

            case "versenumber":
                qTag.AddTextProperty(TextProperties.scVerse);
                break;
            }

            // The enum lookup receives the entry text unchanged.
            qTag.TextType = ParseEnum(entry.Text, propToTextType);
        }
Beispiel #10
0
            /// <summary>
            /// Determines if a paragraph tag is valid in the context described by the stack.
            /// To validate a series, start with an empty stack and call repeatedly with each
            /// paragraph style.
            /// </summary>
            /// <param name="stack">stack to use (will be modified when addTag is true)</param>
            /// <param name="tag">tag to check</param>
            /// <param name="addTag">true to add tag to stack, false to check only</param>
            /// <returns>true if valid</returns>
            private static bool IsParagraphTagValid(List <ScrTag> stack, ScrTag tag, bool addTag)
            {
                // Anything is acceptable on an empty stack.
                if (stack.Count == 0)
                {
                    if (addTag)
                    {
                        stack.Add(tag);
                    }
                    return true;
                }

                // A tag without occurs-under restrictions is always valid (the stack is left as is).
                var allowedParents = tag.OccursUnderList;
                if (allowedParents.Count == 0)
                {
                    return true;
                }

                // Walk from the top of the stack downwards looking for an allowed parent.
                for (int depth = stack.Count - 1; depth >= 0; depth--)
                {
                    if (!allowedParents.Contains(stack[depth].Marker))
                    {
                        continue;
                    }

                    // The parent is acceptable when it is the top of the stack, when this tag
                    // has no rank requirement, or when the entry above the parent does not
                    // outrank this tag.
                    bool parentIsTop = depth == stack.Count - 1;
                    bool rankAllows  = parentIsTop ||
                                       tag.Rank == 0 ||
                                       stack[depth + 1].Rank <= tag.Rank;
                    if (!rankAllows)
                    {
                        continue;
                    }

                    if (addTag)
                    {
                        // Drop everything above the parent, then push the tag.
                        if (!parentIsTop)
                        {
                            stack.RemoveRange(depth + 1, stack.Count - depth - 1);
                        }
                        stack.Add(tag);
                    }
                    return true;
                }

                return false;
            }
Beispiel #11
0
            /// <summary>
            /// Checks the attributes found on a marker and records an error for unsupported
            /// attributes (when version 3 USFM is not allowed), for missing required
            /// attributes and for unknown attributes.
            /// </summary>
            /// <param name="state">parser state supplying the verse reference and offset for errors</param>
            /// <param name="tag">tag of the marker being checked</param>
            /// <param name="marker">marker being checked</param>
            /// <param name="namedAttributes">attributes found on the marker</param>
            private void ValidateAttributes(UsfmParserState state, ScrTag tag, string marker,
                                            NamedAttribute[] namedAttributes)
            {
                if (!markerCheck.allowVersion3Usfm)
                {
                    // Figures may only carry the figure attributes; character styles may only
                    // carry their single default attribute.
                    bool hasUnsupported;
                    if (marker == "fig")
                    {
                        hasUnsupported = namedAttributes.Any(a => !figureAttributes.Contains(a.Name.InternalValue));
                    }
                    else
                    {
                        hasUnsupported = tag.StyleType == ScrStyleType.scCharacterStyle &&
                                         HasNonDefaultAttributes(tag, namedAttributes);
                    }

                    if (hasUnsupported)
                    {
                        recordError(new VerseRef(state.VerseRef), "\\" + marker, state.VerseOffset,
                                    GetErrorMessage(unsupportedAttributesMessage, marker));
                    }
                }

                // Required attributes of the tag that are not among the attributes supplied
                string[] missingNames =
                    (from declared in tag.Attributes
                     where declared.IsRequired && namedAttributes.All(na => na.Name != declared.Name)
                     select declared.Name.InternalValue).ToArray();
                if (missingNames.Length > 0)
                {
                    string missingList = string.Join(", ", missingNames);
                    string missingText = string.Format(Localizer.Str(@"Missing required attributes ({0})"), missingList);
                    RecordMarkerError(state, marker, missingText + markerSlot);
                }

                // Supplied attributes that do not start with x-, are not one of the link
                // attributes (valid on any style) and are not declared for the tag
                string[] unknownNames =
                    (from supplied in namedAttributes
                     where !supplied.Name.InternalValue.StartsWith("x-", StringComparison.OrdinalIgnoreCase)
                     where !linkAttributes.Contains(supplied.Name.InternalValue)
                     where tag.Attributes.All(a => a.Name != supplied.Name)
                     select supplied.Name.InternalValue).ToArray();
                if (unknownNames.Length > 0)
                {
                    string unknownList = string.Join(", ", unknownNames);
                    string unknownText = string.Format(Localizer.Str(@"Unknown attributes ({0})"), unknownList);
                    RecordMarkerError(state, marker, unknownText + markerSlot);
                }
            }
Beispiel #12
0
        /// <summary>
        /// Returns the index of the tag for <paramref name="marker"/>. A marker that is not
        /// yet known gets a new tag of unknown style, colored red, and that tag's index is
        /// returned.
        /// </summary>
        /// <param name="marker">marker to look up</param>
        /// <returns>index of the existing or newly created tag</returns>
        public int GetTagIndex(string marker)
        {
            lock (syncRoot)
            {
                int tagIndex;
                if (!tagIndexDictionary.TryGetValue(marker, out tagIndex))
                {
                    // Unknown marker: register a placeholder tag for it.
                    ScrTag placeholder = CreateTag(marker);
                    placeholder.StyleType = ScrStyleType.scUnknownStyle;
                    placeholder.Color     = new RgbColor(255, 0, 0);

                    tagIndex = tagIndexDictionary[placeholder.Marker];
                }

                return tagIndex;
            }
        }
Beispiel #13
0
        /// <summary>
        /// Adds every text property named in the entry (whitespace separated, matched in
        /// lower case) to the tag. A tag marked nonpublishable loses the publishable property.
        /// </summary>
        /// <param name="qTag">tag to update</param>
        /// <param name="entry">stylesheet entry whose Text lists the properties</param>
        private static void ParseTextProperties(ScrTag qTag, StylesheetEntry entry)
        {
            string[] words = entry.Text.ToLowerInvariant().Split();

            for (int i = 0; i < words.Length; i++)
            {
                string word = words[i];

                // Consecutive whitespace yields blank pieces; ignore them.
                if (word.Trim() != "")
                {
                    qTag.AddTextProperty(ParseEnum(word, propToTextProps));
                }
            }

            // Nonpublishable wins over publishable.
            bool isNonpublishable = qTag.HasTextProperty(TextProperties.scNonpublishable);
            if (isNonpublishable)
            {
                qTag.RemoveTextProperty(TextProperties.scPublishable);
            }
        }
Beispiel #14
0
        /// <summary>
        /// Fills in <paramref name="qTag"/> from the stylesheet entries that follow its
        /// "marker" entry and returns the tag for its end marker, if there is one.
        /// Reading stops at the next "marker" entry or at the end of the list.
        /// </summary>
        /// <param name="qTag">qTag needs to have the Marker set when calling this method</param>
        /// <param name="stylesheetEntries">all entries of the stylesheet being parsed</param>
        /// <param name="entryIndex">index of the first entry to read; the entry just before it
        /// (entryIndex - 1) supplies the line number used for errors about the style as a whole</param>
        /// <param name="errors">receives the error messages collected while parsing (empty when there are none)</param>
        /// <returns>
        /// the end-marker tag: the one named by an "endmarker" entry, or for a character style
        /// without such an entry one for Marker + "*"; null when there is no end marker
        /// </returns>
        internal static ScrTag ParseSTYMarkerEntry(ScrTag qTag, List <StylesheetEntry> stylesheetEntries, int entryIndex,
                                                   out List <string> errors)
        {
            // Line number of the entry preceding the first one read; used for style-level errors.
            int markerLineNumber = stylesheetEntries[entryIndex - 1].LineNumber;

            // The following items are present for conformance with
            // Paratext release 5.0 stylesheets.  Release 6.0 and later
            // follows the guidelines set in InitPropertyMaps.
            // Make sure \id gets book property
            if (qTag.Marker == "id")
            {
                qTag.AddTextProperty(TextProperties.scBook);
            }

            errors = new List <string>();
            HashSet <string> foundAttribs  = new HashSet <string>();
            ScrTag           qTagEndMarker = null;

            while (entryIndex < stylesheetEntries.Count)
            {
                StylesheetEntry entry = stylesheetEntries[entryIndex];
                ++entryIndex;

                // The next "marker" entry starts another style: this style is complete.
                if (entry.Marker == "marker")
                {
                    break;
                }

                // A repeated attribute is reported, but it is still applied below.
                if (foundAttribs.Contains(entry.Marker))
                {
                    errors.Add(GetMessage(entry.LineNumber, string.Format(Localizer.Str("Duplicate style attribute '{0}'"), entry.Marker)));
                }

                try
                {
                    // For the numeric and color attributes that test for it, a text of "-"
                    // yields 0; for the on/off attributes any text other than "-" means "on".
                    switch (entry.Marker)
                    {
                    case "name": qTag.Name = entry.Text; break;

                    case "description": qTag.Description = entry.Text; break;

                    case "fontname": qTag.Fontname = entry.Text; break;

                    case "fontsize": qTag.FontSize = entry.Text == "-" ? 0 : ParseI(entry); break;

                    case "xmltag": qTag.XMLTag = entry.Text; break;

                    case "encoding": qTag.Encoding = entry.Text; break;

                    case "linespacing": qTag.LineSpacing = ParseI(entry); break;

                    case "spacebefore": qTag.SpaceBefore = ParseI(entry); break;

                    case "spaceafter": qTag.SpaceAfter = ParseI(entry); break;

                    case "leftmargin": qTag.LeftMargin = ParseF(entry); break;

                    case "rightmargin": qTag.RightMargin = ParseF(entry); break;

                    case "firstlineindent": qTag.FirstLineIndent = ParseF(entry); break;

                    case "rank": qTag.Rank = entry.Text == "-" ? 0 : ParseI(entry); break;

                    case "bold": qTag.Bold = (entry.Text != "-"); break;

                    case "smallcaps": qTag.SmallCaps = (entry.Text != "-"); break;

                    case "subscript": qTag.Subscript = (entry.Text != "-"); break;

                    case "italic": qTag.Italic = (entry.Text != "-"); break;

                    // FB 23177 - added the \Regular tag so that there is a way to reset Italic, Bold and Superscript
                    // that is compatible with the ptx2pdf macros used by PrintDraft
                    case "regular":
                        qTag.Italic  = qTag.Bold = qTag.Superscript = false;
                        qTag.Regular = true;
                        break;

                    case "underline": qTag.Underline = (entry.Text != "-"); break;

                    case "superscript": qTag.Superscript = (entry.Text != "-"); break;

                    case "testylename": break;     // Ignore this tag, later we will use it to tie to FW styles

                    case "notrepeatable": qTag.NotRepeatable = (entry.Text != "-"); break;

                    case "textproperties": ParseTextProperties(qTag, entry); break;

                    case "texttype": ParseTextType(qTag, entry); break;

                    case "color": qTag.RawColor = entry.Text == "-" ? 0 : ParseColor(entry); break;

                    case "colorname": qTag.RawColor = entry.Text == "-" ? 0 : GetThemeColor(entry); break;

                    case "justification": qTag.JustificationType = ParseEnum(entry.Text, propToJustification); break;

                    case "styletype": qTag.StyleType = ParseEnum(entry.Text, propToStyleType); break;

                    case "attributes":
                        // An ArgumentException raised while assigning the attribute text is
                        // reported as an error for this line.
                        try
                        {
                            qTag.RawAttributes = entry.Text;
                        }
                        catch (ArgumentException e)
                        {
                            errors.Add(GetMessage(entry.LineNumber, e.Message));
                        }
                        break;

                    case "occursunder":
                        // Normalize the list: split on spaceSep, drop empty pieces, join with single spaces.
                        qTag.OccursUnder = String.Join(" ", entry.Text.Split(spaceSep, StringSplitOptions.RemoveEmptyEntries));
                        break;

                    case "endmarker":
                        qTagEndMarker  = MakeEndMarker(entry.Text);
                        qTag.Endmarker = entry.Text;
                        break;

                    default:
                        errors.Add(GetMessage(entry.LineNumber, string.Format(Localizer.Str("Unknown marker: {0}"), entry.Marker)));
                        break;
                    }
                }
                catch (ArgumentOutOfRangeException e)
                {
                    // NOTE(review): presumably thrown by the Parse* helpers for values they cannot
                    // interpret - confirm. The problem is reported and parsing continues with the next entry.
                    errors.Add(GetMessage(entry.LineNumber,
                                          string.Format(Localizer.Str("Invalid definition for marker '{0}': {1}"), entry.Marker, e.ActualValue)));
                }
                foundAttribs.Add(entry.Marker);
            }

            // Every style needs a name.
            if (string.IsNullOrEmpty(qTag.Name))
            {
                errors.Add(GetMessage(markerLineNumber, string.Format(Localizer.Str("Missing name for style: {0}"), qTag.Marker)));
            }

            // If we have not seen an end marker but this is a character style,
            // create the implied end marker (marker followed by "*")
            if (qTag.StyleType == ScrStyleType.scCharacterStyle && qTagEndMarker == null)
            {
                string endMarker = qTag.Marker + "*";
                qTagEndMarker  = MakeEndMarker(endMarker);
                qTag.Endmarker = endMarker;
            }
            else if (qTag.StyleType == ScrStyleType.scMilestone)
            {
                // A milestone must name its end marker explicitly; that end marker becomes a
                // milestone end sharing the milestone's name.
                if (qTagEndMarker != null)
                {
                    qTagEndMarker.StyleType     = ScrStyleType.scMilestoneEnd;
                    qTagEndMarker.RawAttributes = "?id"; // id is always an optional attribute for the end marker
                    qTagEndMarker.Name          = qTag.Name;
                }
                else
                {
                    errors.Add(GetMessage(markerLineNumber,
                                          string.Format(Localizer.Str("Missing end marker for style: {0}"), qTag.Marker)));
                }
            }

            // Special cases: character and paragraph styles of text type "other" are publishable
            // unless they are marked nonpublishable or are chapter/verse markers
            if (qTag.TextType == ScrTextType.scOther &&
                !qTag.HasTextProperty(TextProperties.scNonpublishable) &&
                !qTag.HasTextProperty(TextProperties.scChapter) &&
                !qTag.HasTextProperty(TextProperties.scVerse) &&
                (qTag.StyleType == ScrStyleType.scCharacterStyle || qTag.StyleType == ScrStyleType.scParagraphStyle))
            {
                qTag.AddTextProperty(TextProperties.scPublishable);
            }

            return(qTagEndMarker);
        }
Beispiel #15
0
 /// <summary>
 /// Tells whether a marker must be closed explicitly: true for "fig" and for any tag
 /// whose occurs-under list contains "NEST".
 /// </summary>
 /// <param name="markerWithoutPlus">marker text without a leading plus</param>
 /// <param name="scrTag">tag of the marker; only consulted when the marker is not "fig"</param>
 /// <returns>true if the marker requires a closing marker</returns>
 private static bool MarkerRequiresClose(string markerWithoutPlus, ScrTag scrTag)
 {
     if (markerWithoutPlus == "fig")
     {
         return true;
     }

     return scrTag.OccursUnderList.Contains("NEST");
 }
Beispiel #16
0
        /// <summary>
        /// Tokenize the specified USFM text. The text is scanned from start to end: runs of
        /// text up to the next backslash become text tokens (with any attributes after '|'
        /// moved onto the matching start marker), and each marker becomes a token whose
        /// type is chosen from the style type of its tag in the stylesheet.
        /// </summary>
        /// <param name="scrStylesheet">stylesheet to use</param>
        /// <param name="usfm">usfm string</param>
        /// <param name="preserveWhitespace">true to preserve all whitespaces verbatim in tokens</param>
        /// <returns>list of tokens</returns>
        public static List <UsfmToken> Tokenize(ScrStylesheet scrStylesheet, string usfm, bool preserveWhitespace)
        {
            List <UsfmToken> tokens = new List <UsfmToken>();
            UsfmToken        lastTokenWithAttributes = null;

            int index = 0;              // Current position

            while (index < usfm.Length)
            {
                // Position of the next backslash after the current character, or the end of the text
                int nextMarkerIndex = (index < usfm.Length - 1) ? usfm.IndexOf('\\', index + 1) : -1;
                if (nextMarkerIndex == -1)
                {
                    nextMarkerIndex = usfm.Length;
                }

                // If text, create text token until end or next \
                var ch = usfm[index];
                if (ch != '\\')
                {
                    string text = usfm.Substring(index, nextMarkerIndex - index);
                    if (!preserveWhitespace)
                    {
                        text = RegularizeSpaces(text);
                    }

                    lastTokenWithAttributes = null;
                    int attributeIndex = text.IndexOf('|');
                    if (attributeIndex >= 0)
                    {
                        UsfmToken matchingToken = FindMatchingStartMarker(usfm, tokens, nextMarkerIndex);
                        if (matchingToken != null)
                        {
                            ScrTag matchingTag = scrStylesheet.GetTag(matchingToken.NestlessMarker);
                            // leave attributes of other styles as regular text
                            if (matchingTag.StyleType == ScrStyleType.scCharacterStyle || matchingTag.StyleType == ScrStyleType.scMilestone ||
                                matchingTag.StyleType == ScrStyleType.scMilestoneEnd)
                            {
                                string adjustedText = text.Substring(0, attributeIndex);
                                if (matchingToken.SetAttributes(text.Substring(attributeIndex + 1),
                                                                matchingTag.DefaultAttribute, ref adjustedText, preserveWhitespace))
                                {
                                    text = adjustedText;
                                    // attributes for ending milestone are not copied from the beginning milestone, so don't update last token value
                                    if (matchingTag.StyleType == ScrStyleType.scCharacterStyle)
                                    {
                                        lastTokenWithAttributes = matchingToken;
                                    }
                                }
                            }
                        }
                    }

                    if (text.Length > 0)
                    {
                        tokens.Add(new UsfmToken(UsfmTokenType.Text, null, text, null));
                    }

                    index = nextMarkerIndex;
                    continue;
                }

                // Get marker (and move past whitespace or star ending)
                index++;
                int markerStart = index;
                while (index < usfm.Length)
                {
                    ch = usfm[index];

                    // Backslash starts a new marker
                    if (ch == '\\')
                    {
                        break;
                    }

                    // don't require a space before the | that starts attributes - mainly for milestones to allow \qt-s|speaker\*
                    if (ch == '|')
                    {
                        break;
                    }

                    // End star is part of marker
                    if (ch == '*')
                    {
                        index++;
                        break;
                    }

                    if (IsNonSemanticWhiteSpace(ch))
                    {
                        // Preserve whitespace if needed, otherwise skip
                        if (!preserveWhitespace)
                        {
                            index++;
                        }
                        break;
                    }
                    index++;
                }
                string marker = usfm.Substring(markerStart, index - markerStart).TrimEnd();
                // Milestone stop/end markers are ended with \*, so marker will just be * and can be skipped
                if (marker == "*")
                {
                    // Make sure that the previous token was a milestone - have to skip space-only tokens that may have
                    // been added when preserveWhitespace is true. LastOrDefault (not Last) is required here: when every
                    // token so far is a whitespace-only text token nothing matches, and Last would throw.
                    UsfmToken prevToken = tokens.Count > 0 ? tokens.LastOrDefault(t => t.Type != UsfmTokenType.Text || t.Text.Trim() != "") : null;
                    if (prevToken != null && (prevToken.Type == UsfmTokenType.Milestone ||
                                              prevToken.Type == UsfmTokenType.MilestoneEnd))
                    {
                        // if the last item is an empty text token, remove it so we don't get extra space.
                        if (tokens.Last().Type == UsfmTokenType.Text)
                        {
                            tokens.RemoveAt(tokens.Count - 1);
                        }
                        continue;
                    }
                }

                // Multiple whitespace after non-end marker is ok
                if (!marker.EndsWith("*", StringComparison.Ordinal) && !preserveWhitespace)
                {
                    while ((index < usfm.Length) && IsNonSemanticWhiteSpace(usfm[index]))
                    {
                        index++;
                    }
                }

                // Lookup tag
                ScrTag tag = scrStylesheet.GetTag(marker.TrimStart('+'));

                // If starts with a plus and is not a character style or an end style, it is an unknown tag
                if (marker.StartsWith("+", StringComparison.Ordinal) && tag.StyleType != ScrStyleType.scCharacterStyle && tag.StyleType != ScrStyleType.scEndStyle)
                {
                    tag = scrStylesheet.GetTag(marker);
                }

                // Note: Unless this is a milestone, tag.Marker and tag.Endmarker are ignored, as the plus prefix
                // must be kept and the end marker is always marker + "*"
                string endMarker = tag.StyleType != ScrStyleType.scMilestone ? marker + "*" : tag.Endmarker;

                switch (tag.StyleType)
                {
                case ScrStyleType.scCharacterStyle:
                    // Handle verse special case
                    UsfmToken newToken;
                    if ((tag.TextProperties & TextProperties.scVerse) > 0)
                    {
                        newToken = new UsfmToken(UsfmTokenType.Verse, marker, null, null,
                                                 GetNextWord(usfm, ref index, preserveWhitespace));
                    }
                    else
                    {
                        newToken = new UsfmToken(UsfmTokenType.Character, marker, null, endMarker);
                    }
                    tokens.Add(newToken);
                    break;

                case ScrStyleType.scParagraphStyle:
                    // Handle chapter special case
                    if ((tag.TextProperties & TextProperties.scChapter) > 0)
                    {
                        tokens.Add(new UsfmToken(UsfmTokenType.Chapter, marker, null, null, GetNextWord(usfm, ref index, preserveWhitespace)));
                    }
                    else if ((tag.TextProperties & TextProperties.scBook) > 0)
                    {
                        tokens.Add(new UsfmToken(UsfmTokenType.Book, marker, null, null, GetNextWord(usfm, ref index, preserveWhitespace)));
                    }
                    else
                    {
                        tokens.Add(new UsfmToken(UsfmTokenType.Paragraph, marker, null, endMarker));
                    }
                    break;

                case ScrStyleType.scNoteStyle:
                    tokens.Add(new UsfmToken(UsfmTokenType.Note, marker, null, endMarker, GetNextWord(usfm, ref index, preserveWhitespace)));
                    break;

                case ScrStyleType.scEndStyle:
                    lastTokenWithAttributes = AddEndMarker(marker, tokens, lastTokenWithAttributes);
                    break;

                case ScrStyleType.scUnknownStyle:
                    // End tokens are always end tokens, even if unknown
                    if (marker.EndsWith("*", StringComparison.Ordinal))
                    {
                        lastTokenWithAttributes = AddEndMarker(marker, tokens, lastTokenWithAttributes);
                    }
                    else
                    {
                        // Handle special case of esb and esbe which might not be in basic stylesheet
                        // but are always sidebars and so should be tokenized as paragraphs
                        if (marker == "esb" || marker == "esbe")
                        {
                            tokens.Add(new UsfmToken(UsfmTokenType.Paragraph, marker, null, endMarker));
                            break;
                        }
                        // Create unknown token with a corresponding end marker
                        tokens.Add(new UsfmToken(UsfmTokenType.Unknown, marker, null, marker + "*"));
                    }
                    break;

                case ScrStyleType.scMilestone:
                case ScrStyleType.scMilestoneEnd:
                    // If a milestone is not followed by an ending \*, don't create a milestone token for the beginning.
                    // Instead create a text token for all the text up to the beginning of the next marker. This makes
                    // typing of milestones easiest, since a partially typed milestone is not reformatted as if it had
                    // a normal ending that has not been typed yet.
                    if (!MilestoneEnded(usfm, index))
                    {
                        int endOfText = (index < usfm.Length - 1) ? usfm.IndexOf('\\', index + 1) : -1;
                        if (endOfText == -1)
                        {
                            endOfText = usfm.Length;
                        }
                        string milestoneText = usfm.Substring(index, endOfText - index);
                        // add back space that was removed after marker
                        if (milestoneText.Length > 0 && milestoneText[0] != ' ' && milestoneText[0] != '|')
                        {
                            milestoneText = " " + milestoneText;
                        }
                        tokens.Add(new UsfmToken(UsfmTokenType.Text, null, @"\" + marker + milestoneText, null));
                        index = endOfText;
                    }
                    else if (tag.StyleType == ScrStyleType.scMilestone)
                    {
                        tokens.Add(new UsfmToken(UsfmTokenType.Milestone, marker, null, endMarker));
                    }
                    else
                    {
                        tokens.Add(new UsfmToken(UsfmTokenType.MilestoneEnd, marker, null, null));
                    }
                    break;

                default:
                    Debug.Fail("Unknown ScrStyleType");
                    break;
                }
            }

            // Forces a space to be present in tokenization if immediately
            // before a token requiring a preceding CR/LF. This is to ensure
            // that when written to disk and re-read, that tokenization
            // will match. For example, "\p test\p here" requires a space
            // after "test". Also, "\p \em test\em*\p here" requires a space
            // token inserted after \em*
            if (!preserveWhitespace)
            {
                for (int i = 1; i < tokens.Count; i++)
                {
                    // If requires newline (verses do, except when after '(' or '[')
                    if (tokens[i].Type == UsfmTokenType.Book ||
                        tokens[i].Type == UsfmTokenType.Chapter ||
                        tokens[i].Type == UsfmTokenType.Paragraph ||
                        (tokens[i].Type == UsfmTokenType.Verse &&
                         !(tokens[i - 1].Type == UsfmTokenType.Text &&
                           (tokens[i - 1].Text.EndsWith("(", StringComparison.Ordinal) || tokens[i - 1].Text.EndsWith("[", StringComparison.Ordinal)))))
                    {
                        // Add space to text token
                        if (tokens[i - 1].Type == UsfmTokenType.Text)
                        {
                            if (!tokens[i - 1].Text.EndsWith(" ", StringComparison.Ordinal))
                            {
                                tokens[i - 1].Text = tokens[i - 1].Text + " ";
                            }
                        }
                        else if (tokens[i - 1].Type == UsfmTokenType.End)
                        {
                            // Insert space token after * of end marker; step over it so it is not examined again
                            tokens.Insert(i, new UsfmToken(UsfmTokenType.Text, null, " ", null));
                            i++;
                        }
                    }
                }
            }

            return(tokens);
        }