/// <summary>
/// Returns the tag registered for <paramref name="marker"/>, creating and registering
/// a new one when the marker is not yet in the index dictionary.
/// </summary>
/// <param name="marker">marker to look up or create</param>
/// <returns>the existing tag, or the newly created one</returns>
private ScrTag CreateTag(string marker)
{
    // An already-registered tag is handed back so the caller can update it
    // with additional info (normally from custom.sty).
    int existingIndex;
    if (tagIndexDictionary.TryGetValue(marker, out existingIndex))
    {
        return tags[existingIndex];
    }

    ScrTag created = new ScrTag(marker);

    // COM based stylesheet support assumes that every marker except c & v
    // is publishable by default.
    bool isChapterOrVerse = marker == "c" || marker == "v";
    if (!isChapterOrVerse)
    {
        created.TextProperties = TextProperties.scPublishable;
    }

    // The new tag goes at the end of the list, so its index is the current count.
    int newIndex = tags.Count;
    tags.Add(created);
    tagIndexDictionary[created.Marker] = newIndex;
    return created;
}
/// <summary>
/// Create a style corresponding to the list of markers given.
/// The first marker supplies the base style (cloned); the formatting of each following
/// marker is layered on top via <c>AddNestedFormatting</c>.
/// </summary>
/// <param name="markers">markers to combine; the first one is the outermost style</param>
/// <returns>
/// The combined style, whose Marker is the markers joined with "X", whose Endmarker is
/// that marker followed by "*", whose Name equals its Marker and whose Description is
/// null; or null when any of the markers cannot be resolved to a tag.
/// </returns>
public ScrTag NestedStyle(string[] markers)
{
    // Check the lookup result BEFORE cloning: calling Clone() on a null tag
    // would throw instead of reaching the null check.
    ScrTag baseTag = GetTag(markers[0]);
    if (baseTag == null)
    {
        return null;
    }

    ScrTag tag = baseTag.Clone();
    if (tag == null)
    {
        return null;
    }

    for (int i = 1; i < markers.Length; ++i)
    {
        ScrTag nestedTag = GetTag(markers[i]);
        if (nestedTag == null)
        {
            return null;
        }

        tag.Marker += "X" + markers[i];
        tag.AddNestedFormatting(nestedTag);
    }

    tag.Endmarker = tag.Marker + "*";
    tag.Description = null;
    tag.Name = tag.Marker;
    return tag;
}
/// <summary>
/// Builds a tag for the given marker whose style type is <c>scEndStyle</c>.
/// </summary>
/// <param name="marker">marker text of the end tag</param>
/// <returns>the newly created end-style tag</returns>
internal static ScrTag MakeEndMarker(string marker)
{
    return new ScrTag(marker) { StyleType = ScrStyleType.scEndStyle };
}
/// <summary>
/// Parses the lines of a stylesheet, registering (or removing) a tag for every
/// "marker" entry found.
/// </summary>
/// <param name="fileLines">lines of the stylesheet file</param>
/// <returns>
/// The list of error messages, or null when no error was recorded.
/// A stylesheet without any "marker" entry yields a "No styles defined" error.
/// </returns>
private List<string> Parse(IEnumerable<string> fileLines)
{
    List<StylesheetEntry> entries = SplitStylesheet(fileLines);
    HashSet<string> seenStyles = new HashSet<string>();
    List<string> errors = new List<string>();
    bool sawMarker = false;

    for (int entryIdx = 0; entryIdx < entries.Count; ++entryIdx)
    {
        StylesheetEntry entry = entries[entryIdx];
        if (entry.Marker != "marker")
        {
            continue;
        }

        sawMarker = true;

        // An entry that looks like "\marker xy -" removes the tag and its end tag, if any.
        string[] words = entry.Text.Split();
        bool isRemoval = words.Length > 1 && words[1] == "-";
        if (isRemoval)
        {
            RemoveTag(words[0]);
            RemoveTag(words[0] + "*");
            continue;
        }

        ScrTag tag = CreateTag(entry.Text);
        List<string> tagErrors;
        ScrTag endTag = ScrTag.ParseSTYMarkerEntry(tag, entries, entryIdx + 1, out tagErrors);
        errors.AddRange(tagErrors);

        // Register the end tag unless one with the same marker is already known.
        if (endTag != null && !tagIndexDictionary.ContainsKey(endTag.Marker))
        {
            tags.Add(endTag);
            tagIndexDictionary[endTag.Marker] = tags.Count - 1;
        }

        // HashSet.Add reports false when the style was already seen in this stylesheet.
        if (!seenStyles.Add(entry.Text))
        {
            errors.Add(ScrTag.GetMessage(entry.LineNumber,
                string.Format(Localizer.Str("Duplicate style definition '{0}'"), entry.Text)));
        }
    }

    if (!sawMarker)
    {
        errors.Add(ScrTag.GetMessage(1, Localizer.Str("No styles defined")));
        return errors;
    }

    if (errors.Count == 0)
    {
        return null;
    }

    return errors;
}
/// <summary>
/// Overlays the formatting of a nested marker onto this marker: every formatting
/// property the nested tag explicitly defines replaces the value held by this tag.
/// </summary>
/// <param name="nested">tag whose formatting is applied on top of this one</param>
public void AddNestedFormatting(ScrTag nested)
{
    // Font
    if (nested.Fontname != "")
    {
        Fontname = nested.Fontname;
    }

    if (nested.RawFontSize != null)
    {
        FontSize = nested.FontSize;
    }

    // Character attributes: only those with a raw (explicitly set) value are copied.
    if (nested.RawBold != null)
    {
        Bold = nested.Bold;
    }

    if (nested.RawSmallCaps != null)
    {
        SmallCaps = nested.SmallCaps;
    }

    if (nested.RawSubscript != null)
    {
        Subscript = nested.Subscript;
    }

    if (nested.RawItalic != null)
    {
        Italic = nested.Italic;
    }

    if (nested.RawUnderline != null)
    {
        Underline = nested.Underline;
    }

    if (nested.RawSuperscript != null)
    {
        Superscript = nested.Superscript;
    }

    // Color
    if (nested.RawColor != null)
    {
        RawColor = (int)nested.RawColor;
    }

    // A "regular" nested tag switches bold, italic and superscript off.
    // This runs last so that it wins over the values copied above.
    if (!nested.Regular)
    {
        return;
    }

    Bold = false;
    Italic = false;
    Superscript = false;
}
/// <summary>
/// Records an error when <paramref name="marker"/> restricts where it may occur and
/// <paramref name="contextMarker"/> is not one of the allowed containing markers.
/// </summary>
/// <param name="state">parser state supplying the verse reference and offset of the error</param>
/// <param name="marker">marker being validated</param>
/// <param name="contextMarker">marker the tag occurs under, or null if there is none</param>
/// <param name="includeMarkerInError">true to report "\marker" as the error text, false to report an empty string</param>
private void ValidateOccursUnder(UsfmParserState state, string marker, string contextMarker,
    bool includeMarkerInError)
{
    var allowedParents = scrStylesheet.GetTag(marker).OccursUnderList;

    // A tag without an occurs-under list may appear anywhere.
    if (allowedParents.Count <= 0)
    {
        return;
    }

    // The context is acceptable when it is one of the listed parents.
    if (contextMarker != null && allowedParents.Contains(contextMarker))
    {
        return;
    }

    VerseRef errorRef = new VerseRef(state.VerseRef);
    string reportedMarker = includeMarkerInError ? "\\" + marker : "";
    recordError(errorRef, reportedMarker, state.VerseOffset, GetErrorMessage(notHereMessage, marker));
}
/// <summary>
/// Tells whether the given attributes contain anything other than the tag's single
/// default attribute.
/// </summary>
/// <param name="tag">tag whose default attribute is compared against</param>
/// <param name="namedAttributes">attributes found on the marker</param>
/// <returns>
/// false when there are no attributes, or when the only attribute is the tag's default one;
/// true otherwise.
/// </returns>
private bool HasNonDefaultAttributes(ScrTag tag, NamedAttribute[] namedAttributes)
{
    switch (namedAttributes.Length)
    {
        case 0:
            return false;

        case 1:
            // A lone attribute is acceptable only if it is the default attribute.
            return namedAttributes[0].Name != tag.DefaultAttribute;

        default:
            // With more than one attribute, at most one of them can be the default.
            return true;
    }
}
/// <summary>
/// Adds the specified tag to the stylesheet. A tag whose marker is already registered
/// replaces the existing tag in place; otherwise the tag is appended and indexed.
/// </summary>
/// <param name="tag">tag to add or replace</param>
protected void AddTagInternal(ScrTag tag)
{
    int existingTagIndex;
    if (tagIndexDictionary.TryGetValue(tag.Marker, out existingTagIndex))
    {
        tags[existingTagIndex] = tag;
        return;
    }

    tags.Add(tag);

    // The index must be the position of the element just appended to the list.
    // Deriving it from the dictionary size is only right while both collections
    // hold the same number of entries; tags.Count - 1 is always right and matches
    // how the other registration paths compute the index.
    tagIndexDictionary[tag.Marker] = tags.Count - 1;
}
/// <summary>
/// Applies a text type stylesheet entry to the tag: sets the tag's TextType and,
/// for chapter and verse numbers, also adds the matching text property.
/// </summary>
/// <param name="qTag">tag being populated</param>
/// <param name="entry">stylesheet entry whose Text holds the text type</param>
private static void ParseTextType(ScrTag qTag, StylesheetEntry entry)
{
    // The special values are matched case-insensitively.
    switch (entry.Text.ToLowerInvariant())
    {
        case "chapternumber":
            qTag.AddTextProperty(TextProperties.scChapter);
            break;

        case "versenumber":
            qTag.AddTextProperty(TextProperties.scVerse);
            break;
    }

    // The enum lookup receives the entry text as written.
    qTag.TextType = ParseEnum(entry.Text, propToTextType);
}
/// <summary>
/// Checks whether a paragraph tag may follow the paragraph styles already on the stack.
/// To validate a series, start with an empty stack and call this repeatedly, once per
/// paragraph style.
/// </summary>
/// <param name="stack">stack of enclosing paragraph tags (modified when <paramref name="addTag"/> is true)</param>
/// <param name="tag">tag to check</param>
/// <param name="addTag">true to push the tag onto the stack, false to check only</param>
/// <returns>true if the tag is valid at this position</returns>
private static bool IsParagraphTagValid(List<ScrTag> stack, ScrTag tag, bool addTag)
{
    // Anything is valid as the first paragraph.
    if (stack.Count == 0)
    {
        if (addTag)
        {
            stack.Add(tag);
        }

        return true;
    }

    // A tag without occurs-under restrictions is valid anywhere; the stack is left untouched.
    var allowedParents = tag.OccursUnderList;
    if (allowedParents.Count == 0)
    {
        return true;
    }

    // Walk from the top of the stack downwards looking for an acceptable parent.
    for (int pos = stack.Count - 1; pos >= 0; pos--)
    {
        if (!allowedParents.Contains(stack[pos].Marker))
        {
            continue;
        }

        // The parent is usable when it is the top of the stack, when the tag has no
        // rank requirement (rank 0), or when the entry directly above the parent
        // has a rank less than or equal to the tag's rank.
        bool parentIsTop = pos == stack.Count - 1;
        bool rankAllows = parentIsTop || tag.Rank == 0 || stack[pos + 1].Rank <= tag.Rank;
        if (!rankAllows)
        {
            continue;
        }

        if (addTag)
        {
            // Drop everything above the chosen parent, then push the tag.
            int firstToDrop = pos + 1;
            int dropCount = stack.Count - firstToDrop;
            if (dropCount > 0)
            {
                stack.RemoveRange(firstToDrop, dropCount);
            }

            stack.Add(tag);
        }

        return true;
    }

    return false;
}
/// <summary>
/// Validates the attributes found on a marker and records an error for unsupported
/// attributes (when USFM 3 is not allowed), missing required attributes and unknown
/// attributes.
/// </summary>
/// <param name="state">parser state used to locate the error</param>
/// <param name="tag">stylesheet tag of the marker</param>
/// <param name="marker">marker the attributes belong to</param>
/// <param name="namedAttributes">attributes found on the marker</param>
private void ValidateAttributes(UsfmParserState state, ScrTag tag, string marker,
    NamedAttribute[] namedAttributes)
{
    if (!markerCheck.allowVersion3Usfm)
    {
        // Figures may only carry the known figure attributes; character styles may
        // only carry their default attribute.
        bool hasUnsupportedAttributes;
        if (marker == "fig")
        {
            hasUnsupportedAttributes =
                namedAttributes.Any(a => !figureAttributes.Contains(a.Name.InternalValue));
        }
        else
        {
            hasUnsupportedAttributes = tag.StyleType == ScrStyleType.scCharacterStyle &&
                HasNonDefaultAttributes(tag, namedAttributes);
        }

        if (hasUnsupportedAttributes)
        {
            recordError(new VerseRef(state.VerseRef), "\\" + marker, state.VerseOffset,
                GetErrorMessage(unsupportedAttributesMessage, marker));
        }
    }

    // Required attributes of the style that were not supplied.
    string[] missingAttributes =
        (from required in tag.Attributes
         where required.IsRequired && namedAttributes.All(na => na.Name != required.Name)
         select required.Name.InternalValue).ToArray();
    if (missingAttributes.Length > 0)
    {
        string missingMessage = string.Format(Localizer.Str(@"Missing required attributes ({0})"),
            string.Join(", ", missingAttributes));
        RecordMarkerError(state, marker, missingMessage + markerSlot);
    }

    // Supplied attributes that do not start with x-, are not one of the link attributes
    // (valid on any style) and are not defined for the style.
    // For figures the standard attributes are already stripped out.
    string[] unknownAttributes =
        (from supplied in namedAttributes
         where !supplied.Name.InternalValue.StartsWith("x-", StringComparison.OrdinalIgnoreCase)
               && !linkAttributes.Contains(supplied.Name.InternalValue)
               && tag.Attributes.All(a => a.Name != supplied.Name)
         select supplied.Name.InternalValue).ToArray();
    if (unknownAttributes.Length > 0)
    {
        string unknownMessage = string.Format(Localizer.Str(@"Unknown attributes ({0})"),
            string.Join(", ", unknownAttributes));
        RecordMarkerError(state, marker, unknownMessage + markerSlot);
    }
}
/// <summary>
/// Gets the index of the tag for the given marker. A marker that is not yet known is
/// registered as an unknown-style tag colored red (255, 0, 0).
/// </summary>
/// <param name="marker">marker to look up</param>
/// <returns>index of the tag as stored in the index dictionary</returns>
public int GetTagIndex(string marker)
{
    lock (syncRoot)
    {
        int tagIndex;
        if (!tagIndexDictionary.TryGetValue(marker, out tagIndex))
        {
            // Not found: create a placeholder tag marked as unknown.
            ScrTag unknownTag = CreateTag(marker);
            unknownTag.StyleType = ScrStyleType.scUnknownStyle;
            unknownTag.Color = new RgbColor(255, 0, 0);
            tagIndex = tagIndexDictionary[unknownTag.Marker];
        }

        return tagIndex;
    }
}
/// <summary>
/// Adds every text property named in the entry (a whitespace-separated list, matched
/// in lower case) to the tag. When the tag ends up nonpublishable, the publishable
/// property is removed.
/// </summary>
/// <param name="qTag">tag being populated</param>
/// <param name="entry">stylesheet entry whose Text lists the properties</param>
private static void ParseTextProperties(ScrTag qTag, StylesheetEntry entry)
{
    string[] words = entry.Text.ToLowerInvariant().Split();
    foreach (string word in words)
    {
        // Consecutive separators produce blank items; skip them.
        if (word.Trim().Length != 0)
        {
            qTag.AddTextProperty(ParseEnum(word, propToTextProps));
        }
    }

    // Nonpublishable wins over publishable.
    bool isNonpublishable = qTag.HasTextProperty(TextProperties.scNonpublishable);
    if (isNonpublishable)
    {
        qTag.RemoveTextProperty(TextProperties.scPublishable);
    }
}
/// <summary>
/// Populates a tag from the stylesheet entries that follow its "marker" entry, stopping
/// at the next "marker" entry or at the end of the list.
/// </summary>
/// <param name="qTag">tag to populate; its Marker needs to be set when calling this method</param>
/// <param name="stylesheetEntries">all entries of the stylesheet</param>
/// <param name="entryIndex">index of the first entry after the tag's "marker" entry</param>
/// <param name="errors">receives a message for every problem found while parsing</param>
/// <returns>the end-marker tag belonging to this style, or null when it has none</returns>
internal static ScrTag ParseSTYMarkerEntry(ScrTag qTag, List<StylesheetEntry> stylesheetEntries,
    int entryIndex, out List<string> errors)
{
    int markerLineNumber = stylesheetEntries[entryIndex - 1].LineNumber;

    // The following is present for conformance with Paratext release 5.0 stylesheets.
    // Release 6.0 and later follows the guidelines set in InitPropertyMaps.
    // Make sure \id gets the book property.
    if (qTag.Marker == "id")
    {
        qTag.AddTextProperty(TextProperties.scBook);
    }

    errors = new List<string>();
    HashSet<string> seenAttributes = new HashSet<string>();
    ScrTag endTag = null;

    for (int i = entryIndex; i < stylesheetEntries.Count; i++)
    {
        StylesheetEntry entry = stylesheetEntries[i];

        // The next "marker" entry starts the following style.
        if (entry.Marker == "marker")
        {
            break;
        }

        // A repeated attribute is reported but still applied.
        if (!seenAttributes.Add(entry.Marker))
        {
            errors.Add(GetMessage(entry.LineNumber,
                string.Format(Localizer.Str("Duplicate style attribute '{0}'"), entry.Marker)));
        }

        // "-" is the stylesheet's way of switching a setting off / resetting it.
        bool isDash = entry.Text == "-";
        bool isOn = !isDash;

        try
        {
            switch (entry.Marker)
            {
                // Plain text settings
                case "name":
                    qTag.Name = entry.Text;
                    break;
                case "description":
                    qTag.Description = entry.Text;
                    break;
                case "fontname":
                    qTag.Fontname = entry.Text;
                    break;
                case "xmltag":
                    qTag.XMLTag = entry.Text;
                    break;
                case "encoding":
                    qTag.Encoding = entry.Text;
                    break;

                // Numeric settings
                case "fontsize":
                    qTag.FontSize = isDash ? 0 : ParseI(entry);
                    break;
                case "linespacing":
                    qTag.LineSpacing = ParseI(entry);
                    break;
                case "spacebefore":
                    qTag.SpaceBefore = ParseI(entry);
                    break;
                case "spaceafter":
                    qTag.SpaceAfter = ParseI(entry);
                    break;
                case "leftmargin":
                    qTag.LeftMargin = ParseF(entry);
                    break;
                case "rightmargin":
                    qTag.RightMargin = ParseF(entry);
                    break;
                case "firstlineindent":
                    qTag.FirstLineIndent = ParseF(entry);
                    break;
                case "rank":
                    qTag.Rank = isDash ? 0 : ParseI(entry);
                    break;

                // On/off settings
                case "bold":
                    qTag.Bold = isOn;
                    break;
                case "smallcaps":
                    qTag.SmallCaps = isOn;
                    break;
                case "subscript":
                    qTag.Subscript = isOn;
                    break;
                case "italic":
                    qTag.Italic = isOn;
                    break;
                case "underline":
                    qTag.Underline = isOn;
                    break;
                case "superscript":
                    qTag.Superscript = isOn;
                    break;
                case "notrepeatable":
                    qTag.NotRepeatable = isOn;
                    break;

                // FB 23177 - the \Regular tag gives a way to reset Italic, Bold and Superscript
                // that is compatible with the ptx2pdf macros used by PrintDraft.
                case "regular":
                    qTag.Superscript = false;
                    qTag.Bold = false;
                    qTag.Italic = false;
                    qTag.Regular = true;
                    break;

                case "testylename":
                    // Ignored for now; later it will be used to tie to FW styles.
                    break;

                // Settings with dedicated parsers
                case "textproperties":
                    ParseTextProperties(qTag, entry);
                    break;
                case "texttype":
                    ParseTextType(qTag, entry);
                    break;
                case "color":
                    qTag.RawColor = isDash ? 0 : ParseColor(entry);
                    break;
                case "colorname":
                    qTag.RawColor = isDash ? 0 : GetThemeColor(entry);
                    break;
                case "justification":
                    qTag.JustificationType = ParseEnum(entry.Text, propToJustification);
                    break;
                case "styletype":
                    qTag.StyleType = ParseEnum(entry.Text, propToStyleType);
                    break;
                case "attributes":
                    try
                    {
                        qTag.RawAttributes = entry.Text;
                    }
                    catch (ArgumentException attributeProblem)
                    {
                        errors.Add(GetMessage(entry.LineNumber, attributeProblem.Message));
                    }
                    break;
                case "occursunder":
                    // Normalize to single spaces between the listed markers.
                    qTag.OccursUnder = String.Join(" ",
                        entry.Text.Split(spaceSep, StringSplitOptions.RemoveEmptyEntries));
                    break;
                case "endmarker":
                    endTag = MakeEndMarker(entry.Text);
                    qTag.Endmarker = entry.Text;
                    break;

                default:
                    errors.Add(GetMessage(entry.LineNumber,
                        string.Format(Localizer.Str("Unknown marker: {0}"), entry.Marker)));
                    break;
            }
        }
        catch (ArgumentOutOfRangeException badValue)
        {
            errors.Add(GetMessage(entry.LineNumber,
                string.Format(Localizer.Str("Invalid definition for marker '{0}': {1}"),
                    entry.Marker, badValue.ActualValue)));
        }
    }

    if (string.IsNullOrEmpty(qTag.Name))
    {
        errors.Add(GetMessage(markerLineNumber,
            string.Format(Localizer.Str("Missing name for style: {0}"), qTag.Marker)));
    }

    if (qTag.StyleType == ScrStyleType.scCharacterStyle && endTag == null)
    {
        // A character style without an explicit end marker gets the default "marker*".
        string defaultEndMarker = qTag.Marker + "*";
        endTag = MakeEndMarker(defaultEndMarker);
        qTag.Endmarker = defaultEndMarker;
    }
    else if (qTag.StyleType == ScrStyleType.scMilestone)
    {
        if (endTag == null)
        {
            errors.Add(GetMessage(markerLineNumber,
                string.Format(Localizer.Str("Missing end marker for style: {0}"), qTag.Marker)));
        }
        else
        {
            endTag.StyleType = ScrStyleType.scMilestoneEnd;
            endTag.RawAttributes = "?id"; // id is always an optional attribute for the end marker
            endTag.Name = qTag.Name;
        }
    }

    // Special case: character and paragraph styles of text type "other" are publishable
    // unless they are nonpublishable, chapter or verse styles.
    bool defaultsToPublishable =
        qTag.TextType == ScrTextType.scOther &&
        !qTag.HasTextProperty(TextProperties.scNonpublishable) &&
        !qTag.HasTextProperty(TextProperties.scChapter) &&
        !qTag.HasTextProperty(TextProperties.scVerse) &&
        (qTag.StyleType == ScrStyleType.scCharacterStyle ||
         qTag.StyleType == ScrStyleType.scParagraphStyle);
    if (defaultsToPublishable)
    {
        qTag.AddTextProperty(TextProperties.scPublishable);
    }

    return endTag;
}
/// <summary>
/// Tells whether a marker needs a closing marker: true for "fig" and for any tag whose
/// occurs-under list contains "NEST".
/// </summary>
/// <param name="markerWithoutPlus">marker text without a leading plus sign</param>
/// <param name="scrTag">stylesheet tag of the marker</param>
/// <returns>true if the marker requires a close</returns>
private static bool MarkerRequiresClose(string markerWithoutPlus, ScrTag scrTag)
{
    // Figures always require a close; the tag is not consulted for them.
    if (markerWithoutPlus == "fig")
    {
        return true;
    }

    return scrTag.OccursUnderList.Contains("NEST");
}
/// <summary>
/// Tokenize the specified USFM text.
/// </summary>
/// <param name="scrStylesheet">stylesheet used to look up the style of each marker</param>
/// <param name="usfm">usfm string</param>
/// <param name="preserveWhitespace">true to preserve all whitespaces verbatim in tokens</param>
/// <returns>list of tokens</returns>
public static List<UsfmToken> Tokenize(ScrStylesheet scrStylesheet, string usfm, bool preserveWhitespace)
{
    List<UsfmToken> tokens = new List<UsfmToken>();
    UsfmToken lastTokenWithAttributes = null;

    int index = 0; // Current position
    while (index < usfm.Length)
    {
        int nextMarkerIndex = (index < usfm.Length - 1) ? usfm.IndexOf('\\', index + 1) : -1;
        if (nextMarkerIndex == -1)
        {
            nextMarkerIndex = usfm.Length;
        }

        // If text, create text token until end or next \
        var ch = usfm[index];
        if (ch != '\\')
        {
            string text = usfm.Substring(index, nextMarkerIndex - index);
            if (!preserveWhitespace)
            {
                text = RegularizeSpaces(text);
            }

            lastTokenWithAttributes = null;
            int attributeIndex = text.IndexOf('|');
            if (attributeIndex >= 0)
            {
                UsfmToken matchingToken = FindMatchingStartMarker(usfm, tokens, nextMarkerIndex);
                if (matchingToken != null)
                {
                    ScrTag matchingTag = scrStylesheet.GetTag(matchingToken.NestlessMarker);

                    // leave attributes of other styles as regular text
                    if (matchingTag.StyleType == ScrStyleType.scCharacterStyle ||
                        matchingTag.StyleType == ScrStyleType.scMilestone ||
                        matchingTag.StyleType == ScrStyleType.scMilestoneEnd)
                    {
                        string adjustedText = text.Substring(0, attributeIndex);
                        if (matchingToken.SetAttributes(text.Substring(attributeIndex + 1),
                            matchingTag.DefaultAttribute, ref adjustedText, preserveWhitespace))
                        {
                            text = adjustedText;

                            // attributes for ending milestone are not copied from the beginning
                            // milestone, so don't update last token value
                            if (matchingTag.StyleType == ScrStyleType.scCharacterStyle)
                            {
                                lastTokenWithAttributes = matchingToken;
                            }
                        }
                    }
                }
            }

            if (text.Length > 0)
            {
                tokens.Add(new UsfmToken(UsfmTokenType.Text, null, text, null));
            }

            index = nextMarkerIndex;
            continue;
        }

        // Get marker (and move past whitespace or star ending)
        index++;
        int markerStart = index;
        while (index < usfm.Length)
        {
            ch = usfm[index];

            // Backslash starts a new marker
            if (ch == '\\')
            {
                break;
            }

            // don't require a space before the | that starts attributes - mainly for
            // milestones to allow \qt-s|speaker\*
            if (ch == '|')
            {
                break;
            }

            // End star is part of marker
            if (ch == '*')
            {
                index++;
                break;
            }

            if (IsNonSemanticWhiteSpace(ch))
            {
                // Preserve whitespace if needed, otherwise skip
                if (!preserveWhitespace)
                {
                    index++;
                }

                break;
            }

            index++;
        }

        string marker = usfm.Substring(markerStart, index - markerStart).TrimEnd();

        // Milestone stop/end markers are ended with \*, so marker will just be * and can be skipped
        if (marker == "*")
        {
            // Make sure that the previous token was a milestone - have to skip space-only
            // text tokens that may have been added when preserveWhitespace is true.
            // LastOrDefault (rather than Last) is required here: when every token so far
            // is a whitespace-only text token nothing matches the predicate, and Last
            // would throw InvalidOperationException. It also yields null for an empty list.
            UsfmToken prevToken =
                tokens.LastOrDefault(t => t.Type != UsfmTokenType.Text || t.Text.Trim() != "");
            if (prevToken != null &&
                (prevToken.Type == UsfmTokenType.Milestone || prevToken.Type == UsfmTokenType.MilestoneEnd))
            {
                // if the last item is a text token (whitespace only, given the search above),
                // remove it so we don't get extra space.
                if (tokens.Last().Type == UsfmTokenType.Text)
                {
                    tokens.RemoveAt(tokens.Count - 1);
                }

                continue;
            }
        }

        // Multiple whitespace after non-end marker is ok
        if (!marker.EndsWith("*", StringComparison.Ordinal) && !preserveWhitespace)
        {
            while ((index < usfm.Length) && IsNonSemanticWhiteSpace(usfm[index]))
            {
                index++;
            }
        }

        // Lookup tag
        ScrTag tag = scrStylesheet.GetTag(marker.TrimStart('+'));

        // If starts with a plus and is not a character style or an end style, it is an unknown tag
        if (marker.StartsWith("+", StringComparison.Ordinal) &&
            tag.StyleType != ScrStyleType.scCharacterStyle &&
            tag.StyleType != ScrStyleType.scEndStyle)
        {
            tag = scrStylesheet.GetTag(marker);
        }

        // Note: Unless this is a milestone, tag.Marker and tag.Endmarker are ignored, as the
        // plus prefix must be kept and the end marker is always marker + "*"
        string endMarker = tag.StyleType != ScrStyleType.scMilestone ? marker + "*" : tag.Endmarker;

        switch (tag.StyleType)
        {
            case ScrStyleType.scCharacterStyle:
                // Handle verse special case
                UsfmToken newToken;
                if ((tag.TextProperties & TextProperties.scVerse) > 0)
                {
                    newToken = new UsfmToken(UsfmTokenType.Verse, marker, null, null,
                        GetNextWord(usfm, ref index, preserveWhitespace));
                }
                else
                {
                    newToken = new UsfmToken(UsfmTokenType.Character, marker, null, endMarker);
                }

                tokens.Add(newToken);
                break;

            case ScrStyleType.scParagraphStyle:
                // Handle chapter special case
                if ((tag.TextProperties & TextProperties.scChapter) > 0)
                {
                    tokens.Add(new UsfmToken(UsfmTokenType.Chapter, marker, null, null,
                        GetNextWord(usfm, ref index, preserveWhitespace)));
                }
                else if ((tag.TextProperties & TextProperties.scBook) > 0)
                {
                    tokens.Add(new UsfmToken(UsfmTokenType.Book, marker, null, null,
                        GetNextWord(usfm, ref index, preserveWhitespace)));
                }
                else
                {
                    tokens.Add(new UsfmToken(UsfmTokenType.Paragraph, marker, null, endMarker));
                }

                break;

            case ScrStyleType.scNoteStyle:
                tokens.Add(new UsfmToken(UsfmTokenType.Note, marker, null, endMarker,
                    GetNextWord(usfm, ref index, preserveWhitespace)));
                break;

            case ScrStyleType.scEndStyle:
                lastTokenWithAttributes = AddEndMarker(marker, tokens, lastTokenWithAttributes);
                break;

            case ScrStyleType.scUnknownStyle:
                // End tokens are always end tokens, even if unknown
                if (marker.EndsWith("*", StringComparison.Ordinal))
                {
                    lastTokenWithAttributes = AddEndMarker(marker, tokens, lastTokenWithAttributes);
                }
                else
                {
                    // Handle special case of esb and esbe which might not be in basic stylesheet
                    // but are always sidebars and so should be tokenized as paragraphs
                    if (marker == "esb" || marker == "esbe")
                    {
                        tokens.Add(new UsfmToken(UsfmTokenType.Paragraph, marker, null, endMarker));
                        break;
                    }

                    // Create unknown token with a corresponding end marker
                    tokens.Add(new UsfmToken(UsfmTokenType.Unknown, marker, null, marker + "*"));
                }

                break;

            case ScrStyleType.scMilestone:
            case ScrStyleType.scMilestoneEnd:
                // If a milestone is not followed by an ending \*, don't create a milestone token
                // for the beginning. Instead create a text token for all the text up to the
                // beginning of the next marker, so that a partially typed milestone stays plain
                // text until its ending has been typed.
                if (!MilestoneEnded(usfm, index))
                {
                    int endOfText = (index < usfm.Length - 1) ? usfm.IndexOf('\\', index + 1) : -1;
                    if (endOfText == -1)
                    {
                        endOfText = usfm.Length;
                    }

                    string milestoneText = usfm.Substring(index, endOfText - index);

                    // add back space that was removed after marker
                    if (milestoneText.Length > 0 && milestoneText[0] != ' ' && milestoneText[0] != '|')
                    {
                        milestoneText = " " + milestoneText;
                    }

                    tokens.Add(new UsfmToken(UsfmTokenType.Text, null, @"\" + marker + milestoneText, null));
                    index = endOfText;
                }
                else if (tag.StyleType == ScrStyleType.scMilestone)
                {
                    tokens.Add(new UsfmToken(UsfmTokenType.Milestone, marker, null, endMarker));
                }
                else
                {
                    tokens.Add(new UsfmToken(UsfmTokenType.MilestoneEnd, marker, null, null));
                }

                break;

            default:
                Debug.Fail("Unknown ScrStyleType");
                break;
        }
    }

    // Forces a space to be present in tokenization if immediately
    // before a token requiring a preceding CR/LF. This is to ensure
    // that when written to disk and re-read, that tokenization
    // will match. For example, "\p test\p here" requires a space
    // after "test". Also, "\p \em test\em*\p here" requires a space
    // token inserted after \em*
    if (!preserveWhitespace)
    {
        for (int i = 1; i < tokens.Count; i++)
        {
            // If requires newline (verses do, except when after '(' or '[')
            if (tokens[i].Type == UsfmTokenType.Book ||
                tokens[i].Type == UsfmTokenType.Chapter ||
                tokens[i].Type == UsfmTokenType.Paragraph ||
                (tokens[i].Type == UsfmTokenType.Verse &&
                 !(tokens[i - 1].Type == UsfmTokenType.Text &&
                   (tokens[i - 1].Text.EndsWith("(", StringComparison.Ordinal) ||
                    tokens[i - 1].Text.EndsWith("[", StringComparison.Ordinal)))))
            {
                // Add space to text token
                if (tokens[i - 1].Type == UsfmTokenType.Text)
                {
                    if (!tokens[i - 1].Text.EndsWith(" ", StringComparison.Ordinal))
                    {
                        tokens[i - 1].Text = tokens[i - 1].Text + " ";
                    }
                }
                else if (tokens[i - 1].Type == UsfmTokenType.End)
                {
                    // Insert space token after * of end marker
                    tokens.Insert(i, new UsfmToken(UsfmTokenType.Text, null, " ", null));
                    i++;
                }
            }
        }
    }

    return tokens;
}