/// <summary>
/// Reads the stylesheet file at <paramref name="stylesheetFileName"/> and merges the marker
/// definitions it contains into <c>_markers</c>.
/// </summary>
/// <param name="stylesheetFileName">Path of the stylesheet file to read.</param>
private void Parse(string stylesheetFileName)
{
    List<StylesheetEntry> entries = SplitStylesheet(File.ReadAllLines(stylesheetFileName));

    for (int i = 0; i < entries.Count; ++i)
    {
        StylesheetEntry entry = entries[i];
        // Only "marker" entries start a definition; everything else is skipped here.
        if (entry.Marker != "marker")
        {
            continue;
        }

        string[] parts = entry.Text.Split();
        if (parts.Length > 1 && parts[1] == "-")
        {
            // If the entry looks like "\marker xy -" remove the tag and its end tag if any
            _markers.Remove(parts[0]);
            _markers.Remove(parts[0] + "*");
            continue;
        }

        // The entries after index i are handed to ParseMarkerEntry, which fills in the marker.
        UsfmMarker marker = CreateMarker(entry.Text);
        UsfmMarker endMarker = ParseMarkerEntry(marker, entries, i + 1);

        // Register the end marker only when none is registered under that name yet.
        if (endMarker != null && !_markers.ContainsKey(endMarker.Marker))
        {
            _markers[endMarker.Marker] = endMarker;
        }
    }
}
/// <summary>
/// Applies the text properties listed in a stylesheet entry to a marker.
/// The entry text is lower-cased and split on whitespace; every piece found in
/// <c>TextPropertyMappings</c> is OR-ed into the marker's <c>TextProperties</c>.
/// </summary>
/// <param name="qTag">Marker whose <c>TextProperties</c> are updated.</param>
/// <param name="entry">Stylesheet entry whose text lists the property names.</param>
private static void ParseTextProperties(UsfmMarker qTag, StylesheetEntry entry)
{
    string[] names = entry.Text.ToLowerInvariant().Split();
    foreach (string name in names)
    {
        // Runs of whitespace yield blank pieces; those name no property.
        if (!string.IsNullOrWhiteSpace(name))
        {
            UsfmTextProperties flag;
            if (TextPropertyMappings.TryGetValue(name, out flag))
            {
                qTag.TextProperties |= flag;
            }
        }
    }

    // When the Nonpublishable flag is set, the Publishable flag is cleared.
    bool nonpublishable = (qTag.TextProperties & UsfmTextProperties.Nonpublishable) > 0;
    if (nonpublishable)
    {
        qTag.TextProperties &= ~UsfmTextProperties.Publishable;
    }
}
/// <summary>
/// Loads a stylesheet and merges the marker definitions it contains into <c>_markers</c>.
/// The file at <paramref name="stylesheetFileName"/> is used when it exists; otherwise, if its
/// file name is "usfm.sty" or "usfm_sb.sty", the lines come from <c>GetEmbeddedStylesheet</c>.
/// </summary>
/// <param name="stylesheetFileName">Path of the stylesheet to load.</param>
/// <exception cref="ArgumentException">
/// The file does not exist and its name is neither "usfm.sty" nor "usfm_sb.sty".
/// </exception>
private void Parse(string stylesheetFileName)
{
    IEnumerable<string> lines;
    if (File.Exists(stylesheetFileName))
    {
        lines = File.ReadAllLines(stylesheetFileName);
    }
    else
    {
        string fileName = Path.GetFileName(stylesheetFileName);
        if (fileName != "usfm.sty" && fileName != "usfm_sb.sty")
        {
            throw new ArgumentException("The stylesheet does not exist.", nameof(stylesheetFileName));
        }
        lines = GetEmbeddedStylesheet(fileName);
    }

    List<StylesheetEntry> entries = SplitStylesheet(lines);

    for (int i = 0; i < entries.Count; ++i)
    {
        StylesheetEntry entry = entries[i];
        // Only "marker" entries start a definition; everything else is skipped here.
        if (entry.Marker != "marker")
        {
            continue;
        }

        string[] parts = entry.Text.Split();
        if (parts.Length > 1 && parts[1] == "-")
        {
            // If the entry looks like "\marker xy -" remove the tag and its end tag if any
            _markers.Remove(parts[0]);
            _markers.Remove(parts[0] + "*");
            continue;
        }

        // The entries after index i are handed to ParseMarkerEntry, which fills in the marker.
        UsfmMarker marker = CreateMarker(entry.Text);
        UsfmMarker endMarker = ParseMarkerEntry(marker, entries, i + 1);

        // Register the end marker only when none is registered under that name yet.
        if (endMarker != null && !_markers.ContainsKey(endMarker.Marker))
        {
            _markers[endMarker.Marker] = endMarker;
        }
    }
}
/// <summary>
/// Returns the marker registered in <c>_markers</c> under <paramref name="markerStr"/>,
/// creating and registering a new one when none exists yet.
/// </summary>
/// <param name="markerStr">Marker name used as the lookup key.</param>
/// <returns>The existing or newly created marker.</returns>
private UsfmMarker CreateMarker(string markerStr)
{
    // Reuse an existing tag so later entries (normally from custom.sty) add to it.
    UsfmMarker existing;
    if (_markers.TryGetValue(markerStr, out existing))
    {
        return existing;
    }

    UsfmMarker created = new UsfmMarker(markerStr);
    // Every new marker except "c" and "v" starts out as Publishable.
    bool isChapterOrVerse = markerStr == "c" || markerStr == "v";
    if (!isChapterOrVerse)
    {
        created.TextProperties = UsfmTextProperties.Publishable;
    }

    _markers[markerStr] = created;
    return created;
}
/// <summary>
/// Applies a text-type stylesheet entry to a marker: the keywords "chapternumber" and
/// "versenumber" (matched case-insensitively) add the Chapter and Verse text properties,
/// and any text found in <c>TextTypeMappings</c> sets the marker's <c>TextType</c>.
/// </summary>
/// <param name="qTag">Marker to update.</param>
/// <param name="entry">Stylesheet entry whose text names the text type.</param>
private static void ParseTextType(UsfmMarker qTag, StylesheetEntry entry)
{
    // These are fixed file-format keywords, not user-facing text, so both are compared
    // ordinally; the result must not depend on the current culture.
    if (entry.Text.Equals("chapternumber", StringComparison.OrdinalIgnoreCase))
    {
        qTag.TextProperties |= UsfmTextProperties.Chapter;
    }
    if (entry.Text.Equals("versenumber", StringComparison.OrdinalIgnoreCase))
    {
        qTag.TextProperties |= UsfmTextProperties.Verse;
    }

    UsfmTextType textType;
    if (TextTypeMappings.TryGetValue(entry.Text, out textType))
    {
        qTag.TextType = textType;
    }
}
/// <summary>
/// Splits a USFM string into tokens. Runs of text become Text tokens; each backslash
/// marker is looked up in <c>_stylesheet</c> and turned into a token according to the
/// marker's style type and text properties.
/// </summary>
/// <param name="usfm">The USFM text to tokenize.</param>
/// <param name="preserveWhitespace">
/// When false, text is passed through <c>RegularizeSpaces</c>, whitespace following a marker
/// is skipped, and spaces are forced in before tokens that need to start on a new line.
/// When true, none of that is done.
/// </param>
/// <returns>The tokens in document order.</returns>
public IEnumerable<UsfmToken> Parse(string usfm, bool preserveWhitespace = false)
{
    List<UsfmToken> result = new List<UsfmToken>();
    int pos = 0; // Current position in usfm

    while (pos < usfm.Length)
    {
        if (usfm[pos] != '\\')
        {
            // Plain text runs up to the next backslash, or to the end of the input.
            int textEnd = pos < usfm.Length - 1 ? usfm.IndexOf('\\', pos + 1) : -1;
            if (textEnd == -1)
            {
                textEnd = usfm.Length;
            }

            string text = usfm.Substring(pos, textEnd - pos);
            result.Add(new UsfmToken(UsfmTokenType.Text, null, preserveWhitespace ? text : RegularizeSpaces(text)));
            pos = textEnd;
            continue;
        }

        // Step over the backslash and scan the marker name.
        int nameStart = ++pos;
        for (; pos < usfm.Length; pos++)
        {
            char c = usfm[pos];
            if (c == '\\')
            {
                // A backslash starts the next marker
                break;
            }
            if (c == '*')
            {
                // The closing star belongs to the marker
                pos++;
                break;
            }
            if (IsNonSemanticWhiteSpace(c))
            {
                // The terminating whitespace is consumed unless whitespace must be preserved
                if (!preserveWhitespace)
                {
                    pos++;
                }
                break;
            }
        }

        string markerStr = usfm.Substring(nameStart, pos - nameStart).TrimEnd();
        bool isEndMarker = markerStr.EndsWith("*", StringComparison.Ordinal);

        // Any amount of whitespace may follow a non-end marker
        if (!isEndMarker && !preserveWhitespace)
        {
            while (pos < usfm.Length && IsNonSemanticWhiteSpace(usfm[pos]))
            {
                pos++;
            }
        }

        // Look the marker up without any leading '+'. A '+' marker that is not a
        // character style is looked up again under its full name (an unknown marker).
        bool hasPlusPrefix = markerStr.StartsWith("+", StringComparison.Ordinal);
        UsfmMarker marker = _stylesheet.GetMarker(markerStr.TrimStart('+'));
        if (hasPlusPrefix && marker.StyleType != UsfmStyleType.Character)
        {
            marker = _stylesheet.GetMarker(markerStr);
        }

        UsfmTextProperties props = marker.TextProperties;
        switch (marker.StyleType)
        {
            case UsfmStyleType.Character:
                // Verse markers are character styles that carry the following word
                result.Add((props & UsfmTextProperties.Verse) > 0
                    ? new UsfmToken(UsfmTokenType.Verse, marker, GetNextWord(usfm, ref pos, preserveWhitespace))
                    : new UsfmToken(UsfmTokenType.Character, marker, null));
                break;

            case UsfmStyleType.Paragraph:
                // Chapter and book markers are paragraph styles that carry the following word
                if ((props & UsfmTextProperties.Chapter) > 0)
                {
                    result.Add(new UsfmToken(UsfmTokenType.Chapter, marker, GetNextWord(usfm, ref pos, preserveWhitespace)));
                }
                else if ((props & UsfmTextProperties.Book) > 0)
                {
                    result.Add(new UsfmToken(UsfmTokenType.Book, marker, GetNextWord(usfm, ref pos, preserveWhitespace)));
                }
                else
                {
                    result.Add(new UsfmToken(UsfmTokenType.Paragraph, marker, null));
                }
                break;

            case UsfmStyleType.Note:
                result.Add(new UsfmToken(UsfmTokenType.Note, marker, GetNextWord(usfm, ref pos, preserveWhitespace)));
                break;

            case UsfmStyleType.End:
                result.Add(new UsfmToken(UsfmTokenType.End, marker, null));
                break;

            case UsfmStyleType.Unknown:
                UsfmTokenType unknownKind;
                if (isEndMarker)
                {
                    // End tokens are always end tokens, even if unknown
                    unknownKind = UsfmTokenType.End;
                }
                else if (markerStr == "esb" || markerStr == "esbe")
                {
                    // esb and esbe might not be in the basic stylesheet, but they are
                    // always sidebars and so are tokenized as paragraphs
                    unknownKind = UsfmTokenType.Paragraph;
                }
                else
                {
                    unknownKind = UsfmTokenType.Unknown;
                }
                result.Add(new UsfmToken(unknownKind, marker, null));
                break;
        }
    }

    // Force a space into the tokenization immediately before any token that requires
    // a preceding CR/LF, so that writing the tokens to disk and re-reading them gives
    // the same tokenization. For example, "\p test\p here" needs a space after "test",
    // and "\p \em test\em*\p here" needs a space token inserted after \em*.
    if (!preserveWhitespace)
    {
        for (int i = 1; i < result.Count; i++)
        {
            UsfmToken current = result[i];
            UsfmToken previous = result[i - 1];

            bool needsNewline;
            switch (current.Type)
            {
                case UsfmTokenType.Book:
                case UsfmTokenType.Chapter:
                case UsfmTokenType.Paragraph:
                    needsNewline = true;
                    break;
                case UsfmTokenType.Verse:
                    // Verses need one too, except directly after text ending in '(' or '['
                    needsNewline = !(previous.Type == UsfmTokenType.Text
                        && (previous.Text.EndsWith("(", StringComparison.Ordinal)
                            || previous.Text.EndsWith("[", StringComparison.Ordinal)));
                    break;
                default:
                    needsNewline = false;
                    break;
            }

            if (!needsNewline)
            {
                continue;
            }

            if (previous.Type == UsfmTokenType.Text)
            {
                // Append the space to the preceding text token
                if (!previous.Text.EndsWith(" ", StringComparison.Ordinal))
                {
                    result[i - 1] = new UsfmToken(previous.Text + " ");
                }
            }
            else if (previous.Type == UsfmTokenType.End)
            {
                // Insert a space token after the * of the end marker, then step past it
                result.Insert(i, new UsfmToken(UsfmTokenType.Text, null, " "));
                i++;
            }
        }
    }

    return result;
}
/// <summary>
/// Creates a token from its three components.
/// </summary>
/// <param name="type">Value stored in <c>Type</c>.</param>
/// <param name="marker">Value stored in <c>Marker</c>.</param>
/// <param name="text">Value stored in <c>Text</c>.</param>
public UsfmToken(UsfmTokenType type, UsfmMarker marker, string text)
{
    this.Type = type;
    this.Marker = marker;
    this.Text = text;
}
/// <summary>
/// Fills in <paramref name="marker"/> from the stylesheet entries starting at
/// <paramref name="entryIndex"/>, stopping at the next "marker" entry or at the end of the list.
/// </summary>
/// <param name="marker">The marker being defined; its properties are updated in place.</param>
/// <param name="stylesheetEntries">All entries of the stylesheet.</param>
/// <param name="entryIndex">Index of the first entry to read for this marker.</param>
/// <returns>
/// The end marker built by <c>MakeEndMarker</c>: from an "endmarker" entry if one was seen,
/// otherwise from the marker name plus "*" for character styles; null when neither applies.
/// </returns>
private static UsfmMarker ParseMarkerEntry(UsfmMarker marker, List<StylesheetEntry> stylesheetEntries, int entryIndex)
{
    // Kept for conformance with Paratext release 5.0 stylesheets (release 6.0 and later
    // follow the guidelines set in InitPropertyMaps): \id always gets the Book property.
    if (marker.Marker == "id")
    {
        marker.TextProperties |= UsfmTextProperties.Book;
    }

    UsfmMarker endMarker = null;
    int number; // Receives the result of each numeric parse below

    for (int i = entryIndex; i < stylesheetEntries.Count; i++)
    {
        StylesheetEntry entry = stylesheetEntries[i];
        if (entry.Marker == "marker")
        {
            // The next marker definition starts here
            break;
        }

        string value = entry.Text;
        bool enabled = value != "-"; // "-" switches a boolean attribute off

        switch (entry.Marker)
        {
            // Plain text attributes
            case "name":
                marker.Name = value;
                break;
            case "description":
                marker.Description = value;
                break;
            case "fontname":
                marker.FontName = value;
                break;
            case "xmltag":
                marker.XmlTag = value;
                break;
            case "encoding":
                marker.Encoding = value;
                break;

            // Numeric attributes where "-" resets the value to 0
            case "fontsize":
                if (value == "-")
                {
                    marker.FontSize = 0;
                }
                else if (ParseInteger(entry, out number))
                {
                    marker.FontSize = number;
                }
                break;
            case "rank":
                if (value == "-")
                {
                    marker.Rank = 0;
                }
                else if (ParseInteger(entry, out number))
                {
                    marker.Rank = number;
                }
                break;
            case "color":
                if (value == "-")
                {
                    marker.Color = 0;
                }
                else if (ParseInteger(entry, out number))
                {
                    marker.Color = number;
                }
                break;

            // Numeric attributes that are only set when the value parses
            case "linespacing":
                if (ParseInteger(entry, out number))
                {
                    marker.LineSpacing = number;
                }
                break;
            case "spacebefore":
                if (ParseInteger(entry, out number))
                {
                    marker.SpaceBefore = number;
                }
                break;
            case "spaceafter":
                if (ParseInteger(entry, out number))
                {
                    marker.SpaceAfter = number;
                }
                break;
            case "leftmargin":
                if (ParseInteger(entry, out number))
                {
                    marker.LeftMargin = number;
                }
                break;
            case "rightmargin":
                if (ParseInteger(entry, out number))
                {
                    marker.RightMargin = number;
                }
                break;
            case "firstlineindent":
                if (ParseFloat(entry, out number))
                {
                    marker.FirstLineIndent = number;
                }
                break;

            // Boolean attributes
            case "bold":
                marker.Bold = enabled;
                break;
            case "smallcaps":
                marker.SmallCaps = enabled;
                break;
            case "subscript":
                marker.Subscript = enabled;
                break;
            case "italic":
                marker.Italic = enabled;
                break;
            case "underline":
                marker.Underline = enabled;
                break;
            case "superscript":
                marker.Superscript = enabled;
                break;
            case "notrepeatable":
                marker.NotRepeatable = enabled;
                break;
            case "regular":
                // "regular" clears superscript, bold and italic
                marker.Superscript = false;
                marker.Bold = false;
                marker.Italic = false;
                marker.Regular = true;
                break;

            case "testylename":
                // Ignore this tag, later we will use it to tie to FW styles
                break;

            // Attributes resolved through helpers or lookup tables
            case "textproperties":
                ParseTextProperties(marker, entry);
                break;
            case "texttype":
                ParseTextType(marker, entry);
                break;
            case "justification":
                UsfmJustification justification;
                if (JustificationMappings.TryGetValue(value, out justification))
                {
                    marker.Justification = justification;
                }
                break;
            case "styletype":
                UsfmStyleType styleType;
                if (StyleMappings.TryGetValue(value, out styleType))
                {
                    marker.StyleType = styleType;
                }
                break;
            case "occursunder":
                marker.OccursUnder.UnionWith(value.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries));
                break;
            case "endmarker":
                endMarker = MakeEndMarker(value);
                marker.EndMarker = value;
                break;
        }
    }

    // A character style with no explicit end marker gets the marker name plus "*"
    if (endMarker == null && marker.StyleType == UsfmStyleType.Character)
    {
        string endMarkerStr = marker.Marker + "*";
        endMarker = MakeEndMarker(endMarkerStr);
        marker.EndMarker = endMarkerStr;
    }

    // Special case: character and paragraph styles of text type Other become Publishable
    // unless they are flagged Nonpublishable, Chapter or Verse.
    UsfmTextProperties excluding = UsfmTextProperties.Nonpublishable
        | UsfmTextProperties.Chapter
        | UsfmTextProperties.Verse;
    bool isCharacterOrParagraph = marker.StyleType == UsfmStyleType.Character
        || marker.StyleType == UsfmStyleType.Paragraph;
    if (marker.TextType == UsfmTextType.Other
        && (marker.TextProperties & excluding) == 0
        && isCharacterOrParagraph)
    {
        marker.TextProperties |= UsfmTextProperties.Publishable;
    }

    return endMarker;
}