예제 #1
0
        /// <summary>
        /// Reads a stylesheet file and updates <c>_markers</c> with the marker definitions it contains.
        /// </summary>
        /// <remarks>
        /// Only entries whose marker is "marker" start a definition. An entry of the form
        /// "\marker xy -" removes the marker "xy" and its end marker "xy*" instead of defining it.
        /// An end marker returned by <c>ParseMarkerEntry</c> is registered only when no marker
        /// with the same name is already present.
        /// </remarks>
        /// <param name="stylesheetFileName">Path of the stylesheet file to read.</param>
        private void Parse(string stylesheetFileName)
        {
            List<StylesheetEntry> entries = SplitStylesheet(File.ReadAllLines(stylesheetFileName));

            for (int i = 0; i < entries.Count; ++i)
            {
                StylesheetEntry entry = entries[i];

                if (entry.Marker != "marker")
                {
                    continue;
                }

                string[] parts = entry.Text.Split();
                if (parts.Length > 1 && parts[1] == "-")
                {
                    // If the entry looks like "\marker xy -" remove the tag and its end tag if any
                    _markers.Remove(parts[0]);
                    _markers.Remove(parts[0] + "*");
                    continue;
                }

                // The property entries for this marker start right after the "marker" entry itself.
                UsfmMarker marker    = CreateMarker(entry.Text);
                UsfmMarker endMarker = ParseMarkerEntry(marker, entries, i + 1);

                // Never overwrite an end marker that has already been registered.
                if (endMarker != null && !_markers.ContainsKey(endMarker.Marker))
                {
                    _markers[endMarker.Marker] = endMarker;
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Applies the property names listed in a stylesheet entry to a marker's text properties.
        /// </summary>
        /// <remarks>
        /// The entry text is lower-cased and split on whitespace; every word found in
        /// <c>TextPropertyMappings</c> is OR-ed into <c>qTag.TextProperties</c>. Words without a
        /// mapping are ignored. If the result contains Nonpublishable, Publishable is cleared.
        /// </remarks>
        /// <param name="qTag">The marker whose text properties are updated.</param>
        /// <param name="entry">The stylesheet entry whose text lists the property names.</param>
        private static void ParseTextProperties(UsfmMarker qTag, StylesheetEntry entry)
        {
            string[] words = entry.Text.ToLowerInvariant().Split();

            for (int w = 0; w < words.Length; w++)
            {
                string word = words[w];

                // Consecutive separators yield empty items; there is nothing to look up for them.
                if (word.Trim().Length == 0)
                {
                    continue;
                }

                UsfmTextProperties mapped;
                if (!TextPropertyMappings.TryGetValue(word, out mapped))
                {
                    continue;
                }

                qTag.TextProperties |= mapped;
            }

            // Nonpublishable wins over Publishable when both are set.
            bool isNonpublishable = (qTag.TextProperties & UsfmTextProperties.Nonpublishable) > 0;
            if (isNonpublishable)
            {
                qTag.TextProperties &= ~UsfmTextProperties.Publishable;
            }
        }
예제 #3
0
        /// <summary>
        /// Loads a stylesheet and updates <c>_markers</c> with the marker definitions it contains.
        /// </summary>
        /// <remarks>
        /// When the file exists it is read from disk. Otherwise, if its file name is "usfm.sty" or
        /// "usfm_sb.sty", the lines are obtained from <c>GetEmbeddedStylesheet</c>.
        /// Only entries whose marker is "marker" start a definition. An entry of the form
        /// "\marker xy -" removes the marker "xy" and its end marker "xy*" instead of defining it.
        /// </remarks>
        /// <param name="stylesheetFileName">Path (or well-known file name) of the stylesheet.</param>
        /// <exception cref="ArgumentException">
        /// The file does not exist and its name is neither "usfm.sty" nor "usfm_sb.sty".
        /// </exception>
        private void Parse(string stylesheetFileName)
        {
            IEnumerable<string> lines;

            if (File.Exists(stylesheetFileName))
            {
                lines = File.ReadAllLines(stylesheetFileName);
            }
            else
            {
                // Only the two standard stylesheets have a fallback when the file is missing.
                string fileName = Path.GetFileName(stylesheetFileName);
                if (fileName != "usfm.sty" && fileName != "usfm_sb.sty")
                {
                    throw new ArgumentException("The stylesheet does not exist.", nameof(stylesheetFileName));
                }

                lines = GetEmbeddedStylesheet(fileName);
            }

            List<StylesheetEntry> entries = SplitStylesheet(lines);

            for (int i = 0; i < entries.Count; ++i)
            {
                StylesheetEntry entry = entries[i];

                if (entry.Marker != "marker")
                {
                    continue;
                }

                string[] parts = entry.Text.Split();
                if (parts.Length > 1 && parts[1] == "-")
                {
                    // If the entry looks like "\marker xy -" remove the tag and its end tag if any
                    _markers.Remove(parts[0]);
                    _markers.Remove(parts[0] + "*");
                    continue;
                }

                // The property entries for this marker start right after the "marker" entry itself.
                UsfmMarker marker    = CreateMarker(entry.Text);
                UsfmMarker endMarker = ParseMarkerEntry(marker, entries, i + 1);

                // Never overwrite an end marker that has already been registered.
                if (endMarker != null && !_markers.ContainsKey(endMarker.Marker))
                {
                    _markers[endMarker.Marker] = endMarker;
                }
            }
        }
예제 #4
0
        /// <summary>
        /// Returns the marker registered under <paramref name="markerStr"/>, creating and
        /// registering it first when it is not yet known.
        /// </summary>
        /// <remarks>
        /// A newly created marker starts out Publishable unless it is "c" or "v".
        /// An existing marker is returned unchanged so that later definitions can add to it.
        /// </remarks>
        /// <param name="markerStr">The marker name to look up or create.</param>
        /// <returns>The existing or newly created marker.</returns>
        private UsfmMarker CreateMarker(string markerStr)
        {
            UsfmMarker known;
            if (_markers.TryGetValue(markerStr, out known))
            {
                return known;
            }

            UsfmMarker created = new UsfmMarker(markerStr);

            bool isChapterOrVerse = markerStr == "c" || markerStr == "v";
            if (!isChapterOrVerse)
            {
                created.TextProperties = UsfmTextProperties.Publishable;
            }

            _markers[markerStr] = created;
            return created;
        }
예제 #5
0
        /// <summary>
        /// Applies a text-type stylesheet entry to a marker.
        /// </summary>
        /// <remarks>
        /// "chapternumber" and "versenumber" (in any letter case) set the Chapter and Verse text
        /// properties respectively. These are fixed stylesheet keywords rather than linguistic
        /// text, so both are compared ordinally; the result must not depend on the current
        /// thread culture. Independently, if the entry text has a mapping in
        /// <c>TextTypeMappings</c>, the marker's text type is set to the mapped value.
        /// </remarks>
        /// <param name="qTag">The marker to update.</param>
        /// <param name="entry">The stylesheet entry whose text names the text type.</param>
        private static void ParseTextType(UsfmMarker qTag, StylesheetEntry entry)
        {
            if (entry.Text.Equals("chapternumber", StringComparison.OrdinalIgnoreCase))
            {
                qTag.TextProperties |= UsfmTextProperties.Chapter;
            }
            if (entry.Text.Equals("versenumber", StringComparison.OrdinalIgnoreCase))
            {
                qTag.TextProperties |= UsfmTextProperties.Verse;
            }

            UsfmTextType textType;

            if (TextTypeMappings.TryGetValue(entry.Text, out textType))
            {
                qTag.TextType = textType;
            }
        }
예제 #6
0
        /// <summary>
        /// Splits a USFM string into a sequence of tokens.
        /// </summary>
        /// <remarks>
        /// Text up to the next backslash becomes a Text token. A backslash starts a marker, which
        /// is looked up in <c>_stylesheet</c> and turned into a token according to the marker's
        /// style type and text properties. Verse, chapter, book and note tokens additionally take
        /// the word returned by <c>GetNextWord</c> as their text.
        /// When <paramref name="preserveWhitespace"/> is false, text is passed through
        /// <c>RegularizeSpaces</c>, whitespace following a marker is skipped, and a final pass
        /// makes sure a space precedes book, chapter, paragraph and (most) verse tokens.
        /// </remarks>
        /// <param name="usfm">The USFM text to tokenize.</param>
        /// <param name="preserveWhitespace">
        /// True to keep whitespace exactly as it appears in <paramref name="usfm"/>.
        /// </param>
        /// <returns>The tokens in the order they occur in <paramref name="usfm"/>.</returns>
        public IEnumerable <UsfmToken> Parse(string usfm, bool preserveWhitespace = false)
        {
            List <UsfmToken> tokens = new List <UsfmToken>();

            int index = 0;                  // Current position

            while (index < usfm.Length)
            {
                // Search from index + 1 so that a backslash at the current position is not found again
                int nextMarkerIndex = (index < usfm.Length - 1) ? usfm.IndexOf('\\', index + 1) : -1;
                if (nextMarkerIndex == -1)
                {
                    nextMarkerIndex = usfm.Length;
                }

                // If text, create text token until end or next \
                var ch = usfm[index];
                if (ch != '\\')
                {
                    string text = usfm.Substring(index, nextMarkerIndex - index);
                    if (!preserveWhitespace)
                    {
                        text = RegularizeSpaces(text);
                    }

                    tokens.Add(new UsfmToken(UsfmTokenType.Text, null, text));

                    index = nextMarkerIndex;
                    continue;
                }

                // Get marker (and move past whitespace or star ending)
                index++;
                int markerStart = index;
                while (index < usfm.Length)
                {
                    ch = usfm[index];

                    // Backslash starts a new marker
                    if (ch == '\\')
                    {
                        break;
                    }

                    // End star is part of marker
                    if (ch == '*')
                    {
                        index++;
                        break;
                    }

                    if (IsNonSemanticWhiteSpace(ch))
                    {
                        // Preserve whitespace if needed, otherwise skip
                        if (!preserveWhitespace)
                        {
                            index++;
                        }
                        break;
                    }
                    index++;
                }
                // TrimEnd drops the single whitespace character that may have been consumed above
                string markerStr = usfm.Substring(markerStart, index - markerStart).TrimEnd();

                // Multiple whitespace after non-end marker is ok
                if (!markerStr.EndsWith("*", StringComparison.Ordinal) && !preserveWhitespace)
                {
                    while ((index < usfm.Length) && IsNonSemanticWhiteSpace(usfm[index]))
                    {
                        index++;
                    }
                }

                // Lookup marker (a leading '+' is stripped for the lookup)
                // NOTE(review): marker.StyleType is dereferenced below without a null check, so this
                // assumes GetMarker never returns null, even for unknown markers - confirm.
                UsfmMarker marker = _stylesheet.GetMarker(markerStr.TrimStart('+'));

                // If starts with a plus and is not a character style, it is an unknown marker
                if (markerStr.StartsWith("+", StringComparison.Ordinal) && marker.StyleType != UsfmStyleType.Character)
                {
                    marker = _stylesheet.GetMarker(markerStr);
                }

                switch (marker.StyleType)
                {
                case UsfmStyleType.Character:
                    // Handle verse special case
                    if ((marker.TextProperties & UsfmTextProperties.Verse) > 0)
                    {
                        tokens.Add(new UsfmToken(UsfmTokenType.Verse, marker, GetNextWord(usfm, ref index, preserveWhitespace)));
                    }
                    else
                    {
                        tokens.Add(new UsfmToken(UsfmTokenType.Character, marker, null));
                    }
                    break;

                case UsfmStyleType.Paragraph:
                    // Handle chapter special case
                    if ((marker.TextProperties & UsfmTextProperties.Chapter) > 0)
                    {
                        tokens.Add(new UsfmToken(UsfmTokenType.Chapter, marker, GetNextWord(usfm, ref index, preserveWhitespace)));
                    }
                    else if ((marker.TextProperties & UsfmTextProperties.Book) > 0)
                    {
                        tokens.Add(new UsfmToken(UsfmTokenType.Book, marker, GetNextWord(usfm, ref index, preserveWhitespace)));
                    }
                    else
                    {
                        tokens.Add(new UsfmToken(UsfmTokenType.Paragraph, marker, null));
                    }
                    break;

                case UsfmStyleType.Note:
                    tokens.Add(new UsfmToken(UsfmTokenType.Note, marker, GetNextWord(usfm, ref index, preserveWhitespace)));
                    break;

                case UsfmStyleType.End:
                    tokens.Add(new UsfmToken(UsfmTokenType.End, marker, null));
                    break;

                case UsfmStyleType.Unknown:
                    // End tokens are always end tokens, even if unknown
                    if (markerStr.EndsWith("*", StringComparison.Ordinal))
                    {
                        tokens.Add(new UsfmToken(UsfmTokenType.End, marker, null));
                    }
                    else
                    {
                        // Handle special case of esb and esbe which might not be in basic stylesheet
                        // but are always sidebars and so should be tokenized as paragraphs
                        if (markerStr == "esb" || markerStr == "esbe")
                        {
                            tokens.Add(new UsfmToken(UsfmTokenType.Paragraph, marker, null));
                            break;
                        }
                        // Create unknown token with a corresponding end note
                        tokens.Add(new UsfmToken(UsfmTokenType.Unknown, marker, null));
                    }
                    break;
                }
            }

            // Forces a space to be present in tokenization if immediately
            // before a token requiring a preceding CR/LF. This is to ensure
            // that when written to disk and re-read, that tokenization
            // will match. For example, "\p test\p here" requires a space
            // after "test". Also, "\p \em test\em*\p here" requires a space
            // token inserted after \em*
            if (!preserveWhitespace)
            {
                // Starts at 1 because every check looks at the token before the current one
                for (int i = 1; i < tokens.Count; i++)
                {
                    // If requires newline (verses do, except when after '(' or '[')
                    if (tokens[i].Type == UsfmTokenType.Book ||
                        tokens[i].Type == UsfmTokenType.Chapter ||
                        tokens[i].Type == UsfmTokenType.Paragraph ||
                        (tokens[i].Type == UsfmTokenType.Verse &&
                         !(tokens[i - 1].Type == UsfmTokenType.Text &&
                           (tokens[i - 1].Text.EndsWith("(", StringComparison.Ordinal) || tokens[i - 1].Text.EndsWith("[", StringComparison.Ordinal)))))
                    {
                        // Add space to text token
                        if (tokens[i - 1].Type == UsfmTokenType.Text)
                        {
                            if (!tokens[i - 1].Text.EndsWith(" ", StringComparison.Ordinal))
                            {
                                // NOTE(review): the single-argument UsfmToken constructor is defined
                                // elsewhere; presumably it creates a Text token - confirm.
                                tokens[i - 1] = new UsfmToken(tokens[i - 1].Text + " ");
                            }
                        }
                        else if (tokens[i - 1].Type == UsfmTokenType.End)
                        {
                            // Insert space token after * of end marker
                            tokens.Insert(i, new UsfmToken(UsfmTokenType.Text, null, " "));
                            // Step over the inserted token so the current token is not examined again
                            i++;
                        }
                    }
                }
            }

            return(tokens);
        }
예제 #7
0
 /// <summary>
 /// Initializes a token from its type, marker and text.
 /// </summary>
 /// <param name="type">The kind of token.</param>
 /// <param name="marker">The marker associated with the token.</param>
 /// <param name="text">The text carried by the token.</param>
 public UsfmToken(UsfmTokenType type, UsfmMarker marker, string text)
 {
     this.Type = type;
     this.Marker = marker;
     this.Text = text;
 }
예제 #8
0
        /// <summary>
        /// Applies the property entries that follow a marker definition to <paramref name="marker"/>.
        /// </summary>
        /// <remarks>
        /// Entries are read from <paramref name="entryIndex"/> onwards until the list ends or the
        /// next "marker" entry is reached. Entries with an unrecognized name are ignored.
        /// A Character-style marker with no explicit "endmarker" entry gets the end marker
        /// formed from its own name plus "*".
        /// </remarks>
        /// <param name="marker">The marker being defined; it is modified in place.</param>
        /// <param name="stylesheetEntries">All entries of the stylesheet.</param>
        /// <param name="entryIndex">Index of the first property entry belonging to the marker.</param>
        /// <returns>The end marker created for this definition, or null if there is none.</returns>
        private static UsfmMarker ParseMarkerEntry(UsfmMarker marker, List<StylesheetEntry> stylesheetEntries, int entryIndex)
        {
            // Kept for conformance with Paratext release 5.0 stylesheets (release 6.0 and
            // later follow the guidelines set in InitPropertyMaps): \id always gets the
            // book property.
            if (marker.Marker == "id")
            {
                marker.TextProperties |= UsfmTextProperties.Book;
            }

            UsfmMarker endMarker = null;

            for (int pos = entryIndex; pos < stylesheetEntries.Count; ++pos)
            {
                StylesheetEntry current = stylesheetEntries[pos];

                // The next "marker" entry starts another definition.
                if (current.Marker == "marker")
                {
                    break;
                }

                // Receives the numeric value of whichever property is being parsed.
                int number;

                switch (current.Marker)
                {
                case "name":
                    marker.Name = current.Text;
                    break;

                case "description":
                    marker.Description = current.Text;
                    break;

                case "fontname":
                    marker.FontName = current.Text;
                    break;

                case "fontsize":
                    if (current.Text == "-")
                    {
                        marker.FontSize = 0;
                    }
                    else if (ParseInteger(current, out number))
                    {
                        marker.FontSize = number;
                    }
                    break;

                case "xmltag":
                    marker.XmlTag = current.Text;
                    break;

                case "encoding":
                    marker.Encoding = current.Text;
                    break;

                case "linespacing":
                    if (ParseInteger(current, out number))
                    {
                        marker.LineSpacing = number;
                    }
                    break;

                case "spacebefore":
                    if (ParseInteger(current, out number))
                    {
                        marker.SpaceBefore = number;
                    }
                    break;

                case "spaceafter":
                    if (ParseInteger(current, out number))
                    {
                        marker.SpaceAfter = number;
                    }
                    break;

                case "leftmargin":
                    if (ParseInteger(current, out number))
                    {
                        marker.LeftMargin = number;
                    }
                    break;

                case "rightmargin":
                    if (ParseInteger(current, out number))
                    {
                        marker.RightMargin = number;
                    }
                    break;

                case "firstlineindent":
                    // Unlike the other measurements, this one is read with ParseFloat.
                    if (ParseFloat(current, out number))
                    {
                        marker.FirstLineIndent = number;
                    }
                    break;

                case "rank":
                    if (current.Text == "-")
                    {
                        marker.Rank = 0;
                    }
                    else if (ParseInteger(current, out number))
                    {
                        marker.Rank = number;
                    }
                    break;

                // For the on/off properties below, a text of "-" switches the property off.
                case "bold":
                    marker.Bold = !string.Equals(current.Text, "-");
                    break;

                case "smallcaps":
                    marker.SmallCaps = !string.Equals(current.Text, "-");
                    break;

                case "subscript":
                    marker.Subscript = !string.Equals(current.Text, "-");
                    break;

                case "italic":
                    marker.Italic = !string.Equals(current.Text, "-");
                    break;

                case "regular":
                    marker.Superscript = false;
                    marker.Bold        = false;
                    marker.Italic      = false;
                    marker.Regular     = true;
                    break;

                case "underline":
                    marker.Underline = !string.Equals(current.Text, "-");
                    break;

                case "superscript":
                    marker.Superscript = !string.Equals(current.Text, "-");
                    break;

                case "testylename":
                    // Ignore this tag, later we will use it to tie to FW styles
                    break;

                case "notrepeatable":
                    marker.NotRepeatable = !string.Equals(current.Text, "-");
                    break;

                case "textproperties":
                    ParseTextProperties(marker, current);
                    break;

                case "texttype":
                    ParseTextType(marker, current);
                    break;

                case "color":
                    if (current.Text == "-")
                    {
                        marker.Color = 0;
                    }
                    else if (ParseInteger(current, out number))
                    {
                        marker.Color = number;
                    }
                    break;

                case "justification":
                    UsfmJustification mappedJustification;
                    if (JustificationMappings.TryGetValue(current.Text, out mappedJustification))
                    {
                        marker.Justification = mappedJustification;
                    }
                    break;

                case "styletype":
                    UsfmStyleType mappedStyle;
                    if (StyleMappings.TryGetValue(current.Text, out mappedStyle))
                    {
                        marker.StyleType = mappedStyle;
                    }
                    break;

                case "occursunder":
                    marker.OccursUnder.UnionWith(
                        current.Text.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries));
                    break;

                case "endmarker":
                    endMarker        = MakeEndMarker(current.Text);
                    marker.EndMarker = current.Text;
                    break;
                }
            }

            // A character style without an explicit end marker is closed by "<marker>*".
            if (marker.StyleType == UsfmStyleType.Character && endMarker == null)
            {
                string impliedEnd = marker.Marker + "*";
                endMarker        = MakeEndMarker(impliedEnd);
                marker.EndMarker = impliedEnd;
            }

            // Special case: character and paragraph markers of text type Other are publishable
            // unless they are nonpublishable, chapter or verse markers.
            UsfmTextProperties blocking = UsfmTextProperties.Nonpublishable |
                                          UsfmTextProperties.Chapter |
                                          UsfmTextProperties.Verse;

            if (marker.TextType == UsfmTextType.Other &&
                (marker.TextProperties & blocking) == 0 &&
                (marker.StyleType == UsfmStyleType.Character || marker.StyleType == UsfmStyleType.Paragraph))
            {
                marker.TextProperties |= UsfmTextProperties.Publishable;
            }

            return endMarker;
        }