/// <summary>
/// Tokenizes <c>bookText</c> with <c>stylesheet</c> and returns the resulting token list.
/// </summary>
/// <param name="bookNum">Book number requested by the caller; not referenced by this method body.</param>
/// <returns>The list produced by <c>UsfmToken.Tokenize</c>.</returns>
public List<UsfmToken> GetUsfmTokens(int bookNum)
{
    // The final argument is passed as false, exactly as before.
    return UsfmToken.Tokenize(stylesheet, bookText, false);
}
/// <summary>
/// Compares the number of character sequences in the accumulated inner text with the
/// number of glosses parsed from the gloss attribute, and records a marker error when
/// the two counts are inconsistent.
/// </summary>
/// <param name="state">Parser state forwarded to <c>RecordMarkerError</c>.</param>
/// <param name="marker">Marker the error is reported against.</param>
/// <param name="attributes">Attributes of the marker; may be null.</param>
private void CheckRubyGlossing(UsfmParserState state, string marker, NamedAttribute[] attributes)
{
    var rubyBase = innerTextBuilder.Value?.ToString();
    if (string.IsNullOrEmpty(rubyBase))
    {
        return;
    }

    string[] baseUnits = CharacterSequences(rubyBase).ToArray();

    var rawGloss = attributes?.FirstOrDefault(attr => attr.Name == AttributeName.Gloss)?.Value;
    if (string.IsNullOrEmpty(rawGloss))
    {
        // An empty gloss already yields a missing-gloss-attribute error elsewhere,
        // so bail out here instead of reporting a second error for the same problem.
        return;
    }

    string[] glossParts = UsfmToken.ParseRubyGlosses(rawGloss, false);
    int baseCount = baseUnits.Length;
    int glossCount = glossParts.Length;

    if (glossCount > baseCount)
    {
        RecordMarkerError(state, marker,
            Localizer.Str(@"More ruby glosses than base text characters") + markerSlot);
    }
    else if (glossCount < baseCount && glossCount != 1)
    {
        // A single gloss is accepted for any amount of base text.
        RecordMarkerError(state, marker,
            Localizer.Str(@"Fewer ruby glosses than base text characters") + markerSlot);
    }
}
/// <summary>
/// Drives <paramref name="lookaheadParser"/> forward until it can be decided whether
/// <paramref name="marker"/> is explicitly closed.
/// </summary>
/// <param name="state">State whose element stack is the baseline for comparison.</param>
/// <param name="lookaheadParser">Parser that is advanced token by token.</param>
/// <param name="marker">Marker whose closing is being looked for.</param>
/// <param name="isTokenClosed">
/// Set to true only when the token at which the baseline stack stops being a prefix of the
/// lookahead stack is an End token whose marker is <paramref name="marker"/> followed by "*".
/// </param>
private static void LookaheadParser(UsfmParserState state, UsfmParser lookaheadParser, string marker, out bool isTokenClosed)
{
    // BEWARE: This method is fairly performance-critical
    string closingMarker = marker + "*";
    bool closed = false;

    // Keep consuming tokens until the marker is reopened, the baseline stack no longer
    // matches the start of the lookahead stack, or the tokens run out.
    while (lookaheadParser.ProcessToken())
    {
        UsfmToken current = lookaheadParser.tokens[lookaheadParser.index];

        // Same marker seen again with an identical stack: it was reopened without a close.
        if (current.Marker == marker && lookaheadParser.State.Stack.SequenceEqual(state.Stack))
        {
            break;
        }

        // While the marker is still open, the baseline stack remains a prefix of the lookahead stack.
        bool prefixIntact = lookaheadParser.State.Stack.Take(state.Stack.Count).SequenceEqual(state.Stack);
        if (prefixIntact)
        {
            continue;
        }

        // The stack changed; it only counts as closed if this token is the matching end marker.
        closed = current.Marker == closingMarker && current.Type == UsfmTokenType.End;
        break;
    }

    isTokenClosed = closed;
}
/// <summary>
/// Determines whether the current token starts a reference jump: a "ref" marker followed by
/// a text token containing "|" and then the matching end token.
/// </summary>
/// <param name="token">Token to test; the two tokens after the current index are also inspected.</param>
/// <returns>true when all of the conditions above hold; otherwise false.</returns>
bool IsRef(UsfmToken token)
{
    // Two more tokens must exist after the current one.
    if (index >= tokens.Count - 2)
    {
        return false;
    }

    // The next token must carry text that contains the "|" separator.
    if (tokens[index + 1].Text == null)
    {
        return false;
    }
    if (!tokens[index + 1].Text.Contains("|"))
    {
        return false;
    }

    // The token after that must be the end marker belonging to this token.
    if (tokens[index + 2].Type != UsfmTokenType.End)
    {
        return false;
    }
    if (tokens[index + 2].Marker != token.EndMarker)
    {
        return false;
    }

    return token.Marker == "ref";
}
/// <summary>
/// Handles a start or end milestone: tracks open milestones per start marker, records errors
/// for unsupported, unclosed, unmatched or id-mismatched milestones, then validates the attributes.
/// </summary>
/// <param name="state">Parser state supplying the verse reference and verse offset.</param>
/// <param name="marker">Milestone marker as encountered (start or end form).</param>
/// <param name="startMilestone">true for a start milestone, false for an end milestone.</param>
/// <param name="namedAttributes">Attributes on the milestone; may be null.</param>
public override void Milestone(UsfmParserState state, string marker, bool startMilestone, NamedAttribute[] namedAttributes)
{
    if (!markerCheck.allowVersion3Usfm)
    {
        recordError(new VerseRef(state.VerseRef), "\\" + marker, state.VerseOffset,
            GetErrorMessage(unsupportedMarkerMessage, marker));
    }

    Tuple<VerseRef, int, string> pending;
    if (!startMilestone)
    {
        // Build the end-marker -> start-marker lookup on first use.
        if (endMilestoneMarkerMap.Count == 0)
        {
            foreach (var tag in scrStylesheet.Tags)
            {
                if (tag.StyleType != ScrStyleType.scMilestone)
                {
                    continue;
                }
                endMilestoneMarkerMap[tag.Endmarker] = tag.Marker;
            }
        }

        string openingMarker = endMilestoneMarkerMap[marker];
        if (!openMilestones.TryGetValue(openingMarker, out pending))
        {
            recordError(state.VerseRef, marker, state.VerseOffset,
                "#" + Localizer.Str("End milestone has no matching start:") + " \\" + marker);
        }
        else
        {
            // The id stored at the start must equal the id on this end milestone.
            if (pending.Item3 != UsfmToken.GetAttribute(namedAttributes, AttributeName.Id))
            {
                recordError(pending.Item1, marker, pending.Item2,
                    "#" + Localizer.Str("Id on start/end milestones do not match:") + " \\" + openingMarker);
            }
            openMilestones.Remove(openingMarker);
        }
    }
    else
    {
        // A second start for the same marker means the earlier one was never ended.
        if (openMilestones.TryGetValue(marker, out pending))
        {
            recordError(pending.Item1, marker, pending.Item2, "#" + missingMilestoneEnd + " \\" + marker);
        }

        // Remember position and id of this start so the matching end can be checked.
        openMilestones[marker] = new Tuple<VerseRef, int, string>(
            state.VerseRef.Clone(),
            state.VerseOffset,
            UsfmToken.GetAttribute(namedAttributes, AttributeName.Id));
    }

    ValidateAttributes(state, scrStylesheet.GetTag(marker), marker, namedAttributes ?? new NamedAttribute[0]);
}
/// <summary>
/// Determines whether a token is a table cell: a character token whose marker starts with
/// "th" or "tc" while a row element is on the parser stack.
/// </summary>
/// <param name="token">Token to test.</param>
/// <returns>true if the token is a cell marker inside an open row; otherwise false.</returns>
bool IsCell(UsfmToken token)
{
    if (token.Type != UsfmTokenType.Character)
    {
        return false;
    }

    // Markers are identifiers, not linguistic text: compare ordinally so the result does not
    // depend on the current culture or on ignorable characters (culture-sensitive StartsWith
    // can match through them).
    bool hasCellPrefix =
        token.Marker.StartsWith("th", System.StringComparison.Ordinal) ||
        token.Marker.StartsWith("tc", System.StringComparison.Ordinal);
    if (!hasCellPrefix)
    {
        return false;
    }

    // A cell only makes sense while a row is open.
    return State.Stack.Exists(elem => elem.Type == UsfmElementTypes.Row);
}
/// <summary>
/// Processes a single token: advances <c>index</c>, closes any elements on
/// <c>State.Stack</c> that the token ends, then updates the parser state for the
/// token and notifies <c>sink</c> (when one is set).
/// </summary>
/// <returns>
/// false if there were no more tokens to process; true otherwise, including when the
/// token was only skipped because an earlier token had already consumed it.
/// </returns>
public bool ProcessToken()
{
    // If past end
    if (index >= tokens.Count - 1)
    {
        return(false);
    }

    // Move to next token
    index++;

    // Update verse offset with previous token (since verse offset is from start of current token)
    if (index > 0)
    {
        State.VerseOffset += tokens[index - 1].GetLength(false, !tokensPreserveWhitespace);
    }

    // Skip over tokens that are to be skipped, ensuring that
    // SpecialToken state is true. (skip is increased further down when a token
    // consumes the tokens that follow it, e.g. \ca ... \ca* after a chapter.)
    if (skip > 0)
    {
        skip--;
        State.SpecialToken = true;
        return(true);
    }

    // Reset special token status
    State.SpecialToken = false;

    UsfmToken token = tokens[index];

    // Switch unknown types to either character or paragraph
    UsfmTokenType tokenType = token.Type;
    if (tokenType == UsfmTokenType.Unknown)
    {
        tokenType = DetermineUnknownTokenType();
    }

    // Report every non-empty marker to the sink before any structural handling
    if (sink != null && !string.IsNullOrEmpty(token.Marker))
    {
        sink.GotMarker(State, token.Marker);
    }

    // First pass: close open elements that this token ends
    switch (tokenType)
    {
        case UsfmTokenType.Book:
        case UsfmTokenType.Chapter:
            CloseAll();
            break;

        case UsfmTokenType.Paragraph:
            // Handle special case of table rows
            if (token.Marker == "tr")
            {
                // Close all but table and sidebar
                while (State.Stack.Count > 0 && Peek().Type != UsfmElementTypes.Table && Peek().Type != UsfmElementTypes.Sidebar)
                {
                    CloseElement();
                }
                break;
            }

            // Handle special case of sidebars
            if (token.Marker == "esb")
            {
                // Close all
                CloseAll();
                break;
            }

            // Close all but sidebar
            while (State.Stack.Count > 0 && Peek().Type != UsfmElementTypes.Sidebar)
            {
                CloseElement();
            }
            break;

        case UsfmTokenType.Character:
            // Handle special case of table cell
            if (IsCell(token))
            {
                // Close until row (IsCell only returns true when a row is on the stack)
                while (Peek().Type != UsfmElementTypes.Row)
                {
                    CloseElement();
                }
                break;
            }

            // Handle refs
            if (IsRef(token))
            {
                // Refs don't close anything
                break;
            }

            // If non-nested character style, close all character styles
            if (!token.Marker.StartsWith("+"))
            {
                CloseCharStyles();
            }
            break;

        case UsfmTokenType.Verse:
            CloseNote();
            break;

        case UsfmTokenType.Note:
            CloseNote();
            break;

        case UsfmTokenType.End:
            // If end marker for an active note
            if (State.Stack.Exists(e => e.Type == UsfmElementTypes.Note && (e.Marker + "*" == token.Marker)))
            {
                CloseNote();
                break;
            }

            // If end marker for a character style on stack, close it
            // If no matching end marker, close all character styles on top of stack
            UsfmParserElement elem;
            bool unmatched = true;
            while (State.Stack.Count > 0)
            {
                elem = Peek();
                if (elem.Type != UsfmElementTypes.Char)
                {
                    break;
                }
                CloseElement();

                // Determine if a + prefix is needed to close it (was nested char style)
                bool plusPrefix = (State.Stack.Count > 0 && Peek().Type == UsfmElementTypes.Char);

                // If is a match
                if ((plusPrefix ? "+" : "") + elem.Marker + "*" == token.Marker)
                {
                    unmatched = false;
                    break;
                }
            }

            // Unmatched end marker
            if (unmatched)
            {
                if (sink != null)
                {
                    sink.Unmatched(State, token.Marker);
                }
            }
            break;
    }

    // Second pass: handle the token itself (push elements, update state, notify sink)
    switch (tokenType)
    {
        case UsfmTokenType.Book:
            State.Stack.Add(new UsfmParserElement(UsfmElementTypes.Book, token.Marker));

            // Code is always upper case
            string code = token.Data[0].ToUpperInvariant();

            // Update verse ref. Leave book alone if not empty to prevent parsing errors
            // on books with bad id lines.
            if (State.VerseRef.Book == "" && Canon.BookIdToNumber(code) != 0)
            {
                State.VerseRef.Book = code;
            }
            State.VerseRef.ChapterNum = 1;
            State.VerseRef.VerseNum = 0;
            State.VerseOffset = 0;

            // Book start.
            if (sink != null)
            {
                sink.StartBook(State, token.Marker, code);
            }
            break;

        case UsfmTokenType.Chapter:
            // Get alternate chapter number
            string altChapter = null;
            string pubChapter = null;
            if (!InventoryMode)
            {
                // \ca <text> \ca* directly after the chapter token: three tokens consumed
                if (index < tokens.Count - 3 && tokens[index + 1].Marker == "ca" && tokens[index + 2].Text != null && tokens[index + 3].Marker == "ca*")
                {
                    altChapter = tokens[index + 2].Text.Trim();
                    skip += 3;

                    // Skip blank space after if present
                    if (index + skip < tokens.Count - 1 && tokens[index + skip + 1].Text != null && tokens[index + skip + 1].Text.Trim().Length == 0)
                    {
                        skip++;
                    }
                }

                // Get publishable chapter number (\cp <text>: two tokens consumed, no end marker checked)
                if (index + skip < tokens.Count - 2 && tokens[index + skip + 1].Marker == "cp" && tokens[index + skip + 2].Text != null)
                {
                    pubChapter = tokens[index + skip + 2].Text.Trim();
                    skip += 2;
                }
            }

            // Chapter
            State.VerseRef.Chapter = token.Data[0];
            State.VerseRef.VerseNum = 0;
            // Verse offset is not zeroed for chapter 1, as it is part of intro
            if (State.VerseRef.ChapterNum != 1)
            {
                State.VerseOffset = 0;
            }

            if (sink != null)
            {
                sink.Chapter(State, token.Data[0], token.Marker, altChapter, pubChapter);
            }
            break;

        case UsfmTokenType.Verse:
            string pubVerse = null;
            string altVerse = null;
            if (!InventoryMode)
            {
                if (index < tokens.Count - 3 && tokens[index + 1].Marker == "va" && tokens[index + 2].Text != null && tokens[index + 3].Marker == "va*")
                {
                    // Get alternate verse number
                    altVerse = tokens[index + 2].Text.Trim();
                    skip += 3;
                }
                // index + skip: look past the \va ... \va* group if one was just consumed
                if (index + skip < tokens.Count - 3 && tokens[index + skip + 1].Marker == "vp" && tokens[index + skip + 2].Text != null && tokens[index + skip + 3].Marker == "vp*")
                {
                    // Get publishable verse number
                    pubVerse = tokens[index + skip + 2].Text.Trim();
                    skip += 3;
                }
            }

            // Verse
            State.VerseRef.Verse = token.Data[0];
            State.VerseOffset = 0;

            if (sink != null)
            {
                sink.Verse(State, token.Data[0], token.Marker, altVerse, pubVerse);
            }
            break;

        case UsfmTokenType.Paragraph:
            // Handle special case of table rows
            if (token.Marker == "tr")
            {
                // Start table if not open
                if (State.Stack.TrueForAll(e => e.Type != UsfmElementTypes.Table))
                {
                    State.Stack.Add(new UsfmParserElement(UsfmElementTypes.Table, null));
                    if (sink != null)
                    {
                        sink.StartTable(State);
                    }
                }

                State.Stack.Add(new UsfmParserElement(UsfmElementTypes.Row, token.Marker));

                // Row start
                if (sink != null)
                {
                    sink.StartRow(State, token.Marker);
                }
                break;
            }

            // Handle special case of sidebars
            if (token.Marker == "esb")
            {
                bool isClosed = IsStudyBibleItemClosed("esb", "esbe");

                // TODO - see FB 23934
                // Would like to only add start sidebar if it is closed - adding unclosed marker will cause
                // an end marker to be created in an unexpected place in the editor.
                // if (isClosed)
                State.Stack.Add(new UsfmParserElement(UsfmElementTypes.Sidebar, token.Marker));

                // Look for category
                string sidebarCategory = null;
                if (index < tokens.Count - 3 && tokens[index + 1].Marker == "cat" && tokens[index + 2].Text != null && tokens[index + 3].Marker == "cat*")
                {
                    // Get category
                    sidebarCategory = tokens[index + 2].Text.Trim();
                    skip += 3;
                }

                if (sink != null)
                {
                    sink.StartSidebar(State, token.Marker, sidebarCategory, isClosed);
                }
                break;
            }

            // Close sidebar if in sidebar; otherwise report the stray \esbe as unmatched
            if (token.Marker == "esbe")
            {
                if (State.Stack.Exists(e => e.Type == UsfmElementTypes.Sidebar))
                {
                    CloseAll();
                }
                else if (sink != null)
                {
                    sink.Unmatched(State, token.Marker);
                }
                break;
            }

            State.Stack.Add(new UsfmParserElement(UsfmElementTypes.Para, token.Marker));

            // Paragraph opening (last argument tells the sink the token's own type was Unknown)
            if (sink != null)
            {
                sink.StartPara(State, token.Marker, token.Type == UsfmTokenType.Unknown);
            }
            break;

        case UsfmTokenType.Character:
            // Handle special case of table cells (treated as special character style)
            if (IsCell(token))
            {
                // Alignment comes from the third marker character: 'c' -> center, 'r' -> end, otherwise start
                string align = "start";
                if (token.Marker.Length > 2 && token.Marker[2] == 'c')
                {
                    align = "center";
                }
                else if (token.Marker.Length > 2 && token.Marker[2] == 'r')
                {
                    align = "end";
                }

                State.Stack.Add(new UsfmParserElement(UsfmElementTypes.Cell, token.Marker));

                if (sink != null)
                {
                    sink.StartCell(State, token.Marker, align);
                }
                break;
            }

            if (IsRef(token))
            {
                // xrefs are special tokens (they do not stand alone)
                State.SpecialToken = true;

                string display;
                string target;
                ParseDisplayAndTarget(out display, out target);

                // The text token and the end token after the ref marker are consumed here
                skip += 2;

                if (sink != null)
                {
                    sink.Ref(State, token.Marker, display, target);
                }
                break;
            }

            string actualMarker;
            bool invalidMarker = false;
            if (token.Marker.StartsWith("+"))
            {
                // Only strip + if properly nested
                // NOTE(review): this uses `state` (lower case) while the rest of the method
                // mostly uses `State`; presumably both refer to the same parser state - confirm.
                actualMarker = state.CharTag != null?token.Marker.TrimStart('+') : token.Marker;
                invalidMarker = state.CharTag == null;
            }
            else
            {
                actualMarker = token.Marker;
            }

            State.Stack.Add(new UsfmParserElement(UsfmElementTypes.Char, actualMarker, tokens[index].Attributes));
            if (sink != null)
            {
                // Note that IsTokenClosed() is only evaluated when a sink is present
                bool charIsClosed = IsTokenClosed();
                State.Stack.Last().IsClosed = charIsClosed; // save for attribute check in Text method
                sink.StartChar(State, actualMarker, charIsClosed, token.Type == UsfmTokenType.Unknown || invalidMarker, tokens[index].Attributes);
            }
            break;

        case UsfmTokenType.Note:
            // Look for category
            string noteCategory = null;
            if (index < tokens.Count - 3 && tokens[index + 1].Marker == "cat" && tokens[index + 2].Text != null && tokens[index + 3].Marker == "cat*")
            {
                // Get category
                noteCategory = tokens[index + 2].Text.Trim();
                skip += 3;
            }

            State.Stack.Add(new UsfmParserElement(UsfmElementTypes.Note, token.Marker));

            if (sink != null)
            {
                sink.StartNote(State, token.Marker, token.Data[0], noteCategory, IsTokenClosed());
            }
            break;

        case UsfmTokenType.Text:
            string text = token.Text;

            // If last token before a paragraph, book or chapter, esb, esbe (both are paragraph types),
            // or at very end, strip final space
            // This is because USFM requires these to be on a new line, therefore adding whitespace
            if ((index == tokens.Count - 1 || tokens[index + 1].Type == UsfmTokenType.Paragraph || tokens[index + 1].Type == UsfmTokenType.Book || tokens[index + 1].Type == UsfmTokenType.Chapter) && text.Length > 0 && text[text.Length - 1] == ' ')
            {
                text = text.Substring(0, text.Length - 1);
            }

            if (sink != null)
            {
                // Replace ~ with nbsp
                text = text.Replace('~', '\u00A0');

                // Replace // with <optbreak/>
                // NOTE(review): presumably optBreakSplitter keeps the "//" separators as their own
                // entries in the split result (the loop tests for them) - confirm against its definition.
                foreach (string str in optBreakSplitter.Split(text))
                {
                    if (str == "//")
                    {
                        sink.OptBreak(state);
                    }
                    else
                    {
                        sink.Text(state, str);
                    }
                }
            }
            break;

        case UsfmTokenType.Milestone:
        case UsfmTokenType.MilestoneEnd:
            // Currently, parse state doesn't need to be updated, so just inform the sink about the milestone.
            sink?.Milestone(state, token.Marker, token.Type == UsfmTokenType.Milestone, token.Attributes);
            break;
    }

    return(true);
}