/// <summary>
/// Advances <paramref name="lookaheadParser"/> token by token to find out whether the element
/// opened by <paramref name="marker"/> gets an explicit end marker.
/// </summary>
/// <param name="state">parser state whose stack is the baseline the lookahead stack is compared to</param>
/// <param name="lookaheadParser">parser that is advanced; its position is changed by this call</param>
/// <param name="marker">marker whose end marker (marker + "*") is searched for</param>
/// <param name="isTokenClosed">true only if the token that changed the baseline part of the stack
/// is an end token for <paramref name="marker"/>; false if the marker is reopened first or the
/// tokens run out</param>
private static void LookaheadParser(UsfmParserState state, UsfmParser lookaheadParser, string marker,
    out bool isTokenClosed)
{
    // BEWARE: This method is fairly performance-critical

    string closingMarker = marker + "*";

    // Keep consuming tokens until the baseline stack no longer matches (the marker was closed,
    // properly or not) or the same marker is opened again
    while (lookaheadParser.ProcessToken())
    {
        UsfmToken token = lookaheadParser.tokens[lookaheadParser.index];
        var lookaheadStack = lookaheadParser.State.Stack;

        // Same marker seen again with an identical stack: it was reopened without a close
        if (token.Marker == marker && lookaheadStack.SequenceEqual(state.Stack))
        {
            isTokenClosed = false;
            return;
        }

        // While the marker is still open, the leading part of the stack stays unchanged
        bool baselineIntact = lookaheadStack.Take(state.Stack.Count).SequenceEqual(state.Stack);
        if (baselineIntact)
        {
            continue;
        }

        // The baseline changed on this token; it counts as closed only for the matching end marker
        isTokenClosed = token.Marker == closingMarker && token.Type == UsfmTokenType.End;
        return;
    }

    isTokenClosed = false;
}
/// <summary>
/// Compares the number of ruby glosses in the gloss attribute with the number of character
/// sequences in the collected inner text and records a marker error when the counts disagree.
/// A single gloss is accepted for any number of base sequences.
/// </summary>
/// <param name="state">parser state used to locate the error</param>
/// <param name="marker">marker the error is recorded against</param>
/// <param name="attributes">attributes of the marker; may be null</param>
private void CheckRubyGlossing(UsfmParserState state, string marker, NamedAttribute[] attributes)
{
    var collectedText = innerTextBuilder.Value?.ToString();
    if (string.IsNullOrEmpty(collectedText))
    {
        return;
    }

    string[] baseSequences = CharacterSequences(collectedText).ToArray();

    var glossText = attributes?.FirstOrDefault(a => a.Name == AttributeName.Gloss)?.Value;
    if (string.IsNullOrEmpty(glossText))
    {
        // empty gloss text will result in a missing gloss attribute error, so return here
        // rather than creating 2 errors
        return;
    }

    string[] glosses = UsfmToken.ParseRubyGlosses(glossText, false);
    int baseCount = baseSequences.Length;
    int glossCount = glosses.Length;

    if (baseCount > glossCount)
    {
        if (glossCount != 1)
        {
            RecordMarkerError(state, marker,
                Localizer.Str(@"Fewer ruby glosses than base text characters") + markerSlot);
        }
    }
    else if (baseCount < glossCount)
    {
        RecordMarkerError(state, marker,
            Localizer.Str(@"More ruby glosses than base text characters") + markerSlot);
    }
}
/// <summary>
/// Checks the start of a note. Records an error when the element directly below the note on
/// the stack is a character style (unless the note marker is "x"), when the note is not
/// closed, when the caller is empty, and when no paragraph is open.
/// </summary>
public override void StartNote(UsfmParserState state, string marker, string caller, string category,
    bool closed)
{
    prevCharMarker = null;

    string noteMarker = "\\" + marker;
    int parentIndex = state.Stack.Count - 2;

    // FB-49128 changed check to allow character style to have an embedded cross reference
    if (state.NoteTag.Marker != "x" && parentIndex >= 0 &&
        state.Stack[parentIndex].Type == UsfmElementTypes.Char)
    {
        string openCharMarker = state.Stack[parentIndex].Marker;
        recordError(state.VerseRef, "\\" + openCharMarker, lastCharMarkerOffset,
            GetErrorMessage(charStyleNotClosedMessage, openCharMarker));
    }

    if (!closed)
    {
        recordError(state.VerseRef, noteMarker, state.VerseOffset,
            GetErrorMessage(noteNotClosedMessage, marker));
    }

    // (caller consistency check for non-empty callers was removed from this stand alone version)
    if (caller.Length == 0)
    {
        recordError(new VerseRef(state.VerseRef), noteMarker, state.VerseOffset,
            "#" + missingCallerMessage);
    }

    if (state.ParaTag == null)
    {
        recordError(new VerseRef(state.VerseRef), noteMarker, state.VerseOffset,
            "#" + noteNoParaMessage);
    }

    prevMarkerWasChapter = false;
}
/// <summary>
/// Appends <paramref name="text"/> to the inner-text builder, but only while a builder is
/// active (i.e. its key is set).
/// </summary>
private void HandleWordlistOrGlossaryCitationFormInnerText(UsfmParserState state, string text)
{
    if (innerTextBuilder.Key == null)
    {
        // no marker is currently collecting inner text
        return;
    }

    innerTextBuilder.Value.Append(text);
}
/// <summary>
/// Validates a paragraph-level marker. Reports any pending verse-without-paragraph error, then
/// records a "not here" error for an \id marker seen while the paragraph stack is non-empty,
/// for an \nb marker that does not directly follow a chapter marker, and for any marker that
/// <c>IsParagraphTagValid</c> rejects. Always clears <c>prevMarkerWasChapter</c>.
/// </summary>
private void ValidateParagraphTypeTag(UsfmParserState state, string marker)
{
    ReportPendingVerseNoParaError();

    if (marker == "id" && paraStack.Count != 0)
    {
        // \id marker can only appear at the beginning of the book
        UsfmParserState anchorState = previousTextState ?? state;
        string precedingText = previousText ?? "";
        int errorOffset = anchorState.VerseOffset + precedingText.Length;

        // we can't actually select the \id marker because it changes the verse reference and
        // is not even considered as a match
        recordError(new VerseRef(anchorState.VerseRef), "", errorOffset,
            GetErrorMessage(notHereMessage, marker));
    }
    else if (marker == "nb" && !prevMarkerWasChapter)
    {
        // \nb marker can only appear immediately following a chapter marker
        recordError(new VerseRef(state.VerseRef), "\\" + marker, state.VerseOffset,
            GetErrorMessage(notHereMessage, marker));
    }

    // Validate paragraph
    bool paragraphAllowed = IsParagraphTagValid(paraStack, scrStylesheet.GetTag(marker), true);
    if (!paragraphAllowed)
    {
        recordError(new VerseRef(state.VerseRef), "\\" + marker, state.VerseOffset,
            GetErrorMessage(notHereMessage, marker));
    }

    prevMarkerWasChapter = false;
}
/// <summary>
/// Creates a USFM parser that starts from a fresh parser state built from the stylesheet and
/// an initial reference.
/// </summary>
/// <param name="scrStylesheet">stylesheet stored on the parser and used to build the initial state</param>
/// <param name="tokens">list of tokens to parse</param>
/// <param name="verseRef">initial reference for the parser</param>
/// <param name="sink">optional sink to send parse events to. Null for none</param>
public UsfmParser(ScrStylesheet scrStylesheet, List<UsfmToken> tokens, VerseRef verseRef, UsfmParserSink sink)
{
    UsfmParserState initialState = new UsfmParserState(scrStylesheet, verseRef);

    this.scrStylesheet = scrStylesheet;
    this.tokens = tokens;
    this.sink = sink;
    state = initialState;
}
/// <summary>
/// Checks a verse marker. With a paragraph open, records an error for a character style that
/// is still open and validates where the verse marker occurs. With no paragraph open, starts
/// a pending verse-without-paragraph error or extends the pending one into a verse range.
/// </summary>
public override void Verse(UsfmParserState state, string number, string marker, string altNumber,
    string pubNumber)
{
    prevCharMarker = null;

    if (state.ParaTag != null)
    {
        if (state.CharTag != null)
        {
            string openCharMarker = state.CharTag.Marker;
            recordError(lastCharMarkerVerse, "\\" + openCharMarker, lastCharMarkerOffset,
                GetErrorMessage(charStyleNotClosedMessage, state.CharTag.Marker));
        }

        ValidateOccursUnder(state, marker, state.ParaTag.Marker, false);
    }
    else if (pendingVerseNoParaError.IsDefault)
    {
        // first verse seen outside a paragraph: remember where it was
        pendingVerseNoParaError = new VerseRef(state.VerseRef);
        pendingVerseNoParaErrorOffset = state.VerseOffset;
        pendingVerseNoParaErrorMarker = "\\" + marker;
    }
    else
    {
        // a pending error already exists: widen it to a range ending at this verse
        pendingVerseNoParaError.Verse =
            pendingVerseNoParaError.VerseNum + "-" + state.VerseRef.VerseNum;
    }

    prevMarkerWasChapter = false;
}
/// <summary>
/// Records an empty-marker error when the paragraph that is ending is still flagged as empty
/// and its marker is not listed in <c>okToBeEmptyMarkers</c>.
/// </summary>
public override void EndPara(UsfmParserState state, string marker)
{
    if (!emptyPara || okToBeEmptyMarkers.Contains(marker))
    {
        return;
    }

    recordError(state.VerseRef, "\\" + marker, state.VerseOffset,
        GetErrorMessage(emptyMarkerMessage, marker));
}
/// <summary>
/// Creates a USFM parser that continues from an existing parser state.
/// </summary>
/// <param name="scrStylesheet">stylesheet stored on the parser</param>
/// <param name="tokens">list of tokens to parse</param>
/// <param name="state">initial state of the parser</param>
/// <param name="sink">optional sink to send parse events to. Null for none</param>
/// <param name="tokensPreserveWhitespace">True if the tokens were created while preserving
/// whitespace, false otherwise</param>
public UsfmParser(ScrStylesheet scrStylesheet, List<UsfmToken> tokens, UsfmParserState state,
    UsfmParserSink sink, bool tokensPreserveWhitespace = false)
{
    this.tokensPreserveWhitespace = tokensPreserveWhitespace;
    this.sink = sink;
    this.state = state;
    this.tokens = tokens;
    this.scrStylesheet = scrStylesheet;
}
/// <summary>
/// Validates the sidebar marker as a paragraph-type tag and records an error when the sidebar
/// is not closed.
/// </summary>
public override void StartSidebar(UsfmParserState state, string marker, string category, bool closed)
{
    ValidateParagraphTypeTag(state, marker);

    if (closed)
    {
        return;
    }

    recordError(state.VerseRef, "\\" + marker, state.VerseOffset,
        GetErrorMessage(sidebarNotClosedMessage, marker));
}
/// <summary>
/// Records an error for an end marker that has no matching start. For the "w" marker the
/// wordlist check is also run, with an empty attribute list.
/// </summary>
public override void Unmatched(UsfmParserState state, string marker)
{
    string errorText = GetErrorMessage(endNoStartMessage, marker);
    recordError(new VerseRef(state.VerseRef), "\\" + marker, state.VerseOffset, errorText);

    if (marker != "w")
    {
        return;
    }

    NamedAttribute[] noAttributes = new NamedAttribute[0];
    CheckWordlistErrors(state, marker, noAttributes);
}
/// <summary>
/// Records a "not here" error when the stylesheet tag for <paramref name="marker"/> has a
/// non-empty occurs-under list that does not contain <paramref name="contextMarker"/>
/// (a null context never matches).
/// </summary>
/// <param name="state">parser state used to locate the error</param>
/// <param name="marker">marker being validated</param>
/// <param name="contextMarker">marker of the enclosing context; may be null</param>
/// <param name="includeMarkerInError">true to pass "\marker" as the error text, false to pass ""</param>
private void ValidateOccursUnder(UsfmParserState state, string marker, string contextMarker,
    bool includeMarkerInError)
{
    var allowedContexts = scrStylesheet.GetTag(marker).OccursUnderList;

    if (allowedContexts.Count == 0)
    {
        // tag places no restriction on its context
        return;
    }

    if (contextMarker != null && allowedContexts.Contains(contextMarker))
    {
        return;
    }

    string errorText = includeMarkerInError ? "\\" + marker : "";
    recordError(new VerseRef(state.VerseRef), errorText, state.VerseOffset,
        GetErrorMessage(notHereMessage, marker));
}
/// <summary>
/// Resets the inner-text builder once a wordlist marker has been handled. Nothing happens
/// when no builder is active.
/// </summary>
private void CheckWordlistErrors(UsfmParserState state, string marker, NamedAttribute[] attributes)
{
    // FB 47540 - a \w inside another \w can cause the builder to be null.
    if (innerTextBuilder.Value != null)
    {
        // code checking glossary was removed for this stand alone checking tool
        innerTextBuilder = default(KeyValuePair<MarkerLevel, StringBuilder>);
    }
}
/// <summary>
/// Checks the start of a character style. Starts inner-text collection for \w and \rb (and
/// for \k in the GLO book), remembers where the marker started, and records errors for
/// unknown or unsupported markers, flagged repeated markers, unclosed markers, markers
/// outside a paragraph, invalid attributes and invalid placement.
/// </summary>
public override void StartChar(UsfmParserState state, string markerWithoutPlus, bool closed, bool unknown,
    params NamedAttribute[] namedAttributes)
{
    // The marker can match only one of these alternatives, so one builder is created at most
    bool collectsInnerText = markerWithoutPlus == "w" || markerWithoutPlus == "rb" ||
        (markerWithoutPlus == "k" && state.VerseRef.Book == "GLO");
    if (collectsInnerText)
    {
        innerTextBuilder = new KeyValuePair<MarkerLevel, StringBuilder>(
            new MarkerLevel(state), new StringBuilder());
    }

    lastCharMarkerOffset = state.VerseOffset;
    lastCharMarkerVerse = state.VerseRef.Clone();

    string displayMarker = "\\" + markerWithoutPlus;

    if (unknown)
    {
        recordError(new VerseRef(state.VerseRef), displayMarker, state.VerseOffset,
            GetErrorMessage(unknownMarkerMessage, markerWithoutPlus));
    }
    else if (markerWithoutPlus == "rb" && !markerCheck.allowVersion3Usfm)
    {
        recordError(new VerseRef(state.VerseRef), displayMarker, state.VerseOffset,
            GetErrorMessage(unsupportedMarkerMessage, markerWithoutPlus));
    }

    if (markerWithoutPlus == prevCharMarker && repeatedCharMarkersWarnings.Contains(markerWithoutPlus))
    {
        recordError(new VerseRef(state.VerseRef), displayMarker, state.VerseOffset,
            GetErrorMessage(repeatedCharMarker, markerWithoutPlus));
    }

    if (!closed && MarkerRequiresClose(markerWithoutPlus, state.CharTag))
    {
        recordError(state.VerseRef, displayMarker, state.VerseOffset,
            GetErrorMessage(charStyleNotClosedMessage, markerWithoutPlus));
    }

    if (state.ParaTag == null)
    {
        recordError(new VerseRef(state.VerseRef), displayMarker, state.VerseOffset,
            GetErrorMessage(charNoParaMessage, markerWithoutPlus));
    }

    // Attributes are only validated for closed markers that have a character tag
    if (state.CharTag != null && closed)
    {
        NamedAttribute[] attributesToCheck = namedAttributes ?? new NamedAttribute[0];
        ValidateAttributes(state, state.CharTag, markerWithoutPlus, attributesToCheck);
    }

    ValidateCharacterTypeTag(state, markerWithoutPlus);
    emptyChar = true;
}
/// <summary>
/// Checks a table cell marker. The cell number is read from the marker's last character;
/// when it differs from the expected next cell, an error naming the expected marker is
/// recorded. The expected next cell then becomes this cell number plus one.
/// </summary>
public override void StartCell(UsfmParserState state, string marker, string align)
{
    ValidateCharacterTypeTag(state, marker);

    // last character of the marker is taken as a single decimal digit
    char lastChar = marker[marker.Length - 1];
    int cellNumber = lastChar - '0';

    if (cellNumber != nextTableCell)
    {
        // expected marker reuses the second character of this marker with the expected number
        string expectedMarker = $"\\t{marker[1]}{nextTableCell}";
        string messageTemplate = string.Format(missingTableMarker, expectedMarker);
        recordError(new VerseRef(state.VerseRef), "\\" + marker, state.VerseOffset,
            GetErrorMessage(messageTemplate, marker));
    }

    nextTableCell = cellNumber + 1;
}
/// <summary>
/// Checks the start of a paragraph: records an error for an unknown marker, flags the
/// paragraph as empty until text is seen, and validates the marker as a paragraph-type tag.
/// </summary>
public override void StartPara(UsfmParserState state, string marker, bool unknown)
{
    prevCharMarker = null;

    if (unknown)
    {
        string errorText = GetErrorMessage(unknownMarkerMessage, marker);
        recordError(new VerseRef(state.VerseRef), "\\" + marker, state.VerseOffset, errorText);
    }

    emptyPara = true;
    ValidateParagraphTypeTag(state, marker);
}
/// <summary>
/// Validates the attributes supplied on a marker against its stylesheet tag. When version 3
/// USFM is not allowed, unsupported attributes are reported. Independently of that, missing
/// required attributes and unknown attributes are each reported as a marker error.
/// </summary>
/// <param name="state">parser state used to locate errors</param>
/// <param name="tag">stylesheet tag that defines the allowed attributes</param>
/// <param name="marker">marker the errors are recorded against</param>
/// <param name="namedAttributes">attributes found on the marker; must not be null</param>
private void ValidateAttributes(UsfmParserState state, ScrTag tag, string marker,
    NamedAttribute[] namedAttributes)
{
    if (!markerCheck.allowVersion3Usfm)
    {
        // figures allow only the names in figureAttributes; character styles allow only defaults
        bool hasUnsupportedAttributes = marker == "fig"
            ? namedAttributes.Any(a => !figureAttributes.Contains(a.Name.InternalValue))
            : tag.StyleType == ScrStyleType.scCharacterStyle && HasNonDefaultAttributes(tag, namedAttributes);

        if (hasUnsupportedAttributes)
        {
            recordError(new VerseRef(state.VerseRef), "\\" + marker, state.VerseOffset,
                GetErrorMessage(unsupportedAttributesMessage, marker));
        }
    }

    // for character styles, find missing required attributes
    string[] missingAttributes =
        (from required in tag.Attributes
         where required.IsRequired && namedAttributes.All(na => na.Name != required.Name)
         select required.Name.InternalValue).ToArray();

    if (missingAttributes.Length > 0)
    {
        string template = Localizer.Str(@"Missing required attributes ({0})");
        string errMsg = string.Format(template, string.Join(", ", missingAttributes));
        RecordMarkerError(state, marker, errMsg + markerSlot);
    }

    // find attributes that don't start with x- and aren't defined for the character style
    // also, link attributes of link-href, link-title and link-name are valid on any style
    // for figures, the standard attributes are already stripped out, so can skip the check for
    // attributes on the CharTag (which will be null)
    string[] unknownAttributes =
        (from supplied in namedAttributes
         where !supplied.Name.InternalValue.StartsWith("x-", StringComparison.OrdinalIgnoreCase)
         where !linkAttributes.Contains(supplied.Name.InternalValue)
         where tag.Attributes.All(a => a.Name != supplied.Name)
         select supplied.Name.InternalValue).ToArray();

    if (unknownAttributes.Length > 0)
    {
        string template = Localizer.Str(@"Unknown attributes ({0})");
        string errMsg = string.Format(template, string.Join(", ", unknownAttributes));
        RecordMarkerError(state, marker, errMsg + markerSlot);
    }
}
/// <summary>
/// Checks a milestone marker. Start milestones are remembered in <c>openMilestones</c> (an
/// already open milestone of the same marker is reported as missing its end). End milestones
/// are matched to their start marker through <c>endMilestoneMarkerMap</c>; a mismatched id or
/// a missing start is reported. The marker's attributes are validated in every case.
/// </summary>
public override void Milestone(UsfmParserState state, string marker, bool startMilestone,
    NamedAttribute[] namedAttributes)
{
    if (!markerCheck.allowVersion3Usfm)
    {
        recordError(new VerseRef(state.VerseRef), "\\" + marker, state.VerseOffset,
            GetErrorMessage(unsupportedMarkerMessage, marker));
    }

    Tuple<VerseRef, int, string> opened;

    if (!startMilestone)
    {
        // Fill the end-marker -> start-marker map from the stylesheet on first use
        if (endMilestoneMarkerMap.Count == 0)
        {
            foreach (var milestoneTag in scrStylesheet.Tags.Where(t => t.StyleType == ScrStyleType.scMilestone))
            {
                endMilestoneMarkerMap[milestoneTag.Endmarker] = milestoneTag.Marker;
            }
        }

        string startMarker = endMilestoneMarkerMap[marker];
        if (!openMilestones.TryGetValue(startMarker, out opened))
        {
            recordError(state.VerseRef, marker, state.VerseOffset,
                "#" + Localizer.Str("End milestone has no matching start:") + " \\" + marker);
        }
        else
        {
            if (opened.Item3 != UsfmToken.GetAttribute(namedAttributes, AttributeName.Id))
            {
                // reported at the location of the start milestone
                recordError(opened.Item1, marker, opened.Item2,
                    "#" + Localizer.Str("Id on start/end milestones do not match:") + " \\" + startMarker);
            }

            openMilestones.Remove(startMarker);
        }
    }
    else
    {
        if (openMilestones.TryGetValue(marker, out opened))
        {
            // same milestone started again before the earlier one ended
            recordError(opened.Item1, marker, opened.Item2, "#" + missingMilestoneEnd + " \\" + marker);
        }

        openMilestones[marker] = new Tuple<VerseRef, int, string>(state.VerseRef.Clone(),
            state.VerseOffset, UsfmToken.GetAttribute(namedAttributes, AttributeName.Id));
    }

    ValidateAttributes(state, scrStylesheet.GetTag(marker), marker,
        namedAttributes ?? new NamedAttribute[0]);
}
/// <summary>
/// Validates where a character-type marker occurs. Only canonical books are checked, and any
/// marker nested inside an "xq" element (searched up to the enclosing note) is accepted. In
/// other cases the marker is validated against the note marker, or the paragraph marker when
/// no note is open.
/// </summary>
private void ValidateCharacterTypeTag(UsfmParserState state, string marker)
{
    prevMarkerWasChapter = false;

    // only validate order/occursUnder in Canonical
    if (!Canon.IsCanonical(state.VerseRef.BookNum))
    {
        return;
    }

    // Walk outward from the element below the top of the stack, stopping at a note (or a
    // missing element), to see whether this marker sits inside a cross reference quote
    for (int i = state.Stack.Count - 2; i >= 0; i--)
    {
        UsfmParserElement enclosing = state.Stack[i];
        if (enclosing == null || enclosing.Type == UsfmElementTypes.Note)
        {
            break;
        }

        if (enclosing.Marker == "xq")
        {
            return; // allow any embedded character styles within cross reference quote
        }
    }

    string contextMarker = state.NoteTag != null
        ? state.NoteTag.Marker
        : (state.ParaTag != null ? state.ParaTag.Marker : null);

    ValidateOccursUnder(state, marker, contextMarker, true);
}
/// <summary>
/// Checks the end of a character style: records an error for an empty style whose marker is
/// not listed in <c>okToBeEmptyMarkers</c>, runs the marker-specific check for \w, \k and
/// \rb, and remembers the marker as the previous character marker.
/// </summary>
public override void EndChar(UsfmParserState state, string marker, NamedAttribute[] attributes)
{
    bool reportEmpty = emptyChar && !okToBeEmptyMarkers.Contains(marker);
    if (reportEmpty)
    {
        recordError(state.VerseRef, "\\" + marker, state.VerseOffset,
            GetErrorMessage(emptyMarkerMessage, marker));
    }

    switch (marker)
    {
        case "w":
            CheckWordlistErrors(state, marker, attributes);
            break;
        case "k":
            CheckGlossaryCitationFormErrors(state, marker);
            break;
        case "rb":
            CheckRubyGlossing(state, marker, attributes);
            break;
    }

    prevMarkerWasChapter = false;
    prevCharMarker = marker;
}
/// <summary>
/// Handles a run of text. Non-blank text is passed to the inner-text collector and clears
/// the empty-paragraph / empty-character flags. The text and a clone of the state are always
/// remembered. Text containing '|' inside a closed character style is reported as an invalid
/// attribute.
/// </summary>
public override void Text(UsfmParserState state, string text)
{
    bool hasVisibleText = text.Trim().Length != 0;
    if (hasVisibleText)
    {
        HandleWordlistOrGlossaryCitationFormInnerText(state, text);
        emptyPara = false;
        emptyChar = false;
        prevCharMarker = null;
    }

    previousTextState = state.Clone();
    previousText = text;

    // (origin consistency check for text inside \fr or \xo within a note was removed in this
    // stand alone version)

    bool containsAttributeSeparator = text.IndexOf('|') >= 0;
    if (containsAttributeSeparator && state.CharTag != null && state.Stack.Last().IsClosed)
    {
        RecordMarkerError(state, state.CharTag.Marker, invalidAttribute);
    }
}
/// <summary>
/// Sets <c>InSideBar</c> on this object's own state field; the <paramref name="state"/>
/// argument is not used.
/// </summary>
public override void StartSidebar(UsfmParserState state, string marker, string category, bool closed)
    => this.state.InSideBar = true;
/// <summary>
/// Records an error for "\marker" at the current verse reference and offset, passing
/// <paramref name="message"/> through <c>GetErrorMessage</c> together with the marker.
/// </summary>
private void RecordMarkerError(UsfmParserState state, string marker, string message)
{
    string displayMarker = "\\" + marker;
    recordError(state.VerseRef, displayMarker, state.VerseOffset, GetErrorMessage(message, marker));
}
/// <summary>
/// Clears <c>InSideBar</c> on this object's own state field; the <paramref name="state"/>
/// argument is not used.
/// </summary>
public override void EndSidebar(UsfmParserState state, string marker)
    => this.state.InSideBar = false;
/// <summary>
/// Captures the element currently on top of the parser stack together with the stack depth.
/// </summary>
/// <param name="state">parser state whose stack is inspected; the stack must not be empty,
/// because <c>Last()</c> throws on an empty sequence</param>
internal MarkerLevel(UsfmParserState state)
{
    element = state.Stack.Last();

    // The stack exposes a Count property, so read it directly instead of going through LINQ Count()
    stackLevel = state.Stack.Count;
}
/// <summary>
/// Resets the inner-text builder after a glossary citation form marker ends.
/// </summary>
private void CheckGlossaryCitationFormErrors(UsfmParserState state, string marker)
{
    // glossary checking was removed from this stand alone marker check; only the reset remains
    innerTextBuilder = default(KeyValuePair<MarkerLevel, StringBuilder>);
}
/// <summary>
/// Checks a chapter marker: validates it as a paragraph-type tag and then flags that the
/// previous marker was a chapter.
/// </summary>
public override void Chapter(UsfmParserState state, string number, string marker, string altNumber,
    string pubNumber)
{
    prevCharMarker = null;

    ValidateParagraphTypeTag(state, marker);

    // must be set after validation, which clears this flag
    prevMarkerWasChapter = true;
}
/// <summary>
/// Validates the book marker as a paragraph-type tag.
/// </summary>
public override void StartBook(UsfmParserState state, string marker, string code)
    => ValidateParagraphTypeTag(state, marker);
/// <summary>
/// Intentionally does nothing at the end of a note.
/// </summary>
public override void EndNote(UsfmParserState state, string marker)
{
    // the origin consistency check was removed from this stand alone version
}
/// <summary>
/// Validates the row marker as a paragraph-type tag and resets the expected next table cell
/// number to 1.
/// </summary>
public override void StartRow(UsfmParserState state, string marker)
{
    ValidateParagraphTypeTag(state, marker);

    // a new row starts counting cells from 1 again
    nextTableCell = 1;
}