/// <summary>
/// Parses a USFM string into a document
/// </summary>
/// <param name="input">USFM string to parse</param>
/// <returns>A USFMDocument populated with the markers and text found in the string</returns>
public USFMDocument ParseFromString(string input)
{
    // Group 1: the tag name following a backslash. Group 2: everything up to the next backslash.
    Regex markerPattern = new Regex("\\\\([a-z0-9*]+)([^\\\\]*)");
    USFMDocument document = new USFMDocument();

    foreach (Match token in markerPattern.Matches(input))
    {
        string tag = token.Groups[1].Value;
        if (IgnoredTags.Contains(tag))
        {
            continue;
        }

        ConvertToMarkerResult converted = ConvertToMarker(tag, token.Groups[2].Value);

        // A table row that is not already somewhere under a table block gets one inserted ahead of it
        if (converted.marker is TRMarker)
        {
            bool alreadyInTable = document.GetTypesPathToLastMarker().Contains(typeof(TableBlock));
            if (!alreadyInTable)
            {
                document.Insert(new TableBlock());
            }
        }

        document.Insert(converted.marker);

        // Text left over after the marker consumed its part is kept only when it is not blank
        if (string.IsNullOrWhiteSpace(converted.remainingText))
        {
            continue;
        }

        document.Insert(new TextBlock(converted.remainingText));
    }

    return document;
}
/// <summary>
/// Generate a list of Markers from a string
/// </summary>
/// <param name="input">USFM String to tokenize</param>
/// <returns>A List of Markers based upon the string</returns>
private List<Marker> TokenizeFromString(string input)
{
    List<Marker> output = new List<Marker>();
    bool isInVMarkerNow = false;

    // The set of marker types a verse may contain does not depend on the current match,
    // so it is looked up once instead of allocating a new VMarker on every iteration.
    var verseAllowedContents = new VMarker().AllowedContents;

    foreach (Match match in splitRegex.Matches(input))
    {
        string tag = match.Groups[1].Value;
        if (IgnoredTags.Contains(tag))
        {
            continue;
        }

        ConvertToMarkerResult result = ConvertToMarker(tag, match.Groups[2].Value);
        result.marker.Position = match.Index;
        output.Add(result.marker);

        // Conditions for the whitespace between tags to be kept as text inside a verse
        bool isVMarker = result.marker is VMarker;
        bool isAllowedByVMarker = verseAllowedContents.Contains(result.marker.GetType());
        bool doesNotAllowVMarker = !result.marker.AllowedContents.Contains(typeof(VMarker));

        // A verse marker starts a verse; any marker a verse cannot contain ends it
        if (isVMarker)
        {
            isInVMarkerNow = true;
        }
        else if (!isAllowedByVMarker)
        {
            isInVMarkerNow = false;
        }

        // Deciding when to include text blocks:
        // non-blank text is always added. Whitespace-only text is added only when the marker is
        // allowed by VMarker, does not itself allow VMarker, and we are currently inside a VMarker.
        // This solves the problem of \v 1 \tl hello \tl* \tl hello \tl* appearing as
        // "hellohello" instead of "hello hello".
        string remainingText = result.remainingText;
        bool hasVisibleText = !string.IsNullOrWhiteSpace(remainingText);
        bool isSignificantWhitespace = !string.IsNullOrEmpty(remainingText)
            && isAllowedByVMarker
            && doesNotAllowVMarker
            && isInVMarkerNow;

        if (hasVisibleText || isSignificantWhitespace)
        {
            output.Add(new TextBlock(remainingText));
        }
    }

    return output;
}
/// <summary>
/// Splits a USFM string into a flat, ordered list of markers
/// </summary>
/// <param name="input">USFM string to tokenize</param>
/// <returns>The markers found in the string, each followed by a TextBlock for any non-blank text left over after it</returns>
private List<Marker> TokenizeFromString(string input)
{
    List<Marker> tokens = new List<Marker>();

    foreach (Match hit in splitRegex.Matches(input))
    {
        string tag = hit.Groups[1].Value;
        if (IgnoredTags.Contains(tag))
        {
            continue;
        }

        ConvertToMarkerResult converted = ConvertToMarker(tag, hit.Groups[2].Value);

        // Record where in the input this marker was matched
        converted.marker.Position = hit.Index;
        tokens.Add(converted.marker);

        // Blank or missing trailing text produces no text block
        if (string.IsNullOrWhiteSpace(converted.remainingText))
        {
            continue;
        }

        tokens.Add(new TextBlock(converted.remainingText));
    }

    return tokens;
}