/// <summary>
/// Creates the exception for a tag that could not be parsed.
/// </summary>
/// <param name="parseArea">Parser state whose name is reported in the message.</param>
/// <param name="area">The piece of data that could not be interpreted; stored in <c>ParseData</c>.</param>
/// <param name="tag">The complete tag text; stored in <c>Tag</c>.</param>
internal Dacs7TagParserException(TagParserState parseArea, string area, string tag)
    : base(FormatParseFailureMessage(parseArea, area, tag))
{
    Tag = tag;
    ParseData = area;
    _parseArea = parseArea;
}

// Builds the exception message from the state name, the offending data and the full tag.
private static string FormatParseFailureMessage(TagParserState parseArea, string area, string tag)
{
    var stateName = Enum.GetName(typeof(TagParserState), parseArea);
    return $"Could not extract {stateName} from data '{area}'. Full tag was '{tag}'.";
}
/// <summary>
/// Consumes one segment of a tag according to the current parser <paramref name="state"/>
/// and, on success, stores the extracted value in <paramref name="result"/> and moves
/// <paramref name="state"/> forward.
/// </summary>
/// <param name="result">Parse result that receives area, offset, length and type information.</param>
/// <param name="input">The segment of the tag to interpret for the current state.</param>
/// <param name="indexStart">Set to <c>i + 1</c> by the states that consume a segment.</param>
/// <param name="state">Current parser state; advanced to the next state on success.</param>
/// <param name="type">Receives the raw type segment in the <c>Type</c> state; read in <c>TypeValidation</c>.</param>
/// <param name="i">Index used to compute the next <paramref name="indexStart"/>.</param>
/// <returns><c>true</c> if the segment was accepted for the current state; otherwise <c>false</c>.</returns>
private static bool TryExtractData(ref TagParserResult result, ReadOnlySpan<char> input, ref int indexStart, ref TagParserState state, ref ReadOnlySpan<char> type, int i)
{
    switch (state)
    {
        case TagParserState.Area:
            {
                if (TryDetectArea(input, out var selector, out var db))
                {
                    result.Area = selector;
                    result.DbNumber = db;
                    indexStart = i + 1;
                    state = TagParserState.Offset;
                    return true;
                }
            }
            break;

        case TagParserState.Offset:
            {
                // TODO: !!!!!!! (open item carried over from the original code)
                // Tags are machine-readable, so parse with the invariant culture rather than
                // whatever culture the current thread happens to run under.
#if SPANSUPPORT
                if (int.TryParse(input, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out var offset))
#else
                if (TryConvertSpanToInt32(input, out var offset))
#endif
                {
                    result.Offset = offset;
                    indexStart = i + 1;
                    state = TagParserState.Type;
                    return true;
                }
            }
            break;

        case TagParserState.Type:
            {
                // The type text is only remembered here; it is validated in TypeValidation.
                type = input;
                state = TagParserState.NumberOfItems;
                indexStart = i + 1;
                return true;
            }

        case TagParserState.NumberOfItems:
            {
                // An empty segment is accepted as-is; the state is intentionally left unchanged.
                if (input.IsEmpty)
                {
                    return true;
                }

                // TODO: !!!!!!! (open item carried over from the original code)
#if SPANSUPPORT
                if (ushort.TryParse(input, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out var length))
#else
                if (TryConvertSpanToUShort(input, out var length))
#endif
                {
                    result.Length = length;
                    state = TagParserState.TypeValidation;
                    return true;
                }
            }
            break;

        case TagParserState.TypeValidation:
            {
                // A missing/zero length is treated as a single item.
                if (result.Length <= 0)
                {
                    result.Length = 1;
                }

                var offset = result.Offset;
                var length = result.Length;
                if (!type.IsEmpty && TryDetectTypes(type, ref length, ref offset, out var vtype, out var rType, out var unicode))
                {
                    // TryDetectTypes receives length and offset by ref, so write both back.
                    result.Length = length;
                    result.Offset = offset;
                    result.VarType = vtype;
                    result.ResultType = rType;
                    result.Encoding = unicode;
                    indexStart = i + 1;
                    state = TagParserState.Success;
                    return true;
                }
            }
            break;

        case TagParserState.Success:
            return true;
    }

    return false;
}
/// <summary>
/// Runs one extraction step via <c>TryExtractData</c> and, when it fails and
/// <paramref name="throwException"/> is set, records the failing state and throws.
/// </summary>
/// <param name="tag">The complete tag text, used for the exception message.</param>
/// <param name="result">Parse result being filled; its <c>ErrorState</c> is set before throwing.</param>
/// <param name="indexStart">Passed through to <c>TryExtractData</c>.</param>
/// <param name="state">Current parser state, passed through to <c>TryExtractData</c>.</param>
/// <param name="type">Raw type segment, passed through to <c>TryExtractData</c>.</param>
/// <param name="data">The segment of the tag to interpret.</param>
/// <param name="index">Index passed through to <c>TryExtractData</c>.</param>
/// <param name="throwException">When <c>true</c>, a failed extraction raises a tag parse exception.</param>
private static void Parse(string tag, ref TagParserResult result, ref int indexStart, ref TagParserState state, ref ReadOnlySpan<char> type, ReadOnlySpan<char> data, int index, bool throwException = false)
{
    if (TryExtractData(ref result, data, ref indexStart, ref state, ref type, index))
    {
        return;
    }

    if (throwException)
    {
        result.ErrorState = state;

        // Report the state that actually failed; previously this was hard-coded to
        // TagParserState.Area, which produced a misleading message for every later state.
        ThrowHelper.ThrowTagParseException(state, data.ToString(), tag);
    }
}
/// <summary>
/// Parses the tag that starts at <paramref name="startPosition"/> (the position of the '&lt;').
/// </summary>
/// <remarks>
/// The return type is DOMElement rather than a tag type because an HTML comment
/// (a name starting with "!--") is returned as an HTMLContent instead of a tag.
/// </remarks>
/// <param name="startPosition">Index of the opening '&lt;' within the HTML being parsed.</param>
/// <returns>
/// The parsed tag (replaced by a form/input/select/option/textarea specific tag object when the
/// name matches), an HTMLContent for a comment, or the partially filled tag if the input ends
/// before the tag is closed.
/// </returns>
private DOMElement _ParseTag(int startPosition)
{
    // Initialize new tag and empty attribute
    HTMLTag tag = new HTMLTag(startPosition);
    HTMLTagAttribute currentAttribute = null;

    // Start looping through the HTML (skip 1 char since we're already at the '<')
    tagParserState = TagParserState.ExpectingTagName;
    int currentPosition = startPosition + 1;

    while (currentPosition < _HTML.Length)
    {
        // Remember where this iteration started so a lack of progress can be detected below.
        int positionBefore = currentPosition;
        TagParserState stateBefore = tagParserState;

        char chr = _HTML[currentPosition];

        switch (tagParserState)
        {
            // Look for an optional '/' and/or a tag name, e.g. the "DIV" in <DIV ...> or </DIV>
            case TagParserState.ExpectingTagName:
                {
                    if (isAlphaNumericChar(chr))
                    {
                        // Start of the tag name - read the whole word
                        tag.TagName = _readAlphaNumericWord(currentPosition);

                        if (tag.TagName.StartsWith("!--", System.StringComparison.Ordinal))
                        {
                            // HTML comment
                            HTMLContent comment = new HTMLContent(startPosition, _readUntil(startPosition, "-->"));
                            return comment;
                        }

                        // Any tag conversions? Use the invariant culture so the match does not
                        // depend on the thread culture (e.g. Turkish dotless 'i' for "INPUT").
                        switch (tag.TagName.ToLowerInvariant())
                        {
                            case "form":
                                tag = new HTMLForm(tag.StartPosition) { TagName = tag.TagName };
                                break;

                            case "input":
                                tag = new HTMLInput(tag.StartPosition) { TagName = tag.TagName };
                                break;

                            case "select":
                                tag = new HTMLSelect(tag.StartPosition) { TagName = tag.TagName };
                                break;

                            case "option":
                                tag = new HTMLSelectOption(tag.StartPosition) { TagName = tag.TagName };
                                break;

                            case "textarea":
                                tag = new HTMLTextarea(tag.StartPosition) { TagName = tag.TagName };
                                break;
                        }

                        // Advance position by name length
                        currentPosition += tag.TagName.Length;
                        tagParserState = TagParserState.ExpectingTagContentsOrEnd;
                    }
                    else if (chr == '/')
                    {
                        // This is a closing tag like </div> - read the tag name and close it
                        tag.IsClosingTag = true;

                        // Advance to the start of the tag name and read it
                        currentPosition = this._indexOfNextNonWhitespaceChar(currentPosition + 1);
                        tag.TagName = _readAlphaNumericWord(currentPosition);
                        currentPosition += tag.TagName.Length;

                        // Advance to end of tag '>'
                        currentPosition += _readUntil(currentPosition, '>').Length - 1;
                        tagParserState = TagParserState.TagEnded;
                    }
                }
                break;

            // Inside the tag: expect an attribute, a '/' self-closing flag, or the closing '>'
            case TagParserState.ExpectingTagContentsOrEnd:
                {
                    // Advance to the next non-whitespace char
                    currentPosition = _indexOfNextNonWhitespaceChar(currentPosition);

                    // What the helper returns when nothing is found is not visible here, so
                    // never index _HTML with an out-of-range value; treat it as truncated input.
                    if (currentPosition < 0 || currentPosition >= _HTML.Length)
                    {
                        return tag;
                    }

                    chr = _HTML[currentPosition];

                    if (chr == '/')
                    {
                        // Self-closing tag, e.g. the "/>" in <IMG />
                        tag.SelfClosed = true;

                        // Advance to end of tag '>'
                        currentPosition += _readUntil(currentPosition, '>').Length - 1;
                        tagParserState = TagParserState.TagEnded;
                    }
                    else if (chr == '>')
                    {
                        // End of tag, e.g. the ">" in <DIV>
                        tagParserState = TagParserState.TagEnded;
                    }
                    else if ((chr == '"') || (chr == '\''))
                    {
                        // Unnamed, quoted attribute value, like a DOCTYPE dtd path <!DOCTYPE html "blah blah">
                        string attributeValue = _readValue(currentPosition);

                        // Build a new attribute
                        currentAttribute = new HTMLTagAttribute(currentPosition, null, attributeValue, chr.ToString());

                        // Advance the position
                        currentPosition += attributeValue.Length;

                        // Finish the attribute and clear it
                        currentAttribute.EndPosition = currentPosition;
                        tag.Attributes.Add(currentAttribute);
                        currentAttribute = null;
                    }
                    else if (isAlphaChar(chr))
                    {
                        // Start of an attribute name, e.g. ATTRIBUTE in <DIV ATTRIBUTE="FOO">
                        string attributeName = _readAlphaNumericWord(currentPosition);
                        currentAttribute = new HTMLTagAttribute(currentPosition, attributeName);

                        // Advance position to the end of the name
                        currentPosition += attributeName.Length;

                        // Do we have an attribute value?
                        int nextNonWhitespaceChar = _indexOfNextNonWhitespaceChar(currentPosition);
                        bool hasValue = nextNonWhitespaceChar >= 0
                            && nextNonWhitespaceChar < _HTML.Length
                            && _HTML[nextNonWhitespaceChar] == '=';

                        if (hasValue)
                        {
                            currentPosition = nextNonWhitespaceChar + 1;

                            // NOTE(review): the result of this lookup is never used - the value is
                            // read from currentPosition, not from the next non-whitespace char.
                            // Presumably _readValue skips leading whitespace itself; confirm.
                            nextNonWhitespaceChar = _indexOfNextNonWhitespaceChar(currentPosition);

                            string rawAttributeValue = _readValue(currentPosition);
                            currentAttribute.Value = rawAttributeValue;

                            // Advance position to end of the value
                            currentPosition += rawAttributeValue.Length;
                        }

                        // Otherwise this is a standalone attribute like the "html" in <!DOCTYPE html "foobar">

                        // End of attribute - mark the end position and add to the tag
                        currentAttribute.EndPosition = currentPosition;
                        tag.Attributes.Add(currentAttribute);

                        // Reset attribute
                        currentAttribute = null;
                    }
                }
                break;
        }

        // End the tag?
        if (tagParserState == TagParserState.TagEnded)
        {
            // Apply transformations? Names are identifiers, so use culture-invariant casing.
            if (_transforms == Transformations.LowercaseNames)
            {
                tag.TagName = tag.TagName.ToLowerInvariant();
                foreach (HTMLTagAttribute attr in tag.Attributes)
                {
                    if (attr.Name != null)
                    {
                        attr.Name = attr.Name.ToLowerInvariant();
                    }
                }
            }
            else if (_transforms == Transformations.UppercaseNames)
            {
                tag.TagName = tag.TagName.ToUpperInvariant();
                foreach (HTMLTagAttribute attr in tag.Attributes)
                {
                    if (attr.Name != null)
                    {
                        attr.Name = attr.Name.ToUpperInvariant();
                    }
                }
            }

            // Remove empty attributes list
            if (tag.Attributes.Count == 0)
            {
                tag.Attributes = null;
            }

            // Mark the end position of the tag and return it
            tag.MarkEndPosition(currentPosition);
            return tag;
        }

        // Forward-progress guard: if no branch consumed input or changed state (for example an
        // unexpected character such as the space in "< div>" or the digit in "<div 1>"), skip
        // that character. Without this the loop would spin forever on the same position.
        if (currentPosition == positionBefore && tagParserState == stateBefore)
        {
            currentPosition++;
        }
    }

    // Ran out of input before the tag was closed - return what was collected so far.
    return tag;
}
/// <summary>
/// Throws a <c>Dacs7TagParserException</c> built from the given arguments.
/// </summary>
/// <param name="area">Parser state forwarded to the exception.</param>
/// <param name="v">The data that could not be parsed.</param>
/// <param name="tag">The complete tag text.</param>
public static void ThrowTagParseException(TagParserState area, string v, string tag)
{
    throw new Dacs7TagParserException(area, v, tag);
}
/// <summary>
/// Deserialization constructor: restores the parser state, tag and parse data
/// from the entries "_parseArea", "Tag" and "ParseData".
/// </summary>
/// <param name="serializationInfo">Holds the serialized exception data.</param>
/// <param name="streamingContext">Forwarded to the base exception constructor.</param>
protected Dacs7TagParserException(SerializationInfo serializationInfo, StreamingContext streamingContext)
    : base(serializationInfo, streamingContext)
{
    object storedArea = serializationInfo.GetValue("_parseArea", typeof(TagParserState));
    _parseArea = (TagParserState)storedArea;

    object storedTag = serializationInfo.GetValue("Tag", typeof(string));
    Tag = (string)storedTag;

    object storedData = serializationInfo.GetValue("ParseData", typeof(string));
    ParseData = (string)storedData;
}