/// <summary>
/// Handle an end tag (or the implicit end of an empty element): pop the current
/// namespace scope and move back up to the parent element. When the stream root
/// itself is closed, <c>OnStreamEnd</c> is raised instead.
/// </summary>
/// <param name="buf">Byte buffer containing the token</param>
/// <param name="offset">Offset of the token within <paramref name="buf"/></param>
/// <param name="ct">Token information filled in by the tokenizer</param>
/// <param name="tok">The token type that was scanned</param>
private void EndTag(byte[] buf, int offset, ContentToken ct, TOK tok)
{
    m_Depth--;
    m_ns.PopScope();

    if (current == null)
    {
        // End of document.
        if (OnStreamEnd != null)
        {
            OnStreamEnd(this, m_root);
        }

        return;
    }

    // Note: the end-tag name is not validated against current.Name here;
    // a mismatched end tag is silently accepted.

    var parent = (Element)current.Parent;
    if (parent == null)
    {
        // A completed child of the stream root: hand it to the listeners.
        DoRaiseOnStreamElement(current);
    }

    current = parent;
}
private void EndTag(byte[] buf, int offset, ContentToken ct, TOK tok)
{
    m_ns.PopScope();

    if (m_elem == null)
    {
        // End of document.
        FireOnDocumentEnd();
        return;
    }

    string name;
    if ((tok == TOK.EMPTY_ELEMENT_WITH_ATTS) || (tok == TOK.EMPTY_ELEMENT_NO_ATTS))
    {
        // Empty element: the name starts after the leading "<".
        name = utf.GetString(buf,
                             offset + m_enc.MinBytesPerChar,
                             ct.NameEnd - offset - m_enc.MinBytesPerChar);
    }
    else
    {
        // End tag: the name starts after the leading "</".
        name = utf.GetString(buf,
                             offset + m_enc.MinBytesPerChar * 2,
                             ct.NameEnd - offset - m_enc.MinBytesPerChar * 2);
    }

    // Workaround for an iTeleport bug: it sends elements with an "xmlns" prefix,
    // which makes the .NET Framework 2.0 XML classes throw. The start-tag handler
    // replaces that prefix with "unsupported:" (which is then ignored), so the end
    // tag is rewritten the same way here to keep start and end tags matching.
    if (name.StartsWith("xmlns"))
    {
        name = string.Format("unsupported:{0}", name.Substring("xmlns:".Length));
    }

    if (m_elem.Name != name)
    {
        throw new XmlException("Invalid end tag: " + name + " != " + m_elem.Name);
    }

    XmlElement parent = (XmlElement)m_elem.ParentNode;
    if (parent == null)
    {
        FireOnElement(m_elem);
    }

    m_elem = parent;
}
internal Expression ParseContent() // E -> C E'
{
    if (this.input.Peek() is ContentToken)
    {
        ContentToken ct = (ContentToken)this.input.Read();
        Expression res = ContentMatchExpression.CreateContentMatchExpression(ct);
        return this.ParseExPrime(res);
    }
    else
    {
        return null;
    }
}
internal static int ParseInt(string header, ContentToken token)
{
    int i;
    string content = token.Content;

    if (!int.TryParse(content.Substring(header.Length).Trim('"'), out i))
    {
        throw new ParsingException($"{content} is not of '{header}{nameof(Int32)}' format", token.Index);
    }

    return i;
}

internal static DateTime ParseDateTime(string header, ContentToken token)
{
    DateTime dt;
    string content = token.Content;

    if (!DateTime.TryParse(content.Substring(header.Length).Trim('"'), out dt))
    {
        throw new ParsingException($"{content} is not of '{header}{nameof(DateTime)}' format", token.Index);
    }

    return dt;
}
private void EndTag(byte[] buf, int offset, ContentToken ct, TOK tok) { m_Depth--; m_NamespaceStack.Pop(); if (current == null) { // end of doc if (OnStreamEnd != null) { OnStreamEnd(this, m_root); } // FireOnDocumentEnd(); return; } string name = null; if ((tok == TOK.EMPTY_ELEMENT_WITH_ATTS) || (tok == TOK.EMPTY_ELEMENT_NO_ATTS)) { name = utf.GetString(buf, offset + m_enc.MinBytesPerChar, ct.NameEnd - offset - m_enc.MinBytesPerChar); } else { name = utf.GetString(buf, offset + m_enc.MinBytesPerChar * 2, ct.NameEnd - offset - m_enc.MinBytesPerChar * 2); } // if (current.Name != name) // throw new Exception("Invalid end tag: " + name + // " != " + current.Name); Element parent = (Element)current.Parent; if (parent == null) { DoRaiseOnStreamElement(current); //if (OnStreamElement!=null) // OnStreamElement(this, current); //FireOnElement(current); } current = parent; }
private void EndTag(byte[] buf, int offset, ContentToken ct, TOK tok) { this.m_Depth--; this.m_ns.PopScope(); if (this.current == null) { // end of doc OnStreamEnd?.Invoke(this, this.m_root); // FireOnDocumentEnd(); return; } string name = null; if ((tok == TOK.EMPTY_ELEMENT_WITH_ATTS) || (tok == TOK.EMPTY_ELEMENT_NO_ATTS)) { name = utf.GetString(buf, offset + this.m_enc.MinBytesPerChar, ct.NameEnd - offset - this.m_enc.MinBytesPerChar); } else { name = utf.GetString(buf, offset + this.m_enc.MinBytesPerChar * 2, ct.NameEnd - offset - this.m_enc.MinBytesPerChar * 2); } // if (current.Name != name) // throw new Exception("Invalid end tag: " + name + // " != " + current.Name); var parent = (Element)this.current.Parent; if (parent == null) { this.DoRaiseOnStreamElement(this.current); //if (OnStreamElement!=null) // OnStreamElement(this, current); //FireOnElement(current); } this.current = parent; }
private void EndTag(byte[] buf, int offset, ContentToken ct, TOK tok) { m_ns.PopScope(); if (m_elem == null) {// end of doc FireOnDocumentEnd(); return; } string name = null; if ((tok == TOK.EMPTY_ELEMENT_WITH_ATTS) || (tok == TOK.EMPTY_ELEMENT_NO_ATTS)) { name = utf.GetString(buf, offset + m_enc.MinBytesPerChar, ct.NameEnd - offset - m_enc.MinBytesPerChar); } else { name = utf.GetString(buf, offset + m_enc.MinBytesPerChar * 2, ct.NameEnd - offset - m_enc.MinBytesPerChar * 2); } if (m_elem.Name != name) { throw new XmlException("Invalid end tag: " + name + " != " + m_elem.Name); } XmlElement parent = (XmlElement)m_elem.ParentNode; if (parent == null) { FireOnElement(m_elem); } m_elem = parent; }
internal LogLevelMatchExpression(ContentToken token)
{
    this.Levels = LogLevels.None;
    var content = token.Content;

    foreach (char c in content.Substring(ExpressionHeader.Length).Trim('"'))
    {
        LogLevels level;
        if (!LogLevelMap.TryGetValue(c, out level))
        {
            throw new ParsingException($"{content} is not of '{ExpressionHeader}{nameof(LogLevels)}' format", token.Index);
        }

        this.Levels |= level;
    }
}
internal static int[] ParseIntArray(string header, ContentToken token)
{
    string[] arrayContent = token.Content.Substring(header.Length).Split(',');

    int[] res = arrayContent
        .Select(c =>
        {
            int i;
            if (!int.TryParse(c, out i))
            {
                throw new ParsingException($"{token.Content} is not of '{header}int1,int2,...' format", token.Index);
            }

            return i;
        })
        .ToArray();

    return res;
}
internal static ContentMatchExpression CreateContentMatchExpression(ContentToken token)
{
    // The header is everything up to and including the first ':' (empty if there is none).
    string header = token.Content.Substring(0, token.Content.IndexOf(':') + 1);

    if (string.IsNullOrWhiteSpace(header))
    {
        return new TextMatchExpression(token);
    }

    Func<ContentToken, ContentMatchExpression> ctor;
    if (Ctors.TryGetValue(header, out ctor))
    {
        return ctor(token);
    }

    // Unknown header: fall back to a plain text match.
    return new TextMatchExpression(token);
}
private void EndTag(byte[] buf, int offset, ContentToken ct, Tokens tok)
{
    // TODO: we don't validate XML right now; we could check here that the
    // end tag name equals the start tag name.
    depth--;
    nsStack.Pop();

    if (current == null)
    {
        OnStreamEnd?.Invoke();
        return;
    }

    var parent = current.Parent as XmppXElement;
    if (parent == null)
    {
        OnStreamElement?.Invoke(current);
    }

    current = parent;
}
public static string GetCache(string key)
{
    // Try the token cache first.
    if (_tokenCache.InnerCache.ContainsKey(key))
    {
        return _tokenCache.GetString(key);
    }

    // CACHE MISS: load the token from Sitecore (this will degrade page rendering
    // performance). This should almost never happen, since the token source list
    // must already have been loaded from cache to produce 'key' in the first place.
    IEnumerable<Item> libraries = TokenService.GetAllTokenLibraries();
    IEnumerable<ContentToken> tokens = TokenService.GetTokensFromLibraries(libraries);
    ContentToken token = tokens.FirstOrDefault(u => u.Pattern.Equals(key, StringComparison.Ordinal));

    if (token != null)
    {
        return token.Output;
    }

    // No token found: holy crap, someone screwed up.
    // Log the requested key; 'token' is null at this point.
    Logger.Warn($"Fatal content token error: no token found in cache or the database for '{key}'", typeof(CATSTokenCacheManager));
    return string.Empty;
}
/** * Scans the first token of a byte subarrary that contains content. * Returns one of the following integers according to the type of token * that the subarray starts with: * <ul> * <li><code>TOK.START_TAG_NO_ATTS</code></li> * <li><code>TOK.START_TAG_WITH_ATTS</code></li> * <li><code>TOK.EMPTY_ELEMENT_NO_ATTS</code></li> * <li><code>TOK.EMPTY_ELEMENT_WITH_ATTS</code></li> * <li><code>TOK.END_TAG</code></li> * <li><code>TOK.DATA_CHARS</code></li> * <li><code>TOK.DATA_NEWLINE</code></li> * <li><code>TOK.CDATA_SECT_OPEN</code></li> * <li><code>TOK.ENTITY_REF</code></li> * <li><code>TOK.MAGIC_ENTITY_REF</code></li> * <li><code>TOK.CHAR_REF</code></li> * <li><code>TOK.CHAR_PAIR_REF</code></li> * <li><code>TOK.PI</code></li> * <li><code>TOK.XML_DECL</code></li> * <li><code>TOK.COMMENT</code></li> * </ul> * <p> * Information about the token is stored in <code>token</code>. * </p> * When <code>TOK.CDATA_SECT_OPEN</code> is returned, * <code>tokenizeCdataSection</code> should be called until * it returns <code>TOK.CDATA_SECT</code>. * * @exception EmptyTokenException if the subarray is empty * @exception PartialTokenException if the subarray contains only part of * a legal token * @exception InvalidTokenException if the subarrary does not start * with a legal token or part of one * @exception ExtensibleTokenException if the subarray encodes just a carriage * return ('\r') * * @see #TOK.START_TAG_NO_ATTS * @see #TOK.START_TAG_WITH_ATTS * @see #TOK.EMPTY_ELEMENT_NO_ATTS * @see #TOK.EMPTY_ELEMENT_WITH_ATTS * @see #TOK.END_TAG * @see #TOK.DATA_CHARS * @see #TOK.DATA_NEWLINE * @see #TOK.CDATA_SECT_OPEN * @see #TOK.ENTITY_REF * @see #TOK.MAGIC_ENTITY_REF * @see #TOK.CHAR_REF * @see #TOK.CHAR_PAIR_REF * @see #TOK.PI * @see #TOK.XML_DECL * @see #TOK.COMMENT * @see ContentToken * @see EmptyTokenException * @see PartialTokenException * @see InvalidTokenException * @see ExtensibleTokenException * @see #tokenizeCdataSection */ public TOK tokenizeContent(byte[] buf, int off, int end, ContentToken token) { if (minBPC > 1) end = adjustEnd(off, end); if (off == end) throw new EmptyTokenException(); switch (byteType(buf, off)) { case BT_LT: return scanLt(buf, off + minBPC, end, token); case BT_AMP: return scanRef(buf, off + minBPC, end, token); case BT_CR: off += minBPC; if (off == end) throw new ExtensibleTokenException(TOK.DATA_NEWLINE); if (byteType(buf, off) == BT_LF) off += minBPC; token.TokenEnd = off; return TOK.DATA_NEWLINE; case BT_LF: token.TokenEnd = off + minBPC; return TOK.DATA_NEWLINE; case BT_RSQB: off += minBPC; if (off == end) throw new ExtensibleTokenException(TOK.DATA_CHARS); if (!charMatches(buf, off, ']')) break; off += minBPC; if (off == end) throw new ExtensibleTokenException(TOK.DATA_CHARS); if (!charMatches(buf, off, '>')) { off -= minBPC; break; } throw new InvalidTokenException(off); case BT_NONXML: case BT_MALFORM: throw new InvalidTokenException(off); case BT_LEAD2: if (end - off < 2) throw new PartialCharException(off); check2(buf, off); off += 2; break; case BT_LEAD3: if (end - off < 3) throw new PartialCharException(off); check3(buf, off); off += 3; break; case BT_LEAD4: if (end - off < 4) throw new PartialCharException(off); check4(buf, off); off += 4; break; default: off += minBPC; break; } token.TokenEnd = extendData(buf, off, end); return TOK.DATA_CHARS; }
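The Push/Write loops later in this listing drive this tokenizer for real; the sketch below only illustrates the contract documented in the comment above. It assumes the xpnet-style UTF8Encoding tokenizer class and the ContentToken/TOK types used throughout these examples; the method name and the Console output are illustrative only.

// Minimal sketch (assumed class names): scan one buffer of content tokens and
// stop when only a partial or extensible token remains, as the doc comment above describes.
private static void DumpContentTokens(byte[] buf)
{
    var enc = new UTF8Encoding();   // assumed xpnet-style tokenizer, not System.Text.UTF8Encoding
    var token = new ContentToken();
    int off = 0;

    while (off < buf.Length)
    {
        TOK tok;
        try
        {
            tok = enc.tokenizeContent(buf, off, buf.Length, token);
        }
        catch (PartialTokenException)
        {
            // Only part of a token is buffered; keep the rest and wait for more bytes.
            break;
        }
        catch (ExtensibleTokenException)
        {
            // e.g. a bare '\r' that might still be followed by '\n'.
            break;
        }

        switch (tok)
        {
            case TOK.START_TAG_NO_ATTS:
            case TOK.START_TAG_WITH_ATTS:
            case TOK.EMPTY_ELEMENT_NO_ATTS:
            case TOK.EMPTY_ELEMENT_WITH_ATTS:
            case TOK.END_TAG:
                Console.WriteLine("tag token: " + tok);
                break;
            case TOK.DATA_CHARS:
            case TOK.DATA_NEWLINE:
                Console.WriteLine("character data up to offset " + token.TokenEnd);
                break;
            default:
                Console.WriteLine("other token: " + tok);
                break;
        }

        off = token.TokenEnd;   // advance past the token just consumed
    }
}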
internal BeginIdxExpression(ContentToken token) { this.BeginIdxContent = ContentParsingModule.ParseInt(ExpressionHeader, token); }
/* off points to character following first character of attribute name */ private TOK scanAtts(int nameStart, byte[] buf, int off, int end, ContentToken token) { int NameEnd = -1; while (off != end) { switch (byteType(buf, off)) { case BT_NMSTRT: case BT_NAME: case BT_MINUS: off += minBPC; break; case BT_LEAD2: if (end - off < 2) throw new PartialCharException(off); if (!isNameChar2(buf, off)) throw new InvalidTokenException(off); off += 2; break; case BT_LEAD3: if (end - off < 3) throw new PartialCharException(off); if (!isNameChar3(buf, off)) throw new InvalidTokenException(off); off += 3; break; case BT_LEAD4: if (end - off < 4) throw new PartialCharException(off); if (!isNameChar4(buf, off)) throw new InvalidTokenException(off); off += 4; break; case BT_S: case BT_CR: case BT_LF: NameEnd = off; for (;;) { off += minBPC; if (off == end) throw new PartialTokenException(); switch (byteType(buf, off)) { case BT_EQUALS: goto loop; case BT_S: case BT_LF: case BT_CR: break; default: throw new InvalidTokenException(off); } } loop: ; /* fall through */ goto case BT_EQUALS; case BT_EQUALS: { if (NameEnd < 0) NameEnd = off; int open; for (;;) { off += minBPC; if (off == end) throw new PartialTokenException(); open = byteType(buf, off); if (open == BT_QUOT || open == BT_APOS) break; switch (open) { case BT_S: case BT_LF: case BT_CR: break; default: throw new InvalidTokenException(off); } } off += minBPC; int valueStart = off; bool normalized = true; int t; /* in attribute value */ for (;;) { if (off == end) throw new PartialTokenException(); t = byteType(buf, off); if (t == open) break; switch (t) { case BT_NONXML: case BT_MALFORM: throw new InvalidTokenException(off); case BT_LEAD2: if (end - off < 2) throw new PartialCharException(off); check2(buf, off); off += 2; break; case BT_LEAD3: if (end - off < 3) throw new PartialCharException(off); check3(buf, off); off += 3; break; case BT_LEAD4: if (end - off < 4) throw new PartialCharException(off); check4(buf, off); off += 4; break; case BT_AMP: { normalized = false; int saveNameEnd = token.NameEnd; scanRef(buf, off + minBPC, end, token); token.NameEnd = saveNameEnd; off = token.TokenEnd; break; } case BT_S: if (normalized && (off == valueStart || byteToAscii(buf, off) != ' ' || (off + minBPC != end && (byteToAscii(buf, off + minBPC) == ' ' || byteType(buf, off + minBPC) == open)))) normalized = false; off += minBPC; break; case BT_LT: throw new InvalidTokenException(off); case BT_LF: case BT_CR: normalized = false; /* fall through */ goto default; default: off += minBPC; break; } } token.appendAttribute(nameStart, NameEnd, valueStart, off, normalized); off += minBPC; if (off == end) throw new PartialTokenException(); t = byteType(buf, off); switch (t) { case BT_S: case BT_CR: case BT_LF: off += minBPC; if (off == end) throw new PartialTokenException(); t = byteType(buf, off); break; case BT_GT: case BT_SOL: break; default: throw new InvalidTokenException(off); } /* off points to closing quote */ for (;;) { switch (t) { case BT_NMSTRT: nameStart = off; off += minBPC; goto skipToName; case BT_LEAD2: if (end - off < 2) throw new PartialCharException(off); if (byteType2(buf, off) != BT_NMSTRT) throw new InvalidTokenException(off); nameStart = off; off += 2; goto skipToName; case BT_LEAD3: if (end - off < 3) throw new PartialCharException(off); if (byteType3(buf, off) != BT_NMSTRT) throw new InvalidTokenException(off); nameStart = off; off += 3; goto skipToName; case BT_LEAD4: if (end - off < 4) throw new PartialCharException(off); if (byteType4(buf, 
off) != BT_NMSTRT) throw new InvalidTokenException(off); nameStart = off; off += 4; goto skipToName; case BT_S: case BT_CR: case BT_LF: break; case BT_GT: token.checkAttributeUniqueness(buf); token.TokenEnd = off + minBPC; return TOK.START_TAG_WITH_ATTS; case BT_SOL: off += minBPC; if (off == end) throw new PartialTokenException(); checkCharMatches(buf, off, '>'); token.checkAttributeUniqueness(buf); token.TokenEnd = off + minBPC; return TOK.EMPTY_ELEMENT_WITH_ATTS; default: throw new InvalidTokenException(off); } off += minBPC; if (off == end) throw new PartialTokenException(); t = byteType(buf, off); } skipToName: NameEnd = -1; break; } default: throw new InvalidTokenException(off); } } throw new PartialTokenException(); }
/* off points to character following "<" */ private TOK scanLt(byte[] buf, int off, int end, ContentToken token) { if (off == end) throw new PartialTokenException(); switch (byteType(buf, off)) { case BT_NMSTRT: off += minBPC; break; case BT_LEAD2: if (end - off < 2) throw new PartialCharException(off); if (byteType2(buf, off) != BT_NMSTRT) throw new InvalidTokenException(off); off += 2; break; case BT_LEAD3: if (end - off < 3) throw new PartialCharException(off); if (byteType3(buf, off) != BT_NMSTRT) throw new InvalidTokenException(off); off += 3; break; case BT_LEAD4: if (end - off < 4) throw new PartialCharException(off); if (byteType4(buf, off) != BT_NMSTRT) throw new InvalidTokenException(off); off += 4; break; case BT_EXCL: if ((off += minBPC) == end) throw new PartialTokenException(); switch (byteType(buf, off)) { case BT_MINUS: return scanComment(buf, off + minBPC, end, token); case BT_LSQB: return scanCdataSection(buf, off + minBPC, end, token); } throw new InvalidTokenException(off); case BT_QUEST: return scanPi(buf, off + minBPC, end, token); case BT_SOL: return scanEndTag(buf, off + minBPC, end, token); default: throw new InvalidTokenException(off); } /* we have a start-tag */ token.NameEnd = -1; token.clearAttributes(); while (off != end) { switch (byteType(buf, off)) { case BT_NMSTRT: case BT_NAME: case BT_MINUS: off += minBPC; break; case BT_LEAD2: if (end - off < 2) throw new PartialCharException(off); if (!isNameChar2(buf, off)) throw new InvalidTokenException(off); off += 2; break; case BT_LEAD3: if (end - off < 3) throw new PartialCharException(off); if (!isNameChar3(buf, off)) throw new InvalidTokenException(off); off += 3; break; case BT_LEAD4: if (end - off < 4) throw new PartialCharException(off); if (!isNameChar4(buf, off)) throw new InvalidTokenException(off); off += 4; break; case BT_S: case BT_CR: case BT_LF: token.NameEnd = off; off += minBPC; for (;;) { if (off == end) throw new PartialTokenException(); switch (byteType(buf, off)) { case BT_NMSTRT: return scanAtts(off, buf, off + minBPC, end, token); case BT_LEAD2: if (end - off < 2) throw new PartialCharException(off); if (byteType2(buf, off) != BT_NMSTRT) throw new InvalidTokenException(off); return scanAtts(off, buf, off + 2, end, token); case BT_LEAD3: if (end - off < 3) throw new PartialCharException(off); if (byteType3(buf, off) != BT_NMSTRT) throw new InvalidTokenException(off); return scanAtts(off, buf, off + 3, end, token); case BT_LEAD4: if (end - off < 4) throw new PartialCharException(off); if (byteType4(buf, off) != BT_NMSTRT) throw new InvalidTokenException(off); return scanAtts(off, buf, off + 4, end, token); case BT_GT: case BT_SOL: goto loop; case BT_S: case BT_CR: case BT_LF: off += minBPC; break; default: throw new InvalidTokenException(off); } } loop: break; case BT_GT: if (token.NameEnd < 0) token.NameEnd = off; token.TokenEnd = off + minBPC; return TOK.START_TAG_NO_ATTS; case BT_SOL: if (token.NameEnd < 0) token.NameEnd = off; off += minBPC; if (off == end) throw new PartialTokenException(); checkCharMatches(buf, off, '>'); token.TokenEnd = off + minBPC; return TOK.EMPTY_ELEMENT_NO_ATTS; default: throw new InvalidTokenException(off); } } throw new PartialTokenException(); }
private string NormalizeAttributeValue(byte[] buf, int offset, int length) { if (length == 0) { return(string.Empty); } string val = null; var buffer = new ByteBuffer(); var copy = new byte[length]; Buffer.BlockCopy(buf, offset, copy, 0, length); buffer.Write(copy); byte[] b = buffer.GetBuffer(); int off = 0; var ct = new ContentToken(); try { while (off < b.Length) { //tok = m_enc.tokenizeContent(b, off, b.Length, ct); Tokens tok = utf8Encoding.TokenizeAttributeValue(b, off, b.Length, ct); switch (tok) { case Tokens.PartialToken: case Tokens.PartialChar: case Tokens.ExtensibleToken: return(null); case Tokens.AttributeValueS: case Tokens.DataChars: case Tokens.DataNewline: val += (utf.GetString(b, off, ct.TokenEnd - off)); break; case Tokens.CharReference: case Tokens.MagicEntityReference: val += new string(new[] { ct.RefChar1 }); break; case Tokens.CharPairReference: val += new string(new[] { ct.RefChar1, ct.RefChar2 }); break; case Tokens.EntityReference: throw new NotImplementedException("Token type not implemented: " + tok); } off = ct.TokenEnd; } } catch (Exception ex) { OnStreamError?.Invoke(ex); } finally { buffer.RemoveFirst(off); } return(val); }
private void StartTag(byte[] buf, int offset, ContentToken ct, TOK tok) { m_Depth++; int colon; string name; string prefix; Hashtable ht = new Hashtable(); m_ns.PushScope(); // if i have attributes if ((tok == TOK.START_TAG_WITH_ATTS) || (tok == TOK.EMPTY_ELEMENT_WITH_ATTS)) { int start; int end; string val; for (int i=0; i<ct.getAttributeSpecifiedCount(); i++) { start = ct.getAttributeNameStart(i); end = ct.getAttributeNameEnd(i); name = utf.GetString(buf, start, end - start); start = ct.getAttributeValueStart(i); end = ct.getAttributeValueEnd(i); //val = utf.GetString(buf, start, end - start); val = NormalizeAttributeValue(buf, start, end - start); // <foo b='&'/> // <foo b='&amp;' // TODO: if val includes &, it gets double-escaped if (name.StartsWith("xmlns:")) { colon = name.IndexOf(':'); prefix = name.Substring(colon+1); m_ns.AddNamespace(prefix, val); } else if (name == "xmlns") { m_ns.AddNamespace(string.Empty, val); } else { ht.Add(name, val); } } } name = utf.GetString(buf, offset + m_enc.MinBytesPerChar, ct.NameEnd - offset - m_enc.MinBytesPerChar); colon = name.IndexOf(':'); string ns = ""; prefix = null; if (colon > 0) { prefix = name.Substring(0, colon); name = name.Substring(colon + 1); ns = m_ns.LookupNamespace(prefix); } else { ns = m_ns.DefaultNamespace; } Element newel = ElementFactory.GetElement(prefix, name, ns); foreach (string attrname in ht.Keys) { newel.SetAttribute(attrname, (string)ht[attrname]); } if (m_root == null) { m_root = newel; //FireOnDocumentStart(m_root); if (OnStreamStart!=null) OnStreamStart(this, m_root); } else { if (current != null) current.AddChild(newel); current = newel; } }
/// <summary> /// Write bytes into the parser. /// </summary> /// <param name="buf">The bytes to put into the parse stream</param> /// <param name="offset">Offset into buf to start at</param> /// <param name="length">Number of bytes to write</param> /// <exception cref="System.NotImplementedException">Token type not implemented: + tok</exception> public void Write(byte[] buf, int offset, int length) { // or assert, really, but this is a little nicer. if (length == 0) { return; } // No locking is required. Read() won't get called again // until this method returns. // TODO: only do this copy if we have a partial token at the // end of parsing. var copy = new byte[length]; Buffer.BlockCopy(buf, offset, copy, 0, length); bufferAggregate.Write(copy); byte[] b = bufferAggregate.GetBuffer(); int off = 0; var ct = new ContentToken(); try { while (off < b.Length) { Tokens tok; if (isCData) { tok = utf8Encoding.TokenizeCdataSection(b, off, b.Length, ct); } else { tok = utf8Encoding.TokenizeContent(b, off, b.Length, ct); } switch (tok) { case Tokens.PartialToken: case Tokens.PartialChar: case Tokens.ExtensibleToken: return; case Tokens.EmptyElementNoAtts: case Tokens.EmptyElementWithAtts: StartTag(b, off, ct, tok); EndTag(b, off, ct, tok); break; case Tokens.StartTagNoAtts: case Tokens.StartTagWithAtts: StartTag(b, off, ct, tok); break; case Tokens.EndTag: EndTag(b, off, ct, tok); break; case Tokens.DataChars: case Tokens.DataNewline: AddText(utf.GetString(b, off, ct.TokenEnd - off)); break; case Tokens.CharReference: case Tokens.MagicEntityReference: AddText(new string(new[] { ct.RefChar1 })); break; case Tokens.CharPairReference: AddText(new string(new[] { ct.RefChar1, ct.RefChar2 })); break; case Tokens.Comment: if (current != null) { // <!-- 4 // --> 3 int start = off + 4 * utf8Encoding.MinBytesPerChar; int end = ct.TokenEnd - off - 7 * utf8Encoding.MinBytesPerChar; string text = utf.GetString(b, start, end); current.Add(text); } break; case Tokens.CdataSectOpen: isCData = true; break; case Tokens.CdataSectClose: CloseCDataSection(); isCData = false; break; case Tokens.XmlDeclaration: // thou shalt use UTF8, and XML version 1. // i shall ignore evidence to the contrary... // TODO: Throw an exception if these assuptions are // wrong break; case Tokens.EntityReference: case Tokens.ProcessingInstruction: throw new NotImplementedException("Token type not implemented: " + tok); } off = ct.TokenEnd; } } catch (Exception ex) { OnStreamError?.Invoke(ex); } finally { bufferAggregate.RemoveFirst(off); } }
/// <summary> /// Put bytes into the parser. /// </summary> /// <param name="buf"> The bytes to put into the parse stream </param> /// <param name="offset"> Offset into buf to start at </param> /// <param name="length"> Number of bytes to write </param> public void Push(byte[] buf, int offset, int length) { // or assert, really, but this is a little nicer. if (length == 0) { return; } // No locking is required. Read() won't get called again // until this method returns. // TODO: only do this copy if we have a partial token at the // end of parsing. var copy = new byte[length]; Buffer.BlockCopy(buf, offset, copy, 0, length); m_buf.Write(copy); byte[] b = m_buf.GetBuffer(); int off = 0; TOK tok = TOK.END_TAG; var ct = new ContentToken(); try { while (off < b.Length) { if (m_cdata) { tok = m_enc.tokenizeCdataSection(b, off, b.Length, ct); } else { tok = m_enc.tokenizeContent(b, off, b.Length, ct); } switch (tok) { case TOK.EMPTY_ELEMENT_NO_ATTS: case TOK.EMPTY_ELEMENT_WITH_ATTS: StartTag(b, off, ct, tok); EndTag(b, off, ct, tok); break; case TOK.START_TAG_NO_ATTS: case TOK.START_TAG_WITH_ATTS: StartTag(b, off, ct, tok); break; case TOK.END_TAG: EndTag(b, off, ct, tok); break; case TOK.DATA_CHARS: case TOK.DATA_NEWLINE: AddText(utf.GetString(b, off, ct.TokenEnd - off)); break; case TOK.CHAR_REF: case TOK.MAGIC_ENTITY_REF: AddText(new string(new[] { ct.RefChar1 })); break; case TOK.CHAR_PAIR_REF: AddText(new string(new[] { ct.RefChar1, ct.RefChar2 })); break; case TOK.COMMENT: if (current != null) { // <!-- 4 // --> 3 int start = off + 4 * m_enc.MinBytesPerChar; int end = ct.TokenEnd - off - 7 * m_enc.MinBytesPerChar; string text = utf.GetString(b, start, end); current.AddChild(new Comment(text)); } break; case TOK.CDATA_SECT_OPEN: m_cdata = true; break; case TOK.CDATA_SECT_CLOSE: m_cdata = false; break; case TOK.XML_DECL: // thou shalt use UTF8, and XML version 1. // i shall ignore evidence to the contrary... // TODO: Throw an exception if these assuptions are // wrong break; case TOK.ENTITY_REF: case TOK.PI: #if CF throw new util.NotImplementedException("Token type not implemented: " + tok); #else throw new NotImplementedException("Token type not implemented: " + tok); #endif } off = ct.TokenEnd; } } catch (PartialTokenException) { // ignored; } catch (ExtensibleTokenException) { // ignored; } catch (Exception ex) { if (OnStreamError != null) { OnStreamError(this, ex); } } finally { m_buf.Clear(off); } }
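For context, here is a hedged sketch of how a caller typically feeds bytes to this push parser. The StreamParser class name, its constructor and the exact event-handler signatures are assumptions (this Push variant matches an agsXMPP-style parser); only the Push(byte[], int, int) method and the OnStreamStart/OnStreamElement/OnStreamEnd/OnStreamError event names come from the snippets themselves.

// Usage sketch (assumed surrounding types): wire up the stream events and push
// raw bytes as they arrive from the network. Partial tokens are buffered by the
// parser and completed on a later Push call.
var parser = new StreamParser();   // assumed class name

parser.OnStreamStart   += (sender, root)    => Console.WriteLine("stream start: " + root.Name);
parser.OnStreamElement += (sender, element) => Console.WriteLine("stanza: " + element.Name);
parser.OnStreamEnd     += (sender, root)    => Console.WriteLine("stream end");
parser.OnStreamError   += (sender, error)   => Console.WriteLine("parse error: " + error);

// Chunks can split the XML anywhere; the second Push completes the <message/> stanza.
byte[] part1 = Encoding.UTF8.GetBytes("<stream:stream xmlns='jabber:client'><message><bo");
byte[] part2 = Encoding.UTF8.GetBytes("dy>hello</body></message>");
parser.Push(part1, 0, part1.Length);
parser.Push(part2, 0, part2.Length);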
internal DatetimeBeginMatchExpression(ContentToken token) { this.DatetimeBeginContent = ContentParsingModule.ParseDateTime(ExpressionHeader, token); }
internal TextMatchExpression(ContentToken token) : this(token.Content) { }
public void OnEncodingCr_EncodeCrLf()
{
    var token = new ContentToken('\n'.ToByte());

    // Expected value first, then the actual value produced by Encode().
    Assert.AreEqual(new[] { '\r'.ToByte(), '\n'.ToByte() }, token.Encode());
}
internal ProcessIdMatchExpression(ContentToken token) { this.ProcessIdsContent = ContentParsingModule.ParseIntArray(ExpressionHeader, token); }
internal RegexMatchExpression(ContentToken token) : this(token.Content) { }
/// <summary>
/// Handle a start tag (or the opening half of an empty element): push a new
/// namespace scope, collect the attributes, resolve the element's prefix and
/// namespace, and either raise <c>OnStreamStart</c> for the stream root or
/// attach the new element to the current one.
/// </summary>
/// <param name="buf">Byte buffer containing the token</param>
/// <param name="offset">Offset of the token within <paramref name="buf"/></param>
/// <param name="ct">Token information filled in by the tokenizer, including the attribute list</param>
/// <param name="tok">The token type that was scanned</param>
private void StartTag(byte[] buf, int offset, ContentToken ct, TOK tok)
{
    m_Depth++;
    int colon;
    string name;
    string prefix;
    var ht = new Hashtable();

    m_ns.PushScope();

    // Collect attributes, if the tag has any.
    if ((tok == TOK.START_TAG_WITH_ATTS) || (tok == TOK.EMPTY_ELEMENT_WITH_ATTS))
    {
        int start;
        int end;
        string val;

        for (int i = 0; i < ct.getAttributeSpecifiedCount(); i++)
        {
            start = ct.getAttributeNameStart(i);
            end = ct.getAttributeNameEnd(i);
            name = utf.GetString(buf, start, end - start);

            start = ct.getAttributeValueStart(i);
            end = ct.getAttributeValueEnd(i);
            val = NormalizeAttributeValue(buf, start, end - start);

            // <foo b='&'/>
            // <foo b='&amp;'
            // TODO: if val includes &, it gets double-escaped
            if (name.StartsWith("xmlns:"))
            {
                colon = name.IndexOf(':');
                prefix = name.Substring(colon + 1);
                m_ns.AddNamespace(prefix, val);
            }
            else if (name == "xmlns")
            {
                m_ns.AddNamespace(string.Empty, val);
            }
            else
            {
                ht.Add(name, val);
            }
        }
    }

    // Resolve the element name, prefix and namespace.
    name = utf.GetString(buf,
                         offset + m_enc.MinBytesPerChar,
                         ct.NameEnd - offset - m_enc.MinBytesPerChar);
    colon = name.IndexOf(':');
    string ns = string.Empty;
    prefix = null;

    if (colon > 0)
    {
        prefix = name.Substring(0, colon);
        name = name.Substring(colon + 1);
        ns = m_ns.LookupNamespace(prefix);
    }
    else
    {
        ns = m_ns.DefaultNamespace;
    }

    Element newel = ElementFactory.GetElement(prefix, name, ns);

    foreach (string attrname in ht.Keys)
    {
        newel.SetAttribute(attrname, (string)ht[attrname]);
    }

    if (m_root == null)
    {
        m_root = newel;
        if (OnStreamStart != null)
        {
            OnStreamStart(this, m_root, m_ns.DefaultNamespace ?? "");
        }
    }
    else
    {
        if (current != null)
        {
            current.AddChild(newel);
        }

        current = newel;
    }
}
private string NormalizeAttributeValue(byte[] buf, int offset, int length) { if (length == 0) return null; string val = null; BufferAggregate buffer = new BufferAggregate(); byte[] copy = new byte[length]; System.Buffer.BlockCopy(buf, offset, copy, 0, length); buffer.Write(copy); byte[] b = buffer.GetBuffer(); int off = 0; TOK tok = TOK.END_TAG; ContentToken ct = new ContentToken(); try { while (off < b.Length) { //tok = m_enc.tokenizeContent(b, off, b.Length, ct); tok = m_enc.tokenizeAttributeValue(b, off, b.Length, ct); switch (tok) { case TOK.ATTRIBUTE_VALUE_S: case TOK.DATA_CHARS: case TOK.DATA_NEWLINE: val += (utf.GetString(b, off, ct.TokenEnd - off)); break; case TOK.CHAR_REF: case TOK.MAGIC_ENTITY_REF: val += new string(new char[] { ct.RefChar1 }); break; case TOK.CHAR_PAIR_REF: val += new string(new char[] {ct.RefChar1, ct.RefChar2}); break; case TOK.ENTITY_REF: #if CF throw new util.NotImplementedException("Token type not implemented: " + tok); #else throw new System.NotImplementedException("Token type not implemented: " + tok); #endif } off = ct.TokenEnd; } } catch (PartialTokenException) { // ignored; } catch (ExtensibleTokenException) { // ignored; } catch (Exception ex) { if (OnStreamError != null) OnStreamError(this, ex); } finally { buffer.Clear(off); } return val; }
/// <summary>
/// Re-tokenize an attribute value so that character, entity and magic entity
/// references are expanded, and return the normalized string.
/// </summary>
/// <param name="buf">Byte buffer containing the raw attribute value</param>
/// <param name="offset">Offset of the value within <paramref name="buf"/></param>
/// <param name="length">Length of the value in bytes</param>
/// <returns>The normalized attribute value, or null if the value is empty</returns>
/// <exception cref="NotImplementedException">Thrown for entity references, which are not implemented</exception>
private string NormalizeAttributeValue(byte[] buf, int offset, int length)
{
    if (length == 0)
    {
        return null;
    }

    string val = null;
    var buffer = new BufferAggregate();
    var copy = new byte[length];
    Buffer.BlockCopy(buf, offset, copy, 0, length);
    buffer.Write(copy);

    byte[] b = buffer.GetBuffer();
    int off = 0;
    TOK tok = TOK.END_TAG;
    var ct = new ContentToken();

    try
    {
        while (off < b.Length)
        {
            tok = m_enc.tokenizeAttributeValue(b, off, b.Length, ct);

            switch (tok)
            {
                case TOK.ATTRIBUTE_VALUE_S:
                case TOK.DATA_CHARS:
                case TOK.DATA_NEWLINE:
                    val += utf.GetString(b, off, ct.TokenEnd - off);
                    break;
                case TOK.CHAR_REF:
                case TOK.MAGIC_ENTITY_REF:
                    val += new string(new[] { ct.RefChar1 });
                    break;
                case TOK.CHAR_PAIR_REF:
                    val += new string(new[] { ct.RefChar1, ct.RefChar2 });
                    break;
                case TOK.ENTITY_REF:
#if CF
                    throw new util.NotImplementedException("Token type not implemented: " + tok);
#else
                    throw new NotImplementedException("Token type not implemented: " + tok);
#endif
            }

            off = ct.TokenEnd;
        }
    }
    catch (PartialTokenException)
    {
        // Ignored: wait for more data.
    }
    catch (ExtensibleTokenException)
    {
        // Ignored: wait for more data.
    }
    catch (Exception ex)
    {
        if (OnStreamError != null)
        {
            OnStreamError(this, ex);
        }
    }
    finally
    {
        buffer.Clear(off);
    }

    return val;
}
private void StartTag(byte[] buf, int offset, ContentToken ct, Tokens tok) { depth++; int colon; string name; string prefix; var attributes = new Dictionary <string, string>(); nsStack.Push(); // if i have attributes if ((tok == Tokens.StartTagWithAtts) || (tok == Tokens.EmptyElementWithAtts)) { int start; int end; string val; for (int i = 0; i < ct.GetAttributeSpecifiedCount(); i++) { start = ct.GetAttributeNameStart(i); end = ct.GetAttributeNameEnd(i); name = utf.GetString(buf, start, end - start); start = ct.GetAttributeValueStart(i); end = ct.GetAttributeValueEnd(i); //val = _utf.GetString(buf, start, end - start); val = NormalizeAttributeValue(buf, start, end - start); // <foo b='&'/> // <foo b='&amp;' // TODO: if val includes &, it gets double-escaped if (name.StartsWith("xmlns:")) { // prefixed namespace declaration colon = name.IndexOf(':'); prefix = name.Substring(colon + 1); nsStack.AddNamespace(prefix, val); attributes.Add(name, val); } else if (name == "xmlns") { // namespace declaration nsStack.AddNamespace(string.Empty, val); attributes.Add(name, val); } else { // normal attribute attributes.Add(name, val); } } } name = utf.GetString(buf, offset + utf8Encoding.MinBytesPerChar, ct.NameEnd - offset - utf8Encoding.MinBytesPerChar); colon = name.IndexOf(':'); string ns; prefix = null; if (colon > 0) { prefix = name.Substring(0, colon); name = name.Substring(colon + 1); ns = nsStack.LookupNamespace(prefix); } else { ns = nsStack.DefaultNamespace; } XmppXElement newel = Factory.GetElement(prefix, name, ns); foreach (string attrname in attributes.Keys) { colon = attrname.IndexOf(':'); if (colon > 0) { prefix = attrname.Substring(0, colon); name = attrname.Substring(colon + 1); ns = nsStack.LookupNamespace(prefix); if (attrname.StartsWith("xmlns:")) { // Namespace Declaration newel.SetAttributeValue(XName.Get(name, ns), attributes[attrname]); } else { // prefixed attribute newel.SetAttributeValue("{" + ns + "}" + name, attributes[attrname]); } } else { newel.SetAttributeValue(XName.Get(attrname, string.Empty), attributes[attrname]); } } if (root == null) { root = newel; OnStreamStart?.Invoke(root); } else { current?.Add(newel); current = newel; } }
private void StartTag(byte[] buf, int offset, ContentToken ct, TOK tok) { int colon; string name; string prefix; Hashtable ht = new Hashtable(); m_ns.PushScope(); // if i have attributes if ((tok == TOK.START_TAG_WITH_ATTS) || (tok == TOK.EMPTY_ELEMENT_WITH_ATTS)) { int start; int end; string val; for (int i = 0; i < ct.getAttributeSpecifiedCount(); i++) { start = ct.getAttributeNameStart(i); end = ct.getAttributeNameEnd(i); name = utf.GetString(buf, start, end - start); start = ct.getAttributeValueStart(i); end = ct.getAttributeValueEnd(i); val = utf.GetString(buf, start, end - start); // <foo b='&'/> // <foo b='&amp;' // TODO: if val includes &, it gets double-escaped if (name.StartsWith("xmlns:")) { colon = name.IndexOf(':'); prefix = name.Substring(colon + 1); m_ns.AddNamespace(prefix, val); } else if (name == "xmlns") { m_ns.AddNamespace(string.Empty, val); } ht.Add(name, val); } } name = utf.GetString(buf, offset + m_enc.MinBytesPerChar, ct.NameEnd - offset - m_enc.MinBytesPerChar); colon = name.IndexOf(':'); string ns = ""; prefix = ""; if (colon > 0) { prefix = name.Substring(0, colon); name = name.Substring(colon + 1); ns = m_ns.LookupNamespace(prefix); } else { ns = m_ns.DefaultNamespace; } XmlQualifiedName q = new XmlQualifiedName(name, ns); XmlElement elem = m_factory.GetElement(prefix, q, m_doc); foreach (string attrname in ht.Keys) { colon = attrname.IndexOf(':'); if (colon > 0) { prefix = attrname.Substring(0, colon); name = attrname.Substring(colon + 1); XmlAttribute attr = m_doc.CreateAttribute(prefix, name, m_ns.LookupNamespace(prefix)); attr.InnerXml = (string)ht[attrname]; elem.SetAttributeNode(attr); } else { XmlAttribute attr = m_doc.CreateAttribute(attrname); attr.InnerXml = (string)ht[attrname]; elem.SetAttributeNode(attr); } } if (m_root == null) { m_root = elem; FireOnDocumentStart(m_root); } else { if (m_elem != null) { m_elem.AppendChild(elem); } m_elem = elem; } }
/// <summary> /// Put bytes into the parser. /// </summary> /// <param name="buf">The bytes to put into the parse stream</param> /// <param name="offset">Offset into buf to start at</param> /// <param name="length">Number of bytes to write</param> public void Push(byte[] buf, int offset, int length) { // or assert, really, but this is a little nicer. if (length == 0) return; // No locking is required. Read() won't get called again // until this method returns. // TODO: only do this copy if we have a partial token at the // end of parsing. byte[] copy = new byte[length]; System.Buffer.BlockCopy(buf, offset, copy, 0, length); m_buf.Write(copy); byte[] b = m_buf.GetBuffer(); int off = 0; TOK tok = TOK.END_TAG; ContentToken ct = new ContentToken(); try { while (off < b.Length) { if (m_cdata) tok = m_enc.tokenizeCdataSection(b, off, b.Length, ct); else tok = m_enc.tokenizeContent(b, off, b.Length, ct); switch (tok) { case TOK.EMPTY_ELEMENT_NO_ATTS: case TOK.EMPTY_ELEMENT_WITH_ATTS: StartTag(b, off, ct, tok); EndTag(b, off, ct, tok); break; case TOK.START_TAG_NO_ATTS: case TOK.START_TAG_WITH_ATTS: StartTag(b, off, ct, tok); break; case TOK.END_TAG: EndTag(b, off, ct, tok); break; case TOK.DATA_CHARS: case TOK.DATA_NEWLINE: AddText(utf.GetString(b, off, ct.TokenEnd - off)); break; case TOK.CHAR_REF: case TOK.MAGIC_ENTITY_REF: AddText(new string(new char[] { ct.RefChar1 })); break; case TOK.CHAR_PAIR_REF: AddText(new string(new char[] {ct.RefChar1, ct.RefChar2})); break; case TOK.COMMENT: if (current != null) { // <!-- 4 // --> 3 int start = off + 4 * m_enc.MinBytesPerChar; int end = ct.TokenEnd - off - 7 * m_enc.MinBytesPerChar; string text = utf.GetString(b, start, end); current.AddChild(new Comment(text)); } break; case TOK.CDATA_SECT_OPEN: m_cdata = true; break; case TOK.CDATA_SECT_CLOSE: m_cdata = false; break; case TOK.XML_DECL: // thou shalt use UTF8, and XML version 1. // i shall ignore evidence to the contrary... // TODO: Throw an exception if these assuptions are // wrong break; case TOK.ENTITY_REF: case TOK.PI: #if CF throw new util.NotImplementedException("Token type not implemented: " + tok); #else throw new System.NotImplementedException("Token type not implemented: " + tok); #endif } off = ct.TokenEnd; } } catch (PartialTokenException) { // ignored; } catch (ExtensibleTokenException) { // ignored; } catch (Exception ex) { if (OnStreamError != null) OnStreamError(this, ex); } finally { m_buf.Clear(off); } }
/// <summary> /// Put bytes into the parser. /// </summary> /// <param name="buf">The bytes to put into the parse stream</param> /// <param name="offset">Offset into buf to start at</param> /// <param name="length">Number of bytes to write</param> public void Push(byte[] buf, int offset, int length) { // or assert, really, but this is a little nicer. if (length == 0) { return; } // No locking is required. Read() won't get called again // until this method returns. Keep in mind that we're // already on a thread in a ThreadPool, which is created // and managed by System.IO at the end of the day. // TODO: only do this copy if we have a partial token at the // end of parsing. byte[] copy = new byte[length]; System.Buffer.BlockCopy(buf, offset, copy, 0, length); m_buf.Write(copy); byte[] b = m_buf.GetBuffer(); int off = 0; TOK tok = TOK.END_TAG; ContentToken ct = new ContentToken(); try { while (off < b.Length) { if (m_cdata) { tok = m_enc.tokenizeCdataSection(b, off, b.Length, ct); } else { tok = m_enc.tokenizeContent(b, off, b.Length, ct); } switch (tok) { case TOK.EMPTY_ELEMENT_NO_ATTS: case TOK.EMPTY_ELEMENT_WITH_ATTS: StartTag(b, off, ct, tok); EndTag(b, off, ct, tok); break; case TOK.START_TAG_NO_ATTS: case TOK.START_TAG_WITH_ATTS: StartTag(b, off, ct, tok); break; case TOK.END_TAG: EndTag(b, off, ct, tok); break; case TOK.DATA_CHARS: case TOK.DATA_NEWLINE: AddText(utf.GetString(b, off, ct.TokenEnd - off)); break; case TOK.CHAR_REF: case TOK.MAGIC_ENTITY_REF: AddText(new string(new char[] { ct.RefChar1 })); break; case TOK.CHAR_PAIR_REF: AddText(new string(new char[] { ct.RefChar1, ct.RefChar2 })); break; case TOK.COMMENT: if (m_elem != null) { // <!-- 4 // --> 3 int start = off + 4 * m_enc.MinBytesPerChar; int end = ct.TokenEnd - off - 7 * m_enc.MinBytesPerChar; string text = utf.GetString(b, start, end); m_elem.AppendChild(m_doc.CreateComment(text)); } break; case TOK.CDATA_SECT_OPEN: m_cdata = true; break; case TOK.CDATA_SECT_CLOSE: m_cdata = false; break; case TOK.XML_DECL: // thou shalt use UTF8, and XML version 1. // i shall ignore evidence to the contrary... // TODO: Throw an exception if these assuptions are // wrong break; case TOK.ENTITY_REF: case TOK.PI: throw new System.NotImplementedException("Token type not implemented: " + tok); } off = ct.TokenEnd; ct.clearAttributes(); } } catch (PartialTokenException) { // Console.WriteLine("PartialTokenException: " + System.Text.Encoding.UTF8.GetString(copy)); // ignored; } catch (ExtensibleTokenException) { // ignored; } catch (XpNet.InvalidTokenException e) { throw new XMLParseException(e, this, buf, offset, length); } catch (Exception e) { throw new Exception("Unexpected exception", e); } finally { m_buf.Clear(off); ct.clearAttributes(); } }
private void EndTag(byte[] buf, int offset, ContentToken ct, TOK tok) { m_Depth--; m_ns.PopScope(); if (current == null) {// end of doc if (OnStreamEnd!=null) OnStreamEnd(this, m_root); // FireOnDocumentEnd(); return; } string name = null; if ((tok == TOK.EMPTY_ELEMENT_WITH_ATTS) || (tok == TOK.EMPTY_ELEMENT_NO_ATTS)) name = utf.GetString(buf, offset + m_enc.MinBytesPerChar, ct.NameEnd - offset - m_enc.MinBytesPerChar); else name = utf.GetString(buf, offset + m_enc.MinBytesPerChar*2, ct.NameEnd - offset - m_enc.MinBytesPerChar*2); // if (current.Name != name) // throw new Exception("Invalid end tag: " + name + // " != " + current.Name); Element parent = (Element) current.Parent; if (parent == null) { DoRaiseOnStreamElement(current); //if (OnStreamElement!=null) // OnStreamElement(this, current); //FireOnElement(current); } current = parent; }