/// <summary>
/// Verifies that this token has the given type.
/// </summary>
/// <param name="ty">The type this token is required to have.</param>
/// <exception cref="LexingException">
/// Thrown when the token's type differs from <paramref name="ty"/>. The exception is built from an
/// empty string, the token's begin position and a message naming the expected type.
/// </exception>
internal void AssertType(TOK ty)
{
    // Guard clause: a matching type needs no further work.
    if (_type == ty)
    {
        return;
    }

    string message = "expected type " + ty.ToString();
    throw new LexingException(String.Empty, _beg, message);
}
/// <summary>
/// Creates a token from its type, its lexeme and the two positions that delimit it.
/// </summary>
/// <param name="ty">Stored as the token's type.</param>
/// <param name="lexeme">Stored as the token's lexeme.</param>
/// <param name="beg">Stored as the token's begin position.</param>
/// <param name="end">Stored as the token's end position.</param>
public Token(TOK ty, string lexeme, Position beg, Position end)
{
    // The four assignments are independent of each other.
    _beg = beg;
    _end = end;
    _lexeme = lexeme;
    _type = ty;
}
/// <summary>
/// Handles the end of an element: decrements the depth counter, pops the namespace scope and
/// either reports the end of the document or steps back to the parent of the current element.
/// </summary>
/// <param name="buf">Buffer holding the token; not read by this method.</param>
/// <param name="offset">Offset of the token in <paramref name="buf"/>; not read by this method.</param>
/// <param name="ct">Token details; not read by this method.</param>
/// <param name="tok">Token type; not read by this method.</param>
private void EndTag(byte[] buf, int offset, ContentToken ct, TOK tok)
{
    m_Depth--;
    m_ns.PopScope();

    if (current != null)
    {
        // The end-tag name is not compared with the name of the open element.
        var parent = (Element)current.Parent;
        if (parent == null)
        {
            // An element without a parent is complete: hand it to the element callback.
            DoRaiseOnStreamElement(current);
        }

        current = parent;
        return;
    }

    // No open element left: end of doc.
    if (OnStreamEnd != null)
    {
        OnStreamEnd(this, m_root);
    }
}
/// <summary>
/// Asserts that a token exists and carries the expected lexeme, type and positions.
/// The checks run in this order: non-null, lexeme, type, begin position, end position.
/// </summary>
/// <param name="token">Token under test; must not be null.</param>
/// <param name="lexeme">Expected result of <c>token.Lexeme()</c>.</param>
/// <param name="tp">Expected result of <c>token.Type()</c>.</param>
/// <param name="beg">Expected value of <c>token.Beg</c>.</param>
/// <param name="end">Expected value of <c>token.End</c>.</param>
static void Ensure(Token token, string lexeme, TOK tp, Position beg, Position end)
{
    Assert.IsNotNull(token);

    var actualLexeme = token.Lexeme();
    Assert.AreEqual(lexeme, actualLexeme);

    var actualType = token.Type();
    Assert.AreEqual(tp, actualType);

    var actualBeg = token.Beg;
    Assert.AreEqual(beg, actualBeg);

    var actualEnd = token.End;
    Assert.AreEqual(end, actualEnd);
}
/// <summary>
/// Handles an end tag (or the closing half of an empty-element tag): pops the namespace scope,
/// checks that the tag name matches the currently open element and steps back to its parent.
/// </summary>
/// <param name="buf">Buffer containing the token.</param>
/// <param name="offset">Offset in <paramref name="buf"/> at which the token starts.</param>
/// <param name="ct">Token details; <c>NameEnd</c> marks where the tag name stops.</param>
/// <param name="tok">Token type; decides how many leading characters precede the name.</param>
/// <exception cref="XmlException">
/// The (possibly rewritten) tag name differs from the name of the open element.
/// </exception>
private void EndTag(byte[] buf, int offset, ContentToken ct, TOK tok)
{
    m_ns.PopScope();

    if (m_elem == null)
    {
        // end of doc
        FireOnDocumentEnd();
        return;
    }

    // Empty-element tokens skip one leading character before the name, end tags skip two
    // (presumably "<" and "</" respectively).
    bool isEmptyElement = (tok == TOK.EMPTY_ELEMENT_WITH_ATTS) || (tok == TOK.EMPTY_ELEMENT_NO_ATTS);
    int leadingChars = isEmptyElement ? 1 : 2;
    int nameStart = offset + m_enc.MinBytesPerChar * leadingChars;
    string name = utf.GetString(buf, nameStart, ct.NameEnd - nameStart);

    // Workaround for an iTeleport bug: it sends an xmlns prefix, which makes .NET Framework 2.0
    // throw. The prefix is replaced by "unsupported", which is then ignored. The end tag is
    // rewritten here so that it matches the rewritten start tag.
    if (name.StartsWith("xmlns"))
    {
        // A name shorter than "xmlns:" (i.e. exactly "xmlns") has nothing after the prefix.
        // Substring would throw ArgumentOutOfRangeException for it, so use an empty remainder.
        int prefixLength = "xmlns:".Length;
        string remainder = name.Length >= prefixLength ? name.Substring(prefixLength) : string.Empty;
        name = string.Format("unsupported:{0}", remainder);
    }

    if (m_elem.Name != name)
    {
        throw new XmlException("Invalid end tag: " + name + " != " + m_elem.Name);
    }

    XmlElement parent = (XmlElement)m_elem.ParentNode;
    if (parent == null)
    {
        // The element has no parent, so it is complete: publish it.
        FireOnElement(m_elem);
    }

    m_elem = parent;
}
/// <summary>
/// Handles the end of an element: decrements the depth counter, pops the namespace stack and
/// either reports the end of the document or steps back to the parent of the current element.
/// </summary>
/// <remarks>
/// The tag name is no longer decoded here. Its only consumer, the end-tag name check, is disabled,
/// so decoding it merely allocated a string that was thrown away on every end tag.
/// </remarks>
/// <param name="buf">Buffer holding the token; not read by this method.</param>
/// <param name="offset">Offset of the token in <paramref name="buf"/>; not read by this method.</param>
/// <param name="ct">Token details; not read by this method.</param>
/// <param name="tok">Token type; not read by this method.</param>
private void EndTag(byte[] buf, int offset, ContentToken ct, TOK tok)
{
    m_Depth--;
    m_NamespaceStack.Pop();

    if (current == null)
    {
        // end of doc
        if (OnStreamEnd != null)
        {
            OnStreamEnd(this, m_root);
        }
        return;
    }

    Element parent = (Element)current.Parent;
    if (parent == null)
    {
        // An element without a parent is complete: hand it to the element callback.
        DoRaiseOnStreamElement(current);
    }

    current = parent;
}
/// <summary>
/// Handles the end of an element: decrements the depth counter, pops the namespace scope and
/// either reports the end of the document or steps back to the parent of the current element.
/// </summary>
/// <remarks>
/// The tag name is no longer decoded here. Its only consumer, the end-tag name check, is disabled,
/// so decoding it merely allocated a string that was thrown away on every end tag.
/// </remarks>
/// <param name="buf">Buffer holding the token; not read by this method.</param>
/// <param name="offset">Offset of the token in <paramref name="buf"/>; not read by this method.</param>
/// <param name="ct">Token details; not read by this method.</param>
/// <param name="tok">Token type; not read by this method.</param>
private void EndTag(byte[] buf, int offset, ContentToken ct, TOK tok)
{
    this.m_Depth--;
    this.m_ns.PopScope();

    if (this.current == null)
    {
        // end of doc
        OnStreamEnd?.Invoke(this, this.m_root);
        return;
    }

    var parent = (Element)this.current.Parent;
    if (parent == null)
    {
        // An element without a parent is complete: hand it to the element callback.
        this.DoRaiseOnStreamElement(this.current);
    }

    this.current = parent;
}
/// <summary>
/// Handles an end tag (or the closing half of an empty-element tag): pops the namespace scope,
/// checks that the tag name matches the currently open element and steps back to its parent.
/// </summary>
/// <param name="buf">Buffer containing the token.</param>
/// <param name="offset">Offset in <paramref name="buf"/> at which the token starts.</param>
/// <param name="ct">Token details; <c>NameEnd</c> marks where the tag name stops.</param>
/// <param name="tok">Token type; decides how many leading bytes precede the name.</param>
/// <exception cref="XmlException">The tag name differs from the name of the open element.</exception>
private void EndTag(byte[] buf, int offset, ContentToken ct, TOK tok)
{
    m_ns.PopScope();

    if (m_elem == null)
    {
        // end of doc
        FireOnDocumentEnd();
        return;
    }

    // One character is skipped before the name of an empty-element token, two before the name
    // of an end tag.
    bool isEmptyElement = (tok == TOK.EMPTY_ELEMENT_WITH_ATTS) || (tok == TOK.EMPTY_ELEMENT_NO_ATTS);
    int skip = isEmptyElement ? m_enc.MinBytesPerChar : m_enc.MinBytesPerChar * 2;
    string tagName = utf.GetString(buf, offset + skip, ct.NameEnd - offset - skip);

    if (m_elem.Name != tagName)
    {
        throw new XmlException("Invalid end tag: " + tagName + " != " + m_elem.Name);
    }

    XmlElement parent = (XmlElement)m_elem.ParentNode;
    if (parent == null)
    {
        // The element has no parent, so it is complete: publish it.
        FireOnElement(m_elem);
    }

    m_elem = parent;
}
/// <summary>
/// Handles a start tag (or the opening half of an empty-element tag): opens a namespace scope,
/// registers namespace declarations, builds the element with its remaining attributes and makes
/// it either the root or the new current element.
/// </summary>
/// <param name="buf">Buffer containing the token.</param>
/// <param name="offset">Offset in <paramref name="buf"/> at which the token starts.</param>
/// <param name="ct">Token details: end of the tag name and the attribute name/value ranges.</param>
/// <param name="tok">Token type; attributes are only read for the "with attributes" types.</param>
private void StartTag(byte[] buf, int offset, ContentToken ct, TOK tok)
{
    m_Depth++;

    var attributes = new Hashtable();
    m_ns.PushScope();

    bool hasAttributes = (tok == TOK.START_TAG_WITH_ATTS) || (tok == TOK.EMPTY_ELEMENT_WITH_ATTS);
    if (hasAttributes)
    {
        for (int i = 0; i < ct.getAttributeSpecifiedCount(); i++)
        {
            int nameStart = ct.getAttributeNameStart(i);
            int nameEnd = ct.getAttributeNameEnd(i);
            string attrName = utf.GetString(buf, nameStart, nameEnd - nameStart);

            int valueStart = ct.getAttributeValueStart(i);
            int valueEnd = ct.getAttributeValueEnd(i);
            string attrValue = NormalizeAttributeValue(buf, valueStart, valueEnd - valueStart);

            // Namespace declarations go to the namespace manager; everything else is kept as
            // an ordinary attribute.
            if (attrName.StartsWith("xmlns:"))
            {
                int sep = attrName.IndexOf(':');
                m_ns.AddNamespace(attrName.Substring(sep + 1), attrValue);
            }
            else if (attrName == "xmlns")
            {
                m_ns.AddNamespace(string.Empty, attrValue);
            }
            else
            {
                attributes.Add(attrName, attrValue);
            }
        }
    }

    // The tag name starts one character after the token start.
    int tagStart = offset + m_enc.MinBytesPerChar;
    string tagName = utf.GetString(buf, tagStart, ct.NameEnd - offset - m_enc.MinBytesPerChar);

    string elementPrefix = null;
    string elementNs;
    int colon = tagName.IndexOf(':');
    if (colon > 0)
    {
        // Prefixed name: split it and resolve the prefix in the current scope.
        elementPrefix = tagName.Substring(0, colon);
        tagName = tagName.Substring(colon + 1);
        elementNs = m_ns.LookupNamespace(elementPrefix);
    }
    else
    {
        elementNs = m_ns.DefaultNamespace;
    }

    Element newel = ElementFactory.GetElement(elementPrefix, tagName, elementNs);
    foreach (DictionaryEntry entry in attributes)
    {
        newel.SetAttribute((string)entry.Key, (string)entry.Value);
    }

    if (m_root == null)
    {
        // First element seen: it becomes the root and the stream-start event is raised.
        m_root = newel;
        if (OnStreamStart != null)
        {
            OnStreamStart(this, m_root, m_ns.DefaultNamespace ?? "");
        }
        return;
    }

    if (current != null)
    {
        current.AddChild(newel);
    }

    current = newel;
}
/// <summary>
/// Put bytes into the parser. The bytes are appended to the internal buffer, which is then
/// tokenized from its start; each complete token is dispatched to the matching handler.
/// </summary>
/// <remarks>
/// <see cref="PartialTokenException"/> and <see cref="ExtensibleTokenException"/> stop the loop
/// silently. Any other exception is passed to <c>OnStreamError</c> when a handler is attached.
/// In every case the buffer is cleared up to the last offset reached.
/// </remarks>
/// <param name="buf"> The bytes to put into the parse stream </param>
/// <param name="offset"> Offset into buf to start at </param>
/// <param name="length"> Number of bytes to write; zero makes the call a no-op </param>
public void Push(byte[] buf, int offset, int length)
{
    // or assert, really, but this is a little nicer.
    if (length == 0)
    {
        return;
    }

    // No locking is required. Read() won't get called again
    // until this method returns.

    // TODO: only do this copy if we have a partial token at the
    // end of parsing.
    var incoming = new byte[length];
    Buffer.BlockCopy(buf, offset, incoming, 0, length);
    m_buf.Write(incoming);

    byte[] data = m_buf.GetBuffer();
    int pos = 0;
    var ct = new ContentToken();

    try
    {
        while (pos < data.Length)
        {
            TOK tok;
            if (m_cdata)
            {
                tok = m_enc.tokenizeCdataSection(data, pos, data.Length, ct);
            }
            else
            {
                tok = m_enc.tokenizeContent(data, pos, data.Length, ct);
            }

            switch (tok)
            {
                case TOK.EMPTY_ELEMENT_NO_ATTS:
                case TOK.EMPTY_ELEMENT_WITH_ATTS:
                    // An empty element is opened and closed straight away.
                    StartTag(data, pos, ct, tok);
                    EndTag(data, pos, ct, tok);
                    break;

                case TOK.START_TAG_NO_ATTS:
                case TOK.START_TAG_WITH_ATTS:
                    StartTag(data, pos, ct, tok);
                    break;

                case TOK.END_TAG:
                    EndTag(data, pos, ct, tok);
                    break;

                case TOK.DATA_CHARS:
                case TOK.DATA_NEWLINE:
                    AddText(utf.GetString(data, pos, ct.TokenEnd - pos));
                    break;

                case TOK.CHAR_REF:
                case TOK.MAGIC_ENTITY_REF:
                    AddText(new string(new[] { ct.RefChar1 }));
                    break;

                case TOK.CHAR_PAIR_REF:
                    AddText(new string(new[] { ct.RefChar1, ct.RefChar2 }));
                    break;

                case TOK.COMMENT:
                    if (current != null)
                    {
                        // "<!--" is 4 characters and "-->" is 3, so 7 characters of markup
                        // are excluded from the comment text.
                        int textStart = pos + 4 * m_enc.MinBytesPerChar;
                        int textLength = ct.TokenEnd - pos - 7 * m_enc.MinBytesPerChar;
                        string text = utf.GetString(data, textStart, textLength);
                        current.AddChild(new Comment(text));
                    }
                    break;

                case TOK.CDATA_SECT_OPEN:
                    m_cdata = true;
                    break;

                case TOK.CDATA_SECT_CLOSE:
                    m_cdata = false;
                    break;

                case TOK.XML_DECL:
                    // thou shalt use UTF8, and XML version 1.
                    // i shall ignore evidence to the contrary...

                    // TODO: Throw an exception if these assumptions are
                    // wrong
                    break;

                case TOK.ENTITY_REF:
                case TOK.PI:
#if CF
                    throw new util.NotImplementedException("Token type not implemented: " + tok);
#else
                    throw new NotImplementedException("Token type not implemented: " + tok);
#endif
            }

            pos = ct.TokenEnd;
        }
    }
    catch (PartialTokenException)
    {
        // ignored;
    }
    catch (ExtensibleTokenException)
    {
        // ignored;
    }
    catch (Exception ex)
    {
        if (OnStreamError != null)
        {
            OnStreamError(this, ex);
        }
    }
    finally
    {
        // Drop everything before the last offset reached.
        m_buf.Clear(pos);
    }
}
/// <summary>
/// Creates the exception and records the token type it was raised for.
/// </summary>
/// <param name="tokType">Token type stored on this exception instance.</param>
public ExtensibleTokenException(TOK tokType)
{
    this.tokType = tokType;
}
/// <summary>
/// Handles the end of an element: decrements the depth counter, pops the namespace stack and
/// either reports the end of the document or steps back to the parent of the current element.
/// </summary>
/// <remarks>
/// The tag name is no longer decoded here. Its only consumer, the end-tag name check, is disabled,
/// so decoding it merely allocated a string that was thrown away on every end tag.
/// </remarks>
/// <param name="buf">Buffer holding the token; not read by this method.</param>
/// <param name="offset">Offset of the token in <paramref name="buf"/>; not read by this method.</param>
/// <param name="ct">Token details; not read by this method.</param>
/// <param name="tok">Token type; not read by this method.</param>
private void EndTag(byte[] buf, int offset, ContentToken ct, TOK tok)
{
    m_Depth--;
    m_NamespaceStack.Pop();

    if (current == null)
    {
        // end of doc
        if (OnStreamEnd != null)
        {
            OnStreamEnd(this, m_root);
        }
        return;
    }

    Element parent = (Element)current.Parent;
    if (parent == null)
    {
        // An element without a parent is complete: hand it to the element callback.
        DoRaiseOnStreamElement(current);
    }

    current = parent;
}
/// <summary>
/// Closes the current element. The depth counter and the namespace scope are always unwound;
/// after that the method either raises the stream-end event (no element is open) or moves
/// <c>current</c> up to its parent.
/// </summary>
/// <param name="buf">Buffer holding the token; not read by this method.</param>
/// <param name="offset">Offset of the token in <paramref name="buf"/>; not read by this method.</param>
/// <param name="ct">Token details; not read by this method.</param>
/// <param name="tok">Token type; not read by this method.</param>
private void EndTag(byte[] buf, int offset, ContentToken ct, TOK tok)
{
    m_Depth--;
    m_ns.PopScope();

    bool documentEnded = current == null;
    if (documentEnded)
    {
        // end of doc
        if (OnStreamEnd != null)
        {
            OnStreamEnd(this, m_root);
        }

        return;
    }

    // The end-tag name is not validated against the open element.
    var enclosing = (Element)current.Parent;
    if (enclosing == null)
    {
        // No enclosing element: this one is complete, so publish it.
        DoRaiseOnStreamElement(current);
    }

    current = enclosing;
}
// static methods
/// <summary>
/// This can be used to map from any IOB-style (i.e., "I-PERS" style labels)
/// or just categories representation to any other.
/// </summary>
/// <remarks>
/// It can read and change any representation to other representations:
/// a 4 way representation of all entities, like S-PERS, B-PERS,
/// I-PERS, E-PERS for single word, beginning, internal, and end of entity
/// (IOBES or SBIEO); always marking the first word of an entity (IOB2 or BIO);
/// only marking specially the beginning of non-first
/// items of an entity sequences with B-PERS (IOB1);
/// the reverse IOE1 and IOE2; IO where everything is I-tagged; and
/// NOPREFIX, where no prefixes are written on category labels.
/// The last two representations are deficient in not allowing adjacent
/// entities of the same class to be represented, but nevertheless
/// convenient. Note that the background label is never given a prefix.
/// This code is very specific to the particular CoNLL way of labeling
/// classes for IOB-style encoding, but this notation is quite widespread.
/// It will work on any of these styles of input.
/// This will also recognize BILOU format (B=B, I=I, L=E, O=O, U=S).
/// It also works with lowercased names like i-org.
/// If the labels are not of the form "C-Y+", where C is a single character,
/// then they will be regarded as NOPREFIX labels.
/// This method updates the List tokens in place.
/// </remarks>
/// <param name="tokens">List of tokens (each a CoreLabel) in some style</param>
/// <param name="key">The key under which the existing labels are read from each token, commonly CoreAnnotations.AnswerAnnotation</param>
/// <param name="backgroundLabel">The background label, which gets special treatment</param>
/// <param name="style">Output style; one of iob[12], bio, ioe[12], io, sbieo/iobes, noprefix, bilou</param>
/// <param name="intern">Whether to String-intern the new labels (may as well, small number!)</param>
/// <exception cref="ArgumentException">The style is not one of the recognized names.</exception>
public static void EntitySubclassify<TOK>(IList<TOK> tokens, Type key, string backgroundLabel, string style, bool intern)
    where TOK : ICoreMap
{
    // Translate the style name into a numeric code used by the per-token switch below.
    int how;
    string lowerStyle = style.ToLower(Locale.English);
    switch (lowerStyle)
    {
        case "iob1":
            how = 0;
            break;
        case "iob2":
        case "bio":
            how = 1;
            break;
        case "ioe1":
            how = 2;
            break;
        case "ioe2":
            how = 3;
            break;
        case "io":
            how = 4;
            break;
        case "sbieo":
        case "iobes":
            how = 5;
            break;
        case "noprefix":
            how = 6;
            break;
        case "bilou":
            how = 7;
            break;
        default:
            throw new ArgumentException("entitySubclassify: unknown style: " + style);
    }

    // The padded view is indexed at i - 1 and i + 1 below, including at both ends of the list.
    // Presumably PaddedList returns the padding CoreLabel for out-of-range indices - confirm.
    IList<TOK> paddedTokens = new PaddedList<TOK>(tokens, (TOK) new CoreLabel());
    int size = paddedTokens.Count;
    string[] newAnswers = new string[size];

    for (int i = 0; i < size; i++)
    {
        TOK c = paddedTokens[i];
        TOK p = paddedTokens[i - 1];
        TOK n = paddedTokens[i + 1];

        string cAns = c.Get(key);
        string pAns = p.Get(key);
        if (pAns == null)
        {
            pAns = backgroundLabel;
        }
        string nAns = n.Get(key);
        if (nAns == null)
        {
            nAns = backgroundLabel;
        }

        string @base;
        char prefix;
        SplitEntityLabel(cAns, out @base, out prefix);

        string pBase;
        char pPrefix;
        SplitEntityLabel(pAns, out pBase, out pPrefix);

        string nBase;
        char nPrefix;
        SplitEntityLabel(nAns, out nBase, out nPrefix);

        bool isStartAdjacentSame = IsSameEntityBoundary(pBase, pPrefix, @base, prefix);
        bool isEndAdjacentSame = IsSameEntityBoundary(@base, prefix, nBase, nPrefix);
        bool isFirst = IsDifferentEntityBoundary(pBase, @base) || isStartAdjacentSame;
        bool isLast = IsDifferentEntityBoundary(@base, nBase) || isEndAdjacentSame;

        // The background label never receives a prefix.
        string newAnswer = @base;
        if (!@base.Equals(backgroundLabel))
        {
            switch (how)
            {
                case 0:
                    // iob1, only B if adjacent
                    newAnswer = (isStartAdjacentSame ? "B-" : "I-") + @base;
                    break;
                case 1:
                    // iob2 always B at start
                    newAnswer = (isFirst ? "B-" : "I-") + @base;
                    break;
                case 2:
                    // ioe1
                    newAnswer = (isEndAdjacentSame ? "E-" : "I-") + @base;
                    break;
                case 3:
                    // ioe2
                    newAnswer = (isLast ? "E-" : "I-") + @base;
                    break;
                case 4:
                    // io
                    newAnswer = "I-" + @base;
                    break;
                case 5:
                    // sbieo / iobes
                    if (isFirst && isLast)
                    {
                        newAnswer = "S-" + @base;
                    }
                    else if (isLast)
                    {
                        newAnswer = "E-" + @base;
                    }
                    else if (isFirst)
                    {
                        newAnswer = "B-" + @base;
                    }
                    else
                    {
                        newAnswer = "I-" + @base;
                    }
                    break;
                case 7:
                    // bilou; nothing to do on case 6 as it's just base
                    if (isFirst && isLast)
                    {
                        newAnswer = "U-" + @base;
                    }
                    else if (isLast)
                    {
                        newAnswer = "L-" + @base;
                    }
                    else if (isFirst)
                    {
                        newAnswer = "B-" + @base;
                    }
                    else
                    {
                        newAnswer = "I-" + @base;
                    }
                    break;
            }
        }

        if (intern)
        {
            newAnswer = string.Intern(newAnswer);
        }
        newAnswers[i] = newAnswer;
    }

    // All new labels are computed before any token is modified, so neighbours are always
    // judged by their original labels.
    // NOTE(review): labels are read with `key` but always written to AnswerAnnotation - confirm
    // that this asymmetry is intended.
    for (int i = 0; i < size; i++)
    {
        TOK c = tokens[i];
        c.Set(typeof(CoreAnnotations.AnswerAnnotation), newAnswers[i]);
    }
}

/// <summary>
/// Splits a label of the form "C-Y+" into its upper-cased one-character prefix and its base.
/// Any other label is returned whole as the base with a blank prefix.
/// </summary>
/// <param name="label">The label to split; must not be null.</param>
/// <param name="base">Receives the label without its two-character prefix, or the whole label.</param>
/// <param name="prefix">Receives the upper-cased first character, or ' ' when there is no prefix.</param>
private static void SplitEntityLabel(string label, out string @base, out char prefix)
{
    if (label.Length > 2 && label[1] == '-')
    {
        @base = Sharpen.Runtime.Substring(label, 2, label.Length);
        // Invariant upper-casing keeps labels such as "i-org" independent of the current culture.
        prefix = char.ToUpperInvariant(label[0]);
    }
    else
    {
        @base = label;
        prefix = ' ';
    }
}
/// <summary>
/// Processes the closing of an element. The namespace scope is popped first; then the tag name
/// is decoded and compared with the open element before the parser moves up to the parent.
/// </summary>
/// <param name="buf">Buffer containing the token.</param>
/// <param name="offset">Offset in <paramref name="buf"/> at which the token starts.</param>
/// <param name="ct">Token details; <c>NameEnd</c> marks where the tag name stops.</param>
/// <param name="tok">Token type; decides how many leading characters precede the name.</param>
/// <exception cref="XmlException">The tag name differs from the name of the open element.</exception>
private void EndTag(byte[] buf, int offset, ContentToken ct, TOK tok)
{
    m_ns.PopScope();

    if (m_elem == null)
    {
        // end of doc
        FireOnDocumentEnd();
        return;
    }

    // Empty-element tokens have one character before the name, end tags have two.
    int leadingChars;
    switch (tok)
    {
        case TOK.EMPTY_ELEMENT_WITH_ATTS:
        case TOK.EMPTY_ELEMENT_NO_ATTS:
            leadingChars = 1;
            break;
        default:
            leadingChars = 2;
            break;
    }

    int nameStart = offset + m_enc.MinBytesPerChar * leadingChars;
    string name = utf.GetString(buf, nameStart, ct.NameEnd - nameStart);

    bool matchesOpenElement = m_elem.Name == name;
    if (!matchesOpenElement)
    {
        throw new XmlException("Invalid end tag: " + name + " != " + m_elem.Name);
    }

    XmlElement parent = (XmlElement)m_elem.ParentNode;
    if (parent == null)
    {
        // The element has no parent, so it is complete: publish it.
        FireOnElement(m_elem);
    }

    m_elem = parent;
}
/// <summary>
/// Decodes a raw attribute value: the bytes are tokenized as attribute content, and character
/// data and character references are concatenated into the resulting string.
/// </summary>
/// <remarks>
/// An entity reference raises a not-implemented exception inside the method, but that exception
/// is intercepted by the method's own general catch block and forwarded to <c>OnStreamError</c>
/// (when a handler is attached); it does not reach the caller. Whatever was decoded before the
/// failure is still returned.
/// </remarks>
/// <param name="buf">Buffer containing the attribute value.</param>
/// <param name="offset">Offset of the value in <paramref name="buf"/>.</param>
/// <param name="length">Length of the value in bytes.</param>
/// <returns>
/// The decoded value, or <c>null</c> when <paramref name="length"/> is zero or no token
/// contributed any text.
/// </returns>
private string NormalizeAttributeValue(byte[] buf, int offset, int length)
{
    if (length == 0)
    {
        return null;
    }

    string result = null;

    // The value is tokenized from a private copy held in a scratch buffer.
    var scratch = new BufferAggregate();
    var raw = new byte[length];
    Buffer.BlockCopy(buf, offset, raw, 0, length);
    scratch.Write(raw);

    byte[] data = scratch.GetBuffer();
    int pos = 0;
    var ct = new ContentToken();

    try
    {
        while (pos < data.Length)
        {
            TOK tok = m_enc.tokenizeAttributeValue(data, pos, data.Length, ct);
            switch (tok)
            {
                case TOK.ATTRIBUTE_VALUE_S:
                case TOK.DATA_CHARS:
                case TOK.DATA_NEWLINE:
                    result = result + utf.GetString(data, pos, ct.TokenEnd - pos);
                    break;

                case TOK.CHAR_REF:
                case TOK.MAGIC_ENTITY_REF:
                    result = result + new string(new[] { ct.RefChar1 });
                    break;

                case TOK.CHAR_PAIR_REF:
                    result = result + new string(new[] { ct.RefChar1, ct.RefChar2 });
                    break;

                case TOK.ENTITY_REF:
#if CF
                    throw new util.NotImplementedException("Token type not implemented: " + tok);
#else
                    throw new NotImplementedException("Token type not implemented: " + tok);
#endif
            }

            pos = ct.TokenEnd;
        }
    }
    catch (PartialTokenException)
    {
        // ignored;
    }
    catch (ExtensibleTokenException)
    {
        // ignored;
    }
    catch (Exception ex)
    {
        if (OnStreamError != null)
        {
            OnStreamError(this, ex);
        }
    }
    finally
    {
        scratch.Clear(pos);
    }

    return result;
}
/// <summary>
/// Handles a start tag (or the opening half of an empty-element tag): opens a namespace scope,
/// registers namespace declarations, creates the element with all of its attributes and makes
/// it either the root or the new current element.
/// </summary>
/// <param name="buf">Buffer containing the token.</param>
/// <param name="offset">Offset in <paramref name="buf"/> at which the token starts.</param>
/// <param name="ct">Token details: end of the tag name and the attribute name/value ranges.</param>
/// <param name="tok">Token type; attributes are only read for the "with attributes" types.</param>
private void StartTag(byte[] buf, int offset, ContentToken ct, TOK tok)
{
    Hashtable attributes = new Hashtable();
    m_ns.PushScope();

    bool hasAttributes = (tok == TOK.START_TAG_WITH_ATTS) || (tok == TOK.EMPTY_ELEMENT_WITH_ATTS);
    if (hasAttributes)
    {
        for (int i = 0; i < ct.getAttributeSpecifiedCount(); i++)
        {
            int nameStart = ct.getAttributeNameStart(i);
            int nameEnd = ct.getAttributeNameEnd(i);
            string attrName = utf.GetString(buf, nameStart, nameEnd - nameStart);

            int valueStart = ct.getAttributeValueStart(i);
            int valueEnd = ct.getAttributeValueEnd(i);
            string attrValue = utf.GetString(buf, valueStart, valueEnd - valueStart);

            // <foo b='&'/>
            // <foo b='&amp;'
            // TODO: if val includes &, it gets double-escaped
            if (attrName.StartsWith("xmlns:"))
            {
                int sep = attrName.IndexOf(':');
                m_ns.AddNamespace(attrName.Substring(sep + 1), attrValue);
            }
            else if (attrName == "xmlns")
            {
                m_ns.AddNamespace(string.Empty, attrValue);
            }

            // Namespace declarations are recorded as attributes as well.
            attributes.Add(attrName, attrValue);
        }
    }

    // The tag name starts one character after the token start.
    string tagName = utf.GetString(buf,
                                   offset + m_enc.MinBytesPerChar,
                                   ct.NameEnd - offset - m_enc.MinBytesPerChar);

    string elementPrefix = "";
    string elementNs;
    int colon = tagName.IndexOf(':');
    if (colon > 0)
    {
        elementPrefix = tagName.Substring(0, colon);
        tagName = tagName.Substring(colon + 1);
        elementNs = m_ns.LookupNamespace(elementPrefix);
    }
    else
    {
        elementNs = m_ns.DefaultNamespace;
    }

    XmlQualifiedName qname = new XmlQualifiedName(tagName, elementNs);
    XmlElement elem = m_factory.GetElement(elementPrefix, qname, m_doc);

    foreach (DictionaryEntry entry in attributes)
    {
        string attrName = (string)entry.Key;
        int sep = attrName.IndexOf(':');

        XmlAttribute attr;
        if (sep > 0)
        {
            // Prefixed attribute: resolve the prefix in the current namespace scope.
            string attrPrefix = attrName.Substring(0, sep);
            string localName = attrName.Substring(sep + 1);
            attr = m_doc.CreateAttribute(attrPrefix, localName, m_ns.LookupNamespace(attrPrefix));
        }
        else
        {
            attr = m_doc.CreateAttribute(attrName);
        }

        attr.InnerXml = (string)entry.Value;
        elem.SetAttributeNode(attr);
    }

    if (m_root == null)
    {
        // First element seen: it becomes the root and the document-start event is fired.
        m_root = elem;
        FireOnDocumentStart(m_root);
        return;
    }

    if (m_elem != null)
    {
        m_elem.AppendChild(elem);
    }

    m_elem = elem;
}
/// <summary>
/// Opens an element. A new namespace scope is pushed, the attributes of the tag (if any) are
/// decoded and namespace declarations registered, the element is created through the factory,
/// and the element is installed as root or appended below the current element.
/// </summary>
/// <param name="buf">Buffer containing the token.</param>
/// <param name="offset">Offset in <paramref name="buf"/> at which the token starts.</param>
/// <param name="ct">Token details: end of the tag name and the attribute name/value ranges.</param>
/// <param name="tok">Token type; attributes are only read for the "with attributes" types.</param>
private void StartTag(byte[] buf, int offset, ContentToken ct, TOK tok)
{
    Hashtable rawAttributes = new Hashtable();
    m_ns.PushScope();

    switch (tok)
    {
        case TOK.START_TAG_WITH_ATTS:
        case TOK.EMPTY_ELEMENT_WITH_ATTS:
            for (int index = 0; index < ct.getAttributeSpecifiedCount(); index++)
            {
                int from = ct.getAttributeNameStart(index);
                int to = ct.getAttributeNameEnd(index);
                string key = utf.GetString(buf, from, to - from);

                from = ct.getAttributeValueStart(index);
                to = ct.getAttributeValueEnd(index);
                string value = utf.GetString(buf, from, to - from);

                // <foo b='&'/>
                // <foo b='&amp;'
                // TODO: if val includes &, it gets double-escaped
                if (key.StartsWith("xmlns:"))
                {
                    string declaredPrefix = key.Substring(key.IndexOf(':') + 1);
                    m_ns.AddNamespace(declaredPrefix, value);
                }
                else if (key == "xmlns")
                {
                    m_ns.AddNamespace(string.Empty, value);
                }

                // Every attribute, namespace declarations included, is kept for the element.
                rawAttributes.Add(key, value);
            }
            break;
    }

    // The tag name starts one character after the token start.
    string localName = utf.GetString(buf, offset + m_enc.MinBytesPerChar, ct.NameEnd - offset - m_enc.MinBytesPerChar);
    int colonAt = localName.IndexOf(':');

    string tagPrefix = "";
    string tagNamespace;
    if (colonAt > 0)
    {
        tagPrefix = localName.Substring(0, colonAt);
        localName = localName.Substring(colonAt + 1);
        tagNamespace = m_ns.LookupNamespace(tagPrefix);
    }
    else
    {
        tagNamespace = m_ns.DefaultNamespace;
    }

    XmlElement elem = m_factory.GetElement(tagPrefix, new XmlQualifiedName(localName, tagNamespace), m_doc);

    foreach (string attrname in rawAttributes.Keys)
    {
        int split = attrname.IndexOf(':');
        XmlAttribute attr;
        if (split > 0)
        {
            // Prefixed attribute: resolve the prefix in the current namespace scope.
            string attrPrefix = attrname.Substring(0, split);
            string attrLocal = attrname.Substring(split + 1);
            attr = m_doc.CreateAttribute(attrPrefix, attrLocal, m_ns.LookupNamespace(attrPrefix));
        }
        else
        {
            attr = m_doc.CreateAttribute(attrname);
        }

        attr.InnerXml = (string)rawAttributes[attrname];
        elem.SetAttributeNode(attr);
    }

    if (m_root != null)
    {
        if (m_elem != null)
        {
            m_elem.AppendChild(elem);
        }

        m_elem = elem;
    }
    else
    {
        // First element seen: it becomes the root and the document-start event is fired.
        m_root = elem;
        FireOnDocumentStart(m_root);
    }
}
/// <summary>
/// Handles a start tag (or the opening half of an empty-element tag): increments the depth,
/// pushes a namespace frame, registers namespace declarations, builds the element with its
/// remaining attributes and makes it either the root or the new current element.
/// </summary>
/// <param name="buf">Buffer containing the token.</param>
/// <param name="offset">Offset in <paramref name="buf"/> at which the token starts.</param>
/// <param name="ct">Token details: end of the tag name and the attribute name/value ranges.</param>
/// <param name="tok">Token type; attributes are only read for the "with attributes" types.</param>
private void StartTag(byte[] buf, int offset, ContentToken ct, TOK tok)
{
    m_Depth++;

    Hashtable attributes = new Hashtable();
    m_NamespaceStack.Push();

    bool hasAttributes = (tok == TOK.START_TAG_WITH_ATTS) || (tok == TOK.EMPTY_ELEMENT_WITH_ATTS);
    if (hasAttributes)
    {
        for (int i = 0; i < ct.getAttributeSpecifiedCount(); i++)
        {
            int nameStart = ct.getAttributeNameStart(i);
            int nameEnd = ct.getAttributeNameEnd(i);
            string attrName = utf.GetString(buf, nameStart, nameEnd - nameStart);

            int valueStart = ct.getAttributeValueStart(i);
            int valueEnd = ct.getAttributeValueEnd(i);
            string attrValue = NormalizeAttributeValue(buf, valueStart, valueEnd - valueStart);

            // Namespace declarations go to the namespace stack; everything else is kept as
            // an ordinary attribute.
            if (attrName.StartsWith("xmlns:"))
            {
                string declaredPrefix = attrName.Substring(attrName.IndexOf(':') + 1);
                m_NamespaceStack.AddNamespace(declaredPrefix, attrValue);
            }
            else if (attrName == "xmlns")
            {
                m_NamespaceStack.AddNamespace(string.Empty, attrValue);
            }
            else
            {
                attributes.Add(attrName, attrValue);
            }
        }
    }

    // The tag name starts one character after the token start.
    string tagName = utf.GetString(buf,
                                   offset + m_enc.MinBytesPerChar,
                                   ct.NameEnd - offset - m_enc.MinBytesPerChar);

    string elementPrefix = null;
    string elementNs;
    int colon = tagName.IndexOf(':');
    if (colon > 0)
    {
        elementPrefix = tagName.Substring(0, colon);
        tagName = tagName.Substring(colon + 1);
        elementNs = m_NamespaceStack.LookupNamespace(elementPrefix);
    }
    else
    {
        elementNs = m_NamespaceStack.DefaultNamespace;
    }

    Element newel = ElementFactory.GetElement(elementPrefix, tagName, elementNs);
    foreach (string attrname in attributes.Keys)
    {
        string attrValue = (string)attributes[attrname];
        newel.SetAttribute(attrname, attrValue);
    }

    if (m_root == null)
    {
        // First element seen: it becomes the root and the stream-start event is raised.
        m_root = newel;
        if (OnStreamStart != null)
        {
            OnStreamStart(this, m_root);
        }
        return;
    }

    if (current != null)
    {
        current.AddChild(newel);
    }

    current = newel;
}
/// <summary>
/// Put bytes into the parser. The bytes are appended to the internal buffer, which is then
/// tokenized from its start; each complete token is dispatched to the matching handler.
/// </summary>
/// <remarks>
/// <see cref="PartialTokenException"/> and <see cref="ExtensibleTokenException"/> stop the loop
/// silently. In every case the buffer is cleared up to the last offset reached and the token's
/// attributes are cleared.
/// </remarks>
/// <param name="buf">The bytes to put into the parse stream</param>
/// <param name="offset">Offset into buf to start at</param>
/// <param name="length">Number of bytes to write; zero makes the call a no-op</param>
/// <exception cref="XMLParseException">The tokenizer reported an invalid token.</exception>
/// <exception cref="Exception">
/// Any other failure, wrapped with the message "Unexpected exception".
/// </exception>
public void Push(byte[] buf, int offset, int length)
{
    // or assert, really, but this is a little nicer.
    if (length == 0)
    {
        return;
    }

    // No locking is required. Read() won't get called again
    // until this method returns. Keep in mind that we're
    // already on a thread in a ThreadPool, which is created
    // and managed by System.IO at the end of the day.

    // TODO: only do this copy if we have a partial token at the
    // end of parsing.
    byte[] incoming = new byte[length];
    System.Buffer.BlockCopy(buf, offset, incoming, 0, length);
    m_buf.Write(incoming);

    byte[] data = m_buf.GetBuffer();
    int pos = 0;
    ContentToken ct = new ContentToken();

    try
    {
        while (pos < data.Length)
        {
            TOK tok;
            if (m_cdata)
            {
                tok = m_enc.tokenizeCdataSection(data, pos, data.Length, ct);
            }
            else
            {
                tok = m_enc.tokenizeContent(data, pos, data.Length, ct);
            }

            switch (tok)
            {
                case TOK.EMPTY_ELEMENT_NO_ATTS:
                case TOK.EMPTY_ELEMENT_WITH_ATTS:
                    // An empty element is opened and closed straight away.
                    StartTag(data, pos, ct, tok);
                    EndTag(data, pos, ct, tok);
                    break;

                case TOK.START_TAG_NO_ATTS:
                case TOK.START_TAG_WITH_ATTS:
                    StartTag(data, pos, ct, tok);
                    break;

                case TOK.END_TAG:
                    EndTag(data, pos, ct, tok);
                    break;

                case TOK.DATA_CHARS:
                case TOK.DATA_NEWLINE:
                    AddText(utf.GetString(data, pos, ct.TokenEnd - pos));
                    break;

                case TOK.CHAR_REF:
                case TOK.MAGIC_ENTITY_REF:
                    AddText(new string(new char[] { ct.RefChar1 }));
                    break;

                case TOK.CHAR_PAIR_REF:
                    AddText(new string(new char[] { ct.RefChar1, ct.RefChar2 }));
                    break;

                case TOK.COMMENT:
                    if (m_elem != null)
                    {
                        // "<!--" is 4 characters and "-->" is 3, so 7 characters of markup
                        // are excluded from the comment text.
                        int textStart = pos + 4 * m_enc.MinBytesPerChar;
                        int textLength = ct.TokenEnd - pos - 7 * m_enc.MinBytesPerChar;
                        string text = utf.GetString(data, textStart, textLength);
                        m_elem.AppendChild(m_doc.CreateComment(text));
                    }
                    break;

                case TOK.CDATA_SECT_OPEN:
                    m_cdata = true;
                    break;

                case TOK.CDATA_SECT_CLOSE:
                    m_cdata = false;
                    break;

                case TOK.XML_DECL:
                    // thou shalt use UTF8, and XML version 1.
                    // i shall ignore evidence to the contrary...

                    // TODO: Throw an exception if these assumptions are
                    // wrong
                    break;

                case TOK.ENTITY_REF:
                case TOK.PI:
                    throw new System.NotImplementedException("Token type not implemented: " + tok);
            }

            pos = ct.TokenEnd;
            ct.clearAttributes();
        }
    }
    catch (PartialTokenException)
    {
        // ignored;
    }
    catch (ExtensibleTokenException)
    {
        // ignored;
    }
    catch (XpNet.InvalidTokenException e)
    {
        throw new XMLParseException(e, this, buf, offset, length);
    }
    catch (Exception e)
    {
        throw new Exception("Unexpected exception", e);
    }
    finally
    {
        // Drop everything before the last offset reached.
        m_buf.Clear(pos);
        ct.clearAttributes();
    }
}
/// <summary>
/// Tells whether this token has the given type.
/// </summary>
/// <param name="ty">The type to compare this token's type with.</param>
/// <returns><c>true</c> when the token's type equals <paramref name="ty"/>; otherwise <c>false</c>.</returns>
internal bool TypeIs(TOK ty)
{
    bool sameType = ty == _type;
    return sameType;
}