Ejemplo n.º 1
0
 /// <summary>
 /// Checks that this token has the expected type.
 /// </summary>
 /// <param name="ty">The type this token is required to have.</param>
 /// <exception cref="LexingException">
 /// Thrown, with this token's start position, when the token's type differs from <paramref name="ty"/>.
 /// </exception>
 internal void AssertType(TOK ty)
 {
     // Nothing to report when the type matches.
     if (_type == ty)
     {
         return;
     }

     string message = "expected type " + ty.ToString();
     throw new LexingException(String.Empty, _beg, message);
 }
Ejemplo n.º 2
0
 /// <summary>
 /// Creates a token from its type, its text and the positions where it begins and ends.
 /// </summary>
 /// <param name="ty">Type of the token.</param>
 /// <param name="lexeme">Text of the token.</param>
 /// <param name="beg">Position at which the token begins.</param>
 /// <param name="end">Position at which the token ends.</param>
 public Token(TOK ty, string lexeme, Position beg, Position end)
 {
     this._type = ty;
     this._lexeme = lexeme;
     this._beg = beg;
     this._end = end;
 }
Ejemplo n.º 3
0
        /// <summary>
        /// Closes the element that is currently open: decrements the depth counter, pops the namespace
        /// scope and makes the parent element current. When no element is open, OnStreamEnd is raised instead.
        /// </summary>
        /// <param name="buf"> Token bytes; not read by this method. </param>
        /// <param name="offset"> Token start offset; not read by this method. </param>
        /// <param name="ct"> Token details; not read by this method. </param>
        /// <param name="tok"> Token type; not read by this method. </param>
        private void EndTag(byte[] buf, int offset, ContentToken ct, TOK tok)
        {
            m_Depth--;
            m_ns.PopScope();

            var closing = current;
            if (closing == null)
            {
                // No open element left: end of document.
                var streamEnd = OnStreamEnd;
                if (streamEnd != null)
                {
                    streamEnd(this, m_root);
                }

                return;
            }

            // The tag name is not compared with the open element's name here.
            var parent = (Element)closing.Parent;
            if (parent == null)
            {
                // An element without a parent is passed on before it stops being current.
                DoRaiseOnStreamElement(closing);
            }

            current = parent;
        }
Ejemplo n.º 4
0
 /// <summary>
 /// Asserts that a token is not null and has the expected lexeme, type, begin position and end position.
 /// </summary>
 /// <param name="token">Token under test.</param>
 /// <param name="lexeme">Expected lexeme.</param>
 /// <param name="tp">Expected token type.</param>
 /// <param name="beg">Expected begin position.</param>
 /// <param name="end">Expected end position.</param>
 static void Ensure(Token token, string lexeme, TOK tp, Position beg, Position end)
 {
     Assert.IsNotNull(token);

     var actualLexeme = token.Lexeme();
     Assert.AreEqual(lexeme, actualLexeme);

     var actualType = token.Type();
     Assert.AreEqual(tp, actualType);

     var actualBeg = token.Beg;
     Assert.AreEqual(beg, actualBeg);

     var actualEnd = token.End;
     Assert.AreEqual(end, actualEnd);
 }
Ejemplo n.º 5
0
        /// <summary>
        /// Handles an end tag, or the closing half of an empty element: pops the namespace scope, checks
        /// the tag name against the open element and makes the parent element current.
        /// When no element is open, the end of the document is signalled instead.
        /// </summary>
        /// <param name="buf">Buffer holding the raw bytes of the token.</param>
        /// <param name="offset">Index in <paramref name="buf"/> at which the token starts.</param>
        /// <param name="ct">Token details; only NameEnd is read here.</param>
        /// <param name="tok">Token type; decides how many leading characters precede the name.</param>
        /// <exception cref="XmlException">The tag name does not match the name of the open element.</exception>
        private void EndTag(byte[] buf, int offset,
                            ContentToken ct, TOK tok)
        {
            m_ns.PopScope();

            if (m_elem == null)
            {
                // end of doc
                FireOnDocumentEnd();
                return;
            }

            // Skip one leading character for an empty element and two for an end tag
            // (presumably "<" and "</") to reach the start of the name.
            int lead;

            if ((tok == TOK.EMPTY_ELEMENT_WITH_ATTS) ||
                (tok == TOK.EMPTY_ELEMENT_NO_ATTS))
            {
                lead = m_enc.MinBytesPerChar;
            }
            else
            {
                lead = m_enc.MinBytesPerChar * 2;
            }

            string name = utf.GetString(buf, offset + lead, ct.NameEnd - offset - lead);

            // Workaround for an iTeleport bug: it sends an xmlns prefix, which makes
            // .NET Framework 2.0 throw. The prefix is replaced by "unsupported", which is
            // then ignored; the end tag is rewritten here so that it matches the rewritten
            // start tag.
            // The comparison is ordinal because this is a protocol identifier, and the length
            // guard keeps a bare "xmlns" name from making Substring throw.
            if (name.StartsWith("xmlns", System.StringComparison.Ordinal) &&
                name.Length >= "xmlns:".Length)
            {
                name = string.Format("unsupported:{0}", name.Substring("xmlns:".Length));
            }

            if (m_elem.Name != name)
            {
                throw new XmlException("Invalid end tag: " + name +
                                       " != " + m_elem.Name);
            }

            XmlElement parent = (XmlElement)m_elem.ParentNode;

            if (parent == null)
            {
                // An element without a parent is reported before it stops being current.
                FireOnElement(m_elem);
            }
            m_elem = parent;
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Closes the element that is currently open: decrements the depth counter, pops the namespace
        /// stack and makes the parent element current. When no element is open, OnStreamEnd is raised instead.
        /// </summary>
        /// <param name="buf">Token bytes; not read by this method.</param>
        /// <param name="offset">Token start offset; not read by this method.</param>
        /// <param name="ct">Token details; not read by this method.</param>
        /// <param name="tok">Token type; not read by this method.</param>
        private void EndTag(byte[] buf, int offset, ContentToken ct, TOK tok)
        {
            m_Depth--;
            m_NamespaceStack.Pop();

            if (current == null)
            {
                // end of doc
                if (OnStreamEnd != null)
                {
                    OnStreamEnd(this, m_root);
                }
                return;
            }

            // The tag name is not checked against the open element, so it is no longer decoded
            // from the buffer (the decoded string was never read).
            Element parent = (Element)current.Parent;

            if (parent == null)
            {
                // An element without a parent is passed on before it stops being current.
                DoRaiseOnStreamElement(current);
            }
            current = parent;
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Closes the element that is currently open: decrements the depth counter, pops the namespace
        /// scope and makes the parent element current. When no element is open, OnStreamEnd is raised instead.
        /// </summary>
        /// <param name="buf">Token bytes; not read by this method.</param>
        /// <param name="offset">Token start offset; not read by this method.</param>
        /// <param name="ct">Token details; not read by this method.</param>
        /// <param name="tok">Token type; not read by this method.</param>
        private void EndTag(byte[] buf, int offset, ContentToken ct, TOK tok)
        {
            this.m_Depth--;
            this.m_ns.PopScope();

            if (this.current == null)
            {
                // end of doc
                OnStreamEnd?.Invoke(this, this.m_root);
                return;
            }

            // The tag name is not checked against the open element, so it is no longer decoded
            // from the buffer (the decoded string was never read).
            var parent = (Element)this.current.Parent;

            if (parent == null)
            {
                // An element without a parent is passed on before it stops being current.
                this.DoRaiseOnStreamElement(this.current);
            }
            this.current = parent;
        }
        /// <summary>
        /// Handles an end tag, or the closing half of an empty element: pops the namespace scope, checks
        /// the tag name against the open element and makes the parent element current.
        /// When no element is open, the end of the document is signalled instead.
        /// </summary>
        /// <param name="buf">Buffer holding the raw bytes of the token.</param>
        /// <param name="offset">Index in <paramref name="buf"/> at which the token starts.</param>
        /// <param name="ct">Token details; only NameEnd is read here.</param>
        /// <param name="tok">Token type; decides how many leading characters precede the name.</param>
        /// <exception cref="XmlException">The tag name does not match the name of the open element.</exception>
        private void EndTag(byte[] buf, int offset,
                            ContentToken ct, TOK tok)
        {
            m_ns.PopScope();

            if (m_elem == null)
            {
                // end of doc
                FireOnDocumentEnd();
                return;
            }

            // One leading character is skipped for an empty element, two for an end tag.
            bool isEmptyElement = (tok == TOK.EMPTY_ELEMENT_WITH_ATTS) ||
                                  (tok == TOK.EMPTY_ELEMENT_NO_ATTS);
            int lead = isEmptyElement
                ? m_enc.MinBytesPerChar
                : m_enc.MinBytesPerChar * 2;

            string name = utf.GetString(buf, offset + lead, ct.NameEnd - offset - lead);

            if (m_elem.Name != name)
            {
                string message = "Invalid end tag: " + name + " != " + m_elem.Name;
                throw new XmlException(message);
            }

            XmlElement parent = (XmlElement)m_elem.ParentNode;
            if (parent == null)
            {
                // An element without a parent is reported before it stops being current.
                FireOnElement(m_elem);
            }

            m_elem = parent;
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Handles a start tag, or the opening half of an empty element: increments the depth counter,
        /// pushes a namespace scope, registers namespace declarations, creates the element with its
        /// remaining attributes and attaches it to the tree.
        /// </summary>
        /// <param name="buf"> Buffer holding the raw bytes of the token. </param>
        /// <param name="offset"> Index in <paramref name="buf"/> at which the token starts. </param>
        /// <param name="ct"> Token details: end of the tag name and positions of attribute names and values. </param>
        /// <param name="tok"> Token type; attributes are only read for the *_WITH_ATTS types. </param>
        private void StartTag(byte[] buf, int offset, ContentToken ct, TOK tok)
        {
            m_Depth++;
            string name;
            string prefix;
            var    ht = new Hashtable();

            m_ns.PushScope();

            // if i have attributes
            if ((tok == TOK.START_TAG_WITH_ATTS) || (tok == TOK.EMPTY_ELEMENT_WITH_ATTS))
            {
                for (int i = 0; i < ct.getAttributeSpecifiedCount(); i++)
                {
                    int start = ct.getAttributeNameStart(i);
                    int end   = ct.getAttributeNameEnd(i);
                    name = utf.GetString(buf, start, end - start);

                    start = ct.getAttributeValueStart(i);
                    end   = ct.getAttributeValueEnd(i);
                    string val = NormalizeAttributeValue(buf, start, end - start);

                    // <foo b='&amp;'/>
                    // <foo b='&amp;amp;'
                    // TODO: if val includes &amp;, it gets double-escaped

                    // "xmlns" is a protocol identifier, so it is matched ordinally rather than
                    // with the culture-sensitive default comparison.
                    if (name.StartsWith("xmlns:", System.StringComparison.Ordinal))
                    {
                        // Everything after "xmlns:" is the declared prefix.
                        prefix = name.Substring("xmlns:".Length);
                        m_ns.AddNamespace(prefix, val);
                    }
                    else if (name == "xmlns")
                    {
                        // Default namespace declaration.
                        m_ns.AddNamespace(string.Empty, val);
                    }
                    else
                    {
                        // Ordinary attribute; applied once the element exists.
                        ht.Add(name, val);
                    }
                }
            }

            // The tag name starts one character after the token start.
            name = utf.GetString(buf,
                                 offset + m_enc.MinBytesPerChar,
                                 ct.NameEnd - offset - m_enc.MinBytesPerChar);

            int    colon = name.IndexOf(':');
            string ns;

            prefix = null;
            if (colon > 0)
            {
                // Qualified name: split into prefix and local name and resolve the prefix.
                prefix = name.Substring(0, colon);
                name   = name.Substring(colon + 1);
                ns     = m_ns.LookupNamespace(prefix);
            }
            else
            {
                ns = m_ns.DefaultNamespace;
            }

            Element newel = ElementFactory.GetElement(prefix, name, ns);

            foreach (string attrname in ht.Keys)
            {
                newel.SetAttribute(attrname, (string)ht[attrname]);
            }

            if (m_root == null)
            {
                // The first element becomes the root; it is announced but does not become current.
                m_root = newel;

                if (OnStreamStart != null)
                {
                    OnStreamStart(this, m_root, m_ns.DefaultNamespace ?? "");
                }
            }
            else
            {
                if (current != null)
                {
                    current.AddChild(newel);
                }

                current = newel;
            }
        }
Ejemplo n.º 10
0
        /// <summary>
        ///   Put bytes into the parser. The bytes are appended to the internal buffer, which is then
        ///   tokenized from its start; each complete token is dispatched to the matching handler.
        /// </summary>
        /// <remarks>
        ///   PartialTokenException and ExtensibleTokenException stop the loop silently. Any other
        ///   exception is passed to OnStreamError when it has subscribers and is not rethrown.
        /// </remarks>
        /// <param name="buf"> The bytes to put into the parse stream </param>
        /// <param name="offset"> Offset into buf to start at </param>
        /// <param name="length"> Number of bytes to write </param>
        public void Push(byte[] buf, int offset, int length)
        {
            // or assert, really, but this is a little nicer.
            if (length == 0)
            {
                return;
            }

            // No locking is required.  Read() won't get called again
            // until this method returns.

            // TODO: only do this copy if we have a partial token at the
            // end of parsing.
            var copy = new byte[length];

            Buffer.BlockCopy(buf, offset, copy, 0, length);
            m_buf.Write(copy);

            // Tokenize everything currently held by m_buf, starting at its first byte.
            byte[] b   = m_buf.GetBuffer();
            int    off = 0;
            TOK    tok = TOK.END_TAG;
            var    ct  = new ContentToken();

            try
            {
                while (off < b.Length)
                {
                    // Inside a CDATA section a different tokenizer entry point is used.
                    if (m_cdata)
                    {
                        tok = m_enc.tokenizeCdataSection(b, off, b.Length, ct);
                    }
                    else
                    {
                        tok = m_enc.tokenizeContent(b, off, b.Length, ct);
                    }

                    switch (tok)
                    {
                    // An empty element is handled as a start tag immediately followed by an end tag.
                    case TOK.EMPTY_ELEMENT_NO_ATTS:
                    case TOK.EMPTY_ELEMENT_WITH_ATTS:
                        StartTag(b, off, ct, tok);
                        EndTag(b, off, ct, tok);
                        break;

                    case TOK.START_TAG_NO_ATTS:
                    case TOK.START_TAG_WITH_ATTS:
                        StartTag(b, off, ct, tok);
                        break;

                    case TOK.END_TAG:
                        EndTag(b, off, ct, tok);
                        break;

                    // Plain character data: the token bytes are decoded and added as text.
                    case TOK.DATA_CHARS:
                    case TOK.DATA_NEWLINE:
                        AddText(utf.GetString(b, off, ct.TokenEnd - off));
                        break;

                    // References: the tokenizer supplies the referenced character(s) in ct.
                    case TOK.CHAR_REF:
                    case TOK.MAGIC_ENTITY_REF:
                        AddText(new string(new[] { ct.RefChar1 }));
                        break;

                    case TOK.CHAR_PAIR_REF:
                        AddText(new string(new[] { ct.RefChar1, ct.RefChar2 }));
                        break;

                    // Comments are kept only when there is a current element to attach them to.
                    case TOK.COMMENT:
                        if (current != null)
                        {
                            // <!-- 4
                            // --> 3
                            // Note: despite its name, "end" holds the byte count of the comment text
                            // (token length minus the 7 delimiter characters), not an end index.
                            int    start = off + 4 * m_enc.MinBytesPerChar;
                            int    end   = ct.TokenEnd - off - 7 * m_enc.MinBytesPerChar;
                            string text  = utf.GetString(b, start, end);
                            current.AddChild(new Comment(text));
                        }

                        break;

                    case TOK.CDATA_SECT_OPEN:
                        m_cdata = true;
                        break;

                    case TOK.CDATA_SECT_CLOSE:
                        m_cdata = false;
                        break;

                    case TOK.XML_DECL:

                        // thou shalt use UTF8, and XML version 1.
                        // i shall ignore evidence to the contrary...

                        // TODO: Throw an exception if these assumptions are
                        // wrong
                        break;

                    // Unsupported token types. The exception thrown here is caught by the
                    // catch (Exception) handler below, so it reaches OnStreamError, not the caller.
                    // NOTE(review): "off" is not advanced past the offending token in this case,
                    // so m_buf.Clear(off) is called with the token's own start offset - confirm
                    // whether the token is meant to stay in the buffer.
                    case TOK.ENTITY_REF:
                    case TOK.PI:
#if CF
                        throw new util.NotImplementedException("Token type not implemented: " + tok);
#else
                        throw new NotImplementedException("Token type not implemented: " + tok);
#endif
                    }

                    // Continue with the token that follows the one just handled.
                    off = ct.TokenEnd;
                }
            }
            catch (PartialTokenException)
            {
                // ignored;
                // presumably the buffer ends in an incomplete token and more input is awaited - TODO confirm
            }
            catch (ExtensibleTokenException)
            {
                // ignored;
            }
            catch (Exception ex)
            {
                // Errors are reported through the event rather than rethrown.
                if (OnStreamError != null)
                {
                    OnStreamError(this, ex);
                }
            }
            finally
            {
                // Runs on every exit path with the offset reached so far;
                // presumably discards the bytes already consumed - verify against BufferAggregate.Clear.
                m_buf.Clear(off);
            }
        }
Ejemplo n.º 11
0
 /// <summary>
 /// Creates the exception and records the token type it is raised for.
 /// </summary>
 /// <param name="tokType">Token type stored in the tokType member of this exception.</param>
 public ExtensibleTokenException(TOK tokType)
 {
     this.tokType = tokType;
 }
Ejemplo n.º 12
0
		/// <summary>
		/// Closes the element that is currently open: decrements the depth counter, pops the namespace
		/// stack and makes the parent element current. When no element is open, OnStreamEnd is raised instead.
		/// </summary>
		/// <param name="buf">Token bytes; not read by this method.</param>
		/// <param name="offset">Token start offset; not read by this method.</param>
		/// <param name="ct">Token details; not read by this method.</param>
		/// <param name="tok">Token type; not read by this method.</param>
		private void EndTag(byte[] buf, int offset,	ContentToken ct, TOK tok)
		{
			m_Depth--;
			m_NamespaceStack.Pop();

			if (current == null)
			{
				// end of doc
				if (OnStreamEnd != null)
				{
					OnStreamEnd(this, m_root);
				}
				return;
			}

			// The tag name is not checked against the open element, so it is no longer decoded
			// from the buffer (the decoded string was never read).
			Element parent = (Element) current.Parent;
			if (parent == null)
			{
				// An element without a parent is passed on before it stops being current.
				DoRaiseOnStreamElement(current);
			}
			current = parent;
		}
Ejemplo n.º 13
0
 /// <summary>
 /// Creates the exception and records the token type it is raised for.
 /// </summary>
 /// <param name="tokType">Token type stored in the tokType member of this exception.</param>
 public ExtensibleTokenException(TOK tokType)
 {
     this.tokType = tokType;
 }
Ejemplo n.º 14
0
        /// <summary>
        /// Processes an end tag: decrements the depth counter, pops the namespace scope and steps back
        /// to the parent of the current element. With no current element, OnStreamEnd is raised instead.
        /// </summary>
        /// <param name="buf"> Token bytes; unused here. </param>
        /// <param name="offset"> Token start offset; unused here. </param>
        /// <param name="ct"> Token details; unused here. </param>
        /// <param name="tok"> Token type; unused here. </param>
        private void EndTag(byte[] buf, int offset, ContentToken ct, TOK tok)
        {
            m_Depth--;
            m_ns.PopScope();

            var open = current;
            if (open == null)
            {
                // end of doc
                var onEnd = OnStreamEnd;
                if (onEnd != null)
                {
                    onEnd(this, m_root);
                }

                return;
            }

            // No comparison between the tag name and the open element's name is made.
            var parent = (Element) open.Parent;
            if (parent == null)
            {
                // The element has no parent, so it is passed on before current is reset.
                DoRaiseOnStreamElement(open);
            }

            current = parent;
        }
Ejemplo n.º 15
0
        // static methods
        /// <summary>
        /// This can be used to map from any IOB-style (i.e., "I-PERS" style labels)
        /// or just categories representation to any other.
        /// </summary>
        /// <remarks>
        /// It can read and change any representation to other representations:
        /// a 4 way representation of all entities, like S-PERS, B-PERS,
        /// I-PERS, E-PERS for single word, beginning, internal, and end of entity
        /// (IOBES or SBIEO); always marking the first word of an entity (IOB2 or BIO);
        /// only marking specially the beginning of non-first
        /// items of an entity sequences with B-PERS (IOB1);
        /// the reverse IOE1 and IOE2; IO where everything is I-tagged; and
        /// NOPREFIX, where no prefixes are written on category labels.
        /// The last two representations are deficient in not allowing adjacent
        /// entities of the same class to be represented, but nevertheless
        /// convenient.  Note that the background label is never given a prefix.
        /// This code is very specific to the particular CoNLL way of labeling
        /// classes for IOB-style encoding, but this notation is quite widespread.
        /// It will work on any of these styles of input.
        /// This will also recognize BILOU format (B=B, I=I, L=E, O=O, U=S).
        /// It also works with lowercased names like i-org.
        /// If the labels are not of the form "C-Y+", where C is a single character,
        /// then they will be regarded as NOPREFIX labels.
        /// This method updates the List tokens in place.
        /// Labels are read with <paramref name="key"/>, but the converted labels are always
        /// written to CoreAnnotations.AnswerAnnotation.
        /// </remarks>
        /// <param name="tokens">List of tokens (each a CoreLabel) in some style</param>
        /// <param name="key">The key in the CoreLabel to read, commonly CoreAnnotations.AnswerAnnotation</param>
        /// <param name="backgroundLabel">The background label, which gets special treatment</param>
        /// <param name="style">Output style; one of iob[12], bio, ioe[12], io, sbieo/iobes, noprefix, bilou</param>
        /// <param name="intern">Whether to String-intern the new labels (may as well, small number!)</param>
        /// <exception cref="ArgumentException">The style is not one of the recognised names.</exception>
        public static void EntitySubclassify <TOK>(IList <TOK> tokens, Type key, string backgroundLabel, string style, bool intern)
            where TOK : ICoreMap
        {
            int    how;
            string lowerStyle = style.ToLower(Locale.English);

            switch (lowerStyle)
            {
            case "iob1":
                how = 0;
                break;

            case "iob2":
            case "bio":
                how = 1;
                break;

            case "ioe1":
                how = 2;
                break;

            case "ioe2":
                how = 3;
                break;

            case "io":
                how = 4;
                break;

            case "sbieo":
            case "iobes":
                how = 5;
                break;

            case "noprefix":
                how = 6;
                break;

            case "bilou":
                how = 7;
                break;

            default:
                throw new ArgumentException("entitySubclassify: unknown style: " + style);
            }

            // The loop below indexes i - 1 and i + 1, so it relies on PaddedList returning the pad
            // element for out-of-range indexes. The pad is cast through object because a class
            // cannot be cast directly to a type parameter.
            IList <TOK> paddedTokens = new PaddedList <TOK>(tokens, (TOK)(object) new CoreLabel());
            int         size         = paddedTokens.Count;

            string[] newAnswers = new string[size];
            for (int i = 0; i < size; i++)
            {
                TOK    c    = paddedTokens[i];
                TOK    p    = paddedTokens[i - 1];
                TOK    n    = paddedTokens[i + 1];
                string cAns = c.Get(key);
                string pAns = p.Get(key);
                if (pAns == null)
                {
                    pAns = backgroundLabel;
                }
                string nAns = n.Get(key);
                if (nAns == null)
                {
                    nAns = backgroundLabel;
                }

                // Split the current, previous and next labels into prefix and base category.
                string @base;
                char   prefix;
                SplitEntityLabel(cAns, out @base, out prefix);

                string pBase;
                char   pPrefix;
                SplitEntityLabel(pAns, out pBase, out pPrefix);

                string nBase;
                char   nPrefix;
                SplitEntityLabel(nAns, out nBase, out nPrefix);

                bool   isStartAdjacentSame = IsSameEntityBoundary(pBase, pPrefix, @base, prefix);
                bool   isEndAdjacentSame   = IsSameEntityBoundary(@base, prefix, nBase, nPrefix);
                bool   isFirst             = IsDifferentEntityBoundary(pBase, @base) || isStartAdjacentSame;
                bool   isLast              = IsDifferentEntityBoundary(@base, nBase) || isEndAdjacentSame;
                string newAnswer           = @base;

                // The background label never receives a prefix.
                if (!@base.Equals(backgroundLabel))
                {
                    switch (how)
                    {
                    case 0:
                        // iob1, only B if adjacent
                        newAnswer = (isStartAdjacentSame ? "B-" : "I-") + @base;
                        break;

                    case 1:
                        // iob2 always B at start
                        newAnswer = (isFirst ? "B-" : "I-") + @base;
                        break;

                    case 2:
                        // ioe1
                        newAnswer = (isEndAdjacentSame ? "E-" : "I-") + @base;
                        break;

                    case 3:
                        // ioe2
                        newAnswer = (isLast ? "E-" : "I-") + @base;
                        break;

                    case 4:
                        // io
                        newAnswer = "I-" + @base;
                        break;

                    case 5:
                        // sbieo / iobes
                        if (isFirst && isLast)
                        {
                            newAnswer = "S-" + @base;
                        }
                        else if (isLast)
                        {
                            newAnswer = "E-" + @base;
                        }
                        else if (isFirst)
                        {
                            newAnswer = "B-" + @base;
                        }
                        else
                        {
                            newAnswer = "I-" + @base;
                        }
                        break;

                    // nothing to do on case 6 (noprefix) as it's just base

                    case 7:
                        // bilou
                        if (isFirst && isLast)
                        {
                            newAnswer = "U-" + @base;
                        }
                        else if (isLast)
                        {
                            newAnswer = "L-" + @base;
                        }
                        else if (isFirst)
                        {
                            newAnswer = "B-" + @base;
                        }
                        else
                        {
                            newAnswer = "I-" + @base;
                        }
                        break;
                    }
                }
                if (intern)
                {
                    newAnswer = string.Intern(newAnswer);
                }
                newAnswers[i] = newAnswer;
            }

            // All new labels are computed from the old ones before any token is modified.
            for (int i_1 = 0; i_1 < size; i_1++)
            {
                TOK c = tokens[i_1];
                c.Set(typeof(CoreAnnotations.AnswerAnnotation), newAnswers[i_1]);
            }
        }

        /// <summary>
        /// Splits a label of the form "C-Y+" into its upper-cased one-character prefix and its base
        /// category; any other label is returned whole with a blank prefix.
        /// </summary>
        /// <param name="label">The label to split; must not be null.</param>
        /// <param name="baseLabel">Receives the category without the prefix.</param>
        /// <param name="prefix">Receives the upper-cased prefix character, or ' ' when there is none.</param>
        private static void SplitEntityLabel(string label, out string baseLabel, out char prefix)
        {
            if (label.Length > 2 && label[1] == '-')
            {
                baseLabel = Sharpen.Runtime.Substring(label, 2, label.Length);
                prefix    = char.ToUpperInvariant(label[0]);
            }
            else
            {
                baseLabel = label;
                prefix    = ' ';
            }
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Handles an end tag, or the closing half of an empty element: pops the namespace scope, checks
        /// the tag name against the open element and makes the parent element current.
        /// When no element is open, the end of the document is signalled instead.
        /// </summary>
        /// <param name="buf">Buffer holding the raw bytes of the token.</param>
        /// <param name="offset">Index in <paramref name="buf"/> at which the token starts.</param>
        /// <param name="ct">Token details; only NameEnd is read here.</param>
        /// <param name="tok">Token type; decides how many leading characters precede the name.</param>
        /// <exception cref="XmlException">The tag name does not match the name of the open element.</exception>
        private void EndTag(byte[] buf, int offset,
            ContentToken ct, TOK tok)
        {
            m_ns.PopScope();

            if (m_elem == null)
            {
                // end of doc
                FireOnDocumentEnd();
                return;
            }

            // One leading character is skipped for an empty element, two for an end tag.
            bool isEmptyElement = (tok == TOK.EMPTY_ELEMENT_WITH_ATTS) ||
                                  (tok == TOK.EMPTY_ELEMENT_NO_ATTS);
            int lead = isEmptyElement
                ? m_enc.MinBytesPerChar
                : m_enc.MinBytesPerChar*2;

            string name = utf.GetString(buf, offset + lead, ct.NameEnd - offset - lead);

            if (m_elem.Name != name)
            {
                string message = "Invalid end tag: " + name + " != " + m_elem.Name;
                throw new XmlException(message);
            }

            XmlElement parent = (XmlElement)m_elem.ParentNode;
            if (parent == null)
            {
                // An element without a parent is reported before it stops being current.
                FireOnElement(m_elem);
            }

            m_elem = parent;
        }
Ejemplo n.º 17
0
        /// <summary>
        ///   Decodes an attribute value from raw bytes by running it through the attribute-value
        ///   tokenizer, so that character and entity references are replaced by their characters.
        /// </summary>
        /// <remarks>
        ///   PartialTokenException and ExtensibleTokenException stop the decoding silently. Any other
        ///   exception, including the NotImplementedException thrown for ENTITY_REF tokens, is passed
        ///   to OnStreamError when it has subscribers and is not rethrown. In all of these cases the
        ///   text decoded so far is returned.
        /// </remarks>
        /// <param name="buf"> Buffer containing the raw attribute value </param>
        /// <param name="offset"> Offset into buf at which the value starts </param>
        /// <param name="length"> Number of bytes in the value </param>
        /// <returns>
        ///   The decoded value; null when length is 0 or when no token contributed any text.
        /// </returns>
        private string NormalizeAttributeValue(byte[] buf, int offset, int length)
        {
            // An empty value yields null, not an empty string.
            if (length == 0)
            {
                return(null);
            }

            string val    = null;
            var    buffer = new BufferAggregate();
            var    copy   = new byte[length];

            // The value bytes are copied into a private buffer and tokenized from its start.
            Buffer.BlockCopy(buf, offset, copy, 0, length);
            buffer.Write(copy);
            byte[] b   = buffer.GetBuffer();
            int    off = 0;
            TOK    tok = TOK.END_TAG;
            var    ct  = new ContentToken();

            try
            {
                while (off < b.Length)
                {
                    // tok = m_enc.tokenizeContent(b, off, b.Length, ct);
                    tok = m_enc.tokenizeAttributeValue(b, off, b.Length, ct);

                    switch (tok)
                    {
                    // Literal text: the token bytes are decoded and appended as they are.
                    case TOK.ATTRIBUTE_VALUE_S:
                    case TOK.DATA_CHARS:
                    case TOK.DATA_NEWLINE:
                        val += utf.GetString(b, off, ct.TokenEnd - off);
                        break;

                    // References: the tokenizer supplies the referenced character(s) in ct.
                    case TOK.CHAR_REF:
                    case TOK.MAGIC_ENTITY_REF:
                        val += new string(new[] { ct.RefChar1 });
                        break;

                    case TOK.CHAR_PAIR_REF:
                        val += new string(new[] { ct.RefChar1, ct.RefChar2 });
                        break;

                    // Unsupported; the exception is caught by the catch (Exception) handler below.
                    case TOK.ENTITY_REF:
#if CF
                        throw new util.NotImplementedException("Token type not implemented: " + tok);
#else
                        throw new NotImplementedException("Token type not implemented: " + tok);
#endif
                    }

                    // Continue with the token that follows the one just handled.
                    off = ct.TokenEnd;
                }
            }
            catch (PartialTokenException)
            {
                // ignored;
            }
            catch (ExtensibleTokenException)
            {
                // ignored;
            }
            catch (Exception ex)
            {
                // Errors are reported through the event rather than rethrown.
                if (OnStreamError != null)
                {
                    OnStreamError(this, ex);
                }
            }
            finally
            {
                // Runs on every exit path with the offset reached so far.
                buffer.Clear(off);
            }

            return(val);
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Processes a start-tag or empty-element token: opens a namespace scope, collects the
        /// attributes, creates the element and links it into the document being built.
        /// </summary>
        /// <param name="buf">Buffer holding the bytes of the token.</param>
        /// <param name="offset">Index in <paramref name="buf"/> at which the token starts.</param>
        /// <param name="ct">Tokenizer output giving the end of the name and the attribute positions.</param>
        /// <param name="tok">Token type; attributes are only read for the two *_WITH_ATTS types.</param>
        private void StartTag(byte[] buf, int offset,
            ContentToken ct, TOK tok)
        {
            int colon;
            string name;
            string prefix;
            // Attribute name -> raw attribute text; applied to the element once it exists.
            Hashtable ht = new Hashtable();

            m_ns.PushScope();

            // if i have attributes
            if ((tok == TOK.START_TAG_WITH_ATTS) ||
                (tok == TOK.EMPTY_ELEMENT_WITH_ATTS))
            {
                int start;
                int end;
                string val;
                for (int i=0; i<ct.getAttributeSpecifiedCount(); i++)
                {
                    start = ct.getAttributeNameStart(i);
                    end = ct.getAttributeNameEnd(i);
                    name = utf.GetString(buf, start, end - start);

                    start = ct.getAttributeValueStart(i);
                    end =  ct.getAttributeValueEnd(i);
                    val = utf.GetString(buf, start, end - start);

                    // <foo b='&amp;'/>
                    // <foo b='&amp;amp;'
                    // TODO: if val includes &amp;, it gets double-escaped

                    // Ordinal comparison: XML names are matched character by character,
                    // never by the rules of the current culture.
                    if (name.StartsWith("xmlns:", System.StringComparison.Ordinal))
                    {
                        // xmlns:prefix='uri' declares a prefixed namespace in the new scope.
                        colon = name.IndexOf(':');
                        prefix = name.Substring(colon+1);
                        m_ns.AddNamespace(prefix, val);
                    }
                    else if (name == "xmlns")
                    {
                        // xmlns='uri' declares the default namespace in the new scope.
                        m_ns.AddNamespace(string.Empty, val);
                    }

                    // Namespace declarations are kept as ordinary attributes as well.
                    // Hashtable.Add throws ArgumentException if the same name occurs twice.
                    ht.Add(name, val);
                }
            }

            // The name starts one character into the token (presumably just past the '<')
            // and runs up to ct.NameEnd.
            name = utf.GetString(buf,
                                 offset + m_enc.MinBytesPerChar,
                                 ct.NameEnd - offset - m_enc.MinBytesPerChar);
            colon = name.IndexOf(':');
            string ns = "";
            prefix = "";
            if (colon > 0)
            {
                // prefix:local -> resolve the prefix against the scopes opened so far.
                prefix = name.Substring(0, colon);
                name = name.Substring(colon + 1);
                ns = m_ns.LookupNamespace(prefix);
            }
            else
            {
                ns = m_ns.DefaultNamespace;
            }

            XmlQualifiedName q = new XmlQualifiedName(name, ns);
            XmlElement elem = m_factory.GetElement(prefix, q, m_doc);

            // From here on 'prefix' and 'name' are reused for the attribute being added.
            foreach (string attrname in ht.Keys)
            {
                colon = attrname.IndexOf(':');
                if (colon > 0)
                {
                    prefix = attrname.Substring(0, colon);
                    name = attrname.Substring(colon+1);

                    XmlAttribute attr = m_doc.CreateAttribute(prefix,
                                                              name,
                                                              m_ns.LookupNamespace(prefix));
                    // The stored text is the raw attribute text, so it is assigned as markup.
                    attr.InnerXml = (string)ht[attrname];
                    elem.SetAttributeNode(attr);
                }
                else
                {
                    XmlAttribute attr = m_doc.CreateAttribute(attrname);
                    attr.InnerXml = (string)ht[attrname];
                    elem.SetAttributeNode(attr);
                }
            }

            if (m_root == null)
            {
                // First element seen: it becomes the root and is announced; m_elem is left untouched.
                m_root = elem;
                FireOnDocumentStart(m_root);
            }
            else
            {
                // Attach below the element currently being built (if any) and descend into it.
                if (m_elem != null)
                    m_elem.AppendChild(elem);
                m_elem = elem;
            }
        }
        /// <summary>
        /// Handles a start-tag or empty-element token. Pushes a namespace scope, gathers the
        /// attributes, builds the element through the factory and hooks it into the tree.
        /// </summary>
        /// <param name="buf">Buffer that contains the token.</param>
        /// <param name="offset">Position in <paramref name="buf"/> where the token begins.</param>
        /// <param name="ct">Token details from the tokenizer (name end, attribute positions).</param>
        /// <param name="tok">Kind of token; only the *_WITH_ATTS kinds cause attributes to be read.</param>
        private void StartTag(byte[] buf, int offset,
                              ContentToken ct, TOK tok)
        {
            int       colon;
            string    name;
            string    prefix;
            // Maps each attribute name to its raw text until the element has been created.
            Hashtable ht = new Hashtable();

            m_ns.PushScope();

            // if i have attributes
            if ((tok == TOK.START_TAG_WITH_ATTS) ||
                (tok == TOK.EMPTY_ELEMENT_WITH_ATTS))
            {
                int    start;
                int    end;
                string val;
                for (int i = 0; i < ct.getAttributeSpecifiedCount(); i++)
                {
                    start = ct.getAttributeNameStart(i);
                    end   = ct.getAttributeNameEnd(i);
                    name  = utf.GetString(buf, start, end - start);

                    start = ct.getAttributeValueStart(i);
                    end   = ct.getAttributeValueEnd(i);
                    val   = utf.GetString(buf, start, end - start);

                    // <foo b='&amp;'/>
                    // <foo b='&amp;amp;'
                    // TODO: if val includes &amp;, it gets double-escaped

                    // Compare ordinally; a culture-sensitive match has no place in XML name tests.
                    if (name.StartsWith("xmlns:", System.StringComparison.Ordinal))
                    {
                        // Prefixed namespace declaration: register the part after "xmlns:".
                        colon  = name.IndexOf(':');
                        prefix = name.Substring(colon + 1);
                        m_ns.AddNamespace(prefix, val);
                    }
                    else if (name == "xmlns")
                    {
                        // Default namespace declaration.
                        m_ns.AddNamespace(string.Empty, val);
                    }

                    // Declarations are also stored as attributes. A repeated attribute name
                    // makes Hashtable.Add throw ArgumentException.
                    ht.Add(name, val);
                }
            }

            // Tag name: begins one character after the token start (presumably skipping '<')
            // and ends at ct.NameEnd.
            name = utf.GetString(buf,
                                 offset + m_enc.MinBytesPerChar,
                                 ct.NameEnd - offset - m_enc.MinBytesPerChar);
            colon = name.IndexOf(':');
            string ns = "";

            prefix = "";
            if (colon > 0)
            {
                // Qualified name: split it and resolve the prefix in the current scopes.
                prefix = name.Substring(0, colon);
                name   = name.Substring(colon + 1);
                ns     = m_ns.LookupNamespace(prefix);
            }
            else
            {
                ns = m_ns.DefaultNamespace;
            }

            XmlQualifiedName q    = new XmlQualifiedName(name, ns);
            XmlElement       elem = m_factory.GetElement(prefix, q, m_doc);

            // 'prefix' and 'name' now serve as scratch variables for each attribute.
            foreach (string attrname in ht.Keys)
            {
                colon = attrname.IndexOf(':');
                if (colon > 0)
                {
                    prefix = attrname.Substring(0, colon);
                    name   = attrname.Substring(colon + 1);

                    XmlAttribute attr = m_doc.CreateAttribute(prefix,
                                                              name,
                                                              m_ns.LookupNamespace(prefix));
                    // Raw attribute text is handed over as markup rather than as a plain value.
                    attr.InnerXml = (string)ht[attrname];
                    elem.SetAttributeNode(attr);
                }
                else
                {
                    XmlAttribute attr = m_doc.CreateAttribute(attrname);
                    attr.InnerXml = (string)ht[attrname];
                    elem.SetAttributeNode(attr);
                }
            }

            if (m_root == null)
            {
                // No root yet: this element becomes it and the document start is signalled.
                // m_elem is not changed in this case.
                m_root = elem;
                FireOnDocumentStart(m_root);
            }
            else
            {
                // Append to the element under construction, if there is one, then make the
                // new element the current one.
                if (m_elem != null)
                {
                    m_elem.AppendChild(elem);
                }
                m_elem = elem;
            }
        }
Ejemplo n.º 20
0
		/// <summary>
		/// Handles a start-tag or empty-element token: increases the depth counter, pushes a
		/// namespace scope, reads the attributes, creates the element and attaches it to the tree.
		/// </summary>
		/// <param name="buf">Buffer containing the token bytes.</param>
		/// <param name="offset">Index in <paramref name="buf"/> where the token starts.</param>
		/// <param name="ct">Tokenizer output with the end of the name and the attribute positions.</param>
		/// <param name="tok">Token type; attributes are read only for the two *_WITH_ATTS types.</param>
		private void StartTag(byte[] buf, int offset,
			ContentToken ct, TOK tok)
		{
			m_Depth++;
			int colon;
			string name;
			string prefix;
			// Attribute name -> decoded value; namespace declarations are not stored here.
			Hashtable ht = new Hashtable();

			m_NamespaceStack.Push();

			// if i have attributes
			if ((tok == TOK.START_TAG_WITH_ATTS) ||
				(tok == TOK.EMPTY_ELEMENT_WITH_ATTS))
			{
				int start;
				int end;
				string val;
				for (int i=0; i<ct.getAttributeSpecifiedCount(); i++)
				{
					start =  ct.getAttributeNameStart(i);
					end = ct.getAttributeNameEnd(i);
					name = utf.GetString(buf, start, end - start);

					start = ct.getAttributeValueStart(i);
					end =  ct.getAttributeValueEnd(i);
					//val = utf.GetString(buf, start, end - start);

					val = NormalizeAttributeValue(buf, start, end - start);
					// <foo b='&amp;'/>
					// <foo b='&amp;amp;'
					// TODO: if val includes &amp;, it gets double-escaped

					// Ordinal comparison: XML names are matched character by character,
					// never by the rules of the current culture.
					if (name.StartsWith("xmlns:", System.StringComparison.Ordinal))
					{
						// xmlns:prefix='uri' -> register the prefix in the scope just pushed.
						colon = name.IndexOf(':');
						prefix = name.Substring(colon+1);
						m_NamespaceStack.AddNamespace(prefix, val);
					}
					else if (name == "xmlns")
					{
						// xmlns='uri' -> default namespace for the scope just pushed.
						m_NamespaceStack.AddNamespace(string.Empty, val);
					}
					else
					{
						// Ordinary attribute. Hashtable.Add throws ArgumentException if the
						// same name occurs twice.
						ht.Add(name, val);
					}
				}
			}

			// The name starts one character into the token (presumably just past the '<')
			// and runs up to ct.NameEnd.
			name = utf.GetString(buf,
				offset + m_enc.MinBytesPerChar,
				ct.NameEnd - offset - m_enc.MinBytesPerChar);

			colon = name.IndexOf(':');
			string ns = "";
			prefix = null;
			if (colon > 0)
			{
				// prefix:local -> split and resolve the prefix; prefix stays null otherwise.
				prefix = name.Substring(0, colon);
				name = name.Substring(colon + 1);
				ns = m_NamespaceStack.LookupNamespace(prefix);
			}
			else
			{
				ns = m_NamespaceStack.DefaultNamespace;
			}

			Element newel = ElementFactory.GetElement(prefix, name, ns);

			// Attributes are set under their full name as written in the tag.
			foreach (string attrname in ht.Keys)
			{
				newel.SetAttribute(attrname, (string)ht[attrname]);
			}

			if (m_root == null)
			{
				// First element seen: it becomes the root and the stream start is raised.
				// 'current' is left untouched in this case.
				m_root = newel;
				//FireOnDocumentStart(m_root);
				if (OnStreamStart!=null)
					OnStreamStart(this, m_root);
			}
			else
			{
				// Attach below the element currently being built (if any) and descend into it.
				if (current != null)
					current.AddChild(newel);
				current = newel;
			}
		}
        /// <summary>
        /// Feeds a chunk of bytes to the parser and dispatches every token that can be
        /// recognised in the buffered data.
        /// </summary>
        /// <param name="buf">Array that contains the bytes to parse.</param>
        /// <param name="offset">Index in <paramref name="buf"/> of the first byte to consume.</param>
        /// <param name="length">Number of bytes to consume; zero makes the call a no-op.</param>
        public void Push(byte[] buf, int offset, int length)
        {
            // An empty chunk could be treated as an assertion failure; returning is friendlier.
            if (length == 0)
            {
                return;
            }

            // No locking is needed here: Read() is not invoked again until this method has
            // returned. We are already running on a ThreadPool thread that System.IO creates
            // and manages.

            // TODO: copy only when parsing ends in the middle of a token.
            byte[] chunk = new byte[length];
            System.Buffer.BlockCopy(buf, offset, chunk, 0, length);
            m_buf.Write(chunk);

            byte[]       data  = m_buf.GetBuffer();
            int          pos   = 0;
            ContentToken token = new ContentToken();

            try
            {
                while (pos < data.Length)
                {
                    // Inside a CDATA section a different tokenizer entry point applies.
                    TOK kind = m_cdata
                        ? m_enc.tokenizeCdataSection(data, pos, data.Length, token)
                        : m_enc.tokenizeContent(data, pos, data.Length, token);

                    switch (kind)
                    {
                    case TOK.START_TAG_NO_ATTS:
                    case TOK.START_TAG_WITH_ATTS:
                        StartTag(data, pos, token, kind);
                        break;

                    case TOK.EMPTY_ELEMENT_NO_ATTS:
                    case TOK.EMPTY_ELEMENT_WITH_ATTS:
                        // An empty element is opened and closed in one go.
                        StartTag(data, pos, token, kind);
                        EndTag(data, pos, token, kind);
                        break;

                    case TOK.END_TAG:
                        EndTag(data, pos, token, kind);
                        break;

                    case TOK.DATA_CHARS:
                    case TOK.DATA_NEWLINE:
                        AddText(utf.GetString(data, pos, token.TokenEnd - pos));
                        break;

                    case TOK.CHAR_REF:
                    case TOK.MAGIC_ENTITY_REF:
                        // The tokenizer has already resolved the reference to one character.
                        AddText(new string(token.RefChar1, 1));
                        break;

                    case TOK.CHAR_PAIR_REF:
                        AddText(new string(new char[] { token.RefChar1, token.RefChar2 }));
                        break;

                    case TOK.CDATA_SECT_OPEN:
                        m_cdata = true;
                        break;

                    case TOK.CDATA_SECT_CLOSE:
                        m_cdata = false;
                        break;

                    case TOK.COMMENT:
                        // Comments are only kept when there is an element to hang them on.
                        if (m_elem != null)
                        {
                            // "<!--" is 4 characters and "-->" is 3: skip the first 4 and
                            // drop 7 from the token length to get the comment text.
                            int textStart  = pos + 4 * m_enc.MinBytesPerChar;
                            int textLength = token.TokenEnd - pos -
                                             7 * m_enc.MinBytesPerChar;
                            m_elem.AppendChild(
                                m_doc.CreateComment(utf.GetString(data, textStart, textLength)));
                        }
                        break;

                    case TOK.XML_DECL:
                        // The declaration is skipped: UTF-8 and XML version 1 are assumed
                        // regardless of what it says.

                        // TODO: throw an exception if these assumptions are wrong.
                        break;

                    case TOK.ENTITY_REF:
                    case TOK.PI:
                        throw new System.NotImplementedException("Token type not implemented: " + kind);
                    }

                    // Step past the token and reset the attribute data before the next one.
                    pos = token.TokenEnd;
                    token.clearAttributes();
                }
            }
            catch (PartialTokenException)
            {
                // ignored
            }
            catch (ExtensibleTokenException)
            {
                // ignored
            }
            catch (XpNet.InvalidTokenException invalid)
            {
                throw new XMLParseException(invalid, this, buf, offset, length);
            }
            catch (Exception unexpected)
            {
                throw new Exception("Unexpected exception", unexpected);
            }
            finally
            {
                // Pass the consumed offset back to the buffer; presumably the unparsed tail
                // stays queued for the next call - confirm against the buffer's Clear method.
                m_buf.Clear(pos);
                token.clearAttributes();
            }
        }
Ejemplo n.º 22
0
 /// <summary>
 /// Tells whether the stored token type is the one given.
 /// </summary>
 /// <param name="ty">Token type to compare against.</param>
 /// <returns><c>true</c> when <c>_type</c> equals <paramref name="ty"/>; otherwise <c>false</c>.</returns>
 internal bool TypeIs(TOK ty)
 {
     bool sameType = _type == ty;
     return sameType;
 }