C# (CSharp) OA.Core.UI.Html.Parsing DynaString.SetToString Examples

Programming Language: C# (CSharp)

Namespace/Package Name: OA.Core.UI.Html.Parsing

Class/Type: DynaString

Method/Function: SetToString

Examples at hotexamples.com: 2

C# (CSharp) OA.Core.UI.Html.Parsing DynaString.SetToString - 2 examples found. These are the top-rated real-world C# (CSharp) examples of OA.Core.UI.Html.Parsing.DynaString.SetToString, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

Append(2)

Clear(2)

SetToString(2)

Dispose(1)

SetEncoding(1)

SetToStringASCII(1)

Example #1

Show file

File: HTMLParser.cs Project: BclEx/object-assets

 /// <summary>
 /// Copies text bytes from <c>_html</c> into the <c>_text</c> buffer, starting at
 /// <c>_curPos</c>, until either the data ends or a '&lt;' byte is reached.
 /// Each '&amp;' is handed to <c>_e.CheckForEntity</c>; a non-zero result is appended
 /// in place of the raw bytes, otherwise the '&amp;' itself is copied verbatim.
 /// </summary>
 /// <returns>
 /// The shared <c>_chunk</c>, with its length, type (<c>HTMLchunkType.Text</c>) and
 /// <c>Html</c> string filled in from the collected text.
 /// </returns>
 HTMLchunk ParseTextWithEntities()
 {
     for (; _curPos < _dataLength;)
     {
         var current = _html[_curPos];

         // A '<' byte ends the text run; the position is left pointing at it
         // so it is not consumed here.
         if (current == (byte)'<')
         {
             break;
         }

         // Consume the byte before any entity lookup: CheckForEntity receives
         // _curPos by ref, positioned just after the '&'.
         _curPos++;

         if (current == (byte)'&')
         {
             var decoded = _e.CheckForEntity(_html, ref _curPos, _dataLength);
             if (decoded != 0)
             {
                 // Record that a '<' in the text came from an entity rather than raw markup.
                 if (decoded == (byte)'<')
                 {
                     _chunk.LtEntity = true;
                 }
                 _text.Append(decoded);
                 continue;
             }
             // Zero result: no entity was produced, so fall through and keep the '&' as is.
         }

         _text._buffer[_text._bufPos++] = current;
     }

     _chunk.ChunkLength = _curPos - _chunk.ChunkOffset;
     _chunk.Type = HTMLchunkType.Text;
     _chunk.Html = _text.SetToString();
     return _chunk;
 }

Example #2

Show file

File: TagParser.cs Project: BclEx/object-assets

        /// <summary>
        /// Internal: parses tag that started from current position
        /// </summary>
        /// <returns>HTMLchunk with tag information</returns>
        internal HTMLchunk ParseTag(ref int curPos)
        {
            /*
             *  WARNING: this code was optimised for performance rather than for readability,
             *  so be extremely careful at changing it -- your changes could easily result in wrongly parsed HTML
             *
             *  This routine takes about 60% of CPU time, in theory its the best place to gain extra speed,
             *  but I've spent plenty of time doing it, so it won't be easy... and if it is easy then please post
             *  your changes for everyone to enjoy!
             * */

            //var whiteSpaceHere = false;

            //var paramValue = false;
            byte c     = 0;
            byte cPeek = 0;

            // if true it means we have parsed complete tag
            //var gotTag = false;

            //var equalIdx = 0;

            // we reach this function immediately after tag's byte (<) was
            // detected, so we need to save it in order to keep correct HTML copy
            // _hunk.Append((byte)'<'); // (byte)'<'

            /*
             * _chunk.Buffer[0] = 60;
             * _chunk.BufPos = 1;
             * _chunk.HTMLen = 1;
             */

            // initialise peeked char - this will point to the next after < character
            if (curPos < _dataLength)
            {
                cPeek = _html[curPos];
                // in case of comments ! must follow immediately after <
                if (cPeek == (byte)'!')
                {
                    if (curPos + 2 < _dataLength &&
                        _html[curPos + 1] == (byte)'-' && _html[curPos + 2] == (byte)'-')
                    {
                        // we detected start of comments here, instead of parsing the rest here we will
                        // call special function tuned to do the job much more effectively
                        _chunk.Tag      = "!--";
                        _chunk.Type     = HTMLchunkType.Comment;
                        _chunk.Comments = true;
                        // _chunk.Append((byte)'!');
                        // _chunk.Append((byte)'-');
                        // _chunk.Append((byte)'-');
                        curPos            += 3;
                        _chunk             = ParseComments(ref curPos, out bool fullTag);
                        _chunk.ChunkLength = curPos - _chunk.ChunkOffset;
                        if (_p.AutoKeepComments || _p.KeepRawHTML)
                        {
                            if (!_p.AutoExtractBetweenTagsOnly)
                            {
                                _chunk.Html = GetString(_chunk.ChunkOffset, _chunk.ChunkLength);
                            }
                            else
                            {
                                _chunk.Html = GetString(_chunk.ChunkOffset + 4, _chunk.ChunkLength - fullTag ? 7 : 4);
                            }
                        }
                        return(_chunk);
                    }

                    // ok we might have here CDATA element of XML:
                    // ref: http://www.w3schools.com/xml/xml_cdata.asp
                    if (curPos + 7 < _dataLength &&
                        _html[curPos + 1] == (byte)'[' &&
                        _html[curPos + 2] == (byte)'C' &&
                        _html[curPos + 3] == (byte)'D' &&
                        _html[curPos + 4] == (byte)'A' &&
                        _html[curPos + 5] == (byte)'T' &&
                        _html[curPos + 6] == (byte)'A' &&
                        _html[curPos + 7] == (byte)'[')
                    {
                        // we detected start of comments here, instead of parsing the rest here we will
                        // call special function tuned to do the job much more effectively
                        _chunk.Tag      = "![CDATA[";
                        _chunk.Type     = HTMLchunkType.Comment;
                        _chunk.Comments = true;
                        // _chunk.Append((byte)'!');
                        // _chunk.Append((byte)'-');
                        // _chunk.Append((byte)'-');
                        curPos            += 8;
                        _chunk             = ParseCDATA(ref curPos, out bool fullTag);
                        _chunk.ChunkLength = curPos - _chunk.ChunkOffset;
                        if (_p.AutoKeepComments || _p.KeepRawHTML)
                        {
                            if (!_p.AutoExtractBetweenTagsOnly)
                            {
                                _chunk.Html = GetString(_chunk.ChunkOffset, _chunk.ChunkLength);
                            }
                            else
                            {
                                _chunk.Html = GetString(_chunk.ChunkOffset + 4 + 5, _chunk.ChunkLength - fullTag ? 7 + 5 : 4 + 5);
                            }
                        }
                        return(_chunk);
                    }
                }
            }
            else
            {
                // empty tag but its not closed, so we will call it open...
                _chunk.Type = HTMLchunkType.OpenTag;
                // end of data... before it started
                return(_chunk);
            }

            // tag ID, non-zero if matched by heuristics engine
            var tagId = 0;

            // STAGE 0: lets try some heuristics to see if we can quickly identify most common tags
            // that should be present most of the time, this should save a lot of looping and string creation
            if (EnableHeuristics && curPos < _maxHeuDataLength)
            {
                // check if we have got closure of the tag
                if (cPeek == (byte)'/')
                {
                    _chunk.Closure    = true;
                    _chunk.EndClosure = false;
                    _chunk.Type       = HTMLchunkType.CloseTag;
                    curPos++;
                    cPeek = _html[curPos];
                }
                c = _html[curPos + 1];
                // probability of having a match is very high (or so we expect)
                tagId = _he.MatchTag(cPeek, c);
                if (tagId != 0)
                {
                    if (tagId < 0)
                    {
                        tagId *= -1;
                        // single character tag
                        _chunk.Tag = _he.GetString(tagId);
                        // see if we got fully closed tag
                        if (c == (byte)'>')
                        {
                            curPos += 2;
                            goto ReturnChunk;
                        }
                        cPeek = c;
                        curPos++;
                        // everything else means we need to continue scanning as we may have params and stuff
                        goto AttributeParsing;
                    }
                    else
                    {
                        // ok, we have here 2 or more character string that we need to check further
                        // often when we have full 2 char match the next char will be >, if that's the case
                        // then we definately matched our tag
                        var nextChar = _html[curPos + 2];
                        if (nextChar == (byte)'>')
                        {
                            //oChunk.sTag=oHE.GetString(iTagID);
                            _chunk.Tag = _he.GetTwoCharString(cPeek, c);
                            curPos    += 3;
                            goto ReturnChunk;
                        }

                        // ok, check next char for space, if that's the case we still got our tag
                        // but need to skip to attribute parsing
                        if (nextChar == (byte)' ')
                        {
                            //_chunk.Tag = _he.GetString(tagId);
                            _chunk.Tag = _he.GetTwoCharString(cPeek, c);
                            curPos    += 2;
                            cPeek      = nextChar;
                            goto AttributeParsing;
                        }

                        // ok, we are not very lucky, but it is still worth fighting for
                        // now we need to check fully long string against what we have matched, maybe
                        // we got exact match and we can avoid full parsing of the tag
                        var tag = _he.GetStringData(tagId);

                        if (curPos + tag.Length + 5 >= _dataLength)
                        {
                            goto TagParsing;
                        }

                        // in a loop (and this is not an ideal solution, but still)
                        for (int i = 2; i < tag.Length; i++)
                        {
                            // if a single char is not matched, then we
                            if (tag[i] != _html[curPos + i])
                            {
                                goto TagParsing;
                            }
                        }

                        // ok we matched full long word, but we need to be sure that char
                        // after the word is ' ' or '>' as otherwise we may have matched prefix of even longer word
                        nextChar = _html[curPos + tag.Length];
                        if (nextChar == (byte)'>')
                        {
                            _chunk.Tag = _he.GetString(tagId);
                            curPos    += tag.Length + 1;
                            goto ReturnChunk;
                        }
                        if (nextChar == (byte)' ')
                        {
                            cPeek      = nextChar;
                            _chunk.Tag = _he.GetString(tagId);
                            curPos    += tag.Length;
                            goto AttributeParsing;
                        }
                        // no luck: we need to parse tag fully as our heuristical matching failed miserably :'o(
                    }
                }
            }
TagParsing:

            _text.Clear();
            var charType = 0;

            // STAGE 1: parse tag (anything until > or /> or whitespace leading to start of attribute)
            while (cPeek != 0)
            {
                charType = _tagCharTypes[cPeek];

                //if (cPeek <= 32 && whiteSpace[cPeek] == 1)
                if (charType == (byte)TagCharType.WhiteSpace)
                {
                    curPos++;
                    // speculative loop unroll -- we have a very good chance of seeing non-space char next
                    // so instead of setting up loop we will just read it directly, this should save ticks
                    // on having to prepare while() loop
                    if (curPos < _dataLength)
                    {
                        c = _html[curPos++];
                    }
                    else
                    {
                        c = 0;
                    }
                    charType = _tagCharTypes[c];

                    //if (c == ' ' || c == '\t' || c == 13 || c == 10)
                    //if (c <= 32 && whiteSpace[c] == 1)
                    if (charType == (byte)TagCharType.WhiteSpace)
                    {
                        while (curPos < _dataLength)
                        {
                            c        = _html[curPos++];
                            charType = _tagCharTypes[c];
                            if (charType == (byte)TagCharType.WhiteSpace)
                            //if(c != ' ' && c != '\t' && c != 13 && c != 10)
                            {
                                //cPeek = _html[curPos];
                                continue;
                            }
                            break;
                        }
                        if (curPos >= _dataLength)
                        {
                            c = 0;
                        }
                    }

                    //whiteSpaceHere = true;

                    // now, if we have already got tag it means that we are most likely
                    // going to need to parse tag attributes
                    if (_text._bufPos > 0)
                    {
                        _chunk.Tag = _text.SetToStringASCII();
                        // _chunk.Append((byte)' ');
                        curPos--;
                        if (curPos < _dataLength)
                        {
                            cPeek = _html[curPos];
                        }
                        else
                        {
                            cPeek = 0;
                        }
                        break;
                    }
                }
                else
                {
                    // reuse Peeked char from previous run
                    //c = cPeek; curPos++;
                    if (curPos < _dataLength)
                    {
                        c = _html[curPos++];
                    }
                    else
                    {
                        c = 0;
                    }
                }
                if (curPos < _dataLength)
                {
                    cPeek = _html[curPos];
                }
                else
                {
                    cPeek = 0;
                }
                // most likely we should have lower-cased ASCII char
                if (charType == (byte)TagCharType.LowerCasedASCIIorDigit)
                {
                    _text._buffer[_text._bufPos++] = c;
                    // _chunk.Append(c);
                    continue;
                }

                // tag end - we did not have any params
                if (c == (byte)'>')
                {
                    if (_text._bufPos > 0)
                    {
                        _chunk.Tag = _text.SetToStringASCII();
                    }
                    if (!_chunk.Closure)
                    {
                        _chunk.Type = HTMLchunkType.OpenTag;
                    }
                    return(_chunk);
                }

                // closure of tag sign
                if (c == (byte)'/')
                {
                    _chunk.Closure    = true;
                    _chunk.EndClosure = (_text._bufPos > 0);
                    _chunk.Type       = HTMLchunkType.CloseTag;
                    continue;
                }

                // 03/08/08 XML support: ?xml tags - grrr
                if (c == (byte)'?')
                {
                    _text._buffer[_text._bufPos++] = c;
                    continue;
                }

                // nope, we have got upper cased ASCII char - this seems to be LESS likely than > and /
                //if (c >= 65 && c <= 90)
                if (charType > 32)
                {
                    // bCharType in this case contains already lower-cased char
                    _text._buffer[_text._bufPos++] = charType;
                    // _chunk.Append(bCharType);
                    continue;
                }

                // we might have namespace : sign here - all text before would have to be
                // saved as namespace and we will need to continue parsing actual tag
                if (charType == (byte)TagCharType.NameSpaceColon)
                {
                    // ok here we got a choice - we can just continue and treat the whole
                    // thing as a single tag with namespace stuff prefixed, OR
                    // we can separate first part into namespace and keep tag as normal
                    _text._buffer[_text._bufPos++] = (byte)':';
                    continue;
                }
                // ok, we have got some other char - we break out to deal with it in attributes part
                break;
            }

            if (cPeek == 0)
            {
                return(_chunk);
            }

            // if true then equal sign was found
            //var equalsSign = false;

            // STAGE 2: parse attributes (if any available)
            // attribute name can be standalone or with value after =
            // attribute itself can't have entities or anything like this - we expect it to be in ASCII characters
AttributeParsing:

            string attrName;

            if (tagId != 0)
            {
                // first, skip whitespace:
                if (cPeek <= 32 && _tagCharTypes[cPeek] == (byte)TagCharType.WhiteSpace)
                {
                    // most likely next char is not-whitespace
                    curPos++;
                    if (curPos >= _dataLength)
                    {
                        goto ReturnChunk;
                    }
                    cPeek = _html[curPos];
                    if (cPeek <= 32 && _tagCharTypes[cPeek] == (byte)TagCharType.WhiteSpace)
                    {
                        // ok long loop here then
                        while (curPos < _dataLength)
                        {
                            cPeek = _html[curPos++];
                            if (cPeek <= 32 && _tagCharTypes[cPeek] == (byte)TagCharType.WhiteSpace)
                            {
                                continue;
                            }
                            break;
                        }
                        if (cPeek == (byte)'>')
                        {
                            goto ReturnChunk;
                        }
                        curPos--;
                        if (curPos >= _dataLength)
                        {
                            goto ReturnChunk;
                        }
                    }
                    if (curPos >= _dataLength)
                    {
                        goto ReturnChunk;
                    }
                }

                // ok we have got matched tag, it is possible that we might be able to quickly match
                // attribute name known to be used for that tag:
                var attrId = _he.MatchAttr(cPeek, tagId);
                if (attrId > 0)
                {
                    var attr = _he.GetAttrData(attrId);
                    if (curPos + attr.Length + 2 >= _dataLength)
                    {
                        goto ActualAttributeParsing;
                    }
                    // in a loop (and this is not an ideal solution, but still)
                    for (var i = 1; i < attr.Length; i++)
                    {
                        // if a single char is not matched, then we
                        if (attr[i] != _html[curPos + i])
                        {
                            goto ActualAttributeParsing;
                        }
                    }
                    var nextChar = _html[curPos + attr.Length];
                    // ok, we expect next symbol to be =
                    if (nextChar == (byte)'=')
                    {
                        attrName = _he.GetAttr(attrId);
                        curPos  += attr.Length + 1;
                        cPeek    = _html[curPos];
                        goto AttributeValueParsing;
                    }
                }
            }

ActualAttributeParsing:

            _text.Clear();
            // doing exactly the same thing as in tag parsing
            while (cPeek != 0)
            {
                charType = _tagCharTypes[cPeek];
                //if (cPeek <= 32 && whiteSpace[cPeek] == 1)
                if (charType == (byte)TagCharType.WhiteSpace)
                {
                    curPos++;
                    // speculative loop unroll -- we have a very good chance of seeing non-space char next
                    // so instead of setting up loop we will just read it directly, this should save ticks
                    // on having to prepare while() loop
                    if (curPos < _dataLength)
                    {
                        c = _html[curPos++];
                    }
                    else
                    {
                        cPeek = 0;
                        break;
                    }
                    charType = _tagCharTypes[c];
                    //if (c == ' ' || c == '\t' || c == 13 || c == 10)
                    //if (c <= 32 && whiteSpace[c] == 1)
                    if (charType == (byte)TagCharType.WhiteSpace)
                    {
                        while (curPos < _dataLength)
                        {
                            c        = _html[curPos++];
                            charType = _tagCharTypes[c];
                            if (charType == (byte)TagCharType.WhiteSpace)
                            //if(c != ' ' && c != '\t' && c != 13 && c != 10)
                            {
                                //cPeek = _html[curPos];
                                continue;
                            }
                            //if (c == (byte)'>')
                            // goto ReturnChunk;
                            //curPos--;
                            break;
                        }
                        if (curPos >= _dataLength)
                        {
                            c     = 0;
                            cPeek = 0;
                            break;
                        }
                    }

                    //whiteSpaceHere = true;

                    // now, if we have already got attribute name it means that we need to go to parse value (which may not be present)
                    if (_text._bufPos > 0)
                    {
                        // _chunk.Append((byte)' ');
                        curPos--;
                        if (curPos < _dataLength)
                        {
                            cPeek = _html[curPos];
                        }
                        else
                        {
                            cPeek = 0;
                        }
                        // ok, we have got attribute name and now we have got next char there
                        // most likely we have got = here  and then value
                        if (cPeek == (byte)'=')
                        {
                            //equalsSign = true;
                            // move forward one char
                            curPos++;
                            if (curPos < _dataLength)
                            {
                                cPeek = _html[curPos];
                            }
                            else
                            {
                                cPeek = 0;
                            }
                            break;
                        }
                        // or we can have end of tag itself, doh!
                        if (cPeek == (byte)'>')
                        {
                            // move forward one char
                            curPos++;
                            if (_text._bufPos > 0)
                            {
                                _chunk.AddParam(_text.SetToStringASCII(), "", (byte)' ');
                            }
                            if (!_chunk.Closure)
                            {
                                _chunk.Type = HTMLchunkType.OpenTag;
                            }
                            return(_chunk);
                        }
                        // closure
                        if (cPeek == (byte)'/')
                        {
                            _chunk.Closure    = true;
                            _chunk.EndClosure = true;
                            _chunk.Type       = HTMLchunkType.CloseTag;
                            continue;
                        }
                        // ok, we have got new char starting after current attribute name is fully parsed
                        // this means the attribute name is on its own and the char we found is start
                        // of a new attribute
                        _chunk.AddParam(_text.SetToStringASCII(), "", (byte)' ');
                        _text.Clear();
                        goto AttributeParsing;
                    }
                }
                else
                {
                    // reuse Peeked char from previous run
                    //c = cPeek; curPos++;
                    if (curPos < _dataLength)
                    {
                        c = _html[curPos++];
                    }
                    else
                    {
                        c = 0;
                    }
                }
                if (curPos < _dataLength)
                {
                    cPeek = _html[curPos];
                }
                else
                {
                    cPeek = 0;
                }
                // most likely we should have lower-cased ASCII char here
                if (charType == (byte)TagCharType.LowerCasedASCIIorDigit)
                {
                    _text._buffer[_text._bufPos++] = c;
                    // _chunk.Append(cChar);
                    continue;
                }

                // = with attribute value to follow
                if (c == (byte)'=')
                {
                    //equalsSign=true;
                    break;
                }

                // nope, we have got upper cased ASCII char - this seems to be LESS likely than > and /
                //if(c >= 65 && c <= 90)
                if (charType > 32)
                {
                    // bCharType in this case contains already lower-cased char
                    _text._buffer[_text._bufPos++] = charType;
                    // _chunk.Append(bCharType);
                    continue;
                }

                // tag end - we did not have any params
                if (c == (byte)'>')
                {
                    if (_text._bufPos > 0)
                    {
                        _chunk.AddParam(_text.SetToStringASCII(), "", (byte)' ');
                    }
                    if (!_chunk.Closure)
                    {
                        _chunk.Type = HTMLchunkType.OpenTag;
                    }
                    return(_chunk);
                }

                // closure of tag sign
                if (c == (byte)'/')
                {
                    _chunk.Closure    = true;
                    _chunk.EndClosure = true;
                    _chunk.Type       = HTMLchunkType.CloseTag;
                    continue;
                }

                // some other char
                _text._buffer[_text._bufPos++] = c;
                // _chunk.Append(cChar);
            }

            if (cPeek == 0)
            {
                if (_text._bufPos > 0)
                {
                    _chunk.AddParam(_text.SetToStringASCII(), "", (byte)' ');
                }
                if (!_chunk.Closure)
                {
                    _chunk.Type = HTMLchunkType.OpenTag;
                }
                return(_chunk);
            }

            attrName = _text.SetToStringASCII();

AttributeValueParsing:

            /// ***********************************************************************
            /// STAGE 3: parse attribute value
            /// ***********************************************************************

            // the value could be just string, or in quotes (single or double)
            // or we can have next attribute name start, in which case we will jump back to attribute parsing

            // for tracking quotes purposes
            var quotes = cPeek;

            int valueStartOffset;

            // skip whitespace if any
            if (cPeek <= 32 && _tagCharTypes[cPeek] == (byte)TagCharType.WhiteSpace)
            {
                curPos++;
                // speculative loop unroll -- we have a very good chance of seeing non-space char next
                // so instead of setting up loop we will just read it directly, this should save ticks
                // on having to prepare while() loop
                if (curPos < _dataLength)
                {
                    cPeek = _html[curPos];
                }
                else
                {
                    valueStartOffset = curPos - 1;
                    goto AttributeValueEnd;
                }

                //if (c == ' ' || c == '\t' || c == 13 || c == 10)
                //if (c <= 32 && whiteSpace[c] == 1)
                if (cPeek <= 32 && _tagCharTypes[cPeek] == (byte)TagCharType.WhiteSpace)
                {
                    while (curPos < _dataLength)
                    {
                        cPeek = _html[curPos++];
                        if (cPeek <= 32 && _tagCharTypes[cPeek] == (byte)TagCharType.WhiteSpace)
                        //if(c != ' ' && c != '\t' && c != 13 && c != 10)
                        {
                            //cPeek = _html[curPos];
                            continue;
                        }
                        curPos--;
                        break;
                    }
                    if (curPos >= _dataLength)
                    {
                        valueStartOffset = curPos - 1;
                        goto AttributeValueEnd;
                    }
                }
                quotes = cPeek;
            }

            // because we deal with VALUE of the attribute it means we can't lower-case it,
            // or skip whitespace (if in quotes), which in practice means that we don't need to copy
            // it to temporary string buffer, we can just remember starting offset and then create string from
            // data in bHTML

            // ok, first char can be one of the quote chars or something else
            if (cPeek != '\"' && cPeek != '\'')
            {
                valueStartOffset = curPos;
                quotes           = (byte)' ';
                // any other char here means we have value up until next whitespace or end of tag
                // this gives us good opportunity to scan fairly quickly without otherwise redundant
                // checks - this should happen fairly rarely, however loop dealing with data between quotes
                // will happen often enough and its best to eliminate as much stuff from it as possible
                //sText.bBuffer[sText.iBufPos++]=cPeek;

                // move to next char
                if (curPos < _dataLength)
                {
                    cPeek = _html[curPos++];
                }
                else
                {
                    goto AttributeValueEnd;
                }

                while (cPeek != 0)
                {
                    // if whitespace then we got our value and need to go back to param
                    if (cPeek <= 32 && _tagCharTypes[cPeek] == (byte)TagCharType.WhiteSpace)
                    {
                        _chunk.AddParam(attrName, GetString(valueStartOffset, curPos - valueStartOffset - 1), (byte)' ');
                        curPos--;
                        goto AttributeParsing;
                    }
                    // end of tag?
                    if (cPeek == (byte)'>')
                    {
                        //curPos--;
                        break;
                    }
                    if (curPos < _dataLength)
                    {
                        cPeek = _html[curPos++];
                    }
                    else
                    {
                        curPos = _dataLength + 1;
                        goto AttributeValueEnd;
                    }
                }

                // ok we are done, add outstanding attribute
                _chunk.AddParam(attrName, GetString(valueStartOffset, curPos - valueStartOffset - 1), (byte)' ');

                goto ReturnChunk;
            }

            // move one step forward
            curPos++;
            valueStartOffset = curPos;
            if (curPos < _dataLength)
            {
                cPeek = _html[curPos++];
            }
            else
            {
                goto AttributeValueEnd;
            }

            // attribute value parsing from between two quotes
            while (cPeek != 0)
            {
                // check whether we have got possible entity (can be anything starting with &)
                if (cPeek == 38)
                {
                    var prevPos    = curPos;
                    var entityChar = _e.CheckForEntity(_html, ref curPos, _dataLength);
                    // restore current symbol
                    if (entityChar == 0)
                    {
                        if (curPos < _dataLength)
                        {
                            cPeek = _html[curPos++];
                        }
                        else
                        {
                            break;
                        }
                        //_text.Buffer[_text.BufPos++] = 38; //(byte)'&';;
                        continue;
                    }
                    else
                    {
                        // okay we have got an entity, our hope of not having to copy stuff into variable
                        // is over, we have to continue in a slower fashion :(
                        // but thankfully this should happen very rarely, so, annoying to code, but
                        // most codepaths will run very fast!
                        var preEntLen = prevPos - valueStartOffset - 1;

                        // 14/05/08 need to clear text - it contains attribute name text
                        _text.Clear();
                        // copy previous data
                        if (preEntLen > 0)
                        {
                            Array.Copy(_html, valueStartOffset, _text._buffer, 0, preEntLen);
                            _text._bufPos = preEntLen;
                        }
                        // we have to skip now to next byte, since
                        // some converted chars might well be control chars like >
                        _chunk.Entities = true;
                        if (c == (byte)'<')
                        {
                            _chunk.LtEntity = true;
                        }
                        // unless is space we will ignore it
                        // note that this won't work if &nbsp; is defined as it should
                        // byte int value of 160, rather than 32.
                        //if (c != ' ')
                        _text.Append(entityChar);
                        if (curPos < _dataLength)
                        {
                            cPeek = _html[curPos++];
                        }
                        else
                        {
                            goto AttributeValueEnd;
                        }

                        // okay, we continue here using in effect new inside loop as we might have more entities here
                        // attribute value parsing from between two quotes
                        while (cPeek != 0)
                        {
                            // check whether we have got possible entity (can be anything starting with &)
                            if (cPeek == 38)
                            {
                                var newEntityChar = _e.CheckForEntity(_html, ref curPos, _dataLength);
                                // restore current symbol
                                if (newEntityChar != 0)
                                {
                                    if (newEntityChar == (byte)'<')
                                    {
                                        _chunk.LtEntity = true;
                                    }
                                    _text.Append(newEntityChar);
                                    if (curPos < _dataLength)
                                    {
                                        cPeek = _html[curPos++];
                                    }
                                    else
                                    {
                                        goto AttributeValueEnd;
                                    }
                                    continue;
                                }
                            }

                            // check if is end of quotes
                            if (cPeek == quotes)
                            {
                                // ok we finished scanning it: add param with value and then go back to param name parsing
                                _chunk.AddParam(attrName, _text.SetToString(), quotes);
                                if (curPos < _dataLength)
                                {
                                    cPeek = _html[curPos];
                                }
                                else
                                {
                                    break;
                                }
                                goto AttributeParsing;
                            }
                            _text._buffer[_text._bufPos++] = cPeek;
                            //_text.Append(cPeek);
                            if (curPos < _dataLength)
                            {
                                cPeek = _html[curPos++];
                            }
                            else
                            {
                                break;
                            }
                        }
                        _chunk.AddParam(attrName, _text.SetToString(), quotes);
                        goto ReturnChunk;
                    }
                }

                // check if is end of quotes
                if (cPeek == quotes)
                {
                    // ok we finished scanning it: add param with value and then go back to param name parsing
                    //_text.Clear();
                    _chunk.AddParam(attrName, GetString(valueStartOffset, curPos - valueStartOffset - 1), quotes);
                    if (curPos < _dataLength)
                    {
                        cPeek = _html[curPos];
                    }
                    else /*curPos++;*/ break {
                        ;
                    }
                    goto AttributeParsing;
                }

                if (curPos < _dataLength)
                {
                    cPeek = _html[curPos++];
                }
                else /*curPos++;*/ break {
                    ;
                }