Esempio n. 1
0
        /// <summary> FilterInputBytes -> filterBytes
        /// 
        /// Helper function for getsObj. Appends Unicode characters
        /// onto the TclObject associated with the GetsState after
        /// converting them from raw bytes encoded in the Channel.
        /// 
        /// Consumes available bytes from channel buffers.  When channel
        /// buffers are exhausted, reads more bytes from channel device into
        /// a new channel buffer.  It is the caller's responsibility to
        /// free the channel buffers that have been exhausted.
        /// 
        /// The return value is -1 if there was an error reading from the
        /// channel, 0 otherwise.
        /// 
        /// FIXME: Doc modification of object's StringBuffer
        /// 
        /// Status object keeps track of how much data from channel buffers
        /// has been consumed and where characters should be stored.
        /// </summary>

        internal int filterBytes(GetsState gs)
        {
            // Purpose: decode the next slice of raw channel bytes into
            // characters and append them to gs.obj.
            //
            // gs: per-gets bookkeeping. On entry gs.rawRead / gs.charsWrote
            //     hold the counts produced by the previous call; they are
            //     folded into the current buffer / gs.totalChars first.
            //     On exit they hold the counts for the slice converted by
            //     this call, and gs.buf is the buffer that slice came from.
            //
            // Returns 0 on success, -1 when no data could be obtained (the
            // channel is blocked in non-blocking mode, or Input reported a
            // non-zero status). On -1 both gs.rawRead and gs.charsWrote are 0.

            // Upper bound on how many source bytes are converted per call
            // (and therefore on the size of the temporary char buffer).
            const int ENCODING_LINESIZE = 20;

            ChannelBuffer buf;
            byte[] raw;
            char[] dst;
            int rawStart, rawEnd, rawLen, toRead, result;

            // Set when a partial multibyte sequence forces another device
            // read even though the current buffer is not empty.
            bool forceRead = false;

            // Account for the bytes/chars the previous call handed out.
            buf = gs.buf;
            if (buf != null)
            {
                buf.nextRemoved += gs.rawRead.i;
                if (buf.nextRemoved >= buf.nextAdded)
                {
                    // Current buffer fully consumed; move on to its successor.
                    buf = buf.next;
                }
            }
            gs.totalChars += gs.charsWrote.i;

            while (true)
            {
                if (forceRead || (buf == null) || (buf.nextAdded == Tcl.Lang.ChannelBuffer.BUFFER_PADDING))
                {
                    // All channel buffers were exhausted and the caller still
                    // hasn't seen EOL.  Need to read more bytes from the
                    // channel device.  Side effect is to allocate another
                    // channel buffer.

                    if (blocked)
                    {
                        if (!blocking)
                        {
                            gs.charsWrote.i = 0;
                            gs.rawRead.i = 0;
                            return -1;
                        }
                        blocked = false;
                    }
                    if (Input != 0)
                    {
                        gs.charsWrote.i = 0;
                        gs.rawRead.i = 0;
                        return -1;
                    }
                    buf = inQueueTail;
                    gs.buf = buf;
                }

                // Convert some of the bytes from the channel buffer to
                // characters, at most ENCODING_LINESIZE source bytes' worth.

                raw = buf.buf;
                rawStart = buf.nextRemoved;
                rawEnd = buf.nextAdded;
                rawLen = rawEnd - rawStart;

                toRead = ENCODING_LINESIZE;
                if (toRead > rawLen)
                {
                    toRead = rawLen;
                }

                dst = new char[toRead];
                gs.state = encodingState;
                result = externalToUnicode(raw, rawStart, rawLen, dst, 0, toRead, gs.rawRead, null, gs.charsWrote);
                TclString.append(gs.obj, dst, 0, gs.charsWrote.i);

                // Make sure that if we go through 'gets', that we reset the
                // TCL_ENCODING_START flag still.

                encodingStart = false;

                if (result == TCL_CONVERT_MULTIBYTE)
                {
                    // The last few bytes in this channel buffer were the start
                    // of a multibyte sequence.

                    if (buf.nextAdded < buf.bufLength)
                    {
                        // The buffer still has room, so the sequence is simply
                        // incomplete so far.
                        if (gs.rawRead.i <= 0)
                        {
                            if (eofCond)
                            {
                                // There was a partial character followed by
                                // EOF on the device.  Discard it and report
                                // that nothing was found.

                                buf.nextRemoved = buf.nextAdded;
                            }
                            else
                            {
                                // There are no more cached raw bytes left.
                                // See if we can get some more.

                                forceRead = true;
                                continue;
                            }
                        }
                        // else: some raw bytes were converted.  Return those
                        // characters because an EOL might be present in them.
                    }
                    else
                    {
                        // The buffer is full: move the dangling bytes into the
                        // padding area in front of the next buffer so the
                        // sequence becomes contiguous there.

                        ChannelBuffer next = buf.next;
                        if (next == null)
                        {
                            next = new ChannelBuffer(bufSize);
                            buf.next = next;
                            inQueueTail = next;
                        }
                        int extra = rawLen - gs.rawRead.i;

                        // gs.rawRead counts bytes consumed starting at
                        // rawStart, so the unconsumed tail begins at
                        // rawStart + gs.rawRead.i within buf.buf.
                        Array.Copy(raw, rawStart + gs.rawRead.i, next.buf, Tcl.Lang.ChannelBuffer.BUFFER_PADDING - extra, extra);
                        next.nextRemoved -= extra;
                        buf.nextAdded -= extra;
                    }
                }

                // Normal case: one conversion pass per call.
                break;
            }

            gs.buf = buf;
            return 0;
        }
Esempio n. 2
0
        /// <summary> PeekAhead -> peekAhead
        /// 
        /// Helper function used by getsObj.  Called when we've seen a
        /// \r at the end of the string and want to look ahead one
        /// character to see if it is a \n.
        /// 
        /// Characters read from the channel are appended to gs.obj
        /// via the filterBytes method.
        /// </summary>

        internal void peekAhead(GetsState gs)
        {
            // Looks one conversion step ahead (via filterBytes) so the caller
            // can check whether a '\n' follows a trailing '\r'.  When the
            // last buffer is drained and was not filled to capacity, no peek
            // is attempted; the pending counts in gs are committed instead.

            ChannelBuffer current = gs.buf;

            // Decide whether peeking makes sense.  If more raw input is still
            // buffered we always peek into it.  Otherwise we only ask the
            // channel for more when the last buffer was filled right up to
            // the end, on the assumption that a short read means there is
            // nothing more to fetch right now.
            bool skipPeek = false;
            if (current.next == null)
            {
                int unread = current.nextAdded - (current.nextRemoved + gs.rawRead.i);
                if (unread == 0 && current.nextAdded < current.bufLength)
                {
                    // Don't peek ahead if last read was short read.
                    skipPeek = true;
                }

                // FIXME: The original C code temporarily switched a blocking
                // channel to non-blocking mode here.  That step is disabled:
                // non-blocking is not currently supported and it is not clear
                // why peeking would need to depend on it anyway.
            }

            if (!skipPeek)
            {
                // Characters read are appended to gs.obj; the caller picks up
                // the count from gs.charsWrote.  The status is ignored here.
                filterBytes(gs);
                return;
            }

            // No peek: fold the outstanding byte/char counts into the buffer
            // position and running total, then clear them.
            current.nextRemoved += gs.rawRead.i;
            gs.rawRead.i = 0;
            gs.totalChars += gs.charsWrote.i;
            gs.charsWrote.i = 0;
        }
Esempio n. 3
0
        /// <summary> Tcl_GetsObj -> getsObj
        /// 
        /// Accumulate input from the input channel until end-of-line or
        /// end-of-file has been seen.  Bytes read from the input channel
        /// are converted to Unicode using the encoding specified by the
        /// channel.
        /// 
        /// Returns the number of characters accumulated in the object
        /// or -1 if error, blocked, or EOF. If -1, use Tcl_GetErrno()
        /// to retrieve the POSIX error code for the error or condition
        /// that occurred.
        /// 
        /// FIXME: Above setting of error code is not fully implemented.
        /// 
        /// Will consume input from the channel.
        /// On reading EOF, leave channel at EOF char.
        /// On reading EOL, leave channel after EOL, but don't
        /// return EOL in dst buffer.
        /// </summary>

        internal int getsObj(TclObject obj)
        {
            // Purpose: read one line from the channel into obj.
            //
            // obj: emptied on entry, then filled with the decoded characters
            //      of the line, excluding the end-of-line sequence.
            //
            // Returns the value of copiedTotal: computed from the GetsState
            // counters when an EOL/EOF terminated line was found, or -1 when
            // no complete line could be produced (read failure, would-block,
            // or EOF with nothing read).
            //
            // Control flow note: the C original uses the labels "restore",
            // "goteol" and "done".  Here the main loop is left with
            // "goto restore_or_goteol_brk" and the flags restore / goteol
            // select which epilogue runs; neither flag set means "done".

            GetsState gs;
            ChannelBuffer buf;
            bool oldEncodingStart, oldEncodingEnd;
            int oldRemoved, skip, inEofChar;
            int copiedTotal, oldLength;
            bool in_binary_encoding = false;
            int dst, dstEnd, eol, eof;
            Object oldState;

            buf = inQueueHead;

            // Preserved so we can restore the channel's state in case we don't
            // find a newline in the available input.

            oldLength = 0;
            oldEncodingStart = encodingStart;
            oldEncodingEnd = encodingEnd;
            oldState = encodingState;
            oldRemoved = Tcl.Lang.ChannelBuffer.BUFFER_PADDING;
            if (buf != null)
            {
                oldRemoved = buf.nextRemoved;
            }

            // If there is no encoding, temporarily install one for the
            // duration of this call; it is reset to null before returning.
            // NOTE(review): the inherited comment said "iso8859-1" but the
            // code requests "utf-8" -- confirm which one is intended for
            // binary channels.

            if ((System.Object)encoding == null)
            {
                in_binary_encoding = true;
                encoding = EncodingCmd.getJavaName("utf-8");
            }

            System.Diagnostics.Debug.WriteLine("getsObj encoding is " + encoding);

            // Object used by filterBytes to keep track of how much data has
            // been consumed from the channel buffers.

            gs = new GetsState(this);
            gs.obj = obj;
            gs.encoding = encoding;
            gs.buf = buf;
            gs.state = oldState;
            gs.rawRead.i = 0;
            gs.charsWrote.i = 0;
            gs.totalChars = 0;

            // Ensure that obj is an empty TclString object.
            // Cheat a bit and grab the StringBuilder out of
            // the TclString so we can query the data that
            // was just added to the buffer.
            TclString.empty(obj);
            StringBuilder obj_sbuf = ((TclString)obj.InternalRep).sbuf;

            // dst / dstEnd are indices into obj_sbuf delimiting the
            // characters that have been decoded but not yet scanned.
            dst = 0;
            dstEnd = dst;

            skip = 0;
            eof = -1;
            inEofChar = eofChar;

            // Used to implement goto like functionality for restore
            // and goteol loop termination blocks.

            bool restore = false;
            bool goteol = false;

            // This is just here so that eol and copiedTotal are
            // definitely assigned before the scanning loop.
            eol = -1;
            copiedTotal = -1;

            {
                while (true)
                {
                    if (dst >= dstEnd)
                    {
                        // Everything decoded so far has been scanned; pull
                        // in another slice.
                        if (filterBytes(gs) != 0)
                        {
                            restore = true;
                            goto restore_or_goteol_brk; //goto restore
                        }
                        dstEnd += gs.charsWrote.i;
                    }

                    // Remember if EOF char is seen, then look for EOL anyhow, because
                    // the EOL might be before the EOF char.

                    if (inEofChar != '\x0000')
                    {
                        for (eol = dst; eol < dstEnd; eol++)
                        {
                            if (obj_sbuf[eol] == inEofChar)
                            {
                                dstEnd = eol;
                                eof = eol;
                                break;
                            }
                        }
                    }

                    // On EOL, leave current file position pointing after the EOL, but
                    // don't store the EOL in the output string.

                    switch (translation)
                    {

                        case TclIO.TRANS_LF:
                            {
                                for (eol = dst; eol < dstEnd; eol++)
                                {
                                    if (obj_sbuf[eol] == '\n')
                                    {
                                        skip = 1;
                                        goteol = true;
                                        goto restore_or_goteol_brk; //goto goteol
                                    }
                                }
                                break;
                            }

                        case TclIO.TRANS_CR:
                            {
                                for (eol = dst; eol < dstEnd; eol++)
                                {
                                    if (obj_sbuf[eol] == '\r')
                                    {
                                        skip = 1;
                                        goteol = true;
                                        goto restore_or_goteol_brk; //goto goteol
                                    }
                                }
                                break;
                            }

                        case TclIO.TRANS_CRLF:
                            {
                                for (eol = dst; eol < dstEnd; eol++)
                                {
                                    if (obj_sbuf[eol] == '\r')
                                    {
                                        eol++;

                                        // If a CR is at the end of the buffer,
                                        // then check for a LF at the beginning
                                        // of the next buffer.

                                        if (eol >= dstEnd)
                                        {
                                            dst = dstEnd;
                                            if (filterBytes(gs) != 0)
                                            {
                                                restore = true;
                                                goto restore_or_goteol_brk; //goto restore
                                            }
                                            dstEnd += gs.charsWrote.i;
                                            if (eol >= dstEnd)
                                            {
                                                // Nothing followed the CR.
                                                skip = 0;
                                                goteol = true;
                                                goto restore_or_goteol_brk; //goto goteol
                                            }
                                        }
                                        if (obj_sbuf[eol] == '\n')
                                        {
                                            // Step back so eol indexes the CR.
                                            eol--;
                                            skip = 2;
                                            goteol = true;
                                            goto restore_or_goteol_brk; //goto goteol
                                        }
                                    }
                                }
                                break;
                            }

                        case TclIO.TRANS_AUTO:
                            {
                                eol = dst;
                                skip = 1;
                                if (sawCR_Renamed_Field)
                                {
                                    // The previous line ended on a bare CR; if
                                    // this input starts with LF it belongs to
                                    // that line ending and must be dropped.
                                    sawCR_Renamed_Field = false;
                                    if ((eol < dstEnd) && (obj_sbuf[eol] == '\n'))
                                    {
                                        // Skip the raw bytes that make up the '\n'.

                                        char[] tmp = new char[1];
                                        IntPtr rawRead = new IntPtr(this);

                                        buf = gs.buf;
                                        // FIXME: We don't actually pass gs.state here, should we?
                                        externalToUnicode(buf.buf, buf.nextRemoved, gs.rawRead.i, tmp, 0, 1, rawRead, null, null);
                                        buf.nextRemoved += rawRead.i;
                                        gs.rawRead.i -= rawRead.i;
                                        gs.charsWrote.i--;
                                        obj_sbuf.Remove(dst, 1);
                                        dstEnd--;
                                    }
                                }
                                for (eol = dst; eol < dstEnd; eol++)
                                {
                                    if (obj_sbuf[eol] == '\r')
                                    {
                                        eol++;
                                        if (eol == dstEnd)
                                        {
                                            // If buffer ended on \r, peek ahead to see if a
                                            // \n is available.

                                            dst = dstEnd;

                                            // FIXME: Why does this peek in AUTO mode
                                            // but filter in CRLF mode?
                                            peekAhead(gs);
                                            dstEnd += gs.charsWrote.i;
                                            if (eol >= dstEnd)
                                            {
                                                // Nothing more available: end
                                                // the line at the CR and
                                                // remember it for the next call.
                                                eol--;
                                                sawCR_Renamed_Field = true;
                                                goteol = true;
                                                goto restore_or_goteol_brk; //goto goteol
                                            }
                                        }
                                        if (obj_sbuf[eol] == '\n')
                                        {
                                            skip++;
                                        }
                                        eol--;
                                        goteol = true; //goto goteol
                                        goto restore_or_goteol_brk;
                                    }
                                    else if (obj_sbuf[eol] == '\n')
                                    {
                                        goteol = true;
                                        goto restore_or_goteol_brk; //goto goteol
                                    }
                                }
                            }
                            break;
                    }
                    if (eof != -1)
                    {
                        // EOF character was seen.  On EOF, leave current file position
                        // pointing at the EOF character, but don't store the EOF
                        // character in the output string.

                        dstEnd = eof;
                        eofCond = true;
                        stickyEofCond = true;
                        encodingEnd = true;
                    }
                    if (eofCond)
                    {
                        skip = 0;
                        eol = dstEnd;
                        if (eol == oldLength)
                        {
                            // If we didn't append any bytes before encountering EOF,
                            // caller needs to see -1.

                            obj_sbuf.Length = oldLength;
                            commonGetsCleanup();
                            copiedTotal = -1;
                            goto restore_or_goteol_brk; //goto done
                        }
                        goteol = true;
                        goto restore_or_goteol_brk; //goto goteol
                    }
                    dst = dstEnd;
                }
            }

        restore_or_goteol_brk:
            ;
            // end restore_or_goteol: block

            if (goteol)
            {
                // Found EOL or EOF, but the output buffer may now contain too many
                // characters.  We need to know how many raw bytes correspond to
                // the number of characters we want, plus how many raw bytes
                // correspond to the character(s) making up EOL (if any), so we can
                // remove the correct number of bytes from the channel buffer.

                int linelen = eol - dst + skip;
                char[] tmp = new char[linelen];

                buf = gs.buf;
                encodingState = gs.state;
                if (btc != null)
                {
                    // Start from a fresh decoder for the re-conversion below.
                    btc = this.encoding.GetDecoder();
                }
                externalToUnicode(buf.buf, buf.nextRemoved, gs.rawRead.i, tmp, 0, linelen, gs.rawRead, null, gs.charsWrote);
                buf.nextRemoved += gs.rawRead.i;

                // Truncate the result at the EOL and recycle all the emptied
                // buffers.

                obj_sbuf.Length = eol;
                commonGetsCleanup();
                blocked = false;
                copiedTotal = gs.totalChars + gs.charsWrote.i - skip;
            }
            if (restore)
            {
                // Couldn't get a complete line.  This only happens if we get a error
                // reading from the channel or we are non-blocking and there wasn't
                // an EOL or EOF in the data available.

                // Rewind every queued buffer so the consumed bytes become
                // available again.  The queue may be empty when filterBytes
                // gave up before any buffer was allocated, so guard the head.
                buf = inQueueHead;
                if (buf != null)
                {
                    buf.nextRemoved = oldRemoved;
                    buf = buf.next;
                }
                for (; buf != null; buf = buf.next)
                {
                    buf.nextRemoved = Tcl.Lang.ChannelBuffer.BUFFER_PADDING;
                }
                commonGetsCleanup();

                encodingState = oldState;
                //if (btc != null) btc.reset(); // Not sure we want to reset encoder state here
                encodingStart = oldEncodingStart;
                encodingEnd = oldEncodingEnd;
                obj_sbuf.Length = oldLength;

                // We didn't get a complete line so we need to indicate to UpdateInterest
                // that the gets blocked.  It will wait for more data instead of firing
                // a timer, avoiding a busy wait.  This is where we are assuming that the
                // next operation is a gets.  No more file events will be delivered on 
                // this channel until new data arrives or some operation is performed
                // on the channel (e.g. gets, read, fconfigure) that changes the blocking
                // state.  Note that this means a file event will not be delivered even
                // though a read would be able to consume the buffered data.

                needMoreData = true;
                copiedTotal = -1;
            }

            //done:
            // Reset original encoding in case it was set to binary
            if (in_binary_encoding)
                encoding = null;

            // Update the notifier state so we don't block while there is still
            // data in the buffers.

            updateInterest();

            // FIXME: copiedTotal seems to be returning incorrect values
            // for some tests, need to make caller code use the return
            // value instead of the length of the returned object before
            // these errors can be detected by the test suite.
            return copiedTotal;
        }