/// <summary> FilterInputBytes -> filterBytes
///
/// Helper function for getsObj. Converts raw bytes held in the channel
/// buffers to characters and appends them to the TclObject referenced
/// by the GetsState (via TclString.append on gs.obj).
///
/// Consumes available bytes from channel buffers. When the channel
/// buffers are exhausted, more bytes are requested from the channel
/// device through the Input member and conversion continues from
/// inQueueTail. It is the caller's responsibility to release the
/// channel buffers that have been exhausted.
///
/// On entry the bookkeeping of the previous call is committed:
/// gs.rawRead.i is added to the current buffer's nextRemoved and
/// gs.charsWrote.i is added to gs.totalChars. On return gs.rawRead and
/// gs.charsWrote describe the conversion performed by this call.
///
/// FIXME: Doc modification of object's StringBuffer
/// </summary>
/// <param name="gs">Status object tracking how much data from the channel
/// buffers has been consumed and where characters are stored.</param>
/// <returns>-1 if the channel is blocked in non-blocking mode or if
/// reading from the channel device reported a non-zero result (in both
/// cases gs.rawRead.i and gs.charsWrote.i are reset to 0); 0 otherwise.</returns>
internal int filterBytes(GetsState gs)
{
    // Maximum number of characters requested from one conversion pass;
    // it is also the size of the temporary destination array.
    const int EncodingLineSize = 20;

    // Subtract the number of bytes that were removed from the channel
    // buffer during the last call.
    ChannelBuffer buf = gs.buf;
    if (buf != null)
    {
        buf.nextRemoved += gs.rawRead.i;
        if (buf.nextRemoved >= buf.nextAdded)
        {
            buf = buf.next;
        }
    }
    gs.totalChars += gs.charsWrote.i;

    // Set when the conversion found only an incomplete multibyte
    // sequence and more raw bytes must be read before retrying.
    bool forceRead = false;

    while (true)
    {
        if (forceRead || (buf == null) || (buf.nextAdded == Tcl.Lang.ChannelBuffer.BUFFER_PADDING))
        {
            // All channel buffers were exhausted and the caller still
            // hasn't seen EOL. Need to read more bytes from the channel
            // device. Side effect is to allocate another channel buffer.
            if (blocked)
            {
                if (!blocking)
                {
                    gs.charsWrote.i = 0;
                    gs.rawRead.i = 0;
                    return -1;
                }
                blocked = false;
            }
            if (Input != 0)
            {
                gs.charsWrote.i = 0;
                gs.rawRead.i = 0;
                return -1;
            }
            buf = inQueueTail;
            gs.buf = buf;
        }

        // Convert some of the bytes from the channel buffer to characters.
        int rawStart = buf.nextRemoved;
        byte[] raw = buf.buf;
        int rawLen = buf.nextAdded - rawStart;

        int toRead = EncodingLineSize;
        if (toRead > rawLen)
        {
            toRead = rawLen;
        }

        char[] dst = new char[toRead];
        gs.state = encodingState;
        int result = externalToUnicode(raw, rawStart, rawLen, dst, 0, toRead, gs.rawRead, null, gs.charsWrote);
        TclString.append(gs.obj, dst, 0, gs.charsWrote.i);

        // Make sure that if we go through 'gets', that we reset the
        // TCL_ENCODING_START flag still.
        encodingStart = false;

        if (result == TCL_CONVERT_MULTIBYTE)
        {
            // The last few bytes in this channel buffer were the start of
            // a multibyte sequence. If this buffer was full, then move
            // them to the next buffer so the bytes will be contiguous.
            if (buf.nextAdded < buf.bufLength)
            {
                if (gs.rawRead.i > 0)
                {
                    // Some raw bytes were converted. Fall through,
                    // returning those characters because an EOL might be
                    // present in them.
                }
                else if (eofCond)
                {
                    // There was a partial character followed by EOF on
                    // the device. Fall through, returning that nothing
                    // was found.
                    buf.nextRemoved = buf.nextAdded;
                }
                else
                {
                    // There are no more cached raw bytes left. See if we
                    // can get some more.
                    forceRead = true;
                    continue;
                }
            }
            else
            {
                ChannelBuffer next = buf.next;
                if (next == null)
                {
                    next = new ChannelBuffer(bufSize);
                    buf.next = next;
                    inQueueTail = next;
                }

                // The unconverted tail starts gs.rawRead.i bytes after
                // rawStart (conversion began at rawStart, not at index 0
                // of the array), so the copy source must be offset by
                // rawStart as well.
                int extra = rawLen - gs.rawRead.i;
                Array.Copy(raw, rawStart + gs.rawRead.i, next.buf, Tcl.Lang.ChannelBuffer.BUFFER_PADDING - extra, extra);
                next.nextRemoved -= extra;
                buf.nextAdded -= extra;
            }
        }

        // End loop in the normal case.
        break;
    }

    gs.buf = buf;
    return 0;
}
/// <summary> PeekAhead -> peekAhead
///
/// Helper used by getsObj when a \r was seen at the end of the
/// converted text and one more character is needed to decide whether a
/// \n follows.
///
/// Any characters obtained are appended to gs.obj by filterBytes. When
/// the current buffer is the last one, is fully consumed and was not
/// filled to capacity (a short read), no further data is requested;
/// instead the pending counters in gs are committed and cleared.
/// </summary>
/// <param name="gs">State shared with getsObj and filterBytes.</param>
internal void peekAhead(GetsState gs)
{
    ChannelBuffer current = gs.buf;

    // If there's any more raw input that's still buffered, we'll peek
    // into that. Otherwise, only get more data from the channel driver
    // if it looks like there might actually be more data. The assumption
    // is that if the channel buffer is filled right up to the end, then
    // there might be more data to read.
    if (current.next == null)
    {
        int unread = current.nextAdded - (current.nextRemoved + gs.rawRead.i);
        if (unread == 0)
        {
            if (current.nextAdded < current.bufLength)
            {
                // Don't peek ahead if last read was short read. Commit
                // what the previous conversion consumed/produced and
                // leave without touching the channel.
                current.nextRemoved += gs.rawRead.i;
                gs.rawRead.i = 0;
                gs.totalChars += gs.charsWrote.i;
                gs.charsWrote.i = 0;
                return;
            }

            // FIXME: This non-blocking check is currently disabled,
            // non-blocking is not currently supported and it is not
            // clear why we would need to depend on non-blocking IO when
            // peeking anyway.
            if (blocking)
            {
                // Intentionally empty: the temporary switch to
                // non-blocking mode is not implemented.
            }
        }
    }

    // The result of filterBytes is deliberately not inspected here;
    // the caller reads gs.charsWrote.i afterwards.
    filterBytes(gs);
}
/// <summary> Tcl_GetsObj -> getsObj
///
/// Accumulate input from the input channel until end-of-line or
/// end-of-file has been seen. Bytes read from the input channel
/// are converted to characters using the encoding specified by the
/// channel.
///
/// FIXME: Setting of a POSIX error code for the -1 case is not fully
/// implemented.
///
/// Will consume input from the channel.
/// On reading EOF, leave channel at EOF char.
/// On reading EOL, leave channel after EOL, but don't
/// return EOL in dst buffer.
/// </summary>
/// <param name="obj">Object that receives the line. It is emptied first
/// (TclString.empty) and its internal rep is then accessed as a
/// TclString.</param>
/// <returns>The number of characters accumulated in the object, or -1 on
/// error, when blocked, or at EOF with nothing read.</returns>
internal int getsObj(TclObject obj)
{
    GetsState gs;
    ChannelBuffer buf;
    bool oldEncodingStart, oldEncodingEnd;
    int oldRemoved, skip, inEofChar;
    int copiedTotal, oldLength;
    bool in_binary_encoding = false;
    int dst, dstEnd, eol, eof;
    Object oldState;

    buf = inQueueHead;

    // Preserved so we can restore the channel's state in case we don't
    // find a newline in the available input.
    oldLength = 0;
    oldEncodingStart = encodingStart;
    oldEncodingEnd = encodingEnd;
    oldState = encodingState;
    oldRemoved = Tcl.Lang.ChannelBuffer.BUFFER_PADDING;
    if (buf != null)
    {
        oldRemoved = buf.nextRemoved;
    }

    // If there is no encoding, temporarily install the one returned for
    // "utf-8"; it is set back to null before returning.
    // NOTE(review): the historical comment here named "iso8859-1" while
    // the code requests "utf-8" -- confirm which one is intended.
    if ((System.Object)encoding == null)
    {
        in_binary_encoding = true;
        encoding = EncodingCmd.getJavaName("utf-8");
    }
    System.Diagnostics.Debug.WriteLine("getsObj encoding is " + encoding);

    // Object used by filterBytes to keep track of how much data has
    // been consumed from the channel buffers.
    gs = new GetsState(this);
    gs.obj = obj;
    gs.encoding = encoding;
    gs.buf = buf;
    gs.state = oldState;
    gs.rawRead.i = 0;
    gs.charsWrote.i = 0;
    gs.totalChars = 0;

    // Ensure that obj is an empty TclString object.
    // Cheat a bit and grab the StringBuilder out of the TclString so we
    // can query the data that was just added to the buffer.
    TclString.empty(obj);
    StringBuilder obj_sbuf = ((TclString)obj.InternalRep).sbuf;

    // dst..dstEnd is the window of obj_sbuf that has been converted but
    // not yet scanned for EOL/EOF.
    dst = 0;
    dstEnd = dst;

    skip = 0;
    eof = -1;
    inEofChar = eofChar;

    // Flags selecting which termination block runs after the scan loop
    // (they stand in for the "restore" and "goteol" goto targets).
    bool restore = false;
    bool goteol = false;

    eol = -1;
    copiedTotal = -1;

    while (true)
    {
        if (dst >= dstEnd)
        {
            if (filterBytes(gs) != 0)
            {
                restore = true;
                goto restore_or_goteol_brk;
            }
            dstEnd += gs.charsWrote.i;
        }

        // Remember if EOF char is seen, then look for EOL anyhow, because
        // the EOL might be before the EOF char.
        if (inEofChar != '\x0000')
        {
            for (eol = dst; eol < dstEnd; eol++)
            {
                if (obj_sbuf[eol] == inEofChar)
                {
                    dstEnd = eol;
                    eof = eol;
                    break;
                }
            }
        }

        // On EOL, leave current file position pointing after the EOL, but
        // don't store the EOL in the output string.
        switch (translation)
        {
            case TclIO.TRANS_LF:
            {
                for (eol = dst; eol < dstEnd; eol++)
                {
                    if (obj_sbuf[eol] == '\n')
                    {
                        skip = 1;
                        goteol = true;
                        goto restore_or_goteol_brk;
                    }
                }
                break;
            }

            case TclIO.TRANS_CR:
            {
                for (eol = dst; eol < dstEnd; eol++)
                {
                    if (obj_sbuf[eol] == '\r')
                    {
                        skip = 1;
                        goteol = true;
                        goto restore_or_goteol_brk;
                    }
                }
                break;
            }

            case TclIO.TRANS_CRLF:
            {
                for (eol = dst; eol < dstEnd; eol++)
                {
                    if (obj_sbuf[eol] == '\r')
                    {
                        eol++;

                        // If a CR is at the end of the buffer, then check
                        // for a LF at the beginning of the next buffer.
                        if (eol >= dstEnd)
                        {
                            dst = dstEnd;
                            if (filterBytes(gs) != 0)
                            {
                                restore = true;
                                goto restore_or_goteol_brk;
                            }
                            dstEnd += gs.charsWrote.i;
                            if (eol >= dstEnd)
                            {
                                skip = 0;
                                goteol = true;
                                goto restore_or_goteol_brk;
                            }
                        }
                        if (obj_sbuf[eol] == '\n')
                        {
                            eol--;
                            skip = 2;
                            goteol = true;
                            goto restore_or_goteol_brk;
                        }
                    }
                }
                break;
            }

            case TclIO.TRANS_AUTO:
            {
                eol = dst;
                skip = 1;
                if (sawCR_Renamed_Field)
                {
                    sawCR_Renamed_Field = false;
                    if ((eol < dstEnd) && (obj_sbuf[eol] == '\n'))
                    {
                        // Skip the raw bytes that make up the '\n'.
                        char[] tmp = new char[1];
                        IntPtr rawRead = new IntPtr(this);

                        buf = gs.buf;
                        // FIXME: We don't actually pass gs.state here, should we?
                        externalToUnicode(buf.buf, buf.nextRemoved, gs.rawRead.i, tmp, 0, 1, rawRead, null, null);
                        buf.nextRemoved += rawRead.i;
                        gs.rawRead.i -= rawRead.i;
                        gs.charsWrote.i--;
                        obj_sbuf.Remove(dst, 1);
                        dstEnd--;
                    }
                }
                for (eol = dst; eol < dstEnd; eol++)
                {
                    if (obj_sbuf[eol] == '\r')
                    {
                        eol++;
                        if (eol == dstEnd)
                        {
                            // If buffer ended on \r, peek ahead to see if
                            // a \n is available.
                            dst = dstEnd;

                            // FIXME: Why does this peek in AUTO mode
                            // but filter in CRLF mode?
                            peekAhead(gs);
                            dstEnd += gs.charsWrote.i;
                            if (eol >= dstEnd)
                            {
                                eol--;
                                sawCR_Renamed_Field = true;
                                goteol = true;
                                goto restore_or_goteol_brk;
                            }
                        }
                        if (obj_sbuf[eol] == '\n')
                        {
                            skip++;
                        }
                        eol--;
                        goteol = true;
                        goto restore_or_goteol_brk;
                    }
                    else if (obj_sbuf[eol] == '\n')
                    {
                        goteol = true;
                        goto restore_or_goteol_brk;
                    }
                }
                break;
            }
        }

        if (eof != -1)
        {
            // EOF character was seen. On EOF, leave current file position
            // pointing at the EOF character, but don't store the EOF
            // character in the output string.
            dstEnd = eof;
            eofCond = true;
            stickyEofCond = true;
            encodingEnd = true;
        }
        if (eofCond)
        {
            skip = 0;
            eol = dstEnd;
            if (eol == oldLength)
            {
                // If we didn't append any characters before encountering
                // EOF, caller needs to see -1. Neither termination flag
                // is set, so only the common tail runs.
                obj_sbuf.Length = oldLength;
                commonGetsCleanup();
                copiedTotal = -1;
                goto restore_or_goteol_brk;
            }
            goteol = true;
            goto restore_or_goteol_brk;
        }
        dst = dstEnd;
    }

    restore_or_goteol_brk: ;

    if (goteol)
    {
        // Found EOL or EOF, but the output buffer may now contain too many
        // characters. We need to know how many raw bytes correspond to
        // the number of characters we want, plus how many raw bytes
        // correspond to the character(s) making up EOL (if any), so we can
        // remove the correct number of bytes from the channel buffer.
        int linelen = eol - dst + skip;
        char[] tmp = new char[linelen];

        buf = gs.buf;
        encodingState = gs.state;
        if (btc != null)
        {
            // Replace the existing decoder with a new one obtained from
            // the channel encoding before re-converting the line.
            btc = this.encoding.GetDecoder();
        }
        externalToUnicode(buf.buf, buf.nextRemoved, gs.rawRead.i, tmp, 0, linelen, gs.rawRead, null, gs.charsWrote);
        buf.nextRemoved += gs.rawRead.i;

        // Truncate the result at the EOL and recycle all the emptied
        // buffers.
        obj_sbuf.Length = eol;
        commonGetsCleanup();
        blocked = false;
        copiedTotal = gs.totalChars + gs.charsWrote.i - skip;
    }
    if (restore)
    {
        // Couldn't get a complete line. This only happens if we get an
        // error reading from the channel or we are non-blocking and there
        // wasn't an EOL or EOF in the data available.
        //
        // The input queue may still be empty here (filterBytes can fail
        // before any buffer has been queued), so guard the head buffer
        // the same way it is guarded on entry.
        buf = inQueueHead;
        if (buf != null)
        {
            buf.nextRemoved = oldRemoved;
            for (buf = buf.next; buf != null; buf = buf.next)
            {
                buf.nextRemoved = Tcl.Lang.ChannelBuffer.BUFFER_PADDING;
            }
        }
        commonGetsCleanup();

        encodingState = oldState;
        encodingStart = oldEncodingStart;
        encodingEnd = oldEncodingEnd;
        obj_sbuf.Length = oldLength;

        // We didn't get a complete line so we need to indicate to
        // updateInterest that the gets blocked. It will wait for more
        // data instead of firing a timer, avoiding a busy wait. This is
        // where we are assuming that the next operation is a gets. No
        // more file events will be delivered on this channel until new
        // data arrives or some operation is performed on the channel
        // (e.g. gets, read, fconfigure) that changes the blocking state.
        // Note that this means a file event will not be delivered even
        // though a read would be able to consume the buffered data.
        needMoreData = true;
        copiedTotal = -1;
    }

    // Reset original encoding in case it was set to binary.
    if (in_binary_encoding)
    {
        encoding = null;
    }

    // Update the notifier state so we don't block while there is still
    // data in the buffers.
    updateInterest();

    // FIXME: copiedTotal seems to be returning incorrect values
    // for some tests, need to make caller code use the return
    // value instead of the length of the returned object before
    // these errors can be detected by the test suite.
    return copiedTotal;
}