// This function is the meat of the conversion process. It is long because the input
// and output buffers are byte arrays pinned with 'fixed', and the raw pointers are only
// valid inside those 'fixed' scopes, so every pointer-based step is kept together here.
// The basic structure is:
//
//  o   Check the input data (null is reported via EncConverters.ThrowError; an empty
//      string short-circuits to an empty result with rciOutput = 0).
//  o   Fill in any unspecified encoding forms (via CheckInitEncForms).
//  o   Give the sub-class (via PreConvert) the opportunity to do whatever preprocessing
//      it needs ahead of the actual conversion, and to report the encoding forms the
//      engine wants for its input and output.
//  o   Normalize the input data to a byte buffer (via ECNormalizeData.GetBytes) based
//      on its input EncodingForm.
//  o   Allocate a buffer for the output data (min 10000 bytes).
//  o   Call the sub-class (via DoConvert) to do the actual conversion.
//  o   Normalize the output data to match the requested output EncodingForm (via
//      ECNormalizeData.GetString), which also supplies rciOutput.
//
// The buffer-size multiplications are 'checked' so that an enormous input raises an
// OverflowException instead of silently wrapping to an undersized buffer that is then
// handed to pointer-based code.
//
// Parameters:
//  eInEncodingForm   encoding form of sInput (may be defaulted by CheckInitEncForms)
//  sInput            the text to convert
//  ciInput           item count passed through unchanged to ECNormalizeData.GetBytes
//  eOutEncodingForm  requested encoding form of the result (may be defaulted as well)
//  eNormalizeOutput  normalization flags; PreConvert may adjust them
//  rciOutput         [out] set to 0 for empty input, otherwise by ECNormalizeData.GetString
//  bForward          direction flag; selects CodePageInput/CodePageOutput for each side
protected virtual unsafe string InternalConvertEx
(
    EncodingForm    eInEncodingForm,
    string          sInput,
    int             ciInput,
    EncodingForm    eOutEncodingForm,
    NormalizeFlags  eNormalizeOutput,
    out int         rciOutput,
    bool            bForward
)
{
    Util.DebugWriteLine(className, "BEGIN");
    Util.DebugWriteLine(className,
        "eEncFormIn " + eInEncodingForm.ToString() + ", " +
        "eEncFormOut " + eOutEncodingForm.ToString());

    if (sInput == null)
    {
        EncConverters.ThrowError(ErrStatus.IncompleteChar);
    }
    if (sInput.Length == 0)
    {
        rciOutput = 0;
        return "";
    }

#if DEBUG && __MonoCS__
    // for debugging only BEGIN
    //byte[] baIn = System.Text.Encoding.UTF8.GetBytes(sInput);  // works
    byte[] baIn = System.Text.Encoding.BigEndianUnicode.GetBytes(sInput);  // easier to read
    Util.DebugWriteLine(className, Util.getDisplayBytes("Input BigEndianUnicode", baIn));
    baIn = System.Text.Encoding.Unicode.GetBytes(sInput);
    Util.DebugWriteLine(className, Util.getDisplayBytes("Input Unicode", baIn));

    // Keep only the low byte of every UTF-16 code unit ("narrowized" view).
    int nInLen = sInput.Length;
    byte [] baIn2 = new byte[nInLen];
    for (int i = 0; i < nInLen; i++)
    {
        baIn2[i] = (byte)(sInput[i] & 0xFF);
    }
    Util.DebugWriteLine(className, Util.getDisplayBytes("Input Narrowized", baIn2));
    /*
     * System.Text.Encoding encFrom = System.Text.Encoding.GetEncoding(12000);
     * System.Text.Encoding encTo   = System.Text.Encoding.UTF8;
     *
     * // Perform the conversion from one encoding to the other.
     * Util.DebugWriteLine(className, "Starting with " + baIn.Length.ToString() + " bytes.");
     * byte[] baOut2 = System.Text.Encoding.Convert(encFrom, encTo, baIn);
     * Util.DebugWriteLine(className, "Converted to " + baOut2.Length.ToString() + " bytes.");
     * string resultString = System.Text.Encoding.Default.GetString(baOut2, 0, baOut2.Length);
     * Util.DebugWriteLine(className, "Test output '" + resultString + "'");
     */
    // for debugging only END
#endif

    // if the user hasn't specified, then take the default case for the ConversionType:
    //  if L/RHS == eLegacy, then LegacyString
    //  if L/RHS == eUnicode, then UTF16
    CheckInitEncForms
        (
        bForward,
        ref eInEncodingForm,
        ref eOutEncodingForm
        );

    // allow the converter engine's (and/or its COM wrapper) to do some preprocessing.
    EncodingForm eFormEngineIn = EncodingForm.Unspecified, eFormEngineOut = EncodingForm.Unspecified;
    PreConvert
        (
        eInEncodingForm,        // [in] form in the BSTR
        ref eFormEngineIn,      // [out] form the conversion engine wants, etc.
        eOutEncodingForm,
        ref eFormEngineOut,
        ref eNormalizeOutput,
        bForward
        );

    // get enough space for us to normalize the input data (6x ought to be enough).
    // 'checked' turns an int overflow on huge inputs into an OverflowException rather
    // than a wrapped (too small or negative) size.
    int nBufSize = checked(sInput.Length * 6);
    byte[] abyInBuffer = new byte[nBufSize];
    fixed (byte* lpInBuffer = abyInBuffer)
    {
        // use a helper class to normalize the data to the format needed by the engine.
        // nBufSize is passed by ref and is used from here on as the input byte count.
        Util.DebugWriteLine(className, "Calling GetBytes");
        ECNormalizeData.GetBytes(sInput, ciInput, eInEncodingForm,
            ((bForward) ? CodePageInput : CodePageOutput), eFormEngineIn, lpInBuffer,
            ref nBufSize, ref m_bDebugDisplayMode);
#if DEBUG && __MonoCS__
        byte[] baOut = new byte[nBufSize];
        ECNormalizeData.ByteStarToByteArr(lpInBuffer, nBufSize, baOut);
        Util.DebugWriteLine(className, Util.getDisplayBytes("Input Bytes", baOut));
#endif

        // get some space for the converter to fill: at least 10000 bytes, or 6x the
        // input byte count when that is larger. Without 'checked', an overflowing
        // product would go negative and Math.Max would quietly pick 10000.
        int nOutLen = Math.Max(10000, checked(nBufSize * 6));
        byte[] abyOutBuffer = new byte[nOutLen];
        fixed (byte* lpOutBuffer = abyOutBuffer)
        {
            // Start the output with four zero bytes (a new managed array is already
            // zero-filled; the explicit stores are kept from the original code).
            lpOutBuffer[0] = lpOutBuffer[1] = lpOutBuffer[2] = lpOutBuffer[3] = 0;

            // call the wrapper sub-classes' DoConvert to let them do it.
            Util.DebugWriteLine(className, "Calling DoConvert");
            DoConvert(lpInBuffer, nBufSize, lpOutBuffer, ref nOutLen);
#if DEBUG && __MonoCS__
            Util.DebugWriteLine(className, "Output length " + nOutLen.ToString());
            byte[] baOut2 = new byte[nOutLen];
            ECNormalizeData.ByteStarToByteArr(lpOutBuffer, nOutLen, baOut2);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Output In Bytes", baOut2));
            Util.DebugWriteLine(className, "Got val '" + System.Text.Encoding.Unicode.GetString(baOut2) + "'");
#endif

            // convert the engine's output bytes into the string form the caller asked for
            string result = ECNormalizeData.GetString(lpOutBuffer, nOutLen, eOutEncodingForm,
                ((bForward) ? CodePageOutput : CodePageInput), eFormEngineOut, eNormalizeOutput,
                out rciOutput, ref m_bDebugDisplayMode);
#if DEBUG && __MonoCS__
            Util.DebugWriteLine(className, "normalized result '" + result + "'");
            byte[] baResult = System.Text.Encoding.BigEndianUnicode.GetBytes(result);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output in UTF16BE", baResult));
            baResult = System.Text.Encoding.Unicode.GetBytes(result);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output in UTF16LE", baResult));
            baResult = System.Text.Encoding.UTF8.GetBytes(result);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output In UTF8", baResult));
            Util.DebugWriteLine(className, "Returning.");
#endif
            return result;
        }
    }
}
/// <summary>
/// Byte-array overload of InternalConvertEx: when the caller gives us
/// legacy data as a byte array as input, we need to treat it as a byte array.
/// The input bytes are pinned and handed to DoConvert as they are (there is no
/// ECNormalizeData.GetBytes step here); the engine's output is then turned into a
/// string by ECNormalizeData.GetString.
/// </summary>
/// <param name="eInEncodingForm">Encoding form of the input; may be defaulted by CheckInitEncForms and is passed to PreConvert.</param>
/// <param name="baInput">The raw input bytes to convert.</param>
/// <param name="eOutEncodingForm">Requested encoding form of the result; may be defaulted by CheckInitEncForms.</param>
/// <param name="eNormalizeOutput">Normalization flags; PreConvert may adjust them.</param>
/// <param name="rciOutput">Set to 0 for empty input, otherwise by ECNormalizeData.GetString.</param>
/// <param name="bForward">Direction flag; selects CodePageOutput (true) or CodePageInput (false) for the output side.</param>
/// <returns>The converted string, or an empty string when <paramref name="baInput"/> is empty.</returns>
protected virtual unsafe string InternalConvertEx(EncodingForm eInEncodingForm,
    byte[] baInput,
    EncodingForm eOutEncodingForm,
    NormalizeFlags eNormalizeOutput,
    out int rciOutput,
    bool bForward)
{
    Util.DebugWriteLine(className, "(input bytes) BEGIN");
    Util.DebugWriteLine(className,
        "eEncFormIn " + eInEncodingForm.ToString() + ", " +
        "eEncFormOut " + eOutEncodingForm.ToString());

    if (baInput == null)
    {
        EncConverters.ThrowError(ErrStatus.IncompleteChar);
    }
    if (baInput.Length == 0)
    {
        rciOutput = 0;
        return "";
    }

    // if the user hasn't specified, then take the default case for the ConversionType:
    //  if L/RHS == eLegacy, then LegacyString
    //  if L/RHS == eUnicode, then UTF16
    CheckInitEncForms(bForward,
        ref eInEncodingForm,
        ref eOutEncodingForm);

    // allow the converter engine's (and/or its COM wrapper) to do some preprocessing.
    EncodingForm eFormEngineIn = EncodingForm.Unspecified, eFormEngineOut = EncodingForm.Unspecified;
    PreConvert(
        eInEncodingForm,        // [in] form in the BSTR
        ref eFormEngineIn,      // [out] form the conversion engine wants, etc.
        eOutEncodingForm,
        ref eFormEngineOut,
        ref eNormalizeOutput,
        bForward);

    int nBufSize = baInput.Length;
    fixed (byte* lpInBuffer = baInput)
    {
        // Output buffer: at least 10000 bytes, or 6x the input size when larger.
        // 'checked' makes an int overflow on huge inputs throw instead of wrapping
        // negative and letting Math.Max quietly fall back to 10000 bytes.
        int nOutLen = Math.Max(10000, checked(nBufSize * 6));
        byte[] abyOutBuffer = new byte[nOutLen];
        fixed (byte* lpOutBuffer = abyOutBuffer)
        {
            // Start the output with four zero bytes (a new managed array is already
            // zero-filled; the explicit stores are kept from the original code).
            lpOutBuffer[0] = lpOutBuffer[1] = lpOutBuffer[2] = lpOutBuffer[3] = 0;

            // call the wrapper sub-classes' DoConvert to let them do it.
            Util.DebugWriteLine(className, "Calling DoConvert");
            DoConvert(lpInBuffer, nBufSize, lpOutBuffer, ref nOutLen);
            Util.DebugWriteLine(className, "Output length " + nOutLen.ToString());
#if DEBUG
            // The managed copy of the output is only needed for the debug dump, so it
            // is built only in DEBUG builds.
            byte[] baOut = new byte[nOutLen];
            ECNormalizeData.ByteStarToByteArr(lpOutBuffer, nOutLen, baOut);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Output In Bytes", baOut));
            Util.DebugWriteLine(className, "Got val '" + System.Text.Encoding.Unicode.GetString(baOut) + "'");
#endif

            // convert the engine's output bytes into the string form the caller asked for
            string result = ECNormalizeData.GetString(lpOutBuffer, nOutLen, eOutEncodingForm,
                ((bForward) ? CodePageOutput : CodePageInput), eFormEngineOut, eNormalizeOutput,
                out rciOutput, ref m_bDebugDisplayMode);
#if DEBUG
            Util.DebugWriteLine(className, "normalized result '" + result + "'");
            byte[] baResult = System.Text.Encoding.BigEndianUnicode.GetBytes(result);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output in UTF16BE", baResult));
            baResult = System.Text.Encoding.Unicode.GetBytes(result);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output in UTF16LE", baResult));
            baResult = System.Text.Encoding.UTF8.GetBytes(result);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output In UTF8", baResult));
            Util.DebugWriteLine(className, "Returning.");
#endif
            return result;
        }
    }
}