/// <summary>
/// Default pre-conversion hook: reports the client's encoding forms unchanged as the forms
/// the engine works with. Engine wrappers override this when the engine needs something
/// else (for example, CC needs UTF8Bytes for Unicode input, so its wrapper would set
/// <paramref name="eInFormEngine"/> to UTF8Bytes).
/// </summary>
/// <param name="eInEncodingForm">Encoding form of the data supplied by the client.</param>
/// <param name="eInFormEngine">Receives the input form the engine should be given.</param>
/// <param name="eOutEncodingForm">Encoding form the client wants back.</param>
/// <param name="eOutFormEngine">Receives the output form the engine will produce.</param>
/// <param name="eNormalizeOutput">Requested output normalization; not touched here.</param>
/// <param name="bForward">Conversion direction; not used here.</param>
protected virtual void PreConvert(
    EncodingForm eInEncodingForm,
    ref EncodingForm eInFormEngine,
    EncodingForm eOutEncodingForm,
    ref EncodingForm eOutFormEngine,
    ref NormalizeFlags eNormalizeOutput,
    bool bForward)
{
    Util.DebugWriteLine(className,
        String.Format("eEncFormIn {0}, eEncFormOut {1}", eInEncodingForm, eOutEncodingForm));

    // Pass-through defaults: whatever form the client uses is assumed to suit the engine.
    eOutFormEngine = eOutEncodingForm;
    eInFormEngine = eInEncodingForm;
}
/// <summary>
/// Helper that normalizes the client's input string into the byte layout the conversion
/// engine wants and writes it, followed by four zero bytes, into <paramref name="pBuf"/>.
/// </summary>
/// <param name="strInput">Input data as received from the client.</param>
/// <param name="cnCountIn">Item count supplied by the caller, or 0 to derive it from
/// <paramref name="strInput"/>.</param>
/// <param name="eEncFormIn">Encoding form of <paramref name="strInput"/>.</param>
/// <param name="nCodePageIn">Code page used to narrow LegacyString input. When it is 0 and one
/// of the first three characters is in the U+Fxxx range, the symbol-font code page is used.</param>
/// <param name="eFormEngineIn">Encoding form the engine wants to receive.</param>
/// <param name="pBuf">Destination buffer. Its capacity is NOT checked here, so the caller must
/// size it for the normalized data plus the 4-byte terminator.</param>
/// <param name="nBufSize">On return, the number of data bytes written (terminator excluded).
/// The incoming value is not read.</param>
/// <param name="bDebugDisplayMode">Passed through to the DisplayDebug* helpers.</param>
/// <returns><paramref name="pBuf"/>.</returns>
internal static unsafe byte *GetBytes(string strInput, int cnCountIn, EncodingForm eEncFormIn, int nCodePageIn, EncodingForm eFormEngineIn, byte *pBuf, ref int nBufSize, ref bool bDebugDisplayMode)
{
    Util.DebugWriteLine(className, "BEGIN");
    Util.DebugWriteLine(className, "eEncFormIn " + eEncFormIn.ToString() + ", " + "eFormEngineIn " + eFormEngineIn.ToString());

    // if the form the user gave is not what the engine wants (and it isn't legacy
    // since legacy forms are already handled later)...
    if ((eEncFormIn != eFormEngineIn) && !EncConverter.IsLegacyFormat(eEncFormIn))
    {
        // we can do some of the conversions ourself. For example, if the input form
        // is UTF16 and the desired form is UTF8, then simply use CCUnicode8 below
        if ((eEncFormIn == EncodingForm.UTF16) && (eFormEngineIn == EncodingForm.UTF8Bytes))
        {
            Util.DebugWriteLine(className, "using CCUnicode8");
            eEncFormIn = (EncodingForm)CCUnicode8;
        }
        // we can also do the following one
        else if ((eEncFormIn == EncodingForm.UTF8String) && (eFormEngineIn == EncodingForm.UTF8Bytes))
        {
            ;   // i.e. don't have TECkit do this one...
        }
        else
        {
            strInput = EncConverters.UnicodeEncodingFormConvertEx(strInput, eEncFormIn, cnCountIn, eFormEngineIn, NormalizeFlags.None, out cnCountIn);
            eEncFormIn = eFormEngineIn;
        }
    }

    int nInLen = 0;
    switch (eEncFormIn)
    {
        case EncodingForm.LegacyBytes:
        case EncodingForm.UTF8Bytes:
        {
            if (cnCountIn != 0)
            {
                nInLen = cnCountIn; // item count should be the number of bytes directly.
            }
            else
            {
                // if the user didn't give the length (i.e. via ConvertEx), then get it
                // from the BSTR length. nInLen will be the # of bytes.
                nInLen = strInput.Length * 2;
            }

            // these forms are for C++ apps that want to use the BSTR to transfer
            // bytes rather than OLECHARs.
            nInLen = StringToByteStar(strInput, pBuf, nInLen, true);
            if (eEncFormIn == EncodingForm.LegacyBytes)
            {
                DisplayDebugCharValues(pBuf, nInLen, "Received (LegacyBytes) from client and sending to Converter/DLL...", ref bDebugDisplayMode);
            }
            else
            {
                DisplayDebugUCharValuesFromUTF8(pBuf, nInLen, "Received (UTF8Bytes) from client and sending to Converter/DLL...", ref bDebugDisplayMode);
            }
            break;
        }

        case EncodingForm.LegacyString:
        {
            if (cnCountIn != 0)
            {
                nInLen = cnCountIn; // item count should be the number of bytes directly (after conversion below).
            }
            else
            {
                nInLen = strInput.Length;   // the # of bytes will *be* the # of chars in the string after we're done.
            }

            DisplayDebugUCharValues(strInput, "Received (LegacyString) from client...", ref bDebugDisplayMode);

            // use a code page converter to narrowize using the input string
            // (but the 'easier' Add method will send 0; if so, then
            // fallback to the original method.
            byte[] ba = null;

            // first check if it's a symbol font (sometimes the user
            // incorrectly sends a few spaces first, so check the
            // first couple of bytes. If it is (and the code page is 0), then
            // change the code page to be CP_SYMBOL.
            // The probe is bounded by the string length so an empty string is
            // tolerated instead of faulting on strInput[0].
            if (nCodePageIn == 0)
            {
                int nProbeLen = Math.Min(strInput.Length, 3);
                for (int i = 0; i < nProbeLen; i++)
                {
                    if ((strInput[i] & 0xF000) == 0xF000)
                    {
                        nCodePageIn = EncConverters.cnSymbolFontCodePage;
                        break;
                    }
                }
            }

#if __MonoCS__
            // Narrowizing by code page 0 doesn't seem to be what we want on Linux.
            // Treating it as a symbol font or stripping off the low byte works better.
            if (nCodePageIn == 0)
            {
                ba = BruteForceNarrowize(strInput, nInLen);
            }
            else
#else
            if (true)
#endif
            {
                // if it's a symbol or iso-8859 encoding, then we can handle just
                // taking the low byte (i.e. the catch case)
                if ((nCodePageIn == EncConverters.cnSymbolFontCodePage)
                    || (nCodePageIn == EncConverters.cnIso8859_1CodePage))
                {
                    try
                    {
                        Encoding enc = Encoding.GetEncoding(nCodePageIn);
                        ba = enc.GetBytes(strInput);
                        Util.DebugWriteLine(className, "Narrowized by given code page.");
                    }
                    catch
                    {
                        ba = BruteForceNarrowize(strInput, nInLen);
                    }
                }
                else
                {
                    // otherwise, simply use CP_ACP (or the default code page) to
                    // narrowize it.
                    Util.DebugWriteLine(className, "Narrowizing by given code page.");
                    Encoding enc = Encoding.GetEncoding(nCodePageIn);
                    ba = enc.GetBytes(strInput);
                }
            }

            // copy the managed byte array into the caller's raw buffer
            ByteArrToByteStar(ba, pBuf);
            if (cnCountIn != 0)
            {
                nInLen = cnCountIn; // item count should be the number of bytes directly.
            }
            else
            {
                // if the user didn't give the length (i.e. via ConvertEx), then take it
                // from the narrowed data. nInLen will be the # of bytes.
                nInLen = ba.Length;
            }

            DisplayDebugCharValues(pBuf, nInLen, "Sending (LegacyBytes) to Converter/DLL...", ref bDebugDisplayMode);
            break;
        }

        // this following form *must* be widened UTF8 via the default code page
        case EncodingForm.UTF8String:
        {
            DisplayDebugUCharValues(strInput, "Received (UTF8String) from client...", ref bDebugDisplayMode);

            // use a code page converter to narrowize using the input string
            Encoding enc = Encoding.Default;
            byte[] ba = enc.GetBytes(strInput);

            // copy the managed byte array into the caller's raw buffer
            ByteArrToByteStar(ba, pBuf);
            if (cnCountIn != 0)
            {
                nInLen = cnCountIn; // item count should be the number of bytes directly.
            }
            else
            {
                // if the user didn't give the length (i.e. via ConvertEx), then take it
                // from the narrowed data. nInLen will be the # of bytes.
                nInLen = ba.Length;
            }

            DisplayDebugUCharValuesFromUTF8(pBuf, nInLen, "Sending (UTF8Bytes) to Converter/DLL...", ref bDebugDisplayMode);
            break;
        }

        // this is a special case for CC where the input was actually UTF16, but the
        // CC DLL is expecting (usually) UTF8, so convert from UTF16->UTF8 narrow
        case (EncodingForm)CCUnicode8:
        {
            DisplayDebugUCharValues(strInput, "Received (UTF16) from client...", ref bDebugDisplayMode);

            UTF8Encoding enc = new UTF8Encoding();
            byte[] ba = enc.GetBytes(strInput);

            // copy the managed byte array into the caller's raw buffer
            ByteArrToByteStar(ba, pBuf);

            // since we've changed the format, we don't care how many UTF16 words came in
            nInLen = ba.Length;

            DisplayDebugUCharValuesFromUTF8(pBuf, nInLen, "Sending (UTF8Bytes) to Converter/DLL...", ref bDebugDisplayMode);
            break;
        }

        case EncodingForm.UTF16:
        {
            if (cnCountIn != 0)
            {
                nInLen = cnCountIn; // item count should be the number of 16-bit words directly
            }
            else
            {
                nInLen = strInput.Length;
            }

            DisplayDebugUCharValues(strInput, "Received (UTF16) from client and sending to Converter/DLL...", ref bDebugDisplayMode);

            // but this should be the count of bytes...
            nInLen *= 2;
            StringToByteStar(strInput, pBuf, nInLen, false);
            break;
        }

        case EncodingForm.UTF16BE:
        case EncodingForm.UTF32:
        case EncodingForm.UTF32BE:
        {
            if (cnCountIn != 0)
            {
                nInLen = cnCountIn; // item count is the number of Uni chars

                // for UTF32, the converter's actually expecting the length to be twice
                // this much again.
                if (eEncFormIn != EncodingForm.UTF16BE)
                {
                    nInLen *= 2;
                }
            }
            else
            {
                nInLen = strInput.Length;
            }

            // NOTE(review): this dumps pBuf before StringToByteStar has filled it, so the
            // debug display probably shows stale buffer contents rather than the input -- confirm.
            DisplayDebugUCharValues(pBuf, nInLen, "Received (UTF16BE/32/32BE) from client/Sending to Converter/DLL...", ref bDebugDisplayMode);

            // for the byte count, double it (possibly again)
            nInLen *= 2;
            StringToByteStar(strInput, pBuf, nInLen, false);
            break;
        }

        default:
            EncConverters.ThrowError(ErrStatus.InEncFormNotSupported);
            break;
    }

    // terminate with four zero bytes (enough for a null in any of the supported forms)
    pBuf[nInLen] = pBuf[nInLen + 1] = pBuf[nInLen + 2] = pBuf[nInLen + 3] = 0;
    nBufSize = (int)nInLen;

    return(pBuf);
}
/// <summary>
/// Helper that turns the raw bytes produced by the conversion engine into the string (and
/// item count) the client asked for.
/// </summary>
/// <param name="lpOutBuffer">Engine output. Four zero bytes are written starting at offset
/// <paramref name="nOutLen"/>, so the buffer must have at least that much room past the data.</param>
/// <param name="nOutLen">Number of data bytes in <paramref name="lpOutBuffer"/>.</param>
/// <param name="eOutEncodingForm">Encoding form the client wants back.</param>
/// <param name="nCodePageOut">Code page used to widen LegacyString output.</param>
/// <param name="eFormEngineOut">Encoding form the engine produced.</param>
/// <param name="eNormalizeOutput">Normalization still to be applied; anything other than
/// None causes a pass through EncConverters.UnicodeEncodingFormConvertEx.</param>
/// <param name="rciOutput">Receives the number of items in the returned string.</param>
/// <param name="bDebugDisplayMode">Passed through to the DisplayDebug* helpers.</param>
/// <returns>The converted data in the requested form.</returns>
internal static unsafe string GetString(byte *lpOutBuffer, int nOutLen, EncodingForm eOutEncodingForm, int nCodePageOut, EncodingForm eFormEngineOut, NormalizeFlags eNormalizeOutput, out int rciOutput, ref bool bDebugDisplayMode)
{
    // null terminate the output and turn it into a (real) array of bytes
    Util.DebugWriteLine(className, "BEGIN");
    lpOutBuffer[nOutLen] = lpOutBuffer[nOutLen + 1] = lpOutBuffer[nOutLen + 2] = lpOutBuffer[nOutLen + 3] = 0;
    byte[] baOut = new byte[nOutLen];
    ByteStarToByteArr(lpOutBuffer, nOutLen, baOut);
    Util.DebugWriteLine(className, Util.getDisplayBytes("byte array", baOut));

    // check to see if the engine handled the given output form. If not, then see
    // if it's a conversion we can easily do (otherwise we'll ask TEC to do the
    // conversion for us (later) so that all engines can handle all possible
    // output encoding forms.
    Util.DebugWriteLine(className, "eOutEncodingForm " + eOutEncodingForm.ToString() + ", " + "eFormEngineOut " + eFormEngineOut.ToString());
    if (eOutEncodingForm != eFormEngineOut)
    {
        if (EncConverter.IsLegacyFormat(eOutEncodingForm))
        {
            if ((eFormEngineOut == EncodingForm.LegacyBytes) && (eOutEncodingForm == EncodingForm.LegacyString))
            {
                // in this case, just *pretend* the engine outputs LegacyString (the
                // LegacyString case below really means "convert LegacyBytes to
                // LegacyString)
                eFormEngineOut = eOutEncodingForm;
            }
        }
        else    // unicode forms
        {
            // if the engine gives UTF8 and the client wants UTF16...
            if ((eOutEncodingForm == EncodingForm.UTF16) && (eFormEngineOut == EncodingForm.UTF8Bytes))
            {
                // use the special form to convert it below
                Util.DebugWriteLine(className, "using CCUnicode8");
                eOutEncodingForm = eFormEngineOut = (EncodingForm)CCUnicode8;
            }
            // or vice versa
            else if ((eFormEngineOut == EncodingForm.UTF16)
                && ((eOutEncodingForm == EncodingForm.UTF8Bytes) || (eOutEncodingForm == EncodingForm.UTF8String)))
            {
                // engine gave UTF16, but user wants a UTF8 flavor.
                UTF8Encoding enc = new UTF8Encoding();
                baOut = enc.GetBytes(Encoding.Unicode.GetChars(baOut));
                eFormEngineOut = eOutEncodingForm;
                nOutLen = baOut.Length;
            }
            // these conversions we can do ourself
            else if ((eOutEncodingForm == EncodingForm.UTF8String) || (eOutEncodingForm == EncodingForm.UTF16))
            {
#if _MSC_VER
                // Doesn't this wipe out the distinction?
                // On Linux we need to be able to convert the output from UTF32 to UTF16.
                eFormEngineOut = eOutEncodingForm;
#endif
            }
        }
    }

    int nItems = 0, nCharsLen = 0;
    char[] caOut = null;
    switch (eFormEngineOut)
    {
        case EncodingForm.LegacyBytes:
        case EncodingForm.UTF8Bytes:
        {
            if (eFormEngineOut == EncodingForm.LegacyBytes)
            {
                DisplayDebugCharValues(baOut, "Received (LegacyBytes) back from Converter/DLL (returning as LegacyBytes)...", ref bDebugDisplayMode);
            }
            else
            {
                DisplayDebugUCharValuesFromUTF8(baOut, "Received (UTF8Bytes) back from Converter/DLL (returning as UTF8Bytes)...", ref bDebugDisplayMode);
            }

            // stuff the returned 'bytes' into the BSTR as narrow characters rather than
            // converting to wide
            nItems = nOutLen;
            nCharsLen = (nOutLen + 1) / 2;
            caOut = new char[nCharsLen];
            ByteArrToCharArr(baOut, caOut);
            break;
        }

        case EncodingForm.LegacyString:
        {
            DisplayDebugCharValues(baOut, "Received (LegacyBytes) back from Converter/DLL (returning as LegacyString)...", ref bDebugDisplayMode);

            nCharsLen = nItems = nOutLen;

#if __MonoCS__
            // Narrowizing by code page 0 doesn't seem to be what we want on Linux.
            // Treating it as a symbol font or stripping off the low byte works better.
            if (nCodePageOut == 0)
            {
                caOut = BruteForceWiden(nCodePageOut, baOut, nCharsLen);
            }
            else
#else
            if (true)
#endif
            {
                try
                {
                    // this will throw (for some reason) when doing symbol fonts
                    // (apparently, CP_SYMBOL is no longer supported).
                    caOut = Encoding.GetEncoding(nCodePageOut).GetChars(baOut);
                }
                catch
                {
                    if ((nCodePageOut == EncConverters.cnSymbolFontCodePage)
                        || (nCodePageOut == EncConverters.cnIso8859_1CodePage))
                    {
                        caOut = BruteForceWiden(nCodePageOut, baOut, nCharsLen);
                    }
                    else
                    {
                        throw;
                    }
                }
            }

            // A multi-byte code page decodes to fewer chars than bytes, so report the
            // length of what was actually decoded (identical for single-byte code pages).
            nCharsLen = nItems = caOut.Length;
            break;
        }

        case EncodingForm.UTF16:
        {
            nCharsLen = nItems = (nOutLen / 2);
            DisplayDebugUCharValues(baOut, "Received (UTF16) back from Converter/DLL (returning as UTF16)...", ref bDebugDisplayMode);
            caOut = Encoding.Unicode.GetChars(baOut);
            break;
        }

        case EncodingForm.UTF8String:
        {
            DisplayDebugUCharValuesFromUTF8(baOut, "Received (UTF8Bytes) back from Converter/DLL (returning as UTF8String)...", ref bDebugDisplayMode);

            // this encoding form is always encoded using the default code page.
            caOut = Encoding.Default.GetChars(baOut);

            // Encoding.Default may be multi-byte (e.g. UTF-8), in which case the char
            // count differs from the byte count; use the decoded length.
            nCharsLen = nItems = caOut.Length;
            break;
        }

        case (EncodingForm)CCUnicode8:
        {
            DisplayDebugUCharValuesFromUTF8(baOut, "Received (UTF8Bytes) back from Converter/DLL (returning as UTF16)...", ref bDebugDisplayMode);

            caOut = Encoding.UTF8.GetChars(baOut);
            nCharsLen = nItems = caOut.Length;
            break;
        }

        case EncodingForm.UTF16BE:
        case EncodingForm.UTF32:
        case EncodingForm.UTF32BE:
        {
            nCharsLen = nItems = nOutLen / 2;

            DisplayDebugUCharValues(baOut, "Received (UTF16BE/32/32BE) back from Converter/DLL...", ref bDebugDisplayMode);

            caOut = new char[nCharsLen];
            ByteArrToCharArr(baOut, caOut);

            // for UTF32, the item count is half as much again.
            if (eFormEngineOut != EncodingForm.UTF16BE)
            {
                nItems /= 2;
            }
            break;
        }

        default:
            EncConverters.ThrowError(ErrStatus.OutEncFormNotSupported);
            break;
    }

#if !v22_AllowEmptyReturn
    if ((nCharsLen <= 0)
#if DEBUG
        || (nCharsLen != caOut.Length)
#endif
        )
    {
        EncConverters.ThrowError(ErrStatus.NoReturnDataBadOutForm);
    }
#endif

    // check to see if the engine handled the given output form. If not, then ask
    // TEC to do the conversion for us so that all engines can handle all possible
    // output encoding forms (e.g. caller requested utf32, but above CC could only
    // give us utf16/8)
    // Also, if the caller wanted something other than "None" for the eNormalizeOutput,
    // then we also have to call TEC for that as well (but I think this only makes
    // sense if the output is utf16(be) or utf32(be))
    // p.s. if this had been a TEC converter, then the eNormalizeOutput flag would
    // already have been reset to None (by this point), since we would have directly
    // requested that normalized form when we created the converter--see
    // TecEncConverter.PreConvert)
    string strOutput = new string(caOut);
#if DEBUG
    byte[] byteArray = Encoding.BigEndianUnicode.GetBytes(caOut);
    Util.DebugWriteLine(className, Util.getDisplayBytes("characters", byteArray));
    byte[] baResult = System.Text.Encoding.BigEndianUnicode.GetBytes(strOutput);
    Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized strOutput in UTF16BE", baResult));
#endif
    if ((eFormEngineOut != eOutEncodingForm)
        || (eNormalizeOutput != NormalizeFlags.None))
    {
        strOutput = EncConverters.UnicodeEncodingFormConvertEx(strOutput, eFormEngineOut, nItems, eOutEncodingForm, eNormalizeOutput, out nItems);
    }

    DisplayDebugUCharValues(strOutput, "Returning back to client...", ref bDebugDisplayMode);

    rciOutput = nItems;
    return(strOutput);
}
/// <summary>
/// Fills in any encoding form the caller left as Unspecified, based on this converter's
/// conversion type and the direction, then hands both forms to CheckForBadForm.
/// A legacy side defaults to LegacyString; a Unicode side defaults to whatever
/// DefaultUnicodeEncForm returns.
/// </summary>
/// <param name="bForward">True for the forward direction (input is the left-hand side).</param>
/// <param name="eInEncodingForm">Input form; replaced only when it is Unspecified.</param>
/// <param name="eOutEncodingForm">Output form; replaced only when it is Unspecified.</param>
protected void CheckInitEncForms(
    bool bForward,
    ref EncodingForm eInEncodingForm,
    ref EncodingForm eOutEncodingForm)
{
    Util.DebugWriteLine(className,
        String.Format("eEncFormIn1 {0}, eEncFormOut1 {1}", eInEncodingForm, eOutEncodingForm));

    // Input side: LHS when converting forward, RHS when converting in reverse.
    if (eInEncodingForm == EncodingForm.Unspecified)
    {
        NormConversionType eInputSide = bForward
            ? NormalizeLhsConversionType(m_eConversionType)
            : NormalizeRhsConversionType(m_eConversionType);

        eInEncodingForm = (eInputSide == NormConversionType.eLegacy)
            ? EncodingForm.LegacyString
            : DefaultUnicodeEncForm(bForward, true);
    }

    // Output side: the mirror image of the input side.
    if (eOutEncodingForm == EncodingForm.Unspecified)
    {
        NormConversionType eOutputSide = bForward
            ? NormalizeRhsConversionType(m_eConversionType)
            : NormalizeLhsConversionType(m_eConversionType);

        eOutEncodingForm = (eOutputSide == NormConversionType.eLegacy)
            ? EncodingForm.LegacyString
            : DefaultUnicodeEncForm(bForward, false);
    }

    Util.DebugWriteLine(className,
        String.Format("eEncFormIn2 {0}, eEncFormOut2 {1}", eInEncodingForm, eOutEncodingForm));

    CheckForBadForm(bForward, eInEncodingForm, eOutEncodingForm);
}
/// <summary>
/// The meat of the conversion process (string in, string out). The steps are:
///  o Check the input data.
///  o Resolve unspecified encoding forms (CheckInitEncForms).
///  o Give the sub-class (via PreConvert) the opportunity to load tables and do any
///    special preprocessing it needs ahead of the actual conversion.
///  o Normalize the input into a byte buffer (ECNormalizeData.GetBytes).
///  o Allocate an output buffer (at least 10000 bytes) and call the sub-class's DoConvert.
///  o Normalize the output to the requested form (ECNormalizeData.GetString).
/// Both buffers are managed arrays pinned with <c>fixed</c> for the duration of the call.
/// </summary>
/// <param name="eInEncodingForm">Form of <paramref name="sInput"/>; Unspecified picks the default.</param>
/// <param name="sInput">Data to convert. An empty string yields an empty result.</param>
/// <param name="ciInput">Item count of the input, or 0 to derive it from the string.</param>
/// <param name="eOutEncodingForm">Requested output form; Unspecified picks the default.</param>
/// <param name="eNormalizeOutput">Requested output normalization.</param>
/// <param name="rciOutput">Receives the item count of the result.</param>
/// <param name="bForward">Conversion direction.</param>
/// <returns>The converted data.</returns>
protected virtual unsafe string InternalConvertEx
(
    EncodingForm eInEncodingForm,
    string sInput,
    int ciInput,
    EncodingForm eOutEncodingForm,
    NormalizeFlags eNormalizeOutput,
    out int rciOutput,
    bool bForward
)
{
    // GetBytes and GetString each write four zero bytes just past the data, so both
    // buffers need this much room beyond the payload.
    const int cnTerminatorBytes = 4;

    Util.DebugWriteLine(className, "BEGIN");
    Util.DebugWriteLine(className, "eEncFormIn " + eInEncodingForm.ToString() + ", " + "eEncFormOut " + eOutEncodingForm.ToString());
    if (sInput == null)
    {
        EncConverters.ThrowError(ErrStatus.IncompleteChar);
    }
    if (sInput.Length == 0)
    {
        rciOutput = 0;
        return("");
    }

#if DEBUG && __MonoCS__
    // for debugging only BEGIN
    byte[] baIn = System.Text.Encoding.BigEndianUnicode.GetBytes(sInput);   // easier to read
    Util.DebugWriteLine(className, Util.getDisplayBytes("Input BigEndianUnicode", baIn));
    baIn = System.Text.Encoding.Unicode.GetBytes(sInput);
    Util.DebugWriteLine(className, Util.getDisplayBytes("Input Unicode", baIn));

    int nInLen = sInput.Length;
    byte [] baIn2 = new byte[nInLen];
    for (int i = 0; i < nInLen; i++)
    {
        baIn2[i] = (byte)(sInput[i] & 0xFF);
    }
    Util.DebugWriteLine(className, Util.getDisplayBytes("Input Narrowized", baIn2));
    // for debugging only END
#endif

    // if the user hasn't specified, then take the default case for the ConversionType:
    //  if L/RHS == eLegacy, then LegacyString
    //  if L/RHS == eUnicode, then UTF16
    CheckInitEncForms
    (
        bForward,
        ref eInEncodingForm,
        ref eOutEncodingForm
    );

    // allow the converter engine's (and/or its COM wrapper) to do some preprocessing.
    EncodingForm eFormEngineIn = EncodingForm.Unspecified, eFormEngineOut = EncodingForm.Unspecified;
    PreConvert
    (
        eInEncodingForm,    // [in] form in the BSTR
        ref eFormEngineIn,  // [out] form the conversion engine wants, etc.
        eOutEncodingForm,
        ref eFormEngineOut,
        ref eNormalizeOutput,
        bForward
    );

    // get enough space for us to normalize the input data (6x ought to be enough), plus
    // room for the terminator: without it a single character that becomes 3 UTF-8 bytes
    // (3 data + 4 terminator = 7) would overrun a 6-byte buffer.
    int nBufSize = sInput.Length * 6 + cnTerminatorBytes;
    byte[] abyInBuffer = new byte[nBufSize];
    fixed(byte *lpInBuffer = abyInBuffer)
    {
        // use a helper class to normalize the data to the format needed by the engine;
        // on return nBufSize holds the number of data bytes written.
        Util.DebugWriteLine(className, "Calling GetBytes");
        ECNormalizeData.GetBytes(sInput, ciInput, eInEncodingForm,
            ((bForward) ? CodePageInput : CodePageOutput), eFormEngineIn, lpInBuffer,
            ref nBufSize, ref m_bDebugDisplayMode);

#if DEBUG && __MonoCS__
        byte[] baOut = new byte[nBufSize];
        ECNormalizeData.ByteStarToByteArr(lpInBuffer, nBufSize, baOut);
        Util.DebugWriteLine(className, Util.getDisplayBytes("Input Bytes", baOut));
#endif

        // get some space for the converter to fill: at least 10000 bytes. The engine is
        // told the capacity is nOutLen; the array itself is a little larger so the
        // terminator GetString appends still fits when the engine uses all of it.
        int nOutLen = Math.Max(10000, nBufSize * 6);
        byte[] abyOutBuffer = new byte[nOutLen + cnTerminatorBytes];
        fixed(byte *lpOutBuffer = abyOutBuffer)
        {
            lpOutBuffer[0] = lpOutBuffer[1] = lpOutBuffer[2] = lpOutBuffer[3] = 0;

            // call the wrapper sub-classes' DoConvert to let them do it.
            Util.DebugWriteLine(className, "Calling DoConvert");
            DoConvert(lpInBuffer, nBufSize, lpOutBuffer, ref nOutLen);

#if DEBUG && __MonoCS__
            Util.DebugWriteLine(className, "Output length " + nOutLen.ToString());
            byte[] baOut2 = new byte[nOutLen];
            ECNormalizeData.ByteStarToByteArr(lpOutBuffer, nOutLen, baOut2);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Output In Bytes", baOut2));
            Util.DebugWriteLine(className, "Got val '" + System.Text.Encoding.Unicode.GetString(baOut2) + "'");
#endif

            string result = ECNormalizeData.GetString(lpOutBuffer, nOutLen, eOutEncodingForm,
                ((bForward) ? CodePageOutput : CodePageInput), eFormEngineOut, eNormalizeOutput,
                out rciOutput, ref m_bDebugDisplayMode);

#if DEBUG && __MonoCS__
            Util.DebugWriteLine(className, "normalized result '" + result + "'");
            byte[] baResult = System.Text.Encoding.BigEndianUnicode.GetBytes(result);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output in UTF16BE", baResult));
            baResult = System.Text.Encoding.Unicode.GetBytes(result);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output in UTF16LE", baResult));
            baResult = System.Text.Encoding.UTF8.GetBytes(result);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output In UTF8", baResult));
            Util.DebugWriteLine(className, "Returning.");
#endif
            return(result);
        }
    }
}
/// <summary>
/// Variant of InternalConvertEx for callers that want the engine's output as raw bytes
/// (e.g. legacy data): the bytes produced by DoConvert are returned as-is, without the
/// output normalization the string-returning overload performs.
/// </summary>
/// <param name="eInEncodingForm">Form of <paramref name="sInput"/>; Unspecified picks the default.</param>
/// <param name="sInput">Data to convert. An empty string yields an empty array.</param>
/// <param name="eOutEncodingForm">Requested output form; Unspecified picks the default.</param>
/// <param name="eNormalizeOutput">Requested output normalization (handed to PreConvert).</param>
/// <param name="rciOutput">Receives the number of bytes returned.</param>
/// <param name="bForward">Conversion direction.</param>
/// <returns>The bytes written by the engine.</returns>
protected virtual unsafe byte[] InternalConvertEx(EncodingForm eInEncodingForm,
    string sInput,
    EncodingForm eOutEncodingForm,
    NormalizeFlags eNormalizeOutput,
    out int rciOutput,
    bool bForward)
{
    // GetBytes writes four zero bytes just past the data it produces.
    const int cnTerminatorBytes = 4;

    Util.DebugWriteLine(className, "(output bytes) BEGIN");
    Util.DebugWriteLine(className, "eEncFormIn " + eInEncodingForm.ToString() + ", " + "eEncFormOut " + eOutEncodingForm.ToString());
    if (sInput == null)
    {
        EncConverters.ThrowError(ErrStatus.IncompleteChar);
    }
    Util.DebugWriteLine(className, "sInput.Length() is " + sInput.Length.ToString() + ".");
    if (sInput.Length == 0)
    {
        // this section added 11/10/2011 by Jim K
        rciOutput = 0;
        return(new byte[0]);
    }
    Util.DebugWriteLine(className, "sInput is " + sInput + ".");

    // if the user hasn't specified, then take the default case for the ConversionType:
    //  if L/RHS == eLegacy, then LegacyString
    //  if L/RHS == eUnicode, then UTF16
    CheckInitEncForms(bForward, ref eInEncodingForm, ref eOutEncodingForm);

    // allow the converter engine's (and/or its COM wrapper) to do some preprocessing.
    EncodingForm eFormEngineIn = EncodingForm.Unspecified, eFormEngineOut = EncodingForm.Unspecified;
    PreConvert(
        eInEncodingForm,    // [in] form in the BSTR
        ref eFormEngineIn,  // [out] form the conversion engine wants, etc.
        eOutEncodingForm,
        ref eFormEngineOut,
        ref eNormalizeOutput,
        bForward);

    // get enough space for us to normalize the input data (6x ought to be enough), plus
    // room for the terminator: without it a single character that becomes 3 UTF-8 bytes
    // (3 data + 4 terminator = 7) would overrun a 6-byte buffer.
    int nBufSize = sInput.Length * 6 + cnTerminatorBytes;
    byte[] abyInBuffer = new byte[nBufSize];
    fixed(byte *lpInBuffer = abyInBuffer)
    {
        // use a helper class to normalize the data to the format needed by the engine;
        // on return nBufSize holds the number of data bytes written.
        Util.DebugWriteLine(className, "Calling GetBytes");
        ECNormalizeData.GetBytes(sInput, sInput.Length, eInEncodingForm,
            ((bForward) ? CodePageInput : CodePageOutput), eFormEngineIn, lpInBuffer,
            ref nBufSize, ref m_bDebugDisplayMode);

        // get some space for the converter to fill: at least 10000 bytes.
        int nOutLen = Math.Max(10000, nBufSize * 6);
        byte[] abyOutBuffer = new byte[nOutLen];
        fixed(byte *lpOutBuffer = abyOutBuffer)
        {
            lpOutBuffer[0] = lpOutBuffer[1] = lpOutBuffer[2] = lpOutBuffer[3] = 0;

            // call the wrapper sub-classes' DoConvert to let them do it.
            Util.DebugWriteLine(className, "Calling DoConvert");
            DoConvert(lpInBuffer, nBufSize, lpOutBuffer, ref nOutLen);

            // copy out exactly the bytes the engine reported
            byte[] baOut = new byte[nOutLen];
            ECNormalizeData.ByteStarToByteArr(lpOutBuffer, nOutLen, baOut);

#if DEBUG
            Util.DebugWriteLine(className, "Output length " + nOutLen.ToString());
            Util.DebugWriteLine(className, Util.getDisplayBytes("Output In Bytes", baOut));
            Util.DebugWriteLine(className, "Returning.");
#endif
            rciOutput = nOutLen;
            return(baOut);
        }
    }
}
/// <summary>
/// Variant of InternalConvertEx for callers that supply the input as raw bytes (e.g.
/// legacy data): the array is handed to DoConvert as-is, without the input normalization
/// the string-accepting overloads perform, and the output is normalized by
/// ECNormalizeData.GetString.
/// </summary>
/// <param name="eInEncodingForm">Form of <paramref name="baInput"/>; Unspecified picks the default.</param>
/// <param name="baInput">Bytes to convert. An empty array yields an empty string.</param>
/// <param name="eOutEncodingForm">Requested output form; Unspecified picks the default.</param>
/// <param name="eNormalizeOutput">Requested output normalization.</param>
/// <param name="rciOutput">Receives the item count of the result.</param>
/// <param name="bForward">Conversion direction.</param>
/// <returns>The converted data.</returns>
protected virtual unsafe string InternalConvertEx(EncodingForm eInEncodingForm,
    byte[] baInput,
    EncodingForm eOutEncodingForm,
    NormalizeFlags eNormalizeOutput,
    out int rciOutput,
    bool bForward)
{
    // GetString writes four zero bytes just past the engine's output.
    const int cnTerminatorBytes = 4;

    Util.DebugWriteLine(className, "(input bytes) BEGIN");
    Util.DebugWriteLine(className, "eEncFormIn " + eInEncodingForm.ToString() + ", " + "eEncFormOut " + eOutEncodingForm.ToString());
    if (baInput == null)
    {
        EncConverters.ThrowError(ErrStatus.IncompleteChar);
    }
    if (baInput.Length == 0)
    {
        rciOutput = 0;
        return("");
    }

    // if the user hasn't specified, then take the default case for the ConversionType:
    //  if L/RHS == eLegacy, then LegacyString
    //  if L/RHS == eUnicode, then UTF16
    CheckInitEncForms(bForward, ref eInEncodingForm, ref eOutEncodingForm);

    // allow the converter engine's (and/or its COM wrapper) to do some preprocessing.
    EncodingForm eFormEngineIn = EncodingForm.Unspecified, eFormEngineOut = EncodingForm.Unspecified;
    PreConvert(
        eInEncodingForm,    // [in] form in the BSTR
        ref eFormEngineIn,  // [out] form the conversion engine wants, etc.
        eOutEncodingForm,
        ref eFormEngineOut,
        ref eNormalizeOutput,
        bForward);

    int nBufSize = baInput.Length;
    fixed(byte *lpInBuffer = baInput)
    {
        // The engine is told the capacity is nOutLen; the array itself is a little larger
        // so the terminator GetString appends still fits when the engine uses all of it.
        int nOutLen = Math.Max(10000, nBufSize * 6);
        byte[] abyOutBuffer = new byte[nOutLen + cnTerminatorBytes];
        fixed(byte *lpOutBuffer = abyOutBuffer)
        {
            lpOutBuffer[0] = lpOutBuffer[1] = lpOutBuffer[2] = lpOutBuffer[3] = 0;

            // call the wrapper sub-classes' DoConvert to let them do it.
            Util.DebugWriteLine(className, "Calling DoConvert");
            DoConvert(lpInBuffer, nBufSize, lpOutBuffer, ref nOutLen);
            Util.DebugWriteLine(className, "Output length " + nOutLen.ToString());

            byte[] baOut = new byte[nOutLen];
            ECNormalizeData.ByteStarToByteArr(lpOutBuffer, nOutLen, baOut);

#if DEBUG
            Util.DebugWriteLine(className, Util.getDisplayBytes("Output In Bytes", baOut));
            Util.DebugWriteLine(className, "Got val '" + System.Text.Encoding.Unicode.GetString(baOut) + "'");
#endif

            string result = ECNormalizeData.GetString(lpOutBuffer, nOutLen, eOutEncodingForm,
                ((bForward) ? CodePageOutput : CodePageInput), eFormEngineOut, eNormalizeOutput,
                out rciOutput, ref m_bDebugDisplayMode);

#if DEBUG
            Util.DebugWriteLine(className, "normalized result '" + result + "'");
            byte[] baResult = System.Text.Encoding.BigEndianUnicode.GetBytes(result);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output in UTF16BE", baResult));
            baResult = System.Text.Encoding.Unicode.GetBytes(result);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output in UTF16LE", baResult));
            baResult = System.Text.Encoding.UTF8.GetBytes(result);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output In UTF8", baResult));
            Util.DebugWriteLine(className, "Returning.");
#endif
            return(result);
        }
    }
}
/// <summary>
/// TECkit pre-conversion step. Runs the base implementation, then selects the TECkit
/// converter for this combination of input form, output form, normalization and direction:
/// a cached one from m_mapConverters when available, otherwise a newly created one (which
/// is then cached). A cached converter is discarded first when the backing map/tec file
/// has a newer modification time than m_timeModifiedTec.
/// </summary>
/// <param name="eInEncodingForm">Client input form (the *Bytes forms are treated as *String here).</param>
/// <param name="eInFormEngine">Set by the base implementation.</param>
/// <param name="eOutEncodingForm">Client output form (the *Bytes forms are treated as *String here).</param>
/// <param name="eOutFormEngine">Set by the base implementation.</param>
/// <param name="eNormalizeOutput">Requested normalization; folded into the converter's output
/// form and reset to None on return so it is not applied a second time later.</param>
/// <param name="bForward">Conversion direction.</param>
protected unsafe override void PreConvert(
    EncodingForm eInEncodingForm,
    ref EncodingForm eInFormEngine,
    EncodingForm eOutEncodingForm,
    ref EncodingForm eOutFormEngine,
    ref NormalizeFlags eNormalizeOutput,
    bool bForward)
{
    // let the base class do its thing first
    base.PreConvert(eInEncodingForm, ref eInFormEngine, eOutEncodingForm, ref eOutFormEngine,
        ref eNormalizeOutput, bForward);

    // The TECkit engine expects the *String value even when the data arrives in the
    // matching *Bytes form (e.g. '1' = LegacyString for LegacyBytes); the data itself is
    // still converted correctly later on.
    switch (eInEncodingForm)
    {
        case EncodingForm.LegacyBytes:
            eInEncodingForm = EncodingForm.LegacyString;
            break;
        case EncodingForm.UTF8Bytes:
            eInEncodingForm = EncodingForm.UTF8String;
            break;
    }

    switch (eOutEncodingForm)
    {
        case EncodingForm.LegacyBytes:
            eOutEncodingForm = EncodingForm.LegacyString;
            break;
        case EncodingForm.UTF8Bytes:
            eOutEncodingForm = EncodingForm.UTF8String;
            break;
    }

    // One cached converter per (input form, output form, normalization, direction).
    string strConverterKey = String.Concat(
        eInEncodingForm.ToString(),
        eOutEncodingForm.ToString(),
        eNormalizeOutput.ToString(),
        bForward.ToString());

    // Staleness check: against the map source for a compilable map (i.e. ImplType SIL.map),
    // otherwise against the tec file when one is loaded.
    bool bReload = false;
    bool bUsesMapSource = m_bCompileable && !String.IsNullOrEmpty(m_strMapFileSpec);
    if (bUsesMapSource || IsFileLoaded())
    {
        string strFileSpec = bUsesMapSource ? m_strMapFileSpec : m_strTecFileSpec;

        // make sure the file is there and get the last time it was modified
        DateTime timeModified = DateTime.Now;   // placeholder; has to be initialized for the ref
        if (!DoesFileExist(strFileSpec, ref timeModified))
        {
            EncConverters.ThrowError(ErrStatus.CantOpenReadMap, strFileSpec);
        }

        bool bCachedIsStale = (timeModified > m_timeModifiedTec)
            && m_mapConverters.ContainsKey(strConverterKey);
        if (bCachedIsStale)
        {
            if (!bUsesMapSource)
            {
                m_baMapping = null; // triggers a reload
                m_lhsFlags = m_rhsFlags = 0;
            }

            // drop the cached converter so the creation path below runs
            ResetConverter((IntPtr)m_mapConverters[strConverterKey]);
            m_mapConverters.Remove(strConverterKey);
            bReload = true;
        }
    }

    if (!m_mapConverters.ContainsKey(strConverterKey))
    {
        int nStatus = (int)ErrStatus.NoError;

        // load the map now
        Load(bReload);

        // the output form and the normalization flags are OR'ed into a single value
        UInt16 eFormOut = System.Convert.ToUInt16(
            System.Convert.ToUInt16((int)eOutEncodingForm)
            | System.Convert.ToUInt16((int)eNormalizeOutput));
        byte byForward = bForward ? (byte)1 : (byte)0;

        // make a converter for this new combination.
        Util.DebugWriteLine(this,
            String.Format("Creating TECkit converter: in {0}, out {1}", eInEncodingForm, eOutEncodingForm));
        if (IsFileLoaded())
        {
            fixed(byte *pbyMapping = m_baMapping)
            {
                nStatus = TECkit_CreateConverter(
                    pbyMapping,
                    m_nMapSize,
                    byForward,
                    System.Convert.ToUInt16((int)eInEncodingForm),
                    eFormOut,
                    out m_converter);
            }
        }
        else
        {
            nStatus = TECkit_CreateConverter(
                (byte *)0,
                m_nMapSize,
                byForward,
                System.Convert.ToUInt16((int)eInEncodingForm),
                eFormOut,
                out m_converter);
        }

        if (nStatus != (int)ErrStatus.NoError)
        {
            EncConverters.ThrowError(nStatus);
        }
        else
        {
            m_mapConverters[strConverterKey] = m_converter;
        }
    }
    else
    {
        m_converter = (IntPtr)m_mapConverters[strConverterKey];
    }

    // since TEC can handle output normalization directly (by requesting it here
    // in the creation of the converter), reset the requesting flag so we won't
    // attempt to do it later (all other converters that can't do implicit output
    // normalization will *not* have reset the flag and then after their conversion,
    // if the flag is still set, we'll call TEC to do it for them; see
    // ECNormalizeData.GetString).
    eNormalizeOutput = NormalizeFlags.None;
}
// Gets m_converter ready for the requested combination of encoding forms, output
// normalization and direction: a handle already cached in m_mapConverters under the
// same key is reused; otherwise the map is loaded and a new TECkit converter is
// created and cached.
//
//  eInEncodingForm / eOutEncodingForm  forms requested by the caller
//  eInFormEngine / eOutFormEngine      forwarded by ref to base.PreConvert
//  eNormalizeOutput                    requested output normalization; always comes
//                                      back as NormalizeFlags.None (see end of method)
//  bForward                            direction passed on to TECkit_CreateConverter
//
// Failures are reported through EncConverters.ThrowError.
protected unsafe override void PreConvert
(
    EncodingForm        eInEncodingForm,
    ref EncodingForm    eInFormEngine,
    EncodingForm        eOutEncodingForm,
    ref EncodingForm    eOutFormEngine,
    ref NormalizeFlags  eNormalizeOutput,
    bool                bForward
)
{
    // The base class goes first and sees the forms exactly as the caller gave them.
    base.PreConvert(
        eInEncodingForm, ref eInFormEngine,
        eOutEncodingForm, ref eOutFormEngine,
        ref eNormalizeOutput, bForward);

    // The TECkit "converter" object is created with the *String value even when the
    // caller asked for the matching *Bytes form (e.g. LegacyBytes is presented as
    // LegacyString). The data itself still gets converted correctly later on; only
    // the value handed over at creation time is adjusted here.
    EncodingForm eTecIn = eInEncodingForm;
    switch (eInEncodingForm)
    {
        case EncodingForm.LegacyBytes:
            eTecIn = EncodingForm.LegacyString;
            break;
        case EncodingForm.UTF8Bytes:
            eTecIn = EncodingForm.UTF8String;
            break;
    }

    EncodingForm eTecOut = eOutEncodingForm;
    switch (eOutEncodingForm)
    {
        case EncodingForm.LegacyBytes:
            eTecOut = EncodingForm.LegacyString;
            break;
        case EncodingForm.UTF8Bytes:
            eTecOut = EncodingForm.UTF8String;
            break;
    }

    // One cached converter per (in form, out form, normalization, direction).
    string strKey = String.Concat(
        eTecIn.ToString(),
        eTecOut.ToString(),
        eNormalizeOutput.ToString(),
        bForward.ToString());

    // Decide which file (if any) has to be checked for having changed underneath us:
    // the map source for a compilable map (i.e. ImplType SIL.map), otherwise the tec
    // file when one is currently loaded.
    bool bCheckMapFile = m_bCompileable && !String.IsNullOrEmpty(m_strMapFileSpec);
    bool bCheckTecFile = !bCheckMapFile && IsFileLoaded();

    bool bMustReload = false;
    if (bCheckMapFile || bCheckTecFile)
    {
        string strWatchedFile = bCheckMapFile ? m_strMapFileSpec : m_strTecFileSpec;

        // the initial value is irrelevant; the variable just has to be initialized
        // before it can be passed by ref.
        DateTime dtModified = DateTime.Now;
        if (!DoesFileExist(strWatchedFile, ref dtModified))
        {
            EncConverters.ThrowError(ErrStatus.CantOpenReadMap, strWatchedFile);
        }

        // Newer than what we have *and* a converter is cached for this key: throw
        // the cached one away so we fall through to Load + create below.
        bool bCachedIsStale = (dtModified > m_timeModifiedTec)
            && m_mapConverters.ContainsKey(strKey);
        if (bCachedIsStale)
        {
            if (bCheckTecFile)
            {
                m_baMapping = null;     // triggers a reload
                m_lhsFlags = m_rhsFlags = 0;
            }

            ResetConverter((Int32)m_mapConverters[strKey]);
            m_mapConverters.Remove(strKey);
            bMustReload = true;
        }
    }

    if (!m_mapConverters.ContainsKey(strKey))
    {
        int status = (int)ErrStatus.NoError;

        // load the map now
        Load(bMustReload);

        // The requested normalization is OR'ed into the output form value.
        ushort usOutForm = System.Convert.ToUInt16((int)eTecOut);
        ushort usNormalize = System.Convert.ToUInt16((int)eNormalizeOutput);
        UInt16 usOutFormAndFlags = System.Convert.ToUInt16(usOutForm | usNormalize);

        // With no file loaded the mapping pointer handed to TECkit is null; pinning
        // a null array reference yields exactly that.
        byte[] abyMapping = IsFileLoaded() ? m_baMapping : null;

        // make a converter for this new combination.
        fixed (Int32* pConverter = &m_converter)
        fixed (byte* pbyMapping = abyMapping)
        {
            status = TECkit_CreateConverter(
                pbyMapping,
                m_nMapSize,
                (byte)(bForward ? 1 : 0),
                System.Convert.ToUInt16((int)eTecIn),
                usOutFormAndFlags,
                (void*)pConverter);
        }

        if (status != (int)ErrStatus.NoError)
        {
            EncConverters.ThrowError(status);
        }
        else
        {
            m_mapConverters[strKey] = m_converter;
        }
    }
    else
    {
        // already have one for this combination
        m_converter = (Int32)m_mapConverters[strKey];
    }

    // TEC does output normalization itself (it was requested above when the converter
    // was created), so clear the request flag to keep anyone from attempting it again
    // later. Converters that can't normalize implicitly leave the flag set, and TEC is
    // then called on their behalf after the conversion (see ECNormalizeData.GetString).
    eNormalizeOutput = NormalizeFlags.None;
}