// Implicit-length conversion entry point: forwards to InternalConvertEx with an
// input item count of 0 and throws away the output item count it reports.
protected override string InternalConvert(
    EncodingForm eInEncodingForm,
    string sInput,
    EncodingForm eOutEncodingForm,
    NormalizeFlags eNormalizeOutput,
    bool bForward)
{
    // Only the 'implicit' methods (e.g. ConvertToUnicode) come through here.
    // For those, the string itself carries its length, so a count of 0 is
    // passed to tell the Ex routine to take the length from the string
    // (the data may legitimately contain embedded zero values).
    int cItemsOut;
    string sConverted = InternalConvertEx(
        eInEncodingForm,
        sInput,
        0,
        eOutEncodingForm,
        eNormalizeOutput,
        out cItemsOut,
        bForward);

    return sConverted;
}
// Pre-conversion hook: after the base class has run, forces both engine-side
// forms to LegacyBytes, records the direction, and loads the matching table.
protected override void PreConvert(
    EncodingForm eInEncodingForm,
    ref EncodingForm eInFormEngine,
    EncodingForm eOutEncodingForm,
    ref EncodingForm eOutFormEngine,
    ref NormalizeFlags eNormalizeOutput,
    bool bForward)
{
    // Base class goes first.
    base.PreConvert(eInEncodingForm, ref eInFormEngine,
        eOutEncodingForm, ref eOutFormEngine,
        ref eNormalizeOutput, bForward);

    // Both sides of this engine are LegacyBytes, whatever the base class chose.
    eInFormEngine = EncodingForm.LegacyBytes;
    eOutFormEngine = EncodingForm.LegacyBytes;

    // Remember the direction so it can be consulted during DoConvert,
    // then load the table for that direction.
    m_bForward = bForward;
    if (!m_bForward)
    {
        LoadReverse();
    }
    else
    {
        LoadForward();
    }
}
// Every sub-class needs the same basic input/output encoding-form processing,
// so they should mostly funnel through this function and InternalConvertEx.
//
// This overload emits a "BEGIN" debug trace and then forwards to
// InternalConvertEx with an input item count of 0, discarding the output
// item count.
protected virtual string InternalConvert(
    EncodingForm eInEncodingForm,
    string sInput,
    EncodingForm eOutEncodingForm,
    NormalizeFlags eNormalizeOutput,
    bool bForward)
{
    Util.DebugWriteLine(className, "BEGIN");

    // Only the 'implicit' methods (e.g. ConvertToUnicode) call this. For
    // them the string carries its own length, so 0 is passed to make the Ex
    // routine derive the length from the string (which may contain embedded
    // zero values and so is not necessarily null terminated).
    int cItemsOut;
    string sConverted = InternalConvertEx(
        eInEncodingForm,
        sInput,
        0,
        eOutEncodingForm,
        eNormalizeOutput,
        out cItemsOut,
        bForward);

    return sConverted;
}
// Pre-conversion hook: picks one 'String' engine form for both sides based on
// m_bLegacy, records the lookup direction, and (re-)loads the data via Load().
protected override void PreConvert(
    EncodingForm eInEncodingForm,
    ref EncodingForm eInFormEngine,
    EncodingForm eOutEncodingForm,
    ref EncodingForm eOutFormEngine,
    ref NormalizeFlags eNormalizeOutput,
    bool bForward)
{
    // Base class goes first.
    base.PreConvert(eInEncodingForm, ref eInFormEngine,
        eOutEncodingForm, ref eOutFormEngine,
        ref eNormalizeOutput, bForward);

    // This converter only handles 'String' flavors: LegacyString for a
    // Legacy_to(_from)_Legacy converter, UTF16 for Unicode_to(_from)_Unicode.
    EncodingForm eEngineForm = m_bLegacy
        ? EncodingForm.LegacyString
        : EncodingForm.UTF16;
    eOutFormEngine = eEngineForm;
    eInFormEngine = eEngineForm;

    // The bForward passed here can differ from the converter's own
    // DirectionForward (e.g. when the call came in through ConvertEx), so the
    // direction for the upcoming DoConvert is taken from *this* argument.
    m_bReverseLookup = !bForward;

    // Let Load() decide whether the file(s) need (re-)loading.
    Load();
}
// Pre-conversion hook: chooses the engine-side encoding forms from the
// converter's ConversionType (wide Unicode or LegacyBytes on each side), loads
// the converter, and passes the chosen settings to the C++ layer.
protected override void PreConvert(
    EncodingForm eInEncodingForm,
    ref EncodingForm eInFormEngine,
    EncodingForm eOutEncodingForm,
    ref EncodingForm eOutFormEngine,
    ref NormalizeFlags eNormalizeOutput,
    bool bForward)
{
    // Base class goes first.
    base.PreConvert(eInEncodingForm, ref eInFormEngine,
        eOutEncodingForm, ref eOutFormEngine,
        ref eNormalizeOutput, bForward);

    // Input side. UTF-8 would also be possible, but wide data works fine.
    // The Windows build needs UTF16; since _MSC_VER is not defined there,
    // the test is phrased in terms of the Mono compiler symbol instead.
    if (NormalizeLhsConversionType(ConversionType) != NormConversionType.eUnicode)
    {
        // legacy
        Util.DebugWriteLine(this, "eInFormEngine LegacyBytes");
        eInFormEngine = EncodingForm.LegacyBytes;
    }
    else
    {
#if __MonoCS__
        Util.DebugWriteLine(this, "eInFormEngine UTF32");
        eInFormEngine = EncodingForm.UTF32;
#else
        Util.DebugWriteLine(this, "eInFormEngine UTF16");
        eInFormEngine = EncodingForm.UTF16;
#endif
    }

    // Output side, same rule.
    if (NormalizeRhsConversionType(ConversionType) != NormConversionType.eUnicode)
    {
        Util.DebugWriteLine(this, "eOutFormEngine LegacyBytes");
        eOutFormEngine = EncodingForm.LegacyBytes;
    }
    else
    {
#if __MonoCS__
        Util.DebugWriteLine(this, "eOutFormEngine UTF32");
        eOutFormEngine = EncodingForm.UTF32;
#else
        Util.DebugWriteLine(this, "eOutFormEngine UTF16");
        eOutFormEngine = EncodingForm.UTF16;
#endif
    }

    // Load now, then hand the encoding-form settings to the C++ side.
    Load();

    int nInEngine = (int)eInFormEngine;
    int nOutEngine = (int)eOutFormEngine;
    int nNormalize = (int)eNormalizeOutput;
    CppPreConvert(nInEngine, nOutEngine, nNormalize, bForward);
}
// Overridden from EncConverter because supplying a "DoConvert" is not enough
// here: each step's own convert function has to be called. Overridden again
// relative to the compound converter so that the second step (the fallback
// converter) runs only when the first step returns the input unchanged.
protected override string InternalConvertEx(
    EncodingForm eInEncodingForm,
    string sInput,
    int ciInput,
    EncodingForm eOutEncodingForm,
    NormalizeFlags eNormalizeOutput,
    out int rciOutput,
    bool bForward)
{
    // Exactly two steps are required: primary and fallback.
    if (CountConverters != 2)
    {
        EncConverters.ThrowError(ErrStatus.FallbackTwoStepsRequired);
    }

    // ---- step 1: the primary converter ----
    IEncConverter rPrimary = (IEncConverter)m_aEncConverter[0];
    if (rPrimary == null)
    {
        EncConverters.ThrowError(ErrStatus.MissingConverter);
    }

    rPrimary.Debug = Debug;

    // The step runs in its configured direction when bForward is set and in
    // the opposite direction otherwise.
    bool bStepForward = (bool)m_aDirectionForward[0];
    if (!bForward)
    {
        bStepForward = !bStepForward;
    }

    string strOutput = rPrimary.ConvertEx(
        sInput, eInEncodingForm, ciInput,
        eOutEncodingForm, out rciOutput,
        eNormalizeOutput, bStepForward);

    // The primary step changed something: its result stands.
    if (strOutput != sInput)
    {
        return strOutput;
    }

    // ---- step 2: the fallback converter, run on the original input ----
    IEncConverter rFallback = (IEncConverter)m_aEncConverter[1];
    if (rFallback == null)
    {
        EncConverters.ThrowError(ErrStatus.MissingConverter);
    }

    rFallback.Debug = Debug;

    bStepForward = (bool)m_aDirectionForward[1];
    if (!bForward)
    {
        bStepForward = !bStepForward;
    }

    strOutput = rFallback.ConvertEx(
        sInput, eInEncodingForm, ciInput,
        eOutEncodingForm, out rciOutput,
        eNormalizeOutput, bStepForward);

    return strOutput;
}
// Pre-conversion hook: chooses the engine-side forms from ConversionType,
// loads the converter named by ConverterIdentifier, and then lets the C++
// layer adjust the engine forms and normalization flags.
protected override unsafe void PreConvert(
    EncodingForm eInEncodingForm,
    ref EncodingForm eInFormEngine,
    EncodingForm eOutEncodingForm,
    ref EncodingForm eOutFormEngine,
    ref NormalizeFlags eNormalizeOutput,
    bool bForward)
{
    // Base class goes first.
    base.PreConvert(eInEncodingForm, ref eInFormEngine,
        eOutEncodingForm, ref eOutFormEngine,
        ref eNormalizeOutput, bForward);

    // Input side.
    if (NormalizeLhsConversionType(ConversionType) != NormConversionType.eUnicode)
    {
        // legacy
        eInFormEngine = EncodingForm.LegacyBytes;
    }
    else
    {
#if __MonoCS__
        // Requesting UTF8Bytes makes the framework turn Unicode input of any
        // form (UTF16, BE, etc.) into narrow UTF8 bytes before DoConvert.
        eInFormEngine = EncodingForm.UTF8Bytes;
#else
        eInFormEngine = EncodingForm.UTF16;
#endif
    }

    // Output side, same rule.
    if (NormalizeRhsConversionType(ConversionType) != NormConversionType.eUnicode)
    {
        eOutFormEngine = EncodingForm.LegacyBytes;
    }
    else
    {
#if __MonoCS__
        eOutFormEngine = EncodingForm.UTF8Bytes;
#else
        eOutFormEngine = EncodingForm.UTF16;
#endif
    }

    // Load at this point.
    Load(ConverterIdentifier);

    // Finally the C++ code gets its turn. The values it may modify travel as
    // plain ints by reference and are copied back afterwards.
    int nInEngine = (int)eInFormEngine;
    int nOutEngine = (int)eOutFormEngine;
    int nNormalize = (int)eNormalizeOutput;

    CppPreconvert(
        (int)eInEncodingForm, ref nInEngine,
        (int)eOutEncodingForm, ref nOutEngine,
        ref nNormalize, bForward, 0);

    eInFormEngine = (EncodingForm)nInEngine;
    eOutFormEngine = (EncodingForm)nOutEngine;
    eNormalizeOutput = (NormalizeFlags)nNormalize;
}
// Fills in any Unspecified input/output encoding form with the default for
// this converter's conversion type and direction, then validates the pair
// through CheckForBadForm.
//   - a legacy side defaults to LegacyString
//   - a Unicode side defaults to whatever DefaultUnicodeEncForm returns
protected void CheckInitEncForms(
    bool bForward,
    ref EncodingForm eInEncodingForm,
    ref EncodingForm eOutEncodingForm)
{
    if (eInEncodingForm == EncodingForm.Unspecified)
    {
        // Going forward the input is the left-hand side; in reverse it is
        // the right-hand side.
        NormConversionType eInSide = bForward
            ? NormalizeLhsConversionType(m_eConversionType)
            : NormalizeRhsConversionType(m_eConversionType);

        eInEncodingForm = (eInSide == NormConversionType.eLegacy)
            ? EncodingForm.LegacyString
            : DefaultUnicodeEncForm(bForward, true);     // eUnicode
    }

    if (eOutEncodingForm == EncodingForm.Unspecified)
    {
        // The output is the opposite side from the input.
        NormConversionType eOutSide = bForward
            ? NormalizeRhsConversionType(m_eConversionType)
            : NormalizeLhsConversionType(m_eConversionType);

        eOutEncodingForm = (eOutSide == NormConversionType.eLegacy)
            ? EncodingForm.LegacyString
            : DefaultUnicodeEncForm(bForward, false);    // eUnicode
    }

    CheckForBadForm(bForward, eInEncodingForm, eOutEncodingForm);
}
// Pre-conversion hook: chooses the engine-side forms from ConversionType and
// the platform, then loads the converter named by ConverterIdentifier.
//   Unicode side: UTF8Bytes when Util.IsUnix is true, UTF16 otherwise
//   legacy side:  LegacyBytes
protected override void PreConvert(
    EncodingForm eInEncodingForm,
    ref EncodingForm eInFormEngine,
    EncodingForm eOutEncodingForm,
    ref EncodingForm eOutFormEngine,
    ref NormalizeFlags eNormalizeOutput,
    bool bForward)
{
    // Base class goes first.
    base.PreConvert(eInEncodingForm, ref eInFormEngine,
        eOutEncodingForm, ref eOutFormEngine,
        ref eNormalizeOutput, bForward);

    // Input side.
    if (NormalizeLhsConversionType(ConversionType) != NormConversionType.eUnicode)
    {
        // legacy
        eInFormEngine = EncodingForm.LegacyBytes;
    }
    else if (Util.IsUnix)
    {
        // Requesting UTF8Bytes makes the framework turn Unicode input of any
        // form (UTF16, BE, etc.) into narrow UTF8 bytes before DoConvert.
        eInFormEngine = EncodingForm.UTF8Bytes;
    }
    else
    {
        eInFormEngine = EncodingForm.UTF16;
    }

    // Output side, same rule as the input side.
    if (NormalizeRhsConversionType(ConversionType) != NormConversionType.eUnicode)
    {
        eOutFormEngine = EncodingForm.LegacyBytes;
    }
    else if (Util.IsUnix)
    {
        eOutFormEngine = EncodingForm.UTF8Bytes;
    }
    else
    {
        eOutFormEngine = EncodingForm.UTF16;
    }

    // Load at this point.
    Load(ConverterIdentifier);
}
// Pre-conversion hook for the code page converter: works out whether this
// call goes "to wide" or "from wide", stores that in m_bToWide, and sets the
// engine-side forms accordingly.
protected override void PreConvert(
    EncodingForm eInEncodingForm,
    ref EncodingForm eInFormEngine,
    EncodingForm eOutEncodingForm,
    ref EncodingForm eOutFormEngine,
    ref NormalizeFlags eNormalizeOutput,
    bool bForward)
{
    // Base class goes first.
    base.PreConvert(eInEncodingForm, ref eInFormEngine,
        eOutEncodingForm, ref eOutFormEngine,
        ref eNormalizeOutput, bForward);

    // The direction has to come from the argument: m_bForward may differ
    // (e.g. when called via ConvertEx). A non-legacy input paired with a
    // legacy output flips the sense.
    bool bNonLegacyToLegacy =
        !IsLegacyFormat(eInEncodingForm) && IsLegacyFormat(eOutEncodingForm);
    m_bToWide = bNonLegacyToLegacy ? !bForward : bForward;

    // The UTF8 code page is the one code page converter where both sides are
    // Unicode; its narrow side is requested as UTF8Bytes.
    bool bUtf8CodePage = (m_nCodePage == CP_UTF8);

    if (!m_bToWide)
    {
        // "From wide": the engine's input form is UTF16.
        eInFormEngine = EncodingForm.UTF16;

        if (bUtf8CodePage)
        {
            eOutFormEngine = EncodingForm.UTF8Bytes;
        }
        else if (IsLegacyFormat(eOutEncodingForm))
        {
            eOutFormEngine = EncodingForm.LegacyString;
        }

        return;
    }

    // "To wide": the engine's output form is UTF16.
    eOutFormEngine = EncodingForm.UTF16;
    eInFormEngine = bUtf8CodePage
        ? EncodingForm.UTF8Bytes
        : EncodingForm.LegacyBytes;
}
// Pre-conversion hook: runs the base implementation, records the requested
// direction in _bForward, and calls Load() if IsLoaded is false.
protected override void PreConvert(
    EncodingForm eInEncodingForm,
    ref EncodingForm eInFormEngine,
    EncodingForm eOutEncodingForm,
    ref EncodingForm eOutFormEngine,
    ref NormalizeFlags eNormalizeOutput,
    bool bForward)
{
    base.PreConvert(
        eInEncodingForm, ref eInFormEngine,
        eOutEncodingForm, ref eOutFormEngine,
        ref eNormalizeOutput, bForward);

    _bForward = bForward;

    // Nothing more to do once the converter is loaded.
    if (IsLoaded)
    {
        return;
    }

    Load();
}
// Overridden from EncConverter because supplying a "DoConvert" is not enough
// here: each step's own convert function has to be called. Overridden again
// relative to the compound converter so that the second step (the fallback
// converter) runs only when the first step returns the input unchanged.
protected override string InternalConvertEx(
    EncodingForm eInEncodingForm,
    string sInput,
    int ciInput,
    EncodingForm eOutEncodingForm,
    NormalizeFlags eNormalizeOutput,
    out int rciOutput,
    bool bForward)
{
    // Exactly two steps are required: primary and fallback.
    if (CountConverters != 2)
        EncConverters.ThrowError(ErrStatus.FallbackTwoStepsRequired);

    // ---- step 1: the primary converter ----
    IEncConverter rPrimary = (IEncConverter)m_aEncConverter[0];
    if (rPrimary == null)
        EncConverters.ThrowError(ErrStatus.MissingConverter);

    rPrimary.Debug = Debug;

    // The step runs in its configured direction when bForward is set and in
    // the opposite direction otherwise.
    bool bConfiguredForward = (bool)m_aDirectionForward[0];
    bool bStepForward = bForward ? bConfiguredForward : !bConfiguredForward;

    string strResult = rPrimary.ConvertEx(
        sInput,
        eInEncodingForm,
        ciInput,
        eOutEncodingForm,
        out rciOutput,
        eNormalizeOutput,
        bStepForward);

    // The primary step changed something: its result stands.
    if (strResult != sInput)
        return strResult;

    // ---- step 2: the fallback converter, run on the original input ----
    IEncConverter rFallback = (IEncConverter)m_aEncConverter[1];
    if (rFallback == null)
        EncConverters.ThrowError(ErrStatus.MissingConverter);

    rFallback.Debug = Debug;

    bConfiguredForward = (bool)m_aDirectionForward[1];
    bStepForward = bForward ? bConfiguredForward : !bConfiguredForward;

    strResult = rFallback.ConvertEx(
        sInput,
        eInEncodingForm,
        ciInput,
        eOutEncodingForm,
        out rciOutput,
        eNormalizeOutput,
        bStepForward);

    return strResult;
}
protected bool m_bIsInRepository; // indicates whether this converter is in the static repository (true) or not (false) #endregion Member Variable Definitions #region Public Interface /// <summary> /// The class constructor. </summary> public EncConverter(string sProgId, string sImplementType) { m_strProgramID = sProgId; m_strImplementType = sImplementType; m_lProcessType = (Int32)ProcessTypeFlags.DontKnow; m_eConversionType = ConvType.Legacy_to_from_Unicode; m_bForward = true; m_eEncodingInput = EncodingForm.Unspecified; m_eEncodingOutput = EncodingForm.Unspecified; m_eNormalizeOutput = NormalizeFlags.None; m_nCodePageInput = 0; m_nCodePageOutput = 0; m_bDebugDisplayMode = false; m_bIsInRepository = false; }
// Default pre-conversion hook: reports the caller's encoding forms as the
// forms the engine wants, leaving eNormalizeOutput untouched.
//
// Per the original author, this default is almost never right for a real
// engine, so each engine wrapper is expected to override it; it exists to
// show what an override must do. For example, CC wants UTF8Bytes for Unicode
// input, so its override sets eInFormEngine to UTF8Bytes.
protected virtual void PreConvert(
    EncodingForm eInEncodingForm,
    ref EncodingForm eInFormEngine,
    EncodingForm eOutEncodingForm,
    ref EncodingForm eOutFormEngine,
    ref NormalizeFlags eNormalizeOutput,
    bool bForward)
{
    eOutFormEngine = eOutEncodingForm;
    eInFormEngine = eInEncodingForm;
}
// Pre-conversion hook for the CC engine: requests UTF8Bytes for any Unicode
// side and LegacyBytes for any legacy side, then loads the table named by
// ConverterIdentifier.
protected override void PreConvert(
    EncodingForm eInEncodingForm,
    ref EncodingForm eInFormEngine,
    EncodingForm eOutEncodingForm,
    ref EncodingForm eOutFormEngine,
    ref NormalizeFlags eNormalizeOutput,
    bool bForward)
{
    // Base class goes first.
    base.PreConvert(eInEncodingForm, ref eInFormEngine,
        eOutEncodingForm, ref eOutFormEngine,
        ref eNormalizeOutput, bForward);

    // The CC DLL usually works in UTF8 for Unicode. A possible future
    // enhancement is a (marked) repository value naming the form to use
    // (UTF8Bytes by default, something else for e.g. a UTF32 table written
    // with the xYYYY syntax instead of uXXXX). For now every Unicode CC table
    // is assumed to want UTF8.

    // Requesting UTF8Bytes on the input side makes the framework turn Unicode
    // input of any form (UTF16, BE, etc.) into narrow UTF8 bytes before
    // DoConvert is called.
    bool bUnicodeIn =
        NormalizeLhsConversionType(ConversionType) == NormConversionType.eUnicode;
    eInFormEngine = bUnicodeIn
        ? EncodingForm.UTF8Bytes
        : EncodingForm.LegacyBytes;     // legacy

    bool bUnicodeOut =
        NormalizeRhsConversionType(ConversionType) == NormConversionType.eUnicode;
    eOutFormEngine = bUnicodeOut
        ? EncodingForm.UTF8Bytes
        : EncodingForm.LegacyBytes;

    // Load at this point.
    Load(ConverterIdentifier);
}
// Pre-conversion hook: keeps the engine forms chosen by the base class and
// loads the converter named by ConverterIdentifier.
protected override void PreConvert(
    EncodingForm eInEncodingForm,
    ref EncodingForm eInFormEngine,
    EncodingForm eOutEncodingForm,
    ref EncodingForm eOutFormEngine,
    ref NormalizeFlags eNormalizeOutput,
    bool bForward)
{
    // Base class goes first.
    base.PreConvert(
        eInEncodingForm, ref eInFormEngine,
        eOutEncodingForm, ref eOutFormEngine,
        ref eNormalizeOutput, bForward);

    // Load at this point.
    Load(ConverterIdentifier);
}
// This function is the meat of the conversion process. The basic structure is:
//
//  o   Check the input data (null is an error; an empty string converts to
//      an empty string without touching the engine)
//  o   Resolve unspecified encoding forms (CheckInitEncForms)
//  o   Give the sub-class (via PreConvert) the opportunity to load tables and
//      do any special preprocessing ahead of the actual conversion
//  o   Normalize the input data to a byte buffer in the form the engine asked
//      for (ECNormalizeData.GetBytes)
//  o   Allocate a buffer for the output data (min 10000 bytes)
//  o   Call the subclass (via DoConvert) to do the actual conversion
//  o   Normalize the output data to the requested output EncodingForm
//      (ECNormalizeData.GetString) and return it
//
// Parameters:
//  eInEncodingForm   form of the data in sInput (may be Unspecified)
//  sInput            the data to convert
//  ciInput           item count of the input; passed through to GetBytes
//  eOutEncodingForm  form requested for the result (may be Unspecified)
//  eNormalizeOutput  normalization flags; PreConvert may change them
//  rciOutput         receives the item count reported by GetString
//  bForward          direction of the conversion
protected virtual unsafe string InternalConvertEx
    (
    EncodingForm    eInEncodingForm,
    string          sInput,
    int             ciInput,
    EncodingForm    eOutEncodingForm,
    NormalizeFlags  eNormalizeOutput,
    out int         rciOutput,
    bool            bForward
    )
{
    if (sInput == null)
    {
        EncConverters.ThrowError(ErrStatus.IncompleteChar);
    }

    // Nothing to convert. Without this guard the input buffer below would be
    // a zero-length array; pinning that yields a null pointer, which GetBytes
    // would then write its terminator through.
    if (sInput.Length == 0)
    {
        rciOutput = 0;
        return "";
    }

    // if the user hasn't specified, then take the default case for the ConversionType:
    //  if L/RHS == eLegacy, then LegacyString
    //  if L/RHS == eUnicode, then UTF16
    CheckInitEncForms
        (
        bForward,
        ref eInEncodingForm,
        ref eOutEncodingForm
        );

    // allow the converter engine (and/or its wrapper) to do some preprocessing.
    EncodingForm eFormEngineIn = EncodingForm.Unspecified, eFormEngineOut = EncodingForm.Unspecified;
    PreConvert
        (
        eInEncodingForm,    // [in] form of the caller's string
        ref eFormEngineIn,  // [out] form the conversion engine wants, etc.
        eOutEncodingForm,
        ref eFormEngineOut,
        ref eNormalizeOutput,
        bForward
        );

    // get enough space for us to normalize the input data (6x ought to be
    // enough), plus headroom for the 4 zero bytes GetBytes appends after the
    // data; without the headroom a one-character input can be written past
    // the end of the array. nBufSize itself is overwritten by GetBytes with
    // the actual byte count, so the padding never reaches DoConvert.
    const int cbTerminator = 4;
    int nBufSize = sInput.Length * 6;
    byte[] abyInBuffer = new byte[nBufSize + cbTerminator];
    fixed (byte* lpInBuffer = abyInBuffer)
    {
        // use a helper class to normalize the data to the format needed by the engine
        ECNormalizeData.GetBytes(sInput, ciInput, eInEncodingForm,
            ((bForward) ? CodePageInput : CodePageOutput), eFormEngineIn, lpInBuffer,
            ref nBufSize, ref m_bDebugDisplayMode);

        // get some space for the converter to fill: at least 10000 bytes,
        // or 6x the normalized input size if that is larger.
        int nOutLen = Math.Max(10000, nBufSize * 6);
        byte[] abyOutBuffer = new byte[nOutLen];
        fixed (byte* lpOutBuffer = abyOutBuffer)
        {
            lpOutBuffer[0] = lpOutBuffer[1] = lpOutBuffer[2] = lpOutBuffer[3] = 0;

            // call the wrapper sub-classes' DoConvert to let them do it.
            DoConvert(lpInBuffer, nBufSize, lpOutBuffer, ref nOutLen);

            return ECNormalizeData.GetString(lpOutBuffer, nOutLen, eOutEncodingForm,
                ((bForward) ? CodePageOutput : CodePageInput), eFormEngineOut, eNormalizeOutput,
                out rciOutput, ref m_bDebugDisplayMode);
        }
    }
}
// [DispId(18)]
// Public explicit-length conversion entry point; a thin wrapper that reorders
// its arguments and forwards them to InternalConvertEx.
public virtual string ConvertEx(
    string sInput,
    EncodingForm inEnc,
    int ciInput,
    EncodingForm outEnc,
    out int ciOutput,
    NormalizeFlags eNormalizeOutput,
    bool bForward)
{
    string sConverted = InternalConvertEx(
        inEnc,
        sInput,
        ciInput,
        outEnc,
        eNormalizeOutput,
        out ciOutput,
        bForward);

    return sConverted;
}
// Pre-conversion hook for the code page converter: records the direction in
// m_bToWide and sets the wide side of the engine to UTF16; for the UTF8 code
// page the narrow side is requested as UTF8Bytes. Any side not mentioned
// keeps the form chosen by the base class.
protected override void PreConvert(
    EncodingForm eInEncodingForm,
    ref EncodingForm eInFormEngine,
    EncodingForm eOutEncodingForm,
    ref EncodingForm eOutFormEngine,
    ref NormalizeFlags eNormalizeOutput,
    bool bForward)
{
    // Base class goes first.
    base.PreConvert(
        eInEncodingForm, ref eInFormEngine,
        eOutEncodingForm, ref eOutFormEngine,
        ref eNormalizeOutput, bForward);

    // The direction has to come from the argument: m_bForward may differ
    // (e.g. when called via ConvertEx).
    m_bToWide = bForward;

    // The UTF8 code page is the one code page converter where both sides are
    // Unicode; its narrow side is requested as UTF8Bytes.
    bool bUtf8CodePage = (m_nCodePage == CP_UTF8);

    if (!m_bToWide)
    {
        // "From wide": the engine's input form is UTF16.
        eInFormEngine = EncodingForm.UTF16;
        if (bUtf8CodePage)
        {
            eOutFormEngine = EncodingForm.UTF8Bytes;
        }

        return;
    }

    // "To wide": the engine's output form is UTF16.
    eOutFormEngine = EncodingForm.UTF16;
    if (bUtf8CodePage)
    {
        eInFormEngine = EncodingForm.UTF8Bytes;
    }
}
// Helper that returns the input data normalized: copies strInput into pBuf as
// raw bytes in the form the engine asked for, appends four zero bytes after
// the data, and reports the byte count through nBufSize.
//
// Parameters:
//  strInput           the caller's data
//  cnCountIn          item count supplied by the caller; 0 means "derive it
//                     from the string"
//  eEncFormIn         form of the data in strInput
//  nCodePageIn        code page used to narrow LegacyString data
//  eFormEngineIn      form the engine wants
//  pBuf               destination buffer; must have room for the data plus
//                     the 4 terminating zero bytes
//  nBufSize           on return, the number of data bytes written
//  bDebugDisplayMode  passed through to the debug display helpers
//
// Returns pBuf. An unsupported input form is reported through
// EncConverters.ThrowError(ErrStatus.InEncFormNotSupported).
internal static unsafe byte* GetBytes(string strInput, int cnCountIn, EncodingForm eEncFormIn, int nCodePageIn, EncodingForm eFormEngineIn, byte* pBuf, ref int nBufSize, ref bool bDebugDisplayMode)
{
    // if the form the user gave is not what the engine wants (and it isn't legacy
    //  since legacy forms are already handled later)...
    if ((eEncFormIn != eFormEngineIn) && !EncConverter.IsLegacyFormat(eEncFormIn))
    {
        // we can do some of the conversions ourself. For example, if the input form
        //  is UTF16 and the desired form is UTF8, then simply use CCUnicode8 below
        if ((eEncFormIn == EncodingForm.UTF16) && (eFormEngineIn == EncodingForm.UTF8Bytes))
        {
            eEncFormIn = (EncodingForm)CCUnicode8;
        }
        // we can also do the following one
        else if ((eEncFormIn == EncodingForm.UTF8String) && (eFormEngineIn == EncodingForm.UTF8Bytes))
        {
            // nothing to do here: the UTF8String case below produces the bytes
        }
        else
        {
            // everything else goes through the Unicode encoding form converter
            strInput = EncConverters.UnicodeEncodingFormConvertEx(strInput, eEncFormIn, cnCountIn, eFormEngineIn, NormalizeFlags.None, out cnCountIn);
            eEncFormIn = eFormEngineIn;
        }
    }

    int nInLen = 0;
    switch (eEncFormIn)
    {
        case EncodingForm.LegacyBytes:
        case EncodingForm.UTF8Bytes:
            {
                if (cnCountIn != 0)
                    nInLen = cnCountIn;     // item count should be the number of bytes directly.
                else
                    // if the user didn't give the length, then get it from the
                    //  string length. nInLen will be the # of bytes.
                    nInLen = strInput.Length * 2;

                // these forms are for clients that use the string to transfer
                //  bytes rather than wide characters.
                nInLen = StringToByteStar(strInput, pBuf, nInLen);

                if (eEncFormIn == EncodingForm.LegacyBytes)
                    DisplayDebugCharValues(pBuf, nInLen, "Received (LegacyBytes) from client and sending to Converter/DLL...", ref bDebugDisplayMode);
                else
                    DisplayDebugUCharValuesFromUTF8(pBuf, nInLen, "Received (UTF8Bytes) from client and sending to Converter/DLL...", ref bDebugDisplayMode);
                break;
            }

        case EncodingForm.LegacyString:
            {
                if (cnCountIn != 0)
                    nInLen = cnCountIn;     // item count should be the number of bytes directly (after conversion below).
                else
                    nInLen = strInput.Length;   // the # of bytes will *be* the # of chars in the string after we're done.

                DisplayDebugUCharValues(strInput, "Received (LegacyString) from client...", ref bDebugDisplayMode);

                // use a code page converter to narrowize using the input string
                byte[] ba = null;

                // first check if it's a symbol font (sometimes the user
                //  incorrectly sends a few spaces first, so check the
                //  first few characters). If it is (and the code page is 0),
                //  then change the code page to be the symbol font code page.
                // Each probe is length-guarded so that an empty string does
                //  not throw IndexOutOfRangeException.
                if ((nCodePageIn == 0)
                    && (((strInput.Length > 0) && ((strInput[0] & 0xF000) == 0xF000))
                        || ((strInput.Length > 1) && ((strInput[1] & 0xF000) == 0xF000))
                        || ((strInput.Length > 2) && ((strInput[2] & 0xF000) == 0xF000))
                        )
                    )
                {
                    nCodePageIn = EncConverters.cnSymbolFontCodePage;
                }

                // if it's a symbol or iso-8859 encoding, then we can handle just
                //  taking the low byte (i.e. the catch case)
                if ((nCodePageIn == EncConverters.cnSymbolFontCodePage)
                    || (nCodePageIn == EncConverters.cnIso8859_1CodePage)
                    )
                {
                    try
                    {
                        Encoding enc = Encoding.GetEncoding(nCodePageIn);
                        ba = enc.GetBytes(strInput);
                    }
                    catch
                    {
                        // Deliberate best-effort fallback: if the encoding
                        //  can't be obtained or used (the original author notes
                        //  symbol fonts don't appear to be supported in .Net,
                        //  and that cp8859 won't work for symbol data), go back
                        //  to stripping out the low byte of each character.
                        //  This works for both 8859 and symbol data.
                        ba = new byte[nInLen];
                        for (int i = 0; i < nInLen; i++)
                            ba[i] = (byte)(strInput[i] & 0xFF);
                    }
                }
                else
                {
                    // otherwise, simply use the given code page (0 being the
                    //  default code page) to narrowize it.
                    Encoding enc = Encoding.GetEncoding(nCodePageIn);
                    ba = enc.GetBytes(strInput);
                }

                // copy the managed byte array into the caller's buffer
                ByteArrToByteStar(ba, pBuf);

                if (cnCountIn != 0)
                    nInLen = cnCountIn;     // item count should be the number of bytes directly.
                else
                    // if the user didn't give the length, then take the number
                    //  of bytes actually produced.
                    nInLen = ba.Length;

                DisplayDebugCharValues(pBuf, nInLen, "Sending (LegacyBytes) to Converter/DLL...", ref bDebugDisplayMode);
                break;
            }

        // this following form *must* be widened UTF8 via the default code page
        case EncodingForm.UTF8String:
            {
                DisplayDebugUCharValues(strInput, "Received (UTF8String) from client...", ref bDebugDisplayMode);

                // use a code page converter to narrowize using the input string
                Encoding enc = Encoding.Default;
                byte[] ba = enc.GetBytes(strInput);

                // copy the managed byte array into the caller's buffer
                ByteArrToByteStar(ba, pBuf);

                if (cnCountIn != 0)
                    nInLen = cnCountIn;     // item count should be the number of bytes directly.
                else
                    // if the user didn't give the length, then take the number
                    //  of bytes actually produced.
                    nInLen = ba.Length;

                DisplayDebugUCharValuesFromUTF8(pBuf, nInLen, "Sending (UTF8Bytes) to Converter/DLL...", ref bDebugDisplayMode);
                break;
            }

        // this is a special case for CC where the input was actually UTF16, but the
        //  CC DLL is expecting (usually) UTF8, so convert from UTF16->UTF8 narrow
        case (EncodingForm)CCUnicode8:
            {
                DisplayDebugUCharValues(strInput, "Received (UTF16) from client...", ref bDebugDisplayMode);

                UTF8Encoding enc = new UTF8Encoding();
                byte[] ba = enc.GetBytes(strInput);

                // copy the managed byte array into the caller's buffer
                ByteArrToByteStar(ba, pBuf);

                // since we've changed the format, we don't care how many UTF16 words came in
                nInLen = ba.Length;

                DisplayDebugUCharValuesFromUTF8(pBuf, nInLen, "Sending (UTF8Bytes) to Converter/DLL...", ref bDebugDisplayMode);
                break;
            }

        case EncodingForm.UTF16:
            {
                if (cnCountIn != 0)
                    nInLen = cnCountIn;     // item count should be the number of 16-bit words directly
                else
                    nInLen = strInput.Length;

                DisplayDebugUCharValues(strInput, "Received (UTF16) from client and sending to Converter/DLL...", ref bDebugDisplayMode);

                // but this should be the count of bytes...
                nInLen *= 2;
                StringToByteStar(strInput, pBuf, nInLen);
                break;
            }

        case EncodingForm.UTF16BE:
        case EncodingForm.UTF32:
        case EncodingForm.UTF32BE:
            {
                if (cnCountIn != 0)
                {
                    nInLen = cnCountIn;     // item count is the number of Uni chars

                    // for UTF32, the converter's actually expecting the length to be twice
                    //  this much again.
                    if (eEncFormIn != EncodingForm.UTF16BE)
                        nInLen *= 2;
                }
                else
                {
                    nInLen = strInput.Length;
                }

                // NOTE(review): pBuf is displayed here before StringToByteStar
                //  below has filled it, unlike the UTF16 case which displays
                //  strInput - confirm whether that is intended.
                DisplayDebugUCharValues(pBuf, nInLen, "Received (UTF16BE/32/32BE) from client/Sending to Converter/DLL...", ref bDebugDisplayMode);

                // for the byte count, double it (possibly again)
                nInLen *= 2;
                StringToByteStar(strInput, pBuf, nInLen);
                break;
            }

        default:
            EncConverters.ThrowError(ErrStatus.InEncFormNotSupported);
            break;
    }

    // terminate the data with four zero bytes and report the data length
    pBuf[nInLen] = pBuf[nInLen + 1] = pBuf[nInLen + 2] = pBuf[nInLen + 3] = 0;
    nBufSize = (int)nInLen;

    return pBuf;
}
// Returns true when eForm is one of the two legacy encoding forms
// (LegacyString or LegacyBytes), false for every other form.
public static bool IsLegacyFormat(EncodingForm eForm)
{
    switch (eForm)
    {
        case EncodingForm.LegacyString:
        case EncodingForm.LegacyBytes:
            return true;

        default:
            return false;
    }
}
// This function is the meat of the conversion process. The basic structure is:
//
//  o   Check the input data (null is an error; an empty string converts to
//      an empty string without touching the engine)
//  o   Resolve unspecified encoding forms (CheckInitEncForms)
//  o   Give the sub-class (via PreConvert) the opportunity to load tables and
//      do any special preprocessing ahead of the actual conversion
//  o   Normalize the input data to a byte buffer in the form the engine asked
//      for (ECNormalizeData.GetBytes)
//  o   Allocate a buffer for the output data (min 10000 bytes)
//  o   Call the subclass (via DoConvert) to do the actual conversion
//  o   Normalize the output data to the requested output EncodingForm
//      (ECNormalizeData.GetString) and return it
//
// Parameters:
//  eInEncodingForm   form of the data in sInput (may be Unspecified)
//  sInput            the data to convert
//  ciInput           item count of the input; passed through to GetBytes
//  eOutEncodingForm  form requested for the result (may be Unspecified)
//  eNormalizeOutput  normalization flags; PreConvert may change them
//  rciOutput         receives the item count reported by GetString
//  bForward          direction of the conversion
//
// The "#if DEBUG && __MonoCS__" sections only emit diagnostic traces.
protected virtual unsafe string InternalConvertEx
    (
    EncodingForm    eInEncodingForm,
    string          sInput,
    int             ciInput,
    EncodingForm    eOutEncodingForm,
    NormalizeFlags  eNormalizeOutput,
    out int         rciOutput,
    bool            bForward
    )
{
    Util.DebugWriteLine(className, "BEGIN");
    Util.DebugWriteLine(className, "eEncFormIn " + eInEncodingForm.ToString() + ", " + "eEncFormOut " + eOutEncodingForm.ToString());

    if (sInput == null)
    {
        EncConverters.ThrowError(ErrStatus.IncompleteChar);
    }

    // nothing to convert
    if (sInput.Length == 0)
    {
        rciOutput = 0;
        return("");
    }

#if DEBUG && __MonoCS__
    // for debugging only BEGIN
    byte[] baIn = System.Text.Encoding.BigEndianUnicode.GetBytes(sInput);  // easier to read
    Util.DebugWriteLine(className, Util.getDisplayBytes("Input BigEndianUnicode", baIn));
    baIn = System.Text.Encoding.Unicode.GetBytes(sInput);
    Util.DebugWriteLine(className, Util.getDisplayBytes("Input Unicode", baIn));

    int nInLen = sInput.Length;
    byte [] baIn2 = new byte[nInLen];
    for (int i = 0; i < nInLen; i++)
    {
        baIn2[i] = (byte)(sInput[i] & 0xFF);
    }
    Util.DebugWriteLine(className, Util.getDisplayBytes("Input Narrowized", baIn2));
    // for debugging only END
#endif

    // if the user hasn't specified, then take the default case for the ConversionType:
    //  if L/RHS == eLegacy, then LegacyString
    //  if L/RHS == eUnicode, then UTF16
    CheckInitEncForms
        (
        bForward,
        ref eInEncodingForm,
        ref eOutEncodingForm
        );

    // allow the converter engine (and/or its wrapper) to do some preprocessing.
    EncodingForm eFormEngineIn = EncodingForm.Unspecified, eFormEngineOut = EncodingForm.Unspecified;
    PreConvert
        (
        eInEncodingForm,    // [in] form of the caller's string
        ref eFormEngineIn,  // [out] form the conversion engine wants, etc.
        eOutEncodingForm,
        ref eFormEngineOut,
        ref eNormalizeOutput,
        bForward
        );

    // get enough space for us to normalize the input data (6x ought to be
    // enough), plus headroom for the 4 zero bytes GetBytes appends after the
    // data; without the headroom a one-character input can be written past
    // the end of the array. nBufSize itself is overwritten by GetBytes with
    // the actual byte count, so the padding never reaches DoConvert.
    const int cbTerminator = 4;
    int nBufSize = sInput.Length * 6;
    byte[] abyInBuffer = new byte[nBufSize + cbTerminator];
    fixed(byte *lpInBuffer = abyInBuffer)
    {
        // use a helper class to normalize the data to the format needed by the engine
        Util.DebugWriteLine(className, "Calling GetBytes");
        ECNormalizeData.GetBytes(sInput, ciInput, eInEncodingForm,
            ((bForward) ? CodePageInput : CodePageOutput), eFormEngineIn, lpInBuffer,
            ref nBufSize, ref m_bDebugDisplayMode);
#if DEBUG && __MonoCS__
        byte[] baOut = new byte[nBufSize];
        ECNormalizeData.ByteStarToByteArr(lpInBuffer, nBufSize, baOut);
        Util.DebugWriteLine(className, Util.getDisplayBytes("Input Bytes", baOut));
#endif

        // get some space for the converter to fill: at least 10000 bytes,
        // or 6x the normalized input size if that is larger.
        int nOutLen = Math.Max(10000, nBufSize * 6);
        byte[] abyOutBuffer = new byte[nOutLen];
        fixed(byte *lpOutBuffer = abyOutBuffer)
        {
            lpOutBuffer[0] = lpOutBuffer[1] = lpOutBuffer[2] = lpOutBuffer[3] = 0;

            // call the wrapper sub-classes' DoConvert to let them do it.
            Util.DebugWriteLine(className, "Calling DoConvert");
            DoConvert(lpInBuffer, nBufSize, lpOutBuffer, ref nOutLen);
#if DEBUG && __MonoCS__
            Util.DebugWriteLine(className, "Output length " + nOutLen.ToString());
            byte[] baOut2 = new byte[nOutLen];
            ECNormalizeData.ByteStarToByteArr(lpOutBuffer, nOutLen, baOut2);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Output In Bytes", baOut2));
            Util.DebugWriteLine(className, "Got val '" + System.Text.Encoding.Unicode.GetString(baOut2) + "'");
#endif

            string result = ECNormalizeData.GetString(lpOutBuffer, nOutLen, eOutEncodingForm,
                ((bForward) ? CodePageOutput : CodePageInput), eFormEngineOut, eNormalizeOutput,
                out rciOutput, ref m_bDebugDisplayMode);
#if DEBUG && __MonoCS__
            Util.DebugWriteLine(className, "normalized result '" + result + "'");
            byte[] baResult = System.Text.Encoding.BigEndianUnicode.GetBytes(result);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output in UTF16BE", baResult));
            baResult = System.Text.Encoding.Unicode.GetBytes(result);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output in UTF16LE", baResult));
            baResult = System.Text.Encoding.UTF8.GetBytes(result);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output In UTF8", baResult));
            Util.DebugWriteLine(className, "Returning.");
#endif
            return(result);
        }
    }
}
// Chooses the encoding forms the engine will work in, then loads the converter and hands
// the chosen forms to the C++ side.
//
// A Unicode side of ConversionType is processed as UTF32 when Util.IsUnix is true and as
// UTF16 otherwise; a non-Unicode (legacy) side is processed as LegacyBytes. The left-hand
// side of ConversionType drives eInFormEngine and the right-hand side eOutFormEngine.
protected override void PreConvert
(
    EncodingForm        eInEncodingForm,
    ref EncodingForm    eInFormEngine,
    EncodingForm        eOutEncodingForm,
    ref EncodingForm    eOutFormEngine,
    ref NormalizeFlags  eNormalizeOutput,
    bool                bForward
)
{
    // the base class goes first
    base.PreConvert(eInEncodingForm, ref eInFormEngine,
                    eOutEncodingForm, ref eOutFormEngine,
                    ref eNormalizeOutput, bForward);

    // engine input form
    if (NormalizeLhsConversionType(ConversionType) != NormConversionType.eUnicode)
    {
        // legacy
        Util.DebugWriteLine(this, "eInFormEngine LegacyBytes");
        eInFormEngine = EncodingForm.LegacyBytes;
    }
    else if (Util.IsUnix)
    {
        // UTF-8 would be possible here as well, but wide data works just fine.
        Util.DebugWriteLine(this, "eInFormEngine UTF32");
        eInFormEngine = EncodingForm.UTF32;
    }
    else
    {
        // the Windows version definitely needs UTF16.
        Util.DebugWriteLine(this, "eInFormEngine UTF16");
        eInFormEngine = EncodingForm.UTF16;
    }

    // engine output form
    if (NormalizeRhsConversionType(ConversionType) != NormConversionType.eUnicode)
    {
        Util.DebugWriteLine(this, "eOutFormEngine LegacyBytes");
        eOutFormEngine = EncodingForm.LegacyBytes;
    }
    else if (Util.IsUnix)
    {
        Util.DebugWriteLine(this, "eOutFormEngine UTF32");
        eOutFormEngine = EncodingForm.UTF32;
    }
    else
    {
        Util.DebugWriteLine(this, "eOutFormEngine UTF16");
        eOutFormEngine = EncodingForm.UTF16;
    }

    // load first, then pass the encoding form settings on to the C++ code
    Load();
    CppPreConvert((int)eInFormEngine, (int)eOutFormEngine, (int)eNormalizeOutput, bForward);
}
// Verifies that the requested input/output encoding forms fit m_eConversionType for the
// requested direction and reports the first mismatch through EncConverters.ThrowError:
//  o InvalidConversionType  - reverse conversion asked of a unidirectional converter
//  o InEncFormNotSupported  - legacy form given for a Unicode side, or the other way round
//  o OutEncFormNotSupported - same, for the output form
// Going forward the input form belongs to the left-hand side and is checked first; in
// reverse the output form belongs to the left-hand side and is checked first.
protected void CheckForBadForm
(
    bool            bForward,
    EncodingForm    inEnc,
    EncodingForm    outEnc
)
{
    if (EncConverters.IsUnidirectional(m_eConversionType) && !bForward)
    {
        EncConverters.ThrowError(ErrStatus.InvalidConversionType);
        return;
    }

    bool bLhsUnicode = (NormalizeLhsConversionType(m_eConversionType) == NormConversionType.eUnicode);
    bool bRhsUnicode = (NormalizeRhsConversionType(m_eConversionType) == NormConversionType.eUnicode);

    // A form is wrong for a side exactly when "form is legacy" equals "side is Unicode":
    // legacy data on a Unicode side, or Unicode data on a legacy side.
    if (bForward)
    {
        if (IsLegacyFormat(inEnc) == bLhsUnicode)
        {
            EncConverters.ThrowError(ErrStatus.InEncFormNotSupported);
        }
        if (IsLegacyFormat(outEnc) == bRhsUnicode)
        {
            EncConverters.ThrowError(ErrStatus.OutEncFormNotSupported);
        }
    }
    else    // reverse
    {
        if (IsLegacyFormat(outEnc) == bLhsUnicode)
        {
            EncConverters.ThrowError(ErrStatus.OutEncFormNotSupported);
        }
        if (IsLegacyFormat(inEnc) == bRhsUnicode)
        {
            EncConverters.ThrowError(ErrStatus.InEncFormNotSupported);
        }
    }
}
// This function is the meat of the conversion process. The input and output buffers are
// managed byte arrays that are pinned with 'fixed' for the duration of the call, so the
// normalization helpers and the sub-class's DoConvert can work on raw byte pointers; that
// is why everything happens inside one function.
// The basic structure is:
//
//  o Check input data (null input is reported as ErrStatus.IncompleteChar)
//  o Fill in any Unspecified encoding form (CheckInitEncForms)
//  o Give the sub-class (via PreConvert) the opportunity to load tables and to report the
//    encoding forms its engine works in
//  o Normalize the input data to a byte array in the engine's input form
//    (ECNormalizeData.GetBytes)
//  o Allocate a buffer for the output data (min 10000 bytes)
//  o Call the sub-class (via DoConvert) to do the actual conversion
//  o Normalize the output data to the requested output EncodingForm
//    (ECNormalizeData.GetString)
//  o Return the resulting string; rciOutput receives the item count reported by
//    ECNormalizeData.GetString
//
// ciInput is handed to ECNormalizeData.GetBytes unchanged. bForward selects the direction
// and which of CodePageInput/CodePageOutput goes with each side.
protected virtual unsafe string InternalConvertEx
(
    EncodingForm    eInEncodingForm,
    string          sInput,
    int             ciInput,
    EncodingForm    eOutEncodingForm,
    NormalizeFlags  eNormalizeOutput,
    out int         rciOutput,
    bool            bForward
)
{
    // ECNormalizeData.GetString writes a 4-byte terminator starting at lpOutBuffer[nOutLen],
    // where nOutLen is the length reported back by DoConvert. Keep that many spare bytes
    // behind the space offered to DoConvert so the terminator can never land outside the
    // pinned array, even when the engine fills the buffer completely.
    const int cbTerminator = 4;

    if (sInput == null)
    {
        EncConverters.ThrowError(ErrStatus.IncompleteChar);
    }

    // if the user hasn't specified, then take the default case for the ConversionType:
    //  if L/RHS == eLegacy, then LegacyString
    //  if L/RHS == eUnicode, then UTF16
    CheckInitEncForms
    (
        bForward,
        ref eInEncodingForm,
        ref eOutEncodingForm
    );

    // allow the converter engine (and/or its wrapper) to do some preprocessing.
    EncodingForm eFormEngineIn = EncodingForm.Unspecified, eFormEngineOut = EncodingForm.Unspecified;
    PreConvert
    (
        eInEncodingForm,        // [in] form of the caller's data
        ref eFormEngineIn,      // [out] form the conversion engine wants
        eOutEncodingForm,
        ref eFormEngineOut,
        ref eNormalizeOutput,
        bForward
    );

    // get enough space for us to normalize the input data (6x ought to be enough)
    int nBufSize = sInput.Length * 6;
    byte[] abyInBuffer = new byte[nBufSize];
    fixed (byte* lpInBuffer = abyInBuffer)
    {
        // use a helper class to normalize the data to the format needed by the engine;
        // nBufSize is passed by reference and is used below as the input byte count.
        ECNormalizeData.GetBytes(sInput, ciInput, eInEncodingForm,
            ((bForward) ? CodePageInput : CodePageOutput), eFormEngineIn, lpInBuffer,
            ref nBufSize, ref m_bDebugDisplayMode);

        // offer the engine at least 10000 bytes (or 6x the input) for its output; the
        // array itself carries the extra terminator bytes described above.
        int nOutLen = Math.Max(10000, nBufSize * 6);
        byte[] abyOutBuffer = new byte[nOutLen + cbTerminator];
        fixed (byte* lpOutBuffer = abyOutBuffer)
        {
            lpOutBuffer[0] = lpOutBuffer[1] = lpOutBuffer[2] = lpOutBuffer[3] = 0;

            // call the wrapper sub-classes' DoConvert to let them do it.
            DoConvert(lpInBuffer, nBufSize, lpOutBuffer, ref nOutLen);

            return(ECNormalizeData.GetString(lpOutBuffer, nOutLen, eOutEncodingForm,
                ((bForward) ? CodePageOutput : CodePageInput), eFormEngineOut,
                eNormalizeOutput, out rciOutput, ref m_bDebugDisplayMode));
        }
    }
}
// Turns the bytes a conversion engine produced into the string handed back to the client.
//
//  lpOutBuffer / nOutLen  engine output and its length in bytes; four zero bytes are written
//                         directly behind the data, so the buffer must have room for them
//  eOutEncodingForm       encoding form the client asked for
//  nCodePageOut           code page used to widen LegacyString output
//  eFormEngineOut         encoding form the engine actually produced
//  eNormalizeOutput       normalization still to be applied (None if nothing is pending)
//  rciOutput              receives the number of items in the returned string
//  bDebugDisplayMode      passed on to the DisplayDebug* helpers
//
// A few engine/client form mismatches are resolved right here; anything still mismatched
// after that (or any pending normalization) is passed to
// EncConverters.UnicodeEncodingFormConvertEx at the end.
internal static unsafe string GetString(byte *lpOutBuffer, int nOutLen, EncodingForm eOutEncodingForm,
    int nCodePageOut, EncodingForm eFormEngineOut, NormalizeFlags eNormalizeOutput,
    out int rciOutput, ref bool bDebugDisplayMode)
{
    Util.DebugWriteLine(className, "BEGIN");

    // null terminate the output ...
    lpOutBuffer[nOutLen + 3] = 0;
    lpOutBuffer[nOutLen + 2] = 0;
    lpOutBuffer[nOutLen + 1] = 0;
    lpOutBuffer[nOutLen] = 0;

    // ... and turn it into a (real) array of bytes
    byte[] abyOut = new byte[nOutLen];
    ByteStarToByteArr(lpOutBuffer, nOutLen, abyOut);
    Util.DebugWriteLine(className, Util.getDisplayBytes("byte array", abyOut));

    // See whether the engine produced the form the client wants. If it did not, handle
    // the easy cases here; the rest is left for the conversion call at the bottom so that
    // all engines can serve all output encoding forms.
    Util.DebugWriteLine(className,
        "eOutEncodingForm " + eOutEncodingForm.ToString() + ", " +
        "eFormEngineOut " + eFormEngineOut.ToString());
    if (eOutEncodingForm != eFormEngineOut)
    {
        if (EncConverter.IsLegacyFormat(eOutEncodingForm))
        {
            if ((eOutEncodingForm == EncodingForm.LegacyString)
                && (eFormEngineOut == EncodingForm.LegacyBytes))
            {
                // just *pretend* the engine outputs LegacyString (the LegacyString case
                // below really means "convert LegacyBytes to LegacyString")
                eFormEngineOut = eOutEncodingForm;
            }
        }
        // unicode forms from here on
        else if ((eOutEncodingForm == EncodingForm.UTF16)
            && (eFormEngineOut == EncodingForm.UTF8Bytes))
        {
            // engine gives UTF8, client wants UTF16: use the special form handled below
            Util.DebugWriteLine(className, "using CCUnicode8");
            eFormEngineOut = (EncodingForm)CCUnicode8;
            eOutEncodingForm = eFormEngineOut;
        }
        else if ((eFormEngineOut == EncodingForm.UTF16)
            && ((eOutEncodingForm == EncodingForm.UTF8Bytes)
                || (eOutEncodingForm == EncodingForm.UTF8String)))
        {
            // engine gave UTF16, but the client wants a UTF8 flavor: re-encode the bytes
            char[] achWide = Encoding.Unicode.GetChars(abyOut);
            UTF8Encoding encUtf8 = new UTF8Encoding();
            abyOut = encUtf8.GetBytes(achWide);
            nOutLen = abyOut.Length;
            eFormEngineOut = eOutEncodingForm;
        }
        else if ((eOutEncodingForm == EncodingForm.UTF8String)
            || (eOutEncodingForm == EncodingForm.UTF16))
        {
            // these conversions we can do ourself
#if _MSC_VER
            // Doesn't this wipe out the distinction?
            // On Linux we need to be able to convert the output from UTF32 to UTF16.
            eFormEngineOut = eOutEncodingForm;
#endif
        }
    }

    char[] achOut = null;
    int cchOut = 0;
    int nItems = 0;
    switch (eFormEngineOut)
    {
        case EncodingForm.LegacyBytes:
        case EncodingForm.UTF8Bytes:
        {
            if (eFormEngineOut != EncodingForm.LegacyBytes)
            {
                DisplayDebugUCharValuesFromUTF8(abyOut, "Received (UTF8Bytes) back from Converter/DLL (returning as UTF8Bytes)...", ref bDebugDisplayMode);
            }
            else
            {
                DisplayDebugCharValues(abyOut, "Received (LegacyBytes) back from Converter/DLL (returning as LegacyBytes)...", ref bDebugDisplayMode);
            }

            // stuff the returned 'bytes' into the string as narrow characters (two per
            // char) rather than converting to wide
            cchOut = (nOutLen + 1) / 2;
            nItems = nOutLen;
            achOut = new char[cchOut];
            ByteArrToCharArr(abyOut, achOut);
            break;
        }

        case EncodingForm.LegacyString:
        {
            DisplayDebugCharValues(abyOut, "Received (LegacyBytes) back from Converter/DLL (returning as LegacyString)...", ref bDebugDisplayMode);
            nItems = nOutLen;
            cchOut = nOutLen;
#if __MonoCS__
            // Narrowizing by code page 0 doesn't seem to be what we want on Linux.
            // Treating it as a symbol font or stripping off the low byte works better.
            if (nCodePageOut == 0)
            {
                achOut = BruteForceWiden(nCodePageOut, abyOut, cchOut);
            }
            else
#else
            if (true)
#endif
            {
                try
                {
                    // this will throw (for some reason) when doing symbol fonts
                    // (apparently, CP_SYMBOL is no longer supported).
                    achOut = Encoding.GetEncoding(nCodePageOut).GetChars(abyOut);
                }
                catch
                {
                    bool bCanWidenByHand =
                        (nCodePageOut == EncConverters.cnSymbolFontCodePage)
                        || (nCodePageOut == EncConverters.cnIso8859_1CodePage);
                    if (!bCanWidenByHand)
                    {
                        throw;
                    }
                    achOut = BruteForceWiden(nCodePageOut, abyOut, cchOut);
                }
            }
            break;
        }

        case EncodingForm.UTF16:
        {
            nItems = (nOutLen / 2);
            cchOut = nItems;
            DisplayDebugUCharValues(abyOut, "Received (UTF16) back from Converter/DLL (returning as UTF16)...", ref bDebugDisplayMode);
            achOut = Encoding.Unicode.GetChars(abyOut);
            break;
        }

        case EncodingForm.UTF8String:
        {
            DisplayDebugUCharValuesFromUTF8(abyOut, "Received (UTF8Bytes) back from Converter/DLL (returning as UTF8String)...", ref bDebugDisplayMode);

            // this encoding form is always encoded using the default code page.
            achOut = Encoding.Default.GetChars(abyOut);
            nItems = nOutLen;
            cchOut = nOutLen;
            break;
        }

        case (EncodingForm)CCUnicode8:
        {
            DisplayDebugUCharValuesFromUTF8(abyOut, "Received (UTF8Bytes) back from Converter/DLL (returning as UTF16)...", ref bDebugDisplayMode);
            achOut = Encoding.UTF8.GetChars(abyOut);
            nItems = achOut.Length;
            cchOut = nItems;
            break;
        }

        case EncodingForm.UTF16BE:
        case EncodingForm.UTF32:
        case EncodingForm.UTF32BE:
        {
            nItems = nOutLen / 2;
            cchOut = nItems;
            DisplayDebugUCharValues(abyOut, "Received (UTF16BE/32/32BE) back from Converter/DLL...", ref bDebugDisplayMode);
            achOut = new char[cchOut];
            ByteArrToCharArr(abyOut, achOut);

            // a UTF32 item takes two chars, so the item count is half of that again.
            if (eFormEngineOut != EncodingForm.UTF16BE)
            {
                nItems /= 2;
            }
            break;
        }

        default:
            EncConverters.ThrowError(ErrStatus.OutEncFormNotSupported);
            break;
    }

#if !v22_AllowEmptyReturn
    bool bNothingUsable = (cchOut <= 0);
#if DEBUG
    bNothingUsable = bNothingUsable || (cchOut != achOut.Length);
#endif
    if (bNothingUsable)
    {
        EncConverters.ThrowError(ErrStatus.NoReturnDataBadOutForm);
    }
#endif

    string strOutput = new string(achOut);
#if DEBUG
    byte[] byteArray = Encoding.BigEndianUnicode.GetBytes(achOut);
    Util.DebugWriteLine(className, Util.getDisplayBytes("characters", byteArray));
    byte[] baResult = System.Text.Encoding.BigEndianUnicode.GetBytes(strOutput);
    Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized strOutput in UTF16BE", baResult));
#endif

    // If the engine's form still differs from what the client wants (e.g. the caller
    // requested utf32 but the engine could only give utf16/8), or if output normalization
    // is still pending, have the conversion done for us so that every engine supports
    // every output form. (An engine whose PreConvert resets eNormalizeOutput to None has
    // taken care of normalization itself.)
    bool bFormMismatch = (eFormEngineOut != eOutEncodingForm);
    if (bFormMismatch || (eNormalizeOutput != NormalizeFlags.None))
    {
        strOutput = EncConverters.UnicodeEncodingFormConvertEx(strOutput, eFormEngineOut, nItems,
            eOutEncodingForm, eNormalizeOutput, out nItems);
    }

    DisplayDebugUCharValues(strOutput, "Returning back to client...", ref bDebugDisplayMode);

    rciOutput = nItems;
    return(strOutput);
}
// True for the two legacy (non-Unicode) encoding forms, LegacyString and LegacyBytes;
// false for every other form.
public static bool IsLegacyFormat(EncodingForm eForm)
{
    switch (eForm)
    {
        case EncodingForm.LegacyString:
        case EncodingForm.LegacyBytes:
            return true;

        default:
            return false;
    }
}
// Declares that this engine consumes and produces LegacyBytes, remembers the requested
// direction in m_bForward (DoConvert looks at it later) and loads the converter for that
// direction.
protected override void PreConvert
(
    EncodingForm        eInEncodingForm,
    ref EncodingForm    eInFormEngine,
    EncodingForm        eOutEncodingForm,
    ref EncodingForm    eOutFormEngine,
    ref NormalizeFlags  eNormalizeOutput,
    bool                bForward
)
{
    // the base class goes first
    base.PreConvert(eInEncodingForm, ref eInFormEngine,
                    eOutEncodingForm, ref eOutFormEngine,
                    ref eNormalizeOutput, bForward);

    // both sides of the engine are plain legacy bytes
    eInFormEngine = EncodingForm.LegacyBytes;
    eOutFormEngine = EncodingForm.LegacyBytes;

    // keep track of the direction so we can see it during DoConvert ...
    m_bForward = bForward;

    // ... and do the load at this point.
    if (!m_bForward)
    {
        LoadReverse();
    }
    else
    {
        LoadForward();
    }
}
/// <summary>
/// Converts legacy data that is supplied as a byte array. The caller's bytes are pinned and
/// handed to DoConvert as they are (there is no ECNormalizeData.GetBytes step), so they are
/// treated as a byte array rather than as a string.
/// </summary>
/// <param name="eInEncodingForm">encoding form of the input; Unspecified is resolved by CheckInitEncForms</param>
/// <param name="baInput">input bytes; null is reported as ErrStatus.IncompleteChar</param>
/// <param name="eOutEncodingForm">encoding form wanted for the returned string</param>
/// <param name="eNormalizeOutput">normalization requested for the output</param>
/// <param name="rciOutput">receives the item count reported by ECNormalizeData.GetString (0 for empty input)</param>
/// <param name="bForward">true to convert forward, false to convert in reverse</param>
/// <returns>the converted string; "" when baInput is empty</returns>
protected virtual unsafe string InternalConvertEx(EncodingForm eInEncodingForm,
    byte[] baInput,
    EncodingForm eOutEncodingForm,
    NormalizeFlags eNormalizeOutput,
    out int rciOutput,
    bool bForward)
{
    // ECNormalizeData.GetString writes a 4-byte terminator starting at lpOutBuffer[nOutLen],
    // where nOutLen is the length reported back by DoConvert. Keep that many spare bytes
    // behind the space offered to DoConvert so the terminator can never land outside the
    // pinned array, even when the engine fills the buffer completely.
    const int cbTerminator = 4;

    Util.DebugWriteLine(className, "(input bytes) BEGIN");
    Util.DebugWriteLine(className,
        "eEncFormIn " + eInEncodingForm.ToString() + ", " +
        "eEncFormOut " + eOutEncodingForm.ToString());

    if (baInput == null)
    {
        EncConverters.ThrowError(ErrStatus.IncompleteChar);
    }
    if (baInput.Length == 0)
    {
        rciOutput = 0;
        return("");
    }

    // if the user hasn't specified, then take the default case for the ConversionType:
    //  if L/RHS == eLegacy, then LegacyString
    //  if L/RHS == eUnicode, then UTF16
    CheckInitEncForms(bForward, ref eInEncodingForm, ref eOutEncodingForm);

    // allow the converter engine (and/or its wrapper) to do some preprocessing.
    EncodingForm eFormEngineIn = EncodingForm.Unspecified, eFormEngineOut = EncodingForm.Unspecified;
    PreConvert(
        eInEncodingForm,        // [in] form of the caller's data
        ref eFormEngineIn,      // [out] form the conversion engine wants
        eOutEncodingForm,
        ref eFormEngineOut,
        ref eNormalizeOutput,
        bForward);

    int nBufSize = baInput.Length;
    fixed (byte* lpInBuffer = baInput)
    {
        // offer the engine at least 10000 bytes (or 6x the input) for its output; the
        // array itself carries the extra terminator bytes described above.
        int nOutLen = Math.Max(10000, nBufSize * 6);
        byte[] abyOutBuffer = new byte[nOutLen + cbTerminator];
        fixed (byte* lpOutBuffer = abyOutBuffer)
        {
            lpOutBuffer[0] = lpOutBuffer[1] = lpOutBuffer[2] = lpOutBuffer[3] = 0;

            // call the wrapper sub-classes' DoConvert to let them do it.
            Util.DebugWriteLine(className, "Calling DoConvert");
            DoConvert(lpInBuffer, nBufSize, lpOutBuffer, ref nOutLen);
            Util.DebugWriteLine(className, "Output length " + nOutLen.ToString());

#if DEBUG
            // copy of the raw engine output, needed for the trace output only
            byte[] baOut = new byte[nOutLen];
            ECNormalizeData.ByteStarToByteArr(lpOutBuffer, nOutLen, baOut);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Output In Bytes", baOut));
            Util.DebugWriteLine(className, "Got val '" + System.Text.Encoding.Unicode.GetString(baOut) + "'");
#endif

            string result = ECNormalizeData.GetString(lpOutBuffer, nOutLen, eOutEncodingForm,
                ((bForward) ? CodePageOutput : CodePageInput), eFormEngineOut,
                eNormalizeOutput, out rciOutput, ref m_bDebugDisplayMode);

#if DEBUG
            Util.DebugWriteLine(className, "normalized result '" + result + "'");
            byte[] baResult = System.Text.Encoding.BigEndianUnicode.GetBytes(result);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output in UTF16BE", baResult));
            baResult = System.Text.Encoding.Unicode.GetBytes(result);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output in UTF16LE", baResult));
            baResult = System.Text.Encoding.UTF8.GetBytes(result);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output In UTF8", baResult));
            Util.DebugWriteLine(className, "Returning.");
#endif
            return(result);
        }
    }
}
/// <summary>
/// Converts a string and hands back the engine's raw output bytes: the result of DoConvert
/// is copied into a byte array and returned without an ECNormalizeData.GetString step, so
/// legacy output stays a byte array.
/// </summary>
/// <param name="eInEncodingForm">encoding form of the input; Unspecified is resolved by CheckInitEncForms</param>
/// <param name="sInput">input string; null is reported as ErrStatus.IncompleteChar</param>
/// <param name="eOutEncodingForm">requested output encoding form (passed to CheckInitEncForms and PreConvert)</param>
/// <param name="eNormalizeOutput">normalization flags (passed to PreConvert)</param>
/// <param name="rciOutput">receives the number of bytes returned (0 for empty input)</param>
/// <param name="bForward">true to convert forward, false to convert in reverse</param>
/// <returns>the bytes produced by DoConvert; an empty array when sInput is empty</returns>
protected virtual unsafe byte[] InternalConvertEx(EncodingForm eInEncodingForm,
    string sInput,
    EncodingForm eOutEncodingForm,
    NormalizeFlags eNormalizeOutput,
    out int rciOutput,
    bool bForward)
{
    Util.DebugWriteLine(className, "(output bytes) BEGIN");
    Util.DebugWriteLine(className,
        "eEncFormIn " + eInEncodingForm.ToString() + ", " +
        "eEncFormOut " + eOutEncodingForm.ToString());

    if (sInput == null)
    {
        EncConverters.ThrowError(ErrStatus.IncompleteChar);
    }

    Util.DebugWriteLine(className, "sInput.Length() is " + sInput.Length.ToString() + ".");
    if (sInput.Length == 0)
    {
        // nothing to convert (this section added 11/10/2011 by Jim K)
        rciOutput = 0;
        return(new byte[0]);
    }
    Util.DebugWriteLine(className, "sInput is " + sInput + ".");

    // resolve Unspecified forms from the ConversionType:
    //  a legacy side defaults to LegacyString, a Unicode side to UTF16
    CheckInitEncForms(bForward, ref eInEncodingForm, ref eOutEncodingForm);

    // let the converter engine (and/or its wrapper) do its preprocessing.
    EncodingForm eEngineIn = EncodingForm.Unspecified;
    EncodingForm eEngineOut = EncodingForm.Unspecified;
    PreConvert(
        eInEncodingForm,        // [in] form of the caller's data
        ref eEngineIn,          // [out] form the conversion engine wants
        eOutEncodingForm,
        ref eEngineOut,
        ref eNormalizeOutput,
        bForward);

    // room for the normalized input data (6x ought to be enough)
    int cbInput = sInput.Length * 6;
    byte[] abyInput = new byte[cbInput];
    fixed (byte* pbInput = abyInput)
    {
        // the helper class normalizes the data to the format needed by the engine
        Util.DebugWriteLine(className, "Calling GetBytes");
        int nCodePageIn = (bForward) ? CodePageInput : CodePageOutput;
        ECNormalizeData.GetBytes(sInput, sInput.Length, eInEncodingForm, nCodePageIn,
            eEngineIn, pbInput, ref cbInput, ref m_bDebugDisplayMode);

        // room for the converter to fill: at least 10000 bytes, or 6x the input
        int cbOutput = Math.Max(10000, cbInput * 6);
        byte[] abyOutput = new byte[cbOutput];
        fixed (byte* pbOutput = abyOutput)
        {
            for (int i = 3; i >= 0; i--)
            {
                pbOutput[i] = 0;
            }

            // the wrapper sub-class's DoConvert does the actual work
            Util.DebugWriteLine(className, "Calling DoConvert");
            DoConvert(pbInput, cbInput, pbOutput, ref cbOutput);

            // hand back exactly the bytes the engine reported
            byte[] abyResult = new byte[cbOutput];
            ECNormalizeData.ByteStarToByteArr(pbOutput, cbOutput, abyResult);
#if DEBUG
            Util.DebugWriteLine(className, "Output length " + cbOutput.ToString());
            Util.DebugWriteLine(className, Util.getDisplayBytes("Output In Bytes", abyResult));
            Util.DebugWriteLine(className, "Returning.");
#endif
            rciOutput = cbOutput;
            return(abyResult);
        }
    }
}
// Replaces Unspecified input/output encoding forms with the default for the matching side
// of m_eConversionType, then validates the pair with CheckForBadForm.
//
// Going forward the input belongs to the left-hand side and the output to the right-hand
// side; in reverse it is the other way round. A legacy side defaults to LegacyString, a
// Unicode side to whatever DefaultUnicodeEncForm returns.
protected void CheckInitEncForms
(
    bool                bForward,
    ref EncodingForm    eInEncodingForm,
    ref EncodingForm    eOutEncodingForm
)
{
    if (eInEncodingForm == EncodingForm.Unspecified)
    {
        NormConversionType eInputSide = bForward
            ? NormalizeLhsConversionType(m_eConversionType)
            : NormalizeRhsConversionType(m_eConversionType);

        eInEncodingForm = (eInputSide == NormConversionType.eLegacy)
            ? EncodingForm.LegacyString
            : DefaultUnicodeEncForm(bForward, true);     // eUnicode
    }

    // same again for the output form
    if (eOutEncodingForm == EncodingForm.Unspecified)
    {
        NormConversionType eOutputSide = bForward
            ? NormalizeRhsConversionType(m_eConversionType)
            : NormalizeLhsConversionType(m_eConversionType);

        eOutEncodingForm = (eOutputSide == NormConversionType.eLegacy)
            ? EncodingForm.LegacyString
            : DefaultUnicodeEncForm(bForward, false);    // eUnicode
    }

    CheckForBadForm(bForward, eInEncodingForm, eOutEncodingForm);
}
// Makes sure the input/output encoding forms are usable with m_eConversionType in the
// requested direction. Problems are reported through EncConverters.ThrowError:
//  o InvalidConversionType  - reverse requested from a unidirectional converter
//  o InEncFormNotSupported  - input form is legacy where Unicode is needed, or vice versa
//  o OutEncFormNotSupported - output form is legacy where Unicode is needed, or vice versa
// The form on the left-hand side (input going forward, output in reverse) is reported first.
protected void CheckForBadForm
(
    bool            bForward,
    EncodingForm    inEnc,
    EncodingForm    outEnc
)
{
    bool bReverseOfOneWay = EncConverters.IsUnidirectional(m_eConversionType) && !bForward;
    if (bReverseOfOneWay)
    {
        EncConverters.ThrowError(ErrStatus.InvalidConversionType);
        return;
    }

    bool bLhsUnicode = (NormalizeLhsConversionType(m_eConversionType) == NormConversionType.eUnicode);
    bool bRhsUnicode = (NormalizeRhsConversionType(m_eConversionType) == NormConversionType.eUnicode);

    // which side of the conversion type each form has to agree with
    bool bInputSideUnicode = bForward ? bLhsUnicode : bRhsUnicode;
    bool bOutputSideUnicode = bForward ? bRhsUnicode : bLhsUnicode;

    // a form is unusable when it is legacy on a Unicode side or Unicode on a legacy side
    bool bInputBad = (IsLegacyFormat(inEnc) == bInputSideUnicode);
    bool bOutputBad = (IsLegacyFormat(outEnc) == bOutputSideUnicode);

    if (bForward)
    {
        if (bInputBad)
        {
            EncConverters.ThrowError(ErrStatus.InEncFormNotSupported);
        }
        if (bOutputBad)
        {
            EncConverters.ThrowError(ErrStatus.OutEncFormNotSupported);
        }
    }
    else    // reverse
    {
        if (bOutputBad)
        {
            EncConverters.ThrowError(ErrStatus.OutEncFormNotSupported);
        }
        if (bInputBad)
        {
            EncConverters.ThrowError(ErrStatus.InEncFormNotSupported);
        }
    }
}
// we override this method from EncConverter so that we can call all of the step's
// convert functions in turn (i.e. for this one, it isn't sufficient to just
// provide a "DoConvert" method)
//
// Going forward the steps in m_aEncConverter run first to last, each in its configured
// direction (m_aDirectionForward). In reverse they run last to first, each in the opposite
// of its configured direction. The output of one step (string, encoding form and item
// count) is the input of the next.
//
// Every step but the final one uses its own normalization flags (m_aNormalizeOutput) and an
// output form derived from the side of its conversion type that it produces: LegacyBytes
// for a legacy side, Unspecified otherwise. The final step uses the caller's
// eOutEncodingForm and eNormalizeOutput.
//
// rciOutput receives the item count reported by the final step (0 if there are no steps);
// the return value is the final step's output (null if there are no steps). A missing step
// is reported as ErrStatus.MissingConverter. Exceptions thrown by a step propagate to the
// caller untouched.
protected override string InternalConvertEx
(
    EncodingForm    eInEncodingForm,
    string          sInput,
    int             ciInput,
    EncodingForm    eOutEncodingForm,
    NormalizeFlags  eNormalizeOutput,
    out int         rciOutput,
    bool            bForward
)
{
    int nSize = m_aEncConverter.Count;
    string strOutput = null;
    EncodingForm inForm = eInEncodingForm;
    Int32 ciOutput = 0;

    for (int nStep = 0; nStep < nSize; nStep++)
    {
        // walk the list front to back going forward, back to front in reverse
        int i = bForward ? nStep : (nSize - 1) - nStep;
        bool bFinalStep = (nStep == nSize - 1);

        IEncConverter rConverter = (IEncConverter)m_aEncConverter[i];
        if (rConverter == null)
        {
            EncConverters.ThrowError(ErrStatus.MissingConverter);
        }

        rConverter.Debug = Debug;

        // the final step applies the normalization the caller asked for
        NormalizeFlags eNormalizeFlags = (NormalizeFlags)m_aNormalizeOutput[i];
        if (bFinalStep)
        {
            eNormalizeFlags = eNormalizeOutput;
        }

        ConvType eConversionType = rConverter.ConversionType;

        // in reverse mode the direction is the opposite of what the user configured
        bool bDirectionForward = (bool)m_aDirectionForward[i];
        if (!bForward)
        {
            bDirectionForward = !bDirectionForward;
        }

        EncodingForm outForm;
        if (bFinalStep)
        {
            // the final step uses the user's requested output format
            outForm = eOutEncodingForm;
        }
        else
        {
            // a step run forward produces its right-hand side, a step run in reverse
            // produces its left-hand side
            NormConversionType eType = bDirectionForward
                ? NormalizeRhsConversionType(eConversionType)
                : NormalizeLhsConversionType(eConversionType);

            outForm = (eType == NormConversionType.eLegacy)
                ? EncodingForm.LegacyBytes
                : EncodingForm.Unspecified;
        }

        strOutput = rConverter.ConvertEx(
            sInput,
            inForm,
            ciInput,
            outForm,
            out ciOutput,
            eNormalizeFlags,
            bDirectionForward);

        // setup input for the next step
        sInput = strOutput;
        inForm = outForm;
        ciInput = ciOutput;

        // it's possible the user cancelled the debug mode so get it back
        Debug = rConverter.Debug;
    }

    rciOutput = ciOutput;
    return(strOutput);
}
// [DispId(18)]
// Public entry point for an explicit conversion: passes its arguments on to
// InternalConvertEx (which takes them in a different order) and returns its result;
// ciOutput receives the output count reported by InternalConvertEx.
public virtual string ConvertEx(string sInput, EncodingForm inEnc, int ciInput,
    EncodingForm outEnc, out int ciOutput, NormalizeFlags eNormalizeOutput, bool bForward)
{
    string strConverted = InternalConvertEx(
        inEnc,
        sInput,
        ciInput,
        outEnc,
        eNormalizeOutput,
        out ciOutput,
        bForward);

    return strConverted;
}
// Makes sure m_converter refers to a TECkit converter for the requested combination of
// input form, output form, normalization and direction.
//
// Converters are cached in m_mapConverters under a key built from those four values. A
// cached converter is reset and dropped (so that a fresh one is made) when the map file
// (compilable maps) or the loaded tec file has a modification time later than
// m_timeModifiedTec; a file that cannot be found is reported as ErrStatus.CantOpenReadMap.
//
// On return eNormalizeOutput is always NormalizeFlags.None, because the normalization was
// requested from TECkit when the converter was created.
protected unsafe override void PreConvert
(
    EncodingForm        eInEncodingForm,
    ref EncodingForm    eInFormEngine,
    EncodingForm        eOutEncodingForm,
    ref EncodingForm    eOutFormEngine,
    ref NormalizeFlags  eNormalizeOutput,
    bool                bForward
)
{
    // the base class goes first
    base.PreConvert(eInEncodingForm, ref eInFormEngine,
                    eOutEncodingForm, ref eOutFormEngine,
                    ref eNormalizeOutput, bForward);

    // If the user uses one of the *Bytes forms, change that to the matching *String form so
    // the value is what the TECkit engine expects when the converter object is created
    // below (e.g. it expects LegacyString even if the data comes in as LegacyBytes). The
    // data itself still gets converted correctly later.
    switch (eInEncodingForm)
    {
        case EncodingForm.LegacyBytes:
            eInEncodingForm = EncodingForm.LegacyString;
            break;
        case EncodingForm.UTF8Bytes:
            eInEncodingForm = EncodingForm.UTF8String;
            break;
    }

    switch (eOutEncodingForm)
    {
        case EncodingForm.LegacyBytes:
            eOutEncodingForm = EncodingForm.LegacyString;
            break;
        case EncodingForm.UTF8Bytes:
            eOutEncodingForm = EncodingForm.UTF8String;
            break;
    }

    // key under which a converter for this combination is (or will be) cached
    string strConverterKey = String.Concat(
        eInEncodingForm.ToString(),
        eOutEncodingForm.ToString(),
        eNormalizeOutput.ToString(),
        bForward.ToString());

    bool bReload = false;
    if (m_bCompileable && !String.IsNullOrEmpty(m_strMapFileSpec))
    {
        // compilable map (i.e. ImplType SIL.map): see whether the map file has changed.
        // first make sure it's there and get the last time it was modified
        DateTime timeMapModified = DateTime.Now;  // don't care really, but have to initialize it.
        if (!DoesFileExist(m_strMapFileSpec, ref timeMapModified))
        {
            EncConverters.ThrowError(ErrStatus.CantOpenReadMap, m_strMapFileSpec);
        }

        // modified since the load and a converter is cached for this key?
        bool bStale = (timeMapModified > m_timeModifiedTec)
            && m_mapConverters.ContainsKey(strConverterKey);
        if (bStale)
        {
            // drop the cached converter (so we fall thru and do Load)
            ResetConverter((IntPtr)m_mapConverters[strConverterKey]);
            m_mapConverters.Remove(strConverterKey);
            bReload = true;
        }
    }
    else if (IsFileLoaded())
    {
        // the tec file could also have changed out from underneath us (in which case we'd
        // need to reload it).
        DateTime timeTecModified = DateTime.Now;  // don't care really, but have to initialize it.
        if (!DoesFileExist(m_strTecFileSpec, ref timeTecModified))
        {
            EncConverters.ThrowError(ErrStatus.CantOpenReadMap, m_strTecFileSpec);
        }

        // modified since the load and a converter is cached for this key?
        bool bStale = (timeTecModified > m_timeModifiedTec)
            && m_mapConverters.ContainsKey(strConverterKey);
        if (bStale)
        {
            m_baMapping = null;     // triggers a reload
            m_lhsFlags = m_rhsFlags = 0;

            // drop the cached converter (so we fall thru and do Load)
            ResetConverter((IntPtr)m_mapConverters[strConverterKey]);
            m_mapConverters.Remove(strConverterKey);
            bReload = true;
        }
    }

    if (!m_mapConverters.ContainsKey(strConverterKey))
    {
        int status = (int)ErrStatus.NoError;

        // load the map now
        Load(bReload);

        // the output form handed to TECkit is the encoding form OR'ed with the
        // normalization flags (is there no better way to do this?)
        ushort usOutForm = System.Convert.ToUInt16((int)eOutEncodingForm);
        ushort usNormalize = System.Convert.ToUInt16((int)eNormalizeOutput);
        UInt16 eFormOut = System.Convert.ToUInt16(usOutForm | usNormalize);

        // make a converter for this new combination.
        Util.DebugWriteLine(this, "Creating TECkit converter: in " + eInEncodingForm.ToString() + ", out " + eOutEncodingForm.ToString());
        if (!IsFileLoaded())
        {
            // no mapping bytes available: TECkit gets a null mapping pointer
            status = TECkit_CreateConverter(
                (byte *)0,
                m_nMapSize,
                (byte)((bForward) ? 1 : 0),
                System.Convert.ToUInt16((int)eInEncodingForm),
                eFormOut,
                out m_converter
                );
        }
        else
        {
            fixed (byte *pbyMapping = m_baMapping)
            {
                status = TECkit_CreateConverter(
                    pbyMapping,
                    m_nMapSize,
                    (byte)((bForward) ? 1 : 0),
                    System.Convert.ToUInt16((int)eInEncodingForm),
                    eFormOut,
                    out m_converter
                    );
            }
        }

        if (status != (int)ErrStatus.NoError)
        {
            EncConverters.ThrowError(status);
        }
        else
        {
            m_mapConverters[strConverterKey] = m_converter;
        }
    }
    else
    {
        // we already have a converter for this combination
        m_converter = (IntPtr)m_mapConverters[strConverterKey];
    }

    // since TEC can handle output normalization directly (by requesting it here
    // in the creation of the converter), reset the requesting flag so we won't
    // attempt to do it later (all other converters that can't do implicit output
    // normalization will *not* have reset the flag and then after their conversion,
    // if the flag is still set, we'll call TEC to do it for them; see
    // ECNormalizeData.GetString).
    eNormalizeOutput = NormalizeFlags.None;
}
// Turns the raw bytes an engine wrote into lpOutBuffer into the string that is
// handed back to the client.
//
//   lpOutBuffer       - buffer holding the engine's output. Four zero bytes are written
//                       at offsets nOutLen..nOutLen+3, so the buffer must have room for them.
//   nOutLen           - number of output bytes in lpOutBuffer.
//   eOutEncodingForm  - the encoding form the client asked for.
//   nCodePageOut      - code page used to widen LegacyString output.
//   eFormEngineOut    - the encoding form the engine actually produced.
//   eNormalizeOutput  - if not None, the result is passed through
//                       EncConverters.UnicodeEncodingFormConvertEx with this flag.
//   rciOutput         - receives the item count of the returned data (nItems).
//   bDebugDisplayMode - passed by reference to the DisplayDebug* helpers.
//
// Returns the output packed into a string. For the *Bytes forms the bytes are packed
// into the chars rather than widened.
internal static unsafe string GetString(byte* lpOutBuffer, int nOutLen, EncodingForm eOutEncodingForm, int nCodePageOut, EncodingForm eFormEngineOut, NormalizeFlags eNormalizeOutput, out int rciOutput, ref bool bDebugDisplayMode)
{
    // null terminate the output and turn it into a (real) array of bytes
    lpOutBuffer[nOutLen] = lpOutBuffer[nOutLen + 1] = lpOutBuffer[nOutLen + 2] = lpOutBuffer[nOutLen + 3] = 0;
    byte[] baOut = new byte[nOutLen];
    ByteStarToByteArr(lpOutBuffer, nOutLen, baOut);

    // check to see if the engine handled the given output form. If not, then see
    // if it's a conversion we can easily do (otherwise we'll ask TEC to do the
    // conversion for us (later) so that all engines can handle all possible
    // output encoding forms.
    if (eOutEncodingForm != eFormEngineOut)
    {
        if (EncConverter.IsLegacyFormat(eOutEncodingForm))
        {
            if ((eFormEngineOut == EncodingForm.LegacyBytes) && (eOutEncodingForm == EncodingForm.LegacyString))
            {
                // in this case, just *pretend* the engine outputs LegacyString (the
                // LegacyString case below really means "convert LegacyBytes to
                // LegacyString")
                eFormEngineOut = eOutEncodingForm;
            }
        }
        else // unicode forms
        {
            // if the engine gives UTF8 and the client wants UTF16...
            if ((eOutEncodingForm == EncodingForm.UTF16) && (eFormEngineOut == EncodingForm.UTF8Bytes))
            {
                // use the special form to convert it below. Both forms are set to the
                // same value so the final "forms differ" check does not fire for this case.
                eOutEncodingForm = eFormEngineOut = (EncodingForm)CCUnicode8;
            }
            // or vice versa
            else if ((eFormEngineOut == EncodingForm.UTF16) && ((eOutEncodingForm == EncodingForm.UTF8Bytes) || (eOutEncodingForm == EncodingForm.UTF8String)))
            {
                // engine gave UTF16, but user wants a UTF8 flavor: re-encode the bytes
                // as UTF-8 here and carry on as if the engine had produced that flavor.
                UTF8Encoding enc = new UTF8Encoding();
                baOut = enc.GetBytes(Encoding.Unicode.GetChars(baOut));
                eFormEngineOut = eOutEncodingForm;
                nOutLen = baOut.Length;
            }
            // these conversions we can do ourself
            else if ((eOutEncodingForm == EncodingForm.UTF8String) || (eOutEncodingForm == EncodingForm.UTF16))
            {
                eFormEngineOut = eOutEncodingForm;
            }
        }
    }

    // nItems is the item count reported to the caller; nCharsLen is the number of
    // chars expected in caOut (only checked against caOut.Length in DEBUG builds).
    int nItems = 0, nCharsLen = 0;
    char[] caOut = null;
    switch (eFormEngineOut)
    {
        case EncodingForm.LegacyBytes:
        case EncodingForm.UTF8Bytes:
            {
                if (eFormEngineOut == EncodingForm.LegacyBytes)
                    DisplayDebugCharValues(baOut, "Received (LegacyBytes) back from Converter/DLL (returning as LegacyBytes)...", ref bDebugDisplayMode);
                else
                    DisplayDebugUCharValuesFromUTF8(baOut, "Received (UTF8Bytes) back from Converter/DLL (returning as UTF8Bytes)...", ref bDebugDisplayMode);

                // stuff the returned 'bytes' into the BSTR as narrow characters rather than
                // converting to wide (two bytes per char, rounding up for an odd byte count)
                nItems = nOutLen;
                nCharsLen = (nOutLen + 1) / 2;
                caOut = new char[nCharsLen];
                ByteArrToCharArr(baOut, caOut);
                break;
            }
        case EncodingForm.LegacyString:
            {
                DisplayDebugCharValues(baOut, "Received (LegacyBytes) back from Converter/DLL (returning as LegacyString)...", ref bDebugDisplayMode);
                nCharsLen = nItems = nOutLen;
                try
                {
                    // this will throw (for some reason) when doing symbol fonts
                    // (apparently, CP_SYMBOL is no longer supported).
                    caOut = Encoding.GetEncoding(nCodePageOut).GetChars(baOut);
                }
                catch
                {
                    // only the symbol and ISO-8859-1 code pages have a manual fallback;
                    // anything else is rethrown unchanged.
                    if ((nCodePageOut == EncConverters.cnSymbolFontCodePage) || (nCodePageOut == EncConverters.cnIso8859_1CodePage))
                    {
                        // symbol-font output is OR-ed with 0xF000; ISO-8859-1 bytes are
                        // widened as-is (mask of 0).
                        char chMask = (char)0;
                        if (nCodePageOut == EncConverters.cnSymbolFontCodePage)
                            chMask = (char)0xF000;

                        // do it the 'hard way'
                        caOut = new char[nCharsLen];
                        for (int i = 0; i < nCharsLen; i++)
                            caOut[i] = (char)(baOut[i] | chMask);
                    }
                    else
                        throw;
                }
                break;
            }
        case EncodingForm.UTF16:
            {
                nCharsLen = nItems = (nOutLen / 2);
                DisplayDebugUCharValues(baOut, "Received (UTF16) back from Converter/DLL (returning as UTF16)...", ref bDebugDisplayMode);
                caOut = Encoding.Unicode.GetChars(baOut);
                break;
            }
        case EncodingForm.UTF8String:
            {
                DisplayDebugUCharValuesFromUTF8(baOut, "Received (UTF8Bytes) back from Converter/DLL (returning as UTF8String)...", ref bDebugDisplayMode);

                // this encoding form is always encoded using the default code page.
                caOut = Encoding.Default.GetChars(baOut);
                nCharsLen = nItems = nOutLen;
                break;
            }
        case (EncodingForm)CCUnicode8:
            {
                // engine produced UTF-8 but the client wants UTF-16: decode it here.
                DisplayDebugUCharValuesFromUTF8(baOut, "Received (UTF8Bytes) back from Converter/DLL (returning as UTF16)...", ref bDebugDisplayMode);
                caOut = Encoding.UTF8.GetChars(baOut);
                nCharsLen = nItems = caOut.Length;
                break;
            }
        case EncodingForm.UTF16BE:
        case EncodingForm.UTF32:
        case EncodingForm.UTF32BE:
            {
                nCharsLen = nItems = nOutLen / 2;
                DisplayDebugUCharValues(baOut, "Received (UTF16BE/32/32BE) back from Converter/DLL...", ref bDebugDisplayMode);

                // the bytes are packed into chars unchanged (no decoding is done here)
                caOut = new char[nCharsLen];
                ByteArrToCharArr(baOut, caOut);

                // for UTF32, each item takes two chars, so the item count is halved again.
                if (eFormEngineOut != EncodingForm.UTF16BE)
                    nItems /= 2;
                break;
            }
        default:
            EncConverters.ThrowError(ErrStatus.OutEncFormNotSupported);
            break;
    }

    // unless empty returns are allowed, treat "no characters" as an error (and, in
    // DEBUG builds, also a mismatch between the expected and actual char count).
#if !v22_AllowEmptyReturn
    if ((nCharsLen <= 0)
#if DEBUG
        || (nCharsLen != caOut.Length)
#endif
        )
    {
        EncConverters.ThrowError(ErrStatus.NoReturnDataBadOutForm);
    }
#endif

    // check to see if the engine handled the given output form. If not, then ask
    // TEC to do the conversion for us so that all engines can handle all possible
    // output encoding forms (e.g. caller requested utf32, but above CC could only
    // give us utf16/8)
    // Also, if the caller wanted something other than "None" for the eNormalizeOutput,
    // then we also have to call TEC for that as well (but I think this only makes
    // sense if the output is utf16(be) or utf32(be))
    // p.s. if this had been a TEC converter, then the eNormalizeOutput flag would
    // already have been reset to None (by this point), since we would have directly
    // requested that normalized form when we created the converter--see
    // TecEncConverter.PreConvert)
    string strOutput = new string(caOut);
    if ((eFormEngineOut != eOutEncodingForm) || (eNormalizeOutput != NormalizeFlags.None))
    {
        strOutput = EncConverters.UnicodeEncodingFormConvertEx(strOutput, eFormEngineOut, nItems, eOutEncodingForm, eNormalizeOutput, out nItems);
    }

    DisplayDebugUCharValues(strOutput, "Returning back to client...", ref bDebugDisplayMode);
    rciOutput = nItems;
    return strOutput;
}
// Length-less conversion entry point: forwards to InternalConvertEx with an input
// item count of 0 and discards the output item count.
//
// Per the original author's note, this is reached from the 'implicit' ("COM" standard)
// methods such as ConvertToUnicode, where the string itself carries its length and the
// data may contain embedded zeros, so 0 is passed to have the length taken from the
// string rather than from a terminator.
protected override string InternalConvert
    (
    EncodingForm eInEncodingForm,
    string sInput,
    EncodingForm eOutEncodingForm,
    NormalizeFlags eNormalizeOutput,
    bool bForward
    )
{
    // the callee requires an out argument for the item count, but it is not used here
    int cItemsOut;
    string strResult = InternalConvertEx(
        eInEncodingForm,
        sInput,
        0,
        eOutEncodingForm,
        eNormalizeOutput,
        out cItemsOut,
        bForward);

    return strResult;
}
// we override this method from EncConverter so that we can call all of the step's
// convert functions in turn (i.e. for this one, it isn't sufficient to just
// provide a "DoConvert" method)
//
//   eInEncodingForm  - encoding form of sInput (used for the first step executed).
//   sInput           - the data to convert.
//   ciInput          - item count of sInput, passed to the first step executed.
//   eOutEncodingForm - encoding form requested for the final result.
//   eNormalizeOutput - normalization requested for the final result (overrides the
//                      per-step normalization of the last step executed).
//   rciOutput        - receives the item count reported by the last step executed
//                      (0 if there are no steps).
//   bForward         - true to run the steps first-to-last; false to run them
//                      last-to-first with each step's own direction inverted.
//
// Returns the output of the last step executed, or null if there are no steps.
// Exceptions thrown by a step propagate to the caller with their stack trace intact.
protected override string InternalConvertEx
    (
    EncodingForm eInEncodingForm,
    string sInput,
    int ciInput,
    EncodingForm eOutEncodingForm,
    NormalizeFlags eNormalizeOutput,
    out int rciOutput,
    bool bForward
    )
{
    int nSize = m_aEncConverter.Count;
    string strOutput = null;
    EncodingForm inForm = eInEncodingForm;
    int ciOutput = 0;

    // A single loop serves both directions: nStep counts the steps in execution
    // order, and i is the index of the step to run at that point.
    for (int nStep = 0; nStep < nSize; nStep++)
    {
        int i = bForward ? nStep : (nSize - 1 - nStep);
        bool bLastStep = (nStep == (nSize - 1));

        IEncConverter rConverter = (IEncConverter)m_aEncConverter[i];
        if (rConverter == null)
        {
            EncConverters.ThrowError(ErrStatus.MissingConverter);
        }

        rConverter.Debug = Debug;

        // each step has its own normalization setting, except that the final step
        // executed uses what the caller asked for.
        NormalizeFlags eNormalizeFlags = (NormalizeFlags)m_aNormalizeOutput[i];
        if (bLastStep)
        {
            eNormalizeFlags = eNormalizeOutput;
        }

        ConvType eConversionType = rConverter.ConversionType;

        // when running in reverse, each step runs opposite to its configured direction.
        bool bDirectionForward = (bool)m_aDirectionForward[i];
        if (!bForward)
        {
            bDirectionForward = !bDirectionForward;
        }

        // if this is the last one, then use the user's requested output format
        EncodingForm outForm;
        if (bLastStep)
        {
            outForm = eOutEncodingForm;
        }
        else
        {
            // an intermediate step's output is its right-hand side when it runs
            // forward and its left-hand side when it runs in reverse.
            NormConversionType eType = bDirectionForward
                ? NormalizeRhsConversionType(eConversionType)
                : NormalizeLhsConversionType(eConversionType);

            outForm = (eType == NormConversionType.eLegacy)
                ? EncodingForm.LegacyBytes
                : EncodingForm.Unspecified;
        }

        strOutput = rConverter.ConvertEx(
            sInput,
            inForm,
            ciInput,
            outForm,
            out ciOutput,
            eNormalizeFlags,
            bDirectionForward);

        // setup input for the next step
        sInput = strOutput;
        inForm = outForm;
        ciInput = ciOutput;

        // it's possible the user cancelled the debug mode so get it back
        Debug = rConverter.Debug;
    }

    rciOutput = ciOutput;
    return strOutput;
}
// Prepares the TECkit converter handle (m_converter) for the requested combination of
// input form, output form, normalization and direction, creating and caching a new
// one in m_mapConverters when none is cached or when the backing file has changed.
//
// On return eNormalizeOutput is reset to NormalizeFlags.None, because the requested
// normalization is folded into the output form passed to TECkit_CreateConverter.
protected unsafe override void PreConvert
    (
    EncodingForm eInEncodingForm,
    ref EncodingForm eInFormEngine,
    EncodingForm eOutEncodingForm,
    ref EncodingForm eOutFormEngine,
    ref NormalizeFlags eNormalizeOutput,
    bool bForward
    )
{
    // the base class goes first
    base.PreConvert(eInEncodingForm, ref eInFormEngine, eOutEncodingForm, ref eOutFormEngine, ref eNormalizeOutput, bForward);

    // Map the *Bytes forms onto their *String counterparts: the value handed to
    // TECkit when the converter object is created has to be the String flavor,
    // even if the data itself arrives as bytes.
    switch (eInEncodingForm)
    {
        case EncodingForm.LegacyBytes:
            eInEncodingForm = EncodingForm.LegacyString;
            break;
        case EncodingForm.UTF8Bytes:
            eInEncodingForm = EncodingForm.UTF8String;
            break;
    }

    switch (eOutEncodingForm)
    {
        case EncodingForm.LegacyBytes:
            eOutEncodingForm = EncodingForm.LegacyString;
            break;
        case EncodingForm.UTF8Bytes:
            eOutEncodingForm = EncodingForm.UTF8String;
            break;
    }

    // one cached converter per (in form, out form, normalization, direction)
    string cacheKey = String.Concat(
        eInEncodingForm.ToString(),
        eOutEncodingForm.ToString(),
        eNormalizeOutput.ToString(),
        bForward.ToString());

    bool mustReload = false;
    bool watchMapFile = m_bCompileable && !String.IsNullOrEmpty(m_strMapFileSpec);
    if (watchMapFile)
    {
        // compilable map: make sure the map file is still there and see when it
        // was last modified (the initial value is a placeholder only).
        DateTime modifiedOn = DateTime.Now;
        if (!DoesFileExist(m_strMapFileSpec, ref modifiedOn))
        {
            EncConverters.ThrowError(ErrStatus.CantOpenReadMap, m_strMapFileSpec);
        }

        bool isNewer = modifiedOn > m_timeModifiedTec;
        if (isNewer && m_mapConverters.ContainsKey(cacheKey))
        {
            // drop the stale cached converter so the creation path below runs
            ResetConverter((Int32)m_mapConverters[cacheKey]);
            m_mapConverters.Remove(cacheKey);
            mustReload = true;
        }
    }
    else if (IsFileLoaded())
    {
        // the tec file may also have changed underneath us, in which case it
        // needs to be loaded again (the initial value is a placeholder only).
        DateTime modifiedOn = DateTime.Now;
        if (!DoesFileExist(m_strTecFileSpec, ref modifiedOn))
        {
            EncConverters.ThrowError(ErrStatus.CantOpenReadMap, m_strTecFileSpec);
        }

        bool isNewer = modifiedOn > m_timeModifiedTec;
        if (isNewer && m_mapConverters.ContainsKey(cacheKey))
        {
            // clearing the mapping triggers a reload
            m_baMapping = null;
            m_lhsFlags = m_rhsFlags = 0;

            // drop the stale cached converter so the creation path below runs
            ResetConverter((Int32)m_mapConverters[cacheKey]);
            m_mapConverters.Remove(cacheKey);
            mustReload = true;
        }
    }

    if (!m_mapConverters.ContainsKey(cacheKey))
    {
        int status = (int)ErrStatus.NoError;

        // load the map now
        Load(mustReload);

        // the normalization request is OR-ed into the output form value
        ushort outFormBits = System.Convert.ToUInt16((int)eOutEncodingForm);
        ushort normalizeBits = System.Convert.ToUInt16((int)eNormalizeOutput);
        UInt16 outFormWithNormalization = System.Convert.ToUInt16(outFormBits | normalizeBits);
        byte directionFlag = (byte)(bForward ? 1 : 0);

        // make a converter for this new combination.
        fixed (Int32* pConverter = &m_converter)
        {
            if (IsFileLoaded())
            {
                fixed (byte* pMapping = m_baMapping)
                {
                    status = TECkit_CreateConverter(
                        pMapping,
                        m_nMapSize,
                        directionFlag,
                        System.Convert.ToUInt16((int)eInEncodingForm),
                        outFormWithNormalization,
                        (void*)pConverter);
                }
            }
            else
            {
                // no mapping loaded: a null mapping pointer is passed instead
                status = TECkit_CreateConverter(
                    (byte*)0,
                    m_nMapSize,
                    directionFlag,
                    System.Convert.ToUInt16((int)eInEncodingForm),
                    outFormWithNormalization,
                    (void*)pConverter);
            }
        }

        if (status != (int)ErrStatus.NoError)
        {
            EncConverters.ThrowError(status);
        }
        else
        {
            m_mapConverters[cacheKey] = m_converter;
        }
    }
    else
    {
        // reuse the converter already made for this combination
        m_converter = (Int32)m_mapConverters[cacheKey];
    }

    // since TEC can handle output normalization directly (by requesting it here
    // in the creation of the converter), reset the requesting flag so we won't
    // attempt to do it later (all other converters that can't do implicit output
    // normalization will *not* have reset the flag and then after their conversion,
    // if the flag is still set, we'll call TEC to do it for them; see
    // ECNormalizeData.GetString).
    eNormalizeOutput = NormalizeFlags.None;
}
// Helper that turns the client's input string into the bytes handed to the engine.
//
//   strInput          - the input data as received from the client.
//   cnCountIn         - item count supplied by the caller, or 0 to derive the length
//                       from strInput itself.
//   eEncFormIn        - the encoding form of strInput.
//   nCodePageIn       - code page used to narrow LegacyString input (0 switches to the
//                       symbol code page if one of the first three chars is in 0xF000..0xFFFF).
//   eFormEngineIn     - the encoding form the engine wants.
//   pBuf              - destination buffer. Four zero bytes are written after the data,
//                       so it must have room for the data plus 4 bytes.
//   nBufSize          - receives the number of data bytes written (terminator excluded).
//   bDebugDisplayMode - passed by reference to the DisplayDebug* helpers.
//
// Returns pBuf.
internal static unsafe byte* GetBytes(string strInput, int cnCountIn, EncodingForm eEncFormIn, int nCodePageIn, EncodingForm eFormEngineIn, byte* pBuf, ref int nBufSize, ref bool bDebugDisplayMode)
{
    Util.DebugWriteLine(className, "BEGIN");
    Util.DebugWriteLine(className, "eEncFormIn " + eEncFormIn.ToString() + ", " + "eFormEngineIn " + eFormEngineIn.ToString());

    // if the form the user gave is not what the engine wants (and it isn't legacy
    // since legacy forms are already handled later)...
    if ((eEncFormIn != eFormEngineIn) && !EncConverter.IsLegacyFormat(eEncFormIn))
    {
        // we can do some of the conversions ourself. For example, if the input form
        // is UTF16 and the desired form is UTF8, then simply use CCUnicode8 below
        if ((eEncFormIn == EncodingForm.UTF16) && (eFormEngineIn == EncodingForm.UTF8Bytes))
        {
            Util.DebugWriteLine(className, "using CCUnicode8");
            eEncFormIn = (EncodingForm)CCUnicode8;
        }
        // we can also do the following one
        else if ((eEncFormIn == EncodingForm.UTF8String) && (eFormEngineIn == EncodingForm.UTF8Bytes))
        {
            // nothing to do: the UTF8String case below already produces UTF-8 bytes,
            // so there is no need to have TECkit do this one.
        }
        else
        {
            strInput = EncConverters.UnicodeEncodingFormConvertEx(strInput, eEncFormIn, cnCountIn, eFormEngineIn, NormalizeFlags.None, out cnCountIn);
            eEncFormIn = eFormEngineIn;
        }
    }

    int nInLen = 0;
    switch (eEncFormIn)
    {
        case EncodingForm.LegacyBytes:
        case EncodingForm.UTF8Bytes:
            {
                if (cnCountIn != 0)
                {
                    nInLen = cnCountIn; // item count should be the number of bytes directly.
                }
                else
                {
                    // if the user didn't give the length (i.e. via ConvertEx), then get it
                    // from the BSTR length. nInLen will be the # of bytes.
                    nInLen = strInput.Length * 2;
                }

                // these forms are for C++ apps that want to use the BSTR to transfer
                // bytes rather than OLECHARs.
                nInLen = StringToByteStar(strInput, pBuf, nInLen, true);
                if (eEncFormIn == EncodingForm.LegacyBytes)
                {
                    DisplayDebugCharValues(pBuf, nInLen, "Received (LegacyBytes) from client and sending to Converter/DLL...", ref bDebugDisplayMode);
                }
                else
                {
                    DisplayDebugUCharValuesFromUTF8(pBuf, nInLen, "Received (UTF8Bytes) from client and sending to Converter/DLL...", ref bDebugDisplayMode);
                }
                break;
            }
        case EncodingForm.LegacyString:
            {
                if (cnCountIn != 0)
                {
                    nInLen = cnCountIn; // item count should be the number of bytes directly (after conversion below).
                }
                else
                {
                    nInLen = strInput.Length; // the # of bytes will *be* the # of chars in the string after we're done.
                }

                DisplayDebugUCharValues(strInput, "Received (LegacyString) from client...", ref bDebugDisplayMode);

                // use a code page converter to narrowize using the input string
                // (but the 'easier' Add method will send 0; if so, then
                // fallback to the original method.
                byte[] ba = null;

                // first check if it's a symbol font (sometimes the user
                // incorrectly sends a few spaces first, so check the
                // first couple of chars). If it is (and the code page is 0), then
                // change the code page to be CP_SYMBOL.
                // The length is checked before every index (including the first) so
                // that an empty input string doesn't throw here.
                if ((nCodePageIn == 0)
                    && (strInput.Length > 0)
                    && (((strInput[0] & 0xF000) == 0xF000)
                        || ((strInput.Length > 1) && ((strInput[1] & 0xF000) == 0xF000))
                        || ((strInput.Length > 2) && ((strInput[2] & 0xF000) == 0xF000))))
                {
                    nCodePageIn = EncConverters.cnSymbolFontCodePage;
                }

#if __MonoCS__
                // Narrowizing by code page 0 doesn't seem to be what we want on Linux.
                // Treating it as a symbol font or stripping off the low byte works better.
                if (nCodePageIn == 0)
                {
                    ba = BruteForceNarrowize(strInput, nInLen);
                }
                else
#else
                if (true)
#endif
                {
                    // if it's a symbol or iso-8859 encoding, then we can handle just
                    // taking the low byte (i.e. the catch case)
                    if ((nCodePageIn == EncConverters.cnSymbolFontCodePage)
                        || (nCodePageIn == EncConverters.cnIso8859_1CodePage))
                    {
                        try
                        {
                            Encoding enc = Encoding.GetEncoding(nCodePageIn);
                            ba = enc.GetBytes(strInput);
                            Util.DebugWriteLine(className, "Narrowized by given code page.");
                        }
                        catch
                        {
                            ba = BruteForceNarrowize(strInput, nInLen);
                        }
                    }
                    else
                    {
                        // otherwise, simply use the given code page to narrowize it
                        // (there is no fallback here; a failure propagates).
                        Util.DebugWriteLine(className, "Narrowizing by given code page.");
                        Encoding enc = Encoding.GetEncoding(nCodePageIn);
                        ba = enc.GetBytes(strInput);
                    }
                }

                // copy the narrowed bytes into the caller's buffer
                ByteArrToByteStar(ba, pBuf);
                if (cnCountIn != 0)
                {
                    nInLen = cnCountIn; // item count should be the number of bytes directly.
                }
                else
                {
                    // if the user didn't give the length (i.e. via ConvertEx), then
                    // use the number of bytes the narrowing produced.
                    nInLen = ba.Length;
                }

                DisplayDebugCharValues(pBuf, nInLen, "Sending (LegacyBytes) to Converter/DLL...", ref bDebugDisplayMode);
                break;
            }

        // this following form *must* be widened UTF8 via the default code page
        case EncodingForm.UTF8String:
            {
                DisplayDebugUCharValues(strInput, "Received (UTF8String) from client...", ref bDebugDisplayMode);

                // use a code page converter to narrowize using the input string
                Encoding enc = Encoding.Default;
                byte[] ba = enc.GetBytes(strInput);

                // copy the narrowed bytes into the caller's buffer
                ByteArrToByteStar(ba, pBuf);
                if (cnCountIn != 0)
                {
                    nInLen = cnCountIn; // item count should be the number of bytes directly.
                }
                else
                {
                    // if the user didn't give the length (i.e. via ConvertEx), then
                    // use the number of bytes the narrowing produced.
                    nInLen = ba.Length;
                }

                DisplayDebugUCharValuesFromUTF8(pBuf, nInLen, "Sending (UTF8Bytes) to Converter/DLL...", ref bDebugDisplayMode);
                break;
            }

        // this is a special case for CC where the input was actually UTF16, but the
        // CC DLL is expecting (usually) UTF8, so convert from UTF16->UTF8 narrow
        case (EncodingForm)CCUnicode8:
            {
                DisplayDebugUCharValues(strInput, "Received (UTF16) from client...", ref bDebugDisplayMode);
                UTF8Encoding enc = new UTF8Encoding();
                byte[] ba = enc.GetBytes(strInput);

                // copy the UTF-8 bytes into the caller's buffer
                ByteArrToByteStar(ba, pBuf);

                // since we've changed the format, we don't care how many UTF16 words came in
                nInLen = ba.Length;
                DisplayDebugUCharValuesFromUTF8(pBuf, nInLen, "Sending (UTF8Bytes) to Converter/DLL...", ref bDebugDisplayMode);
                break;
            }
        case EncodingForm.UTF16:
            {
                if (cnCountIn != 0)
                {
                    nInLen = cnCountIn; // item count should be the number of 16-bit words directly
                }
                else
                {
                    nInLen = strInput.Length;
                }

                DisplayDebugUCharValues(strInput, "Received (UTF16) from client and sending to Converter/DLL...", ref bDebugDisplayMode);

                // but this should be the count of bytes...
                nInLen *= 2;
                StringToByteStar(strInput, pBuf, nInLen, false);
                break;
            }
        case EncodingForm.UTF16BE:
        case EncodingForm.UTF32:
        case EncodingForm.UTF32BE:
            {
                if (cnCountIn != 0)
                {
                    nInLen = cnCountIn; // item count is the number of Uni chars

                    // for UTF32, the converter's actually expecting the length to be twice
                    // this much again.
                    if (eEncFormIn != EncodingForm.UTF16BE)
                    {
                        nInLen *= 2;
                    }
                }
                else
                {
                    nInLen = strInput.Length;
                }

                // NOTE(review): pBuf is displayed here before StringToByteStar fills it
                // below, so this looks like it shows whatever the buffer held on entry.
                // Confirm whether that is intended.
                DisplayDebugUCharValues(pBuf, nInLen, "Received (UTF16BE/32/32BE) from client/Sending to Converter/DLL...", ref bDebugDisplayMode);

                // for the byte count, double it (possibly again)
                nInLen *= 2;
                StringToByteStar(strInput, pBuf, nInLen, false);
                break;
            }
        default:
            EncConverters.ThrowError(ErrStatus.InEncFormNotSupported);
            break;
    }

    // terminate the data with four zero bytes (not included in the reported size)
    pBuf[nInLen] = pBuf[nInLen + 1] = pBuf[nInLen + 2] = pBuf[nInLen + 3] = 0;
    nBufSize = nInLen;

    return pBuf;
}
// Tells the caller which encoding forms this converter's engine works with, records
// the direction of the upcoming conversion, and (re-)loads the backing file(s) if needed.
protected override void PreConvert
    (
    EncodingForm eInEncodingForm,
    ref EncodingForm eInFormEngine,
    EncodingForm eOutEncodingForm,
    ref EncodingForm eOutFormEngine,
    ref NormalizeFlags eNormalizeOutput,
    bool bForward
    )
{
    // the base class goes first
    base.PreConvert(eInEncodingForm, ref eInFormEngine, eOutEncodingForm, ref eOutFormEngine, ref eNormalizeOutput, bForward);

    // Only the 'String' flavors are handled here: LegacyString for a legacy
    // converter, UTF-16 otherwise. Both sides of the engine use the same form.
    EncodingForm engineForm = m_bLegacy ? EncodingForm.LegacyString : EncodingForm.UTF16;
    eOutFormEngine = engineForm;
    eInFormEngine = engineForm;

    // The bForward received here may differ from IEncConverter->DirectionForward
    // (e.g. when arriving via ConvertEx), so it is *this* value that decides the
    // direction of the forthcoming conversion (DoConvert).
    m_bReverseLookup = !bForward;

    // check to see if the file(s) need to be (re-)loaded at this point.
    Load();
}