Esempio n. 1
0
 /// <summary>
 /// Base-class pre-conversion hook. Logs the requested input/output encoding forms and
 /// then reports back that the engine takes and produces exactly those forms.
 /// </summary>
 /// <param name="eInEncodingForm">Form of the data supplied by the caller.</param>
 /// <param name="eInFormEngine">Receives the form the engine wants as input (here: a copy of <paramref name="eInEncodingForm"/>).</param>
 /// <param name="eOutEncodingForm">Form the caller wants back.</param>
 /// <param name="eOutFormEngine">Receives the form the engine produces (here: a copy of <paramref name="eOutEncodingForm"/>).</param>
 /// <param name="eNormalizeOutput">Not touched by this base implementation.</param>
 /// <param name="bForward">Not used by this base implementation.</param>
 protected virtual void PreConvert
 (
     EncodingForm eInEncodingForm,
     ref EncodingForm eInFormEngine,
     EncodingForm eOutEncodingForm,
     ref EncodingForm eOutFormEngine,
     ref NormalizeFlags eNormalizeOutput,
     bool bForward
 )
 {
     string strRequestedForms = "eEncFormIn " + eInEncodingForm.ToString() + ", " +
                                "eEncFormOut " + eOutEncodingForm.ToString();
     Util.DebugWriteLine(className, strRequestedForms);

     // Pass-through default: pretend the engine can consume and emit whatever the caller
     //  asked for. That is rarely true in practice, so each engine wrapper is expected to
     //  override this method; e.g. a CC wrapper would report UTF8Bytes as the engine's
     //  input form for Unicode data.
     eInFormEngine  = eInEncodingForm;
     eOutFormEngine = eOutEncodingForm;
 }
Esempio n. 2
0
        /// <summary>
        /// Helper that turns the caller's input string into the raw bytes the conversion
        /// engine expects, writing them (plus a four-byte zero terminator) into
        /// <paramref name="pBuf"/>.
        /// </summary>
        /// <param name="strInput">The input data, carried in a string.</param>
        /// <param name="cnCountIn">Item count supplied by the caller; 0 means "not given", in which case the length is derived from the data.</param>
        /// <param name="eEncFormIn">Encoding form of <paramref name="strInput"/>.</param>
        /// <param name="nCodePageIn">Code page used to narrow LegacyString input; 0 is replaced by the symbol-font code page when the leading characters look like symbol-font data.</param>
        /// <param name="eFormEngineIn">Encoding form the engine wants to receive.</param>
        /// <param name="pBuf">Destination buffer. It must have room for the data plus the four terminator bytes written after it.</param>
        /// <param name="nBufSize">On return, the number of data bytes written (terminator not included). The incoming value is not read.</param>
        /// <param name="bDebugDisplayMode">Passed by reference to the debug-display helpers.</param>
        /// <returns><paramref name="pBuf"/>.</returns>
        internal static unsafe byte *GetBytes(string strInput, int cnCountIn, EncodingForm eEncFormIn, int nCodePageIn, EncodingForm eFormEngineIn, byte *pBuf, ref int nBufSize, ref bool bDebugDisplayMode)
        {
            Util.DebugWriteLine(className, "BEGIN");
            Util.DebugWriteLine(className,
                                "eEncFormIn " + eEncFormIn.ToString() + ", " +
                                "eFormEngineIn " + eFormEngineIn.ToString());

            // if the form the user gave is not what the engine wants (and it isn't legacy
            //  since legacy forms are already handled later)...
            if ((eEncFormIn != eFormEngineIn) && !EncConverter.IsLegacyFormat(eEncFormIn))
            {
                // we can do some of the conversions ourselves. For example, if the input form
                //  is UTF16 and the desired form is UTF8, then simply use CCUnicode8 below
                if ((eEncFormIn == EncodingForm.UTF16) && (eFormEngineIn == EncodingForm.UTF8Bytes))
                {
                    Util.DebugWriteLine(className, "using CCUnicode8");
                    eEncFormIn = (EncodingForm)CCUnicode8;
                }
                // we can also do the following one
                else if ((eEncFormIn == EncodingForm.UTF8String) && (eFormEngineIn == EncodingForm.UTF8Bytes))
                {
                    ; // i.e. don't have TECkit do this one (the UTF8String case below handles it)
                }
                else
                {
                    // anything else: re-encode the string into the engine's form up front
                    //  (this also updates the item count to match the re-encoded data)
                    strInput   = EncConverters.UnicodeEncodingFormConvertEx(strInput, eEncFormIn, cnCountIn, eFormEngineIn, NormalizeFlags.None, out cnCountIn);
                    eEncFormIn = eFormEngineIn;
                }
            }

            int nInLen = 0;

            switch (eEncFormIn)
            {
            case EncodingForm.LegacyBytes:
            case EncodingForm.UTF8Bytes:
            {
                if (cnCountIn != 0)
                {
                    nInLen = cnCountIn;         // item count should be the number of bytes directly.
                }
                else
                {
                    // if the user didn't give the length (i.e. via ConvertEx), then get it
                    //  from the BSTR length. nInLen will be the # of bytes.
                    nInLen = strInput.Length * 2;
                }

                // these forms are for C++ apps that want to use the BSTR to transfer
                //  bytes rather than OLECHARs.
                nInLen = StringToByteStar(strInput, pBuf, nInLen, true);

                if (eEncFormIn == EncodingForm.LegacyBytes)
                {
                    DisplayDebugCharValues(pBuf, nInLen, "Received (LegacyBytes) from client and sending to Converter/DLL...", ref bDebugDisplayMode);
                }
                else
                {
                    DisplayDebugUCharValuesFromUTF8(pBuf, nInLen, "Received (UTF8Bytes) from client and sending to Converter/DLL...", ref bDebugDisplayMode);
                }
                break;
            }

            case EncodingForm.LegacyString:
            {
                // provisional length: only used for the brute-force narrowing fallback below
                if (cnCountIn != 0)
                {
                    nInLen = cnCountIn;           // item count should be the number of bytes directly (after conversion below).
                }
                else
                {
                    nInLen = strInput.Length;         // the # of bytes will *be* the # of chars in the string after we're done.
                }
                DisplayDebugUCharValues(strInput, "Received (LegacyString) from client...", ref bDebugDisplayMode);

                // use a code page converter to narrowize using the input string
                // (but the 'easier' Add method will send 0; if so, then
                //  fallback to the original method.
                byte[] ba = null;

                // first check if it's a symbol font (sometimes the user
                //  incorrectly sends a few spaces first, so check the
                //  first couple of characters). If it is (and the code page is 0), then
                //  change the code page to be CP_SYMBOL.
                // The Length > 0 guard keeps an empty string from throwing on strInput[0];
                //  the other two probes were already guarded.
                if ((nCodePageIn == 0) &&
                    (strInput.Length > 0) &&
                    (((strInput[0] & 0xF000) == 0xF000) ||
                     ((strInput.Length > 1) && ((strInput[1] & 0xF000) == 0xF000)) ||
                     ((strInput.Length > 2) && ((strInput[2] & 0xF000) == 0xF000))
                    )
                    )
                {
                    nCodePageIn = EncConverters.cnSymbolFontCodePage;
                }

#if __MonoCS__
                // Narrowizing by code page 0 doesn't seem to be what we want on Linux.
                // Treating it as a symbol font or stripping off the low byte works better.
                if (nCodePageIn == 0)
                {
                    ba = BruteForceNarrowize(strInput, nInLen);
                }
                else
#else
                if (true)
#endif
                {
                    // if it's a symbol or iso-8859 encoding, then we can handle just
                    //  taking the low byte (i.e. the catch case)
                    if ((nCodePageIn == EncConverters.cnSymbolFontCodePage) ||
                        (nCodePageIn == EncConverters.cnIso8859_1CodePage)
                        )
                    {
                        try
                        {
                            Encoding enc = Encoding.GetEncoding(nCodePageIn);
                            ba = enc.GetBytes(strInput);
                            Util.DebugWriteLine(className, "Narrowized by given code page.");
                        }
                        catch
                        {
                            // deliberate best-effort: if the platform has no encoding for
                            //  this code page, fall back to the brute-force narrowing
                            ba = BruteForceNarrowize(strInput, nInLen);
                        }
                    }
                    else
                    {
                        // otherwise, simply use the given code page (0 selects the
                        //  default code page) to narrowize it; failures propagate.
                        Util.DebugWriteLine(className, "Narrowizing by given code page.");
                        Encoding enc = Encoding.GetEncoding(nCodePageIn);
                        ba = enc.GetBytes(strInput);
                    }
                }


                // copy the narrowed managed array into the caller's raw buffer
                ByteArrToByteStar(ba, pBuf);

                if (cnCountIn != 0)
                {
                    nInLen = cnCountIn;         // item count should be the number of bytes directly.
                }
                else
                {
                    // if the user didn't give the length (i.e. via ConvertEx), then take it
                    //  from the narrowed data. nInLen will be the # of bytes.
                    nInLen = ba.Length;
                }

                DisplayDebugCharValues(pBuf, nInLen, "Sending (LegacyBytes) to Converter/DLL...", ref bDebugDisplayMode);
                break;
            }

            // this following form *must* be widened UTF8 via the default code page
            case EncodingForm.UTF8String:
            {
                DisplayDebugUCharValues(strInput, "Received (UTF8String) from client...", ref bDebugDisplayMode);

                // use the default code page to narrowize the input string back to bytes
                Encoding enc = Encoding.Default;
                byte[]   ba  = enc.GetBytes(strInput);

                // copy the narrowed managed array into the caller's raw buffer
                ByteArrToByteStar(ba, pBuf);

                if (cnCountIn != 0)
                {
                    nInLen = cnCountIn;         // item count should be the number of bytes directly.
                }
                else
                {
                    // if the user didn't give the length (i.e. via ConvertEx), then take it
                    //  from the narrowed data. nInLen will be the # of bytes.
                    nInLen = ba.Length;
                }

                DisplayDebugUCharValuesFromUTF8(pBuf, nInLen, "Sending (UTF8Bytes) to Converter/DLL...", ref bDebugDisplayMode);
                break;
            }

            // this is a special case for CC where the input was actually UTF16, but the
            //  CC DLL is expecting (usually) UTF8, so convert from UTF16->UTF8 narrow
            case (EncodingForm)CCUnicode8:
            {
                DisplayDebugUCharValues(strInput, "Received (UTF16) from client...", ref bDebugDisplayMode);

                UTF8Encoding enc = new UTF8Encoding();
                byte[]       ba  = enc.GetBytes(strInput);

                // copy the UTF-8 bytes into the caller's raw buffer
                ByteArrToByteStar(ba, pBuf);

                // since we've changed the format, we don't care how many UTF16 words came in
                nInLen = ba.Length;

                DisplayDebugUCharValuesFromUTF8(pBuf, nInLen, "Sending (UTF8Bytes) to Converter/DLL...", ref bDebugDisplayMode);
                break;
            }

            case EncodingForm.UTF16:
            {
                if (cnCountIn != 0)
                {
                    nInLen = cnCountIn;           // item count should be the number of 16-bit words directly
                }
                else
                {
                    nInLen = strInput.Length;
                }

                DisplayDebugUCharValues(strInput, "Received (UTF16) from client and sending to Converter/DLL...", ref bDebugDisplayMode);

                // but this should be the count of bytes...
                nInLen *= 2;
                StringToByteStar(strInput, pBuf, nInLen, false);
                break;
            }

            case EncodingForm.UTF16BE:
            case EncodingForm.UTF32:
            case EncodingForm.UTF32BE:
            {
                if (cnCountIn != 0)
                {
                    nInLen = cnCountIn;         // item count is the number of Uni chars

                    // for UTF32, the converter's actually expecting the length to be twice
                    //  this much again.
                    if (eEncFormIn != EncodingForm.UTF16BE)
                    {
                        nInLen *= 2;
                    }
                }
                else
                {
                    nInLen = strInput.Length;
                }

                // keep the pre-doubling length for the debug display (the same value the
                //  display call has always been given)
                int nDisplayLen = nInLen;

                // for the byte count, double it (possibly again)
                nInLen *= 2;
                StringToByteStar(strInput, pBuf, nInLen, false);

                // show the buffer only now that it has been filled; displaying it before
                //  the copy would dump whatever the buffer held on entry
                DisplayDebugUCharValues(pBuf, nDisplayLen, "Received (UTF16BE/32/32BE) from client/Sending to Converter/DLL...", ref bDebugDisplayMode);
                break;
            }

            default:
                EncConverters.ThrowError(ErrStatus.InEncFormNotSupported);
                break;
            }

            // zero-terminate with four bytes (enough for the widest form) and report
            //  the data length back to the caller
            pBuf[nInLen] = pBuf[nInLen + 1] = pBuf[nInLen + 2] = pBuf[nInLen + 3] = 0;
            nBufSize     = (int)nInLen;

            return(pBuf);
        }
Esempio n. 3
0
        /// <summary>
        /// Helper that turns the raw bytes produced by the conversion engine into the string
        /// returned to the caller, in the encoding form the caller requested.
        /// </summary>
        /// <param name="lpOutBuffer">Engine output. Four zero bytes are written starting at index <paramref name="nOutLen"/>, so the buffer must extend at least that far.</param>
        /// <param name="nOutLen">Number of data bytes in <paramref name="lpOutBuffer"/>.</param>
        /// <param name="eOutEncodingForm">Encoding form the caller wants back.</param>
        /// <param name="nCodePageOut">Code page used to widen LegacyString output.</param>
        /// <param name="eFormEngineOut">Encoding form the engine actually produced.</param>
        /// <param name="eNormalizeOutput">Normalization requested for the result; anything other than None triggers a call to EncConverters.UnicodeEncodingFormConvertEx.</param>
        /// <param name="rciOutput">Receives the item count of the returned data.</param>
        /// <param name="bDebugDisplayMode">Passed by reference to the debug-display helpers.</param>
        /// <returns>The converted data as a string.</returns>
        internal static unsafe string GetString(byte *lpOutBuffer, int nOutLen, EncodingForm eOutEncodingForm, int nCodePageOut, EncodingForm eFormEngineOut, NormalizeFlags eNormalizeOutput, out int rciOutput, ref bool bDebugDisplayMode)
        {
            // null terminate the output and turn it into a (real) array of bytes
            Util.DebugWriteLine(className, "BEGIN");
            lpOutBuffer[nOutLen] = lpOutBuffer[nOutLen + 1] = lpOutBuffer[nOutLen + 2] = lpOutBuffer[nOutLen + 3] = 0;
            byte[] baOut = new byte[nOutLen];
            ByteStarToByteArr(lpOutBuffer, nOutLen, baOut);
            Util.DebugWriteLine(className, Util.getDisplayBytes("byte array", baOut));

            // check to see if the engine handled the given output form. If not, then see
            //  if it's a conversion we can easily do (otherwise we'll ask TEC to do the
            //  conversion for us (later) so that all engines can handle all possible
            //  output encoding forms).
            Util.DebugWriteLine(className,
                                "eOutEncodingForm " + eOutEncodingForm.ToString() + ", " +
                                "eFormEngineOut " + eFormEngineOut.ToString());
            if (eOutEncodingForm != eFormEngineOut)
            {
                if (EncConverter.IsLegacyFormat(eOutEncodingForm))
                {
                    if ((eFormEngineOut == EncodingForm.LegacyBytes) && (eOutEncodingForm == EncodingForm.LegacyString))
                    {
                        // in this case, just *pretend* the engine outputs LegacyString (the
                        //  LegacyString case below really means "convert LegacyBytes to
                        //  LegacyString")
                        eFormEngineOut = eOutEncodingForm;
                    }
                }
                else    // unicode forms
                {
                    // if the engine gives UTF8 and the client wants UTF16...
                    if ((eOutEncodingForm == EncodingForm.UTF16) && (eFormEngineOut == EncodingForm.UTF8Bytes))
                    {
                        // use the special form to convert it below
                        // NOTE(review): both forms are left as CCUnicode8 from here on, so if
                        //  eNormalizeOutput is not None the conversion call at the end of this
                        //  method receives CCUnicode8 for both forms even though the string is
                        //  UTF16 by then -- confirm that callee copes with that value.
                        Util.DebugWriteLine(className, "using CCUnicode8");
                        eOutEncodingForm = eFormEngineOut = (EncodingForm)CCUnicode8;
                    }
                    // or vice versa
                    else if ((eFormEngineOut == EncodingForm.UTF16) &&
                             ((eOutEncodingForm == EncodingForm.UTF8Bytes) || (eOutEncodingForm == EncodingForm.UTF8String)))
                    {
                        // engine gave UTF16, but user wants a UTF8 flavor: re-encode the
                        //  bytes as UTF-8 here and treat that as the engine's output.
                        // Decoder d = Encoding.Unicode.GetChars(baOut);
                        // d.GetChars(
                        UTF8Encoding enc = new UTF8Encoding();
                        baOut          = enc.GetBytes(Encoding.Unicode.GetChars(baOut));
                        eFormEngineOut = eOutEncodingForm;
                        nOutLen        = baOut.Length;
                    }
                    // these conversions we can do ourselves
                    else if ((eOutEncodingForm == EncodingForm.UTF8String) ||
                             (eOutEncodingForm == EncodingForm.UTF16))
                    {
                        // NOTE(review): _MSC_VER is normally a C++ compiler macro; unless the
                        //  build defines it for C#, this assignment is compiled out and the
                        //  branch does nothing -- confirm against the project settings.
#if _MSC_VER
                        // Doesn't this wipe out the distinction?
                        // On Linux we need to be able to convert the output from UTF32 to UTF16.
                        eFormEngineOut = eOutEncodingForm;
#endif
                    }
                }
            }

            // nItems is the item count reported to the caller; nCharsLen is the expected
            //  length of caOut, the characters that become the result string.
            int    nItems = 0, nCharsLen = 0;
            char[] caOut = null;
            switch (eFormEngineOut)
            {
            case EncodingForm.LegacyBytes:
            case EncodingForm.UTF8Bytes:
            {
                if (eFormEngineOut == EncodingForm.LegacyBytes)
                {
                    DisplayDebugCharValues(baOut, "Received (LegacyBytes) back from Converter/DLL (returning as LegacyBytes)...", ref bDebugDisplayMode);
                }
                else
                {
                    DisplayDebugUCharValuesFromUTF8(baOut, "Received (UTF8Bytes) back from Converter/DLL (returning as UTF8Bytes)...", ref bDebugDisplayMode);
                }

                // stuff the returned 'bytes' into the BSTR as narrow characters rather than
                //  converting to wide (two bytes per char, rounding up for an odd count)
                nItems    = nOutLen;
                nCharsLen = (nOutLen + 1) / 2;
                caOut     = new char[nCharsLen];
                ByteArrToCharArr(baOut, caOut);
                break;
            }

            case EncodingForm.LegacyString:
            {
                DisplayDebugCharValues(baOut, "Received (LegacyBytes) back from Converter/DLL (returning as LegacyString)...", ref bDebugDisplayMode);

                nCharsLen = nItems = nOutLen;

#if __MonoCS__
                // Widening by code page 0 doesn't seem to be what we want on Linux.
                // Treating it as a symbol font or widening each byte directly works better.
                if (nCodePageOut == 0)
                {
                    caOut = BruteForceWiden(nCodePageOut, baOut, nCharsLen);
                }
                else
#else
                if (true)
#endif
                {
                    try
                    {
                        // this will throw (for some reason) when doing symbol fonts
                        //  (apparently, CP_SYMBOL is no longer supported).
                        caOut = Encoding.GetEncoding(nCodePageOut).GetChars(baOut);
                    }
                    catch
                    {
                        // only the symbol and iso-8859-1 code pages get the brute-force
                        //  fallback; any other failure is rethrown to the caller
                        if ((nCodePageOut == EncConverters.cnSymbolFontCodePage) || (nCodePageOut == EncConverters.cnIso8859_1CodePage))
                        {
                            caOut = BruteForceWiden(nCodePageOut, baOut, nCharsLen);
                        }
                        else
                        {
                            throw;
                        }
                    }
                }

                break;
            }

            case EncodingForm.UTF16:
            {
                nCharsLen = nItems = (nOutLen / 2);

                DisplayDebugUCharValues(baOut, "Received (UTF16) back from Converter/DLL (returning as UTF16)...", ref bDebugDisplayMode);

                caOut = Encoding.Unicode.GetChars(baOut);
                break;
            }

            case EncodingForm.UTF8String:
            {
                DisplayDebugUCharValuesFromUTF8(baOut, "Received (UTF8Bytes) back from Converter/DLL (returning as UTF8String)...", ref bDebugDisplayMode);

                // this encoding form is always encoded using the default code page.
                caOut = Encoding.Default.GetChars(baOut);

                nCharsLen = nItems = nOutLen;
                break;
            }

            case (EncodingForm)CCUnicode8:
            {
                DisplayDebugUCharValuesFromUTF8(baOut, "Received (UTF8Bytes) back from Converter/DLL (returning as UTF16)...", ref bDebugDisplayMode);

                caOut = Encoding.UTF8.GetChars(baOut);

                // the lengths come from the decoded characters, not the UTF-8 byte count
                nCharsLen = nItems = caOut.Length;
                break;
            }

            case EncodingForm.UTF16BE:
            case EncodingForm.UTF32:
            case EncodingForm.UTF32BE:
            {
                nCharsLen = nItems = nOutLen / 2;

                DisplayDebugUCharValues(baOut, "Received (UTF16BE/32/32BE) back from Converter/DLL...", ref bDebugDisplayMode);

                // the bytes are carried in the string as-is, two per char
                caOut = new char[nCharsLen];
                ByteArrToCharArr(baOut, caOut);

                // for UTF32, each item takes two chars, so halve the item count.
                if (eFormEngineOut != EncodingForm.UTF16BE)
                {
                    nItems /= 2;
                }
                break;
            }

            default:
                EncConverters.ThrowError(ErrStatus.OutEncFormNotSupported);
                break;
            }

            // unless empty returns are allowed, treat "no characters" as an error (debug
            //  builds additionally check the computed length against the actual array)
#if !v22_AllowEmptyReturn
            if ((nCharsLen <= 0)
#if DEBUG
                || (nCharsLen != caOut.Length)
#endif
                )
            {
                EncConverters.ThrowError(ErrStatus.NoReturnDataBadOutForm);
            }
#endif

            // check to see if the engine handled the given output form. If not, then ask
            //  TEC to do the conversion for us so that all engines can handle all possible
            //  output encoding forms (e.g. caller requested utf32, but above CC could only
            //  give us utf16/8)
            // Also, if the caller wanted something other than "None" for the eNormalizeOutput,
            //  then we also have to call TEC for that as well (but I think this only makes
            //  sense if the output is utf16(be) or utf32(be))
            // p.s. if this had been a TEC converter, then the eNormalizeOutput flag would
            //  already have been reset to None (by this point), since we would have directly
            //  requested that normalized form when we created the converter--see
            //  TecEncConverter.PreConvert)
            string strOutput = new string(caOut);
#if DEBUG
            byte[] byteArray = Encoding.BigEndianUnicode.GetBytes(caOut);
            Util.DebugWriteLine(className, Util.getDisplayBytes("characters", byteArray));
            byte[] baResult = System.Text.Encoding.BigEndianUnicode.GetBytes(strOutput);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized strOutput in UTF16BE", baResult));
#endif
            if ((eFormEngineOut != eOutEncodingForm) ||
                (eNormalizeOutput != NormalizeFlags.None))
            {
                strOutput = EncConverters.UnicodeEncodingFormConvertEx(strOutput, eFormEngineOut, nItems, eOutEncodingForm, eNormalizeOutput, out nItems);
            }

            DisplayDebugUCharValues(strOutput, "Returning back to client...", ref bDebugDisplayMode);

            rciOutput = nItems;
            return(strOutput);
        }
Esempio n. 4
0
        /// <summary>
        /// Fills in any encoding form the caller left as Unspecified, based on this
        /// converter's conversion type and direction, then validates the resulting pair
        /// via CheckForBadForm.
        /// </summary>
        /// <param name="bForward">True for a forward conversion; selects which side (LHS/RHS) of the conversion type applies to input and output.</param>
        /// <param name="eInEncodingForm">Input form; replaced when it is Unspecified.</param>
        /// <param name="eOutEncodingForm">Output form; replaced when it is Unspecified.</param>
        protected void CheckInitEncForms
        (
            bool bForward,
            ref EncodingForm eInEncodingForm,
            ref EncodingForm eOutEncodingForm
        )
        {
            Util.DebugWriteLine(className,
                                "eEncFormIn1 " + eInEncodingForm.ToString() + ", " +
                                "eEncFormOut1 " + eOutEncodingForm.ToString());

            // Defaults when the caller did not choose a form:
            //  legacy side  -> LegacyString
            //  unicode side -> whatever DefaultUnicodeEncForm picks
            if (eInEncodingForm == EncodingForm.Unspecified)
            {
                // going forward the input is the left-hand side, otherwise the right-hand side
                NormConversionType eInputSide = bForward
                    ? NormalizeLhsConversionType(m_eConversionType)
                    : NormalizeRhsConversionType(m_eConversionType);

                eInEncodingForm = (eInputSide == NormConversionType.eLegacy)
                    ? EncodingForm.LegacyString
                    : DefaultUnicodeEncForm(bForward, true);
            }

            if (eOutEncodingForm == EncodingForm.Unspecified)
            {
                // the output is the opposite side from the input
                NormConversionType eOutputSide = bForward
                    ? NormalizeRhsConversionType(m_eConversionType)
                    : NormalizeLhsConversionType(m_eConversionType);

                eOutEncodingForm = (eOutputSide == NormConversionType.eLegacy)
                    ? EncodingForm.LegacyString
                    : DefaultUnicodeEncForm(bForward, false);
            }

            Util.DebugWriteLine(className,
                                "eEncFormIn2 " + eInEncodingForm.ToString() + ", " +
                                "eEncFormOut2 " + eOutEncodingForm.ToString());

            CheckForBadForm(bForward, eInEncodingForm, eOutEncodingForm);
        }
Esempio n. 5
0
        // This function is the meat of the conversion process. It is long because the input
        //	and output buffers are pinned with 'fixed' for the duration of the conversion, and
        //	the raw pointers handed to the helpers and to DoConvert are only valid inside
        //	those 'fixed' blocks, so the whole pipeline lives in one method.
        //	The basic structure is:
        //
        //	o	Check Input Data (null is an error; an empty string returns "" immediately)
        //  o   Fill in unspecified encoding forms (CheckInitEncForms)
        //  o   Give the sub-class (via PreConvert) the opportunity to load tables and do
        //      any special preprocessing it needs to ahead of the actual conversion
        //  o   Normalize the input data to a byte array based on its input EncodingForm
        //      (ECNormalizeData.GetBytes, which may also re-encode Unicode flavors the
        //      engine doesn't support, as indicated via PreConvert)
        //  o       Allocate a buffer for the output data (min 10000 bytes)
        //  o       Call the subclass (via DoConvert) to do the actual conversion.
        //  o   Normalize the output data to match the requested output EncodingForm
        //      (ECNormalizeData.GetString).
        //  o   Return the resultant string; the item count goes to rciOutput.
        //
        //  Both buffers are allocated with four spare bytes beyond their nominal capacity,
        //  because GetBytes and GetString each write a four-byte zero terminator directly
        //  after the data; without the slack a result that fills the buffer (e.g. a single
        //  UTF16 character that becomes three UTF8 bytes in a six-byte buffer) would be
        //  terminated past the end of the pinned array.
        //
        protected virtual unsafe string InternalConvertEx
        (
            EncodingForm eInEncodingForm,
            string sInput,
            int ciInput,
            EncodingForm eOutEncodingForm,
            NormalizeFlags eNormalizeOutput,
            out int rciOutput,
            bool bForward
        )
        {
            Util.DebugWriteLine(className, "BEGIN");
            Util.DebugWriteLine(className,
                                "eEncFormIn " + eInEncodingForm.ToString() + ", " +
                                "eEncFormOut " + eOutEncodingForm.ToString());
            if (sInput == null)
            {
                EncConverters.ThrowError(ErrStatus.IncompleteChar);
            }
            if (sInput.Length == 0)
            {
                rciOutput = 0;
                return("");
            }

#if DEBUG && __MonoCS__
// for debugging only BEGIN
            //byte[] baIn = System.Text.Encoding.UTF8.GetBytes(sInput);            // works
            byte[] baIn = System.Text.Encoding.BigEndianUnicode.GetBytes(sInput);  // easier to read
            Util.DebugWriteLine(className, Util.getDisplayBytes("Input BigEndianUnicode", baIn));
            baIn = System.Text.Encoding.Unicode.GetBytes(sInput);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Input Unicode", baIn));

            int     nInLen = sInput.Length;
            byte [] baIn2  = new byte[nInLen];
            for (int i = 0; i < nInLen; i++)
            {
                baIn2[i] = (byte)(sInput[i] & 0xFF);
            }
            Util.DebugWriteLine(className, Util.getDisplayBytes("Input Narrowized", baIn2));

/*
 *          System.Text.Encoding encFrom = System.Text.Encoding.GetEncoding(12000);
 *          System.Text.Encoding encTo   = System.Text.Encoding.UTF8;
 *
 *          // Perform the conversion from one encoding to the other.
 *          Util.DebugWriteLine(className, "Starting with " + baIn.Length.ToString() + " bytes.");
 *          byte[] baOut2 = System.Text.Encoding.Convert(encFrom, encTo, baIn);
 *          Util.DebugWriteLine(className, "Converted to " + baOut2.Length.ToString() + " bytes.");
 *          string resultString = System.Text.Encoding.Default.GetString(baOut2, 0, baOut2.Length);
 *          Util.DebugWriteLine(className, "Test output '" + resultString + "'");
 */
// for debugging only END
#endif

            // if the user hasn't specified, then take the default case for the ConversionType:
            //  if L/RHS == eLegacy, then LegacyString
            //  if L/RHS == eUnicode, then UTF16
            CheckInitEncForms
            (
                bForward,
                ref eInEncodingForm,
                ref eOutEncodingForm
            );

            // allow the converter engine's (and/or its COM wrapper) to do some preprocessing.
            EncodingForm eFormEngineIn = EncodingForm.Unspecified, eFormEngineOut = EncodingForm.Unspecified;
            PreConvert
            (
                eInEncodingForm,    // [in] form in the BSTR
                ref eFormEngineIn,  // [out] form the conversion engine wants, etc.
                eOutEncodingForm,
                ref eFormEngineOut,
                ref eNormalizeOutput,
                bForward
            );

            // room for the four-byte zero terminator that GetBytes/GetString append
            //  after the data
            const int cnTerminatorBytes = 4;

            // get enough space for us to normalize the input data (6x ought to be enough),
            //  plus the terminator
            int    nBufSize    = sInput.Length * 6;
            byte[] abyInBuffer = new byte[nBufSize + cnTerminatorBytes];
            fixed(byte *lpInBuffer = abyInBuffer)
            {
                // use a helper class to normalize the data to the format needed by the engine
                //  (on return nBufSize holds the number of data bytes actually written)
                Util.DebugWriteLine(className, "Calling GetBytes");
                ECNormalizeData.GetBytes(sInput, ciInput, eInEncodingForm,
                                         ((bForward) ? CodePageInput : CodePageOutput), eFormEngineIn, lpInBuffer,
                                         ref nBufSize, ref m_bDebugDisplayMode);
#if DEBUG && __MonoCS__
                byte[] baOut = new byte[nBufSize];
                ECNormalizeData.ByteStarToByteArr(lpInBuffer, nBufSize, baOut);
                Util.DebugWriteLine(className, Util.getDisplayBytes("Input Bytes", baOut));
#endif

                // get some space for the converter to fill: at least 10000 bytes, or six
                //  times the input byte count if that is larger. The converter is told
                //  nOutLen; the extra terminator bytes are kept back for GetString.
                int    nOutLen      = Math.Max(10000, nBufSize * 6);
                byte[] abyOutBuffer = new byte[nOutLen + cnTerminatorBytes];
                fixed(byte *lpOutBuffer = abyOutBuffer)
                {
                    lpOutBuffer[0] = lpOutBuffer[1] = lpOutBuffer[2] = lpOutBuffer[3] = 0;

                    // call the wrapper sub-classes' DoConvert to let them do it.
                    Util.DebugWriteLine(className, "Calling DoConvert");
                    DoConvert(lpInBuffer, nBufSize, lpOutBuffer, ref nOutLen);
#if DEBUG && __MonoCS__
                    Util.DebugWriteLine(className, "Output length " + nOutLen.ToString());
                    byte[] baOut2 = new byte[nOutLen];
                    ECNormalizeData.ByteStarToByteArr(lpOutBuffer, nOutLen, baOut2);
                    Util.DebugWriteLine(className, Util.getDisplayBytes("Output In Bytes", baOut2));
                    Util.DebugWriteLine(className, "Got val '" + System.Text.Encoding.Unicode.GetString(baOut2) + "'");
#endif
                    // turn the engine's bytes back into a string in the requested form
                    string result = ECNormalizeData.GetString(lpOutBuffer, nOutLen, eOutEncodingForm,
                                                              ((bForward) ? CodePageOutput : CodePageInput), eFormEngineOut, eNormalizeOutput,
                                                              out rciOutput, ref m_bDebugDisplayMode);
#if DEBUG && __MonoCS__
                    Util.DebugWriteLine(className, "normalized result '" + result + "'");
                    byte[] baResult = System.Text.Encoding.BigEndianUnicode.GetBytes(result);
                    Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output in UTF16BE", baResult));
                    baResult = System.Text.Encoding.Unicode.GetBytes(result);
                    Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output in UTF16LE", baResult));
                    baResult = System.Text.Encoding.UTF8.GetBytes(result);
                    Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output In UTF8", baResult));
                    Util.DebugWriteLine(className, "Returning.");
#endif
                    return(result);
                }
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Converts a string and returns the converter's output as raw bytes, for callers that
        /// need the result (e.g. legacy data) as a byte array rather than as a string.
        /// </summary>
        /// <param name="eInEncodingForm">Encoding form of <paramref name="sInput"/>; passed by reference to CheckInitEncForms before use.</param>
        /// <param name="sInput">Text to convert. A null value is reported through EncConverters.ThrowError with ErrStatus.IncompleteChar.</param>
        /// <param name="eOutEncodingForm">Requested output encoding form; passed by reference to CheckInitEncForms before use.</param>
        /// <param name="eNormalizeOutput">Normalization request; in this overload it is only forwarded to PreConvert.</param>
        /// <param name="rciOutput">Receives the output length in bytes as reported by DoConvert (0 for empty input).</param>
        /// <param name="bForward">Direction flag: when true the input is encoded using CodePageInput, otherwise CodePageOutput.</param>
        /// <returns>A new array holding the bytes DoConvert produced; an empty array when <paramref name="sInput"/> is empty.</returns>
        protected virtual unsafe byte[] InternalConvertEx(EncodingForm eInEncodingForm,
                                                          string sInput,
                                                          EncodingForm eOutEncodingForm,
                                                          NormalizeFlags eNormalizeOutput,
                                                          out int rciOutput,
                                                          bool bForward)
        {
            Util.DebugWriteLine(className, "(output bytes) BEGIN");
            Util.DebugWriteLine(className,
                                "eEncFormIn " + eInEncodingForm.ToString() + ", " +
                                "eEncFormOut " + eOutEncodingForm.ToString());

            // a missing input string is an error...
            if (sInput == null)
            {
                EncConverters.ThrowError(ErrStatus.IncompleteChar);
            }

            Util.DebugWriteLine(className, "sInput.Length() is " + sInput.Length.ToString() + ".");

            // ... whereas an empty one simply converts to nothing
            //  (this short-cut was added 11/10/2011 by Jim K)
            if (sInput.Length == 0)
            {
                rciOutput = 0;
                return new byte[0];
            }

            Util.DebugWriteLine(className, "sInput is " + sInput + ".");

            // let CheckInitEncForms fill in forms the caller left unspecified. Per the original
            //  note, the default depends on the ConversionType:
            //      L/RHS == eLegacy  => LegacyString
            //      L/RHS == eUnicode => UTF16
            CheckInitEncForms(bForward, ref eInEncodingForm, ref eOutEncodingForm);

            // give the engine (and/or its wrapper) the chance to say which forms it wants to
            //  work with and to do any other preprocessing.
            EncodingForm engineInForm = EncodingForm.Unspecified;
            EncodingForm engineOutForm = EncodingForm.Unspecified;
            PreConvert(eInEncodingForm, ref engineInForm,
                       eOutEncodingForm, ref engineOutForm,
                       ref eNormalizeOutput, bForward);

            // scratch space for the engine-ready input: 6 bytes per input char is assumed to
            //  be sufficient. GetBytes receives the count by ref.
            int inputByteCount = sInput.Length * 6;
            byte[] inputBuffer = new byte[inputByteCount];

            fixed (byte* pInput = inputBuffer)
            {
                // the helper class turns the string into the byte form the engine asked for
                Util.DebugWriteLine(className, "Calling GetBytes");
                ECNormalizeData.GetBytes(sInput, sInput.Length, eInEncodingForm,
                                         bForward ? CodePageInput : CodePageOutput,
                                         engineInForm, pInput,
                                         ref inputByteCount, ref m_bDebugDisplayMode);

                // room for the converter's output: 6x the input byte count, but never less
                //  than 10000 bytes. (This is an ordinary managed array pinned for the call.)
                int outputByteCount = Math.Max(10000, inputByteCount * 6);
                byte[] outputBuffer = new byte[outputByteCount];

                fixed (byte* pOutput = outputBuffer)
                {
                    // start the output off with four zero bytes
                    for (int i = 0; i < 4; i++)
                    {
                        pOutput[i] = 0;
                    }

                    // the wrapper sub-class does the real work; the length is passed by ref
                    //  and is used afterwards as the number of bytes produced.
                    Util.DebugWriteLine(className, "Calling DoConvert");
                    DoConvert(pInput, inputByteCount, pOutput, ref outputByteCount);

                    // copy exactly that many bytes into the array we hand back
                    byte[] convertedBytes = new byte[outputByteCount];
                    ECNormalizeData.ByteStarToByteArr(pOutput, outputByteCount, convertedBytes);
#if DEBUG
                    Util.DebugWriteLine(className, "Output length " + outputByteCount.ToString());
                    Util.DebugWriteLine(className, Util.getDisplayBytes("Output In Bytes", convertedBytes));
                    Util.DebugWriteLine(className, "Returning.");
#endif
                    rciOutput = outputByteCount;
                    return convertedBytes;
                }
            }
        }
Esempio n. 7
0
        /// <summary>
        /// Converts input supplied as a byte array (e.g. legacy data) and returns the result
        /// as a string. The input bytes are pinned and handed to DoConvert unchanged; only the
        /// output goes through ECNormalizeData.GetString.
        /// </summary>
        /// <param name="eInEncodingForm">Encoding form of <paramref name="baInput"/>; passed by reference to CheckInitEncForms before use.</param>
        /// <param name="baInput">Bytes to convert. A null value is reported through EncConverters.ThrowError with ErrStatus.IncompleteChar.</param>
        /// <param name="eOutEncodingForm">Requested output encoding form; passed by reference to CheckInitEncForms before use.</param>
        /// <param name="eNormalizeOutput">Normalization request; PreConvert may change it before it is passed to GetString.</param>
        /// <param name="rciOutput">Receives the count reported by ECNormalizeData.GetString (0 for empty input).</param>
        /// <param name="bForward">Direction flag: when true the output is decoded using CodePageOutput, otherwise CodePageInput.</param>
        /// <returns>The converted text; an empty string when <paramref name="baInput"/> is empty.</returns>
        protected virtual unsafe string InternalConvertEx(EncodingForm eInEncodingForm,
                                                          byte[] baInput,
                                                          EncodingForm eOutEncodingForm,
                                                          NormalizeFlags eNormalizeOutput,
                                                          out int rciOutput,
                                                          bool bForward)
        {
            Util.DebugWriteLine(className, "(input bytes) BEGIN");
            Util.DebugWriteLine(className,
                                "eEncFormIn " + eInEncodingForm.ToString() + ", " +
                                "eEncFormOut " + eOutEncodingForm.ToString());
            if (baInput == null)
            {
                EncConverters.ThrowError(ErrStatus.IncompleteChar);
            }
            if (baInput.Length == 0)
            {
                // nothing to convert
                rciOutput = 0;
                return("");
            }
            // if the user hasn't specified, then take the default case for the ConversionType:
            //  if L/RHS == eLegacy, then LegacyString
            //  if L/RHS == eUnicode, then UTF16
            CheckInitEncForms(bForward, ref eInEncodingForm, ref eOutEncodingForm);

            // allow the converter engine's (and/or its COM wrapper) to do some preprocessing.
            EncodingForm eFormEngineIn = EncodingForm.Unspecified, eFormEngineOut = EncodingForm.Unspecified;

            PreConvert(
                eInEncodingForm,        // [in] form of the caller's data
                ref eFormEngineIn,      // [out] form the conversion engine wants, etc.
                eOutEncodingForm,
                ref eFormEngineOut,
                ref eNormalizeOutput,
                bForward);

            // the caller's bytes are used as the engine input without any re-encoding
            int nBufSize = baInput.Length;

            fixed(byte *lpInBuffer = baInput)
            {
                // room for the converter's output: 6x the input size, but at least 10000 bytes
                int nOutLen = Math.Max(10000, nBufSize * 6);

                byte[] abyOutBuffer = new byte[nOutLen];
                fixed(byte *lpOutBuffer = abyOutBuffer)
                {
                    lpOutBuffer[0] = lpOutBuffer[1] = lpOutBuffer[2] = lpOutBuffer[3] = 0;

                    // call the wrapper sub-classes' DoConvert to let them do it; nOutLen is
                    //  passed by ref and used afterwards as the number of bytes produced.
                    Util.DebugWriteLine(className, "Calling DoConvert");
                    DoConvert(lpInBuffer, nBufSize, lpOutBuffer, ref nOutLen);
                    Util.DebugWriteLine(className, "Output length " + nOutLen.ToString());

#if DEBUG
                    // This managed copy of the raw output is consumed only by the trace lines
                    //  below, so it is allocated and filled in DEBUG builds only (it used to be
                    //  built unconditionally and then discarded in release builds).
                    byte[] baOut = new byte[nOutLen];
                    ECNormalizeData.ByteStarToByteArr(lpOutBuffer, nOutLen, baOut);
                    Util.DebugWriteLine(className, Util.getDisplayBytes("Output In Bytes", baOut));
                    Util.DebugWriteLine(className, "Got val '" + System.Text.Encoding.Unicode.GetString(baOut) + "'");
#endif
                    // turn the engine's output into a string in the requested form
                    string result = ECNormalizeData.GetString(lpOutBuffer, nOutLen, eOutEncodingForm,
                                                              ((bForward) ? CodePageOutput : CodePageInput), eFormEngineOut, eNormalizeOutput,
                                                              out rciOutput, ref m_bDebugDisplayMode);
#if DEBUG
                    Util.DebugWriteLine(className, "normalized result '" + result + "'");
                    byte[] baResult = System.Text.Encoding.BigEndianUnicode.GetBytes(result);
                    Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output in UTF16BE", baResult));
                    baResult = System.Text.Encoding.Unicode.GetBytes(result);
                    Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output in UTF16LE", baResult));
                    baResult = System.Text.Encoding.UTF8.GetBytes(result);
                    Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output In UTF8", baResult));
                    Util.DebugWriteLine(className, "Returning.");
#endif
                    return(result);
                }
            }
        }
Esempio n. 8
0
        /// <summary>
        /// Makes <c>m_converter</c> refer to a TECkit converter for the requested encoding
        /// forms, normalization and direction: a cached converter is reused when one exists
        /// for that combination, otherwise the map is loaded and a new converter is created
        /// and cached.
        /// </summary>
        /// <param name="eInEncodingForm">Input form requested by the caller; LegacyBytes/UTF8Bytes are treated as LegacyString/UTF8String.</param>
        /// <param name="eInFormEngine">Forwarded to the base implementation.</param>
        /// <param name="eOutEncodingForm">Output form requested by the caller; LegacyBytes/UTF8Bytes are treated as LegacyString/UTF8String.</param>
        /// <param name="eOutFormEngine">Forwarded to the base implementation.</param>
        /// <param name="eNormalizeOutput">Output normalization request; OR-ed into the converter's output form and reset to NormalizeFlags.None on return.</param>
        /// <param name="bForward">Conversion direction; handed to the engine as 1 when true and 0 when false.</param>
        protected unsafe override void PreConvert
        (
            EncodingForm eInEncodingForm,
            ref EncodingForm eInFormEngine,
            EncodingForm eOutEncodingForm,
            ref EncodingForm eOutFormEngine,
            ref NormalizeFlags eNormalizeOutput,
            bool bForward
        )
        {
            // the base class goes first
            base.PreConvert(eInEncodingForm, ref eInFormEngine,
                            eOutEncodingForm, ref eOutFormEngine,
                            ref eNormalizeOutput, bForward);

            // Per the original note, the TECkit engine expects the *String value (e.g. '1' for
            //  LegacyString) when the converter object is created, even if the caller passed
            //  the corresponding *Bytes form; the data itself is still converted correctly
            //  later. So swap each *Bytes form for its *String twin.
            switch (eInEncodingForm)
            {
                case EncodingForm.LegacyBytes:
                    eInEncodingForm = EncodingForm.LegacyString;
                    break;

                case EncodingForm.UTF8Bytes:
                    eInEncodingForm = EncodingForm.UTF8String;
                    break;
            }

            switch (eOutEncodingForm)
            {
                case EncodingForm.LegacyBytes:
                    eOutEncodingForm = EncodingForm.LegacyString;
                    break;

                case EncodingForm.UTF8Bytes:
                    eOutEncodingForm = EncodingForm.UTF8String;
                    break;
            }

            // converters are cached per (in form, out form, normalization, direction)
            string converterKey = String.Concat(eInEncodingForm.ToString(),
                                                eOutEncodingForm.ToString(),
                                                eNormalizeOutput.ToString(),
                                                bForward.ToString());

            // Work out which file (if any) to check for changes: the map source for a
            //  compilable map (i.e. ImplType SIL.map) that has a map file spec, otherwise the
            //  tec file when one is loaded (it could have changed out from underneath us).
            bool needsReload = false;
            bool watchMapFile = m_bCompileable && !String.IsNullOrEmpty(m_strMapFileSpec);

            if (watchMapFile || IsFileLoaded())
            {
                string watchedFileSpec = watchMapFile ? m_strMapFileSpec : m_strTecFileSpec;

                // the initial value is irrelevant; the variable just has to be assigned
                //  before it can be passed by ref.
                DateTime lastModified = DateTime.Now;
                if (!DoesFileExist(watchedFileSpec, ref lastModified))
                {
                    EncConverters.ThrowError(ErrStatus.CantOpenReadMap, watchedFileSpec);
                }

                // NOTE(review): the earlier comment here said "modified or not already
                //  loaded", but the test requires a newer timestamp AND a cached converter
                //  for this key.
                if ((lastModified > m_timeModifiedTec) && m_mapConverters.ContainsKey(converterKey))
                {
                    if (!watchMapFile)
                    {
                        // tec file case only: drop the loaded mapping (per the original
                        //  note, this triggers a reload) and clear both side flags.
                        m_baMapping = null;
                        m_lhsFlags  = m_rhsFlags = 0;
                    }

                    // discard the cached converter so that we fall into the Load path below
                    ResetConverter((IntPtr)m_mapConverters[converterKey]);
                    m_mapConverters.Remove(converterKey);
                    needsReload = true;
                }
            }

            if (!m_mapConverters.ContainsKey(converterKey))
            {
                int createStatus;

                // load the map now
                Load(needsReload);

                // the engine takes the output form and the normalization request OR-ed
                //  together into a single 16-bit value.
                ushort outFormBits   = System.Convert.ToUInt16((int)eOutEncodingForm);
                ushort normalizeBits = System.Convert.ToUInt16((int)eNormalizeOutput);
                ushort engineOutForm = System.Convert.ToUInt16(outFormBits | normalizeBits);

                // make a converter for this new combination.
                Util.DebugWriteLine(this, "Creating TECkit converter: in " +
                                    eInEncodingForm.ToString() + ", out " +
                                    eOutEncodingForm.ToString());

                byte   direction    = (byte)(bForward ? 1 : 0);
                ushort engineInForm = System.Convert.ToUInt16((int)eInEncodingForm);

                if (IsFileLoaded())
                {
                    // pin the loaded mapping while the engine reads it
                    fixed (byte* pMapping = m_baMapping)
                    {
                        createStatus = TECkit_CreateConverter(pMapping, m_nMapSize, direction,
                                                              engineInForm, engineOutForm,
                                                              out m_converter);
                    }
                }
                else
                {
                    // no mapping loaded: the engine is given a null mapping pointer
                    createStatus = TECkit_CreateConverter((byte*)0, m_nMapSize, direction,
                                                          engineInForm, engineOutForm,
                                                          out m_converter);
                }

                if (status_IsError(createStatus))
                {
                    EncConverters.ThrowError(createStatus);
                }
                else
                {
                    // remember it for the next request with the same combination
                    m_mapConverters[converterKey] = m_converter;
                }
            }
            else
            {
                // reuse the converter created earlier for this combination
                m_converter = (IntPtr)m_mapConverters[converterKey];
            }

            // TEC applies the output normalization itself (it was requested above when the
            //  converter was created), so clear the flag to keep it from being attempted
            //  again later. Per the original note, converters that can't normalize
            //  implicitly leave the flag set and get TEC to do it after their conversion;
            //  see ECNormalizeData.GetString.
            eNormalizeOutput = NormalizeFlags.None;
        }

        // True when a TECkit status code is anything other than ErrStatus.NoError.
        private static bool status_IsError(int status)
        {
            return status != (int)ErrStatus.NoError;
        }
Esempio n. 9
0
		/// <summary>
		/// Makes <c>m_converter</c> refer to a TECkit converter for the requested encoding
		/// forms, normalization and direction: a cached converter is reused when one exists
		/// for that combination, otherwise the map is loaded and a new converter is created
		/// and cached.
		/// </summary>
		/// <param name="eInEncodingForm">Input form requested by the caller; LegacyBytes/UTF8Bytes are treated as LegacyString/UTF8String.</param>
		/// <param name="eInFormEngine">Forwarded to the base implementation.</param>
		/// <param name="eOutEncodingForm">Output form requested by the caller; LegacyBytes/UTF8Bytes are treated as LegacyString/UTF8String.</param>
		/// <param name="eOutFormEngine">Forwarded to the base implementation.</param>
		/// <param name="eNormalizeOutput">Output normalization request; OR-ed into the converter's output form and reset to NormalizeFlags.None on return.</param>
		/// <param name="bForward">Conversion direction; handed to the engine as 1 when true and 0 when false.</param>
		protected unsafe override void PreConvert
			(
			EncodingForm        eInEncodingForm,
			ref EncodingForm    eInFormEngine,
			EncodingForm        eOutEncodingForm,
			ref EncodingForm    eOutFormEngine,
			ref NormalizeFlags  eNormalizeOutput,
			bool                bForward
			)
		{
			// the base class goes first
			base.PreConvert( eInEncodingForm, ref eInFormEngine,
							eOutEncodingForm, ref eOutFormEngine,
							ref eNormalizeOutput, bForward);

			// Per the original note, the TECkit engine expects the *String value (e.g. '1' for
			//	LegacyString) when the converter object is created, even if the caller passed
			//	the corresponding *Bytes form; the data itself is still converted correctly
			//	later. So swap each *Bytes form for its *String twin.
			switch( eInEncodingForm )
			{
				case EncodingForm.LegacyBytes:
					eInEncodingForm = EncodingForm.LegacyString;
					break;

				case EncodingForm.UTF8Bytes:
					eInEncodingForm = EncodingForm.UTF8String;
					break;
			}

			switch( eOutEncodingForm )
			{
				case EncodingForm.LegacyBytes:
					eOutEncodingForm = EncodingForm.LegacyString;
					break;

				case EncodingForm.UTF8Bytes:
					eOutEncodingForm = EncodingForm.UTF8String;
					break;
			}

			// converters are cached per (in form, out form, normalization, direction)
			string converterKey = String.Concat(eInEncodingForm.ToString(),
												eOutEncodingForm.ToString(),
												eNormalizeOutput.ToString(),
												bForward.ToString());

			// Work out which file (if any) to check for changes: the map source for a
			//	compilable map (i.e. ImplType SIL.map) that has a map file spec, otherwise the
			//	tec file when one is loaded (it could have changed out from underneath us).
			bool needsReload = false;
			bool watchMapFile = m_bCompileable && !String.IsNullOrEmpty(m_strMapFileSpec);

			if( watchMapFile || IsFileLoaded() )
			{
				string watchedFileSpec = watchMapFile ? m_strMapFileSpec : m_strTecFileSpec;

				// the initial value is irrelevant; the variable just has to be assigned
				//	before it can be passed by ref.
				DateTime lastModified = DateTime.Now;
				if( !DoesFileExist(watchedFileSpec, ref lastModified) )
				{
					EncConverters.ThrowError(ErrStatus.CantOpenReadMap, watchedFileSpec);
				}

				// NOTE(review): the earlier comment here said "modified or not already
				//	loaded", but the test requires a newer timestamp AND a cached converter
				//	for this key.
				if( (lastModified > m_timeModifiedTec) && m_mapConverters.ContainsKey(converterKey) )
				{
					if( !watchMapFile )
					{
						// tec file case only: drop the loaded mapping (per the original
						//	note, this triggers a reload) and clear both side flags.
						m_baMapping = null;
						m_lhsFlags = m_rhsFlags = 0;
					}

					// discard the cached converter so that we fall into the Load path below
					ResetConverter((Int32)m_mapConverters[converterKey]);
					m_mapConverters.Remove(converterKey);
					needsReload = true;
				}
			}

			if( !m_mapConverters.ContainsKey(converterKey) )
			{
				int createStatus;

				// load the map now
				Load(needsReload);

				// the engine takes the output form and the normalization request OR-ed
				//	together into a single 16-bit value.
				ushort outFormBits = System.Convert.ToUInt16((int)eOutEncodingForm);
				ushort normalizeBits = System.Convert.ToUInt16((int)eNormalizeOutput);
				ushort engineOutForm = System.Convert.ToUInt16(outFormBits | normalizeBits);

				byte direction = (byte)(bForward ? 1 : 0);
				ushort engineInForm = System.Convert.ToUInt16((int)eInEncodingForm);

				// make a converter for this new combination; the engine writes the new
				//	handle through a pointer to the m_converter field.
				// NOTE(review): the handle is held in an Int32 but handed over as void*; if
				//	the native handle is pointer-sized this is too small in a 64-bit process.
				//	Confirm against the TECkit_CreateConverter declaration.
				fixed( Int32* pConverter = &m_converter )
				{
					if( IsFileLoaded() )
					{
						// pin the loaded mapping while the engine reads it
						fixed( byte* pMapping = m_baMapping )
						{
							createStatus = TECkit_CreateConverter(pMapping, m_nMapSize, direction,
																  engineInForm, engineOutForm,
																  (void*)pConverter);
						}
					}
					else
					{
						// no mapping loaded: the engine is given a null mapping pointer
						createStatus = TECkit_CreateConverter((byte*)0, m_nMapSize, direction,
															  engineInForm, engineOutForm,
															  (void*)pConverter);
					}
				}

				if( createStatus != (int)ErrStatus.NoError )
				{
					EncConverters.ThrowError(createStatus);
				}
				else
				{
					// remember it for the next request with the same combination
					m_mapConverters[converterKey] = m_converter;
				}
			}
			else
			{
				// reuse the converter created earlier for this combination
				m_converter = (Int32)m_mapConverters[converterKey];
			}

			// TEC applies the output normalization itself (it was requested above when the
			//	converter was created), so clear the flag to keep it from being attempted
			//	again later. Per the original note, converters that can't normalize
			//	implicitly leave the flag set and get TEC to do it after their conversion;
			//	see ECNormalizeData.GetString.
			eNormalizeOutput = NormalizeFlags.None;
		}