private void buttonTest_Click(object sender, EventArgs e)
        {
            // Test-button handler: runs the input box text through the converter,
            // shows the result in the output box and refreshes the output code display.
            Util.DebugWriteLine(this, "BEGIN");

            IEncConverter converter = InitializeEncConverter;
            if (converter == null)
            {
                // no converter available, so there is nothing to test
                return;
            }

            try
            {
                // the "reverse" check box flips the direction of the conversion
                converter.DirectionForward = !checkBoxTestReverse.Checked;

                string converted = converter.Convert(ecTextBoxInput.Text);
                Util.DebugWriteLine(this, "Putting in text box: '" + converted + "'");
                ecTextBoxOutput.Text = converted;
            }
            catch (Exception ex)
            {
                // report the failure; the code display below is still refreshed
                // from whatever the output box currently holds
                string message = String.Format("Test failed! Reason: {0}", ex.Message);
                MessageBox.Show(message, EncConverters.cstrCaption);
            }

            // when running in reverse the output is the left-hand side of the
            // conversion type, otherwise it is the right-hand side
            NormConversionType outputType;
            if (checkBoxTestReverse.Checked)
            {
                outputType = EncConverter.NormalizeLhsConversionType(ConversionType);
            }
            else
            {
                outputType = EncConverter.NormalizeRhsConversionType(ConversionType);
            }

            if (outputType != NormConversionType.eLegacy)
            {
                UpdateUniCodes(ecTextBoxOutput.Text, richTextBoxHexOutput);
            }
            else
            {
                UpdateLegacyCodes(ecTextBoxOutput.Text, m_aEC.CodePageOutput, richTextBoxHexOutput);
            }
        }
        protected void TestTabInputChanged()
        {
            // Keeps the test tab in sync with the input box: enables/disables the
            // Test button, refreshes the input code display and resets the output.

            // a null converter means it hasn't been set up yet.
            if (m_aEC == null)
            {
                return;
            }

            bool inputIsEmpty = !(ecTextBoxInput.TextLength > 0);
            if (inputIsEmpty)
            {
                richTextBoxHexInput.Clear();
                buttonTest.Enabled = false;
            }
            else
            {
                buttonTest.Enabled = true;

                // when running in reverse the input is the right-hand side of the
                // conversion type, otherwise it is the left-hand side
                NormConversionType inputType;
                if (checkBoxTestReverse.Checked)
                {
                    inputType = EncConverter.NormalizeRhsConversionType(ConversionType);
                }
                else
                {
                    inputType = EncConverter.NormalizeLhsConversionType(ConversionType);
                }

                if (inputType != NormConversionType.eLegacy)
                {
                    UpdateUniCodes(ecTextBoxInput.Text, richTextBoxHexInput);
                }
                else
                {
                    UpdateLegacyCodes(ecTextBoxInput.Text, m_aEC.CodePageInput, richTextBoxHexInput);
                }
            }

            // any change to the input invalidates the previous output
            ecTextBoxOutput.Text      = null;
            richTextBoxHexOutput.Text = null;
        }
        private void buttonTest_Click(object sender, EventArgs e)
        {
            // Test-button handler: converts the input box text, puts the result in
            // the output box and then refreshes the output code display.
            IEncConverter converter = InitializeEncConverter;
            if (converter == null)
            {
                return;
            }

            try
            {
                // forward unless the "reverse" check box is ticked
                converter.DirectionForward = !checkBoxTestReverse.Checked;
                ecTextBoxOutput.Text = converter.Convert(ecTextBoxInput.Text);
            }
            catch (Exception ex)
            {
                string reason = String.Format("Test failed! Reason: {0}", ex.Message);
                MessageBox.Show(reason, EncConverters.cstrCaption);
            }

            // the output side is Lhs when reversed, Rhs when going forward
            bool outputIsLegacy =
                ((checkBoxTestReverse.Checked)
                    ? EncConverter.NormalizeLhsConversionType(ConversionType)
                    : EncConverter.NormalizeRhsConversionType(ConversionType))
                == NormConversionType.eLegacy;

            if (!outputIsLegacy)
            {
                UpdateUniCodes(ecTextBoxOutput.Text, richTextBoxHexOutput);
                return;
            }

            UpdateLegacyCodes(ecTextBoxOutput.Text, m_aEC.CodePageOutput, richTextBoxHexOutput);
        }
 protected bool IsLhsLegacy(IEncConverter aEC)
 {
     // Look at the Lhs of the conversion type when the converter runs forward
     // and at the Rhs when it runs in reverse, then test that side for legacy.
     NormConversionType sideType = aEC.DirectionForward
         ? EncConverter.NormalizeLhsConversionType(aEC.ConversionType)
         : EncConverter.NormalizeRhsConversionType(aEC.ConversionType);

     return sideType == NormConversionType.eLegacy;
 }
Exemple #5
0
        /// <summary>
        /// Helper that turns the client's input string into the bytes handed to the
        /// conversion engine, writing them into <paramref name="pBuf"/>.
        /// </summary>
        /// <param name="strInput">The input data as received from the client.</param>
        /// <param name="cnCountIn">Item count supplied by the caller; 0 means "not given",
        /// in which case the length is derived from the string or the converted bytes.</param>
        /// <param name="eEncFormIn">Encoding form of <paramref name="strInput"/>.</param>
        /// <param name="nCodePageIn">Code page used to narrow a LegacyString input. When it is 0
        /// and one of the first three characters is in the 0xF000 range, the symbol font
        /// code page is used instead.</param>
        /// <param name="eFormEngineIn">Encoding form the engine wants to receive.</param>
        /// <param name="pBuf">Destination buffer. The method writes the data bytes followed by
        /// four zero bytes; NOTE(review): the buffer is assumed to be large enough for that —
        /// confirm against callers.</param>
        /// <param name="nBufSize">On return, the number of data bytes placed in
        /// <paramref name="pBuf"/> (the incoming value is not read).</param>
        /// <param name="bDebugDisplayMode">Passed by reference to the DisplayDebug* helpers.</param>
        /// <returns><paramref name="pBuf"/>.</returns>
        /// <remarks>
        /// An unsupported <paramref name="eEncFormIn"/> is reported through
        /// <c>EncConverters.ThrowError(ErrStatus.InEncFormNotSupported)</c>.
        /// </remarks>
        internal static unsafe byte *GetBytes(string strInput, int cnCountIn, EncodingForm eEncFormIn, int nCodePageIn, EncodingForm eFormEngineIn, byte *pBuf, ref int nBufSize, ref bool bDebugDisplayMode)
        {
            Util.DebugWriteLine(className, "BEGIN");
            Util.DebugWriteLine(className,
                                "eEncFormIn " + eEncFormIn.ToString() + ", " +
                                "eFormEngineIn " + eFormEngineIn.ToString());

            // if the form the user gave is not what the engine wants (and it isn't legacy
            //  since legacy forms are already handled later)...
            if ((eEncFormIn != eFormEngineIn) && !EncConverter.IsLegacyFormat(eEncFormIn))
            {
                // we can do some of the conversions ourself. For example, if the input form
                //  is UTF16 and the desired form is UTF8, then simply use CCUnicode8 below
                if ((eEncFormIn == EncodingForm.UTF16) && (eFormEngineIn == EncodingForm.UTF8Bytes))
                {
                    Util.DebugWriteLine(className, "using CCUnicode8");
                    eEncFormIn = (EncodingForm)CCUnicode8;
                }
                // we can also do the following one
                else if ((eEncFormIn == EncodingForm.UTF8String) && (eFormEngineIn == EncodingForm.UTF8Bytes))
                {
                    ; // i.e. don't have TECkit do this one...
                }
                else
                {
                    // everything else is converted to the engine's form up front
                    strInput   = EncConverters.UnicodeEncodingFormConvertEx(strInput, eEncFormIn, cnCountIn, eFormEngineIn, NormalizeFlags.None, out cnCountIn);
                    eEncFormIn = eFormEngineIn;
                }
            }

            // number of data bytes placed in pBuf (set by each case below)
            int nInLen = 0;

            switch (eEncFormIn)
            {
            case EncodingForm.LegacyBytes:
            case EncodingForm.UTF8Bytes:
            {
                if (cnCountIn != 0)
                {
                    nInLen = cnCountIn;         // item count should be the number of bytes directly.
                }
                else
                {
                    // if the user didn't give the length (i.e. via ConvertEx), then get it
                    //  from the BSTR length. nInLen will be the # of bytes.
                    nInLen = strInput.Length * 2;
                }

                // these forms are for C++ apps that want to use the BSTR to transfer
                //  bytes rather than OLECHARs.
                nInLen = StringToByteStar(strInput, pBuf, nInLen, true);

                if (eEncFormIn == EncodingForm.LegacyBytes)
                {
                    DisplayDebugCharValues(pBuf, nInLen, "Received (LegacyBytes) from client and sending to Converter/DLL...", ref bDebugDisplayMode);
                }
                else
                {
                    DisplayDebugUCharValuesFromUTF8(pBuf, nInLen, "Received (UTF8Bytes) from client and sending to Converter/DLL...", ref bDebugDisplayMode);
                }
                break;
            }

            case EncodingForm.LegacyString:
            {
                // provisional length: only used by BruteForceNarrowize below; the final
                //  value is recomputed once the string has been narrowed.
                if (cnCountIn != 0)
                {
                    nInLen = cnCountIn;           // item count should be the number of bytes directly (after conversion below).
                }
                else
                {
                    nInLen = strInput.Length;         // the # of bytes will *be* the # of chars in the string after we're done.
                }
                DisplayDebugUCharValues(strInput, "Received (LegacyString) from client...", ref bDebugDisplayMode);

                // use a code page converter to narrowize using the input string
                // (but the 'easier' Add method will send 0; if so, then
                //  fallback to the original method.
                byte[] ba = null;

                // first check if it's a symbol font (sometimes the user
                //  incorrectly sends a few spaces first, so check the
                //  first couple of characters). If it is (and the code page is 0), then
                //  change the code page to be CP_SYMBOL.
                // Every index is guarded by a length check so that an empty (or very
                //  short) input string cannot throw IndexOutOfRangeException.
                if ((nCodePageIn == 0) &&
                    (((strInput.Length > 0) && ((strInput[0] & 0xF000) == 0xF000)) ||
                     ((strInput.Length > 1) && ((strInput[1] & 0xF000) == 0xF000)) ||
                     ((strInput.Length > 2) && ((strInput[2] & 0xF000) == 0xF000))
                    )
                    )
                {
                    nCodePageIn = EncConverters.cnSymbolFontCodePage;
                }

#if __MonoCS__
                // Narrowizing by code page 0 doesn't seem to be what we want on Linux.
                // Treating it as a symbol font or stripping off the low byte works better.
                if (nCodePageIn == 0)
                {
                    ba = BruteForceNarrowize(strInput, nInLen);
                }
                else
#else
                if (true)
#endif
                {
                    // if it's a symbol or iso-8859 encoding, then we can handle just
                    //  taking the low byte (i.e. the catch case)
                    if ((nCodePageIn == EncConverters.cnSymbolFontCodePage) ||
                        (nCodePageIn == EncConverters.cnIso8859_1CodePage)
                        )
                    {
                        try
                        {
                            Encoding enc = Encoding.GetEncoding(nCodePageIn);
                            ba = enc.GetBytes(strInput);
                            Util.DebugWriteLine(className, "Narrowized by given code page.");
                        }
                        catch
                        {
                            // deliberate best-effort: the encoding isn't available,
                            //  so fall back to the brute-force narrowing helper
                            ba = BruteForceNarrowize(strInput, nInLen);
                        }
                    }
                    else
                    {
                        // otherwise, simply use CP_ACP (or the default code page) to
                        //  narrowize it.
                        Util.DebugWriteLine(className, "Narrowizing by given code page.");
                        Encoding enc = Encoding.GetEncoding(nCodePageIn);
                        ba = enc.GetBytes(strInput);
                    }
                }


                // copy the managed byte array into the caller's buffer
                ByteArrToByteStar(ba, pBuf);

                if (cnCountIn != 0)
                {
                    nInLen = cnCountIn;         // item count should be the number of bytes directly.
                }
                else
                {
                    // if the user didn't give the length (i.e. via ConvertEx), then get it
                    //  from the narrowed byte array. nInLen will be the # of bytes.
                    nInLen = ba.Length;
                }

                DisplayDebugCharValues(pBuf, nInLen, "Sending (LegacyBytes) to Converter/DLL...", ref bDebugDisplayMode);
                break;
            }

            // this following form *must* be widened UTF8 via the default code page
            case EncodingForm.UTF8String:
            {
                DisplayDebugUCharValues(strInput, "Received (UTF8String) from client...", ref bDebugDisplayMode);

                // use the default code page to narrowize the input string
                Encoding enc = Encoding.Default;
                byte[]   ba  = enc.GetBytes(strInput);

                // copy the managed byte array into the caller's buffer
                ByteArrToByteStar(ba, pBuf);

                if (cnCountIn != 0)
                {
                    nInLen = cnCountIn;         // item count should be the number of bytes directly.
                }
                else
                {
                    // if the user didn't give the length (i.e. via ConvertEx), then get it
                    //  from the narrowed byte array. nInLen will be the # of bytes.
                    nInLen = ba.Length;
                }

                DisplayDebugUCharValuesFromUTF8(pBuf, nInLen, "Sending (UTF8Bytes) to Converter/DLL...", ref bDebugDisplayMode);
                break;
            }

            // this is a special case for CC where the input was actually UTF16, but the
            //  CC DLL is expecting (usually) UTF8, so convert from UTF16->UTF8 narrow
            case (EncodingForm)CCUnicode8:
            {
                DisplayDebugUCharValues(strInput, "Received (UTF16) from client...", ref bDebugDisplayMode);

                UTF8Encoding enc = new UTF8Encoding();
                byte[]       ba  = enc.GetBytes(strInput);

                // copy the managed byte array into the caller's buffer
                ByteArrToByteStar(ba, pBuf);

                // since we've changed the format, we don't care how many UTF16 words came in
                nInLen = ba.Length;

                DisplayDebugUCharValuesFromUTF8(pBuf, nInLen, "Sending (UTF8Bytes) to Converter/DLL...", ref bDebugDisplayMode);
                break;
            }

            case EncodingForm.UTF16:
            {
                if (cnCountIn != 0)
                {
                    nInLen = cnCountIn;           // item count should be the number of 16-bit words directly
                }
                else
                {
                    nInLen = strInput.Length;
                }

                DisplayDebugUCharValues(strInput, "Received (UTF16) from client and sending to Converter/DLL...", ref bDebugDisplayMode);

                // but this should be the count of bytes...
                nInLen *= 2;
                StringToByteStar(strInput, pBuf, nInLen, false);
                break;
            }

            case EncodingForm.UTF16BE:
            case EncodingForm.UTF32:
            case EncodingForm.UTF32BE:
            {
                if (cnCountIn != 0)
                {
                    nInLen = cnCountIn;         // item count is the number of Uni chars

                    // for UTF32, the converter's actually expecting the length to be twice
                    //  this much again.
                    if (eEncFormIn != EncodingForm.UTF16BE)
                    {
                        nInLen *= 2;
                    }
                }
                else
                {
                    nInLen = strInput.Length;
                }

                // NOTE(review): in this branch pBuf is only filled by the StringToByteStar
                //  call further down, so this dump shows whatever the buffer held on
                //  entry — confirm whether strInput was meant to be displayed instead.
                DisplayDebugUCharValues(pBuf, nInLen, "Received (UTF16BE/32/32BE) from client/Sending to Converter/DLL...", ref bDebugDisplayMode);

                // for the byte count, double it (possibly again)
                nInLen *= 2;
                StringToByteStar(strInput, pBuf, nInLen, false);
                break;
            }

            default:
                EncConverters.ThrowError(ErrStatus.InEncFormNotSupported);
                break;
            }

            // terminate the data with four zero bytes and report the byte count
            pBuf[nInLen] = pBuf[nInLen + 1] = pBuf[nInLen + 2] = pBuf[nInLen + 3] = 0;
            nBufSize     = nInLen;

            return(pBuf);
        }
Exemple #6
0
        /// <summary>
        /// Helper that turns the bytes returned by the conversion engine into the string
        /// handed back to the client, in the encoding form the client asked for.
        /// </summary>
        /// <param name="lpOutBuffer">Buffer holding the engine's output. Four zero bytes are
        /// written starting at index <paramref name="nOutLen"/>; NOTE(review): the buffer is
        /// assumed to have room for them — confirm against callers.</param>
        /// <param name="nOutLen">Number of data bytes in <paramref name="lpOutBuffer"/>.</param>
        /// <param name="eOutEncodingForm">Encoding form the client wants back.</param>
        /// <param name="nCodePageOut">Code page used to widen LegacyString output.</param>
        /// <param name="eFormEngineOut">Encoding form the engine produced.</param>
        /// <param name="eNormalizeOutput">Normalization requested for the output; anything other
        /// than <c>NormalizeFlags.None</c> triggers a final UnicodeEncodingFormConvertEx call.</param>
        /// <param name="rciOutput">Receives the item count of the returned data.</param>
        /// <param name="bDebugDisplayMode">Passed by reference to the DisplayDebug* helpers.</param>
        /// <returns>The output string.</returns>
        /// <remarks>
        /// Unsupported engine output forms are reported through
        /// <c>EncConverters.ThrowError(ErrStatus.OutEncFormNotSupported)</c>; unless
        /// <c>v22_AllowEmptyReturn</c> is defined, an empty result is reported through
        /// <c>EncConverters.ThrowError(ErrStatus.NoReturnDataBadOutForm)</c>.
        /// </remarks>
        internal static unsafe string GetString(byte *lpOutBuffer, int nOutLen, EncodingForm eOutEncodingForm, int nCodePageOut, EncodingForm eFormEngineOut, NormalizeFlags eNormalizeOutput, out int rciOutput, ref bool bDebugDisplayMode)
        {
            // null terminate the output and turn it into a (real) array of bytes
            Util.DebugWriteLine(className, "BEGIN");
            lpOutBuffer[nOutLen] = lpOutBuffer[nOutLen + 1] = lpOutBuffer[nOutLen + 2] = lpOutBuffer[nOutLen + 3] = 0;
            byte[] baOut = new byte[nOutLen];
            ByteStarToByteArr(lpOutBuffer, nOutLen, baOut);
            Util.DebugWriteLine(className, Util.getDisplayBytes("byte array", baOut));

            // check to see if the engine handled the given output form. If not, then see
            //  if it's a conversion we can easily do (otherwise we'll ask TEC to do the
            //  conversion for us (later) so that all engines can handle all possible
            //  output encoding forms).
            Util.DebugWriteLine(className,
                                "eOutEncodingForm " + eOutEncodingForm.ToString() + ", " +
                                "eFormEngineOut " + eFormEngineOut.ToString());
            if (eOutEncodingForm != eFormEngineOut)
            {
                if (EncConverter.IsLegacyFormat(eOutEncodingForm))
                {
                    if ((eFormEngineOut == EncodingForm.LegacyBytes) && (eOutEncodingForm == EncodingForm.LegacyString))
                    {
                        // in this case, just *pretend* the engine outputs LegacyString (the
                        //  LegacyString case below really means "convert LegacyBytes to
                        //  LegacyString")
                        eFormEngineOut = eOutEncodingForm;
                    }
                }
                else    // unicode forms
                {
                    // if the engine gives UTF8 and the client wants UTF16...
                    if ((eOutEncodingForm == EncodingForm.UTF16) && (eFormEngineOut == EncodingForm.UTF8Bytes))
                    {
                        // use the special form to convert it below
                        Util.DebugWriteLine(className, "using CCUnicode8");
                        eOutEncodingForm = eFormEngineOut = (EncodingForm)CCUnicode8;
                    }
                    // or vice versa
                    else if ((eFormEngineOut == EncodingForm.UTF16) &&
                             ((eOutEncodingForm == EncodingForm.UTF8Bytes) || (eOutEncodingForm == EncodingForm.UTF8String)))
                    {
                        // engine gave UTF16, but user wants a UTF8 flavor: re-encode the
                        //  bytes as UTF8 here and treat them as the requested form from now on.
                        // Decoder d = Encoding.Unicode.GetChars(baOut);
                        // d.GetChars(
                        UTF8Encoding enc = new UTF8Encoding();
                        baOut          = enc.GetBytes(Encoding.Unicode.GetChars(baOut));
                        eFormEngineOut = eOutEncodingForm;
                        nOutLen        = baOut.Length;
                    }
                    // these conversions we can do ourself
                    else if ((eOutEncodingForm == EncodingForm.UTF8String) ||
                             (eOutEncodingForm == EncodingForm.UTF16))
                    {
                        // the assignment below is only compiled when _MSC_VER is defined;
                        //  otherwise this branch does nothing and the final conversion at
                        //  the end of the method handles the mismatch.
#if _MSC_VER
                        // Doesn't this wipe out the distinction?
                        // On Linux we need to be able to convert the output from UTF32 to UTF16.
                        eFormEngineOut = eOutEncodingForm;
#endif
                    }
                }
            }

            // nItems is the item count reported to the caller; nCharsLen is the expected
            //  length of caOut (checked below).
            int    nItems = 0, nCharsLen = 0;
            char[] caOut = null;
            switch (eFormEngineOut)
            {
            case EncodingForm.LegacyBytes:
            case EncodingForm.UTF8Bytes:
            {
                if (eFormEngineOut == EncodingForm.LegacyBytes)
                {
                    DisplayDebugCharValues(baOut, "Received (LegacyBytes) back from Converter/DLL (returning as LegacyBytes)...", ref bDebugDisplayMode);
                }
                else
                {
                    DisplayDebugUCharValuesFromUTF8(baOut, "Received (UTF8Bytes) back from Converter/DLL (returning as UTF8Bytes)...", ref bDebugDisplayMode);
                }

                // stuff the returned 'bytes' into the BSTR as narrow characters rather than
                //  converting to wide (two bytes per char, rounding up for an odd count)
                nItems    = nOutLen;
                nCharsLen = (nOutLen + 1) / 2;
                caOut     = new char[nCharsLen];
                ByteArrToCharArr(baOut, caOut);
                break;
            }

            case EncodingForm.LegacyString:
            {
                DisplayDebugCharValues(baOut, "Received (LegacyBytes) back from Converter/DLL (returning as LegacyString)...", ref bDebugDisplayMode);

                // NOTE(review): this takes one char per byte; presumably that only holds
                //  for single-byte code pages — confirm for multi-byte ones.
                nCharsLen = nItems = nOutLen;

#if __MonoCS__
                // Widening by code page 0 doesn't seem to be what we want on Linux.
                // Treating it as a symbol font or stripping off the low byte works better.
                if (nCodePageOut == 0)
                {
                    caOut = BruteForceWiden(nCodePageOut, baOut, nCharsLen);
                }
                else
#else
                if (true)
#endif
                {
                    try
                    {
                        // this will throw (for some reason) when doing symbol fonts
                        //  (apparently, CP_SYMBOL is no longer supported).
                        caOut = Encoding.GetEncoding(nCodePageOut).GetChars(baOut);
                    }
                    catch
                    {
                        // only the symbol and iso-8859-1 code pages get the brute-force
                        //  fallback; any other failure is rethrown to the caller.
                        if ((nCodePageOut == EncConverters.cnSymbolFontCodePage) || (nCodePageOut == EncConverters.cnIso8859_1CodePage))
                        {
                            caOut = BruteForceWiden(nCodePageOut, baOut, nCharsLen);
                        }
                        else
                        {
                            throw;
                        }
                    }
                }

                break;
            }

            case EncodingForm.UTF16:
            {
                // two bytes per UTF16 code unit
                nCharsLen = nItems = (nOutLen / 2);

                DisplayDebugUCharValues(baOut, "Received (UTF16) back from Converter/DLL (returning as UTF16)...", ref bDebugDisplayMode);

                caOut = Encoding.Unicode.GetChars(baOut);
                break;
            }

            case EncodingForm.UTF8String:
            {
                DisplayDebugUCharValuesFromUTF8(baOut, "Received (UTF8Bytes) back from Converter/DLL (returning as UTF8String)...", ref bDebugDisplayMode);

                // this encoding form is always encoded using the default code page.
                caOut = Encoding.Default.GetChars(baOut);

                // NOTE(review): assumes the default code page yields one char per byte — confirm.
                nCharsLen = nItems = nOutLen;
                break;
            }

            case (EncodingForm)CCUnicode8:
            {
                DisplayDebugUCharValuesFromUTF8(baOut, "Received (UTF8Bytes) back from Converter/DLL (returning as UTF16)...", ref bDebugDisplayMode);

                caOut = Encoding.UTF8.GetChars(baOut);

                // the char count is only known after decoding the UTF8 bytes
                nCharsLen = nItems = caOut.Length;
                break;
            }

            case EncodingForm.UTF16BE:
            case EncodingForm.UTF32:
            case EncodingForm.UTF32BE:
            {
                nCharsLen = nItems = nOutLen / 2;

                DisplayDebugUCharValues(baOut, "Received (UTF16BE/32/32BE) back from Converter/DLL...", ref bDebugDisplayMode);

                // the bytes are packed into chars as-is here; no decoding is done
                caOut = new char[nCharsLen];
                ByteArrToCharArr(baOut, caOut);

                // for UTF32, the item count is half as much again (four bytes per item).
                if (eFormEngineOut != EncodingForm.UTF16BE)
                {
                    nItems /= 2;
                }
                break;
            }

            default:
                EncConverters.ThrowError(ErrStatus.OutEncFormNotSupported);
                break;
            }

            // unless empty returns are allowed, report an error when nothing came back
            //  (and, in DEBUG builds, when the expected and actual char counts disagree)
#if !v22_AllowEmptyReturn
            if ((nCharsLen <= 0)
#if DEBUG
                || (nCharsLen != caOut.Length)
#endif
                )
            {
                EncConverters.ThrowError(ErrStatus.NoReturnDataBadOutForm);
            }
#endif

            // check to see if the engine handled the given output form. If not, then ask
            //  TEC to do the conversion for us so that all engines can handle all possible
            //  output encoding forms (e.g. caller requested utf32, but above CC could only
            //  give us utf16/8)
            // Also, if the caller wanted something other than "None" for the eNormalizeOutput,
            //  then we also have to call TEC for that as well (but I think this only makes
            //  sense if the output is utf16(be) or utf32(be))
            // p.s. if this had been a TEC converter, then the eNormalizeOutput flag would
            //  already have been reset to None (by this point), since we would have directly
            //  requested that normalized form when we created the converter--see
            //  TecEncConverter.PreConvert)
            string strOutput = new string(caOut);
#if DEBUG
            // debug-only dump of the result as UTF16BE bytes
            byte[] byteArray = Encoding.BigEndianUnicode.GetBytes(caOut);
            Util.DebugWriteLine(className, Util.getDisplayBytes("characters", byteArray));
            byte[] baResult = System.Text.Encoding.BigEndianUnicode.GetBytes(strOutput);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized strOutput in UTF16BE", baResult));
#endif
            if ((eFormEngineOut != eOutEncodingForm) ||
                (eNormalizeOutput != NormalizeFlags.None))
            {
                strOutput = EncConverters.UnicodeEncodingFormConvertEx(strOutput, eFormEngineOut, nItems, eOutEncodingForm, eNormalizeOutput, out nItems);
            }

            DisplayDebugUCharValues(strOutput, "Returning back to client...", ref bDebugDisplayMode);

            rciOutput = nItems;
            return(strOutput);
        }