Exemple #1
0
        protected override string InternalConvert
        (
            EncodingForm eInEncodingForm,
            string sInput,
            EncodingForm eOutEncodingForm,
            NormalizeFlags eNormalizeOutput,
            bool bForward
        )
        {
            // Length-less entry point used by the 'implicit' conversion methods (e.g.
            //  ConvertToUnicode). It forwards to InternalConvertEx with an input count
            //  of 0; per the original author's note, 0 tells the Ex routine to take the
            //  length from the string itself, so a zero value embedded in the data is
            //  not mistaken for a terminator.
            const int ciUseStringLength = 0;

            // InternalConvertEx always reports an output count; this overload has no
            //  way to hand it back, so it is simply dropped.
            int ciDiscarded = 0;

            string sConverted = InternalConvertEx(
                eInEncodingForm,
                sInput,
                ciUseStringLength,
                eOutEncodingForm,
                eNormalizeOutput,
                out ciDiscarded,
                bForward);

            return sConverted;
        }
Exemple #2
0
        protected override void PreConvert
        (
            EncodingForm eInEncodingForm,
            ref EncodingForm eInFormEngine,
            EncodingForm eOutEncodingForm,
            ref EncodingForm eOutFormEngine,
            ref NormalizeFlags eNormalizeOutput,
            bool bForward
        )
        {
            // Run the base implementation first; its choices for the engine forms are
            //  overridden immediately below.
            base.PreConvert(
                eInEncodingForm, ref eInFormEngine,
                eOutEncodingForm, ref eOutFormEngine,
                ref eNormalizeOutput, bForward);

            // This wrapper always requests LegacyBytes on both sides of the engine.
            eInFormEngine  = EncodingForm.LegacyBytes;
            eOutFormEngine = EncodingForm.LegacyBytes;

            // Record the direction of this call (the original author notes it is
            //  consulted again during DoConvert), then load the matching direction.
            m_bForward = bForward;
            if (!m_bForward)
            {
                LoadReverse();
                return;
            }

            LoadForward();
        }
Exemple #3
0
        // Common entry point for conversions where the caller supplies no explicit
        //  input length. Sub-classes are expected to funnel their basic input/output
        //  encoding-form processing through this method and InternalConvertEx.
        protected virtual string InternalConvert
        (
            EncodingForm eInEncodingForm,
            string sInput,
            EncodingForm eOutEncodingForm,
            NormalizeFlags eNormalizeOutput,
            bool bForward
        )
        {
            Util.DebugWriteLine(className, "BEGIN");

            // Only the 'implicit' methods (e.g. ConvertToUnicode) come through here, and
            //  those always deal in wide (UTF-16-like) strings whose length is carried
            //  by the string itself. Passing 0 as the count asks InternalConvertEx to
            //  use that length, so data containing a zero value is not cut short.
            const int ciUseStringLength = 0;

            // The output item count is required by InternalConvertEx but is not
            //  returned by this overload.
            int ciIgnored = 0;

            string sResult = InternalConvertEx(
                eInEncodingForm,
                sInput,
                ciUseStringLength,
                eOutEncodingForm,
                eNormalizeOutput,
                out ciIgnored,
                bForward);

            return sResult;
        }
Exemple #4
0
        protected override void PreConvert
        (
            EncodingForm eInEncodingForm,
            ref EncodingForm eInFormEngine,
            EncodingForm eOutEncodingForm,
            ref EncodingForm eOutFormEngine,
            ref NormalizeFlags eNormalizeOutput,
            bool bForward
        )
        {
            // let the base class do its thing first
            base.PreConvert(
                eInEncodingForm, ref eInFormEngine,
                eOutEncodingForm, ref eOutFormEngine,
                ref eNormalizeOutput, bForward);

            // This converter only handles 'String' flavors, and both sides always share
            //  one form: LegacyString for a legacy<>legacy converter, UTF16 for a
            //  Unicode<>Unicode one.
            EncodingForm eStringForm = m_bLegacy
                ? EncodingForm.LegacyString
                : EncodingForm.UTF16;
            eOutFormEngine = eStringForm;
            eInFormEngine  = eStringForm;

            // The bForward passed in may differ from the converter's configured
            //  direction (e.g. when the call came in through ConvertEx), so the lookup
            //  direction for the upcoming DoConvert is taken from *this* argument.
            m_bReverseLookup = !bForward;

            // Give the converter a chance to (re-)load its file(s) before converting.
            Load();
        }
        protected override void PreConvert
        (
            EncodingForm eInEncodingForm,
            ref EncodingForm eInFormEngine,
            EncodingForm eOutEncodingForm,
            ref EncodingForm eOutFormEngine,
            ref NormalizeFlags eNormalizeOutput,
            bool bForward
        )
        {
            // Start from the base-class defaults.
            base.PreConvert(
                eInEncodingForm, ref eInFormEngine,
                eOutEncodingForm, ref eOutFormEngine,
                ref eNormalizeOutput, bForward);

            // Input side. Legacy data is handed to the engine as bytes; Unicode data is
            //  handed over as wide data. UTF-8 would also be possible, but wide data
            //  works fine. The width is chosen at compile time: UTF32 when __MonoCS__ is
            //  defined, UTF16 otherwise (the original author keyed this on "not Mono"
            //  because _MSC_VER was not defined on Windows as expected).
            bool bUnicodeIn = (NormalizeLhsConversionType(ConversionType) == NormConversionType.eUnicode);
            if (!bUnicodeIn)
            {
                Util.DebugWriteLine(this, "eInFormEngine LegacyBytes");
                eInFormEngine = EncodingForm.LegacyBytes;
            }
            else
            {
#if __MonoCS__
                Util.DebugWriteLine(this, "eInFormEngine UTF32");
                eInFormEngine = EncodingForm.UTF32;
#else
                Util.DebugWriteLine(this, "eInFormEngine UTF16");
                eInFormEngine = EncodingForm.UTF16;
#endif
            }

            // Output side: same rule, applied to the right-hand side of the conversion type.
            bool bUnicodeOut = (NormalizeRhsConversionType(ConversionType) == NormConversionType.eUnicode);
            if (!bUnicodeOut)
            {
                Util.DebugWriteLine(this, "eOutFormEngine LegacyBytes");
                eOutFormEngine = EncodingForm.LegacyBytes;
            }
            else
            {
#if __MonoCS__
                Util.DebugWriteLine(this, "eOutFormEngine UTF32");
                eOutFormEngine = EncodingForm.UTF32;
#else
                Util.DebugWriteLine(this, "eOutFormEngine UTF16");
                eOutFormEngine = EncodingForm.UTF16;
#endif
            }

            // Load the converter, then pass the chosen forms (as plain ints) to the
            //  C++ side.
            Load();

            int nEngineIn  = (int)eInFormEngine;
            int nEngineOut = (int)eOutFormEngine;
            int nNormalize = (int)eNormalizeOutput;
            CppPreConvert(nEngineIn, nEngineOut, nNormalize, bForward);
        }
Exemple #6
0
        // Fallback conversion: exactly two steps are required. The first converter is
        //  tried; only if it returns the input unchanged is the second (fallback)
        //  converter run on the same input. This overrides EncConverter's version
        //  because providing a "DoConvert" alone is not enough here, and overrides
        //  CmpdEncConverter's version to add the "only if unchanged" rule.
        protected override string InternalConvertEx
        (
            EncodingForm eInEncodingForm,
            string sInput,
            int ciInput,
            EncodingForm eOutEncodingForm,
            NormalizeFlags eNormalizeOutput,
            out int rciOutput,
            bool bForward
        )
        {
            if (CountConverters != 2)
            {
                EncConverters.ThrowError(ErrStatus.FallbackTwoStepsRequired);
            }

            // ---- step 1: the primary converter ----
            IEncConverter rPrimary = (IEncConverter)m_aEncConverter[0];
            if (rPrimary == null)
            {
                EncConverters.ThrowError(ErrStatus.MissingConverter);
            }

            rPrimary.Debug = Debug;

            // A step runs forward when its stored direction agrees with the requested
            //  one: forward keeps the stored flag, reverse flips it.
            bool bStepForward = ((bool)m_aDirectionForward[0] == bForward);

            string strResult = rPrimary.ConvertEx(
                sInput,
                eInEncodingForm,
                ciInput,
                eOutEncodingForm,
                out rciOutput,
                eNormalizeOutput,
                bStepForward);

            // The primary converter changed something, so its result stands.
            if (strResult != sInput)
            {
                return strResult;
            }

            // ---- step 2: the fallback converter, run on the original input ----
            IEncConverter rFallback = (IEncConverter)m_aEncConverter[1];
            if (rFallback == null)
            {
                EncConverters.ThrowError(ErrStatus.MissingConverter);
            }

            rFallback.Debug = Debug;
            bStepForward = ((bool)m_aDirectionForward[1] == bForward);

            strResult = rFallback.ConvertEx(
                sInput,
                eInEncodingForm,
                ciInput,
                eOutEncodingForm,
                out rciOutput,
                eNormalizeOutput,
                bStepForward);

            return strResult;
        }
Exemple #7
0
        protected override unsafe void PreConvert(
            EncodingForm eInEncodingForm,
            ref EncodingForm eInFormEngine,
            EncodingForm eOutEncodingForm,
            ref EncodingForm eOutFormEngine,
            ref NormalizeFlags eNormalizeOutput,
            bool bForward)
        {
            // let the base class do its thing first
            base.PreConvert(
                eInEncodingForm, ref eInFormEngine,
                eOutEncodingForm, ref eOutFormEngine,
                ref eNormalizeOutput, bForward);

            // Input side: legacy data goes to the engine as LegacyBytes. Unicode data
            //  goes as UTF16, except when built with __MonoCS__, where UTF8Bytes is
            //  requested so that Unicode input of any form (UTF16, BE, etc.) is turned
            //  into UTF8 narrow bytes before DoConvert is called.
            if (NormalizeLhsConversionType(ConversionType) != NormConversionType.eUnicode)
            {
                eInFormEngine = EncodingForm.LegacyBytes;
            }
            else
            {
#if !__MonoCS__
                eInFormEngine = EncodingForm.UTF16;
#else
                eInFormEngine = EncodingForm.UTF8Bytes;
#endif
            }

            // Output side: the same choice, driven by the right-hand side type.
            if (NormalizeRhsConversionType(ConversionType) != NormConversionType.eUnicode)
            {
                eOutFormEngine = EncodingForm.LegacyBytes;
            }
            else
            {
#if !__MonoCS__
                eOutFormEngine = EncodingForm.UTF16;
#else
                eOutFormEngine = EncodingForm.UTF8Bytes;
#endif
            }

            // do the load at this point.
            Load(ConverterIdentifier);

            // Hand everything to the C++ side as plain ints. The three values passed by
            //  ref may be changed there, so they are copied back afterwards.
            int nEngineIn  = (int)eInFormEngine;
            int nEngineOut = (int)eOutFormEngine;
            int nNormalize = (int)eNormalizeOutput;

            CppPreconvert(
                (int)eInEncodingForm, ref nEngineIn,
                (int)eOutEncodingForm, ref nEngineOut,
                ref nNormalize, bForward, 0);

            eInFormEngine    = (EncodingForm)nEngineIn;
            eOutFormEngine   = (EncodingForm)nEngineOut;
            eNormalizeOutput = (NormalizeFlags)nNormalize;
        }
Exemple #8
0
        // Fills in any encoding form the caller left as Unspecified, based on the
        //  converter's conversion type and the direction of the call, then validates
        //  the resulting pair with CheckForBadForm.
        //  - a legacy side defaults to LegacyString
        //  - a Unicode side defaults to whatever DefaultUnicodeEncForm returns
        protected void CheckInitEncForms
        (
            bool bForward,
            ref EncodingForm eInEncodingForm,
            ref EncodingForm eOutEncodingForm
        )
        {
            // Input: going forward the input is the left-hand side of the conversion
            //  type; going in reverse it is the right-hand side.
            if (eInEncodingForm == EncodingForm.Unspecified)
            {
                NormConversionType eInputSide = bForward
                    ? NormalizeLhsConversionType(m_eConversionType)
                    : NormalizeRhsConversionType(m_eConversionType);

                eInEncodingForm = (eInputSide == NormConversionType.eLegacy)
                    ? EncodingForm.LegacyString
                    : DefaultUnicodeEncForm(bForward, true);
            }

            // Output: the mirror image of the input case.
            if (eOutEncodingForm == EncodingForm.Unspecified)
            {
                NormConversionType eOutputSide = bForward
                    ? NormalizeRhsConversionType(m_eConversionType)
                    : NormalizeLhsConversionType(m_eConversionType);

                eOutEncodingForm = (eOutputSide == NormConversionType.eLegacy)
                    ? EncodingForm.LegacyString
                    : DefaultUnicodeEncForm(bForward, false);
            }

            CheckForBadForm(bForward, eInEncodingForm, eOutEncodingForm);
        }
Exemple #9
0
        protected override void PreConvert
        (
            EncodingForm eInEncodingForm,
            ref EncodingForm eInFormEngine,
            EncodingForm eOutEncodingForm,
            ref EncodingForm eOutFormEngine,
            ref NormalizeFlags eNormalizeOutput,
            bool bForward
        )
        {
            // Seed the engine forms with the base-class defaults before adjusting them.
            base.PreConvert(
                eInEncodingForm, ref eInFormEngine,
                eOutEncodingForm, ref eOutFormEngine,
                ref eNormalizeOutput, bForward);

            // Input side: legacy data is passed as LegacyBytes. For Unicode, Unix asks
            //  for UTF8Bytes (so input of any Unicode form, UTF16, BE, etc., is turned
            //  into UTF8 narrow bytes before DoConvert); elsewhere UTF16 is used.
            bool bLegacyIn = (NormalizeLhsConversionType(ConversionType) != NormConversionType.eUnicode);
            eInFormEngine = bLegacyIn
                ? EncodingForm.LegacyBytes
                : (Util.IsUnix ? EncodingForm.UTF8Bytes : EncodingForm.UTF16);

            // Output side: the same rule applied to the right-hand side type, so the
            //  Unicode engine form is UTF16 except on Unix, where it is UTF8Bytes.
            bool bLegacyOut = (NormalizeRhsConversionType(ConversionType) != NormConversionType.eUnicode);
            eOutFormEngine = bLegacyOut
                ? EncodingForm.LegacyBytes
                : (Util.IsUnix ? EncodingForm.UTF8Bytes : EncodingForm.UTF16);

            // do the load at this point.
            Load(ConverterIdentifier);
        }
        protected override void PreConvert
        (
            EncodingForm eInEncodingForm,
            ref EncodingForm eInFormEngine,
            EncodingForm eOutEncodingForm,
            ref EncodingForm eOutFormEngine,
            ref NormalizeFlags eNormalizeOutput,
            bool bForward
        )
        {
            // let the base class do its thing first
            base.PreConvert(
                eInEncodingForm, ref eInFormEngine,
                eOutEncodingForm, ref eOutFormEngine,
                ref eNormalizeOutput, bForward);

            // The direction has to be remembered per call; m_bForward cannot be used
            //  because it may differ (e.g. when called from ConvertEx). When the caller
            //  supplies a non-legacy input form together with a legacy output form, the
            //  requested direction is flipped.
            bool bNonLegacyToLegacy = !IsLegacyFormat(eInEncodingForm) && IsLegacyFormat(eOutEncodingForm);
            m_bToWide = bNonLegacyToLegacy ? !bForward : bForward;

            // The UTF8 code page is special: it is the one code page converter where
            //  both sides are Unicode, so its narrow side is requested as UTF8Bytes.
            bool bUtf8CodePage = (m_nCodePage == CP_UTF8);

            if (!m_bToWide)
            {
                // going "from wide" means the input form required by the engine is UTF16.
                eInFormEngine = EncodingForm.UTF16;

                if (bUtf8CodePage)
                {
                    eOutFormEngine = EncodingForm.UTF8Bytes;
                }
                else if (IsLegacyFormat(eOutEncodingForm))
                {
                    eOutFormEngine = EncodingForm.LegacyString;
                }

                // Any other output form keeps whatever the base class assigned.
                return;
            }

            // going "to wide" means the output form required by the engine is UTF16.
            eOutFormEngine = EncodingForm.UTF16;
            eInFormEngine  = bUtf8CodePage
                ? EncodingForm.UTF8Bytes
                : EncodingForm.LegacyBytes;
        }
        protected override void PreConvert(
            EncodingForm eInEncodingForm,
            ref EncodingForm eInFormEngine,
            EncodingForm eOutEncodingForm,
            ref EncodingForm eOutFormEngine,
            ref NormalizeFlags eNormalizeOutput,
            bool bForward)
        {
            // The engine forms chosen by the base class are used as they are.
            base.PreConvert(
                eInEncodingForm, ref eInFormEngine,
                eOutEncodingForm, ref eOutFormEngine,
                ref eNormalizeOutput, bForward);

            // Remember the direction requested for this call.
            _bForward = bForward;

            // Load only if that has not already happened.
            if (IsLoaded)
            {
                return;
            }

            Load();
        }
		// Fallback conversion: exactly two steps are required. The first converter is
		//  tried; only if it returns the input unchanged is the second (fallback)
		//  converter run on the same input. This overrides EncConverter's version
		//  because providing a "DoConvert" alone is not enough here, and overrides
		//  CmpdEncConverter's version to add the "only if unchanged" rule.
		protected override string InternalConvertEx
			(
			EncodingForm    eInEncodingForm,
			string          sInput,
			int             ciInput,
			EncodingForm    eOutEncodingForm,
			NormalizeFlags  eNormalizeOutput,
			out int         rciOutput,
			bool            bForward
			)
		{
			if (CountConverters != 2)
			{
				EncConverters.ThrowError(ErrStatus.FallbackTwoStepsRequired);
			}

			// ---- step 1: the primary converter ----
			IEncConverter rPrimary = (IEncConverter)m_aEncConverter[0];
			if (rPrimary == null)
			{
				EncConverters.ThrowError(ErrStatus.MissingConverter);
			}

			rPrimary.Debug = Debug;

			// A step runs forward when its stored direction agrees with the requested
			//  one: forward keeps the stored flag, reverse flips it.
			bool bStepForward = ((bool)m_aDirectionForward[0] == bForward);

			string strResult = rPrimary.ConvertEx(
				sInput,
				eInEncodingForm,
				ciInput,
				eOutEncodingForm,
				out rciOutput,
				eNormalizeOutput,
				bStepForward);

			// The primary converter changed something, so its result stands.
			if (strResult != sInput)
			{
				return strResult;
			}

			// ---- step 2: the fallback converter, run on the original input ----
			IEncConverter rFallback = (IEncConverter)m_aEncConverter[1];
			if (rFallback == null)
			{
				EncConverters.ThrowError(ErrStatus.MissingConverter);
			}

			rFallback.Debug = Debug;
			bStepForward = ((bool)m_aDirectionForward[1] == bForward);

			strResult = rFallback.ConvertEx(
				sInput,
				eInEncodingForm,
				ciInput,
				eOutEncodingForm,
				out rciOutput,
				eNormalizeOutput,
				bStepForward);

			return strResult;
		}
Exemple #13
0
		protected bool          m_bIsInRepository;      // indicates whether this converter is in the static repository (true) or not (false)
		#endregion Member Variable Definitions

		#region Public Interface
		/// <summary>
		/// Creates a converter with the given program ID and implementation type.
		/// All other settings start at the defaults assigned below.
		/// </summary>
		/// <param name="sProgId">Value stored as the converter's program ID.</param>
		/// <param name="sImplementType">Value stored as the converter's implementation type.</param>
		public EncConverter(string sProgId, string sImplementType)
		{
			// identity supplied by the caller
			m_strImplementType = sImplementType;
			m_strProgramID = sProgId;

			// conversion defaults: type, direction and process type
			m_eConversionType = ConvType.Legacy_to_from_Unicode;
			m_bForward = true;
			m_lProcessType = (int)ProcessTypeFlags.DontKnow;

			// encoding forms are left for later resolution; no output normalization
			m_eEncodingOutput = EncodingForm.Unspecified;
			m_eEncodingInput = EncodingForm.Unspecified;
			m_eNormalizeOutput = NormalizeFlags.None;

			// code pages start at 0
			m_nCodePageOutput = 0;
			m_nCodePageInput = 0;

			// housekeeping flags
			m_bIsInRepository = false;
			m_bDebugDisplayMode = false;
		}
Exemple #14
0
        protected bool m_bIsInRepository;                       // indicates whether this converter is in the static repository (true) or not (false)
        #endregion Member Variable Definitions

        #region Public Interface
        /// <summary>
        /// Builds a converter for the given program ID and implementation type and
        /// resets every other member to its starting value.
        /// </summary>
        /// <param name="sProgId">Stored in <c>m_strProgramID</c>.</param>
        /// <param name="sImplementType">Stored in <c>m_strImplementType</c>.</param>
        public EncConverter(string sProgId, string sImplementType)
        {
            // Caller-supplied identity.
            m_strProgramID = sProgId;
            m_strImplementType = sImplementType;

            // Flags: forward direction, no debug display, not in the repository.
            m_bForward = true;
            m_bDebugDisplayMode = false;
            m_bIsInRepository = false;

            // Conversion and process type defaults.
            m_eConversionType = ConvType.Legacy_to_from_Unicode;
            m_lProcessType = (int)ProcessTypeFlags.DontKnow;

            // Input settings: unspecified form, code page 0.
            m_eEncodingInput = EncodingForm.Unspecified;
            m_nCodePageInput = 0;

            // Output settings: unspecified form, code page 0, no normalization.
            m_eEncodingOutput = EncodingForm.Unspecified;
            m_nCodePageOutput = 0;
            m_eNormalizeOutput = NormalizeFlags.None;
        }
Exemple #15
0
 protected virtual void PreConvert
 (
     EncodingForm eInEncodingForm,
     ref EncodingForm eInFormEngine,
     EncodingForm eOutEncodingForm,
     ref EncodingForm eOutFormEngine,
     ref NormalizeFlags eNormalizeOutput,
     bool bForward
 )
 {
     // Default behavior: tell the caller that the engine takes the data in exactly
     //  the form it was supplied and produces it in exactly the form requested.
     //  That is rarely true in practice, so each engine wrapper is expected to
     //  override this and request what its engine really needs (for CC, for
     //  instance, Unicode input has to be UTF8Bytes, so eInFormEngine would be set
     //  to UTF8Bytes). eNormalizeOutput and bForward are left untouched here.
     eOutFormEngine = eOutEncodingForm;
     eInFormEngine = eInEncodingForm;
 }
        protected override void PreConvert
        (
            EncodingForm eInEncodingForm,
            ref EncodingForm eInFormEngine,
            EncodingForm eOutEncodingForm,
            ref EncodingForm eOutFormEngine,
            ref NormalizeFlags eNormalizeOutput,
            bool bForward
        )
        {
            // let the base class do its thing first
            base.PreConvert(
                eInEncodingForm, ref eInFormEngine,
                eOutEncodingForm, ref eOutFormEngine,
                ref eNormalizeOutput, bForward);

            // The CC DLL (conversion engine) usually works in UTF8 for Unicode, so every
            //  CC table that uses Unicode is assumed to want UTF8. A possible future
            //  enhancement is a (marked) value from the repository naming the form to
            //  use: UTF8Bytes by default, something else for a UTF32 cc table written
            //  with the xYYYY syntax rather than the uXXXX syntax.
            //
            // Requesting UTF8Bytes on the input side causes Unicode input of any form
            //  (UTF16, BE, etc.) to be converted to UTF8 narrow bytes before DoConvert
            //  is called. Legacy sides are passed as LegacyBytes.
            bool bUnicodeIn = (NormalizeLhsConversionType(ConversionType) == NormConversionType.eUnicode);
            eInFormEngine = bUnicodeIn
                ? EncodingForm.UTF8Bytes
                : EncodingForm.LegacyBytes;

            bool bUnicodeOut = (NormalizeRhsConversionType(ConversionType) == NormConversionType.eUnicode);
            eOutFormEngine = bUnicodeOut
                ? EncodingForm.UTF8Bytes
                : EncodingForm.LegacyBytes;

            // do the load at this point.
            Load(ConverterIdentifier);
        }
Exemple #17
0
        protected override void PreConvert
        (
            EncodingForm eInEncodingForm,
            ref EncodingForm eInFormEngine,
            EncodingForm eOutEncodingForm,
            ref EncodingForm eOutFormEngine,
            ref NormalizeFlags eNormalizeOutput,
            bool bForward
        )
        {
            // This wrapper accepts the base class's choice of engine forms unchanged.
            base.PreConvert(
                eInEncodingForm,
                ref eInFormEngine,
                eOutEncodingForm,
                ref eOutFormEngine,
                ref eNormalizeOutput,
                bForward);

            // Its only extra work before a conversion is loading the converter named
            //  by ConverterIdentifier.
            Load(ConverterIdentifier);
        }
Exemple #18
0
		// This function is the meat of the conversion process. The original design notes
		//	describe one long routine using alloca (stack) buffers; in this C# version the
		//	input and output buffers are managed byte arrays that are pinned with 'fixed'
		//	for the duration of the call.
		//	The steps performed below are:
		//
		//	o	Check the input data (a null sInput is reported via
		//		EncConverters.ThrowError(ErrStatus.IncompleteChar)).
		//	o	Fill in any unspecified input/output EncodingForm (CheckInitEncForms).
		//	o	Give the sub-class (via PreConvert) the opportunity to load tables, do any
		//		special preprocessing, and say which forms its engine wants.
		//	o	Normalize the input data to a byte buffer based on its input EncodingForm
		//		and the form the engine wants (ECNormalizeData.GetBytes).
		//	o	Allocate a buffer for the output data (min 10000 bytes).
		//	o	Call the subclass (via DoConvert) to do the actual conversion.
		//	o	Normalize the output data to match the requested output EncodingForm
		//		(ECNormalizeData.GetString), which also supplies rciOutput.
		//
		//	Returns the string produced by ECNormalizeData.GetString.
		//
		protected virtual unsafe string InternalConvertEx
			(
			EncodingForm    eInEncodingForm,
			string			sInput,
			int             ciInput,
			EncodingForm    eOutEncodingForm,
			NormalizeFlags  eNormalizeOutput,
			out int         rciOutput,
			bool            bForward
			)
		{
			// NOTE(review): execution only stops here if ThrowError actually throws --
			//  presumably it does; otherwise sInput.Length below would fail on null.
			if( sInput == null )
				EncConverters.ThrowError(ErrStatus.IncompleteChar);

			// if the user hasn't specified, then take the default case for the ConversionType:
			//  if L/RHS == eLegacy, then LegacyString
			//  if L/RHS == eUnicode, then the default Unicode form
			CheckInitEncForms
				(
				bForward,
				ref eInEncodingForm,
				ref eOutEncodingForm
				);

			// allow the converter engine (and/or its wrapper) to do some preprocessing and
			//  to report, through the two ref arguments, the forms it wants to work with.
			EncodingForm eFormEngineIn = EncodingForm.Unspecified, eFormEngineOut = EncodingForm.Unspecified;
			PreConvert
				(
				eInEncodingForm,	// [in] form of the caller's input string
				ref eFormEngineIn,		// [ref] set by PreConvert to the form the engine wants
				eOutEncodingForm,
				ref eFormEngineOut,
				ref eNormalizeOutput,
				bForward
				);

			// get enough space for us to normalize the input data: 6 bytes per input char
			//  (the original author's estimate of "enough").
			// NOTE(review): for an empty sInput this is a zero-length array, and C# 'fixed'
			//  yields a null pointer for an empty array -- confirm GetBytes/DoConvert
			//  tolerate that.
			int nBufSize = sInput.Length * 6;
			byte[] abyInBuffer = new byte[nBufSize];
			fixed (byte* lpInBuffer = abyInBuffer)
			{
				// use a helper class to normalize the data to the format needed by the engine.
				//  The code page used is the input one when going forward and the output one
				//  when going in reverse. nBufSize is passed by ref; presumably it comes back
				//  as the number of bytes actually written (it is used as the input length
				//  for DoConvert below) -- verify against GetBytes.
				ECNormalizeData.GetBytes(sInput, ciInput, eInEncodingForm,
					((bForward) ? CodePageInput : CodePageOutput), eFormEngineIn, lpInBuffer,
					ref nBufSize, ref m_bDebugDisplayMode);

				// get some space for the converter to fill: 6x the input byte count, but
				//  never less than 10000 bytes. (This is a managed array, not stack memory.)
				int nOutLen = Math.Max(10000, nBufSize * 6);
				byte[] abyOutBuffer = new byte[nOutLen];
				fixed (byte* lpOutBuffer = abyOutBuffer)
				{
					// explicitly zero the first four bytes of the output buffer before
					//  handing it to the engine.
					lpOutBuffer[0] = lpOutBuffer[1] = lpOutBuffer[2] = lpOutBuffer[3] = 0;

					// call the wrapper sub-classes' DoConvert to let them do it. nOutLen is
					//  passed by ref and is then used as the output length below.
					DoConvert(lpInBuffer, nBufSize, lpOutBuffer, ref nOutLen);

					// turn the engine's bytes back into a string of the requested output
					//  form (code page choice mirrors the input case) and report the item
					//  count through rciOutput.
					return ECNormalizeData.GetString(lpOutBuffer, nOutLen, eOutEncodingForm,
						((bForward) ? CodePageOutput : CodePageInput), eFormEngineOut, eNormalizeOutput,
						out rciOutput, ref m_bDebugDisplayMode);
				}
			}
		}
Exemple #19
0
 // [DispId(18)]
 /// <summary>
 /// Public entry point for a conversion with explicit encoding forms, input item
 /// count, normalization and direction. It only reorders the arguments and
 /// delegates to <c>InternalConvertEx</c>.
 /// </summary>
 /// <param name="ciOutput">Receives the output item count reported by <c>InternalConvertEx</c>.</param>
 /// <returns>The string returned by <c>InternalConvertEx</c>.</returns>
 public virtual string ConvertEx(
     string sInput,
     EncodingForm inEnc,
     int ciInput,
     EncodingForm outEnc,
     out int ciOutput,
     NormalizeFlags eNormalizeOutput,
     bool bForward)
 {
     string sConverted = InternalConvertEx(
         inEnc,
         sInput,
         ciInput,
         outEnc,
         eNormalizeOutput,
         out ciOutput,
         bForward);

     return sConverted;
 }
Exemple #20
0
		protected virtual void PreConvert
			(
			EncodingForm        eInEncodingForm,
			ref EncodingForm    eInFormEngine,
			EncodingForm        eOutEncodingForm,
			ref EncodingForm    eOutFormEngine,
			ref NormalizeFlags  eNormalizeOutput,
			bool                bForward
			)
		{
			// Default behavior: report that the engine takes the data in exactly the
			//	form it was supplied and produces it in exactly the form requested.
			//	That is rarely true in practice, so each engine wrapper is expected to
			//	override this and request what its engine really needs (for CC, for
			//	instance, Unicode input has to be UTF8Bytes, so eInFormEngine would be
			//	set to UTF8Bytes). eNormalizeOutput and bForward are left untouched here.
			eOutFormEngine = eOutEncodingForm;
			eInFormEngine = eInEncodingForm;
		}
Exemple #21
0
		protected override void PreConvert
			(
			EncodingForm        eInEncodingForm,
			ref EncodingForm    eInFormEngine,
			EncodingForm        eOutEncodingForm,
			ref EncodingForm    eOutFormEngine,
			ref NormalizeFlags  eNormalizeOutput,
			bool                bForward
			)
		{
			// let the base class do its thing first
			base.PreConvert(
				eInEncodingForm, ref eInFormEngine,
				eOutEncodingForm, ref eOutFormEngine,
				ref eNormalizeOutput, bForward);

			// The direction has to be remembered per call; m_bForward cannot be used
			//	because it may differ (e.g. when called from ConvertEx).
			m_bToWide = bForward;

			// The UTF8 code page is special: it is the one code page converter where
			//	both sides are Unicode, so its narrow side is requested as UTF8Bytes.
			bool bUtf8CodePage = (m_nCodePage == CP_UTF8);

			if (!m_bToWide)
			{
				// going "from wide" means the input form required by the engine is UTF16.
				eInFormEngine = EncodingForm.UTF16;

				if (bUtf8CodePage)
				{
					eOutFormEngine = EncodingForm.UTF8Bytes;
				}

				// For any other code page the output form chosen by the base class stays.
				return;
			}

			// going "to wide" means the output form required by the engine is UTF16.
			eOutFormEngine = EncodingForm.UTF16;

			if (bUtf8CodePage)
			{
				eInFormEngine = EncodingForm.UTF8Bytes;
			}
		}
Exemple #22
0
		// Helper that copies the client's input string into the caller's raw byte buffer
		//	in the form the conversion engine wants, then zero-terminates it.
		//
		//	strInput			the data as received from the client
		//	cnCountIn			item count given by the client, or 0 to derive it from strInput
		//	eEncFormIn			encoding form of strInput
		//	nCodePageIn			code page used to narrowize LegacyString data
		//	eFormEngineIn		encoding form the engine expects
		//	pBuf				caller-allocated buffer; it must have room for the data *plus*
		//						four terminating zero bytes (this routine cannot check that)
		//	nBufSize			[out] number of data bytes placed in pBuf (incoming value is not read)
		//	bDebugDisplayMode	handed on to the debug display helpers
		//
		//	Returns pBuf. An input form that isn't handled is reported via
		//	EncConverters.ThrowError(ErrStatus.InEncFormNotSupported).
		internal static unsafe byte* GetBytes(string strInput, int cnCountIn, EncodingForm eEncFormIn, int nCodePageIn, EncodingForm eFormEngineIn, byte* pBuf, ref int nBufSize, ref bool bDebugDisplayMode)
		{
			// if the form the user gave is not what the engine wants (and it isn't legacy
			//  since legacy forms are already handled later)...
			if ((eEncFormIn != eFormEngineIn) && !EncConverter.IsLegacyFormat(eEncFormIn))
			{
				// we can do some of the conversions ourself. For example, if the input form
				//	is UTF16 and the desired form is UTF8, then simply use CCUnicode8 below
				if ((eEncFormIn == EncodingForm.UTF16) && (eFormEngineIn == EncodingForm.UTF8Bytes))
				{
					eEncFormIn = (EncodingForm)CCUnicode8;
				}
				// we can also do the following one
				else if ((eEncFormIn == EncodingForm.UTF8String) && (eFormEngineIn == EncodingForm.UTF8Bytes))
				{
					; // i.e. don't have TECkit do this one... (the UTF8String case below handles it)
				}
				else
				{
					// everything else is converted to the engine's form up front
					strInput = EncConverters.UnicodeEncodingFormConvertEx(strInput, eEncFormIn, cnCountIn, eFormEngineIn, NormalizeFlags.None, out cnCountIn);
					eEncFormIn = eFormEngineIn;
				}
			}

			int nInLen = 0;
			switch (eEncFormIn)
			{
				case EncodingForm.LegacyBytes:
				case EncodingForm.UTF8Bytes:
					{
						if (cnCountIn != 0)
							nInLen = cnCountIn; // item count should be the number of bytes directly.
						else
							// if the user didn't give the length (i.e. via ConvertEx), then get it
							//	from the BSTR length. nInLen will be the # of bytes.
							nInLen = strInput.Length * 2;

						// these forms are for C++ apps that want to use the BSTR to transfer
						//  bytes rather than OLECHARs. The helper's return value becomes the
						//  byte count from here on.
						nInLen = StringToByteStar(strInput, pBuf, nInLen);

						if (eEncFormIn == EncodingForm.LegacyBytes)
							DisplayDebugCharValues(pBuf, nInLen, "Received (LegacyBytes) from client and sending to Converter/DLL...", ref bDebugDisplayMode);
						else
							DisplayDebugUCharValuesFromUTF8(pBuf, nInLen, "Received (UTF8Bytes) from client and sending to Converter/DLL...", ref bDebugDisplayMode);
						break;
					}
				case EncodingForm.LegacyString:
					{
						if (cnCountIn != 0)
							nInLen = cnCountIn;   // item count should be the number of bytes directly (after conversion below).
						else
							nInLen = strInput.Length; // the # of bytes will *be* the # of chars in the string after we're done.

						DisplayDebugUCharValues(strInput, "Received (LegacyString) from client...", ref bDebugDisplayMode);

						// use a code page converter to narrowize using the input string
						// (but the 'easier' Add method will send 0; if so, then
						//	fallback to the original method.
						byte[] ba = null;

						// first check if it's a symbol font (sometimes the user
						//	incorrectly sends a few spaces first, so check the
						//	first couple of bytes. If it is (and the code page is 0), then
						//  change the code page to be CP_SYMBOL.
						// Every index is length-guarded so an empty string can't fault here.
						if ((nCodePageIn == 0)
							&& (((strInput.Length > 0) && ((strInput[0] & 0xF000) == 0xF000))
								|| ((strInput.Length > 1) && ((strInput[1] & 0xF000) == 0xF000))
								|| ((strInput.Length > 2) && ((strInput[2] & 0xF000) == 0xF000))
								)
							)
						{
							nCodePageIn = EncConverters.cnSymbolFontCodePage;
						}

						// if it's a symbol or iso-8859 encoding, then we can handle just
						//  taking the low byte (i.e. the catch case)
						if ((nCodePageIn == EncConverters.cnSymbolFontCodePage)
							|| (nCodePageIn == EncConverters.cnIso8859_1CodePage)
						)
						{
							try
							{
								Encoding enc = Encoding.GetEncoding(nCodePageIn);
								ba = enc.GetBytes(strInput);
							}
							catch
							{
								// deliberate best-effort: symbol fonts don't appear to be supported
								//	in .Net and cp8859 won't work for symbol data, so if the
								//	encoder fails, go back to stripping out the low byte. This'll
								//	work for both 8859 and symbol.
								ba = new byte[nInLen];

								// a caller-supplied count may be larger than the string; only
								//	read the chars that exist (the rest of ba stays zero).
								int nCopy = (nInLen < strInput.Length) ? nInLen : strInput.Length;
								for (int i = 0; i < nCopy; i++)
									ba[i] = (byte)(strInput[i] & 0xFF);
							}
						}
						else
						{
							// otherwise, simply use CP_ACP (or the default code page) to
							//	narrowize it.
							Encoding enc = Encoding.GetEncoding(nCodePageIn);
							ba = enc.GetBytes(strInput);
						}

						// hand the managed byte array over to the raw buffer
						ByteArrToByteStar(ba, pBuf);

						if (cnCountIn != 0)
							nInLen = cnCountIn; // item count should be the number of bytes directly.
						else
							// if the user didn't give the length (i.e. via ConvertEx), then use
							//	the narrowized length. nInLen will be the # of bytes.
							nInLen = ba.Length;

						DisplayDebugCharValues(pBuf, nInLen, "Sending (LegacyBytes) to Converter/DLL...", ref bDebugDisplayMode);
						break;
					}
				// this following form *must* be widened UTF8 via the default code page
				case EncodingForm.UTF8String:
					{
						DisplayDebugUCharValues(strInput, "Received (UTF8String) from client...", ref bDebugDisplayMode);

						// use a code page converter to narrowize using the input string
						Encoding enc = Encoding.Default;
						byte[] ba = enc.GetBytes(strInput);

						// hand the managed byte array over to the raw buffer
						ByteArrToByteStar(ba, pBuf);

						if (cnCountIn != 0)
							nInLen = cnCountIn; // item count should be the number of bytes directly.
						else
							// if the user didn't give the length (i.e. via ConvertEx), then use
							//	the narrowized length. nInLen will be the # of bytes.
							nInLen = ba.Length;

						DisplayDebugUCharValuesFromUTF8(pBuf, nInLen, "Sending (UTF8Bytes) to Converter/DLL...", ref bDebugDisplayMode);
						break;
					}
				// this is a special case for CC where the input was actually UTF16, but the
				//	CC DLL is expecting (usually) UTF8, so convert from UTF16->UTF8 narrow
				case (EncodingForm)CCUnicode8:
					{
						DisplayDebugUCharValues(strInput, "Received (UTF16) from client...", ref bDebugDisplayMode);

						UTF8Encoding enc = new UTF8Encoding();
						byte[] ba = enc.GetBytes(strInput);

						// hand the managed byte array over to the raw buffer
						ByteArrToByteStar(ba, pBuf);

						// since we've changed the format, we don't care how many UTF16 words came in
						nInLen = ba.Length;

						DisplayDebugUCharValuesFromUTF8(pBuf, nInLen, "Sending (UTF8Bytes) to Converter/DLL...", ref bDebugDisplayMode);
						break;
					}
				case EncodingForm.UTF16:
					{
						if (cnCountIn != 0)
							nInLen = cnCountIn;   // item count should be the number of 16-bit words directly
						else
							nInLen = strInput.Length;

						DisplayDebugUCharValues(strInput, "Received (UTF16) from client and sending to Converter/DLL...", ref bDebugDisplayMode);

						// but this should be the count of bytes...
						nInLen *= 2;
						StringToByteStar(strInput, pBuf, nInLen);
						break;
					}
				case EncodingForm.UTF16BE:
				case EncodingForm.UTF32:
				case EncodingForm.UTF32BE:
					{
						if (cnCountIn != 0)
						{
							nInLen = cnCountIn; // item count is the number of Uni chars

							// for UTF32, the converter's actually expecting the length to be twice
							//	this much again.
							if (eEncFormIn != EncodingForm.UTF16BE)
								nInLen *= 2;
						}
						else
						{
							nInLen = strInput.Length;
						}

						// keep the 16-bit word count for the debug display below
						int nWords = nInLen;

						// for the byte count, double it (possibly again)
						nInLen *= 2;
						StringToByteStar(strInput, pBuf, nInLen);

						// display only *after* the buffer has been filled; dumping pBuf before
						//	the copy would just show whatever the caller's buffer happened to hold.
						DisplayDebugUCharValues(pBuf, nWords, "Received (UTF16BE/32/32BE) from client/Sending to Converter/DLL...", ref bDebugDisplayMode);
						break;
					}

				default:
					EncConverters.ThrowError(ErrStatus.InEncFormNotSupported);
					break;
			}

			// terminate with four zero bytes (wide enough for any of the forms above) and
			//	report the number of data bytes (terminator not included).
			pBuf[nInLen] = pBuf[nInLen + 1] = pBuf[nInLen + 2] = pBuf[nInLen + 3] = 0;
			nBufSize = (int)nInLen;

			return pBuf;
		}
Exemple #23
0
 // Tells whether the given encoding form is one of the two legacy forms
 //	(LegacyString or LegacyBytes).
 public static bool IsLegacyFormat(EncodingForm eForm)
 {
     switch (eForm)
     {
         case EncodingForm.LegacyString:
         case EncodingForm.LegacyBytes:
             return true;

         default:
             return false;
     }
 }
Exemple #24
0
        // This function is the meat of the conversion process. The input and output
        //	buffers are managed byte arrays that are pinned (via 'fixed') for the duration
        //	of the call, so everything that touches the raw pointers has to happen inside
        //	this one function.
        //	The basic structure is:
        //
        //	o	Check Input Data (null is reported as an error; empty returns "" right away)
        //  o   Give the sub-class (via PreConvert) the opportunity to load tables and do
        //      any special preprocessing it needs to ahead of the actual conversion
        //  o   Normalize the input data to a byte array based on its input EncodingForm
        //      (ECNormalizeData.GetBytes)
        //  o       Allocate a buffer for the output data (min 10000 bytes)
        //  o       Call the subclass (via DoConvert) to do the actual conversion.
        //  o   Normalize the output data to match the requested output EncodingForm
        //      (ECNormalizeData.GetString)
        //  o   Return the resultant string and the item count (via rciOutput).
        //
        //  Both buffers are allocated four bytes larger than the size reported to the
        //  helpers, because GetBytes and GetString each write a four-byte zero terminator
        //  just past the data they are given.
        //
        //  eInEncodingForm     form of sInput (may be defaulted by CheckInitEncForms)
        //  sInput              data to convert
        //  ciInput             item count of sInput, or 0 to take it from the string itself
        //  eOutEncodingForm    form requested for the result (may be defaulted as well)
        //  eNormalizeOutput    normalization requested for the result
        //  rciOutput           [out] item count of the returned string
        //  bForward            true for a forward conversion, false for reverse
        //
        protected virtual unsafe string InternalConvertEx
        (
            EncodingForm eInEncodingForm,
            string sInput,
            int ciInput,
            EncodingForm eOutEncodingForm,
            NormalizeFlags eNormalizeOutput,
            out int rciOutput,
            bool bForward
        )
        {
            // room for the zero terminator the normalize helpers append after the data
            const int cnTerminatorBytes = 4;

            Util.DebugWriteLine(className, "BEGIN");
            Util.DebugWriteLine(className,
                                "eEncFormIn " + eInEncodingForm.ToString() + ", " +
                                "eEncFormOut " + eOutEncodingForm.ToString());
            if (sInput == null)
            {
                EncConverters.ThrowError(ErrStatus.IncompleteChar);
            }
            if (sInput.Length == 0)
            {
                // nothing to convert, so don't bother the engine at all
                rciOutput = 0;
                return("");
            }

#if DEBUG && __MonoCS__
// for debugging only BEGIN
            // dump the input in a few different byte layouts
            byte[] baIn = System.Text.Encoding.BigEndianUnicode.GetBytes(sInput);  // easier to read
            Util.DebugWriteLine(className, Util.getDisplayBytes("Input BigEndianUnicode", baIn));
            baIn = System.Text.Encoding.Unicode.GetBytes(sInput);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Input Unicode", baIn));

            int     nInLen = sInput.Length;
            byte [] baIn2  = new byte[nInLen];
            for (int i = 0; i < nInLen; i++)
            {
                baIn2[i] = (byte)(sInput[i] & 0xFF);
            }
            Util.DebugWriteLine(className, Util.getDisplayBytes("Input Narrowized", baIn2));
// for debugging only END
#endif

            // if the user hasn't specified, then take the default case for the ConversionType:
            //  if L/RHS == eLegacy, then LegacyString
            //  if L/RHS == eUnicode, then UTF16
            CheckInitEncForms
            (
                bForward,
                ref eInEncodingForm,
                ref eOutEncodingForm
            );

            // allow the converter engine's (and/or its COM wrapper) to do some preprocessing.
            EncodingForm eFormEngineIn = EncodingForm.Unspecified, eFormEngineOut = EncodingForm.Unspecified;
            PreConvert
            (
                eInEncodingForm,    // [in] form in the BSTR
                ref eFormEngineIn,  // [out] form the conversion engine wants, etc.
                eOutEncodingForm,
                ref eFormEngineOut,
                ref eNormalizeOutput,
                bForward
            );

            // get enough space for us to normalize the input data (6x ought to be enough),
            //  plus slack for the terminator GetBytes writes after the data. (Without the
            //  slack, e.g. a single char that narrows to 3 bytes would have its terminator
            //  written past the end of a 6 byte buffer.)
            int    nBufSize    = sInput.Length * 6;
            byte[] abyInBuffer = new byte[nBufSize + cnTerminatorBytes];
            fixed(byte *lpInBuffer = abyInBuffer)
            {
                // use a helper class to normalize the data to the format needed by the engine
                //  (on return, nBufSize is the number of data bytes in lpInBuffer)
                Util.DebugWriteLine(className, "Calling GetBytes");
                ECNormalizeData.GetBytes(sInput, ciInput, eInEncodingForm,
                                         ((bForward) ? CodePageInput : CodePageOutput), eFormEngineIn, lpInBuffer,
                                         ref nBufSize, ref m_bDebugDisplayMode);
#if DEBUG && __MonoCS__
                byte[] baOut = new byte[nBufSize];
                ECNormalizeData.ByteStarToByteArr(lpInBuffer, nBufSize, baOut);
                Util.DebugWriteLine(className, Util.getDisplayBytes("Input Bytes", baOut));
#endif

                // get some space for the converter to fill; don't muck around: get at least
                //  10000 bytes for it. nOutLen is the capacity we tell the engine about; the
                //  array itself is a little bigger so that GetString can terminate the data
                //  even if the engine fills the buffer completely.
                int    nOutLen      = Math.Max(10000, nBufSize * 6);
                byte[] abyOutBuffer = new byte[nOutLen + cnTerminatorBytes];
                fixed(byte *lpOutBuffer = abyOutBuffer)
                {
                    lpOutBuffer[0] = lpOutBuffer[1] = lpOutBuffer[2] = lpOutBuffer[3] = 0;

                    // call the wrapper sub-classes' DoConvert to let them do it.
                    Util.DebugWriteLine(className, "Calling DoConvert");
                    DoConvert(lpInBuffer, nBufSize, lpOutBuffer, ref nOutLen);
#if DEBUG && __MonoCS__
                    Util.DebugWriteLine(className, "Output length " + nOutLen.ToString());
                    byte[] baOut2 = new byte[nOutLen];
                    ECNormalizeData.ByteStarToByteArr(lpOutBuffer, nOutLen, baOut2);
                    Util.DebugWriteLine(className, Util.getDisplayBytes("Output In Bytes", baOut2));
                    Util.DebugWriteLine(className, "Got val '" + System.Text.Encoding.Unicode.GetString(baOut2) + "'");
#endif
                    // turn the engine's bytes into a string in the form the caller asked for
                    string result = ECNormalizeData.GetString(lpOutBuffer, nOutLen, eOutEncodingForm,
                                                              ((bForward) ? CodePageOutput : CodePageInput), eFormEngineOut, eNormalizeOutput,
                                                              out rciOutput, ref m_bDebugDisplayMode);
#if DEBUG && __MonoCS__
                    Util.DebugWriteLine(className, "normalized result '" + result + "'");
                    byte[] baResult = System.Text.Encoding.BigEndianUnicode.GetBytes(result);
                    Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output in UTF16BE", baResult));
                    baResult = System.Text.Encoding.Unicode.GetBytes(result);
                    Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output in UTF16LE", baResult));
                    baResult = System.Text.Encoding.UTF8.GetBytes(result);
                    Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output In UTF8", baResult));
                    Util.DebugWriteLine(className, "Returning.");
#endif
                    return(result);
                }
            }
        }
        // Picks the encoding forms the engine is to be fed with and will hand back,
        //  based on whether each side of the ConversionType is Unicode or legacy, then
        //  loads the converter and passes the choices on to CppPreConvert.
        //
        //  eInEncodingForm     form of the data supplied by the caller
        //  eInFormEngine       [out] form the engine wants to be fed
        //  eOutEncodingForm    form the caller wants back
        //  eOutFormEngine      [out] form the engine will produce
        //  eNormalizeOutput    normalization request; forwarded to CppPreConvert
        //  bForward            true for a forward conversion, false for reverse
        protected override void PreConvert
        (
            EncodingForm eInEncodingForm,
            ref EncodingForm eInFormEngine,
            EncodingForm eOutEncodingForm,
            ref EncodingForm eOutFormEngine,
            ref NormalizeFlags eNormalizeOutput,
            bool bForward
        )
        {
            // the base class goes first
            base.PreConvert(eInEncodingForm, ref eInFormEngine,
                            eOutEncodingForm, ref eOutFormEngine,
                            ref eNormalizeOutput, bForward);

            // --- input (left-hand) side ---
            if (NormalizeLhsConversionType(ConversionType) != NormConversionType.eUnicode)
            {
                // legacy data goes in as plain bytes
                Util.DebugWriteLine(this, "eInFormEngine LegacyBytes");
                eInFormEngine = EncodingForm.LegacyBytes;
            }
            else if (Util.IsUnix)
            {
                // UTF-8 would be possible as well, but wide data works just fine
                Util.DebugWriteLine(this, "eInFormEngine UTF32");
                eInFormEngine = EncodingForm.UTF32;
            }
            else
            {
                // the Windows version definitely needs UTF16
                Util.DebugWriteLine(this, "eInFormEngine UTF16");
                eInFormEngine = EncodingForm.UTF16;
            }

            // --- output (right-hand) side: same choices ---
            if (NormalizeRhsConversionType(ConversionType) != NormConversionType.eUnicode)
            {
                Util.DebugWriteLine(this, "eOutFormEngine LegacyBytes");
                eOutFormEngine = EncodingForm.LegacyBytes;
            }
            else if (Util.IsUnix)
            {
                Util.DebugWriteLine(this, "eOutFormEngine UTF32");
                eOutFormEngine = EncodingForm.UTF32;
            }
            else
            {
                Util.DebugWriteLine(this, "eOutFormEngine UTF16");
                eOutFormEngine = EncodingForm.UTF16;
            }

            // now is the time to do the load...
            Load();

            // ... and then to pass the encoding form settings on to the C++ side
            CppPreConvert((int)eInFormEngine,
                          (int)eOutFormEngine,
                          (int)eNormalizeOutput, bForward);
        }
Exemple #26
0
		// Verifies that the requested input and output encoding forms fit this
		//	converter's conversion type for the given direction. Any mismatch is reported
		//	via EncConverters.ThrowError:
		//
		//	o	InvalidConversionType	reverse requested on a unidirectional converter
		//	o	InEncFormNotSupported	inEnc is legacy where Unicode is needed (or vice versa)
		//	o	OutEncFormNotSupported	outEnc is legacy where Unicode is needed (or vice versa)
		//
		//	bForward	true for a forward conversion, false for reverse
		//	inEnc		encoding form of the data being supplied
		//	outEnc		encoding form requested for the result
		protected void CheckForBadForm
			(
			bool            bForward,
			EncodingForm    inEnc,
			EncodingForm    outEnc
			)
		{
			// a one-way converter can't be run in reverse
			if( EncConverters.IsUnidirectional(m_eConversionType) && !bForward )
			{
				EncConverters.ThrowError(ErrStatus.InvalidConversionType);
				return;
			}

			bool bLhsUnicode = (NormalizeLhsConversionType(m_eConversionType) == NormConversionType.eUnicode);
			bool bRhsUnicode = (NormalizeRhsConversionType(m_eConversionType) == NormConversionType.eUnicode);

			// going forward the input is matched against the lhs and the output against the
			//	rhs; in reverse, it's the other way around.
			bool bInSideUnicode  = bForward ? bLhsUnicode : bRhsUnicode;
			bool bOutSideUnicode = bForward ? bRhsUnicode : bLhsUnicode;

			// a form is wrong if it's legacy where the side is Unicode, or non-legacy where
			//	the side is legacy (i.e. exactly when the two flags are equal).
			bool bBadIn  = (IsLegacyFormat(inEnc) == bInSideUnicode);
			bool bBadOut = (IsLegacyFormat(outEnc) == bOutSideUnicode);

			// the lhs is always complained about before the rhs
			if( bForward )
			{
				if( bBadIn )
					EncConverters.ThrowError(ErrStatus.InEncFormNotSupported);
				if( bBadOut )
					EncConverters.ThrowError(ErrStatus.OutEncFormNotSupported);
			}
			else
			{
				if( bBadOut )
					EncConverters.ThrowError(ErrStatus.OutEncFormNotSupported);
				if( bBadIn )
					EncConverters.ThrowError(ErrStatus.InEncFormNotSupported);
			}
		}
Exemple #27
0
        // This function is the meat of the conversion process. The input and output
        //	buffers are managed byte arrays that are pinned (via 'fixed') for the duration
        //	of the call, so everything that touches the raw pointers has to happen inside
        //	this one function.
        //	The basic structure is:
        //
        //	o	Check Input Data (null is reported as an error; empty returns "" right away)
        //	o	Give the sub-class (via PreConvert) the opportunity to load tables and do
        //		any special preprocessing it needs to ahead of the actual conversion
        //	o	Normalize the input data to a byte array based on its input EncodingForm
        //		(ECNormalizeData.GetBytes)
        //	o		Allocate a buffer for the output data (min 10000 bytes)
        //	o		Call the subclass (via DoConvert) to do the actual conversion.
        //	o	Normalize the output data to match the requested output EncodingForm
        //		(ECNormalizeData.GetString)
        //	o	Return the resultant string and the item count (via rciOutput).
        //
        //	Both buffers are allocated four bytes larger than the size reported to the
        //	helpers, because GetBytes and GetString each write a four-byte zero terminator
        //	just past the data they are given.
        //
        //	eInEncodingForm		form of sInput (may be defaulted by CheckInitEncForms)
        //	sInput				data to convert
        //	ciInput				item count of sInput, or 0 to take it from the string itself
        //	eOutEncodingForm	form requested for the result (may be defaulted as well)
        //	eNormalizeOutput	normalization requested for the result
        //	rciOutput			[out] item count of the returned string
        //	bForward			true for a forward conversion, false for reverse
        //
        protected virtual unsafe string InternalConvertEx
        (
            EncodingForm eInEncodingForm,
            string sInput,
            int ciInput,
            EncodingForm eOutEncodingForm,
            NormalizeFlags eNormalizeOutput,
            out int rciOutput,
            bool bForward
        )
        {
            // room for the zero terminator the normalize helpers append after the data
            const int cnTerminatorBytes = 4;

            if (sInput == null)
            {
                EncConverters.ThrowError(ErrStatus.IncompleteChar);
            }

            // nothing to convert: return right away. (Pinning a zero-length array yields a
            //  null pointer, which the code below must never be handed.)
            if (sInput.Length == 0)
            {
                rciOutput = 0;
                return("");
            }

            // if the user hasn't specified, then take the default case for the ConversionType:
            //  if L/RHS == eLegacy, then LegacyString
            //  if L/RHS == eUnicode, then UTF16
            CheckInitEncForms
            (
                bForward,
                ref eInEncodingForm,
                ref eOutEncodingForm
            );

            // allow the converter engine's (and/or its COM wrapper) to do some preprocessing.
            EncodingForm eFormEngineIn = EncodingForm.Unspecified, eFormEngineOut = EncodingForm.Unspecified;

            PreConvert
            (
                eInEncodingForm,                        // [in] form in the BSTR
                ref eFormEngineIn,                      // [out] form the conversion engine wants, etc.
                eOutEncodingForm,
                ref eFormEngineOut,
                ref eNormalizeOutput,
                bForward
            );

            // get enough space for us to normalize the input data (6x ought to be enough),
            //  plus slack for the terminator GetBytes writes after the data. (Without the
            //  slack, e.g. a single char that narrows to 3 bytes would have its terminator
            //  written past the end of a 6 byte buffer.)
            int nBufSize = sInput.Length * 6;

            byte[] abyInBuffer = new byte[nBufSize + cnTerminatorBytes];
            fixed(byte *lpInBuffer = abyInBuffer)
            {
                // use a helper class to normalize the data to the format needed by the engine
                //  (on return, nBufSize is the number of data bytes in lpInBuffer)
                ECNormalizeData.GetBytes(sInput, ciInput, eInEncodingForm,
                                         ((bForward) ? CodePageInput : CodePageOutput), eFormEngineIn, lpInBuffer,
                                         ref nBufSize, ref m_bDebugDisplayMode);

                // get some space for the converter to fill; don't muck around: get at least
                //  10000 bytes for it. nOutLen is the capacity we tell the engine about; the
                //  array itself is a little bigger so that GetString can terminate the data
                //  even if the engine fills the buffer completely.
                int nOutLen = Math.Max(10000, nBufSize * 6);

                byte[] abyOutBuffer = new byte[nOutLen + cnTerminatorBytes];
                fixed(byte *lpOutBuffer = abyOutBuffer)
                {
                    lpOutBuffer[0] = lpOutBuffer[1] = lpOutBuffer[2] = lpOutBuffer[3] = 0;

                    // call the wrapper sub-classes' DoConvert to let them do it.
                    DoConvert(lpInBuffer, nBufSize, lpOutBuffer, ref nOutLen);

                    // turn the engine's bytes into a string in the form the caller asked for
                    return(ECNormalizeData.GetString(lpOutBuffer, nOutLen, eOutEncodingForm,
                                                     ((bForward) ? CodePageOutput : CodePageInput), eFormEngineOut, eNormalizeOutput,
                                                     out rciOutput, ref m_bDebugDisplayMode));
                }
            }
        }
Exemple #28
0
        internal static unsafe string GetString(byte *lpOutBuffer, int nOutLen, EncodingForm eOutEncodingForm, int nCodePageOut, EncodingForm eFormEngineOut, NormalizeFlags eNormalizeOutput, out int rciOutput, ref bool bDebugDisplayMode)
        {
            // null terminate the output and turn it into a (real) array of bytes
            Util.DebugWriteLine(className, "BEGIN");
            lpOutBuffer[nOutLen] = lpOutBuffer[nOutLen + 1] = lpOutBuffer[nOutLen + 2] = lpOutBuffer[nOutLen + 3] = 0;
            byte[] baOut = new byte[nOutLen];
            ByteStarToByteArr(lpOutBuffer, nOutLen, baOut);
            Util.DebugWriteLine(className, Util.getDisplayBytes("byte array", baOut));

            // check to see if the engine handled the given output form. If not, then see
            //  if it's a conversion we can easily do (otherwise we'll ask TEC to do the
            //  conversion for us (later) so that all engines can handle all possible
            //  output encoding forms.
            Util.DebugWriteLine(className,
                                "eOutEncodingForm " + eOutEncodingForm.ToString() + ", " +
                                "eFormEngineOut " + eFormEngineOut.ToString());
            if (eOutEncodingForm != eFormEngineOut)
            {
                if (EncConverter.IsLegacyFormat(eOutEncodingForm))
                {
                    if ((eFormEngineOut == EncodingForm.LegacyBytes) && (eOutEncodingForm == EncodingForm.LegacyString))
                    {
                        // in this case, just *pretend* the engine outputs LegacyString (the
                        //  LegacyString case below really means "convert LegacyBytes to
                        //  LegacyString)
                        eFormEngineOut = eOutEncodingForm;
                    }
                }
                else    // unicode forms
                {
                    // if the engine gives UTF8 and the client wants UTF16...
                    if ((eOutEncodingForm == EncodingForm.UTF16) && (eFormEngineOut == EncodingForm.UTF8Bytes))
                    {
                        // use the special form to convert it below
                        Util.DebugWriteLine(className, "using CCUnicode8");
                        eOutEncodingForm = eFormEngineOut = (EncodingForm)CCUnicode8;
                    }
                    // or vise versa
                    else if ((eFormEngineOut == EncodingForm.UTF16) &&
                             ((eOutEncodingForm == EncodingForm.UTF8Bytes) || (eOutEncodingForm == EncodingForm.UTF8String)))
                    {
                        // engine gave UTF16, but user wants a UTF8 flavor.
                        // Decoder d = Encoding.Unicode.GetChars(baOut);
                        // d.GetChars(
                        UTF8Encoding enc = new UTF8Encoding();
                        baOut          = enc.GetBytes(Encoding.Unicode.GetChars(baOut));
                        eFormEngineOut = eOutEncodingForm;
                        nOutLen        = baOut.Length;
                    }
                    // these conversions we can do ourself
                    else if ((eOutEncodingForm == EncodingForm.UTF8String) ||
                             (eOutEncodingForm == EncodingForm.UTF16))
                    {
#if _MSC_VER
                        // Doesn't this wipe out the distinction?
                        // On Linux we need to be able to convert the output from UTF32 to UTF16.
                        eFormEngineOut = eOutEncodingForm;
#endif
                    }
                }
            }

            int    nItems = 0, nCharsLen = 0;
            char[] caOut = null;
            switch (eFormEngineOut)
            {
            case EncodingForm.LegacyBytes:
            case EncodingForm.UTF8Bytes:
            {
                if (eFormEngineOut == EncodingForm.LegacyBytes)
                {
                    DisplayDebugCharValues(baOut, "Received (LegacyBytes) back from Converter/DLL (returning as LegacyBytes)...", ref bDebugDisplayMode);
                }
                else
                {
                    DisplayDebugUCharValuesFromUTF8(baOut, "Received (UTF8Bytes) back from Converter/DLL (returning as UTF8Bytes)...", ref bDebugDisplayMode);
                }

                // stuff the returned 'bytes' into the BSTR as narrow characters rather than
                //  converting to wide
                nItems    = nOutLen;
                nCharsLen = (nOutLen + 1) / 2;
                caOut     = new char[nCharsLen];
                ByteArrToCharArr(baOut, caOut);
                break;
            }

            case EncodingForm.LegacyString:
            {
                DisplayDebugCharValues(baOut, "Received (LegacyBytes) back from Converter/DLL (returning as LegacyString)...", ref bDebugDisplayMode);

                nCharsLen = nItems = nOutLen;

#if __MonoCS__
                // Narrowizing by code page 0 doesn't seem to be what we want on Linux.
                // Treating it as a symbol font or stripping off the low byte works better.
                if (nCodePageOut == 0)
                {
                    caOut = BruteForceWiden(nCodePageOut, baOut, nCharsLen);
                }
                else
#else
                if (true)
#endif
                {
                    try
                    {
                        // this will throw (for some reason) when doing symbol fonts
                        //  (apparently, CP_SYMBOL is no longer supported).
                        caOut = Encoding.GetEncoding(nCodePageOut).GetChars(baOut);
                    }
                    catch
                    {
                        if ((nCodePageOut == EncConverters.cnSymbolFontCodePage) || (nCodePageOut == EncConverters.cnIso8859_1CodePage))
                        {
                            caOut = BruteForceWiden(nCodePageOut, baOut, nCharsLen);
                        }
                        else
                        {
                            throw;
                        }
                    }
                }

                break;
            }

            case EncodingForm.UTF16:
            {
                nCharsLen = nItems = (nOutLen / 2);

                DisplayDebugUCharValues(baOut, "Received (UTF16) back from Converter/DLL (returning as UTF16)...", ref bDebugDisplayMode);

                caOut = Encoding.Unicode.GetChars(baOut);
                break;
            }

            case EncodingForm.UTF8String:
            {
                DisplayDebugUCharValuesFromUTF8(baOut, "Received (UTF8Bytes) back from Converter/DLL (returning as UTF8String)...", ref bDebugDisplayMode);

                // this encoding form is always encoded using the default code page.
                caOut = Encoding.Default.GetChars(baOut);

                nCharsLen = nItems = nOutLen;
                break;
            }

            case (EncodingForm)CCUnicode8:
            {
                DisplayDebugUCharValuesFromUTF8(baOut, "Received (UTF8Bytes) back from Converter/DLL (returning as UTF16)...", ref bDebugDisplayMode);

                caOut = Encoding.UTF8.GetChars(baOut);

                nCharsLen = nItems = caOut.Length;
                break;
            }

            case EncodingForm.UTF16BE:
            case EncodingForm.UTF32:
            case EncodingForm.UTF32BE:
            {
                nCharsLen = nItems = nOutLen / 2;

                DisplayDebugUCharValues(baOut, "Received (UTF16BE/32/32BE) back from Converter/DLL...", ref bDebugDisplayMode);

                caOut = new char[nCharsLen];
                ByteArrToCharArr(baOut, caOut);

                // for UTF32, it is half again as little in the item count.
                if (eFormEngineOut != EncodingForm.UTF16BE)
                {
                    nItems /= 2;
                }
                break;
            }

            default:
                EncConverters.ThrowError(ErrStatus.OutEncFormNotSupported);
                break;
            }

#if !v22_AllowEmptyReturn
            if ((nCharsLen <= 0)
#if DEBUG
                || (nCharsLen != caOut.Length)
#endif
                )
            {
                EncConverters.ThrowError(ErrStatus.NoReturnDataBadOutForm);
            }
#endif

            // check to see if the engine handled the given output form. If not, then ask
            //  TEC to do the conversion for us so that all engines can handle all possible
            //  output encoding forms (e.g. caller requested utf32, but above CC could only
            //  give us utf16/8)
            // Also, if the caller wanted something other than "None" for the eNormalizeOutput,
            //  then we also have to call TEC for that as well (but I think this only makes
            //  sense if the output is utf16(be) or utf32(be))
            // p.s. if this had been a TEC converter, then the eNormalizeOutput flag would
            //  ahready have been reset to None (by this point), since we would have directly
            //  requested that normalized form when we created the converter--see
            //  TecEncConverter.PreConvert)
            string strOutput = new string(caOut);
#if DEBUG
            byte[] byteArray = Encoding.BigEndianUnicode.GetBytes(caOut);
            Util.DebugWriteLine(className, Util.getDisplayBytes("characters", byteArray));
            byte[] baResult = System.Text.Encoding.BigEndianUnicode.GetBytes(strOutput);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized strOutput in UTF16BE", baResult));
#endif
            if ((eFormEngineOut != eOutEncodingForm) ||
                (eNormalizeOutput != NormalizeFlags.None))
            {
                strOutput = EncConverters.UnicodeEncodingFormConvertEx(strOutput, eFormEngineOut, nItems, eOutEncodingForm, eNormalizeOutput, out nItems);
            }

            DisplayDebugUCharValues(strOutput, "Returning back to client...", ref bDebugDisplayMode);

            rciOutput = nItems;
            return(strOutput);
        }
Exemple #29
0
		/// <summary>
		/// Tells whether the given encoding form is one of the two legacy forms
		/// (LegacyString or LegacyBytes).
		/// </summary>
		/// <param name="eForm">the encoding form to classify</param>
		/// <returns>true for LegacyString or LegacyBytes; false for every other form</returns>
		public static bool IsLegacyFormat(EncodingForm eForm)
		{
			switch (eForm)
			{
				case EncodingForm.LegacyString:
				case EncodingForm.LegacyBytes:
					return true;

				default:
					return false;
			}
		}
		/// <summary>
		/// Pre-conversion hook: after the base class has run, forces both engine forms
		/// to LegacyBytes, records the requested direction in m_bForward and calls
		/// LoadForward or LoadReverse accordingly.
		/// </summary>
		/// <param name="eInEncodingForm">form of the caller's input (passed on to the base class)</param>
		/// <param name="eInFormEngine">[out] set to LegacyBytes</param>
		/// <param name="eOutEncodingForm">form the caller wants back (passed on to the base class)</param>
		/// <param name="eOutFormEngine">[out] set to LegacyBytes</param>
		/// <param name="eNormalizeOutput">normalization flags (only the base class touches them here)</param>
		/// <param name="bForward">true for a forward conversion, false for reverse</param>
		protected override void PreConvert
			(
			EncodingForm eInEncodingForm,
			ref EncodingForm eInFormEngine,
			EncodingForm eOutEncodingForm,
			ref EncodingForm eOutFormEngine,
			ref NormalizeFlags eNormalizeOutput,
			bool bForward
			)
		{
			// the base implementation always goes first
			base.PreConvert(
				eInEncodingForm, ref eInFormEngine,
				eOutEncodingForm, ref eOutFormEngine,
				ref eNormalizeOutput, bForward);

			// whatever the base class picked, this engine is fed (and emits) LegacyBytes
			eOutFormEngine = EncodingForm.LegacyBytes;
			eInFormEngine = EncodingForm.LegacyBytes;

			// remember the direction (the original note says DoConvert looks at it later),
			//  then do the load for that direction now.
			m_bForward = bForward;
			if (!m_bForward)
			{
				LoadReverse();
			}
			else
			{
				LoadForward();
			}
		}
Exemple #31
0
        /// <summary>
        /// Converts input supplied as a raw byte array (e.g. legacy data) and returns the
        /// result as a string. The input bytes are passed to DoConvert as-is, and the
        /// engine's output is turned into a string by ECNormalizeData.GetString.
        /// </summary>
        /// <param name="eInEncodingForm">form of the input; Unspecified is resolved by CheckInitEncForms</param>
        /// <param name="baInput">the input bytes; null is reported via ErrStatus.IncompleteChar</param>
        /// <param name="eOutEncodingForm">form requested for the output; Unspecified is resolved by CheckInitEncForms</param>
        /// <param name="eNormalizeOutput">normalization requested for the output (PreConvert may change it)</param>
        /// <param name="rciOutput">[out] item count reported by ECNormalizeData.GetString (0 for empty input)</param>
        /// <param name="bForward">true for a forward conversion, false for reverse</param>
        /// <returns>the converted string; "" when <paramref name="baInput"/> is empty</returns>
        protected virtual unsafe string InternalConvertEx(EncodingForm eInEncodingForm,
                                                          byte[] baInput,
                                                          EncodingForm eOutEncodingForm,
                                                          NormalizeFlags eNormalizeOutput,
                                                          out int rciOutput,
                                                          bool bForward)
        {
            // ECNormalizeData.GetString null-terminates the engine output in place by
            //  writing 4 zero bytes starting at lpOutBuffer[nOutLen], so the buffer must
            //  be this much larger than the capacity we advertise to DoConvert.
            const int cnTerminatorBytes = 4;

            Util.DebugWriteLine(className, "(input bytes) BEGIN");
            Util.DebugWriteLine(className,
                                "eEncFormIn " + eInEncodingForm.ToString() + ", " +
                                "eEncFormOut " + eOutEncodingForm.ToString());
            if (baInput == null)
            {
                EncConverters.ThrowError(ErrStatus.IncompleteChar);
            }
            if (baInput.Length == 0)
            {
                // nothing to convert
                rciOutput = 0;
                return("");
            }
            // if the user hasn't specified, then take the default case for the ConversionType:
            //  if L/RHS == eLegacy, then LegacyString
            //  if L/RHS == eUnicode, then UTF16
            CheckInitEncForms(bForward, ref eInEncodingForm, ref eOutEncodingForm);

            // allow the converter engine (and/or its COM wrapper) to do some preprocessing.
            EncodingForm eFormEngineIn = EncodingForm.Unspecified, eFormEngineOut = EncodingForm.Unspecified;

            PreConvert(
                eInEncodingForm,        // [in] form of the caller's data
                ref eFormEngineIn,      // [out] form the conversion engine wants, etc.
                eOutEncodingForm,
                ref eFormEngineOut,
                ref eNormalizeOutput,
                bForward);
            int nBufSize = baInput.Length;

            fixed(byte *lpInBuffer = baInput)
            {
                // capacity offered to the engine: at least 10000 bytes, or 6x the input
                int nOutLen = Math.Max(10000, nBufSize * 6);

                // allocate the spare terminator bytes on top of the advertised capacity so
                //  that GetString cannot write past the end of the pinned array even when
                //  the engine fills the whole buffer.
                byte[] abyOutBuffer = new byte[nOutLen + cnTerminatorBytes];
                fixed(byte *lpOutBuffer = abyOutBuffer)
                {
                    lpOutBuffer[0] = lpOutBuffer[1] = lpOutBuffer[2] = lpOutBuffer[3] = 0;

                    // call the wrapper sub-classes' DoConvert to let them do it.
                    //  (nOutLen goes in as the capacity and comes back as the length written)
                    Util.DebugWriteLine(className, "Calling DoConvert");
                    DoConvert(lpInBuffer, nBufSize, lpOutBuffer, ref nOutLen);
                    Util.DebugWriteLine(className, "Output length " + nOutLen.ToString());

                    // managed copy of the raw engine output (only inspected by the debug trace)
                    byte[] baOut = new byte[nOutLen];
                    ECNormalizeData.ByteStarToByteArr(lpOutBuffer, nOutLen, baOut);
#if DEBUG
                    Util.DebugWriteLine(className, Util.getDisplayBytes("Output In Bytes", baOut));
                    Util.DebugWriteLine(className, "Got val '" + System.Text.Encoding.Unicode.GetString(baOut) + "'");
#endif
                    // the output code page is the one belonging to the side we are converting *to*
                    string result = ECNormalizeData.GetString(lpOutBuffer, nOutLen, eOutEncodingForm,
                                                              ((bForward) ? CodePageOutput : CodePageInput), eFormEngineOut, eNormalizeOutput,
                                                              out rciOutput, ref m_bDebugDisplayMode);
#if DEBUG
                    Util.DebugWriteLine(className, "normalized result '" + result + "'");
                    byte[] baResult = System.Text.Encoding.BigEndianUnicode.GetBytes(result);
                    Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output in UTF16BE", baResult));
                    baResult = System.Text.Encoding.Unicode.GetBytes(result);
                    Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output in UTF16LE", baResult));
                    baResult = System.Text.Encoding.UTF8.GetBytes(result);
                    Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output In UTF8", baResult));
                    Util.DebugWriteLine(className, "Returning.");
#endif
                    return(result);
                }
            }
        }
Exemple #32
0
        /// <summary>
        /// Variant of InternalConvertEx for callers that want the converted data back as
        /// raw bytes (e.g. legacy data) instead of a string: the input string is turned
        /// into the engine's input form by ECNormalizeData.GetBytes, DoConvert is run,
        /// and the bytes it produced are returned unmodified.
        /// </summary>
        /// <param name="eInEncodingForm">form of the input; Unspecified is resolved by CheckInitEncForms</param>
        /// <param name="sInput">the input string; null is reported via ErrStatus.IncompleteChar</param>
        /// <param name="eOutEncodingForm">form requested for the output; Unspecified is resolved by CheckInitEncForms</param>
        /// <param name="eNormalizeOutput">normalization flags handed to PreConvert</param>
        /// <param name="rciOutput">[out] number of bytes returned (0 for empty input)</param>
        /// <param name="bForward">true for a forward conversion, false for reverse</param>
        /// <returns>the bytes written by DoConvert; an empty array when the input is empty</returns>
        protected virtual unsafe byte[] InternalConvertEx(EncodingForm eInEncodingForm,
                                                          string sInput,
                                                          EncodingForm eOutEncodingForm,
                                                          NormalizeFlags eNormalizeOutput,
                                                          out int rciOutput,
                                                          bool bForward)
        {
            Util.DebugWriteLine(className, "(output bytes) BEGIN");
            Util.DebugWriteLine(className,
                                "eEncFormIn " + eInEncodingForm.ToString() + ", " +
                                "eEncFormOut " + eOutEncodingForm.ToString());

            // a missing input string is an error...
            if (sInput == null)
            {
                EncConverters.ThrowError(ErrStatus.IncompleteChar);
            }

            // ... whereas an empty one simply converts to "no bytes"
            Util.DebugWriteLine(className, "sInput.Length() is " + sInput.Length.ToString() + ".");
            if (sInput.Length == 0)
            {
                rciOutput = 0;
                return(new byte[0]);
            }
            Util.DebugWriteLine(className, "sInput is " + sInput + ".");

            // fill in any Unspecified form with the default for the conversion type
            //  (LegacyString for a legacy side, the default Unicode form otherwise)
            CheckInitEncForms(bForward, ref eInEncodingForm, ref eOutEncodingForm);

            // let the engine (and/or its wrapper) say which forms it actually works with
            EncodingForm eFormEngineIn  = EncodingForm.Unspecified;
            EncodingForm eFormEngineOut = EncodingForm.Unspecified;
            PreConvert(eInEncodingForm, ref eFormEngineIn,
                       eOutEncodingForm, ref eFormEngineOut,
                       ref eNormalizeOutput, bForward);

            // room for the input in the engine's form: six bytes per input char
            int cbInput = sInput.Length * 6;
            byte[] abyIn = new byte[cbInput];

            fixed(byte *pbIn = abyIn)
            {
                // the helper fills pbIn and updates cbInput through its ref parameter
                Util.DebugWriteLine(className, "Calling GetBytes");
                ECNormalizeData.GetBytes(sInput, sInput.Length, eInEncodingForm,
                                         ((bForward) ? CodePageInput : CodePageOutput), eFormEngineIn, pbIn,
                                         ref cbInput, ref m_bDebugDisplayMode);

                // output capacity: never less than 10000 bytes, otherwise 6x the input size
                int cbOutput = Math.Max(10000, cbInput * 6);
                byte[] abyOut = new byte[cbOutput];

                fixed(byte *pbOut = abyOut)
                {
                    pbOut[3] = 0;
                    pbOut[2] = 0;
                    pbOut[1] = 0;
                    pbOut[0] = 0;

                    // hand off to the sub-class; cbOutput is passed by ref and is used
                    //  below as the size of the result
                    Util.DebugWriteLine(className, "Calling DoConvert");
                    DoConvert(pbIn, cbInput, pbOut, ref cbOutput);

                    // copy what the engine wrote into a right-sized managed array
                    byte[] abyResult = new byte[cbOutput];
                    ECNormalizeData.ByteStarToByteArr(pbOut, cbOutput, abyResult);
#if DEBUG
                    Util.DebugWriteLine(className, "Output length " + cbOutput.ToString());
                    Util.DebugWriteLine(className, Util.getDisplayBytes("Output In Bytes", abyResult));
                    Util.DebugWriteLine(className, "Returning.");
#endif
                    rciOutput = cbOutput;
                    return(abyResult);
                }
            }
        }
Exemple #33
0
		/// <summary>
		/// Replaces an Unspecified input and/or output encoding form with the default
		/// for this converter's conversion type, then validates the resulting pair with
		/// CheckForBadForm.
		/// </summary>
		/// <param name="bForward">true for a forward conversion, false for reverse</param>
		/// <param name="eInEncodingForm">input form; overwritten only when Unspecified</param>
		/// <param name="eOutEncodingForm">output form; overwritten only when Unspecified</param>
		protected void CheckInitEncForms
			(
			bool                bForward,
			ref EncodingForm    eInEncodingForm,
			ref EncodingForm    eOutEncodingForm
			)
		{
			// Defaults: a legacy side becomes LegacyString, a Unicode side becomes
			//  whatever DefaultUnicodeEncForm chooses.

			// input: comes from the LHS going forward, from the RHS going in reverse
			if (eInEncodingForm == EncodingForm.Unspecified)
			{
				NormConversionType eInputSide = bForward
					? NormalizeLhsConversionType(m_eConversionType)
					: NormalizeRhsConversionType(m_eConversionType);

				eInEncodingForm = (eInputSide == NormConversionType.eLegacy)
					? EncodingForm.LegacyString
					: DefaultUnicodeEncForm(bForward, true);
			}

			// output: the opposite side from the input
			if (eOutEncodingForm == EncodingForm.Unspecified)
			{
				NormConversionType eOutputSide = bForward
					? NormalizeRhsConversionType(m_eConversionType)
					: NormalizeLhsConversionType(m_eConversionType);

				eOutEncodingForm = (eOutputSide == NormConversionType.eLegacy)
					? EncodingForm.LegacyString
					: DefaultUnicodeEncForm(bForward, false);
			}

			// whether defaulted or caller-supplied, the forms must suit the conversion type
			CheckForBadForm(bForward, eInEncodingForm, eOutEncodingForm);
		}
Exemple #34
0
 /// <summary>
 /// Verifies that the input and output encoding forms agree with this converter's
 /// conversion type and direction: a Unicode side must not be given a legacy form
 /// and a legacy side must be given one. Also rejects a reverse conversion on a
 /// unidirectional converter. Problems are reported through EncConverters.ThrowError.
 /// </summary>
 /// <param name="bForward">true for a forward conversion, false for reverse</param>
 /// <param name="inEnc">encoding form of the input</param>
 /// <param name="outEnc">encoding form of the output</param>
 protected void CheckForBadForm
 (
     bool bForward,
     EncodingForm inEnc,
     EncodingForm outEnc
 )
 {
     // a one-way converter cannot be run backwards; nothing else is checked in that case
     if (EncConverters.IsUnidirectional(m_eConversionType) && !bForward)
     {
         EncConverters.ThrowError(ErrStatus.InvalidConversionType);
         return;
     }

     bool bLhsUnicode = (NormalizeLhsConversionType(m_eConversionType) == NormConversionType.eUnicode);
     bool bRhsUnicode = (NormalizeRhsConversionType(m_eConversionType) == NormConversionType.eUnicode);

     // going forward the input is the LHS and the output the RHS; in reverse they swap
     bool bInputSideUnicode  = bForward ? bLhsUnicode : bRhsUnicode;
     bool bOutputSideUnicode = bForward ? bRhsUnicode : bLhsUnicode;

     // a form is wrong when its "legacy-ness" equals the "Unicode-ness" of its side
     bool bBadInput  = (IsLegacyFormat(inEnc) == bInputSideUnicode);
     bool bBadOutput = (IsLegacyFormat(outEnc) == bOutputSideUnicode);

     // the LHS is always examined first, i.e. input first when going forward and
     //  output first when going in reverse
     if (bForward)
     {
         if (bBadInput)
         {
             EncConverters.ThrowError(ErrStatus.InEncFormNotSupported);
         }
         if (bBadOutput)
         {
             EncConverters.ThrowError(ErrStatus.OutEncFormNotSupported);
         }
     }
     else
     {
         if (bBadOutput)
         {
             EncConverters.ThrowError(ErrStatus.OutEncFormNotSupported);
         }
         if (bBadInput)
         {
             EncConverters.ThrowError(ErrStatus.InEncFormNotSupported);
         }
     }
 }
Exemple #35
0
        // we override this method from EncConverter so that we can call all of the step's
        //  convert functions in turn (i.e. for this one, it isn't sufficient to just
        //  provide a "DoConvert" method)
        /// <summary>
        /// Runs the input through every step converter in m_aEncConverter, feeding each
        /// step's output (string, encoding form and item count) to the next step. Going
        /// forward the steps run first-to-last in their configured directions; in reverse
        /// they run last-to-first with each configured direction inverted.
        /// </summary>
        /// <param name="eInEncodingForm">encoding form of <paramref name="sInput"/> (given to the first step run)</param>
        /// <param name="sInput">the data to convert</param>
        /// <param name="ciInput">item count of the input, passed through to the first step run</param>
        /// <param name="eOutEncodingForm">encoding form requested from the final step</param>
        /// <param name="eNormalizeOutput">normalization requested from the final step (earlier steps use m_aNormalizeOutput)</param>
        /// <param name="rciOutput">[out] item count reported by the final step (0 if there are no steps)</param>
        /// <param name="bForward">true to run the chain forward, false to run it in reverse</param>
        /// <returns>the output of the final step, or null if there are no steps</returns>
        protected override string InternalConvertEx
        (
            EncodingForm eInEncodingForm,
            string sInput,
            int ciInput,
            EncodingForm eOutEncodingForm,
            NormalizeFlags eNormalizeOutput,
            out int rciOutput,
            bool bForward
        )
        {
            int          nSize     = m_aEncConverter.Count;
            string       strOutput = null;
            EncodingForm inForm    = eInEncodingForm;
            Int32        ciOutput  = 0;

            // NOTE: no try/catch here on purpose. The previous
            //  'catch (ApplicationException e) { throw e; }' only reset the stack trace
            //  of the exception; letting it propagate untouched reaches the same callers
            //  with the same exception type and the original trace intact.
            for (int nStep = 0; nStep < nSize; nStep++)
            {
                // forward: 0, 1, ... nSize-1; reverse: nSize-1, ... 1, 0
                int  i         = bForward ? nStep : (nSize - 1 - nStep);
                bool bLastStep = (nStep == (nSize - 1));

                IEncConverter rConverter = (IEncConverter)m_aEncConverter[i];
                if (rConverter == null)
                {
                    EncConverters.ThrowError(ErrStatus.MissingConverter);
                }

                rConverter.Debug = Debug;

                // intermediate steps use their configured normalization; the final step
                //  uses what the caller asked for
                NormalizeFlags eNormalizeFlags = (NormalizeFlags)m_aNormalizeOutput[i];
                if (bLastStep)
                {
                    eNormalizeFlags = eNormalizeOutput;
                }

                ConvType eConversionType = rConverter.ConversionType;

                // when running the chain in reverse, each step runs opposite to the
                //  direction it was configured with
                bool bDirectionForward = (bool)m_aDirectionForward[i];
                if (!bForward)
                {
                    bDirectionForward = !bDirectionForward;
                }

                // if this is the last one, then use the user's requested output format;
                //  otherwise pick it from the side of the step that produces the output:
                //  the RHS when the step runs forward, the LHS when it runs in reverse.
                //  (The reverse loop used to look at the RHS in both cases.)
                EncodingForm outForm;
                if (bLastStep)
                {
                    outForm = eOutEncodingForm;
                }
                else
                {
                    NormConversionType eType = bDirectionForward
                        ? NormalizeRhsConversionType(eConversionType)
                        : NormalizeLhsConversionType(eConversionType);

                    outForm = (eType == NormConversionType.eLegacy)
                        ? EncodingForm.LegacyBytes
                        : EncodingForm.Unspecified;
                }

                strOutput = rConverter.ConvertEx(
                    sInput,
                    inForm,
                    ciInput,
                    outForm,
                    out ciOutput,
                    eNormalizeFlags,
                    bDirectionForward);

                // setup input for the next step
                sInput  = strOutput;
                inForm  = outForm;
                ciInput = ciOutput;

                // it's possible the user cancelled the debug mode so get it back
                Debug = rConverter.Debug;
            }

            rciOutput = ciOutput;
            return(strOutput);
        }
Exemple #36
0
		// [DispId(18)]
		/// <summary>
		/// Public entry point for an explicit conversion; forwards all arguments to
		/// InternalConvertEx (note the different parameter order) and returns its result.
		/// </summary>
		/// <param name="sInput">the data to convert</param>
		/// <param name="inEnc">encoding form of the input</param>
		/// <param name="ciInput">item count of the input</param>
		/// <param name="outEnc">encoding form requested for the output</param>
		/// <param name="ciOutput">[out] item count of the output, as set by InternalConvertEx</param>
		/// <param name="eNormalizeOutput">normalization requested for the output</param>
		/// <param name="bForward">true for a forward conversion, false for reverse</param>
		/// <returns>the string returned by InternalConvertEx</returns>
		public virtual string ConvertEx(string sInput, EncodingForm inEnc, int ciInput, EncodingForm outEnc, out int ciOutput, NormalizeFlags eNormalizeOutput, bool bForward)
		{
			string strConverted = InternalConvertEx(
				inEnc,
				sInput,
				ciInput,
				outEnc,
				eNormalizeOutput,
				out ciOutput,
				bForward);

			return strConverted;
		}
Exemple #37
0
        /// <summary>
        /// Pre-conversion hook for the TECkit-based converter. After the base class has
        /// run, it selects (or creates via TECkit_CreateConverter) the native converter
        /// for the requested combination of input form, output form, normalization and
        /// direction, stores it in m_converter, and caches it in m_mapConverters under a
        /// key built from those four values. A cached converter is discarded first when
        /// the map/tec file is found to be newer than m_timeModifiedTec. Finally
        /// eNormalizeOutput is reset to None, because the normalization flags were
        /// folded into the output form given to TECkit_CreateConverter.
        /// </summary>
        /// <param name="eInEncodingForm">form of the caller's input (the *Bytes forms are mapped to *String locally)</param>
        /// <param name="eInFormEngine">engine input form; only passed on to the base class here</param>
        /// <param name="eOutEncodingForm">form the caller wants back (the *Bytes forms are mapped to *String locally)</param>
        /// <param name="eOutFormEngine">engine output form; only passed on to the base class here</param>
        /// <param name="eNormalizeOutput">requested output normalization; always None on return</param>
        /// <param name="bForward">true for a forward conversion, false for reverse</param>
        protected unsafe override void PreConvert
        (
            EncodingForm eInEncodingForm,
            ref EncodingForm eInFormEngine,
            EncodingForm eOutEncodingForm,
            ref EncodingForm eOutFormEngine,
            ref NormalizeFlags eNormalizeOutput,
            bool bForward
        )
        {
            // let the base class do its thing first
            base.PreConvert(eInEncodingForm, ref eInFormEngine,
                            eOutEncodingForm, ref eOutFormEngine,
                            ref eNormalizeOutput, bForward);

            // If the user uses one of the *Bytes forms, change that to the *String form so
            //  the value matches what the TECkit engine is expecting (that is, the TECkit
            //  engine is expecting a value of '1' (=LegacyString) even if it comes in as
            //  LegacyBytes). It'll still get converted correctly later, but when we create
            //  the TECkit "converter" object, which happens in here, it is expecting
            //  to see the other value.
            // (Both are by-value parameters, so this remapping is local to this method.)
            if (eInEncodingForm == EncodingForm.LegacyBytes)
            {
                eInEncodingForm = EncodingForm.LegacyString;
            }
            else if (eInEncodingForm == EncodingForm.UTF8Bytes)
            {
                eInEncodingForm = EncodingForm.UTF8String;
            }

            if (eOutEncodingForm == EncodingForm.LegacyBytes)
            {
                eOutEncodingForm = EncodingForm.LegacyString;
            }
            else if (eOutEncodingForm == EncodingForm.UTF8Bytes)
            {
                eOutEncodingForm = EncodingForm.UTF8String;
            }

            // See if we have a converter already for this combination or whether we need to make a
            //  new one. The cache key is the concatenation of the (remapped) forms, the
            //  normalization flags and the direction.
            string strConverterKey = eInEncodingForm.ToString()
                                     + eOutEncodingForm.ToString()
                                     + eNormalizeOutput.ToString()
                                     + bForward.ToString();

            // If this is a compilable map (i.e. ImplType SIL.map), then see if the map file has changed
            bool bReload = false;

            if (m_bCompileable && !String.IsNullOrEmpty(m_strMapFileSpec))
            {
                // first make sure it's there and get the last time it was modified
                DateTime timeModified = DateTime.Now; // don't care really, but have to initialize it.
                if (!DoesFileExist(m_strMapFileSpec, ref timeModified))
                {
                    EncConverters.ThrowError(ErrStatus.CantOpenReadMap, m_strMapFileSpec);
                }

                // if the file is newer than m_timeModifiedTec and a converter for this
                //  key is currently cached...
                if ((timeModified > m_timeModifiedTec) && m_mapConverters.ContainsKey(strConverterKey))
                {
                    // ... reset it and drop it from the cache (so we fall thru and do Load)
                    ResetConverter((IntPtr)m_mapConverters[strConverterKey]);
                    m_mapConverters.Remove(strConverterKey);
                    bReload = true;
                }
            }
            else if (IsFileLoaded())
            {
                // the tec file could also have changed out from underneath us (in which case we'd need to reload it).
                DateTime timeModified = DateTime.Now; // don't care really, but have to initialize it.
                if (!DoesFileExist(m_strTecFileSpec, ref timeModified))
                {
                    EncConverters.ThrowError(ErrStatus.CantOpenReadMap, m_strTecFileSpec);
                }

                // if the file is newer than m_timeModifiedTec and a converter for this
                //  key is currently cached...
                if ((timeModified > m_timeModifiedTec) && m_mapConverters.ContainsKey(strConverterKey))
                {
                    m_baMapping = null; // triggers a reload
                    m_lhsFlags  = m_rhsFlags = 0;

                    // ... reset it and drop it from the cache (so we fall thru and do Load)
                    ResetConverter((IntPtr)m_mapConverters[strConverterKey]);
                    m_mapConverters.Remove(strConverterKey);
                    bReload = true;
                }
            }

            if (m_mapConverters.ContainsKey(strConverterKey))
            {
                // reuse the converter previously created for this combination
                m_converter = (IntPtr)m_mapConverters[strConverterKey];
            }
            else
            {
                int status = (int)ErrStatus.NoError;

                // load the map now
                Load(bReload);

                // combine the output form and the normalization flags into the single
                //  16-bit value passed to TECkit as the output form
                // (is there no better way to do this?)
                ushort eFormOut1 = System.Convert.ToUInt16((int)eOutEncodingForm);
                ushort eFormOut2 = System.Convert.ToUInt16((int)eNormalizeOutput);
                UInt16 eFormOut  = System.Convert.ToUInt16(eFormOut1 | eFormOut2);

                // make a converter for this new combination.
                Util.DebugWriteLine(this, "Creating TECkit converter: in " +
                                    eInEncodingForm.ToString() + ", out " +
                                    eOutEncodingForm.ToString());
                if (IsFileLoaded())
                {
                    // pin the mapping bytes for the duration of the native call
                    fixed(byte *pbyMapping = m_baMapping)
                    {
                        status = TECkit_CreateConverter(
                            pbyMapping,
                            m_nMapSize,
                            (byte)((bForward) ? 1 : 0),
                            System.Convert.ToUInt16((int)eInEncodingForm),
                            eFormOut,
                            out m_converter
                            );
                    }
                }
                else
                {
                    // no mapping loaded: call TECkit with a null mapping pointer
                    status = TECkit_CreateConverter(
                        (byte *)0,
                        m_nMapSize,
                        (byte)((bForward) ? 1 : 0),
                        System.Convert.ToUInt16((int)eInEncodingForm),
                        eFormOut,
                        out m_converter
                        );
                }

                if (status == (int)ErrStatus.NoError)
                {
                    // remember it for the next request with the same combination
                    m_mapConverters[strConverterKey] = m_converter;
                }
                else
                {
                    EncConverters.ThrowError(status);
                }
            }

            // since TEC can handle output normalization directly (by requesting it here
            //  in the creation of the converter), reset the requesting flag so we won't
            //  attempt to do it later (all other converters that can't do implicit output
            //  normalization will *not* have reset the flag and then after their conversion,
            //  if the flag is still set, we'll call TEC to do it for them; see
            //  ECNormalizeData.GetString).
            eNormalizeOutput = NormalizeFlags.None;
        }
Exemple #38
0
		/// <summary>
		/// Turns the raw bytes a conversion engine wrote to <paramref name="lpOutBuffer"/>
		/// into the string returned to the client. The bytes are interpreted according
		/// to the form the engine produced; if that still differs from the form the
		/// client asked for, or output normalization was requested, the string is passed
		/// through EncConverters.UnicodeEncodingFormConvertEx at the end.
		/// </summary>
		/// <param name="lpOutBuffer">buffer holding the engine output. NOTE(review): four zero bytes
		/// are written starting at index nOutLen, so the buffer must have at least
		/// nOutLen + 4 bytes of capacity -- confirm every caller guarantees this.</param>
		/// <param name="nOutLen">number of bytes of engine output in the buffer</param>
		/// <param name="eOutEncodingForm">encoding form requested by the client</param>
		/// <param name="nCodePageOut">code page used to decode the bytes in the LegacyString case</param>
		/// <param name="eFormEngineOut">encoding form the engine produced</param>
		/// <param name="eNormalizeOutput">output normalization still to be applied (None if nothing to do)</param>
		/// <param name="rciOutput">[out] item count of the returned data</param>
		/// <param name="bDebugDisplayMode">debug-display flag, passed by ref to the DisplayDebug* helpers</param>
		/// <returns>the output as a string</returns>
		internal static unsafe string GetString(byte* lpOutBuffer, int nOutLen, EncodingForm eOutEncodingForm, int nCodePageOut, EncodingForm eFormEngineOut, NormalizeFlags eNormalizeOutput, out int rciOutput, ref bool bDebugDisplayMode)
		{
			// null terminate the output (writes 4 bytes at and beyond nOutLen) and turn
			//  it into a (real) array of bytes
			lpOutBuffer[nOutLen] = lpOutBuffer[nOutLen + 1] = lpOutBuffer[nOutLen + 2] = lpOutBuffer[nOutLen + 3] = 0;
			byte[] baOut = new byte[nOutLen];
			ByteStarToByteArr(lpOutBuffer, nOutLen, baOut);

			// check to see if the engine handled the given output form. If not, then see
			//  if it's a conversion we can easily do (otherwise we'll ask TEC to do the
			//  conversion for us (later) so that all engines can handle all possible
			//  output encoding forms).
			if (eOutEncodingForm != eFormEngineOut)
			{
				if (EncConverter.IsLegacyFormat(eOutEncodingForm))
				{
					if ((eFormEngineOut == EncodingForm.LegacyBytes) && (eOutEncodingForm == EncodingForm.LegacyString))
					{
						// in this case, just *pretend* the engine outputs LegacyString (the
						//  LegacyString case below really means "convert LegacyBytes to
						//  LegacyString")
						eFormEngineOut = eOutEncodingForm;
					}
				}
				else    // unicode forms
				{
					// if the engine gives UTF8 and the client wants UTF16...
					if ((eOutEncodingForm == EncodingForm.UTF16) && (eFormEngineOut == EncodingForm.UTF8Bytes))
					{
						// use the special form to convert it below
						eOutEncodingForm = eFormEngineOut = (EncodingForm)CCUnicode8;
					}
					// or vice versa
					else if ((eFormEngineOut == EncodingForm.UTF16)
						&& ((eOutEncodingForm == EncodingForm.UTF8Bytes) || (eOutEncodingForm == EncodingForm.UTF8String)))
					{
						// engine gave UTF16, but user wants a UTF8 flavor: re-encode the
						//  bytes as UTF-8 here and carry on as if the engine had produced
						//  the requested form (nOutLen now counts the UTF-8 bytes).
						UTF8Encoding enc = new UTF8Encoding();
						baOut = enc.GetBytes(Encoding.Unicode.GetChars(baOut));
						eFormEngineOut = eOutEncodingForm;
						nOutLen = baOut.Length;
					}
					// these conversions we can do ourself
					else if ((eOutEncodingForm == EncodingForm.UTF8String)
						|| (eOutEncodingForm == EncodingForm.UTF16))
					{
						eFormEngineOut = eOutEncodingForm;
					}
				}
			}

			// nItems: item count reported back through rciOutput;
			// nCharsLen: number of chars expected in caOut (checked after the switch)
			int nItems = 0, nCharsLen = 0;
			char[] caOut = null;
			switch (eFormEngineOut)
			{
				case EncodingForm.LegacyBytes:
				case EncodingForm.UTF8Bytes:
					{
						if (eFormEngineOut == EncodingForm.LegacyBytes)
							DisplayDebugCharValues(baOut, "Received (LegacyBytes) back from Converter/DLL (returning as LegacyBytes)...", ref bDebugDisplayMode);
						else
							DisplayDebugUCharValuesFromUTF8(baOut, "Received (UTF8Bytes) back from Converter/DLL (returning as UTF8Bytes)...", ref bDebugDisplayMode);

						// stuff the returned 'bytes' into the BSTR as narrow characters rather than
						//  converting to wide: the item count is the byte count, and the char
						//  array needs half as many elements (rounded up for an odd byte count)
						nItems = nOutLen;
						nCharsLen = (nOutLen + 1) / 2;
						caOut = new char[nCharsLen];
						ByteArrToCharArr(baOut, caOut);
						break;
					}
				case EncodingForm.LegacyString:
					{
						DisplayDebugCharValues(baOut, "Received (LegacyBytes) back from Converter/DLL (returning as LegacyString)...", ref bDebugDisplayMode);

						// one char is expected per output byte
						nCharsLen = nItems = nOutLen;

						try
						{
							// this will throw (for some reason) when doing symbol fonts
							//  (apparently, CP_SYMBOL is no longer supported).
							caOut = Encoding.GetEncoding(nCodePageOut).GetChars(baOut);
						}
						catch
						{
							// fallback only for the symbol-font and ISO 8859-1 code pages;
							//  anything else is rethrown
							if ((nCodePageOut == EncConverters.cnSymbolFontCodePage) || (nCodePageOut == EncConverters.cnIso8859_1CodePage))
							{
								// symbol-font bytes are OR'ed with 0xF000; ISO 8859-1 bytes
								//  are widened unchanged (mask of 0)
								char chMask = (char)0;
								if (nCodePageOut == EncConverters.cnSymbolFontCodePage)
									chMask = (char)0xF000;

								// do it the 'hard way'
								caOut = new char[nCharsLen];
								for (int i = 0; i < nCharsLen; i++)
									caOut[i] = (char)(baOut[i] | chMask);
							}
							else
								throw;
						}

						break;
					}
				case EncodingForm.UTF16:
					{
						// two bytes per UTF-16 code unit
						nCharsLen = nItems = (nOutLen / 2);

						DisplayDebugUCharValues(baOut, "Received (UTF16) back from Converter/DLL (returning as UTF16)...", ref bDebugDisplayMode);

						caOut = Encoding.Unicode.GetChars(baOut);
						break;
					}
				case EncodingForm.UTF8String:
					{
						DisplayDebugUCharValuesFromUTF8(baOut, "Received (UTF8Bytes) back from Converter/DLL (returning as UTF8String)...", ref bDebugDisplayMode);

						// this encoding form is always encoded using the default code page.
						caOut = Encoding.Default.GetChars(baOut);

						nCharsLen = nItems = nOutLen;
						break;
					}
				case (EncodingForm)CCUnicode8:
					{
						// special form set above: engine gave UTF-8, client wants UTF-16
						DisplayDebugUCharValuesFromUTF8(baOut, "Received (UTF8Bytes) back from Converter/DLL (returning as UTF16)...", ref bDebugDisplayMode);

						caOut = Encoding.UTF8.GetChars(baOut);

						nCharsLen = nItems = caOut.Length;
						break;
					}
				case EncodingForm.UTF16BE:
				case EncodingForm.UTF32:
				case EncodingForm.UTF32BE:
					{
						nCharsLen = nItems = nOutLen / 2;

						DisplayDebugUCharValues(baOut, "Received (UTF16BE/32/32BE) back from Converter/DLL...", ref bDebugDisplayMode);

						// the bytes are packed into chars as-is (no decoding) via ByteArrToCharArr
						caOut = new char[nCharsLen];
						ByteArrToCharArr(baOut, caOut);

						// for UTF32 each item takes two chars, so the item count is halved again.
						if (eFormEngineOut != EncodingForm.UTF16BE)
							nItems /= 2;
						break;
					}
				default:
					EncConverters.ThrowError(ErrStatus.OutEncFormNotSupported);
					break;
			}

			// unless empty returns are allowed (v22_AllowEmptyReturn), treat "no output
			//  chars" as an error; debug builds also verify the expected char count
#if !v22_AllowEmptyReturn
			if ((nCharsLen <= 0)
#if DEBUG
				|| (nCharsLen != caOut.Length)
#endif
)
			{
				EncConverters.ThrowError(ErrStatus.NoReturnDataBadOutForm);
			}
#endif

			// check to see if the engine handled the given output form. If not, then ask
			//  TEC to do the conversion for us so that all engines can handle all possible
			//  output encoding forms (e.g. caller requested utf32, but above CC could only
			//  give us utf16/8)
			// Also, if the caller wanted something other than "None" for the eNormalizeOutput,
			//  then we also have to call TEC for that as well (but I think this only makes
			//  sense if the output is utf16(be) or utf32(be))
			// p.s. if this had been a TEC converter, then the eNormalizeOutput flag would
			//  already have been reset to None (by this point), since we would have directly
			//  requested that normalized form when we created the converter--see
			//  TecEncConverter.PreConvert)
			string strOutput = new string(caOut);
			if ((eFormEngineOut != eOutEncodingForm)
				|| (eNormalizeOutput != NormalizeFlags.None))
			{
				strOutput = EncConverters.UnicodeEncodingFormConvertEx(strOutput, eFormEngineOut, nItems, eOutEncodingForm, eNormalizeOutput, out nItems);
			}

			DisplayDebugUCharValues(strOutput, "Returning back to client...", ref bDebugDisplayMode);

			rciOutput = nItems;
			return strOutput;
		}
Exemple #39
0
		protected override string InternalConvert(
			EncodingForm eInEncodingForm,
			string sInput,
			EncodingForm eOutEncodingForm,
			NormalizeFlags eNormalizeOutput,
			bool bForward)
		{
			// Entry point for the 'implicit' conversion methods (e.g. ConvertToUnicode),
			//  which carry no explicit item count: for those "COM" standard methods the
			//  length is implied by the BSTR and the data is always UTF-16-like (wide).
			//  Forwarding an input count of 0 makes InternalConvertEx take the length
			//  from the string itself, so data that contains an embedded 0 value (i.e.
			//  that isn't simply null terminated) is still handled in full.
			const int cnLengthFromString = 0;

			// the output item count is required by the signature, but isn't needed here.
			Int32 ciOutputIgnored = 0;

			string strResult = InternalConvertEx(
				eInEncodingForm,
				sInput,
				cnLengthFromString,
				eOutEncodingForm,
				eNormalizeOutput,
				out ciOutputIgnored,
				bForward);

			return strResult;
		}
Exemple #40
0
		// we override this method from EncConverter so that we can call all of the step's
		//  convert functions in turn (i.e. for this one, it isn't sufficient to just
		//  provide a "DoConvert" method)
		//
		// The steps in m_aEncConverter are run in list order when bForward is true, and in
		//  the opposite order (with each step's configured direction inverted) when it is
		//  false. The output string, encoding form and item count of one step become the
		//  input of the next.
		//
		// Only the final step uses the caller's eOutEncodingForm and eNormalizeOutput;
		//  every earlier step uses its own entry from m_aNormalizeOutput and an output form
		//  derived from the side of the conversion type that the step produces.
		//
		// Returns the output string of the final step (null if there are no steps);
		//  rciOutput receives the item count reported by that step (0 if there are none).
		//  Exceptions thrown by a step propagate to the caller unchanged.
		protected override string InternalConvertEx
			(
			EncodingForm    eInEncodingForm,
			string			sInput,
			int             ciInput,
			EncodingForm    eOutEncodingForm,
			NormalizeFlags  eNormalizeOutput,
			out int         rciOutput,
			bool            bForward
			)
		{
			// setup common items for both directions.
			int nSize = m_aEncConverter.Count;
			string strOutput = null;
			EncodingForm inForm = eInEncodingForm;
			Int32 ciOutput = 0;

			// forward runs the steps first-to-last; reverse runs them last-to-first.
			int iFirst = bForward ? 0 : (nSize - 1);
			int iFinal = bForward ? (nSize - 1) : 0;
			int nIncrement = bForward ? 1 : -1;

			// (no try/catch here: the original caught ApplicationException only to
			//  're-throw' it with "throw e", which discards the original stack trace)
			for (int nDone = 0, i = iFirst; nDone < nSize; nDone++, i += nIncrement)
			{
				IEncConverter rConverter = (IEncConverter)m_aEncConverter[i];
				if (rConverter == null)
					EncConverters.ThrowError(ErrStatus.MissingConverter);

				rConverter.Debug = Debug;

				// the final step gets the normalization the caller asked for; the
				//  others use what was configured for that step.
				bool bIsFinalStep = (i == iFinal);
				NormalizeFlags eNormalizeFlags = (NormalizeFlags)m_aNormalizeOutput[i];
				if (bIsFinalStep)
					eNormalizeFlags = eNormalizeOutput;

				ConvType eConversionType = rConverter.ConversionType;

				// in reverse mode, the direction of each step is the opposite of what
				//  was configured for it.
				bool bDirectionForward = (bool)m_aDirectionForward[i];
				if (!bForward)
					bDirectionForward = !bDirectionForward;

				// if this is the last one, then use the user's requested output format
				EncodingForm outForm;
				if (bIsFinalStep)
				{
					outForm = eOutEncodingForm;
				}
				else
				{
					// a step run forward produces its right-hand side; a step run
					//  backwards produces its left-hand side. (the reverse loop used
					//  to look at the right-hand side in both cases.)
					NormConversionType eType = bDirectionForward
						? NormalizeRhsConversionType(eConversionType)
						: NormalizeLhsConversionType(eConversionType);

					if (eType == NormConversionType.eLegacy)
						outForm = EncodingForm.LegacyBytes;
					else
						outForm = EncodingForm.Unspecified;
				}

				strOutput = rConverter.ConvertEx(
					sInput,
					inForm,
					ciInput,
					outForm,
					out ciOutput,
					eNormalizeFlags,
					bDirectionForward);

				// setup input for the next step
				sInput = strOutput;
				inForm = outForm;
				ciInput = ciOutput;

				// it's possible the user cancelled the debug mode so get it back
				Debug = rConverter.Debug;
			}

			rciOutput = ciOutput;
			return strOutput;
		}
Exemple #41
0
		// Prepares the TECkit converter handle (m_converter) for the requested combination
		//  of input form, output form, output normalization and direction.
		//
		// Converter handles are cached in m_mapConverters under a key built from those four
		//  values. If the backing file (the map file for compilable maps, otherwise the tec
		//  file) has a newer modification time than m_timeModifiedTec, the handle cached
		//  under the current key is reset and removed so that a fresh one is created.
		//
		// On return eNormalizeOutput is always NormalizeFlags.None, because the requested
		//  normalization is passed to TECkit_CreateConverter as part of the output form.
		//  Errors (missing file, non-NoError status from TECkit_CreateConverter) are
		//  reported via EncConverters.ThrowError.
		//
		// The method is 'unsafe' because it pins m_converter and m_baMapping in order to
		//  pass raw pointers to TECkit_CreateConverter.
		protected unsafe override void PreConvert
			(
			EncodingForm        eInEncodingForm,
			ref EncodingForm    eInFormEngine,
			EncodingForm        eOutEncodingForm,
			ref EncodingForm    eOutFormEngine,
			ref NormalizeFlags  eNormalizeOutput,
			bool                bForward
			)
		{
			// let the base class do its thing first
			base.PreConvert( eInEncodingForm, ref eInFormEngine,
							eOutEncodingForm, ref eOutFormEngine,
							ref eNormalizeOutput, bForward);

			// If the user uses one of the *Byte forms, change that to the *String forms so
			//	the value matches what the TECkit engine is expecting (that is, the TECkit
			//	engine is expecting a value of '1' (=LegacyString) even if it comes in as
			//	LegacyBytes). It'll still get converted correctly later, but when we create
			//	the TECkit "converter" object, which happens in here, it is expecting
			//	to see the other value.
			// (these are by-value parameters, so the changes below are local to this method)
			if( eInEncodingForm == EncodingForm.LegacyBytes )
				eInEncodingForm = EncodingForm.LegacyString;
			else if( eInEncodingForm == EncodingForm.UTF8Bytes )
				eInEncodingForm = EncodingForm.UTF8String;

			if( eOutEncodingForm == EncodingForm.LegacyBytes )
				eOutEncodingForm = EncodingForm.LegacyString;
			else if( eOutEncodingForm == EncodingForm.UTF8Bytes )
				eOutEncodingForm = EncodingForm.UTF8String;

			// See if we have a converter already for this combination or whether we need to make a
			//  new one. The key is the concatenation of the (adjusted) input form, output form,
			//  normalization flags and direction.
			string strConverterKey =  eInEncodingForm.ToString()
				+ eOutEncodingForm.ToString()
				+ eNormalizeOutput.ToString()
				+ bForward.ToString();

			// If this is a compilable map (i.e. ImplType SIL.map), then see if the map file has changed
			bool bReload = false;
			if (m_bCompileable && !String.IsNullOrEmpty(m_strMapFileSpec))
			{
				// first make sure it's there and get the last time it was modified
				DateTime timeModified = DateTime.Now; // don't care really, but have to initialize it.
				if (!DoesFileExist(m_strMapFileSpec, ref timeModified))
					EncConverters.ThrowError(ErrStatus.CantOpenReadMap, m_strMapFileSpec);

				// if it has been modified since m_timeModifiedTec *and* a converter is already
				//  cached for this key...
				// NOTE(review): only the handle cached under the current key is reset here;
				//  handles cached under other keys are not touched by this block -- confirm
				//  whether that is handled elsewhere (e.g. in Load).
				if ((timeModified > m_timeModifiedTec) && m_mapConverters.ContainsKey(strConverterKey))
				{
					// ... reset and remove that cached converter (so we fall thru and do Load)
					ResetConverter((Int32)m_mapConverters[strConverterKey]);
					m_mapConverters.Remove(strConverterKey);
					bReload = true;
				}
			}
			else if (IsFileLoaded())
			{
				// the tec file could also have changed out from underneath us (in which case we'd need to reload it).
				DateTime timeModified = DateTime.Now; // don't care really, but have to initialize it.
				if (!DoesFileExist(m_strTecFileSpec, ref timeModified))
					EncConverters.ThrowError(ErrStatus.CantOpenReadMap, m_strTecFileSpec);

				// if it has been modified since m_timeModifiedTec *and* a converter is already
				//  cached for this key...
				if ((timeModified > m_timeModifiedTec) && m_mapConverters.ContainsKey(strConverterKey))
				{
					m_baMapping = null; // triggers a reload
					m_lhsFlags = m_rhsFlags = 0;

					// ... reset and remove that cached converter (so we fall thru and do Load)
					ResetConverter((Int32)m_mapConverters[strConverterKey]);
					m_mapConverters.Remove(strConverterKey);
					bReload = true;
				}
			}

			if( m_mapConverters.ContainsKey(strConverterKey) )
			{
				// reuse the converter handle that was cached for this combination
				m_converter = (Int32)m_mapConverters[strConverterKey];
			}
			else
			{
				int status = (int)ErrStatus.NoError;

				// load the map now
				Load(bReload);

				// is there no better way to do this?
				// (the output form and the normalization flags are OR'd together into the
				//  single 16-bit output-form value passed to TECkit_CreateConverter)
				ushort eFormOut1 = System.Convert.ToUInt16((int)eOutEncodingForm);
				ushort eFormOut2 = System.Convert.ToUInt16((int)eNormalizeOutput);
				UInt16 eFormOut = System.Convert.ToUInt16(eFormOut1 | eFormOut2);

				// make a converter for this new combination.
				// (m_converter is pinned so that TECkit_CreateConverter can write the new
				//  handle straight into it)
				fixed(Int32* converter = &m_converter)
				{
					if( IsFileLoaded() )
					{
						// pass the loaded mapping bytes
						fixed(byte* pbyMapping = m_baMapping)
						{
							status = TECkit_CreateConverter(
										pbyMapping,
										m_nMapSize,
										(byte)((bForward) ? 1 : 0),
										System.Convert.ToUInt16((int)eInEncodingForm),
										eFormOut,
										(void*)converter
										);
						}
					}
					else
					{
						// nothing loaded: pass a null mapping pointer
						status = TECkit_CreateConverter(
									(byte*)0,
									m_nMapSize,
									(byte)((bForward) ? 1 : 0),
									System.Convert.ToUInt16((int)eInEncodingForm),
									eFormOut,
									(void*)converter
									);
					}
				}

				if( status == (int)ErrStatus.NoError )
				{
					// remember the new handle so the next call with this combination reuses it
					m_mapConverters[strConverterKey] = m_converter;
				}
				else
					EncConverters.ThrowError(status);
			}

			// since TEC can handle output normalization directly (by requesting it here
			//  in the creation of the converter), reset the requesting flag so we won't
			//  attempt to do it later (all other converters that can't do implicit output
			//  normalization will *not* have reset the flag and then after their conversion,
			//  if the flag is still set, we'll call TEC to do it for them; see
			//  ECNormalizeData.GetString).
			eNormalizeOutput = NormalizeFlags.None;
		}
Exemple #42
0
        // this is the helper method that returns the input data normalized, i.e. as the
        //  run of bytes (written into pBuf) that the conversion engine is to be given.
        //
        //  strInput          the data as received from the client
        //  cnCountIn         the number of items in strInput; 0 means "work it out from
        //                    the string"
        //  eEncFormIn        the encoding form the client says strInput is in
        //  nCodePageIn       the code page used to narrowize LegacyString data (0 may be
        //                    replaced by the symbol font code page; see below)
        //  eFormEngineIn     the encoding form the engine wants
        //  pBuf              the destination buffer. Four zero bytes are written after the
        //                    data, so it must have room for the data plus 4 bytes.
        //                    NOTE(review): the capacity of pBuf is not checked in here --
        //                    confirm that callers size it accordingly.
        //  nBufSize          on return, the number of data bytes written to pBuf
        //  bDebugDisplayMode handed (by ref) to the debug display helpers
        //
        //  Returns pBuf. Calls EncConverters.ThrowError(ErrStatus.InEncFormNotSupported)
        //  for an encoding form that isn't handled.
        internal static unsafe byte *GetBytes(string strInput, int cnCountIn, EncodingForm eEncFormIn, int nCodePageIn, EncodingForm eFormEngineIn, byte *pBuf, ref int nBufSize, ref bool bDebugDisplayMode)
        {
            Util.DebugWriteLine(className, "BEGIN");
            Util.DebugWriteLine(className,
                                "eEncFormIn " + eEncFormIn.ToString() + ", " +
                                "eFormEngineIn " + eFormEngineIn.ToString());

            // if the form the user gave is not what the engine wants (and it isn't legacy
            //  since legacy forms are already handled later)...
            if ((eEncFormIn != eFormEngineIn) && !EncConverter.IsLegacyFormat(eEncFormIn))
            {
                // we can do some of the conversions ourself. For example, if the input form
                //  is UTF16 and the desired form is UTF8, then simply use CCUnicode8 below
                if ((eEncFormIn == EncodingForm.UTF16) && (eFormEngineIn == EncodingForm.UTF8Bytes))
                {
                    Util.DebugWriteLine(className, "using CCUnicode8");
                    eEncFormIn = (EncodingForm)CCUnicode8;
                }
                // we can also do the following one
                else if ((eEncFormIn == EncodingForm.UTF8String) && (eFormEngineIn == EncodingForm.UTF8Bytes))
                {
                    ; // i.e. don't have TECkit do this one...
                }
                else
                {
                    strInput   = EncConverters.UnicodeEncodingFormConvertEx(strInput, eEncFormIn, cnCountIn, eFormEngineIn, NormalizeFlags.None, out cnCountIn);
                    eEncFormIn = eFormEngineIn;
                }
            }

            int nInLen = 0;

            switch (eEncFormIn)
            {
            case EncodingForm.LegacyBytes:
            case EncodingForm.UTF8Bytes:
            {
                if (cnCountIn != 0)
                {
                    nInLen = cnCountIn;         // item count should be the number of bytes directly.
                }
                else
                {
                    // if the user didn't give the length (i.e. via ConvertEx), then get it
                    //  from the BSTR length. nInLen will be the # of bytes.
                    nInLen = strInput.Length * 2;
                }

                // these forms are for C++ apps that want to use the BSTR to transfer
                //  bytes rather than OLECHARs.
                nInLen = StringToByteStar(strInput, pBuf, nInLen, true);

                if (eEncFormIn == EncodingForm.LegacyBytes)
                {
                    DisplayDebugCharValues(pBuf, nInLen, "Received (LegacyBytes) from client and sending to Converter/DLL...", ref bDebugDisplayMode);
                }
                else
                {
                    DisplayDebugUCharValuesFromUTF8(pBuf, nInLen, "Received (UTF8Bytes) from client and sending to Converter/DLL...", ref bDebugDisplayMode);
                }
                break;
            }

            case EncodingForm.LegacyString:
            {
                if (cnCountIn != 0)
                {
                    nInLen = cnCountIn;           // item count should be the number of bytes directly (after conversion below).
                }
                else
                {
                    nInLen = strInput.Length;         // the # of bytes will *be* the # of chars in the string after we're done.
                }
                DisplayDebugUCharValues(strInput, "Received (LegacyString) from client...", ref bDebugDisplayMode);

                // use a code page converter to narrowize using the input string
                // (but the 'easier' Add method will send 0; if so, then
                //  fallback to the original method.
                byte[] ba = null;

                // first check if it's a symbol font (sometimes the user
                //  incorrectly sends a few spaces first, so check the
                //  first couple of chars. If it is (and the code page is 0), then
                //  change the code page to be CP_SYMBOL
                // (every index is guarded by a length check so that an empty string
                //  doesn't throw IndexOutOfRangeException)
                if ((nCodePageIn == 0) &&
                    (((strInput.Length > 0) && ((strInput[0] & 0xF000) == 0xF000)) ||
                     ((strInput.Length > 1) && ((strInput[1] & 0xF000) == 0xF000)) ||
                     ((strInput.Length > 2) && ((strInput[2] & 0xF000) == 0xF000))
                    )
                    )
                {
                    nCodePageIn = EncConverters.cnSymbolFontCodePage;
                }

#if __MonoCS__
                // Narrowizing by code page 0 doesn't seem to be what we want on Linux.
                // Treating it as a symbol font or stripping off the low byte works better.
                if (nCodePageIn == 0)
                {
                    ba = BruteForceNarrowize(strInput, nInLen);
                }
                else
#else
                if (true)
#endif
                {
                    // if it's a symbol or iso-8859 encoding, then we can handle just
                    //  taking the low byte (i.e. the catch case)
                    if ((nCodePageIn == EncConverters.cnSymbolFontCodePage) ||
                        (nCodePageIn == EncConverters.cnIso8859_1CodePage)
                        )
                    {
                        try
                        {
                            Encoding enc = Encoding.GetEncoding(nCodePageIn);
                            ba = enc.GetBytes(strInput);
                            Util.DebugWriteLine(className, "Narrowized by given code page.");
                        }
                        catch
                        {
                            // deliberate best-effort fallback: if the code page can't
                            //  be used, narrowize by brute force instead
                            ba = BruteForceNarrowize(strInput, nInLen);
                        }
                    }
                    else
                    {
                        // otherwise, simply use CP_ACP (or the default code page) to
                        //  narrowize it.
                        Util.DebugWriteLine(className, "Narrowizing by given code page.");
                        Encoding enc = Encoding.GetEncoding(nCodePageIn);
                        ba = enc.GetBytes(strInput);
                    }
                }


                // copy that (managed) byte array into the caller's byte buffer...
                ByteArrToByteStar(ba, pBuf);

                if (cnCountIn != 0)
                {
                    nInLen = cnCountIn;         // item count should be the number of bytes directly.
                }
                else
                {
                    // if the user didn't give the length (i.e. via ConvertEx), then use
                    //  the length of the narrowized data. nInLen will be the # of bytes.
                    nInLen = ba.Length;
                }

                DisplayDebugCharValues(pBuf, nInLen, "Sending (LegacyBytes) to Converter/DLL...", ref bDebugDisplayMode);
                break;
            }

            // this following form *must* be widened UTF8 via the default code page
            case EncodingForm.UTF8String:
            {
                DisplayDebugUCharValues(strInput, "Received (UTF8String) from client...", ref bDebugDisplayMode);

                // use a code page converter to narrowize using the input string
                Encoding enc = Encoding.Default;
                byte[]   ba  = enc.GetBytes(strInput);

                // copy that (managed) byte array into the caller's byte buffer...
                ByteArrToByteStar(ba, pBuf);

                if (cnCountIn != 0)
                {
                    nInLen = cnCountIn;         // item count should be the number of bytes directly.
                }
                else
                {
                    // if the user didn't give the length (i.e. via ConvertEx), then use
                    //  the length of the narrowized data. nInLen will be the # of bytes.
                    nInLen = ba.Length;
                }

                DisplayDebugUCharValuesFromUTF8(pBuf, nInLen, "Sending (UTF8Bytes) to Converter/DLL...", ref bDebugDisplayMode);
                break;
            }

            // this is a special case for CC where the input was actually UTF16, but the
            //  CC DLL is expecting (usually) UTF8, so convert from UTF16->UTF8 narrow
            case (EncodingForm)CCUnicode8:
            {
                DisplayDebugUCharValues(strInput, "Received (UTF16) from client...", ref bDebugDisplayMode);

                UTF8Encoding enc = new UTF8Encoding();
                byte[]       ba  = enc.GetBytes(strInput);

                // copy that (managed) byte array into the caller's byte buffer...
                ByteArrToByteStar(ba, pBuf);

                // since we've changed the format, we don't care how many UTF16 words came in
                nInLen = ba.Length;

                DisplayDebugUCharValuesFromUTF8(pBuf, nInLen, "Sending (UTF8Bytes) to Converter/DLL...", ref bDebugDisplayMode);
                break;
            }

            case EncodingForm.UTF16:
            {
                if (cnCountIn != 0)
                {
                    nInLen = cnCountIn;           // item count should be the number of 16-bit words directly
                }
                else
                {
                    nInLen = strInput.Length;
                }

                DisplayDebugUCharValues(strInput, "Received (UTF16) from client and sending to Converter/DLL...", ref bDebugDisplayMode);

                // but this should be the count of bytes...
                nInLen *= 2;
                StringToByteStar(strInput, pBuf, nInLen, false);
                break;
            }

            case EncodingForm.UTF16BE:
            case EncodingForm.UTF32:
            case EncodingForm.UTF32BE:
            {
                if (cnCountIn != 0)
                {
                    nInLen = cnCountIn;         // item count is the number of Uni chars

                    // for UTF32, the converter's actually expecting the length to be twice
                    //  this much again.
                    if (eEncFormIn != EncodingForm.UTF16BE)
                    {
                        nInLen *= 2;
                    }
                }
                else
                {
                    nInLen = strInput.Length;
                }

                // remember the count handed to the debug display (i.e. before doubling)
                int nDisplayLen = nInLen;

                // for the byte count, double it (possibly again)
                nInLen *= 2;
                StringToByteStar(strInput, pBuf, nInLen, false);

                // only display the buffer once it has actually been filled (this used to
                //  be done before the copy above, which showed whatever was in pBuf
                //  beforehand rather than the data being sent)
                DisplayDebugUCharValues(pBuf, nDisplayLen, "Received (UTF16BE/32/32BE) from client/Sending to Converter/DLL...", ref bDebugDisplayMode);
                break;
            }

            default:
                EncConverters.ThrowError(ErrStatus.InEncFormNotSupported);
                break;
            }

            // zero-terminate the data (4 bytes, which covers the widest, 32-bit, forms)
            //  and report the number of data bytes back to the caller.
            pBuf[nInLen] = pBuf[nInLen + 1] = pBuf[nInLen + 2] = pBuf[nInLen + 3] = 0;
            nBufSize     = (int)nInLen;

            return(pBuf);
        }
Exemple #43
0
		protected override void PreConvert(
			EncodingForm eInEncodingForm,
			ref EncodingForm eInFormEngine,
			EncodingForm eOutEncodingForm,
			ref EncodingForm eOutFormEngine,
			ref NormalizeFlags eNormalizeOutput,
			bool bForward)
		{
			// the base class gets the first go at the engine forms and normalize flag
			base.PreConvert(
				eInEncodingForm,
				ref eInFormEngine,
				eOutEncodingForm,
				ref eOutFormEngine,
				ref eNormalizeOutput,
				bForward);

			// only the 'String' flavors are dealt with in this converter: LegacyString
			//  for Legacy_to(_from)_Legacy and UTF-16 for Unicode_to(_from)_Unicode. The
			//  same form is used on both the input and the output side of the engine.
			EncodingForm eEngineForm;
			if (m_bLegacy)
			{
				eEngineForm = EncodingForm.LegacyString;
			}
			else
			{
				eEngineForm = EncodingForm.UTF16;
			}
			eOutFormEngine = eEngineForm;
			eInFormEngine = eEngineForm;

			// the direction for the forthcoming conversion (DoConvert) is taken from
			//  *this* bForward, which may differ from IEncConverter->DirectionForward
			//  when the call came in by way of ConvertEx.
			m_bReverseLookup = (bForward == false);

			// (re-)load the file(s) now, if need be.
			Load();
		}