/// <summary>
/// Runs the base pre-conversion step, then requests LegacyBytes for both the
/// engine input and output forms, records the direction in m_bForward and
/// calls LoadForward or LoadReverse to match.
/// </summary>
protected override void PreConvert(
    EncodingForm eInEncodingForm,
    ref EncodingForm eInFormEngine,
    EncodingForm eOutEncodingForm,
    ref EncodingForm eOutFormEngine,
    ref NormalizeFlags eNormalizeOutput,
    bool bForward)
{
    // the base class gets the first say on the engine forms
    base.PreConvert(eInEncodingForm, ref eInFormEngine,
        eOutEncodingForm, ref eOutFormEngine,
        ref eNormalizeOutput, bForward);

    // whatever the base class picked, both sides are overridden with legacy bytes
    eInFormEngine = EncodingForm.LegacyBytes;
    eOutFormEngine = EncodingForm.LegacyBytes;

    // remember the direction (so it can be seen during DoConvert) and do the
    // load for that direction now
    m_bForward = bForward;
    if (!m_bForward)
    {
        LoadReverse();
        return;
    }

    LoadForward();
}
/// <summary>
/// Convenience entry point that forwards to InternalConvertEx with an input
/// length of 0 and discards the reported output count.
/// </summary>
protected override string InternalConvert(
    EncodingForm eInEncodingForm,
    string sInput,
    EncodingForm eOutEncodingForm,
    NormalizeFlags eNormalizeOutput,
    bool bForward)
{
    // Only the 'implicit' methods (e.g. ConvertToUnicode) come through here.
    // For these "COM" standard methods the string length is carried by the
    // BSTR itself and the data is always UTF-16-like (i.e. wide). Passing 0
    // asks the callee to take the length from the BSTR, which matters when the
    // data contains a value of 0 (i.e. it isn't necessarily null terminated).
    int nIgnoredOutputCount;
    string strConverted = InternalConvertEx(
        eInEncodingForm,
        sInput,
        0,
        eOutEncodingForm,
        eNormalizeOutput,
        out nIgnoredOutputCount,
        bForward);

    return strConverted;
}
/// <summary>
/// Common entry point for the 'implicit' conversion methods: writes a debug
/// trace line and forwards to InternalConvertEx with an input length of 0,
/// discarding the reported output count.
/// </summary>
/// <remarks>
/// Each sub-class has to do basic input/output encoding format processing, so
/// they should mostly come through this function and InternalConvertEx.
/// </remarks>
protected virtual string InternalConvert(
    EncodingForm eInEncodingForm,
    string sInput,
    EncodingForm eOutEncodingForm,
    NormalizeFlags eNormalizeOutput,
    bool bForward)
{
    Util.DebugWriteLine(className, "BEGIN");

    // Only the 'implicit' methods (e.g. ConvertToUnicode) call this routine.
    // For these "COM standard" methods the string length is carried by the
    // BSTR itself and the data is always UTF-16-like (i.e. wide). Passing 0
    // asks the callee to take the length from the BSTR, which matters when the
    // data contains a value of 0 (i.e. it isn't necessarily null terminated).
    int nIgnoredOutputCount;
    string strConverted = InternalConvertEx(
        eInEncodingForm,
        sInput,
        0,
        eOutEncodingForm,
        eNormalizeOutput,
        out nIgnoredOutputCount,
        bForward);

    return strConverted;
}
/// <summary>
/// Copies the name, direction and normalization setting from the given
/// converter, optionally swaps its input/output code pages, and adds it to
/// the EncConverters collection if no entry with that name exists yet.
/// </summary>
/// <param name="aEC">The converter to initialize from.</param>
/// <param name="bInputOutputAlreadySet">
/// True when the converter's code page values are already set from the proper
/// perspective (e.g. the SelectConverter dialog).
/// </param>
protected void InitFromIEncConverter(IEncConverter aEC, bool bInputOutputAlreadySet)
{
    m_strEncConverterName = aEC.Name;
    m_bDirectionForward = aEC.DirectionForward;
    m_eNormalForm = aEC.NormalizeOutput;

    // When the code pages are already from the right perspective for a reverse
    // converter, swap them here so that the swap done later (see CodePageInput)
    // puts them back to what they were at the start.
    bool bSwapCodePages = bInputOutputAlreadySet && !m_bDirectionForward;
    if (bSwapCodePages)
    {
        var nSavedOutputCodePage = aEC.CodePageOutput;
        aEC.CodePageOutput = aEC.CodePageInput;
        aEC.CodePageInput = nSavedOutputCodePage;
    }

    // A temporary converter isn't available to subsequent calls of
    // "GetEncConverter" unless it is added to this particular instance of the
    // local repository object.
    if (EncConverters[m_strEncConverterName] != null)
    {
        return;
    }

    EncConverters.Add(m_strEncConverterName, aEC);
}
/// <summary>
/// Runs the base pre-conversion step, requests a 'String' flavor for both
/// engine forms (LegacyString when m_bLegacy is set, UTF16 otherwise), records
/// the lookup direction and calls Load.
/// </summary>
protected override void PreConvert(
    EncodingForm eInEncodingForm,
    ref EncodingForm eInFormEngine,
    EncodingForm eOutEncodingForm,
    ref EncodingForm eOutFormEngine,
    ref NormalizeFlags eNormalizeOutput,
    bool bForward)
{
    // the base class gets the first say on the engine forms
    base.PreConvert(eInEncodingForm, ref eInFormEngine,
        eOutEncodingForm, ref eOutFormEngine,
        ref eNormalizeOutput, bForward);

    // This converter only deals with 'String' flavors: Legacy_to(_from)_Legacy
    // expects LegacyString, Unicode_to(_from)_Unicode expects UTF-16.
    EncodingForm eStringFlavor = m_bLegacy
        ? EncodingForm.LegacyString
        : EncodingForm.UTF16;
    eOutFormEngine = eStringFlavor;
    eInFormEngine = eStringFlavor;

    // The bForward passed in may differ from IEncConverter->DirectionForward
    // (e.g. when called via ConvertEx), so *this* value decides the direction
    // of the forthcoming conversion (DoConvert).
    m_bReverseLookup = !bForward;

    // check whether the file(s) need to be (re-)loaded at this point
    Load();
}
/// <summary>
/// Runs the base pre-conversion step, picks the engine forms from the
/// converter's conversion type (a wide Unicode form or LegacyBytes per side),
/// calls Load and then hands the chosen settings to CppPreConvert.
/// </summary>
protected override void PreConvert(
    EncodingForm eInEncodingForm,
    ref EncodingForm eInFormEngine,
    EncodingForm eOutEncodingForm,
    ref EncodingForm eOutFormEngine,
    ref NormalizeFlags eNormalizeOutput,
    bool bForward)
{
    // the base class gets the first say on the engine forms
    base.PreConvert(eInEncodingForm, ref eInFormEngine,
        eOutEncodingForm, ref eOutFormEngine,
        ref eNormalizeOutput, bForward);

    bool bLhsIsUnicode =
        NormalizeLhsConversionType(ConversionType) == NormConversionType.eUnicode;
    if (!bLhsIsUnicode)
    {
        // legacy
        Util.DebugWriteLine(this, "eInFormEngine LegacyBytes");
        eInFormEngine = EncodingForm.LegacyBytes;
    }
    else
    {
        // UTF-8 could be used here, but wide data works just fine. The Windows
        // build needs UTF16; since _MSC_VER isn't defined for this compiler,
        // the test is expressed as 'not Mono' instead.
#if !__MonoCS__ // _MSC_VER
        Util.DebugWriteLine(this, "eInFormEngine UTF16");
        eInFormEngine = EncodingForm.UTF16;
#else
        Util.DebugWriteLine(this, "eInFormEngine UTF32");
        eInFormEngine = EncodingForm.UTF32;
#endif
    }

    bool bRhsIsUnicode =
        NormalizeRhsConversionType(ConversionType) == NormConversionType.eUnicode;
    if (!bRhsIsUnicode)
    {
        Util.DebugWriteLine(this, "eOutFormEngine LegacyBytes");
        eOutFormEngine = EncodingForm.LegacyBytes;
    }
    else
    {
#if !__MonoCS__ // _MSC_VER
        Util.DebugWriteLine(this, "eOutFormEngine UTF16");
        eOutFormEngine = EncodingForm.UTF16;
#else
        Util.DebugWriteLine(this, "eOutFormEngine UTF32");
        eOutFormEngine = EncodingForm.UTF32;
#endif
    }

    // do the load at this point
    Load();

    // then pass the encoding form settings on to the C++ side
    CppPreConvert(
        (int)eInFormEngine,
        (int)eOutFormEngine,
        (int)eNormalizeOutput,
        bForward);
}
/// <summary>
/// Runs the first step's converter and, only if it returned the input
/// unchanged, runs the second (fallback) step on the original input instead.
/// </summary>
/// <remarks>
/// Overridden from EncConverter because providing a "DoConvert" method isn't
/// sufficient here (the steps' own convert functions must be called), and from
/// CmpdEncConverter to add the "fallback only when unchanged" behavior.
/// Exactly two steps are required.
/// </remarks>
protected override string InternalConvertEx(
    EncodingForm eInEncodingForm,
    string sInput,
    int ciInput,
    EncodingForm eOutEncodingForm,
    NormalizeFlags eNormalizeOutput,
    out int rciOutput,
    bool bForward)
{
    if (CountConverters != 2)
    {
        EncConverters.ThrowError(ErrStatus.FallbackTwoStepsRequired);
    }

    // --- step 1: the primary converter ---
    IEncConverter primaryStep = (IEncConverter)m_aEncConverter[0];
    if (primaryStep == null)
    {
        EncConverters.ThrowError(ErrStatus.MissingConverter);
    }

    primaryStep.Debug = Debug;

    // a reverse request flips the step's configured direction
    bool bStepForward = ((bool)m_aDirectionForward[0] == bForward);
    string strResult = primaryStep.ConvertEx(
        sInput, eInEncodingForm, ciInput, eOutEncodingForm,
        out rciOutput, eNormalizeOutput, bStepForward);

    // the primary converter changed the string, so no fallback is needed
    if (strResult != sInput)
    {
        return strResult;
    }

    // --- step 2: the fallback converter ---
    IEncConverter fallbackStep = (IEncConverter)m_aEncConverter[1];
    if (fallbackStep == null)
    {
        EncConverters.ThrowError(ErrStatus.MissingConverter);
    }

    fallbackStep.Debug = Debug;
    bStepForward = ((bool)m_aDirectionForward[1] == bForward);
    strResult = fallbackStep.ConvertEx(
        sInput, eInEncodingForm, ciInput, eOutEncodingForm,
        out rciOutput, eNormalizeOutput, bStepForward);

    return strResult;
}
/// <summary>
/// Runs the base pre-conversion step, picks the engine forms from the
/// converter's conversion type, calls Load with the converter identifier and
/// finally lets CppPreconvert adjust the engine forms and normalization flag.
/// </summary>
protected override unsafe void PreConvert(
    EncodingForm eInEncodingForm,
    ref EncodingForm eInFormEngine,
    EncodingForm eOutEncodingForm,
    ref EncodingForm eOutFormEngine,
    ref NormalizeFlags eNormalizeOutput,
    bool bForward)
{
    // the base class gets the first say on the engine forms
    base.PreConvert(eInEncodingForm, ref eInFormEngine,
        eOutEncodingForm, ref eOutFormEngine,
        ref eNormalizeOutput, bForward);

    bool bLhsIsUnicode =
        NormalizeLhsConversionType(ConversionType) == NormConversionType.eUnicode;
    if (!bLhsIsUnicode)
    {
        // legacy
        eInFormEngine = EncodingForm.LegacyBytes;
    }
    else
    {
#if __MonoCS__
        // Returning this value causes the input Unicode data (of any form,
        // UTF16, BE, etc.) to be converted to UTF8 narrow bytes before
        // DoConvert is called.
        eInFormEngine = EncodingForm.UTF8Bytes;
#else
        eInFormEngine = EncodingForm.UTF16;
#endif
    }

    bool bRhsIsUnicode =
        NormalizeRhsConversionType(ConversionType) == NormConversionType.eUnicode;
    if (!bRhsIsUnicode)
    {
        eOutFormEngine = EncodingForm.LegacyBytes;
    }
    else
    {
#if __MonoCS__
        eOutFormEngine = EncodingForm.UTF8Bytes;
#else
        eOutFormEngine = EncodingForm.UTF16;
#endif
    }

    // do the load at this point
    Load(ConverterIdentifier);

    // Finally, let the C++ code do its thing. It works on plain ints, so the
    // enum values are passed as ints and the by-ref ones are cast back after.
    int nInForm = (int)eInEncodingForm;
    int nInEngine = (int)eInFormEngine;
    int nOutForm = (int)eOutEncodingForm;
    int nOutEngine = (int)eOutFormEngine;
    int nNormalize = (int)eNormalizeOutput;

    CppPreconvert(nInForm, ref nInEngine, nOutForm, ref nOutEngine,
        ref nNormalize, bForward, 0);

    eInFormEngine = (EncodingForm)nInEngine;
    eOutFormEngine = (EncodingForm)nOutEngine;
    eNormalizeOutput = (NormalizeFlags)nNormalize;
}
/// <summary>
/// Runs the base pre-conversion step, decides whether this conversion goes
/// "to wide" (into UTF16) or "from wide", stores that in m_bToWide and sets
/// the engine forms accordingly.
/// </summary>
protected override void PreConvert(
    EncodingForm eInEncodingForm,
    ref EncodingForm eInFormEngine,
    EncodingForm eOutEncodingForm,
    ref EncodingForm eOutFormEngine,
    ref NormalizeFlags eNormalizeOutput,
    bool bForward)
{
    // the base class gets the first say on the engine forms
    base.PreConvert(eInEncodingForm, ref eInFormEngine,
        eOutEncodingForm, ref eOutFormEngine,
        ref eNormalizeOutput, bForward);

    // The direction has to come from the argument: m_bForward might be
    // different (e.g. if this was called from ConvertEx).
    m_bToWide = bForward;

    // non-legacy input with legacy output flips the direction
    bool bUnicodeInLegacyOut =
        !IsLegacyFormat(eInEncodingForm) && IsLegacyFormat(eOutEncodingForm);
    if (bUnicodeInLegacyOut)
    {
        m_bToWide = !bForward;
    }

    // The UTF8 code page is special: it is the one code page converter where
    // both sides are Unicode, so the narrow side is requested as UTF8Bytes.
    bool bIsUtf8CodePage = (m_nCodePage == CP_UTF8);

    if (!m_bToWide)
    {
        // going "from wide": the engine wants UTF16 as its input form
        eInFormEngine = EncodingForm.UTF16;
        if (bIsUtf8CodePage)
        {
            eOutFormEngine = EncodingForm.UTF8Bytes;
        }
        else if (IsLegacyFormat(eOutEncodingForm))
        {
            eOutFormEngine = EncodingForm.LegacyString;
        }

        return;
    }

    // going "to wide": the engine produces UTF16 as its output form
    eOutFormEngine = EncodingForm.UTF16;
    eInFormEngine = bIsUtf8CodePage
        ? EncodingForm.UTF8Bytes
        : EncodingForm.LegacyBytes;
}
/// <summary>
/// Runs the base pre-conversion step, picks the engine forms from the
/// converter's conversion type (UTF8Bytes on Unix or UTF16 elsewhere for
/// Unicode sides, LegacyBytes for legacy sides) and calls Load with the
/// converter identifier.
/// </summary>
protected override void PreConvert(
    EncodingForm eInEncodingForm,
    ref EncodingForm eInFormEngine,
    EncodingForm eOutEncodingForm,
    ref EncodingForm eOutFormEngine,
    ref NormalizeFlags eNormalizeOutput,
    bool bForward)
{
    // the base class gets the first say on the engine forms
    base.PreConvert(eInEncodingForm, ref eInFormEngine,
        eOutEncodingForm, ref eOutFormEngine,
        ref eNormalizeOutput, bForward);

    if (NormalizeLhsConversionType(ConversionType) != NormConversionType.eUnicode)
    {
        // legacy
        eInFormEngine = EncodingForm.LegacyBytes;
    }
    else
    {
        // Requesting UTF8Bytes causes the input Unicode data (of any form,
        // UTF16, BE, etc.) to be converted to UTF8 narrow bytes before
        // DoConvert is called.
        eInFormEngine = Util.IsUnix
            ? EncodingForm.UTF8Bytes
            : EncodingForm.UTF16;
    }

    // Unicode output follows the same platform split as the input side.
    if (NormalizeRhsConversionType(ConversionType) != NormConversionType.eUnicode)
    {
        eOutFormEngine = EncodingForm.LegacyBytes;
    }
    else
    {
        eOutFormEngine = Util.IsUnix
            ? EncodingForm.UTF8Bytes
            : EncodingForm.UTF16;
    }

    // do the load at this point
    Load(ConverterIdentifier);
}
/// <summary>
/// Builds a converter spec string: the current ConverterIdentifier (followed
/// by a " + \n" separator when it is non-empty) plus the formatted text for
/// the given step.
/// </summary>
/// <returns>The combined spec; ConverterIdentifier itself is not modified here.</returns>
internal virtual string AdjustConverterSpecProperty(string strMapping, bool bDirectionForward, NormalizeFlags normalizeOutput)
{
    string strExistingSpec = ConverterIdentifier;

    // only separate from earlier steps when there are earlier steps
    string strSeparator = String.IsNullOrEmpty(strExistingSpec)
        ? String.Empty
        : " + \n";

    // append the mapping name for this step
    return strExistingSpec
        + strSeparator
        + FormatConverterStep(strMapping, bDirectionForward, normalizeOutput);
}
/// <summary>
/// Copies the name, direction and normalization setting from the given
/// converter and adds it to the EncConverters collection if no entry with
/// that name exists yet.
/// </summary>
/// <param name="aEC">The converter to initialize from.</param>
protected void InitFromIEncConverter(IEncConverter aEC)
{
    m_strEncConverterName = aEC.Name;
    m_bDirectionForward = aEC.DirectionForward;
    m_eNormalForm = aEC.NormalizeOutput;

    // A temporary converter isn't available to subsequent calls of
    // "GetEncConverter" unless it is added to this particular instance of the
    // local repository object.
    bool bAlreadyKnown = (EncConverters[m_strEncConverterName] != null);
    if (bAlreadyKnown)
    {
        return;
    }

    EncConverters.Add(m_strEncConverterName, aEC);
}
/// <summary>
/// Runs the first step's converter and, only if it returned the input
/// unchanged, runs the second (fallback) step on the original input instead.
/// </summary>
/// <remarks>
/// Overridden from EncConverter because providing a "DoConvert" method isn't
/// sufficient here (the steps' own convert functions must be called), and from
/// CmpdEncConverter to add the "fallback only when unchanged" behavior.
/// Exactly two steps are required.
/// </remarks>
protected override string InternalConvertEx(
    EncodingForm eInEncodingForm,
    string sInput,
    int ciInput,
    EncodingForm eOutEncodingForm,
    NormalizeFlags eNormalizeOutput,
    out int rciOutput,
    bool bForward)
{
    if (CountConverters != 2)
    {
        EncConverters.ThrowError(ErrStatus.FallbackTwoStepsRequired);
    }

    // --- step 1: the primary converter ---
    IEncConverter primaryStep = (IEncConverter)m_aEncConverter[0];
    if (primaryStep == null)
    {
        EncConverters.ThrowError(ErrStatus.MissingConverter);
    }

    primaryStep.Debug = Debug;

    // a reverse request flips the step's configured direction
    bool bStepForward = ((bool)m_aDirectionForward[0] == bForward);
    string strResult = primaryStep.ConvertEx(
        sInput, eInEncodingForm, ciInput, eOutEncodingForm,
        out rciOutput, eNormalizeOutput, bStepForward);

    // the primary converter changed the string, so no fallback is needed
    if (strResult != sInput)
    {
        return strResult;
    }

    // --- step 2: the fallback converter ---
    IEncConverter fallbackStep = (IEncConverter)m_aEncConverter[1];
    if (fallbackStep == null)
    {
        EncConverters.ThrowError(ErrStatus.MissingConverter);
    }

    fallbackStep.Debug = Debug;
    bStepForward = ((bool)m_aDirectionForward[1] == bForward);
    strResult = fallbackStep.ConvertEx(
        sInput, eInEncodingForm, ciInput, eOutEncodingForm,
        out rciOutput, eNormalizeOutput, bStepForward);

    return strResult;
}
/// <summary>
/// Copies the name, direction and normalization setting from the given
/// converter and adds it to the EncConverters collection if no entry with
/// that name exists yet.
/// </summary>
/// <param name="aEC">The converter to initialize from.</param>
protected void InitFromIEncConverter(IEncConverter aEC)
{
    m_strEncConverterName = aEC.Name;
    m_bDirectionForward = aEC.DirectionForward;
    m_eNormalForm = aEC.NormalizeOutput;

    // A temporary converter isn't available to subsequent calls of
    // "GetEncConverter" unless it is added to this particular instance of the
    // local repository object.
    bool bAlreadyKnown = (EncConverters[m_strEncConverterName] != null);
    if (bAlreadyKnown)
    {
        return;
    }

    EncConverters.Add(m_strEncConverterName, aEC);
}
protected bool m_bIsInRepository; // indicates whether this converter is in the static repository (true) or not (false)
#endregion Member Variable Definitions

#region Public Interface
/// <summary>
/// Creates a converter with the given program ID and implementation type and
/// resets every other setting to its initial value.
/// </summary>
/// <param name="sProgId">Stored as the converter's program ID.</param>
/// <param name="sImplementType">Stored as the converter's implementation type.</param>
public EncConverter(string sProgId, string sImplementType)
{
    // identity supplied by the caller
    m_strProgramID = sProgId;
    m_strImplementType = sImplementType;

    // conversion behavior: process type unknown, legacy <-> Unicode, forward
    m_lProcessType = (Int32)ProcessTypeFlags.DontKnow;
    m_eConversionType = ConvType.Legacy_to_from_Unicode;
    m_bForward = true;

    // encoding forms, normalization and code pages start out unset
    m_eEncodingInput = EncodingForm.Unspecified;
    m_eEncodingOutput = EncodingForm.Unspecified;
    m_eNormalizeOutput = NormalizeFlags.None;
    m_nCodePageInput = 0;
    m_nCodePageOutput = 0;

    // bookkeeping flags
    m_bDebugDisplayMode = false;
    m_bIsInRepository = false;
}
/// <summary>
/// Default pre-conversion step: reports the caller's encoding forms as the
/// forms the engine wants, unchanged.
/// </summary>
/// <remarks>
/// This default is never really right for a real engine, so each engine's COM
/// wrapper is expected to override it; it mainly shows what an override must
/// do. For example, CC needs UTF8Bytes for Unicode input, so its override
/// would set eInFormEngine to UTF8Bytes.
/// </remarks>
protected virtual void PreConvert(
    EncodingForm eInEncodingForm,
    ref EncodingForm eInFormEngine,
    EncodingForm eOutEncodingForm,
    ref EncodingForm eOutFormEngine,
    ref NormalizeFlags eNormalizeOutput,
    bool bForward)
{
    // by default, whatever form the data arrives in (and is requested in) is
    // passed straight through to the engine
    eInFormEngine = eInEncodingForm;
    eOutFormEngine = eOutEncodingForm;
}
/// <summary>
/// Applies the normalization steps selected by <paramref name="normalizeFlags"/>
/// to <paramref name="value"/>, in a fixed order: diacritics, Unicode
/// normalization, symbols, repeated whitespace, casing, then trimming.
/// </summary>
/// <param name="value">The text to normalize; null is returned as null.</param>
/// <param name="normalizeFlags">The set of steps to apply.</param>
/// <returns>The normalized text, or null when the input was null.</returns>
protected string NormalizeString(string value, NormalizeFlags normalizeFlags)
{
    if (value == null)
    {
        return null;
    }

    string result = value;

    if (normalizeFlags.HasFlag(NormalizeFlags.RemoveDiacritics))
    {
        result = RemoveDiacritics(result);
    }

    if (normalizeFlags.HasFlag(NormalizeFlags.NormalizeUnicode))
    {
        result = NormalizeUnicode(result);
    }

    if (normalizeFlags.HasFlag(NormalizeFlags.StripSymbols))
    {
        result = StripSymbols(result);
    }

    if (normalizeFlags.HasFlag(NormalizeFlags.StripMultiWhitespace))
    {
        result = StripMultiWhitespace(result);
    }

    // casing options are mutually exclusive; the first one set wins
    if (normalizeFlags.HasFlag(NormalizeFlags.ToUpper))
    {
        result = ToUpper(result);
    }
    else if (normalizeFlags.HasFlag(NormalizeFlags.ToLower))
    {
        result = ToLower(result);
    }
    else if (normalizeFlags.HasFlag(NormalizeFlags.ToTitleCase))
    {
        // title-casing is done on the lower-cased text using the current
        // thread's culture
        var cultureText = Thread.CurrentThread.CurrentCulture.TextInfo;
        result = cultureText.ToTitleCase(ToLower(result));
    }

    if (normalizeFlags.HasFlag(NormalizeFlags.Trim))
    {
        result = result.Trim();
    }

    return result;
}
/// <summary>
/// Runs the base pre-conversion step, requests UTF8Bytes for Unicode sides and
/// LegacyBytes for legacy sides, and calls Load with the converter identifier.
/// </summary>
protected override void PreConvert(
    EncodingForm eInEncodingForm,
    ref EncodingForm eInFormEngine,
    EncodingForm eOutEncodingForm,
    ref EncodingForm eOutFormEngine,
    ref NormalizeFlags eNormalizeOutput,
    bool bForward)
{
    // the base class gets the first say on the engine forms
    base.PreConvert(eInEncodingForm, ref eInFormEngine,
        eOutEncodingForm, ref eOutFormEngine,
        ref eNormalizeOutput, bForward);

    // The CC DLL (conversion engine) usually works in UTF8 for Unicode. A
    // possible future enhancement is a (marked) value in the repository saying
    // which form to use (UTF8Bytes by default, something else for a UTF32 cc
    // table written with the xYYYY syntax rather than uXXXX). For now, every
    // CC table that uses Unicode is assumed to want UTF8.

    // Requesting UTF8Bytes causes the input Unicode data (of any form, UTF16,
    // BE, etc.) to be converted to UTF8 narrow bytes before DoConvert is called.
    bool bLhsIsUnicode =
        NormalizeLhsConversionType(ConversionType) == NormConversionType.eUnicode;
    eInFormEngine = bLhsIsUnicode
        ? EncodingForm.UTF8Bytes
        : EncodingForm.LegacyBytes;

    bool bRhsIsUnicode =
        NormalizeRhsConversionType(ConversionType) == NormConversionType.eUnicode;
    eOutFormEngine = bRhsIsUnicode
        ? EncodingForm.UTF8Bytes
        : EncodingForm.LegacyBytes;

    // do the load at this point
    Load(ConverterIdentifier);
}
/// <summary>
/// Runs the base pre-conversion step unchanged and then calls Load with the
/// converter identifier.
/// </summary>
protected override void PreConvert(
    EncodingForm eInEncodingForm,
    ref EncodingForm eInFormEngine,
    EncodingForm eOutEncodingForm,
    ref EncodingForm eOutFormEngine,
    ref NormalizeFlags eNormalizeOutput,
    bool bForward)
{
    // the engine forms chosen by the base class are kept as they are
    base.PreConvert(eInEncodingForm, ref eInFormEngine,
        eOutEncodingForm, ref eOutFormEngine,
        ref eNormalizeOutput, bForward);

    // do the load at this point
    Load(ConverterIdentifier);
}
/// <summary>
/// Writes the display text for a normalization flag into the row's
/// normalization column: "Fully Composed", "Fully Decomposed", or "None" for
/// any other value.
/// </summary>
protected void UpdateNormalizationCellValue(DataGridViewRow theRow, NormalizeFlags eNormalizeFlag)
{
    // anything that isn't one of the two explicit forms is shown as "None"
    string strDisplayText = "None";
    if (eNormalizeFlag == NormalizeFlags.FullyComposed)
    {
        strDisplayText = "Fully Composed";
    }
    else if (eNormalizeFlag == NormalizeFlags.FullyDecomposed)
    {
        strDisplayText = "Fully Decomposed";
    }

    theRow.Cells[cnNormalizationColumn].Value = strDisplayText;
}
/// <summary>
/// Converts a string and returns the engine's output as a raw byte array
/// (for callers that want legacy data back as bytes rather than as a string).
/// </summary>
/// <param name="eInEncodingForm">Encoding form of <paramref name="sInput"/>.</param>
/// <param name="sInput">The text to convert.</param>
/// <param name="eOutEncodingForm">Requested output encoding form.</param>
/// <param name="eNormalizeOutput">Requested output normalization.</param>
/// <param name="rciOutput">Receives the number of output bytes.</param>
/// <param name="bForward">Direction of the conversion.</param>
/// <returns>The bytes written by DoConvert; an empty array for empty input.</returns>
protected virtual unsafe byte[] InternalConvertEx(EncodingForm eInEncodingForm, string sInput, EncodingForm eOutEncodingForm, NormalizeFlags eNormalizeOutput, out int rciOutput, bool bForward)
{
    Util.DebugWriteLine(className, "(output bytes) BEGIN");
    Util.DebugWriteLine(className, "eEncFormIn " + eInEncodingForm.ToString() + ", " + "eEncFormOut " + eOutEncodingForm.ToString());

    if (sInput == null)
    {
        EncConverters.ThrowError(ErrStatus.IncompleteChar);
    }

    Util.DebugWriteLine(className, "sInput.Length() is " + sInput.Length.ToString() + ".");

    // nothing to convert (section added 11/10/2011 by Jim K)
    if (sInput.Length == 0)
    {
        rciOutput = 0;
        return new byte[0];
    }

    Util.DebugWriteLine(className, "sInput is " + sInput + ".");

    // If the caller left the forms unspecified, take the default for the
    // ConversionType: LegacyString for a legacy side, UTF16 for a Unicode side.
    CheckInitEncForms(bForward, ref eInEncodingForm, ref eOutEncodingForm);

    // Give the converter engine (and/or its wrapper) the chance to say which
    // forms it wants to work with.
    EncodingForm eEngineInForm = EncodingForm.Unspecified;
    EncodingForm eEngineOutForm = EncodingForm.Unspecified;
    PreConvert(
        eInEncodingForm,        // [in] form of the incoming string
        ref eEngineInForm,      // [out] form the conversion engine wants
        eOutEncodingForm,
        ref eEngineOutForm,
        ref eNormalizeOutput,
        bForward);

    // room for the input once it is in the engine's form (6x ought to be enough)
    int cbInput = sInput.Length * 6;
    byte[] abyInput = new byte[cbInput];
    fixed (byte* pInput = abyInput)
    {
        // the helper class puts the data into the form needed by the engine
        Util.DebugWriteLine(className, "Calling GetBytes");
        int nInputCodePage = bForward ? CodePageInput : CodePageOutput;
        ECNormalizeData.GetBytes(sInput, sInput.Length, eInEncodingForm,
            nInputCodePage, eEngineInForm, pInput, ref cbInput,
            ref m_bDebugDisplayMode);

        // room for the converter to fill: at least 10000 bytes, or six times
        // the prepared input if that is larger
        int cbOutput = Math.Max(10000, cbInput * 6);
        byte[] abyOutput = new byte[cbOutput];
        fixed (byte* pOutput = abyOutput)
        {
            // clear the first four bytes of the output buffer
            for (int i = 0; i < 4; i++)
            {
                pOutput[i] = 0;
            }

            // the wrapper sub-class's DoConvert does the actual conversion
            Util.DebugWriteLine(className, "Calling DoConvert");
            DoConvert(pInput, cbInput, pOutput, ref cbOutput);

            // copy out exactly the number of bytes DoConvert reported
            byte[] abyResult = new byte[cbOutput];
            ECNormalizeData.ByteStarToByteArr(pOutput, cbOutput, abyResult);
#if DEBUG
            Util.DebugWriteLine(className, "Output length " + cbOutput.ToString());
            Util.DebugWriteLine(className, Util.getDisplayBytes("Output In Bytes", abyResult));
            Util.DebugWriteLine(className, "Returning.");
#endif
            rciOutput = cbOutput;
            return abyResult;
        }
    }
}
/// <summary>
/// Converts a raw byte array and returns the result as a string (for callers
/// that supply legacy data as bytes, which is passed to the engine as-is).
/// </summary>
/// <param name="eInEncodingForm">Encoding form of <paramref name="baInput"/>.</param>
/// <param name="baInput">The bytes to convert.</param>
/// <param name="eOutEncodingForm">Requested output encoding form.</param>
/// <param name="eNormalizeOutput">Requested output normalization.</param>
/// <param name="rciOutput">Receives the output item count reported by ECNormalizeData.GetString.</param>
/// <param name="bForward">Direction of the conversion.</param>
/// <returns>The converted text; an empty string for empty input.</returns>
protected virtual unsafe string InternalConvertEx(EncodingForm eInEncodingForm, byte[] baInput, EncodingForm eOutEncodingForm, NormalizeFlags eNormalizeOutput, out int rciOutput, bool bForward)
{
    Util.DebugWriteLine(className, "(input bytes) BEGIN");
    Util.DebugWriteLine(className, "eEncFormIn " + eInEncodingForm.ToString() + ", " + "eEncFormOut " + eOutEncodingForm.ToString());

    if (baInput == null)
    {
        EncConverters.ThrowError(ErrStatus.IncompleteChar);
    }

    // nothing to convert
    if (baInput.Length == 0)
    {
        rciOutput = 0;
        return "";
    }

    // If the caller left the forms unspecified, take the default for the
    // ConversionType: LegacyString for a legacy side, UTF16 for a Unicode side.
    CheckInitEncForms(bForward, ref eInEncodingForm, ref eOutEncodingForm);

    // Give the converter engine (and/or its wrapper) the chance to say which
    // forms it wants to work with.
    EncodingForm eEngineInForm = EncodingForm.Unspecified;
    EncodingForm eEngineOutForm = EncodingForm.Unspecified;
    PreConvert(
        eInEncodingForm,        // [in] form of the incoming data
        ref eEngineInForm,      // [out] form the conversion engine wants
        eOutEncodingForm,
        ref eEngineOutForm,
        ref eNormalizeOutput,
        bForward);

    // the input bytes are handed to the engine directly, without re-encoding
    int cbInput = baInput.Length;
    fixed (byte* pInput = baInput)
    {
        // room for the converter to fill: at least 10000 bytes, or six times
        // the input if that is larger
        int cbOutput = Math.Max(10000, cbInput * 6);
        byte[] abyOutput = new byte[cbOutput];
        fixed (byte* pOutput = abyOutput)
        {
            // clear the first four bytes of the output buffer
            for (int i = 0; i < 4; i++)
            {
                pOutput[i] = 0;
            }

            // the wrapper sub-class's DoConvert does the actual conversion
            Util.DebugWriteLine(className, "Calling DoConvert");
            DoConvert(pInput, cbInput, pOutput, ref cbOutput);
            Util.DebugWriteLine(className, "Output length " + cbOutput.ToString());

            // copy of the raw engine output (only displayed in DEBUG builds)
            byte[] abyRaw = new byte[cbOutput];
            ECNormalizeData.ByteStarToByteArr(pOutput, cbOutput, abyRaw);
#if DEBUG
            Util.DebugWriteLine(className, Util.getDisplayBytes("Output In Bytes", abyRaw));
            Util.DebugWriteLine(className, "Got val '" + System.Text.Encoding.Unicode.GetString(abyRaw) + "'");
#endif
            // turn the engine's bytes into the string form the caller asked for
            int nOutputCodePage = bForward ? CodePageOutput : CodePageInput;
            string strConverted = ECNormalizeData.GetString(pOutput, cbOutput,
                eOutEncodingForm, nOutputCodePage, eEngineOutForm,
                eNormalizeOutput, out rciOutput, ref m_bDebugDisplayMode);
#if DEBUG
            Util.DebugWriteLine(className, "normalized result '" + strConverted + "'");
            byte[] abyDisplay = System.Text.Encoding.BigEndianUnicode.GetBytes(strConverted);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output in UTF16BE", abyDisplay));
            abyDisplay = System.Text.Encoding.Unicode.GetBytes(strConverted);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output in UTF16LE", abyDisplay));
            abyDisplay = System.Text.Encoding.UTF8.GetBytes(strConverted);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output In UTF8", abyDisplay));
            Util.DebugWriteLine(className, "Returning.");
#endif
            return strConverted;
        }
    }
}
/// <summary>
/// Runs the base pre-conversion step, records the direction in m_bToWide and
/// requests UTF16 on the wide side of the engine (plus UTF8Bytes on the other
/// side when the code page is CP_UTF8).
/// </summary>
protected override void PreConvert(
    EncodingForm eInEncodingForm,
    ref EncodingForm eInFormEngine,
    EncodingForm eOutEncodingForm,
    ref EncodingForm eOutFormEngine,
    ref NormalizeFlags eNormalizeOutput,
    bool bForward)
{
    // the base class gets the first say on the engine forms
    base.PreConvert(eInEncodingForm, ref eInFormEngine,
        eOutEncodingForm, ref eOutFormEngine,
        ref eNormalizeOutput, bForward);

    // The direction has to come from the argument: m_bForward might be
    // different (e.g. if this was called from ConvertEx).
    m_bToWide = bForward;

    // The UTF8 code page is special: it is the one code page converter where
    // both sides are Unicode, so the narrow side is requested as UTF8Bytes.
    bool bIsUtf8CodePage = (m_nCodePage == CP_UTF8);

    if (!m_bToWide)
    {
        // going "from wide": the engine wants UTF16 as its input form
        eInFormEngine = EncodingForm.UTF16;
        if (bIsUtf8CodePage)
        {
            eOutFormEngine = EncodingForm.UTF8Bytes;
        }

        return;
    }

    // going "to wide": the engine produces UTF16 as its output form
    eOutFormEngine = EncodingForm.UTF16;
    if (bIsUtf8CodePage)
    {
        eInFormEngine = EncodingForm.UTF8Bytes;
    }
}
/// <summary>
/// Creates an instance from a converter name, a direction and a normalization
/// form; the three values are simply stored.
/// </summary>
/// <param name="strEncConverterName">Name of the converter.</param>
/// <param name="bDirectionForward">True for the forward direction.</param>
/// <param name="eNormalForm">Normalization form to store.</param>
public DirectableEncConverter(
    string strEncConverterName,
    bool bDirectionForward,
    NormalizeFlags eNormalForm)
{
    m_eNormalForm = eNormalForm;
    m_bDirectionForward = bDirectionForward;
    m_strEncConverterName = strEncConverterName;
}
/// <summary>
/// Turns the raw bytes written by a conversion engine into the string form the
/// caller asked for.
/// </summary>
/// <param name="lpOutBuffer">
/// Buffer holding the engine's output. Four zero bytes are written starting at
/// index <paramref name="nOutLen"/>, so the buffer must be at least
/// nOutLen + 4 bytes long.
/// </param>
/// <param name="nOutLen">Number of valid bytes in <paramref name="lpOutBuffer"/>.</param>
/// <param name="eOutEncodingForm">Encoding form requested by the caller.</param>
/// <param name="nCodePageOut">Code page used to decode LegacyString output.</param>
/// <param name="eFormEngineOut">Encoding form the engine actually produced.</param>
/// <param name="eNormalizeOutput">Requested output normalization.</param>
/// <param name="rciOutput">Receives the item count of the returned data.</param>
/// <param name="bDebugDisplayMode">Passed by reference to the debug display helpers.</param>
/// <returns>The output as a string in the requested form.</returns>
internal static unsafe string GetString(byte* lpOutBuffer, int nOutLen, EncodingForm eOutEncodingForm, int nCodePageOut, EncodingForm eFormEngineOut, NormalizeFlags eNormalizeOutput, out int rciOutput, ref bool bDebugDisplayMode)
{
    // null terminate the output and turn it into a (real) array of bytes
    // NOTE(review): this writes up to index nOutLen + 3, i.e. past the valid data;
    // the callers visible here allocate at least 10000 bytes -- confirm for others.
    lpOutBuffer[nOutLen] = lpOutBuffer[nOutLen + 1] = lpOutBuffer[nOutLen + 2] = lpOutBuffer[nOutLen + 3] = 0;
    byte[] baOut = new byte[nOutLen];
    ByteStarToByteArr(lpOutBuffer, nOutLen, baOut);

    // check to see if the engine handled the given output form. If not, then see
    // if it's a conversion we can easily do (otherwise we'll ask TEC to do the
    // conversion for us (later) so that all engines can handle all possible
    // output encoding forms.
    if (eOutEncodingForm != eFormEngineOut)
    {
        if (EncConverter.IsLegacyFormat(eOutEncodingForm))
        {
            if ((eFormEngineOut == EncodingForm.LegacyBytes) && (eOutEncodingForm == EncodingForm.LegacyString))
            {
                // in this case, just *pretend* the engine outputs LegacyString (the
                // LegacyString case below really means "convert LegacyBytes to
                // LegacyString")
                eFormEngineOut = eOutEncodingForm;
            }
        }
        else // unicode forms
        {
            // if the engine gives UTF8 and the client wants UTF16...
            if ((eOutEncodingForm == EncodingForm.UTF16) && (eFormEngineOut == EncodingForm.UTF8Bytes))
            {
                // use the special form to convert it below (both forms are set to the
                // same marker, so no further form conversion is requested at the end)
                eOutEncodingForm = eFormEngineOut = (EncodingForm)CCUnicode8;
            }
            // or vice versa
            else if ((eFormEngineOut == EncodingForm.UTF16) && ((eOutEncodingForm == EncodingForm.UTF8Bytes) || (eOutEncodingForm == EncodingForm.UTF8String)))
            {
                // engine gave UTF16, but user wants a UTF8 flavor: re-encode the bytes
                // as UTF-8 here and continue as if the engine had produced that form.
                UTF8Encoding enc = new UTF8Encoding();
                baOut = enc.GetBytes(Encoding.Unicode.GetChars(baOut));
                eFormEngineOut = eOutEncodingForm;
                nOutLen = baOut.Length;
            }
            // these conversions we can do ourselves
            else if ((eOutEncodingForm == EncodingForm.UTF8String) || (eOutEncodingForm == EncodingForm.UTF16))
            {
                eFormEngineOut = eOutEncodingForm;
            }
        }
    }

    // nItems: count reported back to the caller; nCharsLen: expected length of caOut
    int nItems = 0, nCharsLen = 0;
    char[] caOut = null;
    switch (eFormEngineOut)
    {
        case EncodingForm.LegacyBytes:
        case EncodingForm.UTF8Bytes:
        {
            if (eFormEngineOut == EncodingForm.LegacyBytes)
                DisplayDebugCharValues(baOut, "Received (LegacyBytes) back from Converter/DLL (returning as LegacyBytes)...", ref bDebugDisplayMode);
            else
                DisplayDebugUCharValuesFromUTF8(baOut, "Received (UTF8Bytes) back from Converter/DLL (returning as UTF8Bytes)...", ref bDebugDisplayMode);

            // stuff the returned 'bytes' into the BSTR as narrow characters rather than
            // converting to wide (so two bytes go into each char, rounding up)
            nItems = nOutLen;
            nCharsLen = (nOutLen + 1) / 2;
            caOut = new char[nCharsLen];
            ByteArrToCharArr(baOut, caOut);
            break;
        }
        case EncodingForm.LegacyString:
        {
            DisplayDebugCharValues(baOut, "Received (LegacyBytes) back from Converter/DLL (returning as LegacyString)...", ref bDebugDisplayMode);
            nCharsLen = nItems = nOutLen;
            try
            {
                // this will throw (for some reason) when doing symbol fonts
                // (apparently, CP_SYMBOL is no longer supported).
                caOut = Encoding.GetEncoding(nCodePageOut).GetChars(baOut);
            }
            catch
            {
                // only the symbol-font and ISO 8859-1 code pages are handled by
                // hand; any other failure is rethrown
                if ((nCodePageOut == EncConverters.cnSymbolFontCodePage) || (nCodePageOut == EncConverters.cnIso8859_1CodePage))
                {
                    // symbol fonts get 0xF000 OR-ed into every character
                    char chMask = (char)0;
                    if (nCodePageOut == EncConverters.cnSymbolFontCodePage)
                        chMask = (char)0xF000;

                    // do it the 'hard way': widen each byte to a char
                    caOut = new char[nCharsLen];
                    for (int i = 0; i < nCharsLen; i++)
                        caOut[i] = (char)(baOut[i] | chMask);
                }
                else
                    throw;
            }
            break;
        }
        case EncodingForm.UTF16:
        {
            nCharsLen = nItems = (nOutLen / 2);
            DisplayDebugUCharValues(baOut, "Received (UTF16) back from Converter/DLL (returning as UTF16)...", ref bDebugDisplayMode);
            caOut = Encoding.Unicode.GetChars(baOut);
            break;
        }
        case EncodingForm.UTF8String:
        {
            DisplayDebugUCharValuesFromUTF8(baOut, "Received (UTF8Bytes) back from Converter/DLL (returning as UTF8String)...", ref bDebugDisplayMode);
            // this encoding form is always encoded using the default code page.
            caOut = Encoding.Default.GetChars(baOut);
            nCharsLen = nItems = nOutLen;
            break;
        }
        case (EncodingForm)CCUnicode8:
        {
            // the marker set above: the engine produced UTF-8, the caller wants UTF-16
            DisplayDebugUCharValuesFromUTF8(baOut, "Received (UTF8Bytes) back from Converter/DLL (returning as UTF16)...", ref bDebugDisplayMode);
            caOut = Encoding.UTF8.GetChars(baOut);
            nCharsLen = nItems = caOut.Length;
            break;
        }
        case EncodingForm.UTF16BE:
        case EncodingForm.UTF32:
        case EncodingForm.UTF32BE:
        {
            // the bytes are packed into chars as they are; no decoding is done here
            nCharsLen = nItems = nOutLen / 2;
            DisplayDebugUCharValues(baOut, "Received (UTF16BE/32/32BE) back from Converter/DLL...", ref bDebugDisplayMode);
            caOut = new char[nCharsLen];
            ByteArrToCharArr(baOut, caOut);
            // for UTF32, the item count is half as much again (four bytes per item).
            if (eFormEngineOut != EncodingForm.UTF16BE)
                nItems /= 2;
            break;
        }
        default:
            EncConverters.ThrowError(ErrStatus.OutEncFormNotSupported);
            break;
    }

    // unless empty returns are allowed by the build, treat "no characters" as an
    // error (DEBUG builds also check that the computed length matches the array)
#if !v22_AllowEmptyReturn
    if ((nCharsLen <= 0)
#if DEBUG
        || (nCharsLen != caOut.Length)
#endif
        )
    {
        EncConverters.ThrowError(ErrStatus.NoReturnDataBadOutForm);
    }
#endif

    // check to see if the engine handled the given output form. If not, then ask
    // TEC to do the conversion for us so that all engines can handle all possible
    // output encoding forms (e.g. caller requested utf32, but above CC could only
    // give us utf16/8)
    // Also, if the caller wanted something other than "None" for the eNormalizeOutput,
    // then we also have to call TEC for that as well (but I think this only makes
    // sense if the output is utf16(be) or utf32(be))
    // p.s. if this had been a TEC converter, then the eNormalizeOutput flag would
    // already have been reset to None (by this point), since we would have directly
    // requested that normalized form when we created the converter--see
    // TecEncConverter.PreConvert)
    string strOutput = new string(caOut);
    if ((eFormEngineOut != eOutEncodingForm) || (eNormalizeOutput != NormalizeFlags.None))
    {
        strOutput = EncConverters.UnicodeEncodingFormConvertEx(strOutput, eFormEngineOut, nItems, eOutEncodingForm, eNormalizeOutput, out nItems);
    }

    DisplayDebugUCharValues(strOutput, "Returning back to client...", ref bDebugDisplayMode);
    rciOutput = nItems;
    return strOutput;
}
// Creates a DatahubNormalizer for the given normalization flags. No work is done
// here; the flags are simply handed to the base-class constructor.
public DatahubNormalizer(NormalizeFlags normalizeFlags)
    : base(normalizeFlags)
{
}
// This method is called either when the user clicks the "Apply" or "OK" buttons *OR* when
// she tries to switch to the Test or Advanced tab. It is the dialog's one opportunity to
// make sure that the user has correctly configured a legitimate converter.
//
// Each step row of the grid is copied into m_astrStepFriendlyNames, m_abDirectionForwards
// and m_aeNormalizeFlags; the overall Lhs/Rhs encoding ids are taken from the first and
// last step; and finally the base class is asked to apply the configuration.
//
// Returns true only when every step is configured and base.OnApply() returns true.
protected override bool OnApply()
{
    // for compound converter, there must be at least one step. The last grid row is not
    // counted as a step (presumably it is the grid's "new row" placeholder -- confirm).
    int nRowCount = dataGridViewSteps.Rows.Count - 1;
    if (nRowCount <= 0)
    {
        // Previously this message sat behind a check that could never be reached (we had
        // already returned by then), so the user got no feedback at all. Only show it on
        // the Setup tab itself: per the original author, switching to the Test tab
        // already puts up its own error and we don't need two error messages.
        if (tabControl.SelectedTab == tabPageSetup)
        {
            MessageBox.Show(this, "You must add at least one step!", EncConverters.cstrCaption);
        }
        return false;
    }

    m_astrStepFriendlyNames = new string[nRowCount];
    m_abDirectionForwards = new bool[nRowCount];
    m_aeNormalizeFlags = new NormalizeFlags[nRowCount];

    for (int i = 0; i < nRowCount; i++)
    {
        DataGridViewRow theRow = dataGridViewSteps.Rows[i];
        IEncConverter aEC = (IEncConverter)theRow.Tag;

        // check to see if the user actually configured a converter at this row (might
        // have just checked the reverse box)
        if (aEC == null)
        {
            MessageBox.Show(this, String.Format(@"No Converter selected for step {0}. Click where it says '{1}' to choose a converter for this step", i + 1, cstrClickMeMessage), EncConverters.cstrCaption);
            return false;
        }

        // don't use the cell value for the name as that may have been truncated
        m_astrStepFriendlyNames[i] = aEC.Name;

        // the grid column stores "reverse", the member array stores "forward"
        m_abDirectionForwards[i] = !(bool)theRow.Cells[cnDirectionReverseColumn].Value;

        // map the normalization cell text onto the flag (anything else means None)
        string strNormalizeValue = (string)theRow.Cells[cnNormalizationColumn].Value;
        NormalizeFlags eNormalizeFlag = NormalizeFlags.None;
        if (strNormalizeValue == cstrNormalizationFullyComposed)
        {
            eNormalizeFlag = NormalizeFlags.FullyComposed;
        }
        else if (strNormalizeValue == cstrNormalizationFullyDecomposed)
        {
            eNormalizeFlag = NormalizeFlags.FullyDecomposed;
        }
        m_aeNormalizeFlags[i] = eNormalizeFlag;

        // grab the beginning and ending encoding ids as well (in case we're creating a
        // default friendly name). A step run in reverse consumes its right-hand side.
        if (i == 0)
        {
            LhsEncodingId = (m_abDirectionForwards[i]) ? aEC.LeftEncodingID : aEC.RightEncodingID;
        }
        if (i == (nRowCount - 1))
        {
            RhsEncodingId = (m_abDirectionForwards[i]) ? aEC.RightEncodingID : aEC.LeftEncodingID;
        }

        // also adjust the flag saying whether all steps are bidirectional
        m_bAllStepsAreBidirectional &= !EncConverters.IsUnidirectional(aEC.ConversionType);
    }

    if (tabControl.SelectedTab == tabPageSetup)
    {
        // only do this message box if we're on the Setup tab itself (see above)
        for (int i = 0; i < nRowCount; i++)
        {
            if (cstrClickMeMessage == m_astrStepFriendlyNames[i])
            {
                MessageBox.Show(this, String.Format("The step in row '{0}' has not been configured. Delete that row first or choose a converter for it", i + 1), EncConverters.cstrCaption);
                return false;
            }
        }
    }

    try
    {
        return base.OnApply();
    }
    catch (Exception ex)
    {
        // report the failure to the user and treat it as "not applied"
        MessageBox.Show(this, String.Format("Failed to add compound converter! {0}{0}{1}", Environment.NewLine, ex.Message), EncConverters.cstrCaption);
    }

    return false;
}
// Gets this converter ready for the conversion that is about to happen: the base class
// runs first, then both engine-side encoding forms are forced to LegacyBytes, the
// requested direction is remembered in m_bForward (so DoConvert can see it), and the
// matching load routine (LoadForward or LoadReverse) is called.
protected override void PreConvert
(
    EncodingForm eInEncodingForm,
    ref EncodingForm eInFormEngine,
    EncodingForm eOutEncodingForm,
    ref EncodingForm eOutFormEngine,
    ref NormalizeFlags eNormalizeOutput,
    bool bForward
)
{
    // let the base class do its thing first
    base.PreConvert(eInEncodingForm, ref eInFormEngine,
        eOutEncodingForm, ref eOutFormEngine,
        ref eNormalizeOutput, bForward);

    // whatever the base class picked, this engine is fed and returns LegacyBytes
    eInFormEngine = EncodingForm.LegacyBytes;
    eOutFormEngine = EncodingForm.LegacyBytes;

    // keep track of the direction so we can see it during DoConvert
    m_bForward = bForward;

    // do the load at this point.
    if (!m_bForward)
    {
        LoadReverse();
    }
    else
    {
        LoadForward();
    }
}
// Creates an ISOCountryNormalizer that remembers both the normalization flags and the
// Unicode normalization form to use.
// (for the available forms see http://unicode.org/reports/tr15/#Norm_Forms)
public ISOCountryNormalizer(NormalizeFlags normalizeFlags, NormalizationForm normalizationForm)
{
    this.NormalizationForm = normalizationForm;
    this.NormalizeFlags = normalizeFlags;
}
// This function is the meat of the conversion process. The input and output buffers are
// managed byte arrays that are pinned (via 'fixed') for the duration of the call so that
// raw pointers can be handed to the helper class and to the engine; that pinning is why
// everything happens in this one long function.
// The basic structure is:
//
//  o   Check Input Data (null is an error; an empty string converts to an empty string)
//  o   Fill in unspecified encoding forms (CheckInitEncForms)
//  o   Give the sub-class (via PreConvert) the opportunity to load tables and do
//      any special preprocessing it needs to ahead of the actual conversion
//  o   Normalize the input data to a byte array in the form the engine asked for
//      (ECNormalizeData.GetBytes)
//  o   Allocate a buffer for the output data (min 10000 bytes)
//  o   Call the subclass (via DoConvert) to do the actual conversion.
//  o   Normalize the output data to match the requested output EncodingForm
//      (ECNormalizeData.GetString)
//  o   Return the resultant string; the item count goes back through rciOutput.
//
protected virtual unsafe string InternalConvertEx
(
    EncodingForm eInEncodingForm,
    string sInput,
    int ciInput,
    EncodingForm eOutEncodingForm,
    NormalizeFlags eNormalizeOutput,
    out int rciOutput,
    bool bForward
)
{
    // ECNormalizeData.GetString terminates the engine's output by writing four zero
    // bytes starting at offset nOutLen, so the output array needs that much room beyond
    // the capacity we advertise to DoConvert.
    const int cnTerminatorRoom = 4;

    Util.DebugWriteLine(className, "BEGIN");
    Util.DebugWriteLine(className, "eEncFormIn " + eInEncodingForm.ToString() + ", " +
                        "eEncFormOut " + eOutEncodingForm.ToString());
    if (sInput == null)
    {
        EncConverters.ThrowError(ErrStatus.IncompleteChar);
    }
    if (sInput.Length == 0)
    {
        // nothing to convert
        rciOutput = 0;
        return "";
    }

#if DEBUG && __MonoCS__
    // for debugging only BEGIN
    byte[] baIn = System.Text.Encoding.BigEndianUnicode.GetBytes(sInput);  // easier to read
    Util.DebugWriteLine(className, Util.getDisplayBytes("Input BigEndianUnicode", baIn));
    baIn = System.Text.Encoding.Unicode.GetBytes(sInput);
    Util.DebugWriteLine(className, Util.getDisplayBytes("Input Unicode", baIn));

    int nInLen = sInput.Length;
    byte[] baIn2 = new byte[nInLen];
    for (int i = 0; i < nInLen; i++)
    {
        baIn2[i] = (byte)(sInput[i] & 0xFF);
    }
    Util.DebugWriteLine(className, Util.getDisplayBytes("Input Narrowized", baIn2));
    // for debugging only END
#endif

    // if the user hasn't specified, then take the default case for the ConversionType:
    //  if L/RHS == eLegacy, then LegacyString
    //  if L/RHS == eUnicode, then UTF16
    CheckInitEncForms
    (
        bForward,
        ref eInEncodingForm,
        ref eOutEncodingForm
    );

    // allow the converter engine's (and/or its COM wrapper) to do some preprocessing.
    EncodingForm eFormEngineIn = EncodingForm.Unspecified, eFormEngineOut = EncodingForm.Unspecified;
    PreConvert
    (
        eInEncodingForm,    // [in] form in the BSTR
        ref eFormEngineIn,  // [out] form the conversion engine wants, etc.
        eOutEncodingForm,
        ref eFormEngineOut,
        ref eNormalizeOutput,
        bForward
    );

    // get enough space for us to normalize the input data (6x ought to be enough)
    int nBufSize = sInput.Length * 6;
    byte[] abyInBuffer = new byte[nBufSize];
    fixed (byte* lpInBuffer = abyInBuffer)
    {
        // use a helper class to normalize the data to the format needed by the engine
        // (nBufSize goes in as the capacity and comes back by ref)
        Util.DebugWriteLine(className, "Calling GetBytes");
        ECNormalizeData.GetBytes(sInput, ciInput, eInEncodingForm,
            ((bForward) ? CodePageInput : CodePageOutput), eFormEngineIn, lpInBuffer,
            ref nBufSize, ref m_bDebugDisplayMode);
#if DEBUG && __MonoCS__
        byte[] baOut = new byte[nBufSize];
        ECNormalizeData.ByteStarToByteArr(lpInBuffer, nBufSize, baOut);
        Util.DebugWriteLine(className, Util.getDisplayBytes("Input Bytes", baOut));
#endif

        // get some space for the converter to fill with: at least 10000 bytes, plus the
        // terminator room which is *not* included in the capacity given to DoConvert.
        int nOutLen = Math.Max(10000, nBufSize * 6);
        byte[] abyOutBuffer = new byte[nOutLen + cnTerminatorRoom];
        fixed (byte* lpOutBuffer = abyOutBuffer)
        {
            lpOutBuffer[0] = lpOutBuffer[1] = lpOutBuffer[2] = lpOutBuffer[3] = 0;

            // call the wrapper sub-classes' DoConvert to let them do it
            // (nOutLen goes in as the capacity and comes back by ref).
            Util.DebugWriteLine(className, "Calling DoConvert");
            DoConvert(lpInBuffer, nBufSize, lpOutBuffer, ref nOutLen);
#if DEBUG && __MonoCS__
            Util.DebugWriteLine(className, "Output length " + nOutLen.ToString());
            byte[] baOut2 = new byte[nOutLen];
            ECNormalizeData.ByteStarToByteArr(lpOutBuffer, nOutLen, baOut2);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Output In Bytes", baOut2));
            Util.DebugWriteLine(className, "Got val '" + System.Text.Encoding.Unicode.GetString(baOut2) + "'");
#endif

            string result = ECNormalizeData.GetString(lpOutBuffer, nOutLen, eOutEncodingForm,
                ((bForward) ? CodePageOutput : CodePageInput), eFormEngineOut, eNormalizeOutput,
                out rciOutput, ref m_bDebugDisplayMode);
#if DEBUG && __MonoCS__
            Util.DebugWriteLine(className, "normalized result '" + result + "'");
            byte[] baResult = System.Text.Encoding.BigEndianUnicode.GetBytes(result);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output in UTF16BE", baResult));
            baResult = System.Text.Encoding.Unicode.GetBytes(result);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output in UTF16LE", baResult));
            baResult = System.Text.Encoding.UTF8.GetBytes(result);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output In UTF8", baResult));
            Util.DebugWriteLine(className, "Returning.");
#endif
            return result;
        }
    }
}
// Builds the textual description of one converter step: the mapping name, followed by
// cstrDirectionReversed when the step runs in reverse, followed by the matching
// normalization marker when fully composed or fully decomposed output was requested.
protected virtual string FormatConverterStep(string strMapping, bool bDirectionForward, NormalizeFlags normalizeOutput)
{
    string strStep = strMapping;

    // indicate if it's 'reversed'
    if (bDirectionForward == false)
    {
        strStep = strStep + cstrDirectionReversed;
    }

    // indicate if there's any special normalization (any other value adds nothing)
    if (normalizeOutput == NormalizeFlags.FullyComposed)
    {
        strStep = strStep + cstrNormalizationFullyComposed;
    }
    else if (normalizeOutput == NormalizeFlags.FullyDecomposed)
    {
        strStep = strStep + cstrNormalizationFullyDecomposed;
    }

    return strStep;
}
// Creates a RipeNormalizer for the given normalization flags. The flags are passed
// straight through to the base-class constructor; nothing else happens here.
public RipeNormalizer(NormalizeFlags normalizeFlags)
    : base(normalizeFlags)
{
}
// Creates a DatahubNormalizer for the given normalization flags and Unicode
// normalization form. Both values are forwarded unchanged to the base-class constructor.
public DatahubNormalizer(NormalizeFlags normalizeFlags, NormalizationForm normalizationForm)
    : base(normalizeFlags, normalizationForm)
{
}
// Creates a RipeNormalizer for the given normalization flags and Unicode normalization
// form. Both values are forwarded unchanged to the base-class constructor.
public RipeNormalizer(NormalizeFlags normalizeFlags, NormalizationForm normalizationForm)
    : base(normalizeFlags, normalizationForm)
{
}
// This function is the meat of the conversion process. The input and output buffers are
// managed byte arrays that are pinned (via 'fixed') for the duration of the call so that
// raw pointers can be handed to the helper class and to the engine; that pinning is why
// everything happens in this one long function.
// The basic structure is:
//
//  o   Check Input Data (null is an error)
//  o   Fill in unspecified encoding forms (CheckInitEncForms)
//  o   Give the sub-class (via PreConvert) the opportunity to load tables and do
//      any special preprocessing it needs to ahead of the actual conversion
//  o   Normalize the input data to a byte array in the form the engine asked for
//      (ECNormalizeData.GetBytes)
//  o   Allocate a buffer for the output data (min 10000 bytes)
//  o   Call the subclass (via DoConvert) to do the actual conversion.
//  o   Normalize the output data to match the requested output EncodingForm
//      (ECNormalizeData.GetString)
//  o   Return the resultant string; the item count goes back through rciOutput.
//
protected virtual unsafe string InternalConvertEx
(
    EncodingForm eInEncodingForm,
    string sInput,
    int ciInput,
    EncodingForm eOutEncodingForm,
    NormalizeFlags eNormalizeOutput,
    out int rciOutput,
    bool bForward
)
{
    // ECNormalizeData.GetString terminates the engine's output by writing four zero
    // bytes starting at offset nOutLen, so the output array needs that much room beyond
    // the capacity we advertise to DoConvert.
    const int cnTerminatorRoom = 4;

    if (sInput == null)
    {
        EncConverters.ThrowError(ErrStatus.IncompleteChar);
    }

    // if the user hasn't specified, then take the default case for the ConversionType:
    //  if L/RHS == eLegacy, then LegacyString
    //  if L/RHS == eUnicode, then UTF16
    CheckInitEncForms
    (
        bForward,
        ref eInEncodingForm,
        ref eOutEncodingForm
    );

    // allow the converter engine's (and/or its COM wrapper) to do some preprocessing.
    EncodingForm eFormEngineIn = EncodingForm.Unspecified, eFormEngineOut = EncodingForm.Unspecified;
    PreConvert
    (
        eInEncodingForm,    // [in] form in the BSTR
        ref eFormEngineIn,  // [out] form the conversion engine wants, etc.
        eOutEncodingForm,
        ref eFormEngineOut,
        ref eNormalizeOutput,
        bForward
    );

    // get enough space for us to normalize the input data (6x ought to be enough)
    int nBufSize = sInput.Length * 6;
    byte[] abyInBuffer = new byte[nBufSize];
    fixed (byte* lpInBuffer = abyInBuffer)
    {
        // use a helper class to normalize the data to the format needed by the engine
        // (nBufSize goes in as the capacity and comes back by ref)
        ECNormalizeData.GetBytes(sInput, ciInput, eInEncodingForm,
            ((bForward) ? CodePageInput : CodePageOutput), eFormEngineIn, lpInBuffer,
            ref nBufSize, ref m_bDebugDisplayMode);

        // get some space for the converter to fill with: at least 10000 bytes, plus the
        // terminator room which is *not* included in the capacity given to DoConvert.
        int nOutLen = Math.Max(10000, nBufSize * 6);
        byte[] abyOutBuffer = new byte[nOutLen + cnTerminatorRoom];
        fixed (byte* lpOutBuffer = abyOutBuffer)
        {
            lpOutBuffer[0] = lpOutBuffer[1] = lpOutBuffer[2] = lpOutBuffer[3] = 0;

            // call the wrapper sub-classes' DoConvert to let them do it
            // (nOutLen goes in as the capacity and comes back by ref).
            DoConvert(lpInBuffer, nBufSize, lpOutBuffer, ref nOutLen);

            return ECNormalizeData.GetString(lpOutBuffer, nOutLen, eOutEncodingForm,
                ((bForward) ? CodePageOutput : CodePageInput), eFormEngineOut, eNormalizeOutput,
                out rciOutput, ref m_bDebugDisplayMode);
        }
    }
}
// [DispId(18)]
// Public entry point for an explicit conversion: the caller supplies the input and
// output encoding forms, the input item count, the output normalization and the
// direction. The work is done by InternalConvertEx (note its different argument order);
// the number of output items comes back through ciOutput.
public virtual string ConvertEx(string sInput, EncodingForm inEnc, int ciInput, EncodingForm outEnc, out int ciOutput, NormalizeFlags eNormalizeOutput, bool bForward)
{
    string strConverted = InternalConvertEx(inEnc, sInput, ciInput, outEnc,
        eNormalizeOutput, out ciOutput, bForward);
    return strConverted;
}
// [DispId(18)]
// Public entry point for an explicit conversion: the caller supplies the input and
// output encoding forms, the input item count, the output normalization and the
// direction. The work is done by InternalConvertEx (note its different argument order);
// the number of output items comes back through ciOutput.
public virtual string ConvertEx(string sInput, EncodingForm inEnc, int ciInput, EncodingForm outEnc, out int ciOutput, NormalizeFlags eNormalizeOutput, bool bForward)
{
    string strConverted = InternalConvertEx(inEnc, sInput, ciInput, outEnc,
        eNormalizeOutput, out ciOutput, bForward);
    return strConverted;
}
// Prepares the TECkit converter handle (m_converter) for the upcoming conversion.
//
// After the base class has run, a cache key is built from the (string-flavoured) input
// form, output form, normalization flag and direction. If the map/tec file on disk is
// newer than what was loaded and a converter is cached under that key, the cached
// converter is reset and evicted. Then either the cached handle is reused, or the map is
// loaded and a new converter is created with TECkit_CreateConverter and cached.
// On exit eNormalizeOutput is always reset to None, because the normalization was
// requested from TECkit when the converter was created.
protected unsafe override void PreConvert
(
    EncodingForm eInEncodingForm,
    ref EncodingForm eInFormEngine,
    EncodingForm eOutEncodingForm,
    ref EncodingForm eOutFormEngine,
    ref NormalizeFlags eNormalizeOutput,
    bool bForward
)
{
    // let the base class do its thing first
    base.PreConvert(eInEncodingForm, ref eInFormEngine,
        eOutEncodingForm, ref eOutFormEngine,
        ref eNormalizeOutput, bForward);

    // If the user uses one of the *Bytes forms, change that to the *String form so the
    // value matches what the TECkit engine is expecting (e.g. it expects the value for
    // LegacyString even if the data comes in as LegacyBytes). The data still gets
    // converted correctly later; this only affects the value given to TECkit when the
    // converter object is created below.
    switch (eInEncodingForm)
    {
        case EncodingForm.LegacyBytes:
            eInEncodingForm = EncodingForm.LegacyString;
            break;
        case EncodingForm.UTF8Bytes:
            eInEncodingForm = EncodingForm.UTF8String;
            break;
    }

    switch (eOutEncodingForm)
    {
        case EncodingForm.LegacyBytes:
            eOutEncodingForm = EncodingForm.LegacyString;
            break;
        case EncodingForm.UTF8Bytes:
            eOutEncodingForm = EncodingForm.UTF8String;
            break;
    }

    // one cached converter per combination of forms, normalization and direction
    string strConverterKey = String.Concat(
        eInEncodingForm.ToString(),
        eOutEncodingForm.ToString(),
        eNormalizeOutput.ToString(),
        bForward.ToString());

    bool bReload = false;
    if (m_bCompileable && !String.IsNullOrEmpty(m_strMapFileSpec))
    {
        // compilable map (i.e. ImplType SIL.map): see if the map file has changed.
        // first make sure it's there and get the last time it was modified
        DateTime timeMapModified = DateTime.Now;    // value is irrelevant, but 'ref' needs it initialized
        if (!DoesFileExist(m_strMapFileSpec, ref timeMapModified))
        {
            EncConverters.ThrowError(ErrStatus.CantOpenReadMap, m_strMapFileSpec);
        }

        // only a converter that is cached *and* older than the file has to be evicted;
        // when nothing is cached under this key we fall thru to the Load below anyway.
        bool bCachedIsStale = (timeMapModified > m_timeModifiedTec)
            && m_mapConverters.ContainsKey(strConverterKey);
        if (bCachedIsStale)
        {
            ResetConverter((Int32)m_mapConverters[strConverterKey]);
            m_mapConverters.Remove(strConverterKey);
            bReload = true;
        }
    }
    else if (IsFileLoaded())
    {
        // the tec file could also have changed out from underneath us (in which case
        // we'd need to reload it).
        DateTime timeTecModified = DateTime.Now;    // value is irrelevant, but 'ref' needs it initialized
        if (!DoesFileExist(m_strTecFileSpec, ref timeTecModified))
        {
            EncConverters.ThrowError(ErrStatus.CantOpenReadMap, m_strTecFileSpec);
        }

        bool bCachedIsStale = (timeTecModified > m_timeModifiedTec)
            && m_mapConverters.ContainsKey(strConverterKey);
        if (bCachedIsStale)
        {
            m_baMapping = null;                 // triggers a reload
            m_lhsFlags = m_rhsFlags = 0;

            ResetConverter((Int32)m_mapConverters[strConverterKey]);
            m_mapConverters.Remove(strConverterKey);
            bReload = true;
        }
    }

    if (!m_mapConverters.ContainsKey(strConverterKey))
    {
        int status = (int)ErrStatus.NoError;

        // load the map now
        Load(bReload);

        // TECkit takes the output form and the normalization request OR-ed together
        // in a single 16-bit value.
        UInt16 eFormOut = System.Convert.ToUInt16(
            System.Convert.ToUInt16((int)eOutEncodingForm)
            | System.Convert.ToUInt16((int)eNormalizeOutput));
        byte byDirection = (byte)((bForward) ? 1 : 0);

        // make a converter for this new combination (m_converter receives the handle).
        fixed (Int32* pConverter = &m_converter)
        {
            if (IsFileLoaded())
            {
                fixed (byte* pbyMapping = m_baMapping)
                {
                    status = TECkit_CreateConverter(
                        pbyMapping,
                        m_nMapSize,
                        byDirection,
                        System.Convert.ToUInt16((int)eInEncodingForm),
                        eFormOut,
                        (void*)pConverter);
                }
            }
            else
            {
                // no mapping bytes available: hand TECkit a null mapping pointer
                status = TECkit_CreateConverter(
                    (byte*)0,
                    m_nMapSize,
                    byDirection,
                    System.Convert.ToUInt16((int)eInEncodingForm),
                    eFormOut,
                    (void*)pConverter);
            }
        }

        if (status != (int)ErrStatus.NoError)
        {
            EncConverters.ThrowError(status);
        }
        else
        {
            // remember the handle for next time
            m_mapConverters[strConverterKey] = m_converter;
        }
    }
    else
    {
        // we already have a converter for this combination
        m_converter = (Int32)m_mapConverters[strConverterKey];
    }

    // since TEC can handle output normalization directly (by requesting it here
    // in the creation of the converter), reset the requesting flag so we won't
    // attempt to do it later (all other converters that can't do implicit output
    // normalization will *not* have reset the flag and then after their conversion,
    // if the flag is still set, we'll call TEC to do it for them; see
    // ECNormalizeData.GetString).
    eNormalizeOutput = NormalizeFlags.None;
}
// Convenience constructor: the same as the two-argument constructor with the
// normalization form fixed to NormalizationForm.FormC.
public ISOCountryNormalizer(NormalizeFlags normalizeFlags)
    : this(normalizeFlags, NormalizationForm.FormC)
{
}
// Implicit-length conversion: forwards to InternalConvertEx with an input count of 0
// and throws the returned output count away.
//
// This routine is only called by one of the 'implicit' methods (e.g. ConvertToUnicode).
// For these "COM" standard methods, the length of the string is specified by the BSTR
// itself and always/only supports UTF-16-like (i.e. wide) data. Passing 0 lets the
// function determine the length from the BSTR itself (the data may contain a value
// of 0, i.e. it won't necessarily be null terminated).
protected override string InternalConvert
(
    EncodingForm eInEncodingForm,
    string sInput,
    EncodingForm eOutEncodingForm,
    NormalizeFlags eNormalizeOutput,
    bool bForward
)
{
    const int cnLengthFromString = 0;
    int ciIgnored;

    string strConverted = InternalConvertEx(
        eInEncodingForm,
        sInput,
        cnLengthFromString,
        eOutEncodingForm,
        eNormalizeOutput,
        out ciIgnored,
        bForward);

    return strConverted;
}
// We override this method from EncConverter so that we can call all of the steps'
// convert functions in turn (i.e. for this one, it isn't sufficient to just provide a
// "DoConvert" method).
//
// Going forward the steps run first-to-last in their configured direction; going in
// reverse they run last-to-first with each step's direction flipped. The output string,
// encoding form and item count of one step become the input of the next. Only the final
// step uses the caller's eOutEncodingForm and eNormalizeOutput; every earlier step uses
// its own configured normalization and an output form derived from the side of the
// converter it is producing (LegacyBytes for a legacy side, otherwise Unspecified).
//
// Returns the output of the final step (null when there are no steps) and its item
// count in rciOutput. Exceptions from the steps propagate unchanged.
protected override string InternalConvertEx
(
    EncodingForm eInEncodingForm,
    string sInput,
    int ciInput,
    EncodingForm eOutEncodingForm,
    NormalizeFlags eNormalizeOutput,
    out int rciOutput,
    bool bForward
)
{
    int nSize = m_aEncConverter.Count;
    string strOutput = null;
    EncodingForm inForm = eInEncodingForm;
    Int32 ciOutput = 0;

    for (int nStep = 0; nStep < nSize; nStep++)
    {
        // in reverse mode, walk the step list backwards
        int i = bForward ? nStep : (nSize - 1 - nStep);
        bool bFinalStep = (nStep == (nSize - 1));

        IEncConverter rConverter = (IEncConverter)m_aEncConverter[i];
        if (rConverter == null)
        {
            EncConverters.ThrowError(ErrStatus.MissingConverter);
        }

        rConverter.Debug = Debug;

        // in reverse mode the direction is the opposite of what the user configured
        bool bConfiguredForward = (bool)m_aDirectionForward[i];
        bool bDirectionForward = bForward ? bConfiguredForward : !bConfiguredForward;

        NormalizeFlags eNormalizeFlags;
        EncodingForm outForm;
        if (bFinalStep)
        {
            // the final step honors the caller's requested output form and normalization
            eNormalizeFlags = eNormalizeOutput;
            outForm = eOutEncodingForm;
        }
        else
        {
            eNormalizeFlags = (NormalizeFlags)m_aNormalizeOutput[i];

            // a step run forward produces its right-hand side; a step run in reverse
            // produces its left-hand side. (The reverse loop used to look at the
            // right-hand side in both cases.)
            ConvType eConversionType = rConverter.ConversionType;
            NormConversionType eType = bDirectionForward
                ? NormalizeRhsConversionType(eConversionType)
                : NormalizeLhsConversionType(eConversionType);

            outForm = (eType == NormConversionType.eLegacy)
                ? EncodingForm.LegacyBytes
                : EncodingForm.Unspecified;
        }

        strOutput = rConverter.ConvertEx(
            sInput,
            inForm,
            ciInput,
            outForm,
            out ciOutput,
            eNormalizeFlags,
            bDirectionForward);

        // setup input for the next step
        sInput = strOutput;
        inForm = outForm;
        ciInput = ciOutput;

        // it's possible the user cancelled the debug mode so get it back
        Debug = rConverter.Debug;
    }

    // (the former 'catch (ApplicationException e) { throw e; }' wrapper only reset the
    // stack trace of the exception, so it has been dropped)
    rciOutput = ciOutput;
    return strOutput;
}
// This method is called either when the user clicks the "Apply" or "OK" buttons *OR* when
// she tries to switch to the Test or Advanced tab. It is the dialog's one opportunity to
// make sure that the user has correctly configured a legitimate converter.
//
// Each step row of the grid is copied into m_astrStepFriendlyNames, m_abDirectionForwards
// and m_aeNormalizeFlags; the overall Lhs/Rhs encoding ids are taken from the first and
// last step; and finally the base class is asked to apply the configuration.
//
// Returns true only when every step is configured and base.OnApply() returns true.
protected override bool OnApply()
{
    // for compound converter, there must be at least one step. The last grid row is not
    // counted as a step (presumably it is the grid's "new row" placeholder -- confirm).
    int nRowCount = dataGridViewSteps.Rows.Count - 1;
    if (nRowCount <= 0)
    {
        // Previously this message sat behind a check that could never be reached (we had
        // already returned by then), so the user got no feedback at all. Only show it on
        // the Setup tab itself: per the original author, switching to the Test tab
        // already puts up its own error and we don't need two error messages.
        if (tabControl.SelectedTab == tabPageSetup)
        {
            MessageBox.Show(this, "You must add at least one step!", EncConverters.cstrCaption);
        }
        return(false);
    }

    m_astrStepFriendlyNames = new string[nRowCount];
    m_abDirectionForwards = new bool[nRowCount];
    m_aeNormalizeFlags = new NormalizeFlags[nRowCount];

    for (int i = 0; i < nRowCount; i++)
    {
        DataGridViewRow theRow = dataGridViewSteps.Rows[i];
        IEncConverter aEC = (IEncConverter)theRow.Tag;

        // check to see if the user actually configured a converter at this row (might
        // have just checked the reverse box)
        if (aEC == null)
        {
            MessageBox.Show(this, String.Format(@"No Converter selected for step {0}. Click where it says '{1}' to choose a converter for this step", i + 1, cstrClickMeMessage), EncConverters.cstrCaption);
            return(false);
        }

        // don't use the cell value for the name as that may have been truncated
        m_astrStepFriendlyNames[i] = aEC.Name;

        // the grid column stores "reverse", the member array stores "forward"
        m_abDirectionForwards[i] = !(bool)theRow.Cells[cnDirectionReverseColumn].Value;

        // map the normalization cell text onto the flag (anything else means None)
        string strNormalizeValue = (string)theRow.Cells[cnNormalizationColumn].Value;
        NormalizeFlags eNormalizeFlag = NormalizeFlags.None;
        if (strNormalizeValue == cstrNormalizationFullyComposed)
        {
            eNormalizeFlag = NormalizeFlags.FullyComposed;
        }
        else if (strNormalizeValue == cstrNormalizationFullyDecomposed)
        {
            eNormalizeFlag = NormalizeFlags.FullyDecomposed;
        }
        m_aeNormalizeFlags[i] = eNormalizeFlag;

        // grab the beginning and ending encoding ids as well (in case we're creating a
        // default friendly name). A step run in reverse consumes its right-hand side.
        if (i == 0)
        {
            LhsEncodingId = (m_abDirectionForwards[i]) ? aEC.LeftEncodingID : aEC.RightEncodingID;
        }
        if (i == (nRowCount - 1))
        {
            RhsEncodingId = (m_abDirectionForwards[i]) ? aEC.RightEncodingID : aEC.LeftEncodingID;
        }

        // also adjust the flag saying whether all steps are bidirectional
        m_bAllStepsAreBidirectional &= !EncConverters.IsUnidirectional(aEC.ConversionType);
    }

    if (tabControl.SelectedTab == tabPageSetup)
    {
        // only do this message box if we're on the Setup tab itself (see above)
        for (int i = 0; i < nRowCount; i++)
        {
            if (cstrClickMeMessage == m_astrStepFriendlyNames[i])
            {
                MessageBox.Show(this, String.Format("The step in row '{0}' has not been configured. Delete that row first or choose a converter for it", i + 1), EncConverters.cstrCaption);
                return(false);
            }
        }
    }

    try
    {
        return(base.OnApply());
    }
    catch (Exception ex)
    {
        // report the failure to the user and treat it as "not applied"
        MessageBox.Show(this, String.Format("Failed to add compound converter! {0}{0}{1}", Environment.NewLine, ex.Message), EncConverters.cstrCaption);
    }

    return(false);
}
// Appends one step to this compound converter. The three member lists are parallel:
// entry N of each one describes step N (the converter itself, whether it runs in its
// forward direction, and the normalization to apply to its output).
public virtual void AddConverterStep(IEncConverter rConvert, bool bDirectionForward, NormalizeFlags eNormalizeOutput)
{
    // the converter that performs this step
    m_aEncConverter.Add(rConvert);

    // the direction in which to run it
    m_aDirectionForward.Add(bDirectionForward);

    // the normalization requested for its output
    m_aNormalizeOutput.Add(eNormalizeOutput);
}
// Turns the raw bytes an engine wrote into lpOutBuffer into the string handed back to
// the client.
//
//  lpOutBuffer / nOutLen   the engine's output and its length in bytes; four zero bytes
//                          are written starting at lpOutBuffer[nOutLen], so the buffer
//                          must have room for them
//  eOutEncodingForm        the form the client asked for
//  nCodePageOut            code page used to widen LegacyString output
//  eFormEngineOut          the form the engine actually produced
//  eNormalizeOutput        normalization still to be applied (None if already done)
//  rciOutput               receives the number of items in the returned string
//  bDebugDisplayMode       passed through to the DisplayDebug* helpers
//
// Mismatches between the engine's form and the requested form that are cheap to handle
// are resolved here; anything left over (and any normalization) is delegated to
// EncConverters.UnicodeEncodingFormConvertEx at the end.
internal static unsafe string GetString(byte *lpOutBuffer, int nOutLen, EncodingForm eOutEncodingForm, int nCodePageOut, EncodingForm eFormEngineOut, NormalizeFlags eNormalizeOutput, out int rciOutput, ref bool bDebugDisplayMode)
{
    Util.DebugWriteLine(className, "BEGIN");

    // null terminate the output and turn it into a (real) array of bytes
    for (int iTerm = 3; iTerm >= 0; iTerm--)
    {
        lpOutBuffer[nOutLen + iTerm] = 0;
    }

    byte[] abyEngine = new byte[nOutLen];
    ByteStarToByteArr(lpOutBuffer, nOutLen, abyEngine);
    Util.DebugWriteLine(className, Util.getDisplayBytes("byte array", abyEngine));

    // check to see if the engine handled the given output form. If not, then see
    //  if it's a conversion we can easily do (otherwise we'll ask TEC to do the
    //  conversion for us (later) so that all engines can handle all possible
    //  output encoding forms.
    Util.DebugWriteLine(className, "eOutEncodingForm " + eOutEncodingForm.ToString() + ", " +
                        "eFormEngineOut " + eFormEngineOut.ToString());
    if (eOutEncodingForm != eFormEngineOut)
    {
        if (EncConverter.IsLegacyFormat(eOutEncodingForm))
        {
            bool bBytesWantedAsString = (eFormEngineOut == EncodingForm.LegacyBytes)
                && (eOutEncodingForm == EncodingForm.LegacyString);
            if (bBytesWantedAsString)
            {
                // in this case, just *pretend* the engine outputs LegacyString (the
                //  LegacyString case below really means "convert LegacyBytes to
                //  LegacyString")
                eFormEngineOut = eOutEncodingForm;
            }
        }
        else    // unicode forms
        {
            bool bClientWantsUtf8 = (eOutEncodingForm == EncodingForm.UTF8Bytes)
                || (eOutEncodingForm == EncodingForm.UTF8String);

            if ((eOutEncodingForm == EncodingForm.UTF16) && (eFormEngineOut == EncodingForm.UTF8Bytes))
            {
                // the engine gives UTF8 and the client wants UTF16:
                //  use the special form to convert it below
                Util.DebugWriteLine(className, "using CCUnicode8");
                eOutEncodingForm = eFormEngineOut = (EncodingForm)CCUnicode8;
            }
            else if ((eFormEngineOut == EncodingForm.UTF16) && bClientWantsUtf8)
            {
                // or vice versa: engine gave UTF16, but user wants a UTF8 flavor, so
                //  re-encode the bytes right here.
                char[] achWide = Encoding.Unicode.GetChars(abyEngine);
                abyEngine = new UTF8Encoding().GetBytes(achWide);
                eFormEngineOut = eOutEncodingForm;
                nOutLen = abyEngine.Length;
            }
            else if ((eOutEncodingForm == EncodingForm.UTF8String) || (eOutEncodingForm == EncodingForm.UTF16))
            {
                // these conversions we can do ourself
#if _MSC_VER
                // Doesn't this wipe out the distinction?
                // On Linux we need to be able to convert the output from UTF32 to UTF16.
                eFormEngineOut = eOutEncodingForm;
#endif
            }
        }
    }

    int ciItems = 0, cchOut = 0;
    char[] achOut = null;
    switch (eFormEngineOut)
    {
        case EncodingForm.LegacyBytes:
        case EncodingForm.UTF8Bytes:
        {
            if (eFormEngineOut != EncodingForm.LegacyBytes)
            {
                DisplayDebugUCharValuesFromUTF8(abyEngine, "Received (UTF8Bytes) back from Converter/DLL (returning as UTF8Bytes)...", ref bDebugDisplayMode);
            }
            else
            {
                DisplayDebugCharValues(abyEngine, "Received (LegacyBytes) back from Converter/DLL (returning as LegacyBytes)...", ref bDebugDisplayMode);
            }

            // stuff the returned 'bytes' into the BSTR as narrow characters rather than
            //  converting to wide (two bytes per char, rounding up)
            ciItems = nOutLen;
            cchOut = (nOutLen + 1) / 2;
            achOut = new char[cchOut];
            ByteArrToCharArr(abyEngine, achOut);
            break;
        }

        case EncodingForm.LegacyString:
        {
            DisplayDebugCharValues(abyEngine, "Received (LegacyBytes) back from Converter/DLL (returning as LegacyString)...", ref bDebugDisplayMode);
            cchOut = ciItems = nOutLen;

            bool bWidenByBruteForce = false;
#if __MonoCS__
            // Narrowizing by code page 0 doesn't seem to be what we want on Linux.
            // Treating it as a symbol font or stripping off the low byte works better.
            bWidenByBruteForce = (nCodePageOut == 0);
#endif
            if (bWidenByBruteForce)
            {
                achOut = BruteForceWiden(nCodePageOut, abyEngine, cchOut);
            }
            else
            {
                try
                {
                    // this will throw (for some reason) when doing symbol fonts
                    //  (apparently, CP_SYMBOL is no longer supported).
                    achOut = Encoding.GetEncoding(nCodePageOut).GetChars(abyEngine);
                }
                catch
                {
                    // only the symbol-font and ISO 8859-1 code pages have a fallback
                    if ((nCodePageOut != EncConverters.cnSymbolFontCodePage)
                        && (nCodePageOut != EncConverters.cnIso8859_1CodePage))
                    {
                        throw;
                    }
                    achOut = BruteForceWiden(nCodePageOut, abyEngine, cchOut);
                }
            }
            break;
        }

        case EncodingForm.UTF16:
        {
            cchOut = ciItems = (nOutLen / 2);
            DisplayDebugUCharValues(abyEngine, "Received (UTF16) back from Converter/DLL (returning as UTF16)...", ref bDebugDisplayMode);
            achOut = Encoding.Unicode.GetChars(abyEngine);
            break;
        }

        case EncodingForm.UTF8String:
        {
            DisplayDebugUCharValuesFromUTF8(abyEngine, "Received (UTF8Bytes) back from Converter/DLL (returning as UTF8String)...", ref bDebugDisplayMode);

            // this encoding form is always encoded using the default code page.
            achOut = Encoding.Default.GetChars(abyEngine);
            cchOut = ciItems = nOutLen;
            break;
        }

        case (EncodingForm)CCUnicode8:
        {
            DisplayDebugUCharValuesFromUTF8(abyEngine, "Received (UTF8Bytes) back from Converter/DLL (returning as UTF16)...", ref bDebugDisplayMode);
            achOut = Encoding.UTF8.GetChars(abyEngine);
            cchOut = ciItems = achOut.Length;
            break;
        }

        case EncodingForm.UTF16BE:
        case EncodingForm.UTF32:
        case EncodingForm.UTF32BE:
        {
            cchOut = ciItems = nOutLen / 2;
            DisplayDebugUCharValues(abyEngine, "Received (UTF16BE/32/32BE) back from Converter/DLL...", ref bDebugDisplayMode);
            achOut = new char[cchOut];
            ByteArrToCharArr(abyEngine, achOut);

            // for UTF32, it is half again as little in the item count.
            if (eFormEngineOut != EncodingForm.UTF16BE)
            {
                ciItems = ciItems / 2;
            }
            break;
        }

        default:
            EncConverters.ThrowError(ErrStatus.OutEncFormNotSupported);
            break;
    }

#if !v22_AllowEmptyReturn
    if ((cchOut <= 0)
#if DEBUG
        || (cchOut != achOut.Length)
#endif
        )
    {
        EncConverters.ThrowError(ErrStatus.NoReturnDataBadOutForm);
    }
#endif

    // check to see if the engine handled the given output form. If not, then ask
    //  TEC to do the conversion for us so that all engines can handle all possible
    //  output encoding forms (e.g. caller requested utf32, but above CC could only
    //  give us utf16/8)
    // Also, if the caller wanted something other than "None" for the eNormalizeOutput,
    //  then we also have to call TEC for that as well (but I think this only makes
    //  sense if the output is utf16(be) or utf32(be))
    // p.s. if this had been a TEC converter, then the eNormalizeOutput flag would
    //  already have been reset to None (by this point), since we would have directly
    //  requested that normalized form when we created the converter--see
    //  TecEncConverter.PreConvert)
    string strResult = new string(achOut);
#if DEBUG
    byte[] byteArray = Encoding.BigEndianUnicode.GetBytes(achOut);
    Util.DebugWriteLine(className, Util.getDisplayBytes("characters", byteArray));
    byte[] baResult = System.Text.Encoding.BigEndianUnicode.GetBytes(strResult);
    Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized strOutput in UTF16BE", baResult));
#endif
    bool bNeedsTec = (eFormEngineOut != eOutEncodingForm)
        || (eNormalizeOutput != NormalizeFlags.None);
    if (bNeedsTec)
    {
        strResult = EncConverters.UnicodeEncodingFormConvertEx(strResult, eFormEngineOut, ciItems, eOutEncodingForm, eNormalizeOutput, out ciItems);
    }

    DisplayDebugUCharValues(strResult, "Returning back to client...", ref bDebugDisplayMode);

    rciOutput = ciItems;
    return strResult;
}
// Gets this converter ready for the conversion that is about to happen: the base class
// runs first, then both engine-side encoding forms are set from m_bLegacy, the lookup
// direction is recorded in m_bReverseLookup, and Load() is called.
protected override void PreConvert
(
    EncodingForm eInEncodingForm,
    ref EncodingForm eInFormEngine,
    EncodingForm eOutEncodingForm,
    ref EncodingForm eOutFormEngine,
    ref NormalizeFlags eNormalizeOutput,
    bool bForward
)
{
    // let the base class do its thing first
    base.PreConvert(eInEncodingForm, ref eInFormEngine,
        eOutEncodingForm, ref eOutFormEngine,
        ref eNormalizeOutput, bForward);

    // this converter only deals with 'String' flavors, so if it's
    //  Legacy_to(_from)_Legacy, then we expect LegacyString and if it's
    //  Unicode_to(_from)_Unicode, then we expect UTF-16
    EncodingForm eEngineForm = m_bLegacy ? EncodingForm.LegacyString : EncodingForm.UTF16;
    eOutFormEngine = eEngineForm;
    eInFormEngine = eEngineForm;

    // the bForward that comes here might be different from the
    //  IEncConverter->DirectionForward (if it came in from a call to ConvertEx), so use
    //  *this* value to determine the direction for the forthcoming conversion (DoConvert).
    m_bReverseLookup = (bForward == false);

    // check to see if the file(s) need to be (re-)loaded at this point.
    Load();
}