/// <summary>
/// Shows a TECkit compiler message to the user in a message box (this method is
/// wrapped in an errFunc delegate by CompileMap). If the user answers Cancel, the
/// compilation is aborted via ErrStatus.CompilationFailed.
/// </summary>
/// <param name="pszName">converter name, decoded as ASCII and shown in the caption</param>
/// <param name="msg">message text, decoded as ASCII</param>
/// <param name="param">optional detail appended in double quotes; skipped when it is a null pointer</param>
/// <param name="line">line number appended to the message; skipped when 0</param>
static unsafe void DisplayCompilerError(byte *pszName, byte *msg, byte *param, UInt32 line)
{
    Encoding ascii = Encoding.ASCII;

    // the pointers are decoded in the same order as before: msg, param, pszName
    string strText = ascii.GetString(ECNormalizeData.ByteStarToByteArr(msg));
    string strDetail = (param != null)
        ? ": \"" + ascii.GetString(ECNormalizeData.ByteStarToByteArr(param)) + "\""
        : String.Empty;
    string strWhere = (line != 0)
        ? " at line " + line.ToString()
        : String.Empty;
    string strMessage = strText + strDetail + strWhere;

    string strName = ascii.GetString(ECNormalizeData.ByteStarToByteArr(pszName));
    string strCaption = "Compilation feedback from TECkit for the '" + strName + "' converter";

    DialogResult eAnswer = MessageBox.Show(strMessage, strCaption,
        MessageBoxButtons.OKCancel, MessageBoxIcon.Exclamation);
    if (eAnswer == DialogResult.Cancel)
    {
        EncConverters.ThrowError(ErrStatus.CompilationFailed);
    }
}
// [DispId(16)]
/// <summary>
/// Converts a UTF-16 string to legacy bytes. Only converters whose ConversionType
/// is Legacy_to_from_Unicode, Unicode_to_from_Legacy or Unicode_to_Legacy are
/// accepted; anything else raises ErrStatus.InvalidConversionType.
/// </summary>
/// <param name="sInput">the Unicode text to convert</param>
/// <returns>the converted data as a byte array</returns>
public virtual byte[] ConvertFromUnicode(string sInput)
{
    bool bSupported = (ConversionType == ConvType.Legacy_to_from_Unicode)
        || (ConversionType == ConvType.Unicode_to_from_Legacy)
        || (ConversionType == ConvType.Unicode_to_Legacy);
    if (!bSupported)
    {
        EncConverters.ThrowError(ErrStatus.InvalidConversionType);
    }

    // a Legacy_to_from_Unicode converter has Unicode on its right-hand side, so
    // going *from* Unicode means running it in reverse; the others run forward.
    bool bForward = (ConversionType != ConvType.Legacy_to_from_Unicode);

#if __MonoCS__
    // A C++ BSTR does not map onto a C# string: a BSTR may hold arbitrary bytes,
    // whereas C# insists a string always contains valid data (e.g. matched
    // surrogate pairs). That cannot be guaranteed for legacy encoding data, so
    // use the overload that hands back the raw bytes.
    int ciOutput;
    return InternalConvertEx(EncodingForm.UTF16, sInput, EncodingForm.LegacyBytes,
        NormalizeOutput, out ciOutput, bForward);
#else
    // use the normal string-returning 'InternalConvert' and then turn the
    // resulting string into a byte [].
    string sOutput = InternalConvert(EncodingForm.UTF16, sInput, EncodingForm.LegacyBytes,
        NormalizeOutput, bForward);
    return ECNormalizeData.StringToByteArr(sOutput, false);
#endif
}
/// <summary>
/// Copies <paramref name="strOutput"/> into the caller's output buffer, either as
/// narrow bytes in the output code page (when m_bLegacy is set) or as two bytes
/// per UTF-16 code unit otherwise.
/// </summary>
/// <param name="strOutput">the text to write</param>
/// <param name="lpOutBuffer">destination buffer</param>
/// <param name="rnOutLen">
/// in: the size the result is checked against (ErrStatus.OutputBufferFull is
/// raised when the result is larger); out: the length of the data written
/// </param>
protected unsafe void StringToProperByteStar(string strOutput, byte *lpOutBuffer, ref int rnOutLen)
{
    if (!m_bLegacy)
    {
        // wide output: two bytes for every UTF-16 code unit
        int cbWide = strOutput.Length * 2;
        if (cbWide > rnOutLen)
        {
            EncConverters.ThrowError(ErrStatus.OutputBufferFull);
        }

        rnOutLen = cbWide;
        rnOutLen = ECNormalizeData.StringToByteStar(strOutput, lpOutBuffer, rnOutLen, false);
        return;
    }

    // legacy output: shrink it from wide to narrow using the output code page
    byte[] abyNarrow = EncConverters.GetBytesFromEncoding(CodePageOutput, strOutput, true);
    if (abyNarrow.Length > rnOutLen)
    {
        EncConverters.ThrowError(ErrStatus.OutputBufferFull);
    }

    rnOutLen = abyNarrow.Length;
    ECNormalizeData.ByteArrToByteStar(abyNarrow, lpOutBuffer);
}
/// <summary>
/// Hands the input buffer to CppDoConvert and raises an error for any non-zero
/// status it returns.
/// </summary>
/// <param name="lpInBuffer">input bytes</param>
/// <param name="nInLen">number of input bytes</param>
/// <param name="lpOutBuffer">buffer passed to CppDoConvert for the result</param>
/// <param name="rnOutLen">passed to CppDoConvert by address, so it can be updated by the callee</param>
protected override unsafe void DoConvert
(
    byte *lpInBuffer,
    int nInLen,
    byte *lpOutBuffer,
    ref int rnOutLen
)
{
#if DEBUG
    if (Util.IsUnix)
    {
        // debug builds only: log the bytes that are about to be converted
        byte[] abyDump = new byte[nInLen];
        ECNormalizeData.ByteStarToByteArr(lpInBuffer, nInLen, abyDump);
        Util.DebugWriteLine(this, Util.getDisplayBytes("Sending bytes to CppDoConvert", abyDump));
    }
#endif

    int nResult;
    fixed (int *pnOutLen = &rnOutLen)
    {
        nResult = CppDoConvert(lpInBuffer, nInLen, lpOutBuffer, pnOutLen);
    }

    if (nResult == 0)
    {
        return;
    }

    EncConverters.ThrowError(nResult);
}
/// <summary>
/// Converts between the code page m_nCodePage and UTF-16. When m_bToWide is set
/// the input is code page bytes and the output is UTF-16; otherwise the reverse.
/// On Unix the conversion is done with System.Text.Encoding, elsewhere with
/// MultiByteToWideChar / WideCharToMultiByte.
/// </summary>
/// <param name="lpInBuffer">input bytes</param>
/// <param name="nInLen">number of input bytes</param>
/// <param name="lpOutBuffer">buffer that receives the converted bytes</param>
/// <param name="rnOutLen">
/// in: the size of <paramref name="lpOutBuffer"/> in bytes; out: the number of
/// bytes written
/// </param>
protected override unsafe void DoConvert
(
    byte *lpInBuffer,
    int nInLen,
    byte *lpOutBuffer,
    ref int rnOutLen
)
{
    if (Util.IsUnix)
    {
        // remember how much room the caller gave us before rnOutLen is reused
        // for the result length
        int nCapacity = rnOutLen;

        byte[] baIn = new byte[nInLen];
        ECNormalizeData.ByteStarToByteArr(lpInBuffer, nInLen, baIn);
        Util.DebugWriteLine(this, String.Format("Starting with {0} bytes.", baIn.Length));

        // Perform the conversion from one encoding to the other.
        Encoding encCodePage = Encoding.GetEncoding(m_nCodePage);
        byte[] baOut = m_bToWide
            ? Encoding.Convert(encCodePage, Encoding.Unicode, baIn)
            : Encoding.Convert(Encoding.Unicode, encCodePage, baIn);
        Util.DebugWriteLine(this, String.Format("Converted to {0} bytes.", baOut.Length));

        // never write past the end of the caller's buffer
        if (baOut.Length > nCapacity)
        {
            EncConverters.ThrowError(ErrStatus.OutputBufferFull);
        }

        rnOutLen = baOut.Length;
        Marshal.Copy(baOut, 0, (IntPtr)lpOutBuffer, baOut.Length);

        // nul terminate, but only when there is a spare byte to hold the terminator
        if (rnOutLen < nCapacity)
        {
            Marshal.WriteByte((IntPtr)lpOutBuffer, rnOutLen, 0);
        }
    }
    else
    {
        if (m_bToWide)
        {
            rnOutLen = MultiByteToWideChar(m_nCodePage, 0, lpInBuffer, nInLen, (char *)lpOutBuffer, rnOutLen / 2);
            rnOutLen *= 2; // sizeof(WCHAR); // size in bytes
        }
        else
        {
            rnOutLen = WideCharToMultiByte(m_nCodePage, 0, (char *)lpInBuffer, nInLen / 2, lpOutBuffer, rnOutLen, 0, 0);
        }
    }
}
/// <summary>
/// Decodes the input as UTF-16, passes it to DoExeCall, and writes whatever
/// comes back to the output buffer as UTF-16.
/// </summary>
/// <param name="lpInBuffer">input bytes (decoded as UTF-16)</param>
/// <param name="nInLen">number of input bytes</param>
/// <param name="lpOutBuffer">buffer that receives the result</param>
/// <param name="rnOutLen">
/// in: the size of <paramref name="lpOutBuffer"/> in bytes; out: the length of the
/// data written (0 when DoExeCall returned nothing)
/// </param>
protected override unsafe void DoConvert
(
    byte *lpInBuffer,
    int nInLen,
    byte *lpOutBuffer,
    ref int rnOutLen
)
{
    // keep the caller's buffer size so the result can be checked against it
    // (rnOutLen itself is reset so that "no response" reports an empty result)
    int nCapacity = rnOutLen;
    rnOutLen = 0;

    // we need to put it *back* into a string because the StreamWriter that will
    // ultimately write to the StandardInput uses a string. For now, the only user
    // is Perl, which only supports Unicode to Unicode and so the data coming in
    // will be UTF-16. So to put it back into a string, we just need to use this:
    var baDst = new byte[nInLen];
    ECNormalizeData.ByteStarToByteArr(lpInBuffer, nInLen, baDst);
    var strInput = Encoding.Unicode.GetString(baDst);

    // call the helper that calls the exe
    var strOutput = DoExeCall(strInput);

    if (Util.IsUnix)
    {
        Util.DebugWriteLine(this, "Got result from system call: " + strOutput);

        // strOutput may be null here (it is only checked further down), and
        // GetBytes throws on null, so dump an empty string in that case
        byte[] baOut2 = Encoding.Unicode.GetBytes(strOutput ?? String.Empty);
        Util.DebugWriteLine(this, Util.getDisplayBytes("Output UTF16LE", baOut2));

        string filepath = Path.Combine(Path.GetTempPath(), "returning.txt");
        Util.DebugWriteLine(this, "See " + filepath);

        // 'using' closes the file even if one of the writes throws
        using (TextWriter tw = new StreamWriter(filepath))
        {
            tw.WriteLine("input: '" + strInput + "'");
            tw.WriteLine("output: '" + strOutput + "'");
        }
    }

    // if there's no response, report an empty result
    if (String.IsNullOrEmpty(strOutput))
    {
        return;
    }

    // put it in the output buffer, but only if it fits
    int nLen = strOutput.Length * 2;
    if (nLen > nCapacity)
    {
        EncConverters.ThrowError(ErrStatus.OutputBufferFull);
    }

    rnOutLen = nLen;
    rnOutLen = ECNormalizeData.StringToByteStar(strOutput, lpOutBuffer, rnOutLen, false);
}
/// <summary>
/// Asks TECkit (TECkit_GetMappingName) for the name attribute with the given ID
/// in a mapping and returns it decoded as ASCII.
/// </summary>
/// <param name="nID">name ID; converted with Convert.ToUInt16, which throws if it does not fit</param>
/// <param name="pbaNameBuffer">
/// scratch buffer that receives the name; it is declared to TECkit as
/// 1000 bytes long, so it must be at least that large
/// </param>
/// <param name="pbyMapping">the mapping data (m_nMapSize bytes) to query</param>
/// <returns>the name as a string (empty when the reported length is 0)</returns>
protected unsafe string GetTecAttributeName(int nID, byte *pbaNameBuffer, byte *pbyMapping)
{
    // the buffer size we declare to TECkit
    const int cnNameBufferSize = 1000;

    // now ask TECkit for the values
    UInt16 sID = System.Convert.ToUInt16(nID);
    UInt32 nNameLength = 0;
    TECkit_GetMappingName(
        pbyMapping,
        m_nMapSize,
        sID,
        pbaNameBuffer,
        cnNameBufferSize,
        &nNameLength);

    // never read more than the buffer can hold, whatever length was reported
    // NOTE(review): presumably TECkit reports the full name length even when it
    // had to truncate the copy -- confirm against the TECkit engine API.
    int nCopyLength = (nNameLength > cnNameBufferSize) ? cnNameBufferSize : (int)nNameLength;

    byte[] baName = new byte[nCopyLength];
    ECNormalizeData.ByteStarToByteArr(pbaNameBuffer, nCopyLength, baName);
    return Encoding.ASCII.GetString(baName);
}
// This function is the meat of the conversion process. It is really long, which
// normally wouldn't be a virtue (especially as an "in-line" function), but in an
// effort to avoid memory fragmentation by using stack memory to buffer the input
// and output data, I'm using the alloca memory allocation function. Because of this,
// the buffers can't be allocated in some subroutine and returned to a calling program
// (or the stack will have erased them), so it has to be one big fat long function...
// The basic structure is:
//
//  o   Check Input Data
//  o   Give the sub-class (via PreConvert) the opportunity to load tables and do
//      any special preprocessing it needs to ahead of the actual conversion
//  o   Possibly call the TECkit COM interface to convert Unicode flavors that the
//      engine (for this conversion) might not support (indicated via PreConvert)
//  o   Normalize the input data to a byte array based on its input EncodingForm
//  o   Allocate (on the stack) a buffer for the output data (min 10000 bytes)
//  o   Call the subclass (via DoConvert) to do the actual conversion.
//  o   Normalize the output data to match the requested output EncodingForm (including
//      possibly calling the TECkit COM interface).
//  o   Return the resultant BSTR and size of items to the output pointer variables.
//
// Parameters, as they are used in the body:
//  eInEncodingForm / eOutEncodingForm  requested forms; defaulted by CheckInitEncForms
//  sInput / ciInput                    input text and the item count handed to GetBytes
//  eNormalizeOutput                    normalization flags (PreConvert may adjust them)
//  rciOutput                           [out] item count reported by GetString
//  bForward                            selects which code page is the input/output side
protected virtual unsafe string InternalConvertEx
(
    EncodingForm eInEncodingForm,
    string sInput,
    int ciInput,
    EncodingForm eOutEncodingForm,
    NormalizeFlags eNormalizeOutput,
    out int rciOutput,
    bool bForward
)
{
    Util.DebugWriteLine(className, "BEGIN");
    string strForms = "eEncFormIn " + eInEncodingForm.ToString() + ", "
        + "eEncFormOut " + eOutEncodingForm.ToString();
    Util.DebugWriteLine(className, strForms);

    // check the input data
    if (sInput == null)
    {
        EncConverters.ThrowError(ErrStatus.IncompleteChar);
    }
    if (sInput.Length == 0)
    {
        rciOutput = 0;
        return String.Empty;
    }

#if DEBUG && __MonoCS__
    // for debugging only: dump the input in several byte representations
    Util.DebugWriteLine(className, Util.getDisplayBytes("Input BigEndianUnicode",
        System.Text.Encoding.BigEndianUnicode.GetBytes(sInput)));   // easier to read
    Util.DebugWriteLine(className, Util.getDisplayBytes("Input Unicode",
        System.Text.Encoding.Unicode.GetBytes(sInput)));
    byte[] abyNarrowDump = new byte[sInput.Length];
    for (int ich = 0; ich < abyNarrowDump.Length; ich++)
    {
        abyNarrowDump[ich] = (byte)(sInput[ich] & 0xFF);
    }
    Util.DebugWriteLine(className, Util.getDisplayBytes("Input Narrowized", abyNarrowDump));
#endif

    // if the user hasn't specified, then take the default case for the ConversionType:
    //  if L/RHS == eLegacy, then LegacyString
    //  if L/RHS == eUnicode, then UTF16
    CheckInitEncForms(bForward, ref eInEncodingForm, ref eOutEncodingForm);

    // allow the converter engine (and/or its wrapper) to do some preprocessing and
    // tell us which forms the engine itself wants to work with.
    EncodingForm eEngineIn = EncodingForm.Unspecified;
    EncodingForm eEngineOut = EncodingForm.Unspecified;
    PreConvert(eInEncodingForm, ref eEngineIn, eOutEncodingForm, ref eEngineOut,
        ref eNormalizeOutput, bForward);

    // room to normalize the input data (6x ought to be enough)
    int cbInput = sInput.Length * 6;
    byte[] abyInput = new byte[cbInput];
    fixed (byte *pbInput = abyInput)
    {
        // use a helper class to normalize the data to the format needed by the engine
        Util.DebugWriteLine(className, "Calling GetBytes");
        int nCodePageIn = bForward ? CodePageInput : CodePageOutput;
        ECNormalizeData.GetBytes(sInput, ciInput, eInEncodingForm, nCodePageIn,
            eEngineIn, pbInput, ref cbInput, ref m_bDebugDisplayMode);

#if DEBUG && __MonoCS__
        byte[] abyInputDump = new byte[cbInput];
        ECNormalizeData.ByteStarToByteArr(pbInput, cbInput, abyInputDump);
        Util.DebugWriteLine(className, Util.getDisplayBytes("Input Bytes", abyInputDump));
#endif

        // room for the converter's result: never less than 10000 bytes
        int cbOutput = Math.Max(10000, cbInput * 6);
        byte[] abyOutput = new byte[cbOutput];
        fixed (byte *pbOutput = abyOutput)
        {
            for (int ib = 0; ib < 4; ib++)
            {
                pbOutput[ib] = 0;
            }

            // call the wrapper sub-classes' DoConvert to let them do it.
            Util.DebugWriteLine(className, "Calling DoConvert");
            DoConvert(pbInput, cbInput, pbOutput, ref cbOutput);

#if DEBUG && __MonoCS__
            Util.DebugWriteLine(className, "Output length " + cbOutput.ToString());
            byte[] abyOutputDump = new byte[cbOutput];
            ECNormalizeData.ByteStarToByteArr(pbOutput, cbOutput, abyOutputDump);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Output In Bytes", abyOutputDump));
            Util.DebugWriteLine(className, "Got val '" + System.Text.Encoding.Unicode.GetString(abyOutputDump) + "'");
#endif

            // normalize the engine's output into the form the caller asked for
            int nCodePageOut = bForward ? CodePageOutput : CodePageInput;
            string strResult = ECNormalizeData.GetString(pbOutput, cbOutput, eOutEncodingForm,
                nCodePageOut, eEngineOut, eNormalizeOutput, out rciOutput, ref m_bDebugDisplayMode);

#if DEBUG && __MonoCS__
            Util.DebugWriteLine(className, "normalized result '" + strResult + "'");
            byte[] abyResultDump = System.Text.Encoding.BigEndianUnicode.GetBytes(strResult);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output in UTF16BE", abyResultDump));
            abyResultDump = System.Text.Encoding.Unicode.GetBytes(strResult);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output in UTF16LE", abyResultDump));
            abyResultDump = System.Text.Encoding.UTF8.GetBytes(strResult);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output In UTF8", abyResultDump));
            Util.DebugWriteLine(className, "Returning.");
#endif
            return strResult;
        }
    }
}
/// <summary>
/// Variant of InternalConvertEx for when legacy data is returned: the bytes
/// produced by DoConvert are handed back as a byte array instead of being
/// turned into a string.
/// </summary>
/// <param name="eInEncodingForm">form of <paramref name="sInput"/>; defaulted by CheckInitEncForms</param>
/// <param name="sInput">the text to convert; an empty string yields an empty array</param>
/// <param name="eOutEncodingForm">requested output form; defaulted by CheckInitEncForms</param>
/// <param name="eNormalizeOutput">normalization flags passed to PreConvert</param>
/// <param name="rciOutput">receives the number of bytes returned</param>
/// <param name="bForward">selects which code page is used to read the input</param>
/// <returns>the bytes DoConvert wrote to the output buffer</returns>
protected virtual unsafe byte[] InternalConvertEx(EncodingForm eInEncodingForm,
    string sInput, EncodingForm eOutEncodingForm, NormalizeFlags eNormalizeOutput,
    out int rciOutput, bool bForward)
{
    Util.DebugWriteLine(className, "(output bytes) BEGIN");
    string strForms = "eEncFormIn " + eInEncodingForm.ToString() + ", "
        + "eEncFormOut " + eOutEncodingForm.ToString();
    Util.DebugWriteLine(className, strForms);

    if (sInput == null)
    {
        EncConverters.ThrowError(ErrStatus.IncompleteChar);
    }

    Util.DebugWriteLine(className, "sInput.Length() is " + sInput.Length.ToString() + ".");
    if (sInput.Length == 0)
    {
        // this section added 11/10/2011 by Jim K
        rciOutput = 0;
        return new byte[0];
    }
    Util.DebugWriteLine(className, "sInput is " + sInput + ".");

    // if the user hasn't specified, then take the default case for the ConversionType:
    //  if L/RHS == eLegacy, then LegacyString
    //  if L/RHS == eUnicode, then UTF16
    CheckInitEncForms(bForward, ref eInEncodingForm, ref eOutEncodingForm);

    // allow the converter engine (and/or its wrapper) to do some preprocessing and
    // tell us which forms the engine itself wants to work with.
    EncodingForm eEngineIn = EncodingForm.Unspecified;
    EncodingForm eEngineOut = EncodingForm.Unspecified;
    PreConvert(eInEncodingForm, ref eEngineIn, eOutEncodingForm, ref eEngineOut,
        ref eNormalizeOutput, bForward);

    // room to normalize the input data (6x ought to be enough)
    int cbInput = sInput.Length * 6;
    byte[] abyInput = new byte[cbInput];
    fixed (byte *pbInput = abyInput)
    {
        // use a helper class to normalize the data to the format needed by the engine
        Util.DebugWriteLine(className, "Calling GetBytes");
        int nCodePageIn = bForward ? CodePageInput : CodePageOutput;
        ECNormalizeData.GetBytes(sInput, sInput.Length, eInEncodingForm, nCodePageIn,
            eEngineIn, pbInput, ref cbInput, ref m_bDebugDisplayMode);

        // room for the converter's result: never less than 10000 bytes
        int cbOutput = Math.Max(10000, cbInput * 6);
        byte[] abyOutput = new byte[cbOutput];
        fixed (byte *pbOutput = abyOutput)
        {
            for (int ib = 0; ib < 4; ib++)
            {
                pbOutput[ib] = 0;
            }

            // call the wrapper sub-classes' DoConvert to let them do it.
            Util.DebugWriteLine(className, "Calling DoConvert");
            DoConvert(pbInput, cbInput, pbOutput, ref cbOutput);

            // copy out exactly as many bytes as DoConvert reported
            byte[] abyResult = new byte[cbOutput];
            ECNormalizeData.ByteStarToByteArr(pbOutput, cbOutput, abyResult);
#if DEBUG
            Util.DebugWriteLine(className, "Output length " + cbOutput.ToString());
            Util.DebugWriteLine(className, Util.getDisplayBytes("Output In Bytes", abyResult));
            Util.DebugWriteLine(className, "Returning.");
#endif
            rciOutput = cbOutput;
            return abyResult;
        }
    }
}
/// <summary>
/// Variant of InternalConvertEx for when the input is legacy data held in a byte
/// array: the bytes are passed to DoConvert as they are (no GetBytes
/// normalization), and the result is normalized into a string by GetString.
/// </summary>
/// <param name="eInEncodingForm">form of the input; defaulted by CheckInitEncForms</param>
/// <param name="baInput">the bytes to convert; not modified. An empty array yields ""</param>
/// <param name="eOutEncodingForm">requested output form; defaulted by CheckInitEncForms</param>
/// <param name="eNormalizeOutput">normalization flags (PreConvert may adjust them)</param>
/// <param name="rciOutput">receives the item count reported by GetString</param>
/// <param name="bForward">selects which code page is used for the output</param>
/// <returns>the converted text</returns>
protected virtual unsafe string InternalConvertEx(EncodingForm eInEncodingForm,
    byte[] baInput, EncodingForm eOutEncodingForm, NormalizeFlags eNormalizeOutput,
    out int rciOutput, bool bForward)
{
    // spare bytes kept after the input data: a DoConvert override may edit the
    // input buffer in place (e.g. shift it down and wrap it in delimiter bytes),
    // which needs a couple of bytes beyond the data itself.
    const int cnInBufferSlack = 4;

    Util.DebugWriteLine(className, "(input bytes) BEGIN");
    Util.DebugWriteLine(className, "eEncFormIn " + eInEncodingForm.ToString() + ", " +
        "eEncFormOut " + eOutEncodingForm.ToString());

    if (baInput == null)
    {
        EncConverters.ThrowError(ErrStatus.IncompleteChar);
    }
    if (baInput.Length == 0)
    {
        rciOutput = 0;
        return("");
    }

    // if the user hasn't specified, then take the default case for the ConversionType:
    //  if L/RHS == eLegacy, then LegacyString
    //  if L/RHS == eUnicode, then UTF16
    CheckInitEncForms(bForward, ref eInEncodingForm, ref eOutEncodingForm);

    // allow the converter engine's (and/or its COM wrapper) to do some preprocessing.
    EncodingForm eFormEngineIn = EncodingForm.Unspecified, eFormEngineOut = EncodingForm.Unspecified;
    PreConvert(
        eInEncodingForm,        // [in] form in the BSTR
        ref eFormEngineIn,      // [out] form the conversion engine wants, etc.
        eOutEncodingForm,
        ref eFormEngineOut,
        ref eNormalizeOutput,
        bForward);

    // work on a padded private copy instead of pinning the caller's array: that
    // way an in-place edit by DoConvert can neither run off the end of the array
    // nor change the caller's data.
    int nBufSize = baInput.Length;
    byte[] abyInBuffer = new byte[nBufSize + cnInBufferSlack];
    Array.Copy(baInput, abyInBuffer, nBufSize);

    fixed(byte *lpInBuffer = abyInBuffer)
    {
        int nOutLen = Math.Max(10000, nBufSize * 6);
        byte[] abyOutBuffer = new byte[nOutLen];
        fixed(byte *lpOutBuffer = abyOutBuffer)
        {
            lpOutBuffer[0] = lpOutBuffer[1] = lpOutBuffer[2] = lpOutBuffer[3] = 0;

            // call the wrapper sub-classes' DoConvert to let them do it.
            Util.DebugWriteLine(className, "Calling DoConvert");
            DoConvert(lpInBuffer, nBufSize, lpOutBuffer, ref nOutLen);
            Util.DebugWriteLine(className, "Output length " + nOutLen.ToString());

#if DEBUG
            // the managed copy of the output is only needed for the debug dump
            byte[] baOut = new byte[nOutLen];
            ECNormalizeData.ByteStarToByteArr(lpOutBuffer, nOutLen, baOut);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Output In Bytes", baOut));
            Util.DebugWriteLine(className, "Got val '" + System.Text.Encoding.Unicode.GetString(baOut) + "'");
#endif

            string result = ECNormalizeData.GetString(lpOutBuffer, nOutLen, eOutEncodingForm,
                ((bForward) ? CodePageOutput : CodePageInput), eFormEngineOut, eNormalizeOutput,
                out rciOutput, ref m_bDebugDisplayMode);

#if DEBUG
            Util.DebugWriteLine(className, "normalized result '" + result + "'");
            byte[] baResult = System.Text.Encoding.BigEndianUnicode.GetBytes(result);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output in UTF16BE", baResult));
            baResult = System.Text.Encoding.Unicode.GetBytes(result);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output in UTF16LE", baResult));
            baResult = System.Text.Encoding.UTF8.GetBytes(result);
            Util.DebugWriteLine(className, Util.getDisplayBytes("Normalized Output In UTF8", baResult));
            Util.DebugWriteLine(className, "Returning.");
#endif
            return(result);
        }
    }
}
/// <summary>
/// Turns the input bytes back into a string, runs it through SplitAndConvertEx,
/// reassembles the output from the resulting token lists and writes it to the
/// output buffer via StringToProperByteStar.
/// </summary>
/// <param name="lpInBuffer">input bytes</param>
/// <param name="nInLen">number of input bytes</param>
/// <param name="lpOutBuffer">buffer that receives the result</param>
/// <param name="rnOutLen">passed through to StringToProperByteStar (buffer size in, data length out)</param>
protected override unsafe void DoConvert
(
    byte *lpInBuffer,
    int nInLen,
    byte *lpOutBuffer,
    ref int rnOutLen
)
{
    Util.DebugWriteLine(this, "BEGIN");

    // we need to put it *back* into a string for the lookup
    // [aside: I should probably override base.InternalConvertEx so I can avoid having the base
    //  class version turn the input string into a byte* for this call just so we can turn around
    //  and put it *back* into a string for our processing... but I like working with a known
    //  quantity and no other EncConverter does it that way. Besides, I'm afraid I'll break smtg ;-]
    byte[] baIn = new byte[nInLen];
    ECNormalizeData.ByteStarToByteArr(lpInBuffer, nInLen, baIn);

    Encoding enc;
    if (m_bLegacy)
    {
        try
        {
            enc = Encoding.GetEncoding(this.CodePageInput);
        }
        catch
        {
            // deliberate best effort: if the input code page can't be loaded, fall
            // back to ISO 8859-1
            enc = Encoding.GetEncoding(EncConverters.cnIso8859_1CodePage);
        }
    }
    else
    {
        enc = Encoding.Unicode;
    }

    // here's our input string
    string strInput = enc.GetString(baIn);
    Util.DebugWriteLine(this, "Twice-converted input string: '" + strInput + "'");

    List<string> lstInputTokens, lstInputInBetweenTokens, lstOutputTokens, lstOutputInBetweenTokens;
    SplitAndConvertEx(strInput, out lstInputTokens, out lstInputInBetweenTokens,
        out lstOutputTokens, out lstOutputInBetweenTokens);

    // when we're finally done with all the replacements possible, build up a new output string
    // of the results: each token preceded by its "in between" text, plus the final "in between"
    // entry (the in-between list holds one more entry than the token list). A StringBuilder
    // avoids re-copying the whole string for every token.
    StringBuilder sbOutput = new StringBuilder(strInput.Length);
    int i;
    for (i = 0; i < lstOutputTokens.Count; i++)
    {
        sbOutput.Append(lstOutputInBetweenTokens[i]).Append(lstOutputTokens[i]);
    }
    sbOutput.Append(lstOutputInBetweenTokens[i]);
    string strOutput = sbOutput.ToString();

    Util.DebugWriteLine(this, "Output string: '" + strOutput + "'");
    StringToProperByteStar(strOutput, lpOutBuffer, ref rnOutLen);
    Util.DebugWriteLine(this, "Result len " + rnOutLen.ToString());
    Util.DebugWriteLine(this, "END");
}
/// <summary>
/// Compiles the TECkit map source in <paramref name="strFilename"/> with
/// TECkit_Compile and saves the compiled table to
/// <paramref name="strCompiledFilename"/>.
/// </summary>
/// <param name="strFilename">path of the map source file</param>
/// <param name="strCompiledFilename">
/// path to save the compiled table to; if that file can't be opened because of an
/// IOException, it is replaced with a temporary file name and that is used instead
/// </param>
/// <remarks>
/// A non-zero compile status is reported through EncConverters.ThrowError. Any
/// other failure (e.g. reading the source or writing the result) is swallowed on
/// purpose, since compiling isn't crucial.
/// </remarks>
protected unsafe void CompileMap(string strFilename, ref string strCompiledFilename)
{
    int status = 0;
    try
    {
        // read the map source; 'using' makes sure the file is closed again
        byte [] pTxt;
        uint nMapSize;
        using (FileStream fileMap = new FileStream(strFilename, FileMode.Open,
            FileAccess.Read, FileShare.ReadWrite))
        {
            pTxt = new byte [fileMap.Length];
            nMapSize = (uint)fileMap.Read(pTxt, 0, (int)fileMap.Length);
        }

        // NOTE(review): compiledTable is filled in by TECkit_Compile and is not
        // released anywhere in this method -- check whether it should be handed
        // back to TECkit (TECkit_DisposeCompiled) once it has been copied.
        byte * compiledTable = (byte *)0;
        UInt32 compiledSize = 0;
        try
        {
            // do this in a try/catch so the user can cancel if there are too many
            //  errors.
            errFunc dsplyErr = new errFunc(TecEncConverter.DisplayCompilerError);
            byte [] baName = Encoding.ASCII.GetBytes(Name);

            fixed(byte *lpTxt = pTxt)
            fixed(byte *lpName = baName)
            {
                status = TECkit_Compile(
                    lpTxt,
                    nMapSize,
                    (byte)1,    // docompression
                    dsplyErr,
                    lpName,
                    &compiledTable,
                    &compiledSize);
            }
        }
        catch
        {
            status = (int)ErrStatus.CompilationFailed;
        }

        if (status == (int)ErrStatus.NoError)
        {
            // put the data from TEC into a managed byte array for the following Write
            byte [] baOut = new byte [compiledSize];
            ECNormalizeData.ByteStarToByteArr(compiledTable, (int)compiledSize, baOut);

            // save the compiled mapping (but if it fails because it's locked, then
            //  try to save it with a temporary name.
            FileStream fileTec;
            try
            {
                fileTec = File.OpenWrite(strCompiledFilename);
            }
            catch (System.IO.IOException)
            {
                // temporary filename for temporary CC tables (to check portions of the file at a time)
                strCompiledFilename = Path.GetTempFileName();
                fileTec = File.OpenWrite(strCompiledFilename);
            }

            using (fileTec)
            {
                // get its last created timestamp
                DoesFileExist(strCompiledFilename, ref m_timeModifiedTec);

                fileTec.Write(baOut, 0, baOut.Length);

                // OpenWrite doesn't truncate an existing file, so cut off whatever
                // is left over from a previous (longer) compiled table
                fileTec.SetLength(baOut.Length);
            }
        }
    }
    catch
    {
        // compiling isn't crucial
    }

    if (status != (int)ErrStatus.NoError)
    {
        EncConverters.ThrowError(status);
    }
}
/// <summary>
/// Converts by way of a script in an HTML document: the input text is placed in
/// one element, the convert (or reverse-convert) script function is invoked, and
/// the result is read from the other element.
/// </summary>
/// <param name="lpInBuffer">input bytes</param>
/// <param name="nInLen">number of input bytes</param>
/// <param name="lpOutBuffer">buffer that receives the result</param>
/// <param name="rnOutLen">
/// passed through to StringToProperByteStar (buffer size in, data length out);
/// set to 0 when the script produced no output
/// </param>
protected override unsafe void DoConvert(byte *lpInBuffer, int nInLen, byte *lpOutBuffer, ref int rnOutLen)
{
    // we need to put it *back* into a string for the lookup
    // [aside: I should probably override base.InternalConvertEx so I can avoid having the base
    //  class version turn the input string into a byte* for this call just so we can turn around
    //  and put it *back* into a string for our processing... but I like working with a known
    //  quantity and no other EncConverter does it that way. Besides, I'm afraid I'll break smtg ;-]
    byte[] baIn = new byte[nInLen];
    ECNormalizeData.ByteStarToByteArr(lpInBuffer, nInLen, baIn);

    // the input is legacy if the side of the conversion type we're reading from is legacy
    bool bInputLegacy = ((_bForward && (NormalizeLhsConversionType(ConversionType) == NormConversionType.eLegacy))
                         || (!_bForward && (NormalizeRhsConversionType(ConversionType) == NormConversionType.eLegacy)));

    Encoding enc;
    if (bInputLegacy)
    {
        try
        {
            enc = Encoding.GetEncoding(CodePageInput);
        }
        catch
        {
            // deliberate best effort: fall back to ISO 8859-1
            enc = Encoding.GetEncoding(EncConverters.cnIso8859_1CodePage);
        }
    }
    else
    {
        enc = Encoding.Unicode;
    }

    // here's our input string
    string strInput = enc.GetString(baIn);

    string strOutput;
    if (_bForward)
    {
        _elemInput.InnerText = strInput;

        // TODO: catch errors?
        _docHtml.InvokeScript(ConvertFunctionName);
        strOutput = _elemOutput.InnerText;
    }
    else
    {
        _elemOutput.InnerText = strInput;

        // TODO: catch errors?
        _docHtml.InvokeScript(ConvertReverseFunctionName);
        strOutput = _elemInput.InnerText;
    }

    if (String.IsNullOrEmpty(strOutput))
    {
        // nothing came back: report an empty result rather than leaving the
        // incoming buffer size in rnOutLen, which the caller would take to be
        // the length of the output
        rnOutLen = 0;
        return;
    }

    StringToProperByteStar(strOutput, lpOutBuffer, ref rnOutLen);
}
/// <summary>
/// Runs the input through the loaded CC table (CCProcessBuffer). When
/// m_bUseDelimiters is set, the input is wrapped in a pair of delimiter bytes
/// (value 10) before the call, and a leading and/or trailing delimiter is
/// stripped from the output afterwards.
/// </summary>
/// <param name="lpInBuffer">
/// input bytes; edited in place when delimiters are used, so the buffer must have
/// room for at least two bytes beyond <paramref name="nInLen"/>
/// </param>
/// <param name="nInLen">number of input bytes</param>
/// <param name="lpOutBuffer">buffer passed to CCProcessBuffer for the result</param>
/// <param name="rnOutLen">passed to CCProcessBuffer by address; on return, the length of the output</param>
protected override unsafe void DoConvert
(
    byte *lpInBuffer,
    int nInLen,
    byte *lpOutBuffer,
    ref int rnOutLen
)
{
    /*  rde1.2.1.0 don't pad with space anymore
     *  rde2.2.0.0 Ahh... now I remember why this was there before: if you use boundary
     *  condition testing in CC (e.g. "prec(ws) 'i' fol(ws)", where 'ws' contains things
     *  like spaces, nl, tabs, punctuation, etc) then those tests will fail on the first
     *  and last character in the stream (which are at a boundary conditions, but can't be
     *  detected by CC). Anyway, so I want to put back in the stream delimiting, but the
     *  reason this was originally taken out was because someone had a CC table which was
     *  eating spaces, so I'll use 'd10' (which never comes in on an Windows system by itself)
     *  to delimit the stream AND only then if it's a spelling fixer cc table (see Initialize)
     */
#if !rde220
    // the delimiter (if used) is actually '\n', but this normally isn't received by CC
    //  without '\r' as well, so it makes a good delimiter in that CC tables aren't likely
    //  to be looking to eat it up (which was the problem we had when we delimited with
    //  a space).
    const byte byDelim = 10;
    if (m_bUseDelimiters)
    {
        // move the input data down to make room for the initial delimiter
        ECNormalizeData.MemMove(lpInBuffer + 1, lpInBuffer, nInLen);

        lpInBuffer[0] = byDelim;
        lpInBuffer[nInLen + 1] = byDelim;
        nInLen += 2;
    }
#else
    bool bLastWasSpace = false;
    if (lpInBuffer[nInLen - 1] == ' ')
    {
        bLastWasSpace = true;
    }
    else
    {
        lpInBuffer[nInLen++] = (byte)' ';
        lpInBuffer[nInLen] = 0;
    }
#endif

    int status = 0;
    fixed(int *pnOut = &rnOutLen)
    {
        status = CCProcessBuffer(m_hTable, lpInBuffer, nInLen, lpOutBuffer, pnOut);
    }

    if (status != 0)
    {
        TranslateErrStatus(status);
    }
#if !rde220
    else if (m_bUseDelimiters)
    {
        // strip the delimiters again, checking the length before each look at the
        // buffer: the table may have produced nothing at all, or nothing but a
        // single delimiter, and indexing [rnOutLen - 1] with a zero length would
        // read in front of the buffer.
        if ((rnOutLen > 0) && (lpOutBuffer[0] == byDelim))
        {
            ECNormalizeData.MemMove(lpOutBuffer, lpOutBuffer + 1, --rnOutLen);
        }
        if ((rnOutLen > 0) && (lpOutBuffer[rnOutLen - 1] == byDelim))
        {
            rnOutLen--;
        }
    }
#else
    // otherwise strip out that final space we added (sometimes it goes away by itself!!??,
    //  so check first...)
    //  also only if the last of the input was *NOT* a space...
    else if (!bLastWasSpace && (rnOutLen > 0) && (lpOutBuffer[rnOutLen - 1] == ' '))
    {
        rnOutLen--;
    }
#endif
}
/// <summary>
/// Forward: inserts ConverterIdentifier between the words found by the break
/// iterator (or, when the iterator found nothing beyond the space-separated
/// pieces and the text matches _regexForMandarin, after every character).
/// Reverse: removes every ConverterIdentifier from the input.
/// </summary>
/// <param name="lpInBuffer">input bytes (decoded as UTF-16)</param>
/// <param name="nInLen">number of input bytes</param>
/// <param name="lpOutBuffer">buffer that receives the result as UTF-16</param>
/// <param name="rnOutLen">
/// in: the size the result is checked against (ErrStatus.OutputBufferFull is
/// raised when the result is larger); out: the number of bytes of output
/// (0 when the result is empty)
/// </param>
protected override unsafe void DoConvert(byte *lpInBuffer, int nInLen, byte *lpOutBuffer, ref int rnOutLen)
{
    // we need to put it *back* into a string for the lookup
    // [aside: I should probably override base.InternalConvertEx so I can avoid having the base
    //  class version turn the input string into a byte* for this call just so we can turn around
    //  and put it *back* into a string for our processing... but I like working with a known
    //  quantity and no other EncConverter does it that way. Besides, I'm afraid I'll break smtg ;-]
    var baIn = new byte[nInLen];
    ECNormalizeData.ByteStarToByteArr(lpInBuffer, nInLen, baIn);

    // here's our input string
    var strInput = Encoding.Unicode.GetString(baIn);

    string strOutput;
    if (_bForward)
    {
        var bySpace = strInput.Split(_achSpace, StringSplitOptions.RemoveEmptyEntries);
        _breakIterator.SetText(strInput);
        var words = _breakIterator.Enumerate().ToList();

        if (bySpace.Length == words.Count)
        {
            // it didn't do anything!
            // if it is mandarin, this is probably expected and we can do this
            if (_regexForMandarin.IsMatch(strInput))
            {
                // follow every character of every piece with the identifier
                var sbChars = new StringBuilder(strInput.Length * 2);
                foreach (var piece in bySpace)
                {
                    foreach (var ch in piece)
                    {
                        sbChars.Append(ch).Append(ConverterIdentifier);
                    }
                }
                strOutput = sbChars.ToString();
            }
            else
            {
                strOutput = strInput;
            }
        }
        else
        {
            // every word but the last gets the identifier after it (empty words and
            // words that already are the identifier are dropped); the last word is
            // appended as it is.
            var sbWords = new StringBuilder(strInput.Length * 2);
            int nNumWords = words.Count - 1;
            for (var i = 0; i < nNumWords; i++)
            {
                var word = words[i];
                if (!String.IsNullOrEmpty(word) && (word != ConverterIdentifier))
                {
                    sbWords.Append(word).Append(ConverterIdentifier);
                }
            }
            sbWords.Append(words.Last());
            strOutput = sbWords.ToString();
        }
    }
    else
    {
        strOutput = strInput.Replace(ConverterIdentifier, null);
    }

    if (String.IsNullOrEmpty(strOutput))
    {
        // report an empty result rather than leaving the incoming buffer size in
        // rnOutLen, which the caller would take to be the length of the output
        rnOutLen = 0;
        return;
    }

    var nLen = strOutput.Length * 2;
    if (nLen > rnOutLen)
    {
        EncConverters.ThrowError(ErrStatus.OutputBufferFull);
    }

    rnOutLen = nLen;
    ECNormalizeData.StringToByteStar(strOutput, lpOutBuffer, rnOutLen, false);
}