// Copies a converted string into the caller's native output buffer.
//
//  strOutput    the converted text to hand back
//  lpOutBuffer  destination buffer supplied by the caller
//  rnOutLen     on entry, compared against the number of bytes about to be written
//               (i.e. treated as the buffer capacity); on return, the number of bytes
//               actually produced
//
// Raises ErrStatus.OutputBufferFull (via EncConverters.ThrowError) when the data
// would not fit.
protected unsafe void StringToProperByteStar(string strOutput, byte *lpOutBuffer, ref int rnOutLen)
{
    if (!m_bLegacy)
    {
        // non-legacy output is copied as-is: two bytes per UTF-16 code unit
        int cbWide = strOutput.Length * 2;
        if (cbWide > rnOutLen)
        {
            EncConverters.ThrowError(ErrStatus.OutputBufferFull);
        }

        rnOutLen = cbWide;
        ECNormalizeData.StringToByteStar(strOutput, lpOutBuffer, rnOutLen);
        return;
    }

    // legacy output has to be shrunk from wide to narrow using the output code page
    byte[] abyNarrow = EncConverters.GetBytesFromEncoding(CodePageOutput, strOutput, true);
    if (abyNarrow.Length > rnOutLen)
    {
        EncConverters.ThrowError(ErrStatus.OutputBufferFull);
    }

    rnOutLen = abyNarrow.Length;
    ECNormalizeData.ByteArrToByteStar(abyNarrow, lpOutBuffer);
}
// Error callback handed to the TECkit compiler (see the errFunc delegate created in
// CompileMap). Shows the compiler's message in a message box and lets the user abandon
// the compilation.
//
//  pszName  converter name, shown in the caption
//  msg      the compiler's message text
//  param    optional detail, appended in quotes; a null pointer means "none"
//  line     source line number, appended unless it is 0
//
// All three byte* arguments are decoded as ASCII. Pressing Cancel raises
// ErrStatus.CompilationFailed via EncConverters.ThrowError.
static unsafe void DisplayCompilerError(byte *pszName, byte *msg, byte *param, UInt32 line)
{
    Encoding encAscii = Encoding.ASCII;

    // message body: <msg>[: "<param>"][ at line <line>]
    StringBuilder sbText = new StringBuilder();
    sbText.Append(encAscii.GetString(ECNormalizeData.ByteStarToByteArr(msg)));

    if (param != null)
    {
        sbText.Append(": \"");
        sbText.Append(encAscii.GetString(ECNormalizeData.ByteStarToByteArr(param)));
        sbText.Append("\"");
    }

    if (line != 0)
    {
        sbText.Append(" at line ");
        sbText.Append(line.ToString());
    }

    // caption names the converter being compiled
    string strConverterName = encAscii.GetString(ECNormalizeData.ByteStarToByteArr(pszName));
    string strCaption = "Compilation feedback from TECkit for the '" + strConverterName + "' converter";

    DialogResult eChoice = MessageBox.Show(sbText.ToString(), strCaption,
        MessageBoxButtons.OKCancel, MessageBoxIcon.Exclamation);
    if (eChoice == DialogResult.Cancel)
    {
        EncConverters.ThrowError(ErrStatus.CompilationFailed);
    }
}
// Converts the input by splitting it into tokens, converting each token, and
// re-assembling the converted tokens with the text that lay between them.
//
//  lpInBuffer   raw input bytes
//  nInLen       number of valid bytes in lpInBuffer
//  lpOutBuffer  destination buffer for the converted data
//  rnOutLen     passed through to StringToProperByteStar, which checks it against the
//               size of the result and sets it to the number of bytes written
//
// The bytes are decoded with CodePageInput when m_bLegacy is set (falling back to
// EncConverters.cnIso8859_1CodePage if that encoding cannot be obtained) and as
// UTF-16 otherwise.
protected override unsafe void DoConvert
(
    byte *lpInBuffer,
    int nInLen,
    byte *lpOutBuffer,
    ref int rnOutLen
)
{
    // we need to put it *back* into a string for the lookup
    // [aside: overriding base.InternalConvertEx would avoid having the base class turn the
    //  input string into a byte* just so we can turn it back into a string here, but no
    //  other EncConverter does it that way, so this sticks with the known pattern]
    byte[] baIn = new byte[nInLen];
    ECNormalizeData.ByteStarToByteArr(lpInBuffer, nInLen, baIn);

    Encoding enc;
    if (m_bLegacy)
    {
        try
        {
            enc = Encoding.GetEncoding(this.CodePageInput);
        }
        catch
        {
            // deliberate best-effort: any failure to obtain the code page falls back to ISO-8859-1
            enc = Encoding.GetEncoding(EncConverters.cnIso8859_1CodePage);
        }
    }
    else
    {
        enc = Encoding.Unicode;
    }

    // here's our input string
    string strInput = enc.GetString(baIn);

    List<string> lstInputTokens, lstInBetweenTokens, lstOutputTokens;
    SplitAndConvert(strInput, out lstInputTokens, out lstInBetweenTokens, out lstOutputTokens);

    // Build the result as: between[0] out[0] between[1] out[1] ... between[n].
    // A StringBuilder avoids re-copying the accumulated text for every token, which the
    // previous string '+=' loop did.
    StringBuilder sbOutput = new StringBuilder(strInput.Length);
    int nTokens = lstOutputTokens.Count;
    for (int i = 0; i < nTokens; i++)
    {
        sbOutput.Append(lstInBetweenTokens[i]);
        sbOutput.Append(lstOutputTokens[i]);
    }

    // there is one more in-between entry than there are tokens (the trailing text)
    sbOutput.Append(lstInBetweenTokens[nTokens]);

    StringToProperByteStar(sbOutput.ToString(), lpOutBuffer, ref rnOutLen);
}
// Asks TECkit for one of a mapping's name attributes and returns it as a string.
//
//  nID            attribute id; converted with Convert.ToUInt16, so a value outside the
//                 UInt16 range makes that conversion throw
//  pbaNameBuffer  scratch buffer that TECkit fills; this method tells TECkit it holds
//                 1000 bytes, so the caller must supply at least that much
//  pbyMapping     the compiled mapping (m_nMapSize bytes)
//
// Returns the name decoded as ASCII; empty if TECkit reports a length of 0.
protected unsafe string GetTecAttributeName(int nID, byte *pbaNameBuffer, byte *pbyMapping)
{
    // size of pbaNameBuffer as advertised to TECkit
    const int cnNameBufferSize = 1000;

    // now ask TECkit for the values
    UInt16 sID = System.Convert.ToUInt16(nID);
    UInt32 nNameLength = 0;
    TECkit_GetMappingName(
        pbyMapping,
        m_nMapSize,
        sID,
        pbaNameBuffer,
        cnNameBufferSize,
        &nNameLength);

    // Never read more than the buffer we offered.
    // NOTE(review): TECkit presumably reports the full name length even when it had to
    // truncate the copy - confirm against TECkit_Engine.h. Clamping is harmless either way.
    if (nNameLength > cnNameBufferSize)
    {
        nNameLength = cnNameBufferSize;
    }

    byte[] baName = new byte[nNameLength];
    ECNormalizeData.ByteStarToByteArr(pbaNameBuffer, (int)nNameLength, baName);
    return(Encoding.ASCII.GetString(baName));
}
// [DispId(16)]
// Converts a Unicode string to legacy bytes.
//
// Only valid for converters whose ConversionType is Legacy_to_from_Unicode,
// Unicode_to_from_Legacy or Unicode_to_Legacy; anything else raises
// ErrStatus.InvalidConversionType. A Legacy_to_from_Unicode converter is run in
// reverse; the other two run forward.
public virtual byte[] ConvertFromUnicode(string sInput)
{
    bool bSupported =
        (ConversionType == ConvType.Legacy_to_from_Unicode)
        || (ConversionType == ConvType.Unicode_to_from_Legacy)
        || (ConversionType == ConvType.Unicode_to_Legacy);
    if (!bSupported)
    {
        EncConverters.ThrowError(ErrStatus.InvalidConversionType);
    }

    bool bForward = (ConversionType != ConvType.Legacy_to_from_Unicode);

    // go through the ordinary string-returning 'InternalConvert' (asking for LegacyBytes
    // output) and turn the resulting string into a byte [] afterwards.
    string sLegacy = InternalConvert(
        EncodingForm.UTF16,
        sInput,
        EncodingForm.LegacyBytes,
        NormalizeOutput,
        bForward);

    return(ECNormalizeData.StringToByteArr(sLegacy));
}
// [DispId(15)]
// Converts legacy bytes to a Unicode string.
//
// Only valid for converters whose ConversionType is Legacy_to_from_Unicode,
// Unicode_to_from_Legacy or Legacy_to_Unicode; anything else raises
// ErrStatus.InvalidConversionType. A Unicode_to_from_Legacy converter is run in
// reverse; the other two run forward.
public virtual string ConvertToUnicode(byte [] baInput)
{
    bool bSupported =
        (ConversionType == ConvType.Legacy_to_from_Unicode)
        || (ConversionType == ConvType.Unicode_to_from_Legacy)
        || (ConversionType == ConvType.Legacy_to_Unicode);
    if (!bSupported)
    {
        EncConverters.ThrowError(ErrStatus.InvalidConversionType);
    }

    bool bForward = (ConversionType != ConvType.Unicode_to_from_Legacy);

    // 'InternalConvert' expects a string, so wrap the byte [] in one and declare the
    // input encoding form as LegacyBytes. (Not as efficient as a dedicated
    // InternalConvertToUnicode taking a byte [], but that would need far-reaching
    // changes that risk breaking something, and this is much easier to maintain.)
    string sPackedBytes = ECNormalizeData.ByteArrToString(baInput);

    return(InternalConvert(
        EncodingForm.LegacyBytes,
        sPackedBytes,
        EncodingForm.UTF16,
        NormalizeOutput,
        bForward));
}
// Converts the input by handing it, as a string, to an external executable (DoExeCall)
// and copying whatever comes back into the output buffer.
//
//  lpInBuffer   raw input bytes, decoded with CodePageInput (falling back to
//               EncConverters.cnIso8859_1CodePage if that encoding cannot be obtained)
//  nInLen       number of valid bytes in lpInBuffer
//  lpOutBuffer  destination buffer for the converted data
//  rnOutLen     on entry, the capacity of lpOutBuffer in bytes (this is what the base
//               class passes and what StringToProperByteStar also assumes); on return,
//               the number of bytes written - 0 when the exe produced no output
//
// Raises ErrStatus.RegistryCorrupt when WorkingDir is not set and
// ErrStatus.OutputBufferFull when the result does not fit in lpOutBuffer.
protected override unsafe void DoConvert
(
    byte *lpInBuffer,
    int nInLen,
    byte *lpOutBuffer,
    ref int rnOutLen
)
{
    // remember how much room the caller gave us before resetting the out-count
    int nOutCapacity = rnOutLen;
    rnOutLen = 0;

    if (String.IsNullOrEmpty(WorkingDir))
    {
        EncConverters.ThrowError(ErrStatus.RegistryCorrupt);
        return;
    }

    // we need to put it *back* into a string because the StreamWriter that will
    // ultimately write to the StandardInput uses a string. Use the correct codepg.
    byte[] baDst = new byte[nInLen];
    ECNormalizeData.ByteStarToByteArr(lpInBuffer, nInLen, baDst);

    Encoding enc;
    try
    {
        enc = Encoding.GetEncoding(this.CodePageInput);
    }
    catch
    {
        // deliberate best-effort fallback
        enc = Encoding.GetEncoding(EncConverters.cnIso8859_1CodePage);
    }

    string strInput = enc.GetString(baDst);

    // call the helper that calls the exe
    string strOutput = DoExeCall(strInput);

    // no response => nothing to copy (rnOutLen is already 0)
    if (String.IsNullOrEmpty(strOutput))
    {
        return;
    }

    // the output is legacy if the side we are converting *to* is legacy: the rhs when
    // going forward, the lhs when going in reverse
    bool bLegacyOutput = this.DirectionForward
        ? (EncConverter.NormalizeRhsConversionType(this.ConversionType) == NormConversionType.eLegacy)
        : (EncConverter.NormalizeLhsConversionType(this.ConversionType) == NormConversionType.eLegacy);

    if (bLegacyOutput)
    {
        // legacy output has to be shrunk from wide to narrow
        try
        {
            enc = Encoding.GetEncoding(this.CodePageOutput);
        }
        catch
        {
            // deliberate best-effort fallback
            enc = Encoding.GetEncoding(EncConverters.cnIso8859_1CodePage);
        }

        byte[] baOut = enc.GetBytes(strOutput);
        if (baOut.Length > nOutCapacity)
        {
            // don't write past the end of the caller's buffer
            EncConverters.ThrowError(ErrStatus.OutputBufferFull);
        }

        ECNormalizeData.ByteArrToByteStar(baOut, lpOutBuffer);
        rnOutLen = baOut.Length;
    }
    else
    {
        // wide output: two bytes per UTF-16 code unit
        int nLen = strOutput.Length * 2;
        if (nLen > nOutCapacity)
        {
            // don't write past the end of the caller's buffer
            EncConverters.ThrowError(ErrStatus.OutputBufferFull);
        }

        rnOutLen = nLen;
        ECNormalizeData.StringToByteStar(strOutput, lpOutBuffer, rnOutLen);
    }
}
// Compiles a TECkit map source file and saves the compiled table to disk.
//
//  strFilename          path of the map source to compile
//  strCompiledFilename  path to save the compiled table to; if that file cannot be
//                       opened for writing (IOException, e.g. it is locked) it is
//                       replaced with a freshly created temporary file name, which is
//                       why it is passed by reference
//
// Compiler diagnostics are reported through DisplayCompilerError. A non-zero compiler
// status (or any exception thrown during the compile call, which is recorded as
// ErrStatus.CompilationFailed) is raised through EncConverters.ThrowError. Failures
// while reading the source or writing the result are swallowed on purpose.
protected unsafe void CompileMap(string strFilename, ref string strCompiledFilename)
{
    int status = 0;
    try
    {
        // read the whole map source; dispose the stream as soon as we have the bytes
        byte[] pTxt;
        uint nMapSize;
        using (FileStream fileMap = new FileStream(strFilename, FileMode.Open,
            FileAccess.Read, FileShare.ReadWrite))
        {
            pTxt = new byte[fileMap.Length];
            nMapSize = (uint)fileMap.Read(pTxt, 0, (int)fileMap.Length);
        }

        byte* compiledTable = (byte*)0;
        UInt32 compiledSize = 0;
        try
        {
            // do this in a try/catch so the user can cancel if there are too many
            // errors.
            errFunc dsplyErr = new errFunc(TecEncConverter.DisplayCompilerError);
            byte[] baName = Encoding.ASCII.GetBytes(Name);
            fixed (byte* lpTxt = pTxt)
            fixed (byte* lpName = baName)
            {
                status = TECkit_Compile(
                    lpTxt,
                    nMapSize,
                    (byte)1,    // docompression
                    dsplyErr,
                    lpName,
                    &compiledTable,
                    &compiledSize);
            }
        }
        catch
        {
            status = (int)ErrStatus.CompilationFailed;
        }

        if (status == (int)ErrStatus.NoError)
        {
            // put the data from TEC into a managed byte array for the following Write
            // NOTE(review): compiledTable is allocated by TECkit and is not released here;
            // TECkit offers TECkit_DisposeCompiled for that - confirm whether a P/Invoke
            // for it is declared in this class.
            byte[] baOut = new byte[compiledSize];
            ECNormalizeData.ByteStarToByteArr(compiledTable, (int)compiledSize, baOut);

            // save the compiled mapping (but if it fails because it's locked, then
            // try to save it with a temporary name.
            FileStream fileTec = null;
            try
            {
                fileTec = File.OpenWrite(strCompiledFilename);
            }
            catch (System.IO.IOException)
            {
                // temporary filename for temporary tables
                strCompiledFilename = Path.GetTempFileName();
                fileTec = File.OpenWrite(strCompiledFilename);
            }

            // 'using' makes sure the handle is released even if the write fails
            using (fileTec)
            {
                // get its last modified timestamp
                DoesFileExist(strCompiledFilename, ref m_timeModifiedTec);

                // File.OpenWrite does not truncate an existing file, so drop any old
                // content first; otherwise a shorter table would be followed by stale bytes
                fileTec.SetLength(0);
                fileTec.Write(baOut, 0, (int)compiledSize);
            }
        }
    }
    catch
    {
        // compiling isn't crucial
    }

    if (status != (int)ErrStatus.NoError)
    {
        EncConverters.ThrowError(status);
    }
}
// Runs the input through the loaded CC table (CCProcessBuffer on m_hTable).
//
//  lpInBuffer   input bytes; when m_bUseDelimiters is set this buffer is modified in
//               place (the data is shifted up one byte and bracketed with delimiter bytes)
//  nInLen       number of valid bytes in lpInBuffer
//  lpOutBuffer  destination buffer for the converted data
//  rnOutLen     passed by address to CCProcessBuffer; on return holds the number of
//               output bytes, with any delimiters this method added removed again
//
// A non-zero status from CCProcessBuffer is handed to TranslateErrStatus.
protected override unsafe void DoConvert
(
    byte *lpInBuffer,
    int nInLen,
    byte *lpOutBuffer,
    ref int rnOutLen
)
{
    /* rde1.2.1.0 don't pad with space anymore
     * rde2.2.0.0 Ahh... now I remember why this was there before: if you use boundary
     * condition testing in CC (e.g. "prec(ws) 'i' fol(ws)", where 'ws' contains things
     * like spaces, nl, tabs, punctuation, etc) then those tests will fail on the first
     * and last character in the stream (which are at a boundary condition, but can't be
     * detected by CC). Anyway, so I want to put back in the stream delimiting, but the
     * reason this was originally taken out was because someone had a CC table which was
     * eating spaces, so I'll use 'd10' (which never comes in on a Windows system by itself)
     * to delimit the stream AND only then if it's a spelling fixer cc table (see Initialize)
     */
#if !rde220
    // the delimiter (if used) is actually '\n', but this normally isn't received by CC
    // without '\r' as well, so it makes a good delimiter in that CC tables aren't likely
    // to be looking to eat it up (which was the problem we had when we delimited with
    // a space).
    const byte byDelim = 10;
    if (m_bUseDelimiters)
    {
        // move the input data down to make room for the initial delimiter
        // (assumes the caller's buffer has at least two spare bytes beyond nInLen;
        // the capacity is not passed in, so it cannot be verified here)
        ECNormalizeData.MemMove(lpInBuffer + 1, lpInBuffer, nInLen);
        lpInBuffer[0] = byDelim;
        lpInBuffer[nInLen + 1] = byDelim;
        nInLen += 2;
    }
#else
    // NOTE(review): legacy space-padding variant, only compiled when 'rde220' is defined.
    // It assigns bLastWasSpace but declares bLastWasD10 - left untouched.
    bool bLastWasD10 = false;
    if (lpInBuffer[nInLen - 1] == ' ')
    {
        bLastWasSpace = true;
    }
    else
    {
        lpInBuffer[nInLen++] = (byte)' ';
        lpInBuffer[nInLen] = 0;
    }
#endif

    int status = 0;
    fixed (int* pnOut = &rnOutLen)
    {
        status = CCProcessBuffer(m_hTable, lpInBuffer, nInLen, lpOutBuffer, pnOut);
    }

    if (status != 0)
    {
        TranslateErrStatus(status);
    }
#if !rde220
    else if (m_bUseDelimiters)
    {
        // Strip the delimiters again if the table passed them through. Each test is
        // guarded by rnOutLen > 0 so that empty output never indexes lpOutBuffer[-1]
        // or drives rnOutLen negative.
        if ((rnOutLen > 0) && (lpOutBuffer[0] == byDelim))
        {
            ECNormalizeData.MemMove(lpOutBuffer, lpOutBuffer + 1, --rnOutLen);
        }

        if ((rnOutLen > 0) && (lpOutBuffer[rnOutLen - 1] == byDelim))
        {
            rnOutLen--;
        }
    }
#else
    /*
     * // otherwise strip out that final space we added (sometimes it goes away by itself!!??,
     * // so check first...)
     * // also only if the last of the input was *NOT* a space...
     * else if( !bLastWasSpace && (lpOutBuffer[rnOutLen-1] == ' ') )
     * {
     *     rnOutLen--;
     * }
     */
#endif
}
// This function is the meat of the conversion process. The input and output buffers
// are managed byte arrays pinned with 'fixed' for the duration of the call (they are
// not stack allocations, despite what older comments said), which is why the
// normalize / convert / de-normalize steps all live in this one method.
// The basic structure is:
//
//  o   Check Input Data (a null sInput raises ErrStatus.IncompleteChar)
//  o   Fill in unspecified encoding forms (via CheckInitEncForms)
//  o   Give the sub-class (via PreConvert) the opportunity to do any preprocessing it
//      needs ahead of the actual conversion and to say which encoding forms its
//      engine wants
//  o   Normalize the input data to a byte array based on its input EncodingForm
//      (via ECNormalizeData.GetBytes)
//  o   Allocate a buffer for the output data (min 10000 bytes)
//  o   Call the subclass (via DoConvert) to do the actual conversion.
//  o   Normalize the output data to match the requested output EncodingForm
//      (via ECNormalizeData.GetString)
//  o   Return the resultant string, with the item count in rciOutput.
//
//  eInEncodingForm   form of the data in sInput
//  sInput            the data to convert
//  ciInput           item count of the input, passed to ECNormalizeData.GetBytes
//  eOutEncodingForm  form wanted for the returned string
//  eNormalizeOutput  normalization wanted for the output (PreConvert may adjust it)
//  rciOutput         receives the item count of the output from ECNormalizeData.GetString
//  bForward          direction; selects which of CodePageInput / CodePageOutput applies
//                    to the input and to the output
protected virtual unsafe string InternalConvertEx
(
    EncodingForm eInEncodingForm,
    string sInput,
    int ciInput,
    EncodingForm eOutEncodingForm,
    NormalizeFlags eNormalizeOutput,
    out int rciOutput,
    bool bForward
)
{
    if (sInput == null)
    {
        EncConverters.ThrowError(ErrStatus.IncompleteChar);
    }

    // if the user hasn't specified, then take the default case for the ConversionType:
    //  if L/RHS == eLegacy, then LegacyString
    //  if L/RHS == eUnicode, then UTF16
    CheckInitEncForms
    (
        bForward,
        ref eInEncodingForm,
        ref eOutEncodingForm
    );

    // allow the converter engine's (and/or its COM wrapper) to do some preprocessing.
    EncodingForm eFormEngineIn = EncodingForm.Unspecified, eFormEngineOut = EncodingForm.Unspecified;
    PreConvert
    (
        eInEncodingForm,    // [in] form in the input string
        ref eFormEngineIn,  // [out] form the conversion engine wants, etc.
        eOutEncodingForm,
        ref eFormEngineOut,
        ref eNormalizeOutput,
        bForward
    );

    // Extra bytes allocated beyond the 6x estimate. They guarantee the array is never
    // empty (pinning a zero-length array with 'fixed' yields a null pointer) and leave
    // room for a DoConvert override that brackets the input with delimiter bytes in place.
    const int cnInputSlack = 16;

    // get enough space for us to normalize the input data (6x ought to be enough).
    // nBufSize itself is left at the 6x figure so GetBytes sees the same value as before.
    int nBufSize = sInput.Length * 6;
    byte[] abyInBuffer = new byte[nBufSize + cnInputSlack];
    fixed (byte* lpInBuffer = abyInBuffer)
    {
        // use a helper class to normalize the data to the format needed by the engine
        ECNormalizeData.GetBytes(sInput, ciInput, eInEncodingForm,
            ((bForward) ? CodePageInput : CodePageOutput), eFormEngineIn, lpInBuffer,
            ref nBufSize, ref m_bDebugDisplayMode);

        // get some space for the converter to fill: 6x the normalized input,
        // but never less than 10000 bytes.
        int nOutLen = Math.Max(10000, nBufSize * 6);
        byte[] abyOutBuffer = new byte[nOutLen];
        fixed (byte* lpOutBuffer = abyOutBuffer)
        {
            lpOutBuffer[0] = lpOutBuffer[1] = lpOutBuffer[2] = lpOutBuffer[3] = 0;

            // call the wrapper sub-classes' DoConvert to let them do it.
            // (nOutLen goes in as the buffer capacity and comes back as the bytes produced)
            DoConvert(lpInBuffer, nBufSize, lpOutBuffer, ref nOutLen);

            return(ECNormalizeData.GetString(lpOutBuffer, nOutLen, eOutEncodingForm,
                ((bForward) ? CodePageOutput : CodePageInput), eFormEngineOut,
                eNormalizeOutput, out rciOutput, ref m_bDebugDisplayMode));
        }
    }
}