// TODO: remove this method if it turns out not to be needed.
/// <summary>
/// Dumps selected properties of a mapping and a file-info object to the debug
/// output. Nothing is written unless verbose tracing is enabled on m_traceSwitch.
/// </summary>
/// <param name="skip">Not used by this method.</param>
/// <param name="mapping">Mapping whose BeginMarker, Domain and EndMarker are traced.</param>
/// <param name="fileInfo">File info whose FileEncoding, InputFileName and OutputFileName are traced.</param>
public void Test(int skip, IDCMapping mapping, IDCFileInfo fileInfo)
{
    // Only trace at verbose level.
    if (!m_traceSwitch.TraceVerbose)
    {
        return;
    }

    Debug.WriteLine("DataConverter Test");

    // Mapping details.
    Debug.WriteLine("\tDCMapping");
    Debug.WriteLine("\t\tBeginMarker =" + mapping.BeginMarker + ".");
    Debug.WriteLine("\t\tDomain =" + mapping.Domain + ".");
    Debug.WriteLine("\t\tEndMarker =" + mapping.EndMarker + ".");

    // File details. (Tracing of fileInfo.FileEncodingSource is intentionally disabled.)
    Debug.WriteLine("\tIDCFileInfo");
    Debug.WriteLine("\t\tFileEncoding =" + fileInfo.FileEncoding + ".");
    Debug.WriteLine("\t\tInputFileName =" + fileInfo.InputFileName + ".");
    Debug.WriteLine("\t\tOutputFileName =" + fileInfo.OutputFileName + ".");
}
/// <summary>
/// Factory method that builds a MarkerSpec from a mapping by forwarding the
/// mapping's individual properties to the seven-argument CreateMarkerSpec overload.
/// </summary>
/// <param name="mapping">
/// Source of the BeginMarker, MarkerEncoding, DataEncoding, IsInline, NewBeginMarker,
/// EndMarker and NewEndMarker values passed to the overload.
/// </param>
/// <returns>The MarkerSpec produced by the seven-argument overload.</returns>
public static MarkerSpec CreateMarkerSpec(IDCMapping mapping)
{
    return CreateMarkerSpec(
        mapping.BeginMarker,
        mapping.MarkerEncoding,
        mapping.DataEncoding,
        mapping.IsInline,
        mapping.NewBeginMarker,
        mapping.EndMarker,
        mapping.NewEndMarker);
}
/// <summary>
/// Tokenizes each input file with a tokenizer built from <paramref name="mappings"/>
/// and writes every token's output, encoded as UTF-8, to the matching output file.
/// </summary>
/// <remarks>
/// A new Tokenizer is created and stored in m_tokenizer on every call. Mappings whose
/// MarkerEncoding or DataEncoding is empty are updated in place with DefaultMarkerMap /
/// DefaultDataMap before their MarkerSpec is added to the tokenizer.
/// All file handles are released even when conversion fails part-way through.
/// </remarks>
/// <param name="mappings">Marker mappings used to configure the tokenizer.</param>
/// <param name="fileInfos">
/// One entry per file to convert; supplies InputFileName, FileEncoding, OutputFileName
/// and HasBOM (whether the output should start with a UTF-8 byte order mark).
/// </param>
public void ConvertNew(IDCMapping[] mappings, IDCFileInfo[] fileInfos)
{
    m_tokenizer = new Tokenizer();
    foreach (IDCMapping mapping in mappings)
    {
        // An empty encoding name means "use the default"; this modifies the caller's mapping.
        if (mapping.MarkerEncoding.Length <= 0)
        {
            mapping.MarkerEncoding = DefaultMarkerMap;
        }
        if (mapping.DataEncoding.Length <= 0)
        {
            mapping.DataEncoding = DefaultDataMap;
        }

        MarkerSpec ms = MarkerSpec.CreateMarkerSpec(mapping);
        m_tokenizer.Tri.Add(ms);
    }

    // Convert each input file in turn.
    foreach (IDCFileInfo fileInfo in fileInfos)
    {
        // 'using' guarantees the input handle is released even if a later step throws.
        using (FileStream stream = new FileStream(fileInfo.InputFileName, FileMode.Open, FileAccess.Read))
        {
            // Choose the decoder for the input; fAlreadyUnicode is passed on to Token.Output.
            System.Text.Encoding encoding;
            bool fAlreadyUnicode = true;
            switch (fileInfo.FileEncoding)
            {
                case DCFileEncoding.DC_FE_BYTES:
                case DCFileEncoding.DC_FE_Unknown:
                    encoding = ReversibleEncoding;
                    fAlreadyUnicode = false;
                    break;
                case DCFileEncoding.DC_FE_UTF16BE:
                    encoding = System.Text.Encoding.BigEndianUnicode;
                    break;
                case DCFileEncoding.DC_FE_UTF16LE:
                    encoding = System.Text.Encoding.Unicode;
                    break;
                case DCFileEncoding.DC_FE_UTF8:
                    encoding = System.Text.Encoding.UTF8;
                    break;
                default:
                    // Unrecognized value: assert in debug builds, then treat the input as raw bytes.
                    Debug.Fail("Requested input file encoding not implemented.");
                    encoding = ReversibleEncoding;
                    fAlreadyUnicode = false;
                    break;
            }

            using (StreamReader streamReader = new StreamReader(stream, encoding))
            {
                m_tokenizer.Input = streamReader;

                using (Stream outputStream = new FileStream(fileInfo.OutputFileName, FileMode.Create, FileAccess.Write))
                {
                    bool fBOM = fileInfo.HasBOM;

                    // Enhance (BobbyD): a StreamWriter created with Encoding.UTF8 always emits a
                    // BOM, so it is only used when a BOM is wanted; otherwise the UTF-8 bytes are
                    // written straight to the stream. A cleaner solution may be an encoding whose
                    // GetPreamble() returns a zero-length array, so that StreamWriter could be used
                    // for both cases (see the documentation for Encoding.GetPreamble).
                    // A 'using' over a null resource is a no-op, so this is safe when fBOM is false.
                    using (StreamWriter outputWriter = fBOM ? new StreamWriter(outputStream, System.Text.Encoding.UTF8) : null)
                    {
                        Token token;
                        do
                        {
                            token = m_tokenizer.Next();
                            string output = token.Output(fAlreadyUnicode);

                            if (fBOM)
                            {
                                if (token is NewlineToken)
                                {
                                    outputWriter.WriteLine(output);
                                }
                                else
                                {
                                    outputWriter.Write(output);
                                }
                            }
                            else
                            {
                                // Encode only on this path; the writer path does its own encoding.
                                byte[] bytes = System.Text.Encoding.UTF8.GetBytes(output);
                                outputStream.Write(bytes, 0, bytes.Length);
                                if (token is NewlineToken)
                                {
                                    outputStream.WriteByte((byte)'\r');
                                    outputStream.WriteByte((byte)'\n');
                                }
                            }
                        } while (!(token is EndOfFileToken));
                    } // flushes and closes the BOM writer, if one was created
                } // closes the output file
            } // closes the reader assigned to m_tokenizer.Input
        } // closes the input file
    }
}