// TODO(review): Test below is a debug-only trace helper; remove it if no caller remains.
 /// <summary>
 /// Writes the contents of the supplied mapping and file info to the debug
 /// trace when verbose tracing is enabled. Diagnostic helper only; has no
 /// effect beyond Debug output. The <paramref name="skip"/> argument is unused.
 /// </summary>
 public void Test(int skip, IDCMapping mapping, IDCFileInfo fileInfo)
 {
     // Nothing to do unless verbose tracing was requested.
     if (!m_traceSwitch.TraceVerbose)
         return;

     Debug.WriteLine("DataConverter Test");
     Debug.WriteLine("\tDCMapping");
     Debug.WriteLine($"\t\tBeginMarker ={mapping.BeginMarker}.");
     Debug.WriteLine($"\t\tDomain      ={mapping.Domain}.");
     Debug.WriteLine($"\t\tEndMarker   ={mapping.EndMarker}.");
     Debug.WriteLine("\tIDCFileInfo");
     Debug.WriteLine($"\t\tFileEncoding       ={fileInfo.FileEncoding}.");
 //				Debug.WriteLine("\t\tFileEncodingSource =" + fileInfo.FileEncodingSource + ".");
     Debug.WriteLine($"\t\tInputFileName      ={fileInfo.InputFileName}.");
     Debug.WriteLine($"\t\tOutputFileName     ={fileInfo.OutputFileName}.");
 }
// Example #2
        /// <summary>
        /// Factory method: builds the appropriate kind of MarkerSpec for the
        /// given mapping by delegating to the detailed overload.
        /// </summary>
        /// <param name="mapping">Mapping whose marker and encoding settings drive the choice.</param>
        /// <returns>A MarkerSpec configured from <paramref name="mapping"/>.</returns>
        public static MarkerSpec CreateMarkerSpec(IDCMapping mapping)
        {
            return CreateMarkerSpec(mapping.BeginMarker, mapping.MarkerEncoding,
                mapping.DataEncoding, mapping.IsInline, mapping.NewBeginMarker,
                mapping.EndMarker, mapping.NewEndMarker);
        }
        /// <summary>
        /// Converts each input file described in <paramref name="fileInfos"/> to a
        /// UTF-8 output file, tokenizing the text with marker specifications built
        /// from <paramref name="mappings"/>.
        /// </summary>
        /// <param name="mappings">Marker mappings; blank marker/data encodings fall
        /// back to <c>DefaultMarkerMap</c> / <c>DefaultDataMap</c>.</param>
        /// <param name="fileInfos">Input/output file descriptions; one conversion
        /// is performed per entry.</param>
        public void ConvertNew(IDCMapping[] mappings, IDCFileInfo[] fileInfos)
        {
            m_tokenizer = new Tokenizer();

            // Register a MarkerSpec for every mapping, substituting the default
            // encodings when a mapping leaves them blank.
            foreach (IDCMapping mapping in mappings)
            {
                if (mapping.MarkerEncoding.Length <= 0)
                    mapping.MarkerEncoding = DefaultMarkerMap;
                if (mapping.DataEncoding.Length <= 0)
                    mapping.DataEncoding = DefaultDataMap;
                m_tokenizer.Tri.Add(MarkerSpec.CreateMarkerSpec(mapping));
            }

            foreach (IDCFileInfo fileInfo in fileInfos)
            {
                // Choose the reader encoding. Legacy byte data (and unknown
                // encodings) are read through ReversibleEncoding and converted
                // later by Token.Output, hence fAlreadyUnicode = false.
                System.Text.Encoding encoding;
                bool fAlreadyUnicode = true;
                switch (fileInfo.FileEncoding)
                {
                    case DCFileEncoding.DC_FE_BYTES:
                    case DCFileEncoding.DC_FE_Unknown:
                        encoding = ReversibleEncoding;
                        fAlreadyUnicode = false;
                        break;
                    case DCFileEncoding.DC_FE_UTF16BE:
                        encoding = System.Text.Encoding.BigEndianUnicode;
                        break;
                    case DCFileEncoding.DC_FE_UTF16LE:
                        encoding = System.Text.Encoding.Unicode;
                        break;
                    case DCFileEncoding.DC_FE_UTF8:
                        encoding = System.Text.Encoding.UTF8;
                        break;
                    default:
                        Debug.Fail("Requested input file encoding not implemented.");
                        encoding = ReversibleEncoding;
                        fAlreadyUnicode = false;
                        break;
                }

                // using blocks guarantee the file handles are released even if
                // tokenizing throws part-way through a file (previously the
                // streams leaked on any exception).
                using (FileStream stream = new FileStream(fileInfo.InputFileName,
                    FileMode.Open, FileAccess.Read))
                using (StreamReader streamReader = new StreamReader(stream, encoding))
                using (Stream outputStream = new FileStream(fileInfo.OutputFileName,
                    FileMode.Create, FileAccess.Write))
                {
                    m_tokenizer.Input = streamReader;

                    bool fBOM = fileInfo.HasBOM;
                    // A StreamWriter over the stream emits the UTF-8 BOM for us.
                    // When no BOM is wanted we write raw UTF-8 bytes directly to
                    // the stream instead, because every StreamWriter writes a
                    // preamble. (A cleaner alternative would be an Encoding whose
                    // GetPreamble() returns an empty array.)
                    StreamWriter outputWriter = fBOM
                        ? new StreamWriter(outputStream, System.Text.Encoding.UTF8)
                        : null;
                    try
                    {
                        Token token;
                        do
                        {
                            token = m_tokenizer.Next();
                            string output = token.Output(fAlreadyUnicode);
                            if (fBOM)
                            {
                                if (token is NewlineToken)
                                    outputWriter.WriteLine(output);
                                else
                                    outputWriter.Write(output);
                            }
                            else
                            {
                                // Encode to bytes only on the raw path; the BOM
                                // path never used them.
                                byte[] bytes = System.Text.Encoding.UTF8.GetBytes(output);
                                outputStream.Write(bytes, 0, bytes.Length);
                                if (token is NewlineToken)
                                {
                                    outputStream.WriteByte((byte)'\r');
                                    outputStream.WriteByte((byte)'\n');
                                }
                            }
                        } while (!(token is EndOfFileToken));
                    }
                    finally
                    {
                        // Flushes buffered text and closes outputStream as well;
                        // the outer using's second Dispose is a harmless no-op.
                        if (outputWriter != null)
                            outputWriter.Close();
                    }
                }
            }
        }