Example 1
        public Token Next()
        {
            Token returnToken;

            if (m_startOfFile)
            {
                m_startOfFile = false;
                // Every field token pops a token off the stack before pushing
                // itself, so this puts something on the stack for the first
                // field token to pop.
                returnToken = new StartOfFileToken();
                returnToken.Initialize(this);

                return returnToken;
            }

            if (m_column == m_line.Length && m_fGetNextLine)
            {
                m_fGetNextLine = false;

                m_line = m_input.ReadLine();
                if (m_line == null)
                {
                    returnToken = new EndOfFileToken(m_lineNo, m_column);
                    returnToken.Initialize(this);
                    return returnToken;
                }

                m_lineNo++;
                m_column = 0;
            }

            if (m_column == m_line.Length)
            {
                // Finish processing this line by returning a NewlineToken
                m_fGetNextLine = true;
                returnToken = new NewlineToken(m_lineNo, m_column);
                returnToken.Initialize(this);
                return returnToken;
            }

            // m_column is the column in the line to start searching
            int nextMarkerStart; // column in line where the next marker starts
            int nextColumn; // column in line after end of the next marker
            MarkerSpec target;

            nextMarkerStart = m_tri.Search(m_line, m_column, out nextColumn, out target);
            if (nextMarkerStart == -1)
            {
                // no more markers on m_line after m_column
                nextMarkerStart = m_line.Length;
            }

            if (nextMarkerStart > m_column)
            {
                // There was a non-zero distance between the end of the last found marker
                // and the start of the current marker, so there is data on the line
                // that needs to be returned in a data token.

                // Review (BobbyD): Possible enhancement: The leading whitespace after a field marker
                // could be returned in a separate WhitespaceToken, separate from the DataToken.
                // This would allow the whitespace to be converted using the MarkerMap from the field marker
                // (which is what the Waxhaw TE team would like to see), rather than using the DataMap.
                // In addition, the whitespace could be normalized to one space if needed.
                string data = m_line.Substring(m_column, nextMarkerStart - m_column);
                returnToken = new DataToken(m_lineNo, m_column, data);
                m_column = nextMarkerStart;
            }
            else
            {
                // There was no data to return, so return the marker token
                // found by the search. (The cast on target is unnecessary,
                // since it is already declared as a MarkerSpec.)
                returnToken = target.MakeToken(m_lineNo, m_column);
                m_column = nextColumn;
            }

            returnToken.Initialize(this);
            return returnToken;
        }
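
        // Usage sketch (an assumption, not part of the original source): the
        // tokenizer is meant to be drained with a pull loop that asks for
        // tokens until Next() hands back an EndOfFileToken. ProcessToken is a
        // hypothetical per-token handler, used only for illustration.
        public void TokenizeAll()
        {
            Token token;
            do
            {
                // Next() initializes each token before returning it, so the
                // caller only has to consume them in order.
                token = Next();
                ProcessToken(token); // hypothetical handler, not in the source
            } while (!(token is EndOfFileToken));
        }
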
        public void TestDontConvertUnicodeToken()
        {
            Token token = new DataToken(0, 0, @"Hello, World!");

            Tokenizer tokenizer = new Tokenizer();

            // Setting token.Tokenizer and token.Map simulates Initialize()
            // so a full test environment does not need to be created.
            token.Tokenizer = tokenizer;

            // Set a non-existent converter so we can verify that it does not get called
            token.Map = "Garbanzo";
            Assert.AreEqual(@"Hello, World!", token.Output(true));
        }

        public void TestTokenConvert()
        {
            // Loads converters from the on-disk XML encoding repository file
            EncConverters converters = new EncConverters();

            // location of TECkit map files
            string mapDir = Info.TestFileDir;

            // Writes three converters to the XML encoding repository file on disk.
            converters.Add("ISO-8859-1<>UNICODE", mapDir + @"iso-8859-1.map",
                ConvType.Legacy_to_from_Unicode, "ISO-8859-1", "UNICODE", ProcessTypeFlags.UnicodeEncodingConversion);
            converters.Add("ASCII<>MIXED CASE UNICODE", mapDir + @"mixedcase.map",
                ConvType.Legacy_to_from_Unicode, "ISO-8859-1", "UNICODE", ProcessTypeFlags.UnicodeEncodingConversion);
            converters.Add("ASCII>UPPER CASE UNICODE", mapDir + @"uppercase.map",
                ConvType.Legacy_to_Unicode, "ISO-8859-1", "UNICODE", ProcessTypeFlags.UnicodeEncodingConversion);

            Token token = new DataToken(0, 0, @"Hello, World!");

            Tokenizer tokenizer = new Tokenizer();

            // Setting token.Tokenizer and token.Map simulates Initialize()
            // so a full test environment does not need to be created.
            token.Tokenizer = tokenizer;

            string rawString;

            // An empty string for the map name indicates that the default Unicode conversion should be used
            token.Map = "";
            rawString = token.RawOutput();
            Assert.AreEqual(@"Hello, World!", token.ConvertToUnicode(rawString));

            token.Map = "ISO-8859-1<>UNICODE";
            rawString = token.RawOutput();
            Assert.AreEqual(@"Hello, World!", token.ConvertToUnicode(rawString));

            token.Map = "ASCII<>MIXED CASE UNICODE";
            rawString = token.RawOutput();
            Assert.AreEqual(@"hELLO,~~~wORLD!", token.ConvertToUnicode(rawString));

            token.Map = "ASCII>UPPER CASE UNICODE";
            rawString = token.RawOutput();
            Assert.AreEqual(@"HELLO,~~WORLD!", token.ConvertToUnicode(rawString));
        }

        public void TestDataTokenModify()
        {
            Token token = new DataToken(0, 0, @"See file C:\so.txt");
            token.Map = "";
            // Output(true) doubles the literal backslash in the data,
            // presumably so it is not read as the start of a field marker.
            Assert.AreEqual(@"See file C:\\so.txt", token.Output(true));
        }