/// <summary>
/// Returns the next token from the input stream. Tokens are produced in this order:
/// a single StartOfFileToken first; then, per line, DataTokens and marker tokens
/// (as found by the trie search) followed by a NewlineToken; finally an EndOfFileToken
/// when the reader is exhausted.
/// </summary>
/// <returns>The next <see cref="Token"/>; never null. The token has been
/// initialized against this tokenizer via <c>Initialize(this)</c>.</returns>
public Token Next()
{
	Token returnToken;

	if (m_startOfFile)
	{
		m_startOfFile = false;
		// Since all field tokens need a token on the stack (they do a pop) before they push themselves
		// on the stack, this is needed to put something on the stack for the first field token.
		returnToken = new StartOfFileToken();
		returnToken.Initialize(this);
		return returnToken;
	}

	if (m_column == m_line.Length && m_fGetNextLine)
	{
		// The previous line has been fully consumed and its NewlineToken emitted;
		// advance to the next line (or report end of file).
		m_fGetNextLine = false;
		m_line = m_input.ReadLine();
		if (m_line == null)
		{
			returnToken = new EndOfFileToken(m_lineNo, m_column);
			returnToken.Initialize(this);
			return returnToken;
		}
		m_lineNo++;
		m_column = 0;
	}

	if (m_column == m_line.Length)
	{
		// Finish processing this line by returning a NewlineToken
		m_fGetNextLine = true;
		returnToken = new NewlineToken(m_lineNo, m_column);
		returnToken.Initialize(this);
		return returnToken;
	}

	// m_column is the column in the line to start searching
	int nextMarkerStart; // column in line where the next marker starts
	int nextColumn;      // column in line after end of the next marker
	MarkerSpec target;
	nextMarkerStart = m_tri.Search(m_line, m_column, out nextColumn, out target);
	if (nextMarkerStart == -1)
	{
		// no more markers on m_line after m_column
		nextMarkerStart = m_line.Length;
	}

	if (nextMarkerStart > m_column)
	{
		// there was a non-zero distance between the end of the last found marker
		// and the start of the current marker, therefore there is data on the line,
		// and needs to be returned in a data token
		// Review (BobbyD): Possible enhancement: The leading whitespace after a field marker
		// could be returned in a separate WhitespaceToken, separate from the DataToken.
		// This would allow the whitespace to be converted using the MarkerMap from the field marker
		// (which is what the Waxhaw TE team would like to see), rather than using the DataMap.
		// In addition, the whitespace could be normalized to one space if needed.
		string data = m_line.Substring(m_column, nextMarkerStart - m_column);
		returnToken = new DataToken(m_lineNo, m_column, data);
		m_column = nextMarkerStart;
	}
	else
	{
		// since there was no data to return, return some type of marker token.
		// (target is already a MarkerSpec, so no cast is needed; it is non-null here
		// because a marker was found starting exactly at m_column)
		returnToken = target.MakeToken(m_lineNo, m_column);
		m_column = nextColumn;
	}
	returnToken.Initialize(this);
	return returnToken;
}
public void TestDontConvertUnicodeToken()
{
	Token dataToken = new DataToken(0, 0, @"Hello, World!");

	// Assigning Tokenizer and Map directly simulates Initialize()
	// so a full test environment does not need to be created.
	dataToken.Tokenizer = new Tokenizer();

	// Set a non-existent converter so we can check to make sure it does not get called
	dataToken.Map = "Garbanzo";

	Assert.AreEqual(@"Hello, World!", dataToken.Output(true));
}
public void TestTokenConvert()
{
	// loads converters from on disk XML encoding repository file
	EncConverters converters = new EncConverters();

	// location of TECkit map files
	string mapDir = Info.TestFileDir;

	// writes three converters to XML encoding repository file on disk.
	converters.Add("ISO-8859-1<>UNICODE", mapDir + @"iso-8859-1.map",
		ConvType.Legacy_to_from_Unicode, "ISO-8859-1", "UNICODE",
		ProcessTypeFlags.UnicodeEncodingConversion);
	converters.Add("ASCII<>MIXED CASE UNICODE", mapDir + @"mixedcase.map",
		ConvType.Legacy_to_from_Unicode, "ISO-8859-1", "UNICODE",
		ProcessTypeFlags.UnicodeEncodingConversion);
	converters.Add("ASCII>UPPER CASE UNICODE", mapDir + @"uppercase.map",
		ConvType.Legacy_to_Unicode, "ISO-8859-1", "UNICODE",
		ProcessTypeFlags.UnicodeEncodingConversion);

	Token token = new DataToken(0, 0, @"Hello, World!");

	// Setting token.Tokenizer and token.Map simulates Initialize()
	// so a full test environment does not need to be created.
	token.Tokenizer = new Tokenizer();

	// Each map name paired with the Unicode text it should produce.
	// An empty string for the map name indicates a default Unicode conversion should be used.
	string[] mapNames =
	{
		"",
		"ISO-8859-1<>UNICODE",
		"ASCII<>MIXED CASE UNICODE",
		"ASCII>UPPER CASE UNICODE"
	};
	string[] expectedOutputs =
	{
		@"Hello, World!",
		@"Hello, World!",
		@"hELLO,~~~wORLD!",
		@"HELLO,~~WORLD!"
	};

	for (int i = 0; i < mapNames.Length; i++)
	{
		token.Map = mapNames[i];
		string rawString = token.RawOutput();
		Assert.AreEqual(expectedOutputs[i], token.ConvertToUnicode(rawString));
	}
}
public void TestDataTokenModify()
{
	// A backslash in the data should come back escaped in the output.
	Token dataToken = new DataToken(0, 0, @"See file C:\so.txt");
	dataToken.Map = "";

	string actual = dataToken.Output(true);

	Assert.AreEqual(@"See file C:\\so.txt", actual);
}