/// <summary>
/// Tokenises the specified string, lazily yielding one <see cref="Symbol"/> for every
/// matched token whose <c>TokenInfo.Discard</c> flag is not set.
/// </summary>
/// <remarks>
/// At each position the entries of <c>tokenInfoList</c> are tried in list order and the
/// first one whose pattern matches a non-empty segment starting exactly at that position
/// wins. Each yielded symbol carries the position of its first character. Positions start
/// at line 1, column 0: a line feed increments the line and resets the column to 0, a tab
/// advances the column by <c>TAB_SIZE</c>, a carriage return leaves the column unchanged,
/// and any other character advances it by one.
/// Because this is an iterator, no work (including argument validation) happens until the
/// returned sequence is enumerated.
/// </remarks>
/// <param name="input">The text to tokenise. Must not be null.</param>
/// <returns>The symbols found in <paramref name="input"/>, in source order.</returns>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
/// <exception cref="Exception">
/// No token pattern matches a non-empty segment at the current position.
/// </exception>
public IEnumerable<Symbol> Tokenise(string input)
{
    if (input == null)
    {
        throw new ArgumentNullException("input");
    }

    int ptr = 0;
    SymbolPosition currentPos = new SymbolPosition(1, 0);

    while (ptr < input.Length)
    {
        bool found = false;

        for (int i = 0; i < tokenInfoList.Count; i++)
        {
            TokenInfo tokenInfo = tokenInfoList[i];
            Match match = tokenInfo.Pattern.Match(input, ptr);

            // The search starts at ptr but may succeed further along; only a match that
            // begins exactly at ptr counts. Zero-length matches are rejected as well:
            // accepting one would leave ptr unchanged and loop forever.
            if (!match.Success || match.Index != ptr || match.Length == 0)
            {
                continue;
            }

            string segment = match.Value;

            if (!tokenInfo.Discard)
            {
                // currentPos still refers to the first character of this segment.
                yield return new Symbol(tokenInfo.Token, segment, currentPos);
            }

            ptr = match.Index + match.Length;

            // Advance the line/column tracker across the consumed text.
            int numNewLines = 0;
            int newCol = currentPos.Column;
            for (int j = 0; j < segment.Length; j++)
            {
                char c = segment[j];
                if (c == '\n')
                {
                    numNewLines++;
                    newCol = 0;
                }
                else if (c == '\t')
                {
                    newCol += TAB_SIZE;
                }
                else if (c != '\r')
                {
                    // '\r' is skipped so that "\r\n" does not advance the column.
                    newCol++;
                }
            }

            currentPos = new SymbolPosition(currentPos.Line + numNewLines, newCol);
            found = true;
            break;
        }

        if (!found)
        {
            throw new Exception(string.Format("Unrecognised symbol '{0}'", StringUtils.Escape(input.Substring(ptr, 1))));
        }
    }
}
/// <summary>
/// Creates a symbol from its type, value and position.
/// </summary>
/// <param name="type">Stored in <see cref="Type"/>.</param>
/// <param name="value">Stored in <see cref="Value"/>.</param>
/// <param name="position">Stored in <see cref="Position"/>.</param>
public Symbol(SymbolType type, string value, SymbolPosition position)
{
    this.Type = type;
    this.Value = value;
    this.Position = position;
}