/// <summary>
/// Matches a block comment together with any whitespace around it.
/// Singleline is required so that '.' also matches '\n'; without it a
/// block comment spanning several lines would never match.
/// </summary>
private static readonly Regex m_LongCommentRegex =
    new Regex("[ \t\r\n]*\\/\\*.*\\*\\/[ \t\r\n]*", RegexOptions.Singleline);

/// <summary>
/// Matches a line comment (two slashes up to and including the newline)
/// together with any whitespace in front of it.
/// </summary>
private static readonly Regex m_LineCommentRegex =
    new Regex("[ \t\r\n]*\\/\\/[^\n]*\n");

/// <summary>
/// Runs the lexer on the machine's input file.
///
/// Characters are consumed one at a time (from the seek cache first,
/// then from the input file) and appended to the text cache. Comments
/// are stripped out of the text cache and emitted verbatim as
/// DirectNode instances. For all other text, the immediate parent token
/// (if any) is asked whether it has ended, and then either the token
/// holding ownership or every non-excluded token is asked to Detect().
/// </summary>
internal void Run()
{
    this.p_LineNumber = 0;

    while (!this.m_Machine.InputFile.EndOfStream || this.m_SeekCache.Length > 0)
    {
        // Retrieve a character from the seek cache (used because
        // sometimes we need to read ahead multiple characters and
        // we can only peek the next character) or alternatively
        // the file if the seek cache is empty.
        char c;
        if (this.m_SeekCache.Length > 0)
        {
            c = this.m_SeekCache[0];
            this.m_SeekCache = this.m_SeekCache.Substring(1);
        }
        else
        {
            c = (char)this.m_Machine.InputFile.Read();
        }
        c = this.ConvertNewline(c);

        if (c == '\n')
        {
            this.p_LineNumber += 1;
        }
        this.p_Char = c;
        this.p_TextCache += c;

        // Check for changes between the comment statuses.
        if (this.p_Char == '/' && this.p_TextCache.Length > 1)
        {
            char previous = this.p_TextCache[this.p_TextCache.Length - 2];
            if (previous == '/')
            {
                // Two slashes inside a block comment (e.g. a URL) must
                // not start a line comment; otherwise the line comment
                // flag would outlive the block comment and swallow the
                // code that follows it.
                if (!this.m_IsLongComment)
                {
                    this.m_IsComment = true;
                }
            }
            else if (previous == '*' && this.m_IsLongComment)
            {
                // End of a block comment: cut it out of the text cache
                // and emit it untouched.
                this.m_IsLongComment = false;
                Match blockComment = m_LongCommentRegex.Match(this.p_TextCache);
                this.p_TextCache = m_LongCommentRegex.Replace(this.p_TextCache, "");
                this.AddNode(new DirectNode(blockComment.Value));
                this.m_ExcludedTokens.Clear();
                if (this.p_TextCache.Length > 0)
                {
                    this.p_Char = this.p_TextCache[this.p_TextCache.Length - 1];
                }
                else
                {
                    this.p_Char = '\0';
                }
                continue;
            }
        }
        else if (this.p_Char == '*' && this.p_TextCache.Length > 1)
        {
            // A block comment opener inside a line comment is just
            // comment text; only open a block comment when we are not
            // already in a line comment.
            if (this.p_TextCache[this.p_TextCache.Length - 2] == '/' && !this.m_IsComment)
            {
                this.m_IsLongComment = true;
            }
        }
        else if (this.p_Char == '\n' && this.m_IsComment)
        {
            // End of a line comment: cut it out of the text cache and
            // emit it untouched.
            this.m_IsComment = false;
            Match lineComment = m_LineCommentRegex.Match(this.p_TextCache);
            this.p_TextCache = m_LineCommentRegex.Replace(this.p_TextCache, "");
            this.AddNode(new DirectNode(lineComment.Value));
            this.m_ExcludedTokens.Clear();
            if (this.p_TextCache.Length > 0)
            {
                this.p_Char = this.p_TextCache[this.p_TextCache.Length - 1];
            }
            else
            {
                this.p_Char = '\0';
            }
            continue;
        }

        // While inside a comment, keep accumulating characters without
        // consulting any tokens.
        if (this.m_IsComment || this.m_IsLongComment)
        {
            continue;
        }

        // The text cache holds nothing but leading whitespace, which
        // causes problems with tokens that rely on StartsWith. Since
        // there is nothing that any token can make a decision on, we
        // just continue the loop.
        if (this.p_TextCache.TrimStart().Length == 0)
        {
            continue;
        }

        // First check the immediate parent to see whether or not it wants
        // to be unloaded from the list (and to consume the current text
        // state).
        if (this.m_ParentStack.Count > 0)
        {
            Token parent = this.m_ParentStack.Peek();
            this.m_CurrentToken = parent;
            bool ended = parent.DetectEnd(this);
            this.m_CurrentToken = null;
            if (ended)
            {
                // Find the current instance of the token type in the
                // lexer list and replace it with the parent instance.
                for (int i = 0; i < this.m_Tokens.Count; i++)
                {
                    if (this.m_Tokens[i].GetType() == parent.GetType())
                    {
                        this.m_Tokens[i] = this.m_ParentStack.Pop();
                        break;
                    }
                }

                this.p_TextCache = "";
                this.p_Char = '\0';
                this.m_ExcludedTokens.Clear();
                continue;
            }
        }

        if (this.m_TokenWithOwnership == null)
        {
            // If all of the tokens have placed themselves in
            // the excluded list, we add a new DirectNode (since
            // we don't understand the content) and clear the
            // text state.
            if (this.m_Tokens.Count == this.m_ExcludedTokens.Count)
            {
                this.AddNode(new DirectNode(this.p_TextCache));
                this.p_TextCache = "";
                this.p_Char = '\0';
                this.m_ExcludedTokens.Clear();
                continue;
            }

            for (int a = 0; a < this.m_Tokens.Count; a++)
            {
                Token t = this.m_Tokens[a];
                if (this.m_ExcludedTokens.Contains(t))
                {
                    continue;
                }

                this.m_CurrentToken = t;
                t.Detect(this);

                // The AddParent() call modifies the m_CurrentToken
                // variable. Make sure it's saved back into the
                // token list.
                int i = this.m_Tokens.IndexOf(t);
                this.m_Tokens[i] = this.m_CurrentToken;

                if (this.m_ShouldResetText)
                {
                    this.m_TokenWithOwnership = null;
                    this.p_TextCache = "";
                    this.p_Char = '\0';
                    this.m_ExcludedTokens.Clear();
                    this.m_ShouldResetText = false;
                    break; // Skip the rest of the tokens.
                }
            }
        }
        else
        {
            // A token has taken ownership of the text state, so it is
            // the only one that gets to inspect it.
            this.m_CurrentToken = this.m_TokenWithOwnership;
            this.m_TokenWithOwnership.Detect(this);

            if (this.m_ShouldResetText)
            {
                this.m_TokenWithOwnership = null;
                this.p_TextCache = "";
                this.p_Char = '\0';
                this.m_ExcludedTokens.Clear();
                this.m_ShouldResetText = false;
            }
        }

        this.m_CurrentToken = null;
    }
}
/// <summary>
/// Gives the token that is currently being processed ownership of the
/// text state. While a token holds ownership, it is the only token
/// that will receive Detect() calls from Run(), which ensures that
/// other tokens don't add nodes when they are not meant to.
/// </summary>
internal void TakeOwnership()
{
    m_TokenWithOwnership = m_CurrentToken;
}
/// <summary>
/// Registers the current token as a parent of future tokens (e.g. a
/// ClassDefinitionToken becomes the parent so that variables inside
/// the class are transformed correctly). The immediate parent has
/// DetectEnd() called at the start of the lexer loop; when that call
/// returns true, the token no longer wants to own the current block
/// and the lexer loop restarts with the parent removed from the list.
/// </summary>
/// <exception cref="MethodAccessException">
/// The current token's type has no public parameterless constructor.
/// </exception>
internal void AddParent()
{
    // A token cannot be a parent while it owns the text, so becoming
    // a parent implicitly ends ownership. Calling EndOwnership after
    // AddParent is harmless (and reads nicer as well).
    if (HasOwnership())
    {
        EndOwnership();
    }

    Token parent = m_CurrentToken;
    m_ParentStack.Push(parent);

    // The current token is replaced by a brand new instance of the same
    // type: detectors may be nested inside detectors of their own type,
    // and reusing a single instance would make them share parent status.
    ConstructorInfo defaultConstructor = parent.GetType().GetConstructor(Type.EmptyTypes);
    if (defaultConstructor != null)
    {
        m_CurrentToken = (Token)defaultConstructor.Invoke(null);
        return;
    }

    throw new MethodAccessException("The token object which attempted to raise itself as a parent does not have a public constructor.");
}