/// <summary>
/// Compiles source text into a code sequence.
/// </summary>
/// <param name="sourceCode">The text to compile.</param>
/// <param name="sourceNameForErrorMessages">Name of the source, passed along to the tokenizer and parser for error messages.</param>
/// <returns>The sequence built from the tokenized text.</returns>
public static Code Compile(
    string sourceCode,
    string sourceNameForErrorMessages)
{
    // Swap every ' for ’ before tokenizing. This both looks better and keeps the
    // apostrophe from interfering with Javascript quotes in HTML.
    // TODO: But what about double quotes? They must get converted just before display,
    // because you can't tell whether to use left or right double quotes until the whole
    // text is assembled. But that makes links with quotes in them not work.
    var typographicSource = sourceCode.Replace('\'', '’');
    var tokens = Token.Tokenize(typographicSource, sourceNameForErrorMessages);

    // Note that the text handed on for error messages is the original, un-replaced source.
    return SequenceCode.BuildFromTokens(tokens, sourceCode, sourceNameForErrorMessages);
}
/// <summary>
/// Parses a list of tokens into a <see cref="SequenceCode"/>.
/// This allows the base Code class to construct a sequence.
/// </summary>
/// <param name="tokens">The tokens to parse.</param>
/// <param name="sourceTextForErrorMessages">The source text; handed to <c>Require</c> for error reporting and stored in the result's <c>SourceText</c>.</param>
/// <param name="sourceNameForErrorMessages">The source name; handed to <c>Require</c> for error reporting.</param>
/// <returns>The parsed sequence, with its <c>SourceText</c> set.</returns>
public static SequenceCode BuildFromTokens(
    List<Token> tokens,
    string sourceTextForErrorMessages,
    string sourceNameForErrorMessages)
{
    var look = new LookAhead(tokens);

    SequenceCode root = ParseSequence();

    // Whatever token stopped the top-level sequence has to be the end of the source text.
    look.Require(TokenType.EndOfSourceText, sourceTextForErrorMessages, sourceNameForErrorMessages);
    root.SourceText = sourceTextForErrorMessages;
    return root;

    // Local parsing helpers. They all share the 'look' cursor declared above.

    // Collects codes until a token shows up that does not start any of the known constructs.
    SequenceCode ParseSequence()
    {
        var sequence = new SequenceCode();
        for (;;)
        {
            if (look.Got(TokenType.Characters))
            {
                sequence.Codes.Add(new CharacterCode(look.Value));
                continue;
            }

            if (look.Got(TokenType.Special))
            {
                // Ex. [he]
                sequence.Codes.Add(new SpecialCode(look.Value));
                continue;
            }

            if (look.Got(TokenType.Merge))
            {
                // [merge]
                // [merge sceneId]
                string sceneId = look.Got(TokenType.Id) ? look.Value : null;
                sequence.Codes.Add(new MergeCode(sceneId));
                continue;
            }

            if (look.Got(TokenType.Return))
            {
                // [return]
                sequence.Codes.Add(new ReturnCode());
                continue;
            }

            if (look.Got(TokenType.Scene))
            {
                // [scene soundsLikeAScam]
                look.Require(TokenType.Id, sourceTextForErrorMessages, sourceNameForErrorMessages);
                sequence.Codes.Add(new SceneCode(look.Value));
                continue;
            }

            if (look.Got(TokenType.Score) || look.Got(TokenType.Sort))
            {
                // SCORE ID [, ID...]
                // SORT ID [, ID...]
                var sortOnly = look.Type == TokenType.Sort;
                var ids = new List<string>();
                do
                {
                    look.Require(TokenType.Id, sourceTextForErrorMessages, sourceNameForErrorMessages);
                    ids.Add(look.Value);
                }
                while (look.Got(TokenType.Comma));
                sequence.Codes.Add(new ScoreCode(ids, sortOnly));
                continue;
            }

            if (look.Got(TokenType.Text))
            {
                // An id, then optional characters, then 'end'.
                look.Require(TokenType.Id, sourceTextForErrorMessages, sourceNameForErrorMessages);
                string id = look.Value;
                string text = look.Got(TokenType.Characters) ? look.Value : "";
                look.Require(TokenType.End, sourceTextForErrorMessages, sourceNameForErrorMessages);
                sequence.Codes.Add(new TextCode(id, text));
                continue;
            }

            if (look.Got(TokenType.Set))
            {
                sequence.Codes.Add(new SetCode(ParseExpressions(false)));
                continue;
            }

            if (look.Got(TokenType.When))
            {
                if (look.Got(TokenType.Else))
                {
                    sequence.Codes.Add(new WhenElseCode());
                }
                else
                {
                    sequence.Codes.Add(new WhenCode(ParseExpressions(true)));
                }
                continue;
            }

            if (look.Got(TokenType.If))
            {
                var ifCode = ParseIf();
                sequence.Codes.Add(ifCode);
                // The whole if/or case statement is terminated by 'end'.
                look.Require(TokenType.End, sourceTextForErrorMessages, sourceNameForErrorMessages);
                continue;
            }

            // Hopefully the token we've been looking at is something the caller is
            // expecting to see next (i.e. end of source text).
            return sequence;
        }
    }

    // Parses a comma-separated list of expressions, each shaped like one of:
    //   ID
    //   NOT ID
    //   ID=ID
    //   NOT ID=ID
    // allowNotEqual: [when not a=b] makes sense. But [set not a=b] doesn't mean anything.
    List<Expression> ParseExpressions(bool allowNotEqual)
    {
        var expressions = new List<Expression>();
        do
        {
            var negated = look.Got(TokenType.Not);
            look.Require(TokenType.Id, sourceTextForErrorMessages, sourceNameForErrorMessages);
            string leftId = look.Value;
            string rightId = null;

            // Only look for '=ID' when a right-hand side is permitted here; a negated
            // expression gets one only if the caller allows 'not a=b'.
            var rightSidePermitted = allowNotEqual || !negated;
            if (rightSidePermitted && look.Got(TokenType.Equal))
            {
                look.Require(TokenType.Id, sourceTextForErrorMessages, sourceNameForErrorMessages);
                rightId = look.Value;
            }

            expressions.Add(new Expression(negated, leftId, rightId));
        }
        while (look.Got(TokenType.Comma));

        return expressions;
    }

    /* This flat 'if' sequence with 'or' (which is like 'elif' but more Englishy)...
     *
     *   [if reaction=Flee]
     *     A
     *   [or reaction=Escape]
     *     B
     *   [else]
     *     C
     *   [end]
     *
     * ...is equivalent to this nested sequence:
     *
     *   if reaction=Flee
     *     A
     *   else
     *     if reaction=Escape
     *       B
     *     else
     *       C
     */
    IfCode ParseIf()
    {
        // This is called after getting 'if' or 'or'.
        // First comes the expression. It's like one of these:
        //   [if brave]
        //   [if not killedInspector]
        var conditions = ParseExpressions(true);
        var whenTrue = ParseSequence();
        Code whenFalse = ParseFalseBranch();
        return new IfCode(conditions, whenTrue, whenFalse);
    }

    // Parses what follows the true branch: an 'else' sequence, a chained 'or', or nothing.
    Code ParseFalseBranch()
    {
        if (look.Got(TokenType.Else))
        {
            return ParseSequence();
        }

        if (look.Got(TokenType.Or))
        {
            return ParseIf();
        }

        // Otherwise must be 'end'. Let caller handle it.
        return null;
    }
}
/// <summary>
/// Compiles source text into a code tree. Stores the text in <c>SourceText</c>,
/// tokenizes it with a <c>TokenList</c>, and parses the tokens into <c>RootCode</c>.
/// </summary>
/// <param name="sourceText">The text to compile; also passed to <c>Require</c> and quoted in error messages.</param>
/// <param name="sourceNameForErrorMessages">The source name used in error messages.</param>
/// <param name="settings">Settings handed to the <c>TokenList</c> tokenizer.</param>
/// <exception cref="InvalidOperationException">
/// Thrown when 'not' precedes something other than a boolean or score id in a place
/// where 'not a=b' is not allowed (i.e. in [set]).
/// </exception>
public CodeTree(
    string sourceText,
    string sourceNameForErrorMessages,
    Dictionary<string, Setting> settings)
{
    SourceText = sourceText;
    var tokenList = new TokenList(sourceText, sourceNameForErrorMessages, settings);
    var look = new LookAhead(tokenList);

    RootCode = ParseSequence();

    // Whatever token stopped the top-level sequence has to be the end of the source text.
    look.Require(TokenType.EndOfSourceText, sourceText, sourceNameForErrorMessages);

    // Local parsing helpers. They all share the 'look' cursor declared above.

    // Collects codes until a token shows up that does not start any of the known constructs.
    SequenceCode ParseSequence()
    {
        var codes = new List<Code>();
        while (true)
        {
            if (look.Got(TokenType.Characters))
            {
                codes.Add(new CharacterCode(look.Value));
            }
            else if (look.Got(TokenType.SpecialId))
            {
                // Ex. [he]
                codes.Add(new SpecialCode(look.Value));
            }
            else if (look.Got(TokenType.Merge))
            {
                // [merge]
                // [merge sceneId]
                string? sceneId = look.Got(TokenType.Id) ? look.Value : null;
                codes.Add(new MergeCode(sceneId));
            }
            else if (look.Got(TokenType.Return))
            {
                // [return]
                codes.Add(new ReturnCode());
            }
            else if (look.Got(TokenType.Scene))
            {
                // [scene soundsLikeAScam]
                look.Require(TokenType.Id, sourceText, sourceNameForErrorMessages);
                codes.Add(new SceneCode(look.Value));
            }
            else if (look.Got(TokenType.Score) || look.Got(TokenType.Sort))
            {
                // SCORE SCOREID [, SCOREID...]
                // SORT SCOREID [, SCOREID...]
                var sortOnly = look.Type == TokenType.Sort;
                var ids = new List<string>();
                do
                {
                    look.Require(TokenType.ScoreId, sourceText, sourceNameForErrorMessages);
                    ids.Add(look.Value);
                }
                while (look.Got(TokenType.Comma));
                codes.Add(new ScoreCode(ids, sortOnly));
            }
            else if (look.Got(TokenType.Text))
            {
                // An id, then optional characters, then 'end'.
                look.Require(TokenType.Id, sourceText, sourceNameForErrorMessages);
                string id = look.Value;
                string text = "";
                if (look.Got(TokenType.Characters))
                {
                    text = look.Value;
                }
                look.Require(TokenType.End, sourceText, sourceNameForErrorMessages);
                codes.Add(new TextCode(id, text));
            }
            else if (look.Got(TokenType.Set))
            {
                codes.Add(new SetCode(ParseExpressions(false)));
            }
            else if (look.Got(TokenType.When))
            {
                if (look.Got(TokenType.Else))
                {
                    codes.Add(new WhenElseCode());
                }
                else
                {
                    codes.Add(new WhenCode(ParseExpressions(true)));
                }
            }
            else if (look.Got(TokenType.If))
            {
                var ifCode = ParseIf();
                codes.Add(ifCode);
                // The whole if/or case statement is terminated by 'end'.
                look.Require(TokenType.End, sourceText, sourceNameForErrorMessages);
            }
            else
            {
                // Hopefully the token we've been looking at is something the caller is
                // expecting to see next (i.e. end of source text).
                return new SequenceCode(codes);
            }
        }
    }

    // Parses a comma-separated list of expressions, each shaped like one of:
    //   BOOLEANID          (a bare SCOREID is accepted in this position too)
    //   NOT BOOLEANID
    //   STRINGID=ID
    //   NOT STRINGID=ID
    // allowNotEqual: [when not a=b] makes sense. But [set not a=b] doesn't mean anything.
    List<Expression> ParseExpressions(bool allowNotEqual)
    {
        var result = new List<Expression>();
        do
        {
            var not = look.Got(TokenType.Not);
            string leftId;
            string? rightId = null;
            if (look.Got(TokenType.BooleanId) || look.Got(TokenType.ScoreId))
            {
                leftId = look.Value;
            }
            else
            {
                if (not && !allowNotEqual)
                {
                    // Build the message by interpolation alone. Running the finished text
                    // through string.Format as well would re-parse it as a format string,
                    // so a '{' or '}' in the source text would raise FormatException
                    // instead of this error.
                    throw new InvalidOperationException(
                        $"file {sourceNameForErrorMessages}: unexpected {TokenType.Not} in\n{sourceText}");
                }

                // Anything else has to be a full STRINGID=ID comparison.
                look.Require(TokenType.StringId, sourceText, sourceNameForErrorMessages);
                leftId = look.Value;
                look.Require(TokenType.Equal, sourceText, sourceNameForErrorMessages);
                look.Require(TokenType.Id, sourceText, sourceNameForErrorMessages);
                rightId = look.Value;
            }

            result.Add(new Expression(not, leftId, rightId));
        }
        while (look.Got(TokenType.Comma));

        return result;
    }

    /* This flat 'if' sequence with 'or' (which is like 'elif' but more Englishy)...
     *
     *   [if reaction=Flee]
     *     A
     *   [or reaction=Escape]
     *     B
     *   [else]
     *     C
     *   [end]
     *
     * ...is equivalent to this nested sequence:
     *
     *   if reaction=Flee
     *     A
     *   else
     *     if reaction=Escape
     *       B
     *     else
     *       C
     */
    IfCode ParseIf()
    {
        // This is called after getting 'if' or 'or'.
        // First get the expression. It's like one of these:
        //   [if brave]
        //   [if not killedInspector]
        var expressions = ParseExpressions(true);
        var trueCode = ParseSequence();
        Code? falseCode = null;
        if (look.Got(TokenType.Else))
        {
            falseCode = ParseSequence();
        }
        else if (look.Got(TokenType.Or))
        {
            // 'or' chains another if as the false branch.
            falseCode = ParseIf();
        }

        // Otherwise must be 'end'. Let caller handle it.
        return new IfCode(expressions, trueCode, falseCode);
    }
}