// Read a MalSequence, checking that it starts and terminates correctly.
// Named read_list to follow the ref, but has been genericized to handle vectors as well.
//
// reader   - token source; its next token must begin with 'start'.
// sequence - container that receives each element read; it is also the return value.
// start    - character the opening token must begin with, e.g. '('.
// end      - character that closes the sequence, e.g. ')'.
//
// Returns 'sequence' after every element up to the closing token has been added.
// Throws MalInternalError when the opening token is missing, empty, or does not
// begin with 'start'.
static public MalSeqBase read_list(Reader reader, MalSeqBase sequence, char start, char end)
{
    // Check that we are in fact at the start of a list. The null/empty test comes
    // first so an exhausted reader is reported as a Mal error, not a raw crash on token[0].
    string token = reader.Next();
    if (string.IsNullOrEmpty(token) || token[0] != start)
    {
        // Parse error - probably internal if the list code is correct.
        throw new MalInternalError("Sequence expected '" + start + "' but got: " + token);
    }

    // Use read_form to get the list's contents, accumulating them into the list.
    while (true)
    {
        token = reader.Peek();
        if (token == null)
        {
            // The input has finished but the list hasn't. Try to get more input.
            reader.LoadMoreTokens(start, end);
            continue;
        }

        // The length test keeps an empty token from being indexed; such a token is
        // handed to read_form like any other element.
        if (token.Length > 0 && token[0] == end)
        {
            // Reached valid end of list. Consume the end char and stop.
            reader.Next();
            break;
        }

        // Mutually recurse to read the next list element.
        sequence.Add(read_form(reader));
    }
    return sequence;
}
// Read a MalVal form - which is either an atom or a sequence.
//
// Dispatches on the first character of the next token without consuming it
// (the quote branch is the only one that consumes the token here).
// Returns null when the reader has no token to offer.
// Throws MalParseError when the token starts with a closing bracket.
static public MalVal read_form(Reader reader)
{
    string upcoming = reader.Peek();
    if (upcoming == null)
    {
        // Reader is empty - caused by a comment line in the input.
        return null;
    }

    // An empty token has no first character; '\0' matches no case and so it
    // falls through to read_atom, as it did with the StartsWith tests.
    char lead = upcoming.Length > 0 ? upcoming[0] : '\0';
    switch (lead)
    {
        case '\'':
            // Skip the quote symbol, then read the quoted thing and build a quote form.
            // NOTE(review): if the input ends right after the quote, the nested
            // read_form returns null and that null becomes the quoted form - confirm
            // whether this should be a parse error.
            // TODO handle quasiquotes and splices in the same way.
            reader.Next();
            return new MalList(new MalQuote(), read_form(reader));

        case '(':
            // Create a new List and read its body.
            return read_list(reader, new MalList(), '(', ')');

        case '[':
            // Create a new Vector and read its body.
            return read_list(reader, new MalVector(), '[', ']');

        case '{':
            // Create a new HashMap and read its body.
            // TODO - check that hashmap contains a list of keywords and values.
            return read_list(reader, new MalHashMap(), '{', '}');

        case ')':
        case ']':
        case '}':
            // A sequence close character that doesn't match a start.
            // This correctly handles a case like [1 ( 2 ] 3).
            throw new MalParseError("Expecting sequence or atom but got '" + upcoming + "'");

        default:
            // Not a sequence or a quote, so parse it as an atom.
            return read_atom(reader);
    }
}
// Consume the next token from the reader and convert it into the matching MalVal:
// a number, string, keyword, nil/true/false, or a symbol.
//
// "If you have a problem and you think regex is the answer, now you have two problems!"
// Unlike the reference C#-Mal, read_token handles floats and badly-formed symbols.
// In the Mal Guide this is called read_atom but it was renamed to avoid confusion
// with Mal Atoms.
//
// Throws MalInternalError when the reader hands back a null or empty token.
// Throws MalParseError for an unterminated string or a solo ':'.
static public MalVal read_token(Reader reader)
{
    string tokenToRead = reader.Next();
    if (string.IsNullOrEmpty(tokenToRead))
    {
        throw new MalInternalError("Reader has returned empty string");
    }

    switch (tokenToRead[0])
    {
        case '+':
            if (tokenToRead.Length == 1)
            {
                // Token is a solo '+', not the beginning of a number.
                return new MalSym(tokenToRead);
            }
            // Skip the sign and extract a positive number.
            return ParseNumber(tokenToRead.Substring(1), true);

        case '-':
            if (tokenToRead.Length == 1)
            {
                // Token is a solo '-', not the beginning of a number.
                return new MalSym(tokenToRead);
            }
            // Skip the sign and extract a negative number.
            return ParseNumber(tokenToRead.Substring(1), false);

        case '.':
            // An initial '.' is only allowed at the start of a number, as in '.2'.
            return ParseNumber(tokenToRead, true);

        case '"':
            // A well-formed string token needs a separate opening and closing quote,
            // hence the minimum length of 2: a lone '"' is an unterminated string.
            if (tokenToRead.Length >= 2 && tokenToRead[tokenToRead.Length - 1] == '"')
            {
                // Drop exactly one quote from each end. Trim('"') would also strip
                // any further quote characters sitting next to the delimiters, such
                // as an escaped quote at the end of the string's content.
                return new MalString(tokenToRead.Substring(1, tokenToRead.Length - 2));
            }
            throw new MalParseError("String '" + tokenToRead + "' lacks a closing thingy");

        case ':':
            // Handle a keyword.
            if (tokenToRead.Length == 1)
            {
                // Can't have a solo colon.
                throw new MalParseError("':' must be followed by a keyword");
            }
            return new MalKeyword(tokenToRead);

        default:
            if (IsDigit(tokenToRead[0].ToString()))
            {
                // Token seems to be an unsigned number.
                return ParseNumber(tokenToRead, true);
            }
            if (tokenToRead == "nil")
            {
                return malNil;
            }
            if (tokenToRead == "true")
            {
                return malTrue;
            }
            if (tokenToRead == "false")
            {
                return malFalse;
            }
            // If here it is 'just' a symbol.
            return new MalSym(tokenToRead);
    }
}
// Read a MalVal form - which is either an atom or a sequence.
//
// Dispatches on the first character of the next token:
//   ( [ {     read a list, vector or hashmap via read_list
//   ) ] }     unmatched closer, raises MalParseError
//   &         a solo '&' is the varargs symbol; '&name' expands to (deref name)
//   ' ` ~ ~@  wrap the following form as (quote x), (quasiquote x), (unquote x)
//             or (splice-unquote x)
//   other     an atom, handled by read_token
//
// Returns null when the reader has no token to offer.
// Throws MalParseError for an unmatched closer, or when a quoting character is
// not followed by a form.
static public MalVal read_form(Reader reader)
{
    string token = reader.Peek();
    if (token == null)
    {
        // Reader is empty - caused by a comment line in the input.
        return null;
    }

    // An empty token has no first character; '\0' matches no case below and is
    // passed to read_token.
    char lead = token.Length > 0 ? token[0] : '\0';

    // Name of the symbol that heads the wrapper list for the quoting characters.
    string wrapperSymbol;
    switch (lead)
    {
        case '(':
            // Create a new List and read its body.
            return read_list(reader, new MalList(), '(', ')');

        case '[':
            // Create a new Vector and read its body.
            return read_list(reader, new MalVector(), '[', ']');

        case '{':
            // Create a new HashMap and read its body. EVAL checks it has valid key val pairs.
            return read_list(reader, new MalHashMap(), '{', '}');

        case ')':
        case ']':
        case '}':
            // A sequence close character that doesn't match a start.
            // This correctly handles a case like [1 ( 2 ] 3).
            throw new MalParseError("Expecting sequence or atom but got '" + token + "'");

        case '&':
        {
            if (token.Length == 1)
            {
                // Treat a solo '&' as a varargs symbol.
                reader.Next();
                return malVarArgsChar;
            }

            // Reader macro. We have '&atomName'. Convert this into (deref atomName).
            // TODO - ??? handle non-explicit atom names? E.g. where '&' is followed
            // by a form that creates an atom name.
            MalList derefForm = new MalList();
            derefForm.Add(new MalSym("deref"));
            derefForm.Add(new MalSym(token.Substring(1)));

            // Advance past the deref token and return the new form.
            reader.Next();
            return derefForm;
        }

        case '\'':
            wrapperSymbol = "quote";
            break;

        case '`':
            wrapperSymbol = "quasiquote";
            break;

        case '~':
            // "~@" must be told apart from a plain "~". Comparing characters keeps
            // the test ordinal instead of a culture-sensitive string comparison.
            wrapperSymbol = (token.Length > 1 && token[1] == '@') ? "splice-unquote" : "unquote";
            break;

        default:
            // This isn't a list so parse it as an atom.
            return read_token(reader);
    }

    // Shared tail for the four quoting characters: skip the character, read the
    // form it applies to, and return (wrapperSymbol form).
    reader.Next();
    MalVal wrapped = read_form(reader);
    if (object.ReferenceEquals(wrapped, null))
    {
        // read_form signals "no more tokens" with null; storing that inside the
        // wrapper list would hide the error until the form is used.
        throw new MalParseError("Expecting a form after '" + token + "' but the input ended");
    }

    MalList wrapperForm = new MalList();
    wrapperForm.Add(new MalSym(wrapperSymbol));
    wrapperForm.Add(wrapped);
    return wrapperForm;
}
// Read a MalVal form - which is either an atom or a sequence.
//
// Looks at the next token without consuming it and picks a reader from its first
// character. Returns null when the reader has no token to offer.
// Throws MalParseError for an unmatched closing bracket or a solo '&'.
static public MalVal read_form(Reader reader)
{
    string upcoming = reader.Peek();

    // Reader is empty - caused by a comment line in the input.
    if (upcoming == null)
    {
        return null;
    }

    // An empty token has no first character; '\0' fails every test below and the
    // token ends up in read_token, as it did with the StartsWith tests.
    char lead = upcoming.Length > 0 ? upcoming[0] : '\0';

    // Opening brackets: create the matching container and read its body.
    if (lead == '(')
    {
        return read_list(reader, new MalList(), '(', ')');
    }
    if (lead == '[')
    {
        return read_list(reader, new MalVector(), '[', ']');
    }
    if (lead == '{')
    {
        // EVAL checks the hashmap has valid key val pairs.
        return read_list(reader, new MalHashMap(), '{', '}');
    }

    // A sequence close character that doesn't match a start.
    // This correctly handles a case like [1 ( 2 ] 3).
    if (lead == ')' || lead == ']' || lead == '}')
    {
        throw new MalParseError("Expecting sequence or atom but got '" + upcoming + "'");
    }

    // Reader macro: '&atomName' becomes (deref atomName).
    if (lead == '&')
    {
        if (upcoming.Length == 1)
        {
            throw new MalParseError("'&' lacks atom name");
        }

        // TODO - ??? handle non-explicit atom names? E.g. where '&' is followed by
        // a form that creates an atom name.
        MalList derefList = new MalList();
        derefList.Add(new MalSym("deref"));
        derefList.Add(new MalSym(upcoming.Substring(1)));

        // Advance past the deref token and return the new form.
        reader.Next();
        return derefList;
    }

    // Quote: skip the quote token, then build a list of the quote marker and the
    // form that follows it.
    if (lead == '\'')
    {
        // NOTE(review): if the input ends right after the quote, the nested read_form
        // returns null and that null is added as the quoted form - confirm whether
        // this should be a parse error.
        // TODO handle quasiquotes and splices in the same way.
        reader.Next();
        MalList quoted = new MalList();
        quoted.Add(new MalQuote());
        quoted.Add(read_form(reader));
        return quoted;
    }

    // This isn't a list so parse it as an atom.
    return read_token(reader);
}
// Consume the next token from the reader and turn it into a number, string,
// keyword or symbol.
//
// "If you have a problem and you think regex is the answer, now you have two problems!"
// Unlike the reference C#-Mal, read_atom handles floats and badly-formed symbols.
//
// Throws MalParseError for an empty token, an unterminated string or a solo ':'.
static public MalVal read_atom(Reader reader)
{
    string raw = reader.Next();
    if (raw.Length <= 0)
    {
        throw new MalParseError("INTERNAL - reader has returned empty string");
    }

    char lead = raw[0];
    switch (lead)
    {
        case '+':
        case '-':
            // A sign on its own is a symbol, not the beginning of a number.
            if (raw.Length == 1)
            {
                return new MalSym(raw);
            }
            // Skip the sign; the flag passed to ParseNumber is true for '+' and false for '-'.
            return ParseNumber(raw.Substring(1), lead == '+');

        case '.':
            // An initial '.' is only allowed at the start of a number, as in '.2'.
            return ParseNumber(raw, true);

        case '\"':
            // The token is stored with its surrounding quotes still attached.
            if (!raw.EndsWith("\""))
            {
                // TODO - never reaches this point. The reader regex seems to throw
                // away '"' chars if there is only 1.
                throw new MalParseError("String '" + raw + "' lacks a closing thingy");
            }
            return new MalString(raw);

        case ':':
            // Handle a keyword; a solo colon is not allowed.
            if (raw.Length == 1)
            {
                throw new MalParseError("':' must be followed by a keyword");
            }
            return new MalKeyword(raw);

        default:
            // A leading digit means an unsigned number; anything else is 'just' a symbol.
            if (IsDigit(lead.ToString()))
            {
                return ParseNumber(raw, true);
            }
            return new MalSym(raw);
    }

    // Every case above returns or throws, so control does not get here.
    throw new MalParseError("INTERNAL - can't process '" + raw + "'");
}