/// <summary>
/// Creates a token that duplicates <paramref name="copyFrom"/> except for its end position.
/// </summary>
/// <param name="copyFrom">Token whose source, type, start and extracted fields are copied.</param>
/// <param name="end">Value stored as the end position of the new token.</param>
public TokenPos(TokenPos copyFrom, int end)
{
    // Only the end position differs from the token being copied.
    this.m_end = end;
    this.m_start = copyFrom.m_start;
    this.m_type = copyFrom.m_type;
    this.m_source = copyFrom.m_source;
    this.m_extracted = copyFrom.m_extracted;
}
/// <summary>
/// Escape character; it can be used only inside quoted strings.
/// </summary>
public const char escape = '\\';

/// <summary>
/// Tokenizes <paramref name="src"/> in two passes: the first pass only counts the
/// tokens, the second pass fills an array of exactly that size.
/// </summary>
/// <param name="src">Text to tokenize.</param>
/// <returns>
/// The filled token array, or <c>null</c> when the counting pass reports a negative count.
/// </returns>
public static TokenPos[] tokenize(string src)
{
    int needed = tokenize(src, null);
    if (needed >= 0)
    {
        TokenPos[] result = new TokenPos[needed];
        // The count returned by the filling pass is not needed here.
        tokenize(src, result);
        return result;
    }

    return null;
}
/// <summary>
/// Builds the root (document) node from a token array by parsing tokens one after
/// another and collecting every non-default item as a child of the root.
/// </summary>
/// <param name="tokens">Tokens to parse; may be <c>null</c> or empty.</param>
/// <returns>
/// The document node holding the parsed items, or a default <c>ListOrValue</c> when
/// <paramref name="tokens"/> is null/empty or no non-default item was produced.
/// </returns>
public static ListOrValue parseRoot(TokenPos[] tokens)
{
    if (tokens == null || tokens.Length == 0)
    {
        return new ListOrValue();
    }

    // The document token starts at 0 and takes its source and length from the first token.
    var documentSource = tokens[0].getSource();
    var documentLength = tokens[0].sourceLength();
    var documentToken = new TokenPos(documentSource, TokenType.document, 0, documentLength);
    ListOrValue root = new ListOrValue(new List<ListOrValue>(), documentToken);

    int total = tokens.Length;
    int cursor = 0;
    while (cursor < total)
    {
        // parseToken moves the cursor forward through its out parameter.
        var node = parseToken(tokens, cursor, out cursor);
        if (!node.isDefault())
        {
            root.add(node);
        }
    }

    return root.count() < 1 ? new ListOrValue() : root;
}
/// <summary>
/// Scans <paramref name="self"/> with a small state machine and splits it into
/// list, quoted-string, unquoted-string and neutral (whitespace/control) tokens.
/// </summary>
/// <remarks>
/// The method is meant to be called twice: once with <paramref name="tokens"/> set to
/// <c>null</c> to obtain the number of slots required, and once with an array of that
/// size to record the token positions.
/// </remarks>
/// <param name="self">Text to tokenize.</param>
/// <param name="tokens">
/// Array that receives the tokens, or <c>null</c> to only count them.
/// </param>
/// <returns>
/// The number of token slots used, or -1 when the input is null/empty or malformed:
/// an escape character outside a quoted string, a quote inside an unquoted string,
/// an unclosed quoted string, or unbalanced list delimiters (including a list end
/// that appears before its matching list start).
/// </returns>
public static int tokenize(string self, TokenPos[] tokens)
{
    if (self == null || self == "")
    {
        return -1;
    }

    var curState = ParseState.insideBody;
    var lastToken = TokenType.none;
    var strLen = self.Length;
    var listStartCount = 0;
    var listEndCount = 0;
    var tokenIdx = 0;
    var nextIsEscaped = false;
    Stack<int> listStartStack = null;
    var tokensLen = 0;

    if (tokens != null)
    {
        tokensLen = tokens.Length;
        // TODO think of an optimized way without heap allocation
        listStartStack = new Stack<int>(tokensLen);
    }

    for (var i = 0; i < strLen; i++)
    {
        var cur = self[i];
        switch (curState)
        {
            case ParseState.insideBody:
                switch (cur)
                {
                    case listStart:
                        listStartCount++;
                        if (tokens != null)
                        {
                            // Remember where this list token lives so its end can be set later.
                            listStartStack.Push(tokenIdx);
                            tokens[tokenIdx] = new TokenPos(self, TokenType.list, i, 0);
                        }
                        tokenIdx++;
                        lastToken = TokenType.list;
                        break;

                    case listEnd:
                        listEndCount++;
                        if (listEndCount > listStartCount)
                        {
                            // A list end without a matching open list. Rejecting it here keeps
                            // inputs such as ")(" from passing the final balance check and then
                            // popping an empty stack during the filling pass.
                            return -1;
                        }
                        if (tokens != null)
                        {
                            var idx = listStartStack.Pop();
                            tokens[idx] = new TokenPos(tokens[idx], i + 1);
                        }
                        lastToken = TokenType.list;
                        break;

                    case stringStart:
                        curState = ParseState.insideQuotedString;
                        if (tokens != null)
                        {
                            tokens[tokenIdx] = new TokenPos(self, TokenType.quotedString, i, 0);
                        }
                        tokenIdx++;
                        lastToken = TokenType.quotedString;
                        break;

                    case escape:
                        // Syntax error: the escape character is only accepted inside quoted strings.
                        return -1;

                    default:
                        // Characters below 33 ('!' is the first printable ASCII char) start a neutral section.
                        if (cur < 33)
                        {
                            curState = ParseState.insideNeutral;
                            if (tokens != null && lastToken == TokenType.unquotedString)
                            {
                                // Close the last unquoted string, if any.
                                tokens[tokenIdx - 1] = new TokenPos(tokens[tokenIdx - 1], i);
                            }
                            if (tokens != null)
                            {
                                tokens[tokenIdx] = new TokenPos(self, TokenType.neutral, i, 0);
                            }
                            tokenIdx++;
                            lastToken = TokenType.neutral;
                            break;
                        }

                        // Otherwise an unquoted string (identifier) starts here.
                        if (tokens != null && lastToken == TokenType.neutral)
                        {
                            // Close the last neutral section, if any.
                            tokens[tokenIdx - 1] = new TokenPos(tokens[tokenIdx - 1], i);
                        }
                        if (tokens != null)
                        {
                            tokens[tokenIdx] = new TokenPos(self, TokenType.unquotedString, i, 0);
                        }
                        tokenIdx++;
                        lastToken = TokenType.unquotedString;
                        curState = ParseState.insideUnquotedString;
                        break;
                }
                break;

            case ParseState.insideQuotedString:
                switch (cur)
                {
                    case escape:
                        // An escaped escape char is a literal; otherwise it escapes the next char.
                        nextIsEscaped = !nextIsEscaped;
                        break;

                    case stringEnd:
                        if (nextIsEscaped)
                        {
                            // Escaped quote: part of the string, not its end.
                            nextIsEscaped = false;
                            break;
                        }
                        if (tokens != null)
                        {
                            tokens[tokenIdx - 1] = new TokenPos(tokens[tokenIdx - 1], i + 1);
                        }
                        curState = ParseState.insideBody;
                        lastToken = TokenType.quotedString;
                        break;

                    default:
                        // Ordinary string character. The escape only applies to the character
                        // right after it, so the flag must not leak to a later quote
                        // (otherwise "\n" would be reported as an unclosed string).
                        nextIsEscaped = false;
                        break;
                }
                break;

            case ParseState.insideUnquotedString:
                switch (cur)
                {
                    case listStart:
                        if (tokens != null)
                        {
                            // Close the unquoted string that ends here.
                            tokens[tokenIdx - 1] = new TokenPos(tokens[tokenIdx - 1], i);
                        }
                        // NOTE(review): this increment leaves one slot unfilled before the list
                        // token that the re-evaluation below creates; the list-end case does not
                        // do this. Kept as-is because downstream handling is not visible here -
                        // confirm whether the gap is intentional.
                        tokenIdx++;
                        i--; // must re-evaluate this character in the body state
                        curState = ParseState.insideBody;
                        break;

                    case listEnd:
                        if (tokens != null)
                        {
                            // Close the unquoted string that ends here.
                            tokens[tokenIdx - 1] = new TokenPos(tokens[tokenIdx - 1], i);
                        }
                        i--; // must re-evaluate this character in the body state
                        curState = ParseState.insideBody;
                        break;

                    case stringStart:
                        return -1;

                    case escape:
                        return -1;

                    default:
                        if (cur > 32)
                        {
                            break; // still inside the unquoted string
                        }

                        // A neutral section starts: close the unquoted string first.
                        // lastToken is always unquotedString in this state, so only the
                        // null check (counting pass) is required.
                        if (tokens != null)
                        {
                            tokens[tokenIdx - 1] = new TokenPos(tokens[tokenIdx - 1], i);
                        }
                        if (tokens != null)
                        {
                            tokens[tokenIdx] = new TokenPos(self, TokenType.neutral, i, 0);
                        }
                        tokenIdx++;
                        curState = ParseState.insideNeutral;
                        lastToken = TokenType.unquotedString;
                        break;
                }
                break;

            case ParseState.insideNeutral:
                if (cur < 33)
                {
                    break; // still inside the neutral section
                }
                if (tokens != null)
                {
                    // Close the neutral section that ends here.
                    tokens[tokenIdx - 1] = new TokenPos(tokens[tokenIdx - 1], i);
                }
                i--; // must re-evaluate this character in the body state
                curState = ParseState.insideBody;
                lastToken = TokenType.neutral;
                break;
        }
    }

    if (curState == ParseState.insideQuotedString)
    {
        return -1; // unclosed quoted string
    }
    if (listStartCount != listEndCount)
    {
        return -1; // unmatched open/close list
    }

    // Close the last token when the input ended before anything closed it.
    if (tokens != null && tokensLen > 0 && tokens[tokensLen - 1].getEnd() < 1)
    {
        tokens[tokensLen - 1] = new TokenPos(tokens[tokensLen - 1], strLen);
    }

    return tokenIdx;
}
/// <summary>
/// Creates a node from a list of child nodes and the token associated with it.
/// </summary>
/// <param name="list">Stored as the node's list.</param>
/// <param name="token">Stored as the node's value token.</param>
public ListOrValue(List<ListOrValue> list, TokenPos token)
{
    this.m_value = token;
    this.m_list = list;
}