예제 #1
0
 /// <summary>
 /// Creates a token position that duplicates <paramref name="copyFrom"/>
 /// except for its end, which is replaced by <paramref name="end"/>.
 /// </summary>
 /// <param name="copyFrom">Token whose source, type, start and extracted fields are copied.</param>
 /// <param name="end">Value stored as the end of the new token.</param>
 public TokenPos(TokenPos copyFrom, int end)
 {
     // Everything except the end comes straight from the template token.
     m_source    = copyFrom.m_source;
     m_type      = copyFrom.m_type;
     m_start     = copyFrom.m_start;
     m_extracted = copyFrom.m_extracted;

     // The end is the only field the caller overrides.
     this.m_end = end;
 }
예제 #2
0
        /// <summary>
        /// Escape character (a single backslash). The tokenizer accepts it only
        /// inside quoted strings; it reports a syntax error (-1) when the character
        /// appears in the body or inside an unquoted string.
        /// </summary>
        public const char escape      = '\\'; // can be used only inside quoted strings

        /// <summary>
        /// Tokenizes <paramref name="src"/> and returns the resulting tokens.
        /// </summary>
        /// <remarks>
        /// Runs the two-argument overload twice: once with a null array to learn
        /// how many token slots are needed, then again to fill an array of exactly
        /// that size.
        /// </remarks>
        /// <param name="src">Text to tokenize.</param>
        /// <returns>
        /// The filled token array, or null when the counting pass reports an
        /// error (a negative count).
        /// </returns>
        public static TokenPos[] tokenize(string src)
        {
            // Counting pass: nothing is stored, only the slot count is returned.
            int needed = tokenize(src, null);

            if (needed < 0)
            {
                return null;
            }

            // Filling pass: its return value is not inspected.
            TokenPos[] result = new TokenPos[needed];
            tokenize(src, result);

            return result;
        }
예제 #3
0
        /// <summary>
        /// Builds the root (document) node from a token array.
        /// </summary>
        /// <remarks>
        /// The root wraps an initially empty list and a <c>TokenType.document</c>
        /// token created from the source of the first token, with 0 and that
        /// token's source length as its positions. Tokens are then consumed with
        /// <c>parseToken</c>, which reports the next index to continue from;
        /// items for which <c>isDefault()</c> is true are not added.
        /// </remarks>
        /// <param name="tokens">Tokens to parse; may be null or empty.</param>
        /// <returns>
        /// The populated root, or a default-constructed <c>ListOrValue</c> when
        /// <paramref name="tokens"/> is null or empty or when no item was added.
        /// </returns>
        public static ListOrValue parseRoot(TokenPos[] tokens)
        {
            bool nothingToParse = (tokens == null) || (tokens.Length == 0);

            if (nothingToParse)
            {
                return new ListOrValue();
            }

            var documentToken = new TokenPos(tokens[0].getSource(), TokenType.document, 0, tokens[0].sourceLength());
            var root          = new ListOrValue(new List <ListOrValue>(), documentToken);

            // parseToken decides how far to advance, so the loop has no increment clause.
            for (int pos = 0; pos < tokens.Length;)
            {
                int next;
                var parsed = parseToken(tokens, pos, out next);
                pos = next;

                if (!parsed.isDefault())
                {
                    root.add(parsed);
                }
            }

            return root.count() < 1 ? new ListOrValue() : root;
        }
예제 #4
0
        /// <summary>
        /// Scans <paramref name="self"/> character by character and splits it into
        /// list, quoted-string, unquoted-string and neutral tokens.
        /// </summary>
        /// <remarks>
        /// The method works in two modes. When <paramref name="tokens"/> is null
        /// nothing is stored and only the number of token slots is counted. When it
        /// is non-null each token is written into the array, which the caller is
        /// expected to have sized from a previous counting pass.
        ///
        /// A neutral token is a run of characters below 33 (the first printable
        /// ASCII character, '!'). A list token starts at the list-start character
        /// and receives its end when the matching list-end character is found.
        /// </remarks>
        /// <param name="self">Text to tokenize.</param>
        /// <param name="tokens">Destination array, or null to only count.</param>
        /// <returns>
        /// The number of token slots used, or -1 when the input is null or empty
        /// or on a syntax error: an escape character outside a quoted string, a
        /// quote inside an unquoted string, an unclosed quoted string, a list end
        /// without a matching list start, or unbalanced list start/end counts.
        /// </returns>
        public static int tokenize(string self, TokenPos[] tokens)
        {
            if (self == null || self == "")
            {
                return(-1);
            }
            var         curState       = ParseState.insideBody;
            var         lastToken      = TokenType.none;
            var         strLen         = self.Length;
            var         listStartCount = 0;
            var         listEndCount   = 0;
            var         tokenIdx       = 0;
            var         nextIsScaped   = false;
            Stack <int> listStartStack = null;
            var         tokensLen      = 0;

            if (tokens != null)
            {
                tokensLen      = tokens.Length;
                listStartStack = new Stack <int>(tokensLen); // TODO think of an optimized way without heap allocation
            }

            for (var i = 0; i < strLen; i++)
            {
                var cur = self[i];

                switch (curState)
                {
                case ParseState.insideBody:
                    switch (cur)
                    {
                    case listStart:

                        listStartCount++;
                        if (tokens != null)
                        {
                            // remember which slot holds this list so its end can be set later
                            listStartStack.Push(tokenIdx);
                            tokens[tokenIdx] = new TokenPos(self, TokenType.list, i, 0);
                        }
                        tokenIdx++;
                        lastToken = TokenType.list;
                        break;

                    case listEnd:
                        listEndCount++;
                        if (listEndCount > listStartCount)
                        {
                            // A list end with no open list is a syntax error. Rejecting it here
                            // (in both modes) keeps the counting and filling passes in agreement
                            // and prevents Pop() from being called on an empty stack.
                            return(-1);
                        }
                        if (tokens != null)
                        {
                            // the list token is replaced by a copy whose end is just past the list end
                            var idx = listStartStack.Pop();
                            tokens[idx] = new TokenPos(tokens[idx], i + 1);
                        }
                        lastToken = TokenType.list;
                        break;

                    case stringStart:

                        curState = ParseState.insideQuotedString;
                        if (tokens != null)
                        {
                            tokens[tokenIdx] = new TokenPos(self, TokenType.quotedString, i, 0);
                        }
                        tokenIdx++;
                        lastToken = TokenType.quotedString;
                        break;

                    case escape:
                        return(-1);        // syntax error, the escape char can only be used inside a quoted string

                    default:
                        // it could be entering a neutral section
                        if (cur < 33)           // 33 is the first printable ascii char, the exclamation mark '!'

                        {
                            curState = ParseState.insideNeutral;
                            if (tokens != null && lastToken == TokenType.unquotedString)           // close last unquoted string if any
                            {
                                tokens[tokenIdx - 1] = new TokenPos(tokens[tokenIdx - 1], i);
                            }
                            if (tokens != null)
                            {
                                tokens[tokenIdx] = new TokenPos(self, TokenType.neutral, i, 0);
                            }
                            tokenIdx++;
                            lastToken = TokenType.neutral;
                            break;
                        }
                        // else, it is starting an identifier
                        if (tokens != null && lastToken == TokenType.neutral)           // close last neutral if any
                        {
                            tokens[tokenIdx - 1] = new TokenPos(tokens[tokenIdx - 1], i);
                        }

                        if (tokens != null)
                        {
                            tokens[tokenIdx] = new TokenPos(self, TokenType.unquotedString, i, 0);
                        }
                        tokenIdx++;

                        lastToken = TokenType.unquotedString;
                        curState  = ParseState.insideUnquotedString;
                        break;
                    }
                    break;

                case ParseState.insideQuotedString:
                    switch (cur)
                    {
                    case escape:
                        // an escaped escape char is a literal; otherwise it escapes the next char
                        nextIsScaped = !nextIsScaped;
                        break;

                    case stringEnd:
                        if (nextIsScaped)
                        {
                            // escaped quote: part of the string, does not close it
                            nextIsScaped = false;
                            break;
                        }
                        if (tokens != null)
                        {
                            tokens[tokenIdx - 1] = new TokenPos(tokens[tokenIdx - 1], i + 1);
                        }
                        curState  = ParseState.insideBody;
                        lastToken = TokenType.quotedString;
                        break;

                    default:
                        // Normal string character. An escape applies to exactly one character,
                        // so the flag must be cleared here; otherwise an earlier escape
                        // (e.g. the one in "a\nb") would wrongly escape the closing quote.
                        nextIsScaped = false;
                        break;
                    }
                    break;

                case ParseState.insideUnquotedString:
                    switch (cur)
                    {
                    case listStart:
                        if (tokens != null)           // close last unquoted string
                        {
                            tokens[tokenIdx - 1] = new TokenPos(tokens[tokenIdx - 1], i);
                        }
                        // NOTE(review): this increment leaves one slot unassigned before the list
                        // token written on re-evaluation. The counting pass does the same, so the
                        // two passes agree; kept as-is — confirm whether the gap is intentional.
                        tokenIdx++;
                        i--;         // must reevaluate character
                        curState = ParseState.insideBody;
                        break;

                    case listEnd:
                        if (tokens != null)           // close last unquoted string
                        {
                            tokens[tokenIdx - 1] = new TokenPos(tokens[tokenIdx - 1], i);
                        }
                        i--;         // must reevaluate character
                        curState = ParseState.insideBody;
                        break;

                    case stringStart:
                        return(-1);  // syntax error, a quote cannot appear inside an unquoted string

                    case escape:
                        return(-1);  // syntax error, the escape char can only be used inside a quoted string

                    default:
                        if (cur > 32)
                        {
                            break;   // still inside the unquoted string
                        }
                        // a character below 33 ends the unquoted string and starts a neutral section

                        if (tokens != null && lastToken == TokenType.unquotedString)           // close last unquoted if any
                        {
                            tokens[tokenIdx - 1] = new TokenPos(tokens[tokenIdx - 1], i);
                        }

                        if (tokens != null)
                        {
                            tokens[tokenIdx] = new TokenPos(self, TokenType.neutral, i, 0);
                        }
                        tokenIdx++;
                        curState  = ParseState.insideNeutral;
                        lastToken = TokenType.unquotedString;
                        break;
                    }
                    break;

                case ParseState.insideNeutral:
                    if (cur < 33)
                    {
                        break;       // still inside the neutral section
                    }

                    if (tokens != null)       // close last neutral
                    {
                        tokens[tokenIdx - 1] = new TokenPos(tokens[tokenIdx - 1], i);
                    }
                    i--;     // must reevaluate character
                    curState  = ParseState.insideBody;
                    lastToken = TokenType.neutral;
                    break;
                }
            }

            if (curState == ParseState.insideQuotedString)
            {
                return(-1); // unclosed quoted string
            }
            if (listStartCount != listEndCount)
            {
                return(-1); // unmatched open/close list
            }
            // close the last token when the input ended before its end was set
            if (tokens != null && tokensLen > 0 && tokens[tokensLen - 1].getEnd() < 1)
            {
                tokens[tokensLen - 1] = new TokenPos(tokens[tokensLen - 1], strLen);
            }

            return(tokenIdx);
        }
예제 #5
0
 /// <summary>
 /// Creates an instance from a list of child items and a token.
 /// </summary>
 /// <param name="list">Stored as the list of this instance.</param>
 /// <param name="token">Stored as the value of this instance.</param>
 public ListOrValue(List <ListOrValue> list, TokenPos token)
 {
     this.m_value = token;
     this.m_list  = list;
 }