Ejemplo n.º 1
0
        /// <summary>
        /// Tokenizes a given string.
        /// </summary>
        /// <remarks>
        /// A backslash escapes the single character that follows it: that character is looked up
        /// on its own instead of taking part in the longest-match search. Carriage returns that do
        /// not match a token are skipped. When <c>TrimStart</c> is set, spaces and tabs at the
        /// start of the input and directly after every newline token are skipped as well.
        /// </remarks>
        /// <param name="data">The string to convert to tokens.</param>
        /// <param name="numTokens">Receives the number of tokens matched in <paramref name="data"/>.</param>
        /// <param name="tokens">
        /// Receives the matched entries, one inner list per line; a new inner list is started after
        /// each newline token (the newline token itself is not added to any list). Unmatched text is
        /// recorded as an entry whose <c>StyleType</c> is "Error" when
        /// <paramref name="exceptionOnUnknownToken"/> is <c>false</c>.
        /// </param>
        /// <param name="exceptionOnUnknownToken">
        /// <c>true</c> to throw on text that matches no token; <c>false</c> to record it as an error entry and continue.
        /// </param>
        /// <returns>An array of tokens as bytes.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="data"/> is <c>null</c>.</exception>
        /// <exception cref="TokenizationException">
        /// Unmatched text was found and <paramref name="exceptionOnUnknownToken"/> is <c>true</c>.
        /// </exception>
        public byte[] Tokenize(string data, out int numTokens, out List <List <TokenData.TokenDictionaryEntry> > tokens, bool exceptionOnUnknownToken)
        {
            if (data == null)
            {
                throw new ArgumentNullException("data");
            }

            StringBuilder bytes   = new StringBuilder();
            char[]        rawData = data.ToCharArray();
            int           i       = 0;

            if (TrimStart)
            {
                i = SkipSpacesAndTabs(rawData, i);
            }

            numTokens = 0;
            int lineNumber = 0;

            tokens = new List <List <TokenDictionaryEntry> >();
            tokens.Add(new List <TokenDictionaryEntry>());

            while (i < rawData.Length)
            {
                string foundData = "";
                char[] match     = null;
                bool   found     = false;

                if (rawData[i] != '\\')
                {
                    found = StringToBytesTrie.LongestSubstringMatch(rawData, i, out foundData, out match);
                }
                else
                {
                    // Step over the backslash; a trailing backslash ends tokenization.
                    i++;
                    if (i == rawData.Length)
                    {
                        break;
                    }

                    // An escaped carriage return is never looked up; it falls through to the
                    // "not found" handling below, which skips it.
                    if (rawData[i] != '\r')
                    {
                        match = new[] { rawData[i] };
                        found = StringToBytesTrie.GetData(match, out foundData);
                    }
                }

                if (!found)
                {
                    if (rawData[i] == '\r')
                    {
                        i++;
                    }
                    else
                    {
                        if (exceptionOnUnknownToken)
                        {
                            throw new TokenizationException(foundData, i - 1);
                        }
                        tokens[lineNumber].Add(new TokenDictionaryEntry()
                        {
                            Name = foundData, StyleType = "Error"
                        });

                        // NOTE(review): the lookup helpers may leave foundData null on failure
                        // (not visible from here); treat that like any other unmatched text and
                        // advance so the loop always makes progress.
                        if (foundData == null || !foundData.StartsWith("\\", StringComparison.Ordinal))
                        {
                            i++;
                        }
                    }
                }
                else
                {
                    string token = new string(match);
                    i += token.Length;
                    numTokens++;
                    bytes.Append(foundData);

                    if (token != "\n")
                    {
                        tokens[lineNumber].Add(FlatTokens[token]);
                    }
                    else
                    {
                        // A newline token starts a new line list.
                        lineNumber++;
                        tokens.Add(new List <TokenDictionaryEntry>());
                        if (TrimStart)
                        {
                            i = SkipSpacesAndTabs(rawData, i);
                        }
                    }
                }
            }

            return(HexHelper.GetByteArray(bytes.ToString()));
        }

        /// <summary>
        /// Returns the index of the first character at or after <paramref name="start"/> that is
        /// neither a space nor a tab, or the length of <paramref name="text"/> if there is none.
        /// </summary>
        private static int SkipSpacesAndTabs(char[] text, int start)
        {
            while (start < text.Length && (text[start] == ' ' || text[start] == '\t'))
            {
                start++;
            }

            return(start);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Builds the token table for one level of the token XML, recursing into child nodes.
        /// </summary>
        /// <remarks>
        /// Every <c>Token</c> element yields an entry keyed by the byte returned from
        /// <c>getByteFromXml</c>. Tokens that carry a <c>string</c> attribute are additionally
        /// registered in both lookup tries and in the <c>Comments</c>, <c>Sites</c>,
        /// <c>Groups</c>, <c>GroupNames</c> and <c>FlatTokens</c> collections. An <c>Alt</c>
        /// element registers an alternative spelling for <paramref name="prevBytes"/> and is
        /// reported to the caller through <paramref name="alts"/>.
        /// </remarks>
        /// <param name="nodes">The XML nodes to read. Comment nodes and nodes named anything other than "Token" or "Alt" are ignored.</param>
        /// <param name="alts">Receives the <c>string</c> values of the <c>Alt</c> elements found directly in <paramref name="nodes"/>.</param>
        /// <param name="prevBytes">Hex string (two uppercase digits per byte) of the enclosing tokens' bytes; defaults to empty.</param>
        /// <returns>The entries created for the <c>Token</c> elements in <paramref name="nodes"/>, keyed by byte.</returns>
        /// <exception cref="ArgumentException">
        /// A token has <c>indentGroupTerminator</c> without <c>indentGroup</c>, an <c>Alt</c> element
        /// has no <c>string</c> attribute, or two tokens in <paramref name="nodes"/> share the same byte.
        /// </exception>
        /// <exception cref="AmbiguousTokenException">A token name or alternative spelling is already registered in <c>FlatTokens</c>.</exception>
        private Dictionary <byte, TokenDictionaryEntry> GetTokensFromNode(XmlNodeList nodes, out List <string> alts, string prevBytes = "")
        {
            Dictionary <byte, TokenDictionaryEntry> currentTokens = new Dictionary <byte, TokenDictionaryEntry>();

            alts = new List <string>();
            foreach (XmlNode node in nodes)
            {
                if (node.NodeType == XmlNodeType.Comment)
                {
                    continue;
                }

                if (node.Name == "Alt")
                {
                    XmlAttribute altAttr = node.Attributes["string"];
                    if (altAttr == null)
                    {
                        throw new ArgumentException("Alt element is missing its required string attribute.");
                    }

                    // An Alt is another spelling of the enclosing token, so it maps to that token's bytes.
                    StringToBytesTrie.AddData(altAttr.Value, prevBytes);
                    alts.Add(altAttr.Value);
                    continue;
                }

                if (node.Name != "Token")
                {
                    continue;
                }

                byte   key        = getByteFromXml(node);
                string tokenBytes = prevBytes + key.ToString("X2");

                TokenDictionaryEntry value = new TokenDictionaryEntry()
                {
                    Byte = key
                };

                XmlAttributeCollection attributes = node.Attributes;
                XmlAttribute           nameAttr   = attributes["string"];
                if (nameAttr != null)
                {
                    // The XML spells a newline as the two characters backslash + 'n'.
                    value.Name = nameAttr.Value == "\\n" ? "\n" : nameAttr.Value;

                    XmlAttribute commentAttr = attributes["comment"];
                    if (commentAttr != null)
                    {
                        string comment = Regex.Unescape(commentAttr.Value);
                        value.Comment = comment;
                        if (!Comments.ContainsKey(value.Name))
                        {
                            Comments.Add(value.Name, comment);
                        }
                    }

                    XmlAttribute siteAttr = attributes["site"];
                    if (siteAttr != null)
                    {
                        value.Site = siteAttr.Value;
                        if (!Sites.ContainsKey(value.Name))
                        {
                            Sites.Add(value.Name, siteAttr.Value);
                        }
                    }

                    XmlAttribute groupAttr = attributes["group"];
                    if (groupAttr != null)
                    {
                        value.Group = groupAttr.Value;
                        if (!GroupNames.Contains(value.Group))
                        {
                            GroupNames.Add(value.Group);
                        }
                    }
                    else
                    {
                        value.Group = "_default";
                    }

                    if (!Groups.ContainsKey(value.Name))
                    {
                        Groups.Add(value.Name, value.Group);
                    }

                    XmlAttribute styleAttr = attributes["style"];
                    if (styleAttr != null)
                    {
                        value.StyleType = styleAttr.Value;
                    }

                    // bool.TryParse leaves its out value false when parsing fails, so an
                    // unparsable flag attribute reads as false.
                    bool flag;

                    XmlAttribute terminatorAttr = attributes["stringTerminator"];
                    if (terminatorAttr != null)
                    {
                        bool.TryParse(terminatorAttr.Value, out flag);
                        value.StringTerminator = flag;
                    }

                    XmlAttribute starterAttr = attributes["stringStarter"];
                    if (starterAttr != null)
                    {
                        bool.TryParse(starterAttr.Value, out flag);
                        value.StringStarter = flag;
                    }

                    XmlAttribute indentGroupAttr = attributes["indentGroup"];
                    if (indentGroupAttr != null)
                    {
                        value.IndentGroup = indentGroupAttr.Value;
                    }

                    XmlAttribute indentTerminatorAttr = attributes["indentGroupTerminator"];
                    if (indentTerminatorAttr != null)
                    {
                        if (string.IsNullOrEmpty(value.IndentGroup))
                        {
                            throw new ArgumentException(string.Format("Cannot have indentGroupTerminator without indentGroup. On token {0}.", value.Name));
                        }

                        bool.TryParse(indentTerminatorAttr.Value, out flag);
                        value.IndentGroupTerminator = flag;
                    }

                    value.Bytes = tokenBytes;
                    StringToBytesTrie.AddData(value.Name, tokenBytes);
                    BytesToStringTrie.AddData(HexHelper.GetByteArray(tokenBytes), value);
                }

                // Children are either sub-tokens (multi-byte tokens) or Alt spellings of this token.
                List <string> myAlts = new List <string>();
                if (node.HasChildNodes)
                {
                    value.SubTokens = GetTokensFromNode(node.ChildNodes, out myAlts, tokenBytes);
                }

                currentTokens.Add(key, value);

                // NOTE(review): Alt children of a token without a name are registered in the
                // string trie by the recursive call but never reach FlatTokens — confirm intended.
                if (value.Name != null)
                {
                    if (FlatTokens.ContainsKey(value.Name))
                    {
                        throw new AmbiguousTokenException(value.Name);
                    }
                    FlatTokens.Add(value.Name, value);

                    foreach (string alt in myAlts)
                    {
                        if (FlatTokens.ContainsKey(alt))
                        {
                            throw new AmbiguousTokenException(string.Format("{0} alt: ({1})", value.Name, alt));
                        }
                        value.Alts.Add(alt);
                        FlatTokens.Add(alt, value);
                    }
                }
            }

            return(currentTokens);
        }