/// <summary>
/// Resolves the "byte" attribute of a token definition node to a single byte.
/// </summary>
/// <param name="node">The XML node whose "byte" attribute is read.</param>
/// <returns>
/// The byte of the referenced token when the attribute value starts with "@";
/// otherwise the attribute value parsed as a byte.
/// </returns>
/// <exception cref="ArgumentException">The node carries no "byte" attribute.</exception>
private byte getByteFromXml(XmlNode node)
{
    // XmlNode.Attributes is null for non-element nodes, so guard both levels
    // and report a configuration error instead of a bare NullReferenceException.
    XmlAttribute byteAttribute = node.Attributes != null ? node.Attributes["byte"] : null;
    if (byteAttribute == null)
    {
        throw new ArgumentException(string.Format("Node <{0}> is missing the required \"byte\" attribute.", node.Name));
    }

    string val = byteAttribute.Value;

    // A leading "@" marks a reference value: the remainder is looked up in FlatTokens
    // and that entry's byte is reused. The marker is compared ordinally because it is
    // file syntax, not linguistic text.
    if (val.StartsWith("@", StringComparison.Ordinal))
    {
        return FlatTokens[val.Substring(1)].Byte;
    }

    // Anything byte.TryParse accepts is taken as-is (so "10" is decimal ten); every other
    // value is handed to HexHelper.GetByteArray and its first byte is used.
    // NOTE(review): presumably that helper parses hex text - confirm against HexHelper.
    byte key;
    if (!byte.TryParse(val, out key))
    {
        key = HexHelper.GetByteArray(val, 1)[0];
    }

    return key;
}
/// <summary>
/// Tokenizes a given string.
/// </summary>
/// <remarks>
/// Input is consumed left to right using the longest match found in StringToBytesTrie.
/// A backslash causes the single character that follows it to be looked up on its own
/// instead; a backslash that is the last character of the input ends tokenization.
/// A '\r' that does not match anything is skipped silently.
/// </remarks>
/// <param name="data">The string to convert to tokens.</param>
/// <param name="numTokens">Receives the number of tokens that were matched (error entries are not counted).</param>
/// <param name="tokens">
/// Receives one list of dictionary entries per line of input. A new list is started after
/// every "\n" token; the "\n" token itself is not added to any list. Unrecognized input is
/// recorded as an entry whose StyleType is "Error" when
/// <paramref name="exceptionOnUnknownToken"/> is false.
/// </param>
/// <param name="exceptionOnUnknownToken">
/// When true, unrecognized input throws instead of being recorded as an error entry.
/// </param>
/// <returns>
/// The result of HexHelper.GetByteArray applied to the concatenated trie data of all matched tokens.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
/// <exception cref="TokenizationException">
/// Unrecognized input was found and <paramref name="exceptionOnUnknownToken"/> is true.
/// </exception>
public byte[] Tokenize(string data, out int numTokens, out List<List<TokenData.TokenDictionaryEntry>> tokens, bool exceptionOnUnknownToken)
{
    // Fail the same way regardless of TrimStart; previously null input produced either a
    // NullReferenceException or an ArgumentNullException depending on that flag.
    if (data == null)
    {
        throw new ArgumentNullException("data");
    }

    char[] rawData = data.ToCharArray();
    StringBuilder bytes = new StringBuilder();

    numTokens = 0;
    List<TokenDictionaryEntry> currentLine = new List<TokenDictionaryEntry>();
    tokens = new List<List<TokenDictionaryEntry>>();
    tokens.Add(currentLine);

    int i = SkipLeadingBlanks(rawData, 0);

    while (i < rawData.Length)
    {
        string foundData = "";
        char[] match = null;
        bool found = false;

        if (rawData[i] != '\\')
        {
            found = StringToBytesTrie.LongestSubstringMatch(rawData, i, out foundData, out match);
        }
        else
        {
            // Step over the backslash; the character after it is looked up by itself.
            i++;
            if (i == rawData.Length)
            {
                break;
            }

            if (rawData[i] != '\r')
            {
                match = new[] { rawData[i] };
                found = StringToBytesTrie.GetData(match, out foundData);
            }
        }

        if (found)
        {
            string token = new string(match);
            i += token.Length;
            numTokens++;
            bytes.Append(foundData);

            if (token == "\n")
            {
                // A newline token closes the current line and opens the next one.
                currentLine = new List<TokenDictionaryEntry>();
                tokens.Add(currentLine);
                i = SkipLeadingBlanks(rawData, i);
            }
            else
            {
                currentLine.Add(FlatTokens[token]);
            }

            continue;
        }

        if (rawData[i] == '\r')
        {
            i++;
            continue;
        }

        if (exceptionOnUnknownToken)
        {
            // The reported index is the one just before the current position.
            throw new TokenizationException(foundData, i - 1);
        }

        currentLine.Add(new TokenDictionaryEntry() { Name = foundData, StyleType = "Error" });

        // When the unmatched text starts with a backslash the index is left where it is,
        // so the character at the current position is examined again on the next pass.
        if (!foundData.StartsWith("\\", StringComparison.Ordinal))
        {
            i++;
        }
    }

    return HexHelper.GetByteArray(bytes.ToString());
}

/// <summary>
/// Returns the first index at or after <paramref name="index"/> that is not a space or tab
/// when TrimStart is set; returns <paramref name="index"/> unchanged otherwise.
/// </summary>
private int SkipLeadingBlanks(char[] text, int index)
{
    if (!TrimStart)
    {
        return index;
    }

    while (index < text.Length && (text[index] == ' ' || text[index] == '\t'))
    {
        index++;
    }

    return index;
}
/// <summary>
/// Reads the "Token" and "Alt" elements in <paramref name="nodes"/> and registers them.
/// </summary>
/// <remarks>
/// Every "Token" element becomes a TokenDictionaryEntry keyed by its byte. Tokens that have a
/// "string" attribute are also added to StringToBytesTrie, BytesToStringTrie and FlatTokens,
/// and their optional attributes populate Comments, Sites, Groups and GroupNames. Child nodes
/// are processed recursively with this token's byte appended to the hex path. Comment nodes
/// and elements with any other name are ignored.
/// </remarks>
/// <param name="nodes">The nodes to process.</param>
/// <param name="alts">Receives the "string" values of the "Alt" elements found directly in <paramref name="nodes"/>.</param>
/// <param name="prevBytes">
/// Hex text of the bytes leading to this level; each token appends its own byte as two
/// uppercase hex digits.
/// </param>
/// <returns>The entries created for the "Token" elements in <paramref name="nodes"/>, keyed by byte.</returns>
/// <exception cref="ArgumentException">A token has "indentGroupTerminator" but no "indentGroup".</exception>
/// <exception cref="AmbiguousTokenException">A token name or alternate spelling is already present in FlatTokens.</exception>
private Dictionary<byte, TokenDictionaryEntry> GetTokensFromNode(XmlNodeList nodes, out List<string> alts, string prevBytes = "")
{
    Dictionary<byte, TokenDictionaryEntry> result = new Dictionary<byte, TokenDictionaryEntry>();
    alts = new List<string>();

    foreach (XmlNode node in nodes)
    {
        if (node.NodeType == XmlNodeType.Comment)
        {
            continue;
        }

        if (node.Name == "Alt")
        {
            // An alternate spelling maps to the bytes of the enclosing level.
            string altName = node.Attributes["string"].Value;
            StringToBytesTrie.AddData(altName, prevBytes);
            alts.Add(altName);
            continue;
        }

        if (node.Name != "Token")
        {
            continue;
        }

        byte key = getByteFromXml(node);
        string tokenBytes = prevBytes + key.ToString("X2");
        TokenDictionaryEntry entry = new TokenDictionaryEntry() { Byte = key };

        XmlAttribute nameAttr = node.Attributes["string"];
        if (nameAttr != null)
        {
            // The two-character sequence backslash + 'n' in the XML stands for a real newline.
            string name = nameAttr.Value;
            if (name == "\\n")
            {
                name = "\n";
            }
            entry.Name = name;

            XmlAttribute commentAttr = node.Attributes["comment"];
            if (commentAttr != null)
            {
                string commentText = Regex.Unescape(commentAttr.Value);
                entry.Comment = commentText;
                if (!Comments.ContainsKey(name))
                {
                    Comments.Add(name, commentText);
                }
            }

            XmlAttribute siteAttr = node.Attributes["site"];
            if (siteAttr != null)
            {
                string siteText = siteAttr.Value;
                entry.Site = siteText;
                if (!Sites.ContainsKey(name))
                {
                    Sites.Add(name, siteText);
                }
            }

            XmlAttribute groupAttr = node.Attributes["group"];
            string groupName;
            if (groupAttr == null)
            {
                groupName = "_default";
            }
            else
            {
                groupName = groupAttr.Value;
                if (!GroupNames.Contains(groupName))
                {
                    GroupNames.Add(groupName);
                }
            }
            entry.Group = groupName;

            if (!Groups.ContainsKey(name))
            {
                Groups.Add(name, groupName);
            }

            XmlAttribute styleAttr = node.Attributes["style"];
            if (styleAttr != null)
            {
                entry.StyleType = styleAttr.Value;
            }

            // Boolean attributes that fail to parse are treated as false.
            XmlAttribute terminatorAttr = node.Attributes["stringTerminator"];
            if (terminatorAttr != null)
            {
                bool flag;
                entry.StringTerminator = bool.TryParse(terminatorAttr.Value, out flag) && flag;
            }

            XmlAttribute starterAttr = node.Attributes["stringStarter"];
            if (starterAttr != null)
            {
                bool flag;
                entry.StringStarter = bool.TryParse(starterAttr.Value, out flag) && flag;
            }

            XmlAttribute indentAttr = node.Attributes["indentGroup"];
            if (indentAttr != null)
            {
                entry.IndentGroup = indentAttr.Value;
            }

            XmlAttribute indentEndAttr = node.Attributes["indentGroupTerminator"];
            if (indentEndAttr != null)
            {
                if (string.IsNullOrEmpty(entry.IndentGroup))
                {
                    throw new ArgumentException(string.Format("Cannot have indentGroupTerminator without indentGroup. On token {0}.", name));
                }

                bool flag;
                entry.IndentGroupTerminator = bool.TryParse(indentEndAttr.Value, out flag) && flag;
            }

            entry.Bytes = tokenBytes;
            StringToBytesTrie.AddData(name, tokenBytes);
            BytesToStringTrie.AddData(HexHelper.GetByteArray(tokenBytes), entry);
        }

        // Children extend this token's byte path; any "Alt" children come back through childAlts.
        List<string> childAlts = new List<string>();
        if (node.HasChildNodes)
        {
            entry.SubTokens = GetTokensFromNode(node.ChildNodes, out childAlts, tokenBytes);
        }

        result.Add(key, entry);

        if (entry.Name == null)
        {
            continue;
        }

        if (FlatTokens.ContainsKey(entry.Name))
        {
            throw new AmbiguousTokenException(entry.Name);
        }
        FlatTokens.Add(entry.Name, entry);

        foreach (string childAlt in childAlts)
        {
            if (FlatTokens.ContainsKey(childAlt))
            {
                throw new AmbiguousTokenException(string.Format("{0} alt: ({1})", entry.Name, childAlt));
            }

            entry.Alts.Add(childAlt);
            FlatTokens.Add(childAlt, entry);
        }
        childAlts.Clear();
    }

    return result;
}