/// <summary>
/// Translates a raw byte sequence into its textual token representation.
/// </summary>
/// <param name="bytes">The bytes to convert.</param>
/// <returns>
/// The names of the matched tokens concatenated in input order, or an empty
/// string when <paramref name="bytes"/> is null or empty. A byte for which no
/// match is reported is written as an <c>{UNKNOWN TOKEN [n]}</c> placeholder.
/// </returns>
public string Detokenize(byte[] bytes)
{
    if (bytes == null || bytes.Length == 0)
    {
        return string.Empty;
    }

    // NOTE(review): this local is never read afterwards. The property read is
    // kept in case the Tokens getter has side effects - confirm before removing.
    Dictionary<byte, TokenData.TokenDictionaryEntry> tokens = Tokens;

    StringBuilder output = new StringBuilder(bytes.Length);

    for (int offset = 0; offset < bytes.Length;)
    {
        TokenDictionaryEntry match;
        byte[] consumed;
        bool found = BytesToStringTrie.LongestSubstringMatch(bytes, offset, out match, out consumed);

        if (!found)
        {
            // Nothing matched at this offset: emit a placeholder and skip one byte.
            output.AppendFormat("{{UNKNOWN TOKEN [{0}]}}", bytes[offset]);
            offset++;
            continue;
        }

        // Advance past every byte the match reports as consumed.
        output.Append(match.Name);
        offset += consumed.Length;
    }

    return output.ToString();
}
/// <summary>
/// Recursively reads the Token and Alt elements found in <paramref name="nodes"/>,
/// registering every named token in the string/byte tries and in the flat
/// lookup tables (Comments, Sites, Groups, GroupNames, FlatTokens).
/// </summary>
/// <param name="nodes">
/// Sibling XML nodes to read. Comment nodes and elements that are neither
/// Token nor Alt are skipped.
/// </param>
/// <param name="alts">
/// Receives the <c>string</c> attribute of every Alt element found directly in
/// <paramref name="nodes"/>. Always assigned a new list.
/// </param>
/// <param name="prevBytes">
/// Hex string of the bytes contributed by the enclosing Token elements;
/// empty for the outermost call.
/// </param>
/// <returns>The tokens found directly in <paramref name="nodes"/>, keyed by their byte.</returns>
/// <exception cref="ArgumentException">
/// A token declares <c>indentGroupTerminator</c> without <c>indentGroup</c>, an
/// Alt element has no <c>string</c> attribute, or two Token elements in
/// <paramref name="nodes"/> resolve to the same byte.
/// </exception>
/// <exception cref="AmbiguousTokenException">
/// A token name or alternate spelling is already present in FlatTokens.
/// </exception>
private Dictionary<byte, TokenDictionaryEntry> GetTokensFromNode(XmlNodeList nodes, out List<string> alts, string prevBytes = "")
{
    Dictionary<byte, TokenDictionaryEntry> currentTokens = new Dictionary<byte, TokenDictionaryEntry>();
    alts = new List<string>();

    foreach (XmlNode node in nodes)
    {
        if (node.NodeType == XmlNodeType.Comment)
        {
            continue;
        }

        if (node.Name == "Token")
        {
            byte key = getByteFromXml(node);

            // Full hex byte path of this token: the parents' bytes followed by its own.
            string bytes = prevBytes + key.ToString("X2");

            TokenDictionaryEntry value = new TokenDictionaryEntry() { Byte = key };
            if (node.Attributes["string"] != null)
            {
                value = ReadNamedTokenAttributes(node, value, bytes);
            }

            // Children are read after this token has been added to the tries and
            // before it is added to currentTokens / FlatTokens.
            List<string> childAlts = new List<string>();
            if (node.HasChildNodes)
            {
                value.SubTokens = GetTokensFromNode(node.ChildNodes, out childAlts, bytes);
            }

            currentTokens.Add(key, value);

            // Tokens without a name only act as containers for their sub-tokens.
            if (value.Name != null)
            {
                RegisterFlatToken(value, childAlts);
            }
        }
        else if (node.Name == "Alt")
        {
            XmlAttribute altAttribute = node.Attributes["string"];
            if (altAttribute == null)
            {
                // Fail with a descriptive error instead of a NullReferenceException.
                throw new ArgumentException(string.Format("Alt element is missing its string attribute. Under token bytes \"{0}\".", prevBytes));
            }

            // An alternate spelling maps to the bytes of the enclosing token.
            string alt = altAttribute.Value;
            StringToBytesTrie.AddData(alt, prevBytes);
            alts.Add(alt);
        }
    }

    return currentTokens;
}

/// <summary>
/// Copies the attributes of a Token element that has a <c>string</c> attribute
/// onto <paramref name="value"/>, updates the Comments, Sites, Groups and
/// GroupNames tables, and adds the token to both tries.
/// </summary>
/// <param name="node">The Token element; must have a <c>string</c> attribute.</param>
/// <param name="value">The entry to fill in.</param>
/// <param name="bytes">Full hex byte path of the token.</param>
/// <returns>
/// The filled-in entry. It is returned rather than only mutated so the caller
/// sees the changes whether the entry type is a class or a struct.
/// </returns>
private TokenDictionaryEntry ReadNamedTokenAttributes(XmlNode node, TokenDictionaryEntry value, string bytes)
{
    XmlAttributeCollection attributes = node.Attributes;

    value.Name = attributes["string"].Value;
    if (value.Name == "\\n")
    {
        // The two-character sequence backslash + n stands for a real newline.
        value.Name = "\n";
    }

    XmlAttribute commentAttribute = attributes["comment"];
    if (commentAttribute != null)
    {
        string comment = Regex.Unescape(commentAttribute.Value);
        value.Comment = comment;

        // The first token registered under a given name wins.
        if (!Comments.ContainsKey(value.Name))
        {
            Comments.Add(value.Name, comment);
        }
    }

    XmlAttribute siteAttribute = attributes["site"];
    if (siteAttribute != null)
    {
        string site = siteAttribute.Value;
        value.Site = site;
        if (!Sites.ContainsKey(value.Name))
        {
            Sites.Add(value.Name, site);
        }
    }

    XmlAttribute groupAttribute = attributes["group"];
    if (groupAttribute != null)
    {
        value.Group = groupAttribute.Value;
        if (!GroupNames.Contains(value.Group))
        {
            GroupNames.Add(value.Group);
        }
    }
    else
    {
        // The default group is deliberately not added to GroupNames.
        value.Group = "_default";
    }

    if (!Groups.ContainsKey(value.Name))
    {
        Groups.Add(value.Name, value.Group);
    }

    XmlAttribute styleAttribute = attributes["style"];
    if (styleAttribute != null)
    {
        value.StyleType = styleAttribute.Value;
    }

    XmlAttribute stringTerminatorAttribute = attributes["stringTerminator"];
    if (stringTerminatorAttribute != null)
    {
        value.StringTerminator = ParseBoolOrFalse(stringTerminatorAttribute.Value);
    }

    XmlAttribute stringStarterAttribute = attributes["stringStarter"];
    if (stringStarterAttribute != null)
    {
        value.StringStarter = ParseBoolOrFalse(stringStarterAttribute.Value);
    }

    XmlAttribute indentGroupAttribute = attributes["indentGroup"];
    if (indentGroupAttribute != null)
    {
        value.IndentGroup = indentGroupAttribute.Value;
    }

    XmlAttribute indentTerminatorAttribute = attributes["indentGroupTerminator"];
    if (indentTerminatorAttribute != null)
    {
        if (string.IsNullOrEmpty(value.IndentGroup))
        {
            throw new ArgumentException(string.Format("Cannot have indentGroupTerminator without indentGroup. On token {0}.", value.Name));
        }

        value.IndentGroupTerminator = ParseBoolOrFalse(indentTerminatorAttribute.Value);
    }

    value.Bytes = bytes;
    StringToBytesTrie.AddData(value.Name, bytes);
    BytesToStringTrie.AddData(HexHelper.GetByteArray(bytes), value);

    return value;
}

/// <summary>
/// Adds a named token to FlatTokens under its own name and under each of its
/// alternate spellings, recording the spellings on the token's Alts list.
/// </summary>
/// <param name="value">The token to register; its Name must not be null.</param>
/// <param name="tokenAlts">Alternate spellings declared by the token's child Alt elements.</param>
/// <exception cref="AmbiguousTokenException">
/// The name or one of the alternate spellings is already present in FlatTokens.
/// </exception>
private void RegisterFlatToken(TokenDictionaryEntry value, List<string> tokenAlts)
{
    if (FlatTokens.ContainsKey(value.Name))
    {
        throw new AmbiguousTokenException(value.Name);
    }

    FlatTokens.Add(value.Name, value);

    foreach (string alt in tokenAlts)
    {
        if (FlatTokens.ContainsKey(alt))
        {
            throw new AmbiguousTokenException(string.Format("{0} alt: ({1})", value.Name, alt));
        }

        value.Alts.Add(alt);
        FlatTokens.Add(alt, value);
    }
}

/// <summary>
/// Parses a boolean attribute value, treating anything unparseable as false.
/// </summary>
/// <param name="text">The raw attribute text.</param>
/// <returns>True only when <paramref name="text"/> parses as boolean true.</returns>
private static bool ParseBoolOrFalse(string text)
{
    bool parsed;
    return bool.TryParse(text, out parsed) && parsed;
}