// Lays out the whole tree rooted at `root` into index slots.
//
// totalNodes is the initial size of the used-slot map; the root is pinned to
// slot 0 and every other node is placed by ProcessNode.
//
// Placement rules, as stated by the original author:
//  1. A child's index must be high enough that valueOffset is at least 1,
//     i.e. the index is at least the child's character value + 1.
//  2. Subject to rule 1, pick the lowest index at which all child characters
//     fit (none of the wanted slots is already occupied in the map).
//  3. If a node has a terminating child, its non-terminating children are
//     placed strictly relative to that node's valueOffset.
//  4. A terminating node takes the first available index, unless its parent
//     also has regular children; in that case it may sit at the first
//     available index relative to the first regular child's index, or at the
//     end of the currently used slots.
//  5. Each node assigns an index to its terminating child first, then to its
//     regular children; the child nodes are then processed depth-first.
//  6. Gaps are acceptable.
public void Process(RegularNameNode root, int totalNodes)
{
    const int RootSlot = 0;

    IsProcessed = false;

    // Fresh bookkeeping: nothing is occupied and both free-slot cursors sit at 0.
    usedRangeMap = Enumerable.Repeat(false, totalNodes).ToList();
    minFreeSlot = 0;
    maxFreeSlot = 0;

    // The root is special-cased: it always occupies slot 0, so both cursors
    // move one past it before any other node is placed.
    root.Index = RootSlot;
    usedRangeMap[RootSlot] = true;
    minFreeSlot = RootSlot + 1;
    maxFreeSlot = RootSlot + 1;

    ProcessNode(root);

    IsProcessed = true;
}
// Creates the terminating node for a string that ends at `parent`.
// The node carries character 0 and remembers `stringIndex` as its TailIndex.
// It is recorded in nodeCache first and then stored in parent.Children under key 0.
// Returns the newly created node.
TerminalNameNode CreateTerminalNameNode(RegularNameNode parent, uint stringIndex)
{
    var terminal = new TerminalNameNode
    {
        Parent = parent,
        Character = 0,
        TailIndex = stringIndex
    };

    nodeCache.Add(terminal);
    parent.Children.Add(0, terminal);

    return terminal;
}
// Adds one string to the character tree.
// The string is encoded as UTF-8 and the tree is walked from `root` one byte at
// a time, creating regular nodes where they are missing. A terminating node
// carrying `index` is then attached to the node reached by the last byte.
void InsertStringToTree(string s, uint index)
{
    byte[] utf8 = Encoding.UTF8.GetBytes(s);

    RegularNameNode cursor = root;
    for (int pos = 0; pos < utf8.Length; ++pos)
    {
        cursor = GetOrCreateRegularNameNode(cursor, utf8[pos]);
    }

    CreateTerminalNameNode(cursor, index);
}
// Builds the output tables from the collected strings. One-shot: state is not
// reset between runs, so a second call throws InvalidOperationException.
//
// Flat mode:  only sorts the strings by length.
// Tree mode:  sorts ordinally, builds the character tree, lets the encoder
//             assign slots, then fills `tree`, `valueOffsets` and `tails`.
// Both modes finish by filling `stringLookup` with string -> position.
public void Generate()
{
    if (IsGenerated)
    {
        throw new InvalidOperationException("Tree already generated.");
    }

    if (IsFlat)
    {
        // Sort by length to reduce offset table size
        strings.Sort((a, b) => a.Length.CompareTo(b.Length));
    }
    else
    {
        // Step 1: ordinal sort, so a string's position is its tail index.
        strings.Sort((a, b) => string.CompareOrdinal(a, b));

        // Step 2: the root is a parentless regular node with character 0.
        root = GetOrCreateRegularNameNode(null, 0);

        // Step 3: insert every string, tagged with its sorted position.
        for (int position = 0; position < strings.Count; ++position)
        {
            InsertStringToTree(strings[position], (uint)position);
        }

        // Step 4: the encoder assigns Index / ParentIndex / ValueOffset.
        encoder.Process(root, nodeCache.Count);

        // Step 5: allocate the output arrays and copy the per-node data in.
        var slotCount = encoder.TotalSlots;
        valueOffsets = new uint[slotCount];
        tree = new uint[slotCount];
        tails = new uint[strings.Count];

        foreach (var entry in nodeCache)
        {
            tree[entry.Index] = entry.ParentIndex;

            var regular = entry as RegularNameNode;
            if (regular == null)
            {
                // Anything that is not a regular node is treated as a terminal:
                // its slot stores the string's tail index, and `tails` maps
                // that tail index back to the slot.
                var terminal = entry as TerminalNameNode;
                tails[terminal.TailIndex] = terminal.Index;
                valueOffsets[terminal.Index] = terminal.TailIndex;
            }
            else
            {
                valueOffsets[regular.Index] = regular.ValueOffset;
            }
        }
    }

    // Step 6: lookup from string to its position in the (now sorted) list.
    for (int position = 0; position < strings.Count; ++position)
    {
        stringLookup.Add(strings[position], (uint)position);
    }

    IsGenerated = true;
}
// Places `terminalNode` (the terminating child of `currNode`) at `slot`.
// The terminal's slot also becomes currNode.ValueOffset. The slot is marked as
// used and the min/max free-slot cursors are brought up to date.
void ProcessTerminalNode(TerminalNameNode terminalNode, RegularNameNode currNode, uint slot)
{
    terminalNode.Index = slot;
    terminalNode.ParentIndex = currNode.Index;
    currNode.ValueOffset = terminalNode.Index;

    var placedAt = terminalNode.Index;

    // Make sure the map reaches this slot before flagging it as occupied.
    ExtendRangeMap(placedAt);
    usedRangeMap[(int)placedAt] = true;
    UpdateMinFreeSlot();

    // Just in case: keep maxFreeSlot strictly past the slot that was just taken.
    var firstSlotAfter = placedAt + 1;
    if (maxFreeSlot < firstSlotAfter)
    {
        maxFreeSlot = firstSlotAfter;
    }

    // NOTE(review): this debug text goes to Console, while ProcessNode emits its
    // debug text through `writer` - confirm both are meant to hit the same stream.
    if (OutputDebug)
    {
        Console.Write($"[{slot}] ");
    }
}
// Returns the child of `parent` stored under character `ch`, creating and
// registering a new regular node when there is none.
// With a null parent a detached node is created (this is how the root is made):
// it is added to nodeCache but linked to no parent.
// An existing child is cast to RegularNameNode before being returned.
RegularNameNode GetOrCreateRegularNameNode(RegularNameNode parent, byte ch)
{
    if (parent == null)
    {
        var detached = new RegularNameNode();
        detached.Character = ch;
        nodeCache.Add(detached);
        return detached;
    }

    NameNode existing;
    if (parent.Children.TryGetValue(ch, out existing))
    {
        return (RegularNameNode)existing;
    }

    var created = new RegularNameNode();
    created.Parent = parent;
    created.Character = ch;
    nodeCache.Add(created);
    parent.Children.Add(ch, created);
    return created;
}
// Assigns slot indexes to the children of currNode (its terminating child, if
// any, and its regular children), then recurses into every regular child in
// ascending Character order.
//
// Invariant maintained here: a regular child's index equals
// currNode.ValueOffset + child.Character. ValueOffset is either the terminal
// child's slot (set inside ProcessTerminalNode) or, when there is no terminal
// child, derived from the first regular child.
void ProcessNode(RegularNameNode currNode)
{
    // cachedRange is scratch state shared by all calls; it is fully consumed
    // below before the recursion starts, so clearing it per call is safe.
    cachedRange.Clear();

    if (OutputDebug)
    {
        writer.Write($"{currNode.Index} {(char)currNode.Character} ");
    }

    // First child that is a TerminalNameNode, or null when there is none.
    TerminalNameNode terminalNode = currNode.Children.Select(x => x.Value)
        .FirstOrDefault(x => x is TerminalNameNode) as TerminalNameNode;
    bool terminalNodeProcessed = false;

    // Regular children, sorted by character so children[0] has the smallest one.
    var children = currNode.Children.Select(x => x.Value).Where(x => x is RegularNameNode)
        .Cast <RegularNameNode>().OrderBy(x => x.Character).ToArray();

    if (children.Length > 0)
    {
        // Lower bound for the first child's slot: not below minFreeSlot, and at
        // least (character + 1) so that index - character stays >= 1.
        uint minChildIndex = Math.Max(minFreeSlot, children[0].Character + 1u);
        uint minChildValue = children[0].Character;

        // Record each child's offset from the smallest child character.
        foreach (var child in children)
        {
            cachedRange.Add(child.Character - minChildValue);
        }

        // NOTE(review): FindFreeRange is not visible here. Presumably it returns
        // the lowest base index >= minChildIndex at which every offset in
        // cachedRange is free, and needExtending reports that the range runs
        // past the end of usedRangeMap - confirm against its definition.
        bool needExtending;
        minChildIndex = FindFreeRange(minChildIndex, cachedRange, out needExtending, false);

        if (terminalNode != null)
        {
            // Check if we can place a terminal node before our current index
            // (this is the slot that keeps child index == terminal slot + character)
            int potentialTerminalIndex = (int)minChildIndex - (int)minChildValue;

            if (potentialTerminalIndex < 0 || usedRangeMap[potentialTerminalIndex])
            {
                try
                {
                    // Occupied, try basing children on terminating node
                    // This throws if it won't work
                    FindFreeRange(minFreeSlot + minChildValue, cachedRange, out needExtending, true);
                    ProcessTerminalNode(terminalNode, currNode, minFreeSlot);
                    minChildIndex = terminalNode.Index + minChildValue;
                }
                // NOTE(review): this catches every exception type, not only the
                // failure signalled by FindFreeRange above.
                catch (Exception)
                {
                    // Just stick it at the end
                    ProcessTerminalNode(terminalNode, currNode, maxFreeSlot);
                    minChildIndex = terminalNode.Index + minChildValue;

                    // Validate
                    // (a failure here is not caught and propagates to the caller)
                    FindFreeRange(minChildIndex, cachedRange, out needExtending, true);
                }
            }
            else
            {
                // Put terminating node down below
                ProcessTerminalNode(terminalNode, currNode, (uint)potentialTerminalIndex);
            }

            terminalNodeProcessed = true;
        }

        // Grow the map up to the slot of the last (highest-offset) child.
        if (needExtending)
        {
            ExtendRangeMap(minChildIndex + cachedRange[cachedRange.Count - 1]);
        }

        // Commit the regular children: base index + per-child offset.
        for (int i = 0; i < children.Length; ++i)
        {
            var child = children[i];
            child.Index = minChildIndex + cachedRange[i];
            child.ParentIndex = currNode.Index;
            usedRangeMap[(int)child.Index] = true;
        }

        // Keep maxFreeSlot strictly past the highest child slot just assigned.
        uint lastChildIndexFree = children[children.Length - 1].Index + 1;
        if (lastChildIndexFree > maxFreeSlot)
        {
            maxFreeSlot = lastChildIndexFree;
        }

        // Debug: print the single child slot, or the first and last child slots.
        if (OutputDebug)
        {
            if (children.Length == 1)
            {
                writer.Write($"<{children[0].Index}>");
            }
            else
            {
                writer.Write($"<{children[0].Index} {children[children.Length - 1].Index}>");
            }
        }

        // Without a terminal child nothing has set ValueOffset yet, so derive it
        // from the first child: ValueOffset + Character == child index.
        if (terminalNode == null)
        {
            currNode.ValueOffset = children[0].Index - children[0].Character;
        }

        UpdateMinFreeSlot();

        if (OutputDebug)
        {
            writer.WriteLine();
            ++writer.Indent;
        }

        // Depth-first descent; all slots of this node are assigned at this point.
        foreach (var child in children)
        {
            ProcessNode(child);
        }

        if (OutputDebug)
        {
            --writer.Indent;
        }
    }

    // A terminal child that was not placed above (the node has no regular
    // children) simply takes the lowest free slot.
    if (terminalNode != null && !terminalNodeProcessed)
    {
        ProcessTerminalNode(terminalNode, currNode, minFreeSlot);

        if (OutputDebug)
        {
            writer.WriteLine();
        }
    }
}