Ejemplo n.º 1
0
        /// <summary>
        /// Resets the encoder state, pins <paramref name="root"/> to slot 0 and then
        /// assigns slots to the rest of the tree by calling <c>ProcessNode</c> on the root.
        /// </summary>
        /// <param name="root">Root of the name tree; it always receives index 0.</param>
        /// <param name="totalNodes">
        /// Initial number of entries in the slot-usage map. Must be at least 1 because
        /// the root occupies slot 0.
        /// </param>
        /// <exception cref="ArgumentNullException"><paramref name="root"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="totalNodes"/> is less than 1.</exception>
        public void Process(RegularNameNode root, int totalNodes)
        {
            // Rules
            // 1. Character index must be high enough so valueOffset is at least 1.
            //    That means the index is at least char value + 1
            // 2. After applying above, find lowest index where the child characters
            //    would fit (i.e. none of the character slots desired are already
            //    occupied in the map)
            // 3. If a node has a terminating child, any non-terminating children are
            //    strictly based upon the current node's valueOffset
            // 4. Terminating node takes the first available index, unless it's child of
            //    a node that contains regular child nodes, in which case it may appear
            //    in the first available index, relative to the index of the first regular
            //    child, or at the end of the currently used slots
            // 5. Each node assigns index to terminating child first, then regular children,
            //    and after that child nodes are processed in depth-first order
            // 6. Gaps are OK

            // Validate before touching any state so a bad call leaves the encoder unchanged
            // instead of failing halfway through the reset.
            if (root == null)
            {
                throw new ArgumentNullException(nameof(root));
            }
            if (totalNodes < 1)
            {
                throw new ArgumentOutOfRangeException(nameof(totalNodes), totalNodes,
                                                      "At least one slot is required because the root occupies slot 0.");
            }

            IsProcessed  = false;
            usedRangeMap = Enumerable.Repeat(false, totalNodes).ToList();
            minFreeSlot  = 0;
            maxFreeSlot  = 0;

            // Special case: root always occupies 0
            root.Index      = 0;
            usedRangeMap[0] = true;
            ++minFreeSlot;
            ++maxFreeSlot;

            ProcessNode(root);
            IsProcessed = true;
        }
        /// <summary>
        /// Creates a terminal node carrying <paramref name="stringIndex"/> as its tail index,
        /// records it in the node cache and attaches it to <paramref name="parent"/> under key 0.
        /// </summary>
        /// <param name="parent">Node the terminal is attached to.</param>
        /// <param name="stringIndex">Value stored in the new node's <c>TailIndex</c>.</param>
        /// <returns>The newly created terminal node.</returns>
        TerminalNameNode CreateTerminalNameNode(RegularNameNode parent, uint stringIndex)
        {
            var terminal = new TerminalNameNode
            {
                Parent    = parent,
                Character = 0,
                TailIndex = stringIndex
            };

            // Cache first, then link into the parent's child table (key 0).
            nodeCache.Add(terminal);
            parent.Children.Add(0, terminal);

            return terminal;
        }
        /// <summary>
        /// Walks (creating as needed) one regular node per UTF-8 byte of <paramref name="s"/>,
        /// starting at the root, and then appends a terminal node for the string.
        /// </summary>
        /// <param name="s">String to insert.</param>
        /// <param name="index">Index passed on to the terminal node as its tail index.</param>
        void InsertStringToTree(string s, uint index)
        {
            byte[]          utf8   = Encoding.UTF8.GetBytes(s);
            RegularNameNode cursor = root;

            for (int pos = 0; pos < utf8.Length; ++pos)
            {
                cursor = GetOrCreateRegularNameNode(cursor, utf8[pos]);
            }

            // The node reached after the last byte owns the terminal.
            CreateTerminalNameNode(cursor, index);
        }
 /// <summary>
 /// Sorts the registered strings and fills <c>stringLookup</c> with each string's
 /// position after sorting. Unless <c>IsFlat</c> is set, it also builds the character
 /// tree, runs the encoder over it and produces the <c>tree</c>, <c>valueOffsets</c>
 /// and <c>tails</c> arrays.
 /// </summary>
 /// <exception cref="InvalidOperationException">The method has already completed once.</exception>
 public void Generate()
 {
     // Nothing is reset between runs, so this is strictly a one-shot operation.
     if (IsGenerated)
     {
         throw new InvalidOperationException("Tree already generated.");
     }

     if (IsFlat)
     {
         // Flat mode: order by length to reduce offset table size.
         strings.Sort((a, b) => a.Length.CompareTo(b.Length));
     }
     else
     {
         // 1. Ordinal sort of the strings.
         strings.Sort(string.CompareOrdinal);

         // 2. Fresh root node (no parent, character 0).
         root = GetOrCreateRegularNameNode(null, 0);

         // 3. Insert every string; its sorted position becomes its tail index.
         for (int position = 0; position < strings.Count; ++position)
         {
             InsertStringToTree(strings[position], (uint)position);
         }

         // 4. Let the encoder assign indexes and value offsets.
         encoder.Process(root, nodeCache.Count);

         // 5. Allocate and fill the output arrays.
         var slotCount = encoder.TotalSlots;
         valueOffsets = new uint[slotCount];
         tree         = new uint[slotCount];
         tails        = new uint[strings.Count];

         foreach (var node in nodeCache)
         {
             tree[node.Index] = node.ParentIndex;

             var regular = node as RegularNameNode;
             if (regular == null)
             {
                 // Not a regular node: treated as terminal, cross-linking slot and tail index.
                 var terminal = node as TerminalNameNode;
                 tails[terminal.TailIndex]    = terminal.Index;
                 valueOffsets[terminal.Index] = terminal.TailIndex;
                 continue;
             }

             valueOffsets[regular.Index] = regular.ValueOffset;
         }
     }

     // 6. Map every string to its position in the sorted list.
     for (int position = 0; position < strings.Count; ++position)
     {
         stringLookup.Add(strings[position], (uint)position);
     }

     IsGenerated = true;
 }
Ejemplo n.º 5
0
 /// <summary>
 /// Places <paramref name="terminalNode"/> at <paramref name="slot"/>, links it to
 /// <paramref name="currNode"/>, marks the slot as used and refreshes the free-slot bounds.
 /// </summary>
 /// <param name="terminalNode">Terminal child being placed.</param>
 /// <param name="currNode">Parent node; its <c>ValueOffset</c> is set to the terminal's slot.</param>
 /// <param name="slot">Slot index assigned to the terminal node.</param>
 void ProcessTerminalNode(TerminalNameNode terminalNode, RegularNameNode currNode, uint slot)
 {
     terminalNode.Index       = slot;
     terminalNode.ParentIndex = currNode.Index;
     currNode.ValueOffset     = slot;

     // Extend the map for this slot before flagging it as occupied.
     ExtendRangeMap(slot);
     usedRangeMap[(int)slot] = true;
     UpdateMinFreeSlot();

     // Just in case: keep maxFreeSlot at least one past the terminal's slot.
     uint firstSlotAfter = slot + 1;
     if (maxFreeSlot < firstSlotAfter)
     {
         maxFreeSlot = firstSlotAfter;
     }

     if (!OutputDebug)
     {
         return;
     }

     // NOTE(review): ProcessNode emits its debug output through `writer`, while this
     // goes straight to Console — confirm that the difference is intended.
     Console.Write($"[{slot}] ");
 }
        /// <summary>
        /// Returns the child of <paramref name="parent"/> stored under <paramref name="ch"/>,
        /// creating, caching and attaching a new regular node when none exists. With a null
        /// parent a new, unattached node is always created and cached.
        /// </summary>
        /// <param name="parent">Node to look under, or null to create a parentless node.</param>
        /// <param name="ch">Character (byte) value identifying the child.</param>
        /// <returns>The existing or newly created node.</returns>
        RegularNameNode GetOrCreateRegularNameNode(RegularNameNode parent, byte ch)
        {
            if (parent == null)
            {
                // Parentless node: nothing to look up, nothing to attach to.
                var detached = new RegularNameNode();
                detached.Character = ch;
                nodeCache.Add(detached);
                return detached;
            }

            NameNode existing;
            if (parent.Children.TryGetValue(ch, out existing))
            {
                return (RegularNameNode)existing;
            }

            var created = new RegularNameNode();
            created.Parent    = parent;
            created.Character = ch;
            nodeCache.Add(created);
            parent.Children.Add(ch, created);
            return created;
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Assigns slot indexes to the children of <paramref name="currNode"/> (its terminal
        /// child, if any, and its regular children), sets the node's <c>ValueOffset</c>, and
        /// then recurses into each regular child in ascending <c>Character</c> order.
        /// </summary>
        /// <param name="currNode">Node whose children are placed; its own Index is read, not assigned, here.</param>
        void ProcessNode(RegularNameNode currNode)
        {
            // cachedRange is shared scratch state; it is refilled for this node below and
            // only read before the recursive calls, which clear it again.
            cachedRange.Clear();

            if (OutputDebug)
            {
                writer.Write($"{currNode.Index} {(char)currNode.Character} ");
            }

            // First terminal child, or null when this node has none.
            TerminalNameNode terminalNode = currNode.Children.Select(x => x.Value)
                                            .FirstOrDefault(x => x is TerminalNameNode) as TerminalNameNode;
            bool terminalNodeProcessed = false;

            // Regular children sorted by character value, so children[0] has the lowest character.
            var children = currNode.Children.Select(x => x.Value).Where(x => x is RegularNameNode)
                           .Cast <RegularNameNode>().OrderBy(x => x.Character).ToArray();

            if (children.Length > 0)
            {
                // Lowest candidate slot for the first child: not below the first free slot and
                // not below (character + 1).
                uint minChildIndex = Math.Max(minFreeSlot, children[0].Character + 1u);
                uint minChildValue = children[0].Character;
                // cachedRange[i] = distance of child i's character from the lowest child character.
                foreach (var child in children)
                {
                    cachedRange.Add(child.Character - minChildValue);
                }
                bool needExtending;
                // NOTE(review): FindFreeRange is not visible here; presumably it returns the start
                // index at which all offsets in cachedRange are free, and the last argument selects
                // "validate exactly this start, throw on failure" (true) vs. "search" (false) — confirm.
                minChildIndex = FindFreeRange(minChildIndex, cachedRange, out needExtending, false);

                if (terminalNode != null)
                {
                    // Check if we can place a terminal node before our current index
                    // (the slot for which terminal index + minChildValue equals minChildIndex).
                    // NOTE(review): only the lower bound is checked before indexing usedRangeMap;
                    // confirm the derived slot can never lie past the end of the map.
                    int potentialTerminalIndex = (int)minChildIndex - (int)minChildValue;
                    if (potentialTerminalIndex < 0 || usedRangeMap[potentialTerminalIndex])
                    {
                        // NOTE(review): the catch below handles every exception type, including
                        // anything thrown by ProcessTerminalNode inside the try block.
                        try
                        {
                            // Occupied, try basing children on terminating node
                            // This throws if it won't work
                            FindFreeRange(minFreeSlot + minChildValue, cachedRange, out needExtending, true);
                            ProcessTerminalNode(terminalNode, currNode, minFreeSlot);
                            minChildIndex = terminalNode.Index + minChildValue;
                        }
                        catch (Exception)
                        {
                            // Just stick it at the end
                            ProcessTerminalNode(terminalNode, currNode, maxFreeSlot);
                            minChildIndex = terminalNode.Index + minChildValue;
                            // Validate
                            FindFreeRange(minChildIndex, cachedRange, out needExtending, true);
                        }
                    }
                    else
                    {
                        // Put terminating node down below
                        ProcessTerminalNode(terminalNode, currNode, (uint)potentialTerminalIndex);
                    }
                    terminalNodeProcessed = true;
                }


                // needExtending holds the value written by the most recent FindFreeRange call.
                if (needExtending)
                {
                    // Extend up to the slot of the last (highest-character) child.
                    ExtendRangeMap(minChildIndex + cachedRange[cachedRange.Count - 1]);
                }

                // Assign each child its slot and mark it as used.
                for (int i = 0; i < children.Length; ++i)
                {
                    var child = children[i];
                    child.Index       = minChildIndex + cachedRange[i];
                    child.ParentIndex = currNode.Index;
                    usedRangeMap[(int)child.Index] = true;
                }

                // Keep maxFreeSlot at least one past the highest child slot.
                uint lastChildIndexFree = children[children.Length - 1].Index + 1;
                if (lastChildIndexFree > maxFreeSlot)
                {
                    maxFreeSlot = lastChildIndexFree;
                }

                if (OutputDebug)
                {
                    if (children.Length == 1)
                    {
                        writer.Write($"<{children[0].Index}>");
                    }
                    else
                    {
                        writer.Write($"<{children[0].Index} {children[children.Length - 1].Index}>");
                    }
                }

                // With a terminal child, ProcessTerminalNode has already set ValueOffset to the
                // terminal's slot; otherwise derive it from the first child's placement.
                if (terminalNode == null)
                {
                    currNode.ValueOffset = children[0].Index - children[0].Character;
                }
                UpdateMinFreeSlot();

                if (OutputDebug)
                {
                    writer.WriteLine();
                    ++writer.Indent;
                }

                // All children of this node are placed before any of them is descended into.
                foreach (var child in children)
                {
                    ProcessNode(child);
                }

                if (OutputDebug)
                {
                    --writer.Indent;
                }
            }

            // Node without regular children: its terminal child simply takes the first free slot.
            if (terminalNode != null && !terminalNodeProcessed)
            {
                ProcessTerminalNode(terminalNode, currNode, minFreeSlot);
                if (OutputDebug)
                {
                    writer.WriteLine();
                }
            }
        }