示例#1
0
        /// <summary>
        /// Inserts the string with index <paramref name="id"/> into the bit-comparison trie stored in
        /// <paramref name="trie"/>. Branch nodes (<c>_flags == 0</c>) test a single bit of the string and
        /// leaf nodes (<c>_flags == 1</c>) reference a string. Every insertion after the first appends one
        /// leaf followed by one branch and relinks an existing branch (or the root) to the new branch.
        /// </summary>
        /// <param name="trie">Node array that is read and appended to. Writes are not bounds-checked here.</param>
        /// <param name="head">Trie header; its <c>_numEntries</c> and <c>_rootId</c> are updated.</param>
        /// <param name="id">Index of the string inside <paramref name="table"/>.</param>
        /// <param name="table">String table the indices are resolved against.</param>
        /// <exception cref="ArgumentException">
        /// The string is null or empty, or is already present in the trie. The very first entry is stored
        /// without reading its string, so it is not subject to these checks.
        /// </exception>
        /// <exception cref="InvalidOperationException">No insertion point could be found on the search path.</exception>
        /// <remarks>
        /// SYMBMaskEntry constructor arguments as used here: flags, bit, left id, right id, string id and a
        /// sixth value that is <c>(node position + 1) / 2</c> for leaves — presumably the leaf ordinal; TODO confirm.
        /// <c>clz8</c> is assumed to map a byte to its number of leading zero bits (8 for zero) and
        /// <c>CheckBit</c> to test bits MSB-first; both are defined outside this method — verify.
        /// </remarks>
        private static void AddToTrie(SYMBMaskEntry *trie, SYMBMaskHeader *head, int id, SYMBHeader *table)
        {
            //If list is empty add the node and quit
            if (head->_numEntries == 0)
            {
                trie[head->_numEntries++] = new SYMBMaskEntry(1, -1, -1, -1, id, 0);
                return;
            }

            string value = table->GetStringEntry(id);

            if (string.IsNullOrEmpty(value))
            {
                throw new ArgumentException("String is null or empty");
            }

            SYMBMaskEntry search = trie[head->_rootId];
            List <int>    path   = new List <int>
            {
                head->_rootId
            };

            //Walk down to the leaf the new string would match, recording every visited node id.
            //That leaf's string is needed to determine which bit distinguishes the new string.
            while (search._flags == 0)
            {
                //Assume that strings are treated as having an infinite number of null chars following them
                if (search._bit / 8 >= value.Length)
                {
                    path.Add(search._leftId);
                    search = trie[search._leftId];
                    continue;
                }

                // _leftId corresponds to bit=0, _rightId corresponds to bit=1
                if (CheckBit(value, search._bit))
                {
                    path.Add(search._rightId);
                    search = trie[search._rightId];
                }
                else
                {
                    path.Add(search._leftId);
                    search = trie[search._leftId];
                }
            }

            string searchVal = table->GetStringEntry(search._stringId);

            //Can't add duplicate strings
            if (searchVal == value)
            {
                throw new ArgumentException("Duplicate string");
            }

            bool  mismatch  = false;
            int   minLength = Math.Min(searchVal.Length, value.Length);
            short bit       = 0;

            //Locate mismatching character between the two strings
            for (short i = 0; i < minLength; i++)
            {
                if (value[i] != searchVal[i])
                {
                    mismatch = true;
                    bit      = (short)(8 * i);
                    break;
                }
            }

            bool right;

            //If a char was different one string does not contain the other
            if (mismatch)
            {
                //Find where the bits differed
                int cmpint = value[bit / 8] ^ searchVal[bit / 8];
                bit += clz8[cmpint];

                //If the differing bit is 1 in the string being added, it takes the right fork
                right = CheckBit(value, bit);

                if (head->_numEntries == 1)
                {
                    trie[1]           = new SYMBMaskEntry(1, -1, -1, -1, id, 1);
                    trie[2]           = new SYMBMaskEntry(0, bit, right ? 0 : 1, right ? 1 : 0, -1, -1);
                    head->_numEntries = 3;
                    head->_rootId     = 2;
                    return;
                }

                //If the mismatch bit is lower than the root's bit the new branch will be the root of the tree
                if (bit < trie[path[0]]._bit)
                {
                    //The index expression (with its ++) is evaluated before the right-hand side, so
                    //_numEntries is already incremented when the constructor arguments are computed.
                    trie[head->_numEntries++] = new SYMBMaskEntry(1, -1, -1, -1, id, head->_numEntries / 2);

                    if (right)
                    {
                        trie[head->_numEntries++] = new SYMBMaskEntry(0, bit, path[0], head->_numEntries - 2, -1, -1);
                    }
                    else
                    {
                        trie[head->_numEntries++] = new SYMBMaskEntry(0, bit, head->_numEntries - 2, path[0], -1, -1);
                    }

                    head->_rootId = head->_numEntries - 1;
                    return;
                }

                //Locate where the branch needs to be inserted
                for (int i = 1; i < path.Count; i++)
                {
                    if (trie[path[i]]._bit > bit || trie[path[i]]._flags == 1)
                    {
                        //Add leaf
                        trie[head->_numEntries++] = new SYMBMaskEntry(1, -1, -1, -1, id, head->_numEntries / 2);

                        //Remap previous branch to point to new branch (which is written next, at _numEntries)
                        if (trie[path[i - 1]]._leftId == path[i])
                        {
                            trie[path[i - 1]]._leftId = head->_numEntries;
                        }
                        else
                        {
                            trie[path[i - 1]]._rightId = head->_numEntries;
                        }

                        //Create new branch
                        if (right)
                        {
                            trie[head->_numEntries++] =
                                new SYMBMaskEntry(0, bit, path[i], head->_numEntries - 2, -1, -1);
                        }
                        else
                        {
                            trie[head->_numEntries++] =
                                new SYMBMaskEntry(0, bit, head->_numEntries - 2, path[i], -1, -1);
                        }

                        return;
                    }
                }

                //This should never happen
                throw new InvalidOperationException("Error building tree, unexpected structure");
            }

            //Since mismatch is false, one string is a prefix of the other

            //The longer string is the one that takes the right branch
            right = value.Length > searchVal.Length;
            bit   = (short)(minLength * 8);

            if (right)
            {
                //Find the first bit after the substring that's 1.  Will always occur in the first 8 bits because 0x00 denotes string termination and thus isn't in value
                bit += clz8[value[bit / 8]];

                //If path.Count == 1 the only value is a leaf
                if (path.Count == 1)
                {
                    trie[1]           = new SYMBMaskEntry(1, -1, -1, -1, id, 1);
                    trie[2]           = new SYMBMaskEntry(0, bit, 0, 1, -1, -1);
                    head->_numEntries = 3;
                    head->_rootId     = 2;
                    return;
                }

                //Append the new leaf.  trie[path[path.Count-2]] is the last branch that was a comparison.
                trie[head->_numEntries++] = new SYMBMaskEntry(1, -1, -1, -1, id, head->_numEntries / 2);

                int trace = path.Count - 2;

                if (trie[path[trace]]._leftId == path[trace + 1])
                {
                    //Edge case: branches near the leaf may test bits later than the new bit.
                    //Back up until a branch that tests an earlier (or equal) bit is found.
                    while (trie[path[trace]]._bit > bit)
                    {
                        trace--;
                        if (trace < 0)
                        {
                            //Every branch on the path tests a later bit: the new branch is the root of the tree
                            trie[head->_numEntries++] =
                                new SYMBMaskEntry(0, bit, path[0], head->_numEntries - 2, -1, -1);

                            //The branch was just written at _numEntries - 1 (the leaf sits at _numEntries - 2)
                            head->_rootId = head->_numEntries - 1;
                            return;
                        }
                    }
                }

                //Relink whichever child of the parent led down the path; after backing up this is not
                //necessarily the left child.
                if (trie[path[trace]]._leftId == path[trace + 1])
                {
                    trie[path[trace]]._leftId = head->_numEntries;
                }
                else
                {
                    trie[path[trace]]._rightId = head->_numEntries;
                }

                trie[head->_numEntries++] = new SYMBMaskEntry(0, bit, path[trace + 1], head->_numEntries - 2, -1, -1);
                return;
            }

            //The new string is a prefix of the existing one.
            //Find first bit comparison that happens after the substring ends
            int index;

            for (index = 0; trie[path[index]]._flags == 0 && trie[path[index]]._bit <= bit; index++)
            {
            }

            //Find the first bit that's 1 and isn't already used in the trie
            int  cmpVal = searchVal[bit / 8];
            byte clzVal;
            bool test = trie[path[index]]._flags == 0;

            while (true)
            {
                clzVal = clz8[cmpVal];
                if (clzVal == 8)
                {
                    //No usable 1 bits left in this char, move on to the next one
                    bit   += 8;
                    cmpVal = searchVal[bit / 8];
                    continue;
                }

                if (test && trie[path[index]]._bit <= bit + clzVal)
                {
                    if (trie[path[index]]._bit == bit + clzVal)
                    {
                        //This bit is already tested by a branch on the path; mask it out and look further
                        cmpVal ^= (1 << 7) >> clzVal;
                    }

                    test = trie[path[++index]]._flags == 0;
                    continue;
                }

                bit += clzVal;
                break;
            }

            //If the trie is a single leaf the new branch is the root of the trie
            if (head->_numEntries == 1)
            {
                trie[1]           = new SYMBMaskEntry(1, -1, -1, -1, id, 1);
                trie[2]           = new SYMBMaskEntry(0, bit, 1, 0, -1, -1);
                head->_numEntries = 3;
                head->_rootId     = 2;
                return;
            }

            //Append the new leaf
            trie[head->_numEntries++] = new SYMBMaskEntry(1, -1, -1, -1, id, head->_numEntries / 2);

            if (index == 0)
            {
                //The root already tests a later bit, so there is no parent to relink:
                //the new branch becomes the root with the old root as its right child.
                trie[head->_numEntries++] = new SYMBMaskEntry(0, bit, head->_numEntries - 2, path[0], -1, -1);
                head->_rootId = head->_numEntries - 1;
                return;
            }

            //Update old branch, insert new branch, and quit
            if (trie[path[index - 1]]._leftId == path[index])
            {
                trie[path[index - 1]]._leftId = head->_numEntries;
            }
            else
            {
                trie[path[index - 1]]._rightId = head->_numEntries;
            }

            trie[head->_numEntries++] = new SYMBMaskEntry(0, bit, head->_numEntries - 2, path[index], -1, -1);
        }
示例#2
0
        //Code written by Mawootad
        /// <summary>
        /// Rebuilds the mask trie from scratch by resetting <paramref name="maskHeader"/> and inserting
        /// every string index in <paramref name="indices"/>, in array order.
        /// </summary>
        /// <param name="indices">String indices to insert; each one is passed to AddToTrie.</param>
        /// <param name="header">String table header handed through to AddToTrie.</param>
        /// <param name="maskHeader">Trie header; its root id and entry count are reset to 0 first.</param>
        /// <param name="entries">Node array that AddToTrie writes into.</param>
        public static void Build(int[] indices, SYMBHeader *header, SYMBMaskHeader *maskHeader, SYMBMaskEntry *entries)
        {
            //Start from an empty trie
            maskHeader->_numEntries = 0;
            maskHeader->_rootId     = 0;

            //Insert the indices one at a time.  This seems to be roughly how the file is normally built,
            //as it has the same resulting leaf-node-leaf-node pattern
            for (int i = 0; i < indices.Length; i++)
            {
                AddToTrie(entries, maskHeader, indices[i], header);
            }
        }