Exemple #1
0
        /// <summary>
        /// Adds <paramref name="word"/> to the graph that is being built.
        /// </summary>
        /// <remarks>
        /// Each word must compare strictly greater than the previously inserted word
        /// using <see cref="StringComparison.OrdinalIgnoreCase"/>.
        /// </remarks>
        /// <param name="word">The next word to insert.</param>
        /// <exception cref="ArgumentNullException"><paramref name="word"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="word"/> does not compare greater than the previously inserted word.
        /// </exception>
        public void Insert(string word)
        {
            if (word == null)
            {
                throw new ArgumentNullException(nameof(word));
            }

            // NOTE(review): this ordering check ignores case, while the prefix scan below
            // compares characters case-sensitively. Mixed-case input may therefore pass this
            // check yet share no prefix with its predecessor - confirm callers normalise case.
            if (string.Compare(word, _previousWord, StringComparison.OrdinalIgnoreCase) <= 0)
            {
                throw new ArgumentException($"We expect the words to be sorted. But we received {_previousWord} followed by {word}");
            }

            // Length of the leading run of characters shared with the previous word.
            var comparableLength = Math.Min(word.Length, _previousWord.Length);
            var commonPrefix = 0;

            while (commonPrefix < comparableLength && word[commonPrefix] == _previousWord[commonPrefix])
            {
                commonPrefix++;
            }

            // Presumably finalises the pending nodes that lie beyond the shared prefix;
            // Minimize is defined elsewhere - verify against its implementation.
            Minimize(commonPrefix);

            // Continue from the child of the most recently pushed pending edge,
            // or from the root when nothing is pending.
            var node = _uncheckedNodes.Count == 0 ? Root : _uncheckedNodes.Peek().Item3;

            // Append one fresh node per remaining character and record each new edge as pending.
            for (var i = commonPrefix; i < word.Length; i++)
            {
                var letter = word[i];
                var nextNode = new DawgNode();
                node.Children[letter] = nextNode;
                _uncheckedNodes.Push(new Tuple<DawgNode, char, DawgNode>(node, letter, nextNode));
                node = nextNode;
            }

            node.TerminalNode = true;
            _previousWord = word;
        }
Exemple #2
0
        /// <summary>
        /// Builds the array-based representation of the graph from <paramref name="words"/>.
        /// </summary>
        /// <param name="words">
        /// Words passed, in enumeration order, to a <c>DawgBuilder</c>.
        /// </param>
        internal Dawg(IEnumerable<string> words)
        {
            var dawgBuilder = new DawgBuilder();

            foreach (var entry in words)
            {
                dawgBuilder.Insert(entry);
            }

            var rootNode = dawgBuilder.Finish();

            // Traversal receives the node list and character set to fill, reports the two id
            // bounds through the ref arguments, and returns the count used to size the edge arrays.
            var discoveredNodes = new List<DawgNode>();
            var distinctChars = new HashSet<char>();
            int lowestId = 0;
            int highestId = 0;
            var edgeTotal = rootNode.Traversal(ref lowestId, ref highestId, discoveredNodes, distinctChars);

            // Converts a node id into an array slot: every id is shifted by -lowestId, and
            // non-negative ids are additionally lowered by one.
            // NOTE(review): presumably id 0 is never assigned - confirm in Traversal.
            Func<int, int> slotOf = id => (id < 0 ? id : id - 1) - lowestId;

            Characters = distinctChars.OrderBy(c => c).ToArray();
            Edges = new int[edgeTotal];
            EdgeCharacter = new ushort[edgeTotal];
            FirstChildIndex = new int[highestId - lowestId];
            RootNodeIndex = slotOf(rootNode.Id);
            TerminalCount = -lowestId;

            // Position of each character within the sorted Characters sequence.
            var charToIndex = new Dictionary<char, ushort>();
            var position = 0;

            foreach (var c in Characters)
            {
                charToIndex.Add(c, (ushort)position);
                position++;
            }

            // Place every node at the slot derived from its id.
            var nodesBySlot = new DawgNode[discoveredNodes.Count];

            for (var i = 0; i < discoveredNodes.Count; i++)
            {
                var discovered = discoveredNodes[i];
                nodesBySlot[slotOf(discovered.Id)] = discovered;
            }

            // Walk the nodes in slot order, writing each node's outgoing edges contiguously
            // and remembering where its run of edges starts.
            var nextEdge = 0;

            for (var slot = 0; slot < nodesBySlot.Length; slot++)
            {
                var current = nodesBySlot[slot];
                FirstChildIndex[slotOf(current.Id)] = nextEdge;

                foreach (var edge in current.SortedChildren)
                {
                    Edges[nextEdge] = slotOf(edge.Value.Id);
                    EdgeCharacter[nextEdge] = charToIndex[edge.Key];
                    nextEdge++;
                }
            }
        }