/// <summary>
/// Adds <paramref name="word"/> to the graph under construction.
/// </summary>
/// <remarks>
/// Each word must compare strictly greater than the previously inserted word
/// under <see cref="StringComparison.OrdinalIgnoreCase"/>. Only the suffix that
/// differs from the previous word is materialised as new nodes; every new edge is
/// also pushed onto <c>_uncheckedNodes</c>.
/// </remarks>
/// <param name="word">The next word to add.</param>
/// <exception cref="ArgumentNullException"><paramref name="word"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="word"/> does not sort strictly after the previously inserted word.
/// </exception>
public void Insert(string word)
{
    if (word == null)
    {
        throw new ArgumentNullException(nameof(word));
    }

    // NOTE(review): this ordering check ignores case, but the prefix scan below
    // compares characters exactly. Input such as "Ab", "ac", "Ad" passes the check
    // even though the first and third words are not adjacent in ordinal terms.
    // Confirm that callers normalise case before inserting.
    if (string.Compare(word, _previousWord, StringComparison.OrdinalIgnoreCase) <= 0)
    {
        throw new ArgumentException(
            $"We expect the words to be sorted. But we received {_previousWord} followed by {word}",
            nameof(word));
    }

    // Length of the leading run of characters shared (exactly) with the previous word.
    var limit = Math.Min(word.Length, _previousWord.Length);
    var commonPrefix = 0;
    while (commonPrefix < limit && word[commonPrefix] == _previousWord[commonPrefix])
    {
        commonPrefix++;
    }

    // Presumably collapses the pending nodes that lie beyond the shared prefix;
    // see Minimize for the exact contract.
    Minimize(commonPrefix);

    // Continue from the child of the most recently pushed edge, or from the root
    // when nothing is pending.
    var node = _uncheckedNodes.Count == 0 ? Root : _uncheckedNodes.Peek().Item3;

    for (var i = commonPrefix; i < word.Length; i++)
    {
        var letter = word[i];
        var nextNode = new DawgNode();
        node.Children[letter] = nextNode;
        _uncheckedNodes.Push(new Tuple<DawgNode, char, DawgNode>(node, letter, nextNode));
        node = nextNode;
    }

    node.TerminalNode = true;
    _previousWord = word;
}
/// <summary>
/// Builds the flat, array-based representation of the graph for the given words.
/// </summary>
/// <remarks>
/// The words are fed to a <c>DawgBuilder</c> one by one, the finished root is
/// traversed to collect every node and character, and the result is written into
/// <c>Characters</c>, <c>Edges</c>, <c>EdgeCharacter</c>, <c>FirstChildIndex</c>,
/// <c>RootNodeIndex</c> and <c>TerminalCount</c>.
/// </remarks>
/// <param name="words">
/// Words to store; each is passed to <c>DawgBuilder.Insert</c> in enumeration order.
/// </param>
internal Dawg(IEnumerable<string> words)
{
    var builder = new DawgBuilder();
    foreach (var word in words)
    {
        builder.Insert(word);
    }

    var root = builder.Finish();

    var allNodes = new List<DawgNode>();
    var allChars = new HashSet<char>();
    int low = 0, high = 0;
    var totalChildCount = root.Traversal(ref low, ref high, allNodes, allChars);

    // Maps a node id onto a zero-based array slot: negative ids are shifted up by
    // -low, non-negative ids are additionally moved down by one.
    // NOTE(review): presumably Traversal hands negative ids to terminal nodes and
    // ids starting at 1 to the rest - confirm against DawgNode.Traversal.
    var shift = -low;
    Func<int, int> realIndex = id => shift + (id < 0 ? id : id - 1);

    Characters = allChars.OrderBy(c => c).ToArray();
    Edges = new int[totalChildCount];
    EdgeCharacter = new ushort[totalChildCount];
    FirstChildIndex = new int[high - low];
    RootNodeIndex = realIndex(root.Id);
    TerminalCount = shift;

    // Position of every character within the sorted Characters sequence.
    var characterIndex = new Dictionary<char, ushort>();
    var position = 0;
    foreach (var character in Characters)
    {
        characterIndex.Add(character, (ushort)position);
        position++;
    }

    // Place every node into the slot given by its mapped index.
    var orderedNodes = new DawgNode[allNodes.Count];
    foreach (var node in allNodes)
    {
        orderedNodes[realIndex(node.Id)] = node;
    }

    // Lay the outgoing edges of each node out contiguously, recording where each
    // node's run of edges begins.
    var edgeIndex = 0;
    foreach (var node in orderedNodes)
    {
        FirstChildIndex[realIndex(node.Id)] = edgeIndex;

        foreach (var child in node.SortedChildren)
        {
            Edges[edgeIndex] = realIndex(child.Value.Id);
            EdgeCharacter[edgeIndex] = characterIndex[child.Key];
            edgeIndex++;
        }
    }
}