/// <summary>
/// Appends the not-yet-present remainder of a word to the dfa as a chain of new states
/// ending in a final state, then hands the new chain to <c>MergeStates</c>.
/// </summary>
/// <param name="parent">The state to attach the suffix to.</param>
/// <param name="word">The word being added, together with its value.</param>
/// <param name="wordPos">The index of the first letter of the suffix.</param>
private void AddSuffix(State parent, WordWithValue word, int wordPos)
{
    var lastIndex = _wordLength - 1;

    // The stack records the chain root-first so that every state sits directly above its predecessor.
    var chain = new Stack<State>();
    chain.Push(parent);

    // Build one fresh state per remaining letter, except the last one.
    var tail = parent;
    var pos = wordPos;
    for (; pos < lastIndex; pos++)
    {
        var fresh = new State(_alphabetSize);
        tail.CreateOutTransition(fresh, word.Word[pos]);
        chain.Push(fresh);
        tail = fresh;
    }

    // The last letter leads into the final state that carries the word's value.
    var finalState = _stateManager.GetOrCreateFinalState(word.Value);
    tail.CreateOutTransition(finalState, word.Word[lastIndex]);

    // Merge the freshly created states with the already known unique states,
    // starting one letter before the position the loop stopped at.
    MergeStates(word.Word, pos - 1, chain);
}
/// <summary>
/// Asserts that the classifier returns the value stored with the given word.
/// </summary>
/// <param name="classifier">The classifier under test.</param>
/// <param name="wordWithValue">The word to classify and the value expected for it.</param>
private static void CheckWord(Classifier classifier, WordWithValue wordWithValue)
{
    var expected = wordWithValue.Value;
    var actual = classifier.Classify(wordWithValue.Word);

    Assert.AreEqual(expected, actual);
}
/// <summary>
/// Inserts a single word into the dfa and re-minimizes the dfa afterwards.
/// Words that have already been added are ignored.
/// </summary>
/// <param name="word">The word to add, together with its value.</param>
private void AddWord(WordWithValue word)
{
    var letters = word.Word;
    if (HasBeenAdded(letters))
    {
        return;
    }

    var lastIndex = _wordLength - 1;
    var start = _stateManager.StartingState;

    // Phase 1: follow the common prefix up to (but not into) the first confluence state.
    // The stack keeps every visited state on top of its predecessor.
    var prefixPath = new Stack<State>();
    prefixPath.Push(start);

    var original = start;
    var pos = 0;
    while (pos < lastIndex && original.HasTransition(letters[pos]))
    {
        var next = original.Advance(letters[pos]);
        if (next.IsConfluenceState)
        {
            break;
        }

        original = next;
        prefixPath.Push(original);
        pos += 1;
    }

    // At this point:
    //  - original is the last state before the first confluence state, or the last state
    //    of the common prefix when no confluence state was met;
    //  - pos is the index of the letter on the transition leaving original.
    if (original != start)
    {
        // One of this state's outgoing transitions is about to change, so it must be
        // re-registered later. It is removed first because the transitions act as the dictionary key.
        _stateManager.RemoveUniqueState(original, GetHeight(pos - 1));
    }

    // Phase 2: clone the remainder of the common prefix.
    // The stack keeps every clone on top of its predecessor.
    var cloneTail = original;
    var clonePath = new Stack<State>();
    clonePath.Push(cloneTail);

    for (; pos < lastIndex && original.HasTransition(letters[pos]); pos++)
    {
        original = original.Advance(letters[pos]);
        var copy = original.Clone(_alphabetSize);
        cloneTail.RedirectOutTransition(copy, letters[pos]);
        cloneTail = copy;
        clonePath.Push(copy);
    }

    // At this point:
    //  - original is the end of the common prefix along the unique path;
    //  - cloneTail is the end of the common prefix along the cloned path;
    //  - pos is the index of the letter on the transition leaving cloneTail.

    // Phase 3: attach the letters that are not in the dfa yet.
    AddSuffix(cloneTail, word, pos);

    // Phase 4: merge the clones into the state machine.
    pos = MergeStates(letters, pos - 1, clonePath);

    // Phase 5: fix the monofluent states walked in phase 1. Each one is either
    // registered again as a unique state or replaced by an equivalent unique state.
    while (prefixPath.Count > 1)
    {
        var child = prefixPath.Pop();
        var predecessor = prefixPath.Peek();

        var equivalent = _stateManager.TryGetEquivalentUniqueState(child, GetHeight(pos));
        if (equivalent == null)
        {
            // No equivalent state is registered: register this one and stop.
            _stateManager.InsertUniqueState(child, GetHeight(pos));
            break;
        }

        // The predecessor's transition is about to change, so take it out of the
        // registry first; the next iteration handles it.
        if (predecessor != _stateManager.StartingState)
        {
            _stateManager.RemoveUniqueState(predecessor, GetHeight(pos - 1));
        }

        predecessor.RedirectOutTransition(equivalent, letters[pos]);
        pos -= 1;
    }
}