Universal deterministic Levenshtein automaton for Damerau-Levenshtein distance
Exemplo n.º 1
0
		/// <summary>
		/// Basic Schulz and Mihov algorithm: walks the trie below <paramref name="start"/>
		/// with an explicit stack while stepping an automaton built from
		/// <paramref name="typo"/>, collecting every node reached in an accepting state.
		/// </summary>
		/// <returns>
		/// The collected trie nodes in the order they were visited; an empty list when
		/// <paramref name="typo"/> is null or empty.
		/// </returns>
		/// <param name="typo">Misspelled input the automaton is built from.</param>
		/// <param name="start">Trie node whose children the traversal starts from.</param>
		/// <param name="editDistance">Edit distance handed to the automaton.</param>
		/// <param name="includeOnlyWords">If set to <c>true</c>, only nodes flagged <c>IsWord</c> are collected.</param>
		IList<TrieNode> GetCorrectionNodes(string typo, TrieNode start, int editDistance, bool includeOnlyWords = true) {
			List<TrieNode> matches = new List<TrieNode> ();
			if (string.IsNullOrEmpty (typo)) {
				return matches;
			}

			var automata = new LevTAutomataImitation (typo, editDistance);
			var pending = new Stack<SpellCheckerState> ();
			pending.Push (new SpellCheckerState () {
				Node = start,
				AutomataState = 0,
				AutomataOffset = 0,
			});

			while (pending.Count > 0) {
				SpellCheckerState current = pending.Pop ();

				// Position the automaton on the state recorded for this trie node before
				// asking it for any transition.
				automata.LoadState (current.AutomataState, current.AutomataOffset);
				var offset = current.AutomataOffset;

				// The transition for an all-zero vector is the same for every child that
				// misses the look-ahead window, so it is fetched once per node.
				AutomatonState fallback = automata.GetNextState (0);

				foreach (char c in current.Node.Children.Keys) {
					// Does c occur in typo at offset, offset + 1 or offset + 2?
					bool inWindow = false;
					for (int ahead = 0; ahead <= 2 && !inWindow; ahead++) {
						inWindow = offset < typo.Length - ahead && typo[offset + ahead] == c;
					}

					AutomatonState next = inWindow
						? automata.GetNextState (automata.GetCharacteristicVector (c, offset))
						: fallback;
					if (next == null) {
						// No transition: this branch of the trie is abandoned.
						continue;
					}

					TrieNode child = current.Node.Children [c];
					if (child.Children.Count > 0) {
						// Only nodes that have children are worth revisiting later.
						pending.Push (new SpellCheckerState () {
							Node = child,
							AutomataState = next.State,
							AutomataOffset = next.Offset
						});
					}

					if (!child.IsWord && includeOnlyWords) {
						continue;
					}
					if (automata.IsAcceptState (next.State, next.Offset)) {
						matches.Add (child);
					}
				}
			}

			return matches;
		}
Exemplo n.º 2
0
        /// <summary>
        /// Basic Schulz and Mihov algorithm: explores the trie below
        /// <paramref name="start"/> using an explicit stack, advancing an automaton
        /// built from <paramref name="typo"/> one trie edge at a time and keeping
        /// every node that is reached in an accepting state.
        /// </summary>
        /// <returns>
        /// The accepted trie nodes in visiting order; an empty list when
        /// <paramref name="typo"/> is null or empty.
        /// </returns>
        /// <param name="typo">Misspelled input the automaton is built from.</param>
        /// <param name="start">Trie node whose children the traversal starts from.</param>
        /// <param name="editDistance">Edit distance handed to the automaton.</param>
        /// <param name="includeOnlyWords">If set to <c>true</c>, only nodes flagged <c>IsWord</c> are kept.</param>
        IList <TrieNode> GetCorrectionNodes(string typo, TrieNode start, int editDistance, bool includeOnlyWords = true)
        {
            List<TrieNode> found = new List<TrieNode>();
            if (string.IsNullOrEmpty(typo))
            {
                return found;
            }

            var automata = new LevTAutomataImitation(typo, editDistance);
            var frontier = new Stack<SpellCheckerState>();
            frontier.Push(new SpellCheckerState()
            {
                Node           = start,
                AutomataState  = 0,
                AutomataOffset = 0,
            });

            while (frontier.Count > 0)
            {
                SpellCheckerState visiting = frontier.Pop();

                // Restore the automaton to the state saved for this trie node before
                // querying any transition.
                automata.LoadState(visiting.AutomataState, visiting.AutomataOffset);
                var position = visiting.AutomataOffset;

                // Children that miss the look-ahead window all share the transition
                // for an all-zero vector, so it is looked up once per node.
                AutomatonState zeroVectorState = automata.GetNextState(0);

                foreach (char c in visiting.Node.Children.Keys)
                {
                    // Scan typo[position .. position + 2] for c, stopping at the first hit.
                    bool hit = false;
                    for (int step = 0; step <= 2 && !hit; step++)
                    {
                        hit = position < typo.Length - step && typo[position + step] == c;
                    }

                    AutomatonState target = hit
                        ? automata.GetNextState(automata.GetCharacteristicVector(c, position))
                        : zeroVectorState;
                    if (target == null)
                    {
                        // No transition: nothing under this child is examined.
                        continue;
                    }

                    TrieNode child = visiting.Node.Children[c];
                    if (child.Children.Count > 0)
                    {
                        // Childless nodes have nothing further to explore, so they are not queued.
                        frontier.Push(new SpellCheckerState()
                        {
                            Node           = child,
                            AutomataState  = target.State,
                            AutomataOffset = target.Offset
                        });
                    }

                    if (!child.IsWord && includeOnlyWords)
                    {
                        continue;
                    }
                    if (automata.IsAcceptState(target.State, target.Offset))
                    {
                        found.Add(child);
                    }
                }
            }

            return found;
        }