Esempio n. 1
0
    private static int TrueDistance(editItem dictionaryOriginal, editItem inputDelete, string inputOriginal)
    {
        //Deletes may have been applied to the dictionary term and to the input term at the same
        //time, each up to editDistanceMax.
        //Replaces and adjacent transposes then still give an edit distance <= editDistanceMax,
        //but inserts and deletes may give a larger one, so it has to be recomputed when both
        //sides were edited.
        //Example for editDistanceMax=1: bank/bnak and bank/bink have edit distance 1, whereas
        //bank/kanb, bank/xban and bank/baxn have edit distance 2, although deleting on both
        //sides makes every one of these pairs equal.

        //identical strings: nothing to correct
        if (dictionaryOriginal.term == inputOriginal)
        {
            return 0;
        }

        //only one side was edited: the other side's recorded distance is the result
        if (dictionaryOriginal.distance == 0)
        {
            return inputDelete.distance;
        }
        if (inputDelete.distance == 0)
        {
            return dictionaryOriginal.distance;
        }

        //both distances > 0: measure the two original strings against each other
        return DamerauLevenshteinDistance(dictionaryOriginal.term, inputOriginal);
    }
Esempio n. 2
0
    //Builds the delete-only edits of a word: cheap and language independent.
    //Transposes, replaces and inserts are not generated; replaces and inserts are expensive and
    //language dependent (Chinese has 70,000 Unicode Han characters).
    //Every returned item carries the distance editDistance+1. With recursion enabled the deletes
    //of each delete are appended as long as that distance is still below editDistanceMax.
    //Words with fewer than two characters yield an empty list.
    private static List <editItem> Edits(string word, int editDistance, bool recursion)
    {
        int deleteDistance = editDistance + 1;
        List <editItem> result = new List <editItem>();

        if (word.Length <= 1)
        {
            return result;
        }

        for (int pos = 0; pos < word.Length; pos++)
        {
            editItem shortened = new editItem { term = word.Remove(pos, 1), distance = deleteDistance };

            //an item the list already contains is skipped, together with its sub-deletes
            if (result.Contains(shortened))
            {
                continue;
            }
            result.Add(shortened);

            //descend only while the maximum edit distance is not yet reached
            if (!recursion || (deleteDistance >= editDistanceMax))
            {
                continue;
            }

            foreach (editItem deeper in Edits(shortened.term, deleteDistance, recursion))
            {
                if (!result.Contains(deeper))
                {
                    result.Add(deeper);
                }
            }
        }

        return result;
    }
Esempio n. 3
0
    //Registers one occurrence of a word under the key language+key.
    //The first time a word occurs, all of its deletes (as produced by Edits(key, 0, true)) are
    //added to the dictionary as well; every delete entry keeps a suggestions list that points
    //back to the original term(s) it was created from.
    //The dictionary may be updated at any time (word frequency and new words) by calling this method.
    //Returns true when the deletes were created by this call, false when the word was already known.
    private static bool CreateDictionaryEntry(string key, string language)
    {
        string entryKey = language + key;

        //fetch the entry or create it; an entry can already exist because
        //1. the word appeared before
        //2. the word equals a delete of another word
        dictionaryItem entry;
        if (!dictionary.TryGetValue(entryKey, out entry))
        {
            entry = new dictionaryItem();
            dictionary.Add(entryKey, entry);
        }
        entry.count++;

        //deletes are created only once, no matter how often the word occurs, and only once the
        //word itself occurs - even if the same string existed before as a delete of another word
        if (!string.IsNullOrEmpty(entry.term))
        {
            return false;
        }
        entry.term = key;

        foreach (editItem delete in Edits(key, 0, true))
        {
            editItem backReference = new editItem { term = key, distance = delete.distance };
            string deleteKey = language + delete.term;

            dictionaryItem deleteEntry;
            if (!dictionary.TryGetValue(deleteKey, out deleteEntry))
            {
                deleteEntry = new dictionaryItem();
                deleteEntry.suggestions.Add(backReference);
                dictionary.Add(deleteKey, deleteEntry);
            }
            else if (!deleteEntry.suggestions.Contains(backReference))
            {
                //key already present because
                //1. the delete equals another word
                //2. the delete equals a delete of another word
                AddLowestDistance(deleteEntry.suggestions, backReference);
            }
        }

        return true;
    }
Esempio n. 4
0
 //Saves some time and space: unless verbose is 2, a suggestions list only keeps entries that
 //share the lowest distance seen so far (judged by its first element).
 private static void AddLowestDistance(List <editItem> suggestions, editItem suggestion)
 {
     //verbose<2: a strictly better newcomer evicts everything stored so far
     if (verbose < 2)
     {
         if ((suggestions.Count > 0) && (suggestions[0].distance > suggestion.distance))
         {
             suggestions.Clear();
         }
     }

     //reject the newcomer only if verbose is not 2, the list is non-empty and the newcomer is worse
     if ((verbose != 2) && (suggestions.Count != 0) && (suggestions[0].distance < suggestion.distance))
     {
         return;
     }

     suggestions.Add(suggestion);
 }
Esempio n. 5
0
	private void mEditOnClick(object sender, EventArgs ea)
	{
		//open the edit dialog once for every selected row, one after another
		foreach (ListViewFileItem lvi in contents.SelectedItems)
		{
			//the dialog is seeded with the row's current column texts
			//a dialog shown with ShowDialog() is not disposed when it closes, so it is disposed
			//here explicitly instead of leaking one window per edited row
			//NOTE(review): assumes editItem is a Form (or another IDisposable dialog) - confirm
			using (editItem ei = new editItem(lvi.SubItems[(int)ListViewOrder.Name].Text, lvi.SubItems[(int)ListViewOrder.FileType].Text,
											  lvi.SubItems[(int)ListViewOrder.Upx].Text, lvi.SubItems[(int)ListViewOrder.NullString].Text))
			{
				//write the edited values back only when the dialog was confirmed
				if (ei.ShowDialog() == DialogResult.OK)
				{
					lvi.SubItems[(int)ListViewOrder.Name].Text = ei.FileName;
					lvi.SubItems[(int)ListViewOrder.FileType].Text = ei.Filetype;
					lvi.SubItems[(int)ListViewOrder.Upx].Text = ei.Upx;
					lvi.SubItems[(int)ListViewOrder.NullString].Text = ei.StringNull;
				}
			}
		}
	}
Esempio n. 6
0
    private static int TrueDistance(editItem dictionaryOriginal, editItem inputDelete, string inputOriginal)
    {
        //Simultaneous deletes of up to editDistanceMax are allowed on both the dictionary term
        //and the input term.
        //For replaces and adjacent transposes the resulting edit distance stays <= editDistanceMax;
        //for inserts and deletes it might exceed editDistanceMax.
        //To avoid suggestions of a higher edit distance, the real distance is calculated whenever
        //both sides were edited.
        //Example for editDistanceMax=1: bank==bnak and bank==bink, but bank!=kanb, bank!=xban and
        //bank!=baxn. Deleting on both sides makes all of these pairs equal, yet only the first two
        //pairs have edit distance 1; the others have edit distance 2.

        int result;

        if (dictionaryOriginal.term == inputOriginal)
        {
            //same string on both sides
            result = 0;
        }
        else if (dictionaryOriginal.distance == 0)
        {
            //dictionary side unedited: the input side's distance applies
            result = inputDelete.distance;
        }
        else if (inputDelete.distance == 0)
        {
            //input side unedited: the dictionary side's distance applies
            result = dictionaryOriginal.distance;
        }
        else
        {
            //both distances > 0: adjust by comparing the two original strings
            result = DamerauLevenshteinDistance(dictionaryOriginal.term, inputOriginal);
        }

        return result;
    }
Esempio n. 7
0
    private static List<suggestItem> Lookup(string input, string language, int editDistanceMax)
    {
        //Work list of terms still to be probed, handled first-in first-out. It starts with the
        //unmodified input at distance 0; the deletes of each probed term are appended at the end.
        List<editItem> pending = new List<editItem>();
        pending.Add(new editItem { term = input, distance = 0 });

        List<suggestItem> suggestions = new List<suggestItem>();

        while (pending.Count > 0)
        {
            editItem candidate = pending[0];
            pending.RemoveAt(0);

            //early termination (saves time):
            //a suggestion's distance lies between candidate.distance and candidate.distance+editDistanceMax,
            //so once the candidate distance is higher than the distance of the suggestions already found,
            //no better suggestions are to be expected
            if ((verbose < 2) && (suggestions.Count > 0) && (candidate.distance > suggestions[0].distance))
            {
                break;
            }
            if (candidate.distance > editDistanceMax)
            {
                break;
            }

            dictionaryItem entry;
            if (dictionary.TryGetValue(language + candidate.term, out entry))
            {
                //the candidate itself is a correct term
                if (!string.IsNullOrEmpty(entry.term))
                {
                    suggestItem exact = new suggestItem { term = entry.term, count = entry.count, distance = candidate.distance };
                    if (!suggestions.Contains(exact))
                    {
                        suggestions.Add(exact);
                        //early termination: the unmodified input is a known term
                        if ((verbose < 2) && (candidate.distance == 0))
                        {
                            break;
                        }
                    }
                }

                //the candidate is a delete that points to one or more correct terms
                foreach (editItem suggestion in entry.suggestions)
                {
                    //skip terms that are already suggested (saves time)
                    if (suggestions.Find(s => s.term == suggestion.term) != null)
                    {
                        continue;
                    }

                    int distance = TrueDistance(suggestion, candidate, input);

                    //verbose<2: drop all existing suggestions of a higher distance
                    if ((verbose < 2) && (suggestions.Count > 0) && (suggestions[0].distance > distance))
                    {
                        suggestions.Clear();
                    }
                    //verbose<2: ignore distances higher than those already found
                    if ((verbose < 2) && (suggestions.Count > 0) && (distance > suggestions[0].distance))
                    {
                        continue;
                    }
                    if (distance > editDistanceMax)
                    {
                        continue;
                    }

                    dictionaryItem original;
                    if (!dictionary.TryGetValue(language + suggestion.term, out original))
                    {
                        continue;
                    }
                    suggestions.Add(new suggestItem { term = original.term, count = original.count, distance = distance });
                }
            }

            //queue the deletes of this candidate while the maximum distance is not yet reached
            if (candidate.distance >= editDistanceMax)
            {
                continue;
            }
            foreach (editItem delete in Edits(candidate.term, candidate.distance, false))
            {
                if (!pending.Contains(delete))
                {
                    pending.Add(delete);
                }
            }
        }

        //lowest distance first; among equal distances the highest count first
        suggestions = suggestions.OrderBy(c => c.distance).ThenByDescending(c => c.count).ToList();

        //verbose==0: only the top suggestion is returned
        return ((verbose == 0) && (suggestions.Count > 1)) ? suggestions.GetRange(0, 1) : suggestions;
    }
Esempio n. 8
0
    //Produces only deletes (no transposes, replaces or inserts), which is inexpensive and
    //language independent; replaces and inserts are expensive and language dependent
    //(Chinese has 70,000 Unicode Han characters).
    //Each delete is stored with distance editDistance+1. If recursion is requested and that
    //distance is below editDistanceMax, the deletes of every delete are collected too.
    //A word shorter than two characters produces no deletes.
    private static List<editItem> Edits(string word, int editDistance, bool recursion)
    {
        var collected = new List<editItem>();
        int depth = editDistance + 1;

        //recursion continues only if the maximum edit distance is not yet reached
        bool descend = recursion && (depth < editDistanceMax);

        if (word.Length > 1)
        {
            for (int index = 0; index < word.Length; index++)
            {
                editItem single = new editItem();
                single.term = word.Remove(index, 1);
                single.distance = depth;

                bool isNew = !collected.Contains(single);
                if (isNew)
                {
                    collected.Add(single);
                }

                //sub-deletes are generated only for a delete that was newly added
                if (isNew && descend)
                {
                    foreach (editItem nested in Edits(single.term, depth, recursion))
                    {
                        if (!collected.Contains(nested))
                        {
                            collected.Add(nested);
                        }
                    }
                }
            }
        }

        return collected;
    }
Esempio n. 9
0
    //Counts one occurrence of a word, stored under language+key, and - the first time the word
    //occurs - adds all of its deletes (Edits(key, 0, true)) to the dictionary.
    //Every delete entry has a suggestions list that points to the original term(s) it was created from.
    //The dictionary may be dynamically updated (word frequency and new words) at any time by
    //calling this method.
    //The return value is true only when this call created the deletes for the word.
    private static bool CreateDictionaryEntry(string key, string language)
    {
        string wordKey = language + key;
        dictionaryItem wordEntry;

        if (dictionary.TryGetValue(wordKey, out wordEntry))
        {
            //already exists:
            //1. the word appears several times
            //2. the word equals a delete of another word
            wordEntry.count++;
        }
        else
        {
            wordEntry = new dictionaryItem();
            wordEntry.count++;
            dictionary.Add(wordKey, wordEntry);
        }

        //deletes are created only once, no matter how often the word occurs, and only as soon as
        //the word itself occurs, even if the same string existed before as a delete of another word
        bool firstOccurrence = string.IsNullOrEmpty(wordEntry.term);
        if (firstOccurrence)
        {
            wordEntry.term = key;

            foreach (editItem delete in Edits(key, 0, true))
            {
                editItem origin = new editItem();
                origin.term = key;
                origin.distance = delete.distance;

                dictionaryItem deleteEntry;
                bool known = dictionary.TryGetValue(language + delete.term, out deleteEntry);
                if (!known)
                {
                    deleteEntry = new dictionaryItem();
                    deleteEntry.suggestions.Add(origin);
                    dictionary.Add(language + delete.term, deleteEntry);
                }
                else if (!deleteEntry.suggestions.Contains(origin))
                {
                    //already exists:
                    //1. the delete equals another word
                    //2. the delete equals a delete of another word
                    AddLowestDistance(deleteEntry.suggestions, origin);
                }
            }
        }

        return firstOccurrence;
    }
Esempio n. 10
0
 //Saves some time and space by keeping, for verbose<2, only suggestions of the lowest distance
 //(the first list element is taken as the reference).
 private static void AddLowestDistance(List<editItem> suggestions, editItem suggestion)
 {
     //remove all existing suggestions of a higher distance, if verbose<2
     bool bestOnly = verbose < 2;
     if (bestOnly && (suggestions.Count > 0) && (suggestions[0].distance > suggestion.distance))
     {
         suggestions.Clear();
     }

     //do not add a suggestion of a higher distance than the existing ones, unless verbose==2
     bool accepted;
     if (verbose == 2)
     {
         accepted = true;
     }
     else if (suggestions.Count == 0)
     {
         accepted = true;
     }
     else
     {
         accepted = suggestions[0].distance >= suggestion.distance;
     }

     if (accepted)
     {
         suggestions.Add(suggestion);
     }
 }
Esempio n. 11
0
    private static List <suggestItem> Lookup(string input, string language, int editDistanceMax)
    {
        //terms waiting to be looked up; the original term goes in first with distance 0
        var queue = new List<editItem>();
        editItem start = new editItem();
        start.distance = 0;
        start.term     = input;
        queue.Add(start);

        var suggestions = new List<suggestItem>();

        while (queue.Count > 0)
        {
            //take the oldest queued term
            editItem candidate = queue[0];
            queue.RemoveAt(0);

            //early termination (saves time):
            //suggestion distance = candidate.distance ... candidate.distance+editDistanceMax;
            //if the candidate distance is already higher than the suggestion distance, then no
            //better suggestions are to be expected. The search also stops once the candidate
            //distance exceeds editDistanceMax.
            bool pastBest = (verbose < 2) && (suggestions.Count > 0) && (candidate.distance > suggestions[0].distance);
            if (pastBest || (candidate.distance > editDistanceMax))
            {
                break;
            }

            dictionaryItem hit;
            if (dictionary.TryGetValue(language + candidate.term, out hit))
            {
                //correct term
                if (!string.IsNullOrEmpty(hit.term))
                {
                    suggestItem direct = new suggestItem();
                    direct.term     = hit.term;
                    direct.count    = hit.count;
                    direct.distance = candidate.distance;

                    if (!suggestions.Contains(direct))
                    {
                        suggestions.Add(direct);
                        //early termination
                        if ((verbose < 2) && (candidate.distance == 0))
                        {
                            break;
                        }
                    }
                }

                //edit term (with suggestions that lead to correct terms)
                foreach (editItem suggestion in hit.suggestions)
                {
                    //skip double items early (saves time)
                    bool alreadyListed = suggestions.Find(s => s.term == suggestion.term) != null;
                    if (!alreadyListed)
                    {
                        int distance = TrueDistance(suggestion, candidate, input);

                        if ((verbose < 2) && (suggestions.Count > 0))
                        {
                            var best = suggestions[0].distance;
                            if (best > distance)
                            {
                                //remove all existing suggestions of a higher distance
                                suggestions.Clear();
                            }
                            else if (distance > best)
                            {
                                //do not process higher distances than those already found
                                continue;
                            }
                        }

                        dictionaryItem origin;
                        if ((distance <= editDistanceMax) && dictionary.TryGetValue(language + suggestion.term, out origin))
                        {
                            suggestItem viaDelete = new suggestItem();
                            viaDelete.term     = origin.term;
                            viaDelete.count    = origin.count;
                            viaDelete.distance = distance;

                            suggestions.Add(viaDelete);
                        }
                    }
                }
            }

            //add the deletes of this candidate, unless the maximum distance is reached
            if (candidate.distance < editDistanceMax)
            {
                foreach (editItem delete in Edits(candidate.term, candidate.distance, false))
                {
                    if (!queue.Contains(delete))
                    {
                        queue.Add(delete);
                    }
                }
            }
        }

        //order by ascending distance, then by descending count
        suggestions = suggestions.OrderBy(c => c.distance).ThenByDescending(c => c.count).ToList();

        //verbose==0 returns just the first suggestion
        bool topOnly = (verbose == 0) && (suggestions.Count > 1);
        return topOnly ? suggestions.GetRange(0, 1) : suggestions;
    }