/// <summary>
/// Decides whether a dictionary word is close enough to the checked word to be
/// offered as a suggestion, based on the value returned by <c>getDistance</c>.
/// </summary>
/// <param name="dicWord">The word taken from the dictionary.</param>
/// <param name="toBeCheckedWord">The possibly misspelled word being checked.</param>
/// <param name="theMissSpelling">
/// The misspelling record to update. When the words are considered similar it is
/// created if it is null and its <c>TextPosition</c> is set to the result of
/// <c>getPosition</c>; otherwise it is left untouched.
/// </param>
/// <returns>
/// <c>true</c> when <c>getDistance(dicWord, toBeCheckedWord)</c> is 2 or less;
/// otherwise <c>false</c>.
/// </returns>
private bool similarityCheck(string dicWord, string toBeCheckedWord, ref Misspelling theMissSpelling)
{
    // NOTE(review): getDistance is presumably an edit distance between the two words - confirm.
    int distance = getDistance(dicWord, toBeCheckedWord);
    if (distance > 2)
    {
        // Too far apart: not a candidate suggestion.
        return false;
    }

    // The caller may not have created the record yet.
    if (theMissSpelling == null)
    {
        theMissSpelling = new Misspelling();
    }

    theMissSpelling.TextPosition = getPosition(dicWord, toBeCheckedWord);
    return true;
}
/// <summary>
/// Compares two words of equal length character by character. When at least one
/// position differs, but no more than 30% of the positions differ, the dictionary
/// word is considered a plausible correction.
/// </summary>
/// <param name="dicWord">The word taken from the dictionary.</param>
/// <param name="toBeCheckedWord">The possibly misspelled word being checked.</param>
/// <param name="theMissSpelling">
/// The misspelling record to update. On a match it is created if it is null and its
/// <c>TextPosition</c> is set to the index of the first differing character. It is
/// left untouched when the method returns <c>false</c>. Because the value is
/// overwritten on every match, it reflects only the most recently matched dictionary word.
/// </param>
/// <returns>
/// <c>true</c> when both words are non-empty, have the same length, and differ in
/// between one position and 30% of their positions (inclusive); otherwise <c>false</c>.
/// </returns>
private bool projectionCheck(string dicWord, string toBeCheckedWord, ref Misspelling theMissSpelling)
{
    // Only words of identical, non-zero length can be compared position by position.
    if (string.IsNullOrEmpty(dicWord) ||
        string.IsNullOrEmpty(toBeCheckedWord) ||
        dicWord.Length != toBeCheckedWord.Length)
    {
        return false;
    }

    int failNum = 0;
    int firstMismatch = -1;
    for (int i = 0; i < dicWord.Length; i++)
    {
        if (dicWord[i] != toBeCheckedWord[i])
        {
            // Remember only the first differing position; that is what gets reported.
            if (firstMismatch < 0)
            {
                firstMismatch = i;
            }

            failNum++;
        }
    }

    // Identical words need no suggestion.
    if (failNum == 0)
    {
        return false;
    }

    // failNum / length <= 0.3, evaluated with exact integer arithmetic.
    // Comparing a float ratio against the double literal 0.3 rejects an exact 30%
    // ratio (e.g. 3 of 10), because 0.3f widens to a value slightly above 0.3.
    if ((long)failNum * 10 > (long)dicWord.Length * 3)
    {
        return false;
    }

    if (theMissSpelling == null)
    {
        theMissSpelling = new Misspelling();
    }

    theMissSpelling.TextPosition = firstMismatch;
    return true;
}
/// <summary>
/// Main entry point: given a string of several words, returns one
/// <c>Misspelling</c> entry for every word that is not found in the dictionary.
/// </summary>
/// <param name="text">
/// The text to check. It is split on '?', ':', ' ', '!', ';', '.', '&lt;' and '&gt;'.
/// A null or empty string yields an empty result.
/// </param>
/// <returns>
/// An array with one entry per unknown word, in order of appearance. Each entry carries
/// the word, the dictionary words accepted by <c>isMissSpell</c> as suggestions (both as
/// an array and as a comma-joined string), and a <c>TextPosition</c> that starts at -1
/// and may be updated while the suggestions are collected. The array is empty when
/// nothing is misspelled; it is never null.
/// </returns>
public Misspelling[] CheckText(string text)
{
    List<Misspelling> theMissSpellingList = new List<Misspelling>();

    // Nothing to check: return an empty result rather than failing on a null reference.
    if (string.IsNullOrEmpty(text))
    {
        return theMissSpellingList.ToArray();
    }

    // Get the original word list from the text.
    string[] words = text.Split(new Char[] { '?', ':', ' ', '!', ';', '.', '<', '>' }, StringSplitOptions.RemoveEmptyEntries);

    foreach (var word in words)
    {
        // Remove surrounding whitespace such as tabs and line breaks.
        string wordToBeCheck = word.Trim();

        // Remove a trailing possessive 's. Strings are immutable, so the result must be
        // assigned back; only the suffix is removed, not every occurrence of "'s".
        if (wordToBeCheck.EndsWith("'s", StringComparison.Ordinal))
        {
            wordToBeCheck = wordToBeCheck.Substring(0, wordToBeCheck.Length - 2);
        }

        // A token made only of whitespace, or only of "'s", is not a word.
        if (wordToBeCheck.Length == 0)
        {
            continue;
        }

        // A word that is present in the dictionary is spelled correctly.
        if (dictionary.Any(s => s == wordToBeCheck))
        {
            continue;
        }

        // The word is unknown: record it and collect every dictionary word that
        // isMissSpell accepts as a suggestion. TextPosition may be updated through the ref.
        Misspelling theMissSpelling = new Misspelling() { Word = wordToBeCheck, TextPosition = -1 };
        var suggestionWords = dictionary.Where(s => isMissSpell(s, wordToBeCheck, ref theMissSpelling)).ToArray();

        theMissSpelling.Suggestions = suggestionWords;
        theMissSpelling.Suggestion = String.Join(",", suggestionWords);
        theMissSpellingList.Add(theMissSpelling);
    }

    return theMissSpellingList.ToArray();
}
/// <summary>
/// Predicate used while collecting suggestions: tells whether a dictionary word
/// should be suggested as a correction for the checked word.
/// </summary>
/// <param name="dicWord">The word taken from the dictionary.</param>
/// <param name="toBeCheckedWord">The possibly misspelled word being checked.</param>
/// <param name="theMissSpelling">
/// The misspelling record, passed through to <c>projectionCheck</c> and
/// <c>similarityCheck</c>, which may create or update it.
/// </param>
/// <returns>
/// <c>false</c> when the two words are equal; otherwise <c>true</c> if either
/// <c>projectionCheck</c> or <c>similarityCheck</c> accepts the pair.
/// </returns>
private bool isMissSpell(string dicWord, string toBeCheckedWord, ref Misspelling theMissSpelling)
{
    // An exact match is a correctly spelled word, so there is nothing to suggest.
    if (dicWord == toBeCheckedWord)
    {
        return false;
    }

    // Condition 1: same length, only a few characters differ.
    bool sameLengthMatch = projectionCheck(dicWord, toBeCheckedWord, ref theMissSpelling);

    // Condition 2 is evaluated only when condition 1 fails: the lengths may differ,
    // for example because a character was inserted into the correct word.
    return sameLengthMatch || similarityCheck(dicWord, toBeCheckedWord, ref theMissSpelling);
}