/// <summary>
/// Grades every entry of TextsToBeSearched against the given search text and
/// collects the entries that reach the minimum grade.
/// </summary>
/// <param name="searchTxt">Search expression; it is stored in SearchText and wrapped in a WordSet.</param>
/// <returns>
/// The result entries whose MatchGrade is at least MinimumMatchGrade,
/// ordered by the parameterless List.Sort().
/// </returns>
public List<SearchResultEntry> DoSearch(string searchTxt)
{
    SearchText = searchTxt;
    WordSet queryWords = new WordSet(searchTxt);
    List<SearchResultEntry> hits = new List<SearchResultEntry>();

    // The per-tag word sets are rebuilt from scratch on every search.
    WordSetPerTag = new Dictionary<object, WordSet>();

    foreach (TaggedText candidate in TextsToBeSearched)
    {
        WordSet candidateWords = new WordSet(candidate.Text);
        WordSetPerTag.Add(candidate.Tag, candidateWords);

        SearchResultEntry entry = CalculateGrade(candidateWords, queryWords);
        entry.TagMark = candidate.Tag;

        // Keep only entries that reach the configured threshold.
        if (entry.MatchGrade >= MinimumMatchGrade)
        {
            hits.Add(entry);
        }
    }

    hits.Sort();
    return hits;
}
/// <summary>
/// Calculate the grade of equality for two word sets.
/// </summary>
/// <remarks>
/// Every word of <paramref name="ws2"/> is looked up in <paramref name="ws1"/>.
/// An exact hit scores 1.0. Otherwise the word of <paramref name="ws1"/> with the
/// smallest Levenshtein distance is taken and scored as
/// (word length - distance) / word length; that score is only counted when it
/// reaches MinimumMatchGrade. The overall grade is the sum of the counted scores
/// divided by the number of words in <paramref name="ws2"/>.
/// </remarks>
/// <param name="ws1">Word set of the text that is searched in.</param>
/// <param name="ws2">Word set of the search expression.</param>
/// <returns>
/// A search result entry containing information about the found elements:
/// the matched search words, their counterparts in <paramref name="ws1"/>,
/// the per-word grades, the matching entries of ws1.Positions and the overall
/// MatchGrade (0.0 when <paramref name="ws2"/> contains no words).
/// </returns>
private SearchResultEntry CalculateGrade(WordSet ws1, WordSet ws2)
{
    SearchResultEntry answ = new SearchResultEntry();
    answ.MatchTexts = new List<string>();
    answ.CounterMatchTexts = new List<string>();
    answ.MatchGrades = new List<double>();
    answ.MatchPositions = new List<int>();

    // Without search words the final division would be 0.0 / 0 and yield NaN;
    // report "no match" explicitly instead.
    if (ws2.Words.Count == 0)
    {
        answ.MatchGrade = 0.0;
        return answ;
    }

    double gradeSum = 0.0;

    foreach (string w2 in ws2.Words)
    {
        // Exact match: full score for this word.
        int matchIdx = ws1.Words.IndexOf(w2);
        if (matchIdx >= 0)
        {
            answ.MatchTexts.Add(w2);
            answ.CounterMatchTexts.Add(w2);
            answ.MatchGrades.Add(1.0);
            answ.MatchPositions.Add(ws1.Positions[matchIdx]);
            gradeSum += 1.0;
            continue;
        }

        // No exact match: find the word of ws1 closest to w2 (first one wins on ties).
        int minldist = int.MaxValue;
        int minidx = -1;
        string minCounterMatch = null;
        for (int i = 0; i < ws1.Words.Count; i++)
        {
            string w1 = ws1.Words[i];
            int currldist = LevenshteinDistance(w1, w2);
            if (currldist < minldist)
            {
                minldist = currldist;
                minidx = i;
                minCounterMatch = w1;
            }
        }

        // ws1 has no words at all: there is nothing to compare against, so this
        // search word contributes nothing (and ws1.Positions must not be indexed).
        if (minidx < 0)
        {
            continue;
        }

        // Relative similarity; it is negative when the distance exceeds the length of w2.
        double currGrade = (double)(w2.Length - minldist) / w2.Length;
        if (currGrade >= MinimumMatchGrade)
        {
            answ.MatchTexts.Add(w2);
            answ.MatchGrades.Add(currGrade);
            answ.CounterMatchTexts.Add(minCounterMatch);
            answ.MatchPositions.Add(ws1.Positions[minidx]);
            gradeSum += currGrade;
        }
    }

    // Average over all search words, including those that found no counterpart.
    answ.MatchGrade = gradeSum / (double)ws2.Words.Count;
    return answ;
}