/// <summary>
/// Scores how well a result matches the original (non-synonym) search words:
/// 0.6 if the matched words appear in the same order as in the query, plus
/// 0.4 if every original search word is present. The sum is passed through Normalize.
/// </summary>
/// <param name="query">The raw query text (not used by this ranker).</param>
/// <param name="words">The search words, in query order; only OriginalWord is consulted.</param>
/// <param name="queryResult">The result whose WordIndexes are inspected.</param>
/// <returns>The normalized rank contribution.</returns>
private static decimal RankWordMatching(string query, List<Synonym> words, QueryResult queryResult) {
    bool rightOrder = true;
    bool containsAllWords = true;
    int lastIndex = -1;

    foreach (Synonym word in words) {
        // Only the first reference for each word is considered. The comparison is
        // case-insensitive so this ranker agrees with RankMultipleOccurance, and the
        // static string.Equals keeps it null-safe like the '==' it replaces.
        WordRef match = queryResult.WordIndexes.FirstOrDefault(
            n => string.Equals(n.Word, word.OriginalWord, StringComparison.CurrentCultureIgnoreCase));

        if (match == null) {
            // Keep scanning: a missing word must not leave the order flag unverified.
            containsAllWords = false;
        }
        else if (lastIndex < match.PhraseIndex) {
            lastIndex = match.PhraseIndex;
        }
        else {
            // Keep scanning: later words may still turn out to be missing.
            rightOrder = false;
        }

        if (!rightOrder && !containsAllWords) {
            // Both flags are already false; nothing further can change the score.
            break;
        }
    }

    decimal result = (rightOrder ? 0.6m : 0m) + (containsAllWords ? 0.4m : 0m);
    return Normalize(result);
}
/// <summary>
/// Scores a result by how small the accumulated "distance" between neighbouring
/// word references is: the larger the accumulated distance, the lower the score.
/// The raw score is passed through Normalize.
/// </summary>
/// <param name="query">The raw query text (not used by this ranker).</param>
/// <param name="words">The search words; only their count is used.</param>
/// <param name="queryResult">The result whose WordIndexes are examined in PhraseIndex order.</param>
/// <returns>The normalized rank contribution.</returns>
private static decimal RankSearchTermsProximity(string query, List<Synonym> words, QueryResult queryResult) {
    List<WordRef> ordered = queryResult.WordIndexes.OrderBy(r => r.PhraseIndex).ToList();

    int totalGap = 0;
    for (int next = 1; next < ordered.Count; next++) {
        WordRef left = ordered[next - 1];
        WordRef right = ordered[next];

        // NOTE(review): this adds the two (PhraseIndex - Word.Length) offsets rather than
        // subtracting them, so it is not a distance in the usual sense - confirm intent.
        int gap = (left.PhraseIndex - left.Word.Length) + (right.PhraseIndex - right.Word.Length);

        // Negative values are ignored rather than subtracted from the total.
        if (gap >= 0) {
            totalGap += gap;
        }
    }

    int termCount = words.Count;
    decimal denominator = totalGap + termCount + 1.0m;
    decimal score = (termCount / denominator) * (termCount * 2);
    return Normalize(score);
}
/// <summary>
/// Scores a result by how small the offsets of its word references are: the sum of the
/// positive (PhraseIndex - Word.Length) values is averaged over the number of search
/// words, and a smaller average gives a higher score. The raw score is passed through Normalize.
/// </summary>
/// <param name="query">The raw query text (not used by this ranker).</param>
/// <param name="words">The search words; only their count is used.</param>
/// <param name="queryResult">The result whose WordIndexes are summed.</param>
/// <returns>The normalized rank contribution; Normalize(0) when <paramref name="words"/> is empty.</returns>
private static decimal RankLowPhraseIndex(string query, List<Synonym> words, QueryResult queryResult) {
    decimal offsetSum = 0;
    foreach (WordRef wr in queryResult.WordIndexes) {
        int offset = wr.PhraseIndex - wr.Word.Length;
        // Only strictly positive offsets contribute to the sum.
        if (offset > 0) {
            offsetSum += offset;
        }
    }

    // Guard against an empty word list, matching RankMultipleOccurance; dividing a
    // decimal by zero would otherwise throw DivideByZeroException.
    int totalWords = (words.Count == 0 ? 1 : words.Count);

    decimal result = (words.Count / ((offsetSum / totalWords) + 1)) * (words.Count * 2);
    return Normalize(result);
}
/// <summary>
/// Scores a result by how many word references it has. Every reference counts once,
/// and a reference whose Word equals one of the original search words (ignoring case)
/// counts a second time. The total is divided by the number of search words and by 10,
/// then passed through Normalize.
/// </summary>
/// <param name="query">The raw query text (not used by this ranker).</param>
/// <param name="words">The search words; only OriginalWord is consulted.</param>
/// <param name="queryResult">The result whose WordIndexes are counted.</param>
/// <returns>The normalized rank contribution.</returns>
private static decimal RankMultipleOccurance(string query, List<Synonym> words, QueryResult queryResult) {
    // Tally the references that exactly match each original search word.
    decimal exactHits = 0;
    foreach (Synonym term in words) {
        exactHits += queryResult.WordIndexes.Count(
            hit => hit.Word.Equals(term.OriginalWord, StringComparison.CurrentCultureIgnoreCase));
    }

    // An empty word list divides by 1 instead of 0.
    int divisor = words.Count;
    if (divisor == 0) {
        divisor = 1;
    }

    decimal weightedTotal = exactHits + queryResult.WordIndexes.Count;
    decimal score = (weightedTotal / divisor) / 10;
    return Normalize(score);
}
/// <summary>
/// Combines the raw per-word lookup results with the intersection list: keeps only the
/// word references whose Key appears in <paramref name="resultList"/> and groups them by Key.
/// </summary>
/// <param name="results">The word lookup results: one list of word references per synonym.</param>
/// <param name="resultList">The intersection of the result lists, i.e. the keys to keep.</param>
/// <returns>
/// A SortedList keyed by the reference Key; each QueryResult holds the word references
/// that matched that key. Empty when <paramref name="resultList"/> is null or empty.
/// </returns>
private static SortedList<int, QueryResult> PivitQuery(Dictionary<Synonym, List<WordRef>> results, List<int> resultList) {
    SortedList<int, QueryResult> queryResults = new SortedList<int, QueryResult>();

    // With no keys to keep, nothing can match.
    if (resultList == null || resultList.Count == 0) {
        return queryResults;
    }

    // Hash the wanted keys once so each membership test is O(1) instead of a list scan.
    HashSet<int> wantedKeys = new HashSet<int>(resultList);

    foreach (List<WordRef> refs in results.Values) {
        foreach (WordRef wr in refs) {
            if (!wantedKeys.Contains(wr.Key)) {
                continue;
            }

            // Single lookup: fetch the existing group, or create and register a new one.
            QueryResult group;
            if (!queryResults.TryGetValue(wr.Key, out group)) {
                group = new QueryResult() { Key = wr.Key, WordIndexes = new List<WordRef>() };
                queryResults.Add(wr.Key, group);
            }
            group.WordIndexes.Add(wr);
        }
    }

    return queryResults;
}