/// <summary>
/// Computes the TF-IDF style rank contribution of one word occurrence for the given query.
/// </summary>
/// <param name="occ">Occurrence of a word in a document; its Hits count is used as the in-document term count.</param>
/// <param name="query">Parsed query; items whose WordID equals the occurrence's word are counted.</param>
/// <returns>
/// (number of matching query items) * (saturated term frequency) * idf,
/// or 0.0 when the word's document frequency is not positive.
/// </returns>
public double CalcRankFactor(WordOccurrenceNode occ, Query query)
{
    // How many times the occurrence's word appears among the query items.
    int countTermQuery = 0;
    foreach (QueryItem item in query.QueryItens)
    {
        if (item.WordID == occ.Word.WordID)
        {
            countTermQuery++;
        }
    }

    // Work in double from the start: the previous all-integer expression
    // truncated the quotient before it was stored in a double.
    double docFrequency = (double)occ.Word.QuantityDocFrequency;
    double countTermDoc = (double)occ.Hits.Count;

    // With no document frequency the idf below is undefined (division by zero),
    // so contribute nothing instead of propagating Infinity/NaN into the ranking.
    if (docFrequency <= 0.0)
    {
        return 0.0;
    }

    // Saturating term frequency: ((df + 1) * tf) / (tf + df).
    double bm25Tf = ((docFrequency + 1) * countTermDoc) / (countTermDoc + docFrequency);

    // Inverse document frequency: log((N + 1) / df).
    double idf = Math.Log((((double)totalDocQuantity) + 1) / docFrequency);

    return ((double)countTermQuery) * bm25Tf * idf;
}
/// <summary>
/// Computes a BM25-style rank contribution of one word occurrence for the given query.
/// The result is the product of an idf factor, a length-normalized term weight and a query-term weight.
/// </summary>
/// <param name="occ">Occurrence of a word in a document (supplies hit count, document length and document frequency).</param>
/// <param name="query">Parsed query whose items are matched against the occurrence's word by WordID.</param>
/// <returns>idf * term weight * query weight.</returns>
public double CalcRankFactor(WordOccurrenceNode occ, Query query)
{
    // Count the query items that refer to this occurrence's word.
    int matchingItems = 0;
    foreach (QueryItem item in query.QueryItens)
    {
        if (item.WordID == occ.Word.WordID)
        {
            matchingItems++;
        }
    }

    // Query side: relative frequency of the term in the query, saturated by k3.
    double queryTermFrequency = ((double)matchingItems / (double)query.QueryItens.Count);
    double queryWeight = ((k3 + 1) * queryTermFrequency) / (k3 + queryTermFrequency);

    // Collection side: plain log(N / df). A smoothed variant,
    // log((N - df + 0.5) / (df + 0.5)), was left disabled in an earlier revision.
    double docFrequency = (double)occ.Word.QuantityDocFrequency;
    double idf = Math.Log(((double)totalDocQuantity) / ((double)docFrequency));

    // Document side: hits relative to document length, then normalized.
    // NOTE(review): textbook BM25 scales the whole length term by k1
    // (k1 * ((1 - b) + b * dl / avdl)); here only (1 - b) is scaled. Confirm this is intended.
    double termFrequency = ((double)occ.Hits.Count / (double)occ.Doc.WordQuantity);
    double lengthNormalizer = ((k1 * (1 - b)) + (b * (occ.Doc.WordQuantity / avdl))) + termFrequency;
    double termWeight = ((k1 + 1) * termFrequency) / lengthNormalizer;

    return idf * termWeight * queryWeight;
}
/// <summary>
/// Looks up the words of a parsed query in the indexer.
/// Only the leading query items are considered, up to maxSentence of them.
/// </summary>
/// <param name="parsedQuery">Query whose items supply the WordIDs to search for.</param>
/// <returns>The words the indexer returned, in query order; items with no match are skipped.</returns>
private List<Word> FindWords(Query parsedQuery)
{
    List<Word> matches = new List<Word>();

    int position = 0;
    while ((position < parsedQuery.QueryItens.Count) && (position < maxSentence))
    {
        Word candidate = indexer.Search(parsedQuery.QueryItens[position].WordID);
        position++;

        // The indexer yields null for a word it does not know.
        if (candidate == null)
        {
            continue;
        }

        matches.Add(candidate);
    }

    return matches;
}
/// <summary>
/// Runs a query and returns the documents that contain any of its words, best ranked first.
/// </summary>
/// <param name="query">Raw query text; it is passed to the Query constructor.</param>
/// <returns>One DocumentResult per distinct DocID encountered, sorted by descending QueryRank.</returns>
public List<DocumentResult> Search(string query)
{
    Query parsedQuery = new Query(query);
    Hashtable docsById = new Hashtable();

    // Merge the occurrence lists of every found word into one result per document.
    foreach (Word word in FindWords(parsedQuery))
    {
        foreach (WordOccurrenceNode occurrence in InvertedFileManager.Instance.GetWordOccurrencies(word))
        {
            // The Hashtable indexer yields null for an unknown key, so a null here means
            // this document has not been seen yet.
            DocumentResult result = docsById[occurrence.Doc.DocID] as DocumentResult;
            if (result == null)
            {
                result = new DocumentResult(occurrence.Doc);
                docsById.Add(result.DocID, result);
            }

            // Every occurrence adds its share to the document's rank.
            result.CalculateRank(occurrence, parsedQuery);
        }
    }

    // Flatten the table into a list.
    List<DocumentResult> ranked = new List<DocumentResult>();
    foreach (DocumentResult doc in docsById.Values)
    {
        ranked.Add(doc);
    }

    // Operands are swapped in the comparison so the sort is descending by QueryRank.
    ranked.Sort((left, right) => right.QueryRank.CompareTo(left.QueryRank));
    return ranked;
}
/// <summary>
/// Adds the rank contribution of one word occurrence to this result's accumulated query rank.
/// </summary>
/// <param name="occ">Occurrence to score.</param>
/// <param name="query">Parsed query the occurrence is scored against.</param>
public void CalculateRank(WordOccurrenceNode occ, Query query)
{
    // The scoring function is obtained from the factory on every call.
    this.queryRank += FactoryRankFunction.GetRankFunction().CalcRankFactor(occ, query);
}
/// <summary>
/// Runs a query by walking each found word's occurrence chain and returns the
/// matching documents, best ranked first.
/// </summary>
/// <param name="query">Raw query text; it is passed to the Query constructor.</param>
/// <returns>One DocumentResult per distinct DocID encountered, sorted by descending QueryRank.</returns>
public List<DocumentResult> Search(string query)
{
    Query parsedQuery = new Query(query);
    Hashtable docsById = new Hashtable();

    foreach (Word word in FindWords(parsedQuery))
    {
        // Visit the first occurrence and then every node reachable through NextOccurrence.
        // NOTE(review): an older comment here reported that occurrences of later words were
        // dropped during the merge; the code below does accumulate into an existing result.
        WordOccurrenceNode node = word.FirstOccurrence;
        while (true)
        {
            // The Hashtable indexer yields null for an unknown key, so a null here means
            // this document has not been seen yet.
            DocumentResult result = docsById[node.Doc.DocID] as DocumentResult;
            if (result == null)
            {
                result = new DocumentResult(node.Doc);
                docsById.Add(result.DocID, result);
            }

            result.CalculateRank(node, parsedQuery);

            if (!node.HasNext())
            {
                break;
            }

            node = node.NextOccurrence;
        }
    }

    // Flatten the table into a list.
    List<DocumentResult> ranked = new List<DocumentResult>();
    foreach (DocumentResult doc in docsById.Values)
    {
        ranked.Add(doc);
    }

    // Operands are swapped in the comparison so the sort is descending by QueryRank.
    ranked.Sort((left, right) => right.QueryRank.CompareTo(left.QueryRank));
    return ranked;
}