/// <summary>
/// Builds a new <see cref="DataListsClass"/> from the tokens of a single document.
/// </summary>
/// <remarks>
/// For every token in <paramref name="text"/> the method increments the token's entry in
/// <c>DF</c> (one increment per occurrence in the array) and makes sure the token's entry in
/// <c>postingLists</c> contains <paramref name="docID"/>. After all tokens are processed it
/// calls <c>CalculateTfStar()</c> on the result.
/// </remarks>
/// <param name="text">The document's tokens, in order. Must not be null.</param>
/// <param name="docID">Identifier recorded in the posting list of every token seen.</param>
/// <returns>The populated <see cref="DataListsClass"/> instance.</returns>
/// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
public static DataListsClass Tokenize(string[] text, int docID)
{
    if (text == null)
    {
        throw new ArgumentNullException(nameof(text));
    }

    DataListsClass lists = new DataListsClass();

    foreach (string token in text)
    {
        // One lookup via TryGetValue instead of ContainsKey followed by the indexer.
        if (lists.DF.TryGetValue(token, out var occurrences))
        {
            lists.DF[token] = occurrences + 1;
        }
        else
        {
            lists.DF.Add(token, 1);
        }

        if (lists.postingLists.TryGetValue(token, out var postings))
        {
            // NOTE(review): if a new DataListsClass starts with empty postingLists, any
            // existing list here already holds docID and this guard never fires — confirm
            // before simplifying.
            if (!postings.Contains(docID))
            {
                postings.AddLast(docID);
            }
        }
        else
        {
            var created = new LinkedList<int>();
            created.AddLast(docID);
            lists.postingLists.Add(token, created);
        }
    }

    lists.CalculateTfStar();
    return lists;
}
/// <summary>
/// Scores every document against a query and returns the documents ordered by
/// descending score.
/// </summary>
/// <remarks>
/// A document's score is the sum, over the terms in the query's <c>normalized_ft_idf</c>,
/// of the document's weight for that term multiplied by the query's weight for it. Terms
/// missing from the document contribute nothing, so a document sharing no terms with the
/// query scores 0 but is still included in the result.
/// NOTE(review): if the weights are unit-normalized this is a cosine similarity — confirm
/// against where <c>normalized_ft_idf</c> is computed.
/// </remarks>
/// <param name="query">Term weights of the query.</param>
/// <param name="documents">Term weights of each candidate document, keyed by its link. Must not be null.</param>
/// <returns>
/// One (link, score) pair per entry in <paramref name="documents"/>, highest score first.
/// The relative order of equal scores is not guaranteed because <c>List.Sort</c> is unstable.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="documents"/> is null.</exception>
public List<KeyValuePair<HTMLLink, double>> ReturnRankedResult(DataListsClass query, Dictionary<HTMLLink, DataListsClass> documents)
{
    if (documents == null)
    {
        throw new ArgumentNullException(nameof(documents));
    }

    var ranks = new List<KeyValuePair<HTMLLink, double>>(documents.Count);

    // Iterate the pairs directly so each document is not looked up again per query term.
    foreach (var document in documents)
    {
        var documentWeights = document.Value.normalized_ft_idf;
        double rank = 0;

        foreach (var queryEntry in query.normalized_ft_idf)
        {
            // Single lookup replaces Keys.Contains(term) followed by the indexer.
            if (documentWeights.TryGetValue(queryEntry.Key, out var documentWeight))
            {
                rank += documentWeight * queryEntry.Value;
            }
        }

        ranks.Add(new KeyValuePair<HTMLLink, double>(document.Key, rank));
    }

    // Comparing y to x (not x to y) yields descending order.
    ranks.Sort((x, y) => y.Value.CompareTo(x.Value));
    return ranks;
}