/// <summary>
/// Scores every entry of <c>results</c> against <c>queryString</c> and returns
/// the (document key, score) pairs ordered from highest to lowest score.
/// </summary>
/// <remarks>
/// <c>results</c> and <c>queryString</c> are members declared outside this method.
/// The vocabulary used to build all vectors is the union of the tokens of every
/// document built from <c>results</c>.
/// NOTE(review): ordering assumes a higher value from
/// <c>Vector.GetSimilarityScore</c> means "more relevant" - confirm against Vector.
/// </remarks>
/// <returns>
/// One pair per document, keyed by <c>Document.ToString()</c>, sorted by score
/// descending. Pairs with equal scores keep their insertion-enumeration order
/// because the LINQ ordering is stable.
/// </returns>
/// <exception cref="ArgumentException">
/// Thrown by the dictionary when two documents produce the same <c>ToString()</c> key.
/// </exception>
public List<KeyValuePair<string, double>> RankedResults()
{
    List<Document> documents = new List<Document>();
    HashSet<string> dataSet = new HashSet<string>();

    // First pass: materialize the documents and collect the shared vocabulary.
    // Vectors cannot be built yet because they need the complete term set.
    foreach (var result in results)
    {
        Document d = new Document(result);
        documents.Add(d);
        foreach (var term in d.tokens())
        {
            dataSet.Add(term);
        }
    }

    // Build document vectors over the full vocabulary.
    Dictionary<string, Vector> documentVectors = new Dictionary<string, Vector>();
    foreach (var document in documents)
    {
        documentVectors.Add(document.ToString(), new Vector(dataSet, document));
    }

    // Build the query vector in the same vocabulary space.
    Query query = new Query(queryString);
    Vector queryVector = new Vector(dataSet, query);

    Dictionary<string, double> relevance = new Dictionary<string, double>();
    foreach (var documentVector in documentVectors)
    {
        relevance.Add(documentVector.Key, Vector.GetSimilarityScore(queryVector, documentVector.Value));
    }

    // Sort results by most relevant first. The original returned the pairs in
    // dictionary enumeration order, i.e. not ranked at all.
    return relevance
        .OrderByDescending(entry => entry.Value)
        .ToList();
}
/// <summary>
/// Adds every token of <paramref name="document"/> to the static indexes and
/// then calls <c>serialize()</c>.
/// </summary>
/// <remarks>
/// For each token at position <c>i</c> in <c>document.tokens()</c>:
/// <list type="bullet">
/// <item><c>index[term][docKey]</c> receives the position <c>i</c>;</item>
/// <item><c>tfIndex[term][docKey]</c> counts occurrences of the term in this document;</item>
/// <item><c>dfIndex[term]</c> is incremented the first time the term is seen in this document.</item>
/// </list>
/// <c>docKey</c> is <c>document.ToString()</c>. <c>numOfDocuments</c> is incremented
/// once per call, even if the same document is added again.
/// </remarks>
/// <param name="document">The document whose tokens are indexed.</param>
public static void add(Document document)
{
    numOfDocuments += 1;

    List<string> terms = document.tokens();

    // The document key is the same for every token, so compute it once
    // instead of calling ToString() several times per iteration.
    string docKey = document.ToString();

    for (int i = 0; i < terms.Count; i++)
    {
        string term = terms[i];

        Dictionary<string, List<int>> postings;
        if (!index.TryGetValue(term, out postings))
        {
            // New term: create its entry in all three indexes.
            index[term] = new Dictionary<string, List<int>> { { docKey, new List<int> { i } } };
            tfIndex[term] = new Dictionary<string, int> { { docKey, 1 } };
            dfIndex[term] = 1;
            continue;
        }

        List<int> positions;
        if (postings.TryGetValue(docKey, out positions))
        {
            // Term already seen in this document: bump its frequency and
            // record the extra position. The postings dictionary is mutated
            // in place, so no write-back into index is needed.
            tfIndex[term][docKey] += 1;
            positions.Add(i);
        }
        else
        {
            // First occurrence in this document of a term that is already indexed.
            dfIndex[term] += 1;
            tfIndex[term][docKey] = 1;
            postings[docKey] = new List<int> { i };
        }
    }

    serialize();
}