/// <summary>
/// Scores documents for <paramref name="query"/> by summing one contribution per
/// (query term, document) pair. Each contribution is
/// <c>(termsQueryFrequency[term] / number of query terms) * tf * idf</c>, where <c>tf</c>
/// comes from <c>ComputeTermFrequency</c> and <c>idf</c> from
/// <c>ComputeInverseDocumentFrequency</c>.
/// </summary>
/// <param name="query">Raw query text; it is turned into terms by <c>GetTerms</c>.</param>
/// <returns>
/// A map from document ID to accumulated score, with one entry for every document that
/// appears in the posting list of at least one looked-up term.
/// </returns>
// NOTE(review): another method with this exact name and signature appears next to this one
// in the source; confirm they belong to different classes.
public Dictionary<string, double> ProcessQueryTemp(string query)
{
    var termsToCompute = GetTerms(query);
    var termsQueryFrequency = GetTermsFrequency(termsToCompute);
    var nrTermsToCompute = termsToCompute.Count;
    var termsIndex = IndexRepo.GetIndexOfWords(termsToCompute);
    var documentsLength = GetDocumentsLength(termsIndex);
    var documentsScoresAccumulator = new Dictionary<string, double>();

    foreach (var termIndex in termsIndex)
    {
        var docs = termIndex.Docs;
        var nrDocs = docs.Count;

        // Nothing to accumulate for an empty posting list; skipping here also keeps the
        // per-term lookups below from running for terms that contribute no documents.
        if (nrDocs == 0)
        {
            continue;
        }

        // Both factors depend only on the term, so compute them once per term
        // instead of once per document.
        var queryWeight = ((double)termsQueryFrequency[termIndex.Term]) / nrTermsToCompute;
        var idf = ComputeInverseDocumentFrequency(nrDocs);

        foreach (var doc in docs)
        {
            var docId = doc.ID;
            var tf = ComputeTermFrequency(doc.Frequency, documentsLength[docId]);

            // TryGetValue leaves 'accumulated' at 0.0 when the document has no score yet.
            double accumulated;
            documentsScoresAccumulator.TryGetValue(docId, out accumulated);
            documentsScoresAccumulator[docId] = accumulated + queryWeight * tf * idf;
        }
    }

    return documentsScoresAccumulator;
}
/// <summary>
/// Scores documents for <paramref name="query"/> from how closely the relative positions of
/// term occurrences in a document match the relative positions of the same terms in the query.
/// </summary>
/// <remarks>
/// For every occurrence in a document, the element chosen by
/// <c>ComputeMinDistanceElementIndex</c> is used as its partner. The absolute difference
/// between their offset in the document and their offset in the query is passed to
/// <c>ReverseProximity</c>, and the result, multiplied by both terms' weights, is added to a
/// per-term accumulator that starts at <c>DefaultTermScore</c>. The document score is the sum
/// over accumulated terms of <c>weight * value / (K + value)</c>, with <c>K</c> obtained from
/// <c>ComputeK</c> for the document's length.
/// </remarks>
/// <param name="query">Raw query text; it is turned into terms by <c>GetTerms</c>.</param>
/// <returns>A map from document ID to its score, one entry per candidate document.</returns>
// NOTE(review): another method with this exact name and signature appears next to this one
// in the source; confirm they belong to different classes.
public Dictionary<string, double> ProcessQueryTemp(string query)
{
    var queryTerms = GetTerms(query);
    var termsIndex = IndexRepo.GetIndexOfWords(queryTerms);
    var queryPositions = GetTermsPosition(queryTerms);
    var weights = GetTermsWeight(termsIndex);
    var candidates = ComputeDocumentsIndexFromTermsIndex(termsIndex);
    var lengths = GetDocumentsLength(termsIndex);
    var scores = new Dictionary<string, double>();

    foreach (var candidate in candidates)
    {
        var docID = candidate.Key;
        var docLen = lengths[docID];

        // All term occurrences of this document merged into a single sequence.
        var occurrences = CustomMergeSort.MergeKSortedLists(candidate.Value);
        var proximityByTerm = new Dictionary<string, double>();

        for (int i = 0; i < occurrences.Count; ++i)
        {
            var current = occurrences[i];
            var term = current.Value;
            var nearest = occurrences[ComputeMinDistanceElementIndex(i, occurrences)];
            var nearestTerm = nearest.Value;

            var documentOffset = current.Key - nearest.Key;
            var queryOffset = queryPositions[term] - queryPositions[nearestTerm];

            // First sighting of a term starts from the default score.
            double running;
            if (!proximityByTerm.TryGetValue(term, out running))
            {
                running = DefaultTermScore;
            }

            var dist = Math.Abs(documentOffset - queryOffset);
            proximityByTerm[term] = running + weights[term] * weights[nearestTerm] * ReverseProximity(dist);
        }

        var k = ComputeK(docLen);
        var docScore = 0.0;
        foreach (var entry in proximityByTerm)
        {
            docScore += weights[entry.Key] * entry.Value / (k + entry.Value);
        }

        scores.Add(docID, docScore);
    }

    return scores;
}
/// <summary>
/// Returns the IDs of the documents that appear in the posting list of every index entry
/// returned by <c>IndexRepo.GetIndexOfWords</c> for <paramref name="termsToCompute"/>.
/// </summary>
/// <param name="termsToCompute">Terms to look up in the index.</param>
/// <returns>
/// The document IDs common to all returned index entries, or an empty list when the lookup
/// returns no entries.
/// </returns>
// NOTE(review): if GetIndexOfWords omits terms that are absent from the index, those terms
// do not constrain the intersection - confirm that this is the intended AND semantics.
public List<string> AND(List<string> termsToCompute)
{
    var termsIndex = IndexRepo.GetIndexOfWords(termsToCompute);
    if (termsIndex.Count == 0)
    {
        return new List<string>();
    }

    // Seed with the first entry's documents, then narrow with each remaining entry.
    var common = new HashSet<string>();
    common.UnionWith(termsIndex[0].Docs.Select(d => d.ID));

    for (int i = 1; i < termsIndex.Count; ++i)
    {
        common.IntersectWith(termsIndex[i].Docs.Select(d => d.ID));
    }

    return common.ToList();
}
/// <summary>
/// Looks up the index entries for <paramref name="termsToCompute"/> and returns the result of
/// passing them to <c>ComputeOR</c>.
/// </summary>
/// <param name="termsToCompute">Terms to look up in the index.</param>
/// <returns>
/// The list produced by <c>ComputeOR</c> - presumably the IDs of documents containing at
/// least one of the terms; confirm against <c>ComputeOR</c>.
/// </returns>
public List<string> OR(List<string> termsToCompute)
{
    return ComputeOR(IndexRepo.GetIndexOfWords(termsToCompute));
}