예제 #1
0
        /// <summary>
        /// Scores the documents found in the index entries of the terms of <paramref name="query"/>.
        /// Every (term, document) posting adds
        /// <c>(query frequency of the term / termsToCompute.Count) * tf * idf</c>
        /// to that document's accumulated score, where <c>tf</c> comes from
        /// <c>ComputeTermFrequency(posting frequency, document length)</c> and <c>idf</c> from
        /// <c>ComputeInverseDocumentFrequency(number of postings of the term)</c>.
        /// </summary>
        /// <param name="query">Raw query text; it is turned into terms by <c>GetTerms</c>.</param>
        /// <returns>
        /// Accumulated score keyed by document ID. Only documents that occur in at least one
        /// posting list are present.
        /// </returns>
        public Dictionary <string, double> ProcessQueryTemp(string query)
        {
            var termsToCompute             = GetTerms(query);
            var termsQueryFrequency        = GetTermsFrequency(termsToCompute);
            var nrTermsToCompute           = termsToCompute.Count;
            var termsIndex                 = IndexRepo.GetIndexOfWords(termsToCompute);
            var documentsLength            = GetDocumentsLength(termsIndex);
            var documentsScoresAccumulator = new Dictionary <string, double>();

            foreach (var termIndex in termsIndex)
            {
                var docs   = termIndex.Docs;
                var nrDocs = docs.Count;

                // A term without postings contributes nothing. Skipping it here also guarantees
                // the per-term values below are only computed when at least one posting uses them.
                if (nrDocs == 0)
                {
                    continue;
                }

                // Both factors depend only on the term, not on the individual document,
                // so they are computed once per term instead of once per posting.
                var idf         = ComputeInverseDocumentFrequency(nrDocs);
                var queryWeight = ((double)termsQueryFrequency[termIndex.Term]) / nrTermsToCompute;

                foreach (var doc in docs)
                {
                    var tf = ComputeTermFrequency(doc.Frequency, documentsLength[doc.ID]);

                    // TryGetValue leaves the score at 0 for a document seen for the first time.
                    double accumulatedScore;
                    documentsScoresAccumulator.TryGetValue(doc.ID, out accumulatedScore);

                    // Same multiplication order as before: (queryWeight * tf) * idf.
                    documentsScoresAccumulator[doc.ID] = accumulatedScore + queryWeight * tf * idf;
                }
            }

            return(documentsScoresAccumulator);
        }
예제 #2
0
        /// <summary>
        /// Scores documents for <paramref name="query"/> using the relative positions of the query
        /// terms inside each document.
        /// For every term occurrence in a document, the element selected by
        /// <c>ComputeMinDistanceElementIndex</c> is looked up, the difference between the
        /// in-document offset and the in-query offset of the two terms is measured, and
        /// <c>weight(term) * weight(selected term) * ReverseProximity(difference)</c> is added to a
        /// per-term accumulator that starts at <c>DefaultTermScore</c>.
        /// The document score is the sum over terms of
        /// <c>weight(term) * acc / (K + acc)</c> with <c>K = ComputeK(document length)</c>.
        /// </summary>
        /// <param name="query">Raw query text; it is turned into terms by <c>GetTerms</c>.</param>
        /// <returns>Score keyed by document ID, one entry per document in the documents index.</returns>
        public Dictionary <string, double> ProcessQueryTemp(string query)
        {
            var termsToCompute      = GetTerms(query);
            var termsIndex          = IndexRepo.GetIndexOfWords(termsToCompute);
            var termsQueryPositions = GetTermsPosition(termsToCompute);
            var termsWeight         = GetTermsWeight(termsIndex);
            var documentsToScore    = ComputeDocumentsIndexFromTermsIndex(termsIndex);
            var documentsLen        = GetDocumentsLength(termsIndex);
            var result = new Dictionary <string, double>();

            foreach (var docToScore in documentsToScore)
            {
                var docID  = docToScore.Key;
                var fti    = new Dictionary <string, double>();
                var docLen = documentsLen[docID];
                var termsRelativePositions = CustomMergeSort.MergeKSortedLists(docToScore.Value);
                var docScore = 0.0;

                for (int i = 0; i < termsRelativePositions.Count; ++i)
                {
                    var current = termsRelativePositions[i];
                    var term    = current.Value;

                    // NOTE(review): presumably the index of the closest neighbouring occurrence;
                    // confirm against ComputeMinDistanceElementIndex. Fetched once and reused below.
                    var nearest     = termsRelativePositions[ComputeMinDistanceElementIndex(i, termsRelativePositions)];
                    var nearestTerm = nearest.Value;

                    var documentOffset = current.Key - nearest.Key;
                    var queryOffset    = termsQueryPositions[term] - termsQueryPositions[nearestTerm];
                    var dist           = Math.Abs(documentOffset - queryOffset);

                    // A term seen for the first time in this document starts from DefaultTermScore.
                    double termScore;
                    if (!fti.TryGetValue(term, out termScore))
                    {
                        termScore = DefaultTermScore;
                    }

                    fti[term] = termScore + termsWeight[term] * termsWeight[nearestTerm] * ReverseProximity(dist);
                }

                var K = ComputeK(docLen);
                foreach (var item in fti)
                {
                    docScore += termsWeight[item.Key] * item.Value / (K + item.Value);
                }

                result.Add(docID, docScore);
            }

            return(result);
        }
        /// <summary>
        /// Returns the IDs of the documents that occur in the posting list of every index entry
        /// returned by <c>IndexRepo.GetIndexOfWords</c> for <paramref name="termsToCompute"/>.
        /// </summary>
        /// <param name="termsToCompute">Terms whose index entries are intersected.</param>
        /// <returns>
        /// Document IDs common to all returned index entries; an empty list when no entry is returned.
        /// </returns>
        /// <remarks>
        /// NOTE(review): whether a term missing from the index yields an (empty) entry or no entry
        /// at all is decided by <c>GetIndexOfWords</c>, which is not visible here - verify.
        /// </remarks>
        public List <string> AND(List <string> termsToCompute)
        {
            var postings = IndexRepo.GetIndexOfWords(termsToCompute);

            if (postings.Count == 0)
            {
                return(new List <string>());
            }

            // Seed with the first entry's documents, then narrow down with each remaining entry.
            var commonIds = new HashSet <string>();
            commonIds.UnionWith(postings[0].Docs.Select(d => d.ID));

            for (var i = 1; i < postings.Count; ++i)
            {
                commonIds.IntersectWith(postings[i].Docs.Select(d => d.ID));
            }

            return(commonIds.ToList());
        }
        /// <summary>
        /// Looks up the index entries for <paramref name="termsToCompute"/> and returns whatever
        /// <c>ComputeOR</c> produces for them.
        /// </summary>
        /// <param name="termsToCompute">Terms whose index entries are combined.</param>
        /// <returns>The list returned by <c>ComputeOR</c> for the looked-up index entries.</returns>
        public List <string> OR(List <string> termsToCompute)
        {
            return(ComputeOR(IndexRepo.GetIndexOfWords(termsToCompute)));
        }