Beispiel #1
0
        /// <summary>
        /// Compares this document with another <c>ResultDocument</c> by similarity,
        /// in REVERSED order: the document with the higher similarity is considered
        /// "smaller", so an ascending sort lists the most similar documents first.
        /// </summary>
        /// <param name="o">The object to compare with; must be a <c>ResultDocument</c> or null.</param>
        /// <returns>
        /// A positive value if <paramref name="o"/> has the higher similarity (or is null),
        /// a negative value if it has the lower similarity, and 0 if both are equal.
        /// </returns>
        /// <exception cref="InvalidCastException">
        /// <paramref name="o"/> is not a <c>ResultDocument</c>.
        /// </exception>
        public int CompareTo(object o) //reversed!
        {
            if (o == null)
            {
                return(1); //IComparable contract: any instance compares greater than null
            }

            ResultDocument d = (ResultDocument)o;

            //Operands are swapped on purpose (reversed order). Using the value's own
            //CompareTo instead of </> keeps the ordering total even when a similarity
            //is NaN (e.g. from a 0/0 division); the operators would report NaN as
            //"equal" to everything and hand the sort an inconsistent comparison.
            return(d.Similarity.CompareTo(similarity));
        }
Beispiel #2
0
        /// <summary>
        /// Runs a text query and scores every relevant document by combining its
        /// cosine similarity to the query with its page rank.
        /// </summary>
        /// <param name="query">The raw query string; it is turned into query terms by <c>MakeQuery</c>.</param>
        /// <returns>
        /// The scored documents, sorted with <c>Array.Sort</c> (i.e. by the ordering
        /// <c>ResultDocument</c> itself defines), or <c>null</c> when the query yields no terms.
        /// </returns>
        public ResultDocument[] Search(string query)
        {
            ArrayList queryTerms = MakeQuery(query); //exclude non-terms

            if (queryTerms.Count == 0)
            {
                return(null); //no relevant docs
            }

            //query norm is sqrt(number of terms)
            //NOTE(review): this presumably treats every query term as weight 1 - confirm
            float     queryNorm = Convert.ToSingle(Math.Sqrt(queryTerms.Count));
            Hashtable docs      = GetRelevantDocs(queryTerms);//docs relevant to ALL terms + sums of term weights for each doc

            ResultDocument[] results = new ResultDocument[docs.Count];
            int cursor = 0;

            foreach (DictionaryEntry entry in docs)
            {
                //doc ids are ints; converting to Int16 would overflow for ids above 32767
                int   docId        = Convert.ToInt32(entry.Key);
                float sumOfWeights = Convert.ToSingle(entry.Value);
                float docNorm      = index.GetDocNorm(docId);

                //cos(q,d) = dot(q,d)/|q|x|d|
                float cos      = sumOfWeights / (queryNorm * docNorm);
                float pageRank = index.GetPageRank(docId);

                //page rank is only blended in (weighted by w) for docs with a positive cosine
                float similarity = cos;
                if (cos > 0)
                {
                    similarity = w * cos + (1f - w) * pageRank;
                }

                results[cursor++] = new ResultDocument(docId, similarity);
            }

            Array.Sort(results);

            return(results);
        }
Beispiel #3
0
        /// <summary>
        /// Finds the documents most similar to the document <paramref name="docId"/>.
        /// The similarity of each candidate is its dot product with the query document
        /// divided by the product of both document norms.
        /// </summary>
        /// <param name="docId">Id of the document to compare against all others.</param>
        /// <param name="topResults">
        /// Maximum number of documents to return; must be non-negative.
        /// </param>
        /// <returns>
        /// Up to <paramref name="topResults"/> documents in the order produced by
        /// <c>Array.Sort</c>, never including the query document itself. The array is
        /// shorter than <paramref name="topResults"/> when fewer candidates exist.
        /// </returns>
        public ResultDocument[] Search(int docId, int topResults)
        {
            //make a hashtable for terms in the doc which is being compared to all the rest
            Hashtable termIds = new Hashtable();

            DocTermItem[] docTerms = index.DocTerms(docId);
            foreach (DocTermItem dti in docTerms)
            {
                termIds.Add(dti.TermId, dti.TermCount);
            }

            //score every document id in [0, TRAININGSET) against the query document
            ResultDocument[] allResults = new ResultDocument[TRAININGSET];
            float            docNorm    = index.GetDocNorm(docId);

            for (int doc2id = 0; doc2id < allResults.Length; doc2id++)
            {
                float doc2norm   = index.GetDocNorm(doc2id);
                float similarity = getDotProduct(docId, doc2id, termIds) / (docNorm * doc2norm);
                allResults[doc2id] = new ResultDocument(doc2id, similarity);
            }

            Array.Sort(allResults);

            //count the candidates that may actually be returned, so that asking for
            //more results than exist cannot run past the end of allResults
            int available = 0;
            foreach (ResultDocument candidate in allResults)
            {
                if (candidate.DocId != docId)
                {
                    available++;
                }
            }

            ResultDocument[] results = new ResultDocument[Math.Min(topResults, available)];
            int filled = 0;

            for (int i = 0; i < allResults.Length && filled < results.Length; i++)
            {
                if (allResults[i].DocId == docId) //do not return the doc itself!
                {
                    continue;
                }

                results[filled++] = allResults[i];
            }

            return(results);
        }