/// <summary>
/// Compares this result with another one using an inverted ordering:
/// a result with the larger similarity is reported as the "smaller" element,
/// so an ascending sort lists the most similar results first.
/// </summary>
/// <param name="o">The object to compare against; it is cast to ResultDocument.</param>
/// <returns>
/// 1 when the other result has the higher similarity, -1 when it has the
/// lower similarity, and 0 otherwise.
/// </returns>
public int CompareTo(object o)
{
    ResultDocument other = (ResultDocument)o;

    // Intentionally reversed with respect to the natural numeric order.
    return other.Similarity > similarity
        ? 1
        : (other.Similarity < similarity ? -1 : 0);
}
/// <summary>
/// Scores the documents returned by GetRelevantDocs for a text query and
/// returns them sorted.
/// </summary>
/// <param name="query">Query text; it is handed to MakeQuery to obtain the query terms.</param>
/// <returns>
/// One ResultDocument per relevant document, sorted with Array.Sort (the order
/// is whatever ResultDocument's comparison defines), or null when MakeQuery
/// yields no terms.
/// </returns>
public ResultDocument[] Search(string query)
{
    ArrayList queryTerms = MakeQuery(query); // exclude non-terms
    if (queryTerms.Count == 0)
    {
        return null; // no relevant docs
    }

    float queryNorm = Convert.ToSingle(Math.Sqrt(queryTerms.Count));

    // docs relevant to ALL terms + sums of term weights for each doc
    Hashtable docs = GetRelevantDocs(queryTerms);

    ResultDocument[] results = new ResultDocument[docs.Count];
    int cursor = 0;

    foreach (DictionaryEntry entry in docs)
    {
        // Document ids are handled as int everywhere else in this method, so
        // convert to Int32; Convert.ToInt16 throws OverflowException for any
        // id above 32767.
        int docId = Convert.ToInt32(entry.Key);
        float sumOfWeights = Convert.ToSingle(entry.Value);
        float docNorm = index.GetDocNorm(docId);

        // cos(q,d) = dot(q,d) / (|q| x |d|)
        float cos = sumOfWeights / (queryNorm * docNorm);

        // Looked up for every document, exactly as before, even though it is
        // only blended in when the cosine is positive.
        float pageRank = index.GetPageRank(docId);

        float similarity = cos;
        if (cos > 0)
        {
            // Weighted mix of the cosine score and the page rank.
            similarity = w * cos + (1f - w) * pageRank;
        }

        results[cursor++] = new ResultDocument(docId, similarity);
    }

    Array.Sort(results);
    return results;
}
/// <summary>
/// Compares one document against every document id in [0, TRAININGSET) and
/// returns the best-ranked ones, leaving out the query document itself.
/// </summary>
/// <param name="docId">Id of the document that all others are compared to.</param>
/// <param name="topResults">
/// Maximum number of results to return. A negative value still fails at array
/// allocation, as it did before.
/// </param>
/// <returns>
/// Up to <paramref name="topResults"/> results in the order produced by
/// Array.Sort (defined by ResultDocument's comparison). The array is shorter
/// than requested when fewer candidate documents are available.
/// </returns>
public ResultDocument[] Search(int docId, int topResults)
{
    // make a hashtable for terms in the doc which is being compared to all the rest
    Hashtable termIds = new Hashtable();
    foreach (DocTermItem dti in index.DocTerms(docId))
    {
        termIds.Add(dti.TermId, dti.TermCount);
    }

    ResultDocument[] allResults = new ResultDocument[TRAININGSET];
    float docNorm = index.GetDocNorm(docId);

    for (int doc2id = 0; doc2id < allResults.Length; doc2id++)
    {
        float doc2norm = index.GetDocNorm(doc2id);
        float similarity = getDotProduct(docId, doc2id, termIds) / (docNorm * doc2norm);
        allResults[doc2id] = new ResultDocument(doc2id, similarity);
    }

    Array.Sort(allResults);

    // Never size the output beyond the number of scored documents. The old
    // loop indexed past the end of allResults whenever topResults exceeded
    // the candidates left after skipping the query document.
    ResultDocument[] results = new ResultDocument[Math.Min(topResults, allResults.Length)];
    int filled = 0;

    for (int j = 0; j < allResults.Length && filled < results.Length; j++)
    {
        if (allResults[j].DocId == docId)
        {
            continue; // do not return the doc itself!
        }

        results[filled++] = allResults[j];
    }

    if (filled < results.Length)
    {
        // Skipping the query document left one slot unused; trim it so the
        // caller never sees a trailing null entry.
        ResultDocument[] trimmed = new ResultDocument[filled];
        Array.Copy(results, trimmed, filled);
        results = trimmed;
    }

    return results;
}