/// <summary>
/// Runs a stored query phrase through the search controller and prepares the
/// timing and precision/recall data consumed by the view.
/// </summary>
/// <param name="id">Key of the <c>QueryPhrase</c> to evaluate.</param>
/// <returns>
/// The view for the processed query, or a 404 result when no query phrase is found for <paramref name="id"/>.
/// </returns>
public ActionResult RunQuery(int id)
{
    QueryPhrase queryPhrase = db.QueryPhrases.Find(id);
    if (queryPhrase == null)
    {
        // Without this guard an unknown id fails with a NullReferenceException on queryPhrase.Phrase.
        return HttpNotFound();
    }

    Query q = new Query { SearchPhrase = queryPhrase.Phrase };
    SearchController searchController = new SearchController();

    Stopwatch sw = Stopwatch.StartNew();
    Query processedQuery = searchController.ProcessQuery(q);
    sw.Stop();

    // Elapsed seconds including the fractional part; dividing the millisecond count
    // by 1000 as integers reported "0" for every query that took less than a second.
    ViewBag.Time = sw.Elapsed.TotalSeconds.ToString("0.###");

    List<double> precision = new List<double>();
    List<double> recall = new List<double>();

    // Initial point of the curve: 100% precision at 0% recall.
    precision.Add(100);
    recall.Add(0);

    List<string> relevantDocuments = getRelevantDocuments(queryPhrase);
    double noOfRelevantDocs = relevantDocuments.Count;

    // Set lookup keeps the membership test constant-time per result.
    HashSet<string> relevantDocumentNames = new HashSet<string>(relevantDocuments);

    // Both counters are 1-based: they hold the rank of the hit/result currently being examined.
    double relevantDocCount = 1;
    double resultCount = 1;
    foreach (var result in processedQuery.Results)
    {
        if (relevantDocumentNames.Contains(result.DocumentName()))
        {
            recall.Add((relevantDocCount / noOfRelevantDocs) * 100);
            precision.Add((relevantDocCount / resultCount) * 100);
            relevantDocCount++;
        }

        resultCount++;
    }

    ViewBag.DataString = CreateDataString(precision, recall);
    ViewBag.RelevantDocuments = queryPhrase.RelevantDocuments;
    return View(processedQuery);
}
/// <summary>
/// Scores every document against the query using TF-IDF weighted vectors and
/// stores the documents with a positive similarity on the query.
/// </summary>
/// <param name="query">Query to evaluate; its <c>Results</c> property is overwritten.</param>
/// <returns>
/// The same <paramref name="query"/> instance, with <c>Results</c> ordered by descending similarity.
/// </returns>
/// <remarks>
/// Also sets <c>ViewBag.Time</c> to the elapsed time in seconds and writes timing
/// diagnostics through <see cref="Debug"/>.
/// </remarks>
public Query ProcessQuery(Query query)
{
    Stopwatch sw = Stopwatch.StartNew();
    List<Result> results = new List<Result>();
    long timeInLoop2 = 0;
    long timeInLoop1 = 0;

    // Resolve the query's terms once and derive both projections from that single list.
    List<Term> queryTerms = query.termsInQuery().ToList();
    List<string> termsInQuery = queryTerms.Select(i => i.StemmedText).ToList();
    List<int> termIdsInQuery = queryTerms.Select(i => i.ID).ToList();

    List<TermDocumentWeight> termDocumentWeights = db.TermDocumentWeights.OrderBy(i => i.TermID).ToList();
    List<TermDocumentWeight> termDocumentWeightsOrderedByDocument = db.TermDocumentWeights.OrderBy(i => i.DocumentID).ToList();

    // Number of weight rows per term, computed once instead of rescanning the
    // whole id list for every term of every document.
    Dictionary<int, int> documentFrequencyByTermId = termDocumentWeights
        .GroupBy(i => i.TermID)
        .ToDictionary(g => g.Key, g => g.Count());

    List<Term> terms = db.Terms.ToList();
    List<Document> documents = db.Documents.OrderBy(i => i.ID).ToList();
    int noOfDocuments = documents.Count;
    double vectorConstruction = 0;

    foreach (var doc in documents)
    {
        Debug.WriteLine("Document Name: " + doc.Name);

        // Only documents sharing at least one stemmed term with the query get vectors built.
        Stopwatch s3 = Stopwatch.StartNew();
        var docTerms = doc.Terms(terms, termDocumentWeightsOrderedByDocument).ToList();
        Boolean termsOverlap = docTerms.Select(i => i.StemmedText).Intersect(termsInQuery).Any();
        Debug.WriteLine("Time spent checking intersection of query and documents terms: " + s3.ElapsedMilliseconds);
        timeInLoop1 += s3.ElapsedMilliseconds;

        if (!termsOverlap)
        {
            // No shared term means the similarity stays 0, so the document is never a result.
            continue;
        }

        List<double> documentTF_IDFVector = new List<double>(docTerms.Count);
        List<double> queryTF_IDFVector = new List<double>(docTerms.Count);

        Stopwatch s = Stopwatch.StartNew();
        foreach (var term in docTerms)
        {
            // TryGetValue leaves documentFrequency at 0 when the term has no weight rows.
            int documentFrequency;
            documentFrequencyByTermId.TryGetValue(term.ID, out documentFrequency);

            // Floating-point division: the integer form truncated the ratio before the
            // logarithm (e.g. 100 / 30 became 3) and threw on a zero count.
            double inverseDocumentFrequency = documentFrequency > 0
                ? Math.Log10((double)noOfDocuments / documentFrequency)
                : 0;

            queryTF_IDFVector.Add(inverseDocumentFrequency * term.getQueryFrequency(query, termIdsInQuery));
            documentTF_IDFVector.Add(inverseDocumentFrequency * term.getFrequency(doc, termDocumentWeights));
        }

        timeInLoop2 += s.ElapsedMilliseconds;
        Debug.WriteLine("Time spent constructing vector " + s.ElapsedMilliseconds);

        Debug.WriteLine("queryTFIDF Vector for doc " + doc.Name);
        foreach (var i in queryTF_IDFVector)
        {
            Debug.WriteLine(i);
        }

        Debug.WriteLine("documentTFIDF Vector for doc " + doc.Name);
        foreach (var i in documentTF_IDFVector)
        {
            Debug.WriteLine(i);
        }

        // Scale both vectors by the value NormalizeVector reports for them.
        // NOTE(review): NormalizeVector presumably returns the vector's magnitude - confirm.
        Vectors vector = new Vectors();
        double normalizedQueryValue = vector.NormalizeVector(queryTF_IDFVector);
        queryTF_IDFVector = queryTF_IDFVector.Select(i => i / normalizedQueryValue).ToList();
        double normalizedDocumentValue = vector.NormalizeVector(documentTF_IDFVector);
        documentTF_IDFVector = documentTF_IDFVector.Select(i => i / normalizedDocumentValue).ToList();

        // Similarity is the dot product of the two scaled vectors.
        double similarity = 0;
        if (queryTF_IDFVector.Any(i => i > 0))
        {
            Stopwatch s2 = Stopwatch.StartNew();
            for (int index = 0; index < queryTF_IDFVector.Count; index++)
            {
                double queryWeight = queryTF_IDFVector[index];
                if (queryWeight != 0)
                {
                    similarity += documentTF_IDFVector[index] * queryWeight;
                }
            }

            vectorConstruction += s2.ElapsedMilliseconds;
        }

        if (similarity > 0)
        {
            results.Add(new Result { DocumentID = doc.ID, Similarity = similarity });
        }
    }

    Debug.WriteLine("timeInLoop2: " + timeInLoop2);
    Debug.WriteLine("timeInLoop1: " + timeInLoop1);
    Debug.WriteLine("Vector Construction: " + vectorConstruction);

    query.Results = results.OrderByDescending(i => i.Similarity).ToList();
    sw.Stop();

    // Elapsed seconds including the fractional part; dividing the millisecond count
    // by 1000 as integers reported "0" for every query that took less than a second.
    ViewBag.Time = sw.Elapsed.TotalSeconds.ToString("0.###");
    return query;
}
/// <summary>
/// Counts how many entries of <paramref name="termIdsInQuery"/> equal this object's <c>ID</c>.
/// </summary>
/// <param name="q">Not read by this method.</param>
/// <param name="termIdsInQuery">Term ids to search, one entry per occurrence.</param>
/// <returns>The number of entries equal to <c>ID</c>.</returns>
public int getQueryFrequency(Query q, List<int> termIdsInQuery)
{
    var matchingIds = termIdsInQuery.Where(candidateId => candidateId == ID);
    return matchingIds.Count();
}
/// <summary>
/// Processes the submitted query and renders it with the "SearchResult" view.
/// </summary>
/// <param name="query">Query passed on to <c>ProcessQuery</c>.</param>
/// <returns>The "SearchResult" view with the processed query as its model.</returns>
public ActionResult Index(Query query)
{
    return View("SearchResult", ProcessQuery(query));
}