/// <summary>
/// Runs the stored query phrase with the given id through the search controller and
/// builds precision/recall data points (as percentages) from the ranked results.
/// </summary>
/// <param name="id">Key of the query phrase to look up and execute.</param>
/// <returns>
/// The default view with the processed query as its model. <c>ViewBag.Time</c>,
/// <c>ViewBag.DataString</c> and <c>ViewBag.RelevantDocuments</c> are set for the view.
/// </returns>
public ActionResult RunQuery(int id)
        {
            // NOTE(review): if Find yields null for an unknown id (presumably an EF DbSet -
            // confirm), the next line throws NullReferenceException. Consider returning the
            // framework's not-found result here.
            QueryPhrase queryPhrase = db.QueryPhrases.Find(id);
            Query q = new Query { SearchPhrase = queryPhrase.Phrase };

            SearchController searchController = new SearchController();
            Stopwatch sw = Stopwatch.StartNew();
            Query processedQuery = searchController.ProcessQuery(q);
            sw.Stop();

            // Report fractional seconds. The previous ElapsedMilliseconds / 1000 was integer
            // division and truncated every sub-second run to "0".
            ViewBag.Time = sw.Elapsed.TotalSeconds.ToString("0.###");

            // Initial precision and recall values: 100% precision at 0% recall.
            List<double> precision = new List<double> { 100 };
            List<double> recall = new List<double> { 0 };

            List<string> relevantDocuments = getRelevantDocuments(queryPhrase);
            double noOfRelevantDocs = relevantDocuments.Count;

            // Hash-based lookup so the membership test below is not a linear scan per result.
            HashSet<string> relevantDocumentNames = new HashSet<string>(relevantDocuments);

            // Both counters are 1-based: they hold the rank of the NEXT relevant hit / result.
            double relevantDocCount = 1;
            double resultCount = 1;
            foreach (var result in processedQuery.Results)
            {
                if (relevantDocumentNames.Contains(result.DocumentName()))
                {
                    // A new point is recorded only when a relevant document is encountered.
                    recall.Add((relevantDocCount / noOfRelevantDocs) * 100);
                    precision.Add((relevantDocCount / resultCount) * 100);
                    relevantDocCount++;
                }
                resultCount++;
            }

            ViewBag.DataString = CreateDataString(precision, recall);
            ViewBag.RelevantDocuments = queryPhrase.RelevantDocuments;
            return View(processedQuery);
        }
        /// <summary>
        /// Scores every stored document against <paramref name="query"/> using TF-IDF weighted
        /// vectors and stores the documents with a positive score, best first, in
        /// <c>query.Results</c>.
        /// </summary>
        /// <param name="query">Query whose terms are compared against each document.</param>
        /// <returns>The same <paramref name="query"/> instance with <c>Results</c> populated.</returns>
        /// <remarks>
        /// The score is the dot product of the query and document vectors after each has been
        /// divided by the value returned from <c>Vectors.NormalizeVector</c>. The elapsed time
        /// in seconds is written to <c>ViewBag.Time</c>.
        /// </remarks>
        public Query ProcessQuery(Query query)
        {
            Stopwatch sw = Stopwatch.StartNew();
            List<Result> results = new List<Result>();
            long timeInLoop2 = 0;
            long timeInLoop1 = 0;

            // Resolve the query's terms once and derive both lookups from the same list.
            List<Term> queryTerms = query.termsInQuery().ToList();
            List<string> termsInQuery = queryTerms.Select(i => i.StemmedText).ToList();
            List<int> termIdsInQuery = queryTerms.Select(i => i.ID).ToList();

            // Both orderings are kept because they are handed to helpers defined elsewhere
            // (Term.getFrequency / Document.Terms) that may depend on the ordering.
            List<TermDocumentWeight> termDocumentWeights = db.TermDocumentWeights.OrderBy(i => i.TermID).ToList();
            List<TermDocumentWeight> termDocumentWeightsOrderedByDocument = db.TermDocumentWeights.OrderBy(i => i.DocumentID).ToList();

            // Number of weight rows per term, computed once instead of rescanning every row
            // for every term of every document.
            Dictionary<int, int> documentFrequencies = termDocumentWeights
                .GroupBy(i => i.TermID)
                .ToDictionary(g => g.Key, g => g.Count());

            List<Term> terms = db.Terms.ToList();
            List<Document> documents = db.Documents.OrderBy(i => i.ID).ToList();
            int noOfDocuments = documents.Count;
            double vectorConstruction = 0;

            foreach (var doc in documents)
            {
                Debug.WriteLine("Document Name: " + doc.Name);
                List<double> documentTF_IDFVector = new List<double>();
                List<double> queryTF_IDFVector = new List<double>();

                // Check whether the document shares at least one stemmed term with the query;
                // the vectors are only built when it does.
                Stopwatch s3 = Stopwatch.StartNew();
                var docTerms = doc.Terms(terms, termDocumentWeightsOrderedByDocument).ToList();
                bool termsOverlap = docTerms.Select(i => i.StemmedText).Intersect(termsInQuery).Any();
                Debug.WriteLine("Time spent checking intersection of query and documents terms: " + s3.ElapsedMilliseconds);
                timeInLoop1 += s3.ElapsedMilliseconds;

                if (termsOverlap)
                {
                    Stopwatch s = Stopwatch.StartNew();
                    foreach (var term in docTerms)
                    {
                        // Divide as double: int / int truncated the ratio before the logarithm
                        // (e.g. 10 / 6 -> 1 -> IDF 0). A term without any weight row gets an
                        // IDF of 0 instead of dividing by zero.
                        int documentFrequency;
                        double inverseDocumentFrequency =
                            documentFrequencies.TryGetValue(term.ID, out documentFrequency) && documentFrequency > 0
                                ? Math.Log10((double)noOfDocuments / documentFrequency)
                                : 0;

                        queryTF_IDFVector.Add(inverseDocumentFrequency * term.getQueryFrequency(query, termIdsInQuery));
                        documentTF_IDFVector.Add(inverseDocumentFrequency * term.getFrequency(doc, termDocumentWeights));
                    }
                    timeInLoop2 += s.ElapsedMilliseconds;
                    Debug.WriteLine("Time spent constructing vector " + s.ElapsedMilliseconds);
                    Debug.WriteLine("queryTFIDF Vector for doc " + doc.Name);
                    foreach (var i in queryTF_IDFVector)
                    {
                        Debug.WriteLine(i);
                    }
                    Debug.WriteLine("documentTFIDF Vector for doc " + doc.Name);
                    foreach (var i in documentTF_IDFVector)
                    {
                        Debug.WriteLine(i);
                    }
                }

                double similarity = 0;

                // Normalize each vector by the value NormalizeVector reports for it.
                // A zero value turns the components into NaN; such documents fail the
                // "> 0" checks below and are therefore not reported.
                Vectors vector = new Vectors();
                double normalizedQueryValue = vector.NormalizeVector(queryTF_IDFVector);
                queryTF_IDFVector = queryTF_IDFVector.Select(i => i / normalizedQueryValue).ToList();
                double normalizedDocumentValue = vector.NormalizeVector(documentTF_IDFVector);
                documentTF_IDFVector = documentTF_IDFVector.Select(i => i / normalizedDocumentValue).ToList();

                // Similarity is the dot product of the two normalized vectors.
                if (queryTF_IDFVector.Any(i => i > 0))
                {
                    Stopwatch s2 = Stopwatch.StartNew();
                    for (int index = 0; index < queryTF_IDFVector.Count; index++)
                    {
                        double queryWeight = queryTF_IDFVector[index];
                        if (queryWeight != 0)
                        {
                            similarity += documentTF_IDFVector[index] * queryWeight;
                        }
                    }
                    vectorConstruction += s2.ElapsedMilliseconds;
                }

                Result result = new Result { DocumentID = doc.ID, Similarity = similarity };
                if (result.Similarity > 0)
                {
                    results.Add(result);
                }
            }

            Debug.WriteLine("timeInLoop2: " + timeInLoop2);
            Debug.WriteLine("timeInLoop1: " + timeInLoop1);
            Debug.WriteLine("Vector Construction: " + vectorConstruction);
            query.Results = results.OrderByDescending(i => i.Similarity).ToList();
            sw.Stop();

            // Fractional seconds; ElapsedMilliseconds / 1000 truncated sub-second runs to "0".
            ViewBag.Time = sw.Elapsed.TotalSeconds.ToString("0.###");
            return query;
        }
Beispiel #3
0
 /// <summary>
 /// Counts how many entries of <paramref name="termIdsInQuery"/> equal this instance's ID.
 /// </summary>
 /// <param name="q">Not used by this method.</param>
 /// <param name="termIdsInQuery">Term ids of the query, one entry per occurrence.</param>
 /// <returns>The number of matching entries.</returns>
 public int getQueryFrequency(Query q, List<int> termIdsInQuery)
 {
     IEnumerable<int> occurrences = termIdsInQuery.Where(termId => termId == ID);
     int frequency = occurrences.Count();
     return frequency;
 }
 /// <summary>
 /// Processes <paramref name="query"/> and renders the "SearchResult" view with the
 /// processed query as its model.
 /// </summary>
 /// <param name="query">Query to run through <c>ProcessQuery</c>.</param>
 /// <returns>The "SearchResult" view result.</returns>
 public ActionResult Index(Query query)
 {
     return View("SearchResult", ProcessQuery(query));
 }