Ejemplo n.º 1
0
        /// <summary>
        /// Computes a length-normalised score for every document in <paramref name="wrapper"/>
        /// and returns the documents paired with that score, ordered ascending by score
        /// (lowest score first).
        /// </summary>
        /// <param name="tokens">
        /// Query tokens. Each one is used as a key into <c>incidenceVector</c>, so every token
        /// must already be present there.
        /// </param>
        /// <param name="wrapper">Holder of the candidate documents to score.</param>
        /// <returns>
        /// One (document, score) pair per scored document. A document whose vector length is
        /// zero gets a score of 0 instead of the Infinity/NaN a division by zero would produce.
        /// </returns>
        private List <KeyValuePair <Document, double> > CalculateCosScores(List <string> tokens, DfWrapper wrapper)
        {
            List <KeyValuePair <Document, double> > scoredDocuments = new List <KeyValuePair <Document, double> >();

            // Score and length are accumulated in a single pass per document, so no id-based
            // lookups between parallel lists are needed afterwards.
            foreach (Document doc in wrapper.DocumentList)
            {
                double weightSum     = 0;
                double squaredLength = 0;

                foreach (string word in tokens)
                {
                    // NOTE(review): the log-weighted term frequency is read from the document and
                    // does not depend on `word`, so it is simply added once per query token.
                    // Confirm whether a per-term frequency was intended here.
                    if (doc.TermFrequency != 0)
                    {
                        weightSum += (1 + Math.Log(doc.TermFrequency));
                    }

                    // Only tokens whose posting list contains this document contribute to its length.
                    if (incidenceVector[word].DocumentList.Contains(doc))
                    {
                        squaredLength += doc.TfIdf * doc.TfIdf;
                    }
                }

                double vectorLength = Math.Sqrt(squaredLength);

                // A NaN length cannot be normalised; such documents were never emitted
                // by the previous implementation either (NaN >= 0 is false).
                if (double.IsNaN(vectorLength))
                {
                    continue;
                }

                // Guard the division: a zero-length vector yields score 0 rather than Infinity/NaN.
                double score = vectorLength > 0 ? weightSum / vectorLength : 0;

                scoredDocuments.Add(new KeyValuePair <Document, double>(doc, score));
            }

            // OrderBy is a stable sort, so documents with equal scores keep their input order.
            return(scoredDocuments.OrderBy(x => x.Value).ToList());
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Runs a whitespace-separated query against the index and returns the best-ranked documents.
        /// </summary>
        /// <remarks>
        /// The query is lower-cased, split on single spaces and stripped of stop words.
        /// The literal token <c>*not*</c> blacklists every document listed for the token that
        /// follows it. Remaining candidate documents are scored with
        /// <c>CalculateCosScores</c>, each score is multiplied by the PageRank value at the
        /// index equal to the document's id, and the results are sorted by descending product.
        /// </remarks>
        /// <param name="inputQuery">Raw query text; must not be null.</param>
        /// <returns>At most ten (document, combined score) pairs, highest score first.</returns>
        public List <KeyValuePair <Document, double> > PassQuery(string inputQuery)
        {
            // Upper bound on the number of results handed back to the caller.
            const int maxResults = 10;

            List <string> disectedQuery = new List <string>(inputQuery.ToLower().Split(' '));

            disectedQuery.RemoveAll(e => stopWords.Exists(sw => sw.Equals(e)));

            List <string>   foundTokens          = new List <string>();
            List <Document> blacklistedDocuments = new List <Document>();

            for (int index = 0; index < disectedQuery.Count; index++)
            {
                string queryWord = disectedQuery[index];

                if (queryWord == "*not*")
                {
                    // Only negate when there IS a following token and it is indexed; a trailing
                    // "*not*" or an unknown negated word is ignored instead of throwing.
                    int negatedIndex = index + 1;
                    if (negatedIndex < disectedQuery.Count && incidenceVector.ContainsKey(disectedQuery[negatedIndex]))
                    {
                        blacklistedDocuments.AddRange(incidenceVector[disectedQuery[negatedIndex]].DocumentList);
                    }
                }
                if (incidenceVector.ContainsKey(queryWord) && !foundTokens.Contains(queryWord))
                {
                    foundTokens.Add(queryWord);
                }
            }

            // Candidates: every document matching at least one found token, minus blacklisted ones.
            DfWrapper foundPages = new DfWrapper();

            foundPages.DocumentList.AddRange(ORpageFinder(foundTokens));
            foundPages.DocumentList = foundPages.DocumentList.Distinct().ToList();
            foundPages.DocumentList.RemoveAll(e => blacklistedDocuments.Contains(e));

            // NOTE(review): presumably refreshes the TfIdf values that CalculateCosScores reads,
            // using the candidate count - confirm against CalculateTf_Idf.
            CalculateTf_Idf(foundPages.DocumentList.Count);

            List <KeyValuePair <Document, double> > cosscore = CalculateCosScores(foundTokens, foundPages);

            // NOTE(review): the meaning of the arguments (0.10, 200) is defined by PageRanker,
            // which is not visible here.
            double[] pageranks = _crawler.PageRanker(0.10, 200);

            // Index the cosine scores by document id once; the first entry per id wins,
            // matching the first-match semantics of a linear search.
            Dictionary <int, KeyValuePair <Document, double> > cosScoreById = new Dictionary <int, KeyValuePair <Document, double> >();
            foreach (KeyValuePair <Document, double> entry in cosscore)
            {
                if (!cosScoreById.ContainsKey(entry.Key.Id))
                {
                    cosScoreById.Add(entry.Key.Id, entry);
                }
            }

            List <KeyValuePair <Document, double> > final = new List <KeyValuePair <Document, double> >();

            // The PageRank array is indexed by document id; ids without a cosine score are skipped.
            for (int i = 0; i < pageranks.Length; i++)
            {
                KeyValuePair <Document, double> match;
                if (cosScoreById.TryGetValue(i, out match))
                {
                    final.Add(new KeyValuePair <Document, double>(match.Key, pageranks[i] * match.Value));
                }
            }

            // Ascending sort followed by a reverse gives highest combined score first.
            final.Sort((x, y) => x.Value.CompareTo(y.Value));
            final.Reverse();

            // Truncate explicitly rather than relying on GetRange throwing for short lists.
            if (final.Count > maxResults)
            {
                final = final.GetRange(0, maxResults);
            }

            return(final);
        }