Example #1
0
        /// <summary>
        /// Scores every entry of <c>results</c> against <c>queryString</c> and returns the
        /// scores ordered from highest to lowest.
        /// </summary>
        /// <remarks>
        /// Documents are keyed by their <c>ToString()</c> value; if two documents produce the
        /// same key, <c>Dictionary.Add</c> throws <see cref="ArgumentException"/>.
        /// </remarks>
        /// <returns>
        /// One pair per document: the key is the document's <c>ToString()</c> value and the
        /// value is the result of <c>Vector.GetSimilarityScore</c> for that document,
        /// sorted by descending score.
        /// </returns>
        public List<KeyValuePair<string, double>> RankedResults()
        {
            // Wrap each raw result in a Document and gather the distinct terms seen across all of them.
            List<Document> documents = new List<Document>();
            HashSet<string> dataSet = new HashSet<string>();
            foreach (var result in results)
            {
                Document d = new Document(result);
                documents.Add(d);
                foreach (var term in d.tokens())
                {
                    dataSet.Add(term);
                }
            }

            // Build one vector per document over the shared term set.
            Dictionary<string, Vector> documentVectors = new Dictionary<string, Vector>();
            foreach (var document in documents)
            {
                documentVectors.Add(document.ToString(), new Vector(dataSet, document));
            }

            // Build the query vector over the same term set so the two are comparable.
            Query query = new Query(queryString);
            Vector queryVector = new Vector(dataSet, query);

            Dictionary<string, double> relevance = new Dictionary<string, double>();
            foreach (var documentVector in documentVectors)
            {
                relevance.Add(documentVector.Key, Vector.GetSimilarityScore(queryVector, documentVector.Value));
            }

            // Sort by most relevant first. The previous code returned the dictionary's
            // enumeration order without sorting, so callers did not get a ranking.
            // NOTE(review): assumes a higher similarity score means "more relevant" — confirm
            // against Vector.GetSimilarityScore. OrderByDescending is stable, so ties keep
            // their original relative order.
            return relevance.OrderByDescending(entry => entry.Value).ToList();
        }
Example #2
0
        /// <summary>
        /// Adds every token of <paramref name="document"/> to the static indexes and then
        /// calls <c>serialize()</c>.
        /// </summary>
        /// <remarks>
        /// Three structures are updated, all keyed by term and by the document's
        /// <c>ToString()</c> value:
        /// <c>index</c> (term -> document -> token positions),
        /// <c>tfIndex</c> (term -> document -> occurrence count) and
        /// <c>dfIndex</c> (term -> number of documents containing the term).
        /// NOTE(review): <c>numOfDocuments</c> is incremented on every call, even when a
        /// document with the same key was added before — confirm that is intended.
        /// </remarks>
        /// <param name="document">Document whose tokens are indexed.</param>
        public static void add(Document document)
        {
            numOfDocuments += 1;
            List<string> terms = document.tokens();
            // The document key is the same for every term, so compute it once.
            string docKey = document.ToString();

            for (int position = 0; position < terms.Count; position++)
            {
                string term = terms[position];

                Dictionary<string, List<int>> postings;
                if (!index.TryGetValue(term, out postings))
                {
                    // New term entry: create its postings, term-frequency and document-frequency records.
                    index[term] = new Dictionary<string, List<int>> { { docKey, new List<int> { position } } };
                    tfIndex[term] = new Dictionary<string, int> { { docKey, 1 } };
                    dfIndex[term] = 1;
                    continue;
                }

                List<int> positions;
                if (postings.TryGetValue(docKey, out positions))
                {
                    // Term already seen in this document: bump its count and record the new position.
                    // postings is the dictionary stored in index, so no write-back is needed.
                    tfIndex[term][docKey] += 1;
                    positions.Add(position);
                }
                else
                {
                    // First occurrence of a known term in this document.
                    dfIndex[term] += 1;
                    tfIndex[term][docKey] = 1;
                    postings[docKey] = new List<int> { position };
                }
            }

            // Called once per added document, after all terms are processed.
            serialize();
        }