示例#1
0
        /// <summary>
        /// Scores every entry of <c>results</c> against the query string and
        /// returns the scores ordered from most to least relevant.
        /// </summary>
        /// <returns>
        /// One pair per document: the key is the document's <c>ToString()</c> value,
        /// the value is the score from <c>Vector.GetSimilarityScore</c>. The list is
        /// sorted by score in descending order; ties keep their original relative order.
        /// </returns>
        /// <exception cref="System.ArgumentException">
        /// Two documents produce the same <c>ToString()</c> key.
        /// </exception>
        public List<KeyValuePair<string, double>> RankedResults()
        {
            // First pass: load every document and collect the combined vocabulary.
            // The full term set must be known before any vector can be built.
            List<Document> documents = new List<Document>();
            HashSet<string> dataSet = new HashSet<string>();
            foreach (var result in results)
            {
                Document d = new Document(result);
                documents.Add(d);
                dataSet.UnionWith(d.tokens());
            }

            //Build Query Vector
            Query query = new Query(queryString);
            Vector queryVector = new Vector(dataSet, query);

            // Second pass: build each document vector and score it against the query.
            Dictionary<string, double> relevance = new Dictionary<string, double>();
            foreach (var document in documents)
            {
                Vector documentVector = new Vector(dataSet, document);
                relevance.Add(document.ToString(), Vector.GetSimilarityScore(queryVector, documentVector));
            }

            //Sort result by most relevant
            // Dictionary enumeration order is unspecified, so the ranking has to be
            // imposed explicitly; OrderByDescending is a stable sort.
            return relevance.OrderByDescending(pair => pair.Value).ToList();
        }
示例#2
0
        /// <summary>
        /// Adds a document's tokens to the positional index and updates the
        /// term-frequency and document-frequency tables, then calls <c>serialize()</c>.
        /// </summary>
        /// <param name="document">
        /// Document to index. Its <c>ToString()</c> value is used as the document key
        /// and the position of each token in <c>tokens()</c> is recorded.
        /// </param>
        public static void add(Document document)
        {
            numOfDocuments += 1;
            List<string> terms = document.tokens();
            // The document key does not change during the loop, so compute it once.
            string documentKey = document.ToString();

            for (int position = 0; position < terms.Count; position++)
            {
                string term = terms[position];

                Dictionary<string, List<int>> postings;
                if (!index.TryGetValue(term, out postings))
                {
                    //new term entry
                    index[term] = new Dictionary<string, List<int>>
                    {
                        { documentKey, new List<int> { position } }
                    };
                    tfIndex[term] = new Dictionary<string, int> { { documentKey, 1 } };
                    dfIndex[term] = 1;
                    continue;
                }

                //term is already in index
                List<int> positions;
                if (postings.TryGetValue(documentKey, out positions))
                {
                    //term already exists in document
                    tfIndex[term][documentKey] += 1;//update tfIndex
                    // postings and positions are references into index, so mutating
                    // them in place is enough; no write-back to index is needed.
                    positions.Add(position);
                }
                else
                {
                    //first occurrence in a document of term that already exists
                    dfIndex[term] += 1;
                    tfIndex[term][documentKey] = 1;
                    postings[documentKey] = new List<int> { position };
                }
            }
            serialize();
        }
示例#3
0
        /// <summary>
        /// Manual driver: constructs several documents from hard-coded local paths
        /// and adds the first three to <c>InvertedIndex</c>.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Documents that get indexed below.
            Document textDoc1 = new Document("C:\\Users\\LOLU\\Documents\\csc322\\doc1", "txt");
            Document textDoc2 = new Document("C:\\Users\\LOLU\\Documents\\csc322\\doc2", "txt");
            Document ossPdf = new Document("C:\\Users\\LOLU\\Books~Tutorials\\OSS2014.pdf");

            // Constructed but not added to the index.
            Document timeComplexityPdf = new Document("C:\\Users\\LOLU\\Books~Tutorials\\codility lessons\\1-TimeComplexity.pdf");
            Document countingElementsPdf = new Document("C:\\Users\\LOLU\\Books~Tutorials\\codility lessons\\2-CountingElements.pdf");
            Document prefixSumsPdf = new Document("C:\\Users\\LOLU\\Books~Tutorials\\codility lessons\\3-PrefixSums.pdf");
            Document htmlDoc = new Document("C:\\Users\\LOLU\\Documents\\csc322\\test", "html");

            foreach (Document toIndex in new Document[] { textDoc1, textDoc2, ossPdf })
            {
                InvertedIndex.add(toIndex);
            }

            // Disabled query smoke test, kept for reference:
            /*
            Query query = new Query("open source information");
            Console.WriteLine(query.QueryType());
            Console.WriteLine(query.tokens().Count);
            foreach(var item in query.RankedResults()){
                Console.WriteLine(item);
            }*/
        }
示例#4
0
 /// <summary>
 /// Creates a vector for <paramref name="document"/> over the given term set.
 /// </summary>
 /// <param name="dataSet">Term set passed to the document's <c>GetVector</c> and stored on this instance.</param>
 /// <param name="document">Document that supplies the vector representation via <c>GetVector</c>.</param>
 public Vector(HashSet<string> dataSet, Document document)
 {
     // The document produces its own representation over the supplied term set.
     vectorRep = document.GetVector(dataSet);
     this.dataSet = dataSet;
 }