/// <summary>
/// Scores every candidate document against a free-text query and returns the
/// document indices ordered by <c>SortTheDictionaryOnValue</c>.
/// </summary>
/// <param name="query">The query text to evaluate.</param>
/// <param name="wordsVector">Word index used to build the query vector and to find candidate documents.</param>
/// <param name="documentsVector">Per-document vectors; also supplies the total document count.</param>
/// <returns>Document indices as ordered by <c>SortTheDictionaryOnValue</c> over the similarity scores.</returns>
public static IList<int> Parse(string query, WordsVector wordsVector, DocumentsVector documentsVector)
{
    // The total number of indexed documents is an input to the query's TF-IDF vector.
    var totalDocuments = documentsVector.DocumentsIndex().Count;
    var queryVector = ComputeQueryTfIdfVector(query, wordsVector, totalDocuments);

    // Only the documents reported by DocumentsOccuredInQuery are scored.
    var candidateIndices = DocumentsOccuredInQuery(wordsVector, queryVector);

    var scoreByDocument = new Dictionary<int, double>();
    foreach (var documentIndex in candidateIndices)
    {
        var documentVector = documentsVector.GetVectorOfDocumentIndex(documentIndex);
        scoreByDocument.Add(documentIndex, ComputeCosineSimilarity(documentVector, queryVector));
    }

    return SortTheDictionaryOnValue(scoreByDocument);
}
/// <summary>
/// Preprocesses the raw query, determines which query syntax it matches, and
/// delegates to the corresponding parser.
/// </summary>
/// <param name="_query">The raw query text; it is passed through the preprocessor before matching.</param>
/// <param name="wordsVector">Word index handed to whichever parser is selected.</param>
/// <param name="documentsVector">Document vectors, used only by the general text parser.</param>
/// <param name="universalSet">Set of document ids, used only by the boolean parser.</param>
/// <returns>The list of document indices produced by the selected parser.</returns>
/// <exception cref="ArgumentException">
/// The preprocessed query matches none of the supported query patterns.
/// </exception>
public IList<int> Parse(string _query, WordsVector wordsVector, DocumentsVector documentsVector, HashSet<int> universalSet)
{
    var query = Preprocessor.GetInstance().PreprocessQuery(_query);

    // The patterns are tried in this fixed order and the first match wins,
    // so the order below must not be rearranged.
    if (ProximityQueryRegex.IsMatch(query))
    {
        return ProximityQuery.Parse(query, wordsVector);
    }

    if (GeneralTextQueryRegex.IsMatch(query))
    {
        return GeneralTextQueryParser.Parse(query, wordsVector, documentsVector);
    }

    if (BooleanQueryRegex.IsMatch(query))
    {
        return BooleanQueryParser.Parse(query, universalSet, wordsVector);
    }

    // ArgumentException derives from Exception, so existing catch (Exception)
    // handlers keep working. The single-argument constructor is used on purpose
    // so that Message stays exactly "Invalid Query".
    throw new ArgumentException("Invalid Query");
}
// Verifies that a DocumentsVector filled entry by entry through Update is
// equal (per Assert.AreEqual) to one constructed from a pre-populated
// dictionary holding vectors built from the same three sentences.
public void test()
{
    string[] sentences =
    {
        "Articles Articles the English.",
        "Articles in the Arabic language.",
        "I love Playing cricket."
    };

    // Expected value: add one vector per sentence, keyed by its position.
    var expected = new DocumentsVector();
    for (int documentId = 0; documentId < sentences.Length; documentId++)
    {
        expected.Update(documentId, new Vector(sentences[documentId]));
    }

    // Actual value: hand the constructor a ready-made id -> vector map.
    var vectorsById = new Dictionary<int, Vector>
    {
        { 0, new Vector("Articles Articles the English.") },
        { 1, new Vector("Articles in the Arabic language.") },
        { 2, new Vector("I love Playing cricket.") }
    };
    var actual = new DocumentsVector(vectorsById);

    Assert.AreEqual(expected, actual);
}
/// <summary>
/// Creates the storage with an empty <c>FYP_Data</c> dictionary and freshly
/// constructed word and document vectors. The constructor is private, so
/// instances can only be created from inside this class.
/// </summary>
private DataStorage()
{
    // Records keyed by integer id; starts empty.
    FYP_Data = new Dictionary<int, FYPSearchModel>();

    // Index structures start from their default-constructed state.
    wordsVector = new WordsVector();
    documentsVector = new DocumentsVector();
}