// public List <String> semanticSearch(String query, String relevant, string[] cityCheckedByUser) { Dictionary <String, double> rankedDocuments; Document queryInSidDoc = new Document("", "", "", "", query, ""); List <String> max50Docs = new List <String>(); List <String> m_max50Docs = new List <String>(); Document queryInSidDocRelevant = new Document("", "", "", "", relevant, ""); Token[] tokensRelevant = parser.processDoc(queryInSidDocRelevant); // false in parser means that it is a query (not a document) Token[] tokens = parser.processDoc(queryInSidDoc); if (stemmed) { tokens = stemming.StemTokens(tokens); } if (cityCheckedByUser != null) { rankedDocuments = ranker.processQuerySemanticallyWithCities(tokens, tokensRelevant, cityCheckedByUser); } else { rankedDocuments = ranker.processQuerySemantically(tokens, tokensRelevant); } List <KeyValuePair <string, double> > sorted = (from kv in rankedDocuments orderby kv.Value select kv).ToList(); sorted.Reverse(); // we return at most the 50 highest-ranked docs for the query for (int i = 0; i < 50 && i < sorted.Count; i++) { max50Docs.Add(sorted[i].Key.ToString()); // m_max50Docs.Add(m_sorted[i].Key.ToString()); } return(max50Docs); }
/// <summary>
/// Tuning harness: loads the stop-word list, the qrels file and the query file,
/// parses each query (text and "relevant" text) into tokens, then sweeps a
/// 20 x 20 grid of candidate BM25 / cosine-similarity weights. For every grid
/// point it runs each query through <c>ranker.processQuerySemantically</c>,
/// sums the values returned by <c>va.compareResults</c>, prints the total, and
/// finally prints the grid point with the highest total.
/// </summary>
/// <remarks>
/// At most 15 queries are used. If the query file holds fewer, only the
/// available ones are evaluated instead of indexing past the end of the array.
/// </remarks>
public static void Main()
{
    // Directory that holds stop_words.txt, qrels.txt and queries.txt.
    const string dataDir = @"C:\Users\Hadar\Desktop\לימודים\סמסטר ז\אחזור\מנוע חלק ב\SearchEngine.v3.1\SearchEngine.v2.1\";

    const int maxQueries = 15;        // upper bound on the number of queries evaluated
    const int gridSteps = 20;         // number of candidate values per weight
    const double firstWeight = 0.01;  // first candidate value of each weight
    const double weightStep = 0.05;   // distance between consecutive candidates

    Parser parser = new Parser(dataDir + "stop_words.txt", false);
    VariablesAjuster va = new VariablesAjuster(dataDir + "qrels.txt");
    //QueryMutator qm = new QueryMutator(@"X:\Junk\glove.6B.100dc.vec", 1);
    Ranker ranker = new Ranker(@"C:\a\results", false, @"C:\a\glove.6B.100dc.vec");

    // Queries are separated from each other by two blank lines (three CRLFs in a row).
    String queries = File.ReadAllText(dataDir + "queries.txt");
    String[] q = queries.Split(new string[] { "\r\n\r\n\r\n" }, StringSplitOptions.RemoveEmptyEntries);

    // Never index past the queries actually present in the file.
    int queryCount = Math.Min(maxQueries, q.Length);

    Token[][] arr = new Token[queryCount][];
    Token[][] relevant = new Token[queryCount][];
    //Token[][] irrelevant = new Token[queryCount][];
    Query[] col = new Query[queryCount];

    for (int i = 0; i < queryCount; i++)
    {
        col[i] = new Query(q[i]);
        arr[i] = parser.processDoc(new Document(null, null, null, null, col[i].getQuery(), null));
        relevant[i] = parser.processDoc(new Document(null, null, null, null, col[i].getRelevant(), null));
        //irrelevant[i] = parser.processDoc(new Document(null, null, null, null, col[i].getNonRelevant(), null));
    }

    int bestScore = 0;
    double bestBm25 = 0;
    double bestCosSim = 0;

    for (int i = 0; i < gridSteps; i++)
    {
        // Derive each candidate from its index rather than accumulating += 0.05,
        // so floating-point rounding error does not build up across iterations.
        double cosSimVal = firstWeight + i * weightStep;

        for (int j = 0; j < gridSteps; j++)
        {
            double bm25Val = firstWeight + j * weightStep;

            // NOTE(review): the two assignments below are commented out, so the
            // candidate weights are only printed and recorded here; nothing in
            // this method hands them to the ranker. Whether Ranker still exposes
            // bm25Mult / cosSimMult is not visible from here - confirm before
            // re-enabling.
            //ranker.bm25Mult = bm25Val;
            //ranker.cosSimMult = cosSimVal;

            int score = 0;

            // Calculate every query and compare its result with the qrels.
            for (int k = 0; k < queryCount; k++)
            {
                //Token[][] mutated = qm.getPermutations(arr[k]);
                //Dictionary<string,double> queryResult= ranker.processQuery(arr[k],relevant[k]);
                Dictionary<string, double> queryResult = ranker.processQuerySemantically(arr[k], relevant[k]);
                //Dictionary<string, double> irrelevantResults = ranker.processQuery(irrelevant[k]);
                //queryResult = va.manipulateResults(queryResult, irrelevantResults, "substract");
                score += va.compareResults(col[k], queryResult);
            }

            Console.WriteLine("BM=" + Math.Round(bm25Val, 2) + " CosSim=" + Math.Round(cosSimVal, 2) + " Score: " + score);

            // Strictly greater: on ties the earliest grid point is kept.
            if (score > bestScore)
            {
                bestScore = score;
                bestBm25 = bm25Val;
                bestCosSim = cosSimVal;
            }
        }
    }

    Console.WriteLine("MAX: BM=" + Math.Round(bestBm25, 2) + " CosSim=" + Math.Round(bestCosSim, 2) + " Score: " + bestScore);
}