/// <summary>
/// Creates a searcher and the stemming sequence, parser and ranker it works with.
/// </summary>
/// <param name="stem">Stored in <c>stemmed</c> and forwarded to the ranker as its second argument.</param>
public Searcher(bool stem)
{
    // Vector file passed as the ranker's third argument.
    // A previously used alternative was @"glove.6B.100dc.vec".
    // NOTE(review): this is an absolute, machine-specific path — confirm it is intended.
    const string vectorsFile = @"C:\Users\welta\Desktop\New folder\glove.840B.300d\glove.840B.300d.vec";

    stemmed = stem;
    stemming = new StemmingSequence();

    // The parser is built from the stop-words path chosen in the start dialog; second argument is false.
    parser = new Parser(StartDialog.stopWordsPath, false);

    ranker = new Ranker(StartDialog.indexPath, stemmed, vectorsFile);
}
/// <summary>
/// Creates a searcher from already constructed collaborators.
/// </summary>
/// <param name="p">Parser stored in <c>this.p</c>.</param>
/// <param name="ind">Indexer stored in <c>this.ind</c>.</param>
/// <param name="r">Ranker stored in <c>this.r</c>.</param>
/// <param name="path">Path stored in <c>this.path</c>.</param>
public Searcher(Parser p, Indexer ind, Ranker r, string path)
{
    // Plain capture of the supplied collaborators; nothing is validated or copied here.
    this.p = p;
    this.ind = ind;
    this.r = r;
    this.path = path;
}
/// <summary>
/// Creates a searcher and the stemming sequence, parser and ranker it works with.
/// </summary>
/// <param name="stem">Stored in <c>stemmed</c> and forwarded to the ranker as its second argument.</param>
public Searcher(bool stem)
{
    // Vector file passed as the ranker's third argument (relative path).
    const string vectorsFile = @"glove.6B.100dc.vec";

    stemmed = stem;
    stemming = new StemmingSequence();

    // The parser is built from the stop-words path chosen in the start dialog; second argument is false.
    parser = new Parser(StartDialog.stopWordsPath, false);

    ranker = new Ranker(StartDialog.indexPath, stemmed, vectorsFile);
}
/// <summary>
/// Console entry point: crawls starting from a fixed seed URL, indexes the crawl,
/// prints how long both steps took and then waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
static void Main(string[] args)
{
    Crawler crawler = new Crawler();
    Indexer indexer = new Indexer(crawler);
    Ranker ranker = new Ranker(crawler, indexer);
    string words = ""; // only read by the disabled interactive loop below

    // Stopwatch is used instead of subtracting DateTime.Now values: wall-clock time can
    // jump (DST, clock adjustment), which would corrupt the measured duration.
    System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();

    crawler.StartCrawling("https://www.cbsnews.com/");
    var index = indexer.StartIndexing();

    // Elapsed is a TimeSpan, so the printed format matches the previous DateTime difference.
    Console.WriteLine("\n" + timer.Elapsed);

    // Disabled interactive search loop and PageRank matrix dump, kept for reference:
    //
    // while (!words.Equals("end"))
    // {
    //     Console.WriteLine("type a word:");
    //     words = Console.ReadLine();
    //     ranker.PrintSearchResult(ranker.GetPagesWithWords(words, index), words);
    // }
    //
    // List<List<double>> a = ranker.MakePageRankMatrix();
    // Console.WriteLine();
    // foreach (var item in a[1])
    // {
    //     Console.Write(" " + item);
    // }

    Console.ReadKey();
}
/// <summary>
/// Tuning driver: parses 15 queries from a file, then walks a 20 x 20 grid of
/// (CosSim, BM25) values, scoring the ranker's semantic results for every query
/// at each grid point and reporting the best-scoring pair.
/// </summary>
public static void Main()
{
    const int QueryCount = 15;
    const int StepsPerAxis = 20;
    const double InitialValue = 0.01;
    const double StepSize = 0.05;

    Parser parser = new Parser(@"C:\Users\Hadar\Desktop\לימודים\סמסטר ז\אחזור\מנוע חלק ב\SearchEngine.v3.1\SearchEngine.v2.1\stop_words.txt", false);
    VariablesAjuster va = new VariablesAjuster(@"C:\Users\Hadar\Desktop\לימודים\סמסטר ז\אחזור\מנוע חלק ב\SearchEngine.v3.1\SearchEngine.v2.1\qrels.txt");
    // Disabled: QueryMutator qm = new QueryMutator(@"X:\Junk\glove.6B.100dc.vec", 1);
    Ranker ranker = new Ranker(@"C:\a\results", false, @"C:\a\glove.6B.100dc.vec");

    double cosSimCurrent = InitialValue;
    double bm25Current = InitialValue;
    double bestCosSim = 0;
    double bestBm25 = 0;
    int bestScore = 0;

    // Queries in the file are separated by two blank lines (three consecutive CRLFs).
    string fileText = File.ReadAllText(@"C:\Users\Hadar\Desktop\לימודים\סמסטר ז\אחזור\מנוע חלק ב\SearchEngine.v3.1\SearchEngine.v2.1\queries.txt");
    string[] rawQueries = fileText.Split(new[] { "\r\n\r\n\r\n" }, StringSplitOptions.RemoveEmptyEntries);

    Query[] parsedQueries = new Query[QueryCount];
    Token[][] queryTokens = new Token[QueryCount][];
    Token[][] relevantTokens = new Token[QueryCount][];
    // Disabled: a matching Token[][] for the non-relevant text of each query.

    // NOTE(review): exactly QueryCount entries are read; fewer entries in the file would
    // make rawQueries[n] throw — confirm the input always holds at least 15 queries.
    for (int n = 0; n < QueryCount; n++)
    {
        Query parsed = new Query(rawQueries[n]);
        parsedQueries[n] = parsed;
        queryTokens[n] = parser.processDoc(new Document(null, null, null, null, parsed.getQuery(), null));
        relevantTokens[n] = parser.processDoc(new Document(null, null, null, null, parsed.getRelevant(), null));
    }

    for (int cosStep = 0; cosStep < StepsPerAxis; cosStep++)
    {
        for (int bmStep = 0; bmStep < StepsPerAxis; bmStep++)
        {
            // NOTE(review): the assignments ranker.bm25Mult = bm25Current and
            // ranker.cosSimMult = cosSimCurrent are disabled, so the swept values are
            // only printed and recorded here, never handed to the ranker.
            int score = 0;

            // Run every query and accumulate its comparison score.
            for (int n = 0; n < QueryCount; n++)
            {
                Dictionary<string, double> queryResult = ranker.processQuerySemantically(queryTokens[n], relevantTokens[n]);
                score += va.compareResults(parsedQueries[n], queryResult);
            }

            Console.WriteLine("BM=" + Math.Round(bm25Current, 2) + " CosSim=" + Math.Round(cosSimCurrent, 2) + " Score: " + score);

            // Keep the first strictly better grid point.
            if (score > bestScore)
            {
                bestScore = score;
                bestBm25 = bm25Current;
                bestCosSim = cosSimCurrent;
            }

            bm25Current += StepSize;
        }

        cosSimCurrent += StepSize;
        bm25Current = InitialValue;
    }

    Console.WriteLine("MAX: BM=" + Math.Round(bestBm25, 2) + " CosSim=" + Math.Round(bestCosSim, 2) + " Score: " + bestScore);
}
/// <summary>
/// Tuning driver: parses 15 queries from a file, then walks a 16 x 20 grid of
/// (k, b) values, assigns each pair to the ranker, scores the ranker's results for
/// every query and reports the best-scoring pair.
/// </summary>
public static void Main()
{
    const int QueryCount = 15;
    const int KSteps = 16;
    const int BSteps = 20;
    const double InitialB = 0.00;
    const double StepSize = 0.05;

    Parser parser = new Parser(@"stop_words.txt", false);
    VariablesAjuster va = new VariablesAjuster(@"qrel.txt");
    // Disabled: QueryMutator qm = new QueryMutator(@"X:\Junk\glove.6B.100dc.vec", 1);
    Ranker ranker = new Ranker(@"D:\Posting", false, @"C:\a\glove.6B.100dc.vec");

    double bCurrent = InitialB;
    double kCurrent = 1.2;
    double bestB = 0;
    double bestK = 0;
    int bestScore = 0;

    // Queries in the file are separated by two blank lines (three consecutive CRLFs).
    string fileText = File.ReadAllText(@"queries.txt");
    string[] rawQueries = fileText.Split(new[] { "\r\n\r\n\r\n" }, StringSplitOptions.RemoveEmptyEntries);

    Query[] parsedQueries = new Query[QueryCount];
    Token[][] queryTokens = new Token[QueryCount][];
    Token[][] relevantTokens = new Token[QueryCount][];
    // Disabled: a matching Token[][] for the non-relevant text of each query.

    // NOTE(review): exactly QueryCount entries are read; fewer entries in the file would
    // make rawQueries[n] throw — confirm the input always holds at least 15 queries.
    for (int n = 0; n < QueryCount; n++)
    {
        Query parsed = new Query(rawQueries[n]);
        parsedQueries[n] = parsed;
        queryTokens[n] = parser.processDoc(new Document(null, null, null, null, parsed.getQuery(), null));
        relevantTokens[n] = parser.processDoc(new Document(null, null, null, null, parsed.getRelevant(), null));
    }

    for (int kStep = 0; kStep < KSteps; kStep++)
    {
        for (int bStep = 0; bStep < BSteps; bStep++)
        {
            // Hand the current grid point to the ranker before scoring.
            ranker.b = bCurrent;
            ranker.k = kCurrent;

            int score = 0;

            // Run every query and accumulate its comparison score.
            // (The semantic variant and the irrelevant-results subtraction are disabled.)
            for (int n = 0; n < QueryCount; n++)
            {
                Dictionary<string, double> queryResult = ranker.processQuery(queryTokens[n], relevantTokens[n]);
                score += va.compareResults(parsedQueries[n], queryResult);
            }

            Console.WriteLine("k=" + Math.Round(kCurrent, 2) + " b=" + Math.Round(bCurrent, 2) + " Score: " + score);

            // Keep the first strictly better grid point.
            if (score > bestScore)
            {
                bestScore = score;
                bestB = bCurrent;
                bestK = kCurrent;
            }

            bCurrent += StepSize;
        }

        kCurrent += StepSize;
        bCurrent = InitialB;
    }

    Console.WriteLine("MAX: B=" + Math.Round(bestB, 2) + " K=" + Math.Round(bestK, 2) + " Score: " + bestScore);
}
/// <summary>
/// Parses the query text, collects the posting entries of every query term from the
/// posting files, ranks the result and returns the ranked list in reversed order.
/// </summary>
/// <param name="Query">Raw query text handed to the parser.</param>
/// <returns>The list produced by <c>ranker.rank</c>, reversed in place.</returns>
public List<KeyValuePair<string, double>> search(string Query)
{
    var postingsByTerm = new Dictionary<string, Dictionary<string, int>>();
    Parse p = new Parse(CorpusFolder, false);
    Ranker ranker = new Ranker(DocInfo, MainDictionary);
    Dictionary<string, int> queryTerms = p.parseFile(new DocumentData(), Query);

    foreach (string term in queryTerms.Keys)
    {
        // Terms missing from the main dictionary are recorded with a null posting map.
        if (!MainDictionary.ContainsKey(term))
        {
            postingsByTerm.Add(term, null);
            continue;
        }

        var entry = MainDictionary[term];
        string fileName = entry.Path;
        int targetLine = entry.LineNum;

        // Skip to the 1-based target line of the term's posting file and read it.
        string record;
        using (var reader = new StreamReader(postingPath + fileName + ".txt"))
        {
            int current = 1;
            while (current < targetLine)
            {
                reader.ReadLine();
                current++;
            }

            record = reader.ReadLine();
        }

        // The record is split on '^' and '~'. Field 0 is skipped; from field 1 on, fields
        // are consumed as (key, integer) pairs while the index stays below Length - 2.
        string[] fields = record.Split('^', '~');
        int limit = fields.Length - 2;
        var postings = new Dictionary<string, int>();
        int at = 1;
        while (at < limit)
        {
            postings.Add(fields[at], int.Parse(fields[at + 1]));
            at += 2;
        }

        postingsByTerm.Add(term, postings);
    }

    List<KeyValuePair<string, double>> ranked = ranker.rank(postingsByTerm, queryTerms);
    ranked.Reverse();
    return ranked;
}
/// <summary>
/// Ranks the current query: picks the term handler that matches the query type,
/// feeds the resulting terms to a new <c>Ranker</c> and returns its ranked results.
/// </summary>
/// <returns>The list returned by <c>Ranker.RankedResults()</c>.</returns>
public List<KeyValuePair<string, double>> RankedResults()
{
    // Classify the query once; the previous code re-evaluated QueryType() for every branch test.
    var queryType = QueryType();

    // Read the query before any handler runs, matching the original argument evaluation order.
    var query = queryString;

    IEnumerable<string> terms;
    if (queryType == "OWQ")
    {
        terms = handleOWQ(query);
    }
    else if (queryType == "MWQ")
    {
        terms = handleMWQ();
    }
    else
    {
        // Every other query type goes through handlePQ.
        terms = handlePQ();
    }

    Ranker ranker = new Ranker(query, new List<string>(terms));
    return ranker.RankedResults();
}
/// <summary>
/// Main constructor: creates the ranker, builds the searcher on top of it and
/// subscribes <c>vSearcherChanged</c> to the searcher's <c>SearcherChanged</c> event.
/// </summary>
public MainWindow()
{
    ranker = new Ranker();
    searcher = new Searcher(ranker);

    // React to changes reported by the searcher.
    searcher.SearcherChanged += vSearcherChanged;
}
/// <summary>
/// Click handler that loads everything part 2 needs from a user-selected folder:
/// stop words, corpus reader, the serialized dictionary and cache, and finally the
/// ranker and searcher. Enables the viewing and query buttons when loading succeeds.
/// </summary>
/// <param name="sender">Event source; not read by this method.</param>
/// <param name="e">Event data; not read by this method.</param>
private void Load2_click(object sender, RoutedEventArgs e)
{
    // Folder chooser. The dialog is disposable, so release it as soon as the path is read.
    string selectedFolder;
    using (var dlg = new FolderBrowserDialog())
    {
        dlg.ShowDialog(this.GetIWin32Window());
        selectedFolder = dlg.SelectedPath;
    }

    // Nothing selected: leave the current state untouched.
    if (string.IsNullOrEmpty(selectedFolder))
    {
        return;
    }

    // Change the source paths.
    pathopen = selectedFolder;
    pathclose = selectedFolder;

    // Init all the first-part objects.
    ind = new Indexer(pathclose, isStem);
    p = new Parser(pathopen + @"\stop_words.txt", isStem);
    r = new ReadFile(pathopen + @"\corpus\");

    // The stemmed and unstemmed runs use different dictionary/cache files.
    string dic;
    string cache;
    if (isStem)
    {
        dic = pathclose + @"\CacheDic\dicStem.dicx";
        cache = pathclose + @"\CacheDic\cacheStem.chex";
    }
    else
    {
        dic = pathclose + @"\CacheDic\dic.dicx";
        cache = pathclose + @"\CacheDic\cache.chex";
    }

    try
    {
        // SECURITY NOTE(review): BinaryFormatter is unsafe on untrusted data and has been
        // removed from current .NET; these files should only come from a trusted location.
        // Load the dictionary.
        using (FileStream fs = new FileStream(dic, FileMode.Open))
        {
            IFormatter bf = new BinaryFormatter();
            ind.dic = (Dictionary<string, DicRecord>)bf.Deserialize(fs);
        }

        // Load the cache.
        using (FileStream fs = new FileStream(cache, FileMode.Open))
        {
            IFormatter bf = new BinaryFormatter();
            ind.cache = (Dictionary<string, List<PostingInfo>>)bf.Deserialize(fs);
        }
    }
    catch (IOException)
    {
        // Dictionary or cache file could not be found or opened in the selected folder.
        System.Windows.Forms.MessageBox.Show("Files Missing, can't Load", "ERROR!", MessageBoxButtons.OK, MessageBoxIcon.Error);
        return;
    }
    catch (SerializationException)
    {
        // The file exists but is not a valid serialized stream; report it instead of crashing.
        System.Windows.Forms.MessageBox.Show("Files Corrupted, can't Load", "ERROR!", MessageBoxButtons.OK, MessageBoxIcon.Error);
        return;
    }

    // Write the text versions used for viewing and enable the matching buttons.
    ind.writeTextChache();
    showcatch.IsEnabled = true;
    ind.writeTextDic();
    showDic.IsEnabled = true;

    // New ranker (built from the selected folder and the objects above) and searcher.
    rank = new Ranker(pathclose, p, r, ind, isStem);
    searcher = new Searcher(p, ind, rank, pathopen);

    // Open the run button.
    runQuery.IsEnabled = true;

    // Notify when finished.
    System.Windows.Forms.MessageBox.Show("Ready To search!!", "Done!", MessageBoxButtons.OK, MessageBoxIcon.Information);
}