Ejemplo n.º 1
0
 /// <summary>
 /// Creates a searcher and builds its stemming sequence, parser and ranker.
 /// </summary>
 /// <param name="stem">Value stored in <c>stemmed</c> and passed on to the ranker.</param>
 public Searcher(bool stem)
 {
     // NOTE(review): machine-specific absolute path handed to the ranker as its
     // third argument; the alternative previously noted here was @"glove.6B.100dc.vec".
     const string vectorFilePath = @"C:\Users\welta\Desktop\New folder\glove.840B.300d\glove.840B.300d.vec";

     stemmed = stem;
     stemming = new StemmingSequence();

     // The parser is created from the stop-words path configured on StartDialog.
     parser = new Parser(StartDialog.stopWordsPath, false);

     // The ranker receives the flag through the field that was just assigned.
     ranker = new Ranker(StartDialog.indexPath, stemmed, vectorFilePath);
 }
Ejemplo n.º 2
0
 /// <summary>
 /// Creates a searcher from collaborators that were built by the caller.
 /// </summary>
 /// <param name="p">Parser stored in <c>this.p</c>.</param>
 /// <param name="ind">Indexer stored in <c>this.ind</c>.</param>
 /// <param name="r">Ranker stored in <c>this.r</c>.</param>
 /// <param name="path">Path stored in <c>this.path</c>.</param>
 public Searcher(Parser p, Indexer ind, Ranker r, string path)
 {
     // Plain member wiring: the arguments are stored as given, without validation.
     this.path = path;
     this.r = r;
     this.ind = ind;
     this.p = p;
 }
Ejemplo n.º 3
0
 /// <summary>
 /// Creates a searcher and builds its stemming sequence, parser and ranker.
 /// </summary>
 /// <param name="stem">Value stored in <c>stemmed</c> and passed on to the ranker.</param>
 public Searcher(bool stem)
 {
     // Relative file name handed to the ranker as its third argument.
     const string vectorFileName = @"glove.6B.100dc.vec";

     stemmed = stem;
     stemming = new StemmingSequence();

     // The parser is created from the stop-words path configured on StartDialog.
     parser = new Parser(StartDialog.stopWordsPath, false);

     // The ranker receives the flag through the field that was just assigned.
     ranker = new Ranker(StartDialog.indexPath, stemmed, vectorFileName);
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Starts a crawl from a fixed seed URL, runs the indexer, prints the elapsed
        /// time and then waits for a key press before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            Crawler c = new Crawler();
            Indexer i = new Indexer(c);

            // r and words are only referenced by the disabled interactive search below;
            // the Ranker is still constructed so any constructor work keeps happening.
            Ranker r     = new Ranker(c, i);
            string words = "";

            // Stopwatch is monotonic; subtracting DateTime.Now values is distorted by
            // system clock adjustments (DST, NTP corrections) during a long crawl.
            System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();

            c.StartCrawling("https://www.cbsnews.com/");
            var index = i.StartIndexing();

            timer.Stop();

            // Elapsed is a TimeSpan, so the printed format is the same as before.
            Console.WriteLine("\n" + timer.Elapsed);

            /*while (!words.Equals("end"))
             * {
             *  Console.WriteLine("type a word:");
             *  words = Console.ReadLine();
             *  r.PrintSearchResult(r.GetPagesWithWords(words, index), words);
             * }
             *
             * List<List<double>> a = r.MakePageRankMatrix();
             * Console.WriteLine();
             * foreach (var item in a[1])
             * {
             *  Console.Write(" " + item);
             * }*/

            // Keep the console window open until the user presses a key.
            Console.ReadKey();
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Tuning harness: parses the first 15 entries of the queries file, then walks a
        /// 20 x 20 grid of (cosSim, bm25) values, summing <c>va.compareResults</c> over all
        /// queries for each cell and printing the cell with the highest total score.
        /// </summary>
        public static void Main()
        {
            const int    queryCount = 15;
            const int    gridSize   = 20;
            const double gridStart  = 0.01;
            const double gridStep   = 0.05;

            Parser parser = new Parser(@"C:\Users\Hadar\Desktop\לימודים\סמסטר ז\אחזור\מנוע חלק ב\SearchEngine.v3.1\SearchEngine.v2.1\stop_words.txt", false);
            VariablesAjuster va = new VariablesAjuster(@"C:\Users\Hadar\Desktop\לימודים\סמסטר ז\אחזור\מנוע חלק ב\SearchEngine.v3.1\SearchEngine.v2.1\qrels.txt");
            Ranker ranker = new Ranker(@"C:\a\results", false, @"C:\a\glove.6B.100dc.vec");

            String queries = File.ReadAllText(@"C:\Users\Hadar\Desktop\לימודים\סמסטר ז\אחזור\מנוע חלק ב\SearchEngine.v3.1\SearchEngine.v2.1\queries.txt");

            // Query entries in the file are separated by three consecutive CRLF pairs.
            String[] rawQueries = queries.Split(new[] { "\r\n\r\n\r\n" }, StringSplitOptions.RemoveEmptyEntries);

            Query[]   queryInfos     = new Query[queryCount];
            Token[][] queryTokens    = new Token[queryCount][];
            Token[][] relevantTokens = new Token[queryCount][];

            for (int n = 0; n < queryCount; n++)
            {
                Query current = new Query(rawQueries[n]);
                queryInfos[n]     = current;
                queryTokens[n]    = parser.processDoc(new Document(null, null, null, null, current.getQuery(), null));
                relevantTokens[n] = parser.processDoc(new Document(null, null, null, null, current.getRelevant(), null));
            }

            int    bestScore  = 0;
            double bestBm25   = 0;
            double bestCosSim = 0;

            // NOTE(review): the swept values are only printed and recorded; the assignments
            // to ranker.bm25Mult / ranker.cosSimMult were commented out, so nothing in this
            // method passes them to the ranker. Confirm whether that is intended.
            double cosSimVal = gridStart;
            for (int row = 0; row < gridSize; row++, cosSimVal += gridStep)
            {
                double bm25Val = gridStart;
                for (int cell = 0; cell < gridSize; cell++, bm25Val += gridStep)
                {
                    // Total score of this grid cell across all queries.
                    int score = 0;
                    for (int n = 0; n < queryCount; n++)
                    {
                        Dictionary<string, double> queryResult = ranker.processQuerySemantically(queryTokens[n], relevantTokens[n]);
                        score += va.compareResults(queryInfos[n], queryResult);
                    }

                    Console.WriteLine(string.Format("BM={0} CosSim={1} Score: {2}", Math.Round(bm25Val, 2), Math.Round(cosSimVal, 2), score));

                    // Only a strictly higher score replaces the best cell, so the first one wins ties.
                    if (score > bestScore)
                    {
                        bestScore  = score;
                        bestBm25   = bm25Val;
                        bestCosSim = cosSimVal;
                    }
                }
            }

            Console.WriteLine(string.Format("MAX: BM={0} CosSim={1} Score: {2}", Math.Round(bestBm25, 2), Math.Round(bestCosSim, 2), bestScore));
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Tuning harness: parses the first 15 entries of the queries file, then walks a
        /// 16 x 20 grid of (k, b) values, assigning each pair to the ranker, summing
        /// <c>va.compareResults</c> over all queries and printing the best pair found.
        /// </summary>
        public static void Main()
        {
            const int    queryCount = 15;
            const int    kSteps     = 16;
            const int    bSteps     = 20;
            const double step       = 0.05;

            Parser parser = new Parser(@"stop_words.txt", false);
            VariablesAjuster va = new VariablesAjuster(@"qrel.txt");
            Ranker ranker = new Ranker(@"D:\Posting", false, @"C:\a\glove.6B.100dc.vec");

            String queries = File.ReadAllText(@"queries.txt");

            // Query entries in the file are separated by three consecutive CRLF pairs.
            String[] rawQueries = queries.Split(new[] { "\r\n\r\n\r\n" }, StringSplitOptions.RemoveEmptyEntries);

            Query[]   queryInfos     = new Query[queryCount];
            Token[][] queryTokens    = new Token[queryCount][];
            Token[][] relevantTokens = new Token[queryCount][];

            for (int n = 0; n < queryCount; n++)
            {
                Query current = new Query(rawQueries[n]);
                queryInfos[n]     = current;
                queryTokens[n]    = parser.processDoc(new Document(null, null, null, null, current.getQuery(), null));
                relevantTokens[n] = parser.processDoc(new Document(null, null, null, null, current.getRelevant(), null));
            }

            int    bestScore = 0;
            double bestB     = 0;
            double bestK     = 0;

            // k starts at 1.2 and grows by 0.05 per outer step; b restarts at 0 for each k.
            double kValue = 1.2;
            for (int outer = 0; outer < kSteps; outer++, kValue += step)
            {
                double bValue = 0.00;
                for (int inner = 0; inner < bSteps; inner++, bValue += step)
                {
                    // Push the current pair into the ranker before scoring the queries.
                    ranker.b = bValue;
                    ranker.k = kValue;

                    int score = 0;
                    for (int n = 0; n < queryCount; n++)
                    {
                        Dictionary<string, double> queryResult = ranker.processQuery(queryTokens[n], relevantTokens[n]);
                        score += va.compareResults(queryInfos[n], queryResult);
                    }

                    Console.WriteLine(string.Format("k={0} b={1} Score: {2}", Math.Round(kValue, 2), Math.Round(bValue, 2), score));

                    // Only a strictly higher score replaces the best pair, so the first one wins ties.
                    if (score > bestScore)
                    {
                        bestScore = score;
                        bestB     = bValue;
                        bestK     = kValue;
                    }
                }
            }

            Console.WriteLine(string.Format("MAX: B={0} K={1} Score: {2}", Math.Round(bestB, 2), Math.Round(bestK, 2), bestScore));
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Parses the query text, collects the posting entries of every parsed term from
        /// the posting files, ranks them and returns the ranked list in reversed order.
        /// </summary>
        /// <param name="Query">Raw query text handed to the parser.</param>
        /// <returns>The list produced by <c>ranker.rank</c>, reversed in place.</returns>
        public List<KeyValuePair<string, double>> search(string Query)
        {
            var postingsByTerm = new Dictionary<string, Dictionary<string, int>>();

            Parse queryParser = new Parse(CorpusFolder, false);
            Ranker ranker = new Ranker(DocInfo, MainDictionary);
            Dictionary<string, int> parsedQuery = queryParser.parseFile(new DocumentData(), Query);

            foreach (string term in parsedQuery.Keys)
            {
                // Terms missing from the main dictionary are recorded with a null posting map.
                if (!MainDictionary.ContainsKey(term))
                {
                    postingsByTerm.Add(term, null);
                    continue;
                }

                string fileName   = MainDictionary[term].Path;
                int    lineNumber = MainDictionary[term].LineNum;

                // Line numbers are 1-based: skip lineNumber - 1 lines, then read the target line.
                string postingLine;
                using (StreamReader reader = new StreamReader(postingPath + fileName + ".txt"))
                {
                    int toSkip = lineNumber;
                    while (toSkip > 1)
                    {
                        reader.ReadLine();
                        toSkip--;
                    }
                    postingLine = reader.ReadLine();
                }

                // Field 0 is skipped; from index 1 on the fields are read as (key, count)
                // pairs, stopping before the last two fields of the split line.
                string[] fields = postingLine.Split('^', '~');
                int limit = fields.Length - 2;

                Dictionary<string, int> postings = new Dictionary<string, int>();
                int idx = 1;
                while (idx < limit)
                {
                    postings.Add(fields[idx], int.Parse(fields[idx + 1]));
                    idx += 2;
                }

                postingsByTerm.Add(term, postings);
            }

            List<KeyValuePair<string, double>> ranked = ranker.rank(postingsByTerm, parsedQuery);
            ranked.Reverse();
            return ranked;
        }
Ejemplo n.º 8
0
 /// <summary>
 /// Picks the handler that matches the current query type, builds a ranker from the
 /// handler's output and returns that ranker's ranked results.
 /// </summary>
 /// <returns>The list returned by <c>Ranker.RankedResults()</c> for the current query.</returns>
 public List<KeyValuePair<string, double>> RankedResults()
 {
     // Evaluate the query type once instead of re-computing it for every comparison.
     string queryType = QueryType();

     IEnumerable<string> terms;
     switch (queryType)
     {
         case "OWQ":
             terms = handleOWQ(queryString);
             break;
         case "MWQ":
             terms = handleMWQ();
             break;
         default:
             // Any other type (including null) uses the handlePQ handler, as before.
             terms = handlePQ();
             break;
     }

     // Single construction site for the ranker; the handler output is copied into a new list.
     Ranker ranker = new Ranker(queryString, new List<string>(terms));
     return ranker.RankedResults();
 }
Ejemplo n.º 9
0
 /// <summary>
 /// Main window constructor: creates the ranker, builds the searcher on top of it and
 /// subscribes <c>vSearcherChanged</c> to the searcher's <c>SearcherChanged</c> event.
 /// </summary>
 public MainWindow()
 {
     ranker   = new Ranker();
     searcher = new Searcher(ranker);
     searcher.SearcherChanged += vSearcherChanged;
 }
Ejemplo n.º 10
0
        /// <summary>
        /// Click handler that loads the part-2 data from a user-selected folder: creates
        /// the indexer, parser and file reader, deserializes the dictionary and cache
        /// files, then builds the ranker and searcher and enables the query button.
        /// </summary>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Event data; not used.</param>
        private void Load2_click(object sender, RoutedEventArgs e)
        {
            string selectedPath;

            // FolderBrowserDialog is IDisposable: release it as soon as the choice is read.
            using (var dlg = new FolderBrowserDialog())
            {
                System.Windows.Forms.DialogResult result = dlg.ShowDialog(this.GetIWin32Window());

                // Do nothing when the dialog was cancelled or no folder was chosen.
                if (result != System.Windows.Forms.DialogResult.OK || string.IsNullOrEmpty(dlg.SelectedPath))
                {
                    return;
                }
                selectedPath = dlg.SelectedPath;
            }

            // Change the source path.
            pathopen  = selectedPath;
            pathclose = selectedPath;

            // Initialize all the first-part objects.
            ind = new Indexer(pathclose, isStem);
            p   = new Parser(pathopen + @"\stop_words.txt", isStem);
            r   = new ReadFile(pathopen + @"\corpus\");

            // The stemmed and non-stemmed runs use different dictionary and cache files.
            string cacheDir = pathclose + @"\CacheDic\";
            string dic      = cacheDir + (isStem ? "dicStem.dicx" : "dic.dicx");
            string cache    = cacheDir + (isStem ? "cacheStem.chex" : "cache.chex");

            try
            {
                // NOTE(security): BinaryFormatter deserialization is unsafe on files from an
                // untrusted source; only load folders produced by this application.

                // Load the dictionary.
                using (FileStream fs = new FileStream(dic, FileMode.Open))
                {
                    IFormatter bf = new BinaryFormatter();
                    ind.dic = (Dictionary<string, DicRecord>)bf.Deserialize(fs);
                }

                // Load the cache.
                using (FileStream fs = new FileStream(cache, FileMode.Open))
                {
                    IFormatter bf = new BinaryFormatter();
                    ind.cache = (Dictionary<string, List<PostingInfo>>)bf.Deserialize(fs);
                }
            }
            catch (IOException)
            {
                // The dictionary or cache file could not be opened in the selected folder.
                System.Windows.Forms.MessageBox.Show("Files Missing, can't Load", "ERROR!", MessageBoxButtons.OK, MessageBoxIcon.Error);
                return;
            }

            // Write the text versions used for viewing and enable their buttons.
            ind.writeTextChache();
            showcatch.IsEnabled = true;
            ind.writeTextDic();
            showDic.IsEnabled = true;

            // Build the ranker and the searcher on top of the loaded objects.
            rank     = new Ranker(pathclose, p, r, ind, isStem);
            searcher = new Searcher(p, ind, rank, pathopen);

            // Enable the run button and notify the user that loading finished.
            runQuery.IsEnabled = true;
            System.Windows.Forms.MessageBox.Show("Ready To search!!", "Done!", MessageBoxButtons.OK, MessageBoxIcon.Information);
        }