Example #1
0
        /// <summary>
        /// Runs an AND token search for <paramref name="searchstring"/> against the index,
        /// prints the total hit count, and lists the top 50 results by descending score.
        /// </summary>
        /// <param name="indexer">Index holding the token table to search against.</param>
        /// <param name="searchstring">Raw user query; tokenized via <c>Tokenizor.StringToToken</c>.</param>
        /// <param name="documents">Total number of indexed documents (used for score normalization).</param>
        public static void TestTokenSearcher(Index indexer, string searchstring, int documents)
        {
            var ts = new TokenSearcher(indexer);

            var tokens = Tokenizor.StringToToken(searchstring, indexer.Tokens).ToList();

            // Materialize once: the original deferred query was enumerated twice
            // (Count() and Take(50)), re-running the whole scoring pipeline each time.
            var searchres = ts.OrderByScore(tokens, ts.And(tokens), documents)
                              .OrderByDescending(x => x.Item1)
                              .ToList();

            Console.WriteLine("Found {0} results for \"{1}\":", searchres.Count, searchstring);

            foreach (var t in searchres.Take(50))
            {
                Console.WriteLine("{0,10:N7}: {1}", t.Item1, t.Item2.Url);
            }
        }
Example #2
0
        /// <summary>
        /// Entry point: restores (or runs) the crawler for a fixed seed list, indexes every
        /// visited site, then enters an interactive search loop until an empty query or EOF.
        /// </summary>
        static void Main(string[] args)
        {
            var seedUrl = new List<Uri>
            {
                new Uri("http://dr.dk"),
                new Uri("https://en.wikipedia.org"),
                new Uri("https://news.ycombinator.com"),
                new Uri("http://www.mmo-champion.com"),
                new Uri("https://msdn.microsoft.com")
            };

            var crawler = SerializationHelper.RestoreCrawler(seedUrl, new List<Host>(), 1000);

            // Only crawl (and persist) when the restored state is still under the page limit.
            if (crawler.Limit >= crawler.SitesVisited.Count)
            {
                // Synchronous Main: block until the crawl completes before indexing.
                Task.Run(() => crawler.Run()).GetAwaiter().GetResult();
                SerializationHelper.SaveCrawler(crawler);
            }

            var indexer = new Index();

            var sw = Stopwatch.StartNew();

            foreach (var site in crawler.SitesVisited.Values)
            {
                Tokenizor.AddTokensToTokenList(site, indexer.Tokens);
            }

            // Use TotalSeconds: the original integer division (ElapsedMilliseconds / 1000)
            // truncated to 0 for sub-second runs, yielding Infinity for the rate.
            Console.WriteLine("Indexing {0} documents took {1} ms ({2} pr. sec)",
                crawler.SitesVisited.Count,
                sw.ElapsedMilliseconds,
                crawler.SitesVisited.Count / sw.Elapsed.TotalSeconds);

            string s;

            do
            {
                Console.Write("Enter search Query: ");
                s = Console.ReadLine();

                // ReadLine returns null at end-of-stream; the original 'continue' re-tested
                // 's != ""' (true for null) and looped forever on a closed stdin.
                if (s == null)
                {
                    break;
                }

                if (s == "")
                {
                    continue;
                }

                TestTokenSearcher(indexer, s, crawler.SitesVisited.Count);
            } while (s != "");
        }