/// <summary>
/// Adds one document to the in-memory index: registers the document under a
/// posting id, tokenizes and stems its content, and records a posting for
/// every stemmed token.
/// </summary>
/// <param name="uri">Address of the document; its hash code is used as the posting id.</param>
/// <param name="fileContent">Document text that is handed to the tokenizer.</param>
/// <remarks>
/// NOTE(review): the posting id is <c>uri.GetHashCode()</c>, so two different
/// URIs can map to the same id, and indexing the same URI twice reuses its id.
/// Presumably <c>_postingMap.Add</c> rejects a duplicate key — confirm against
/// the declared type of <c>_postingMap</c> before relying on re-indexing.
/// </remarks>
public void Index(string uri, string fileContent)
{
    int postingId = uri.GetHashCode();
    _postingMap.Add(postingId, uri);

    foreach (string token in _tokenizer.Tokenize(fileContent))
    {
        // One stemmer per token, exactly as before; whether a Stemmer
        // instance can be reused across tokens is not visible from here.
        Stemmer stemmer = new Stemmer();
        stemmer.Stem(token);
        string stemmedToken = stemmer.ToString();

        // Single lookup instead of ContainsKey followed by the indexer.
        Term term;
        if (!_terms.TryGetValue(stemmedToken, out term))
        {
            term = new Term(stemmedToken);
            _terms.Add(stemmedToken, term);
        }

        term.AddPosting(postingId);
    }
}
/// <summary>
/// Console entry point. Runs the crawler, indexes the stored pages, answers a
/// single search query with up to ten results, and finally prints the
/// PageRank vector.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // Path.Combine yields the same "Ressources\Seeds.txt" on Windows while
    // also producing a valid path on platforms with a different separator.
    Crawler crawler = new Crawler(Path.Combine("Ressources", "Seeds.txt"));
    Console.WriteLine("Starting...");
    crawler.Start(1000);
    Console.WriteLine("Finished crawling...");

    Console.WriteLine("Press any key to start indexing...");
    Console.ReadLine();

    Store fileStore = new Store();
    Indexer indexer = new Indexer();
    IndexStoredPages(fileStore, indexer);

    RunSearchQuery(indexer);
    PrintPageRankVector();

    Console.WriteLine("Press any key to exit...");
    Console.ReadLine();
}

/// <summary>
/// Reads every file listed in the store's file map, feeds it to the indexer,
/// and reports progress and throughput on the console.
/// </summary>
private static void IndexStoredPages(Store fileStore, Indexer indexer)
{
    // Console row on which the progress line is redrawn for every page.
    // NOTE(review): the fixed row presumably matches the amount of output
    // printed before indexing starts — confirm against the crawler's output.
    const int progressRow = 8;

    Dictionary<string, string> filesMap = fileStore.LoadFileMap();
    Console.WriteLine("Indexing started");

    // Stopwatch is unaffected by wall-clock changes, unlike DateTime.Now.
    System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();
    int filesIndexed = 0;
    foreach (KeyValuePair<string, string> file in filesMap)
    {
        string fileContent = File.ReadAllText(Path.Combine(fileStore.OutputPath, file.Value));
        indexer.Index(file.Key, fileContent);

        Console.SetCursorPosition(0, progressRow);
        Console.WriteLine("Pages indexed {0} / {1}", ++filesIndexed, filesMap.Count);
    }
    stopwatch.Stop();

    TimeSpan timeElapsed = stopwatch.Elapsed;
    double pagesPerSec = filesMap.Count / timeElapsed.TotalSeconds;
    Console.WriteLine("Indexed {0} pages per second", pagesPerSec);
    Console.WriteLine("Indexing finished in {0}", timeElapsed.ToString(@"hh\:mm\:ss"));
}

/// <summary>
/// Prompts for one query, stems its terms, ranks the indexed documents and
/// prints the URLs of at most ten results.
/// </summary>
private static void RunSearchQuery(Indexer indexer)
{
    const int maxResults = 10;

    Ranker ranker = new Ranker();
    Console.Write("The search engine ready for your query: ");

    // ReadLine returns null when the input stream has ended; treat that as
    // an empty query instead of passing null to the tokenizer.
    string searchQuery = Console.ReadLine() ?? string.Empty;

    Tokenizer tokenizer = new Tokenizer();
    List<string> queryTerms = tokenizer.TokenizeQuery(searchQuery).ToList();
    for (int i = 0; i < queryTerms.Count; i++)
    {
        Stemmer stemmer = new Stemmer();
        stemmer.Stem(queryTerms[i]);
        queryTerms[i] = stemmer.ToString();
    }

    IEnumerable<KeyValuePair<int, double>> scores =
        ranker.Rank(queryTerms, indexer.Terms, indexer.TotalDocuments);

    int resultCount = 1;
    foreach (KeyValuePair<int, double> item in scores)
    {
        string url = indexer.GetUrlFromHash(item.Key);
        if (url == null)
        {
            // No URL is known for this posting id; skip it without
            // consuming a result slot.
            continue;
        }

        Console.WriteLine("{0}: {1}", resultCount++, url);
        if (resultCount > maxResults)
        {
            break;
        }
    }
}

/// <summary>
/// Runs PageRank and prints the resulting vector on a single line.
/// </summary>
private static void PrintPageRankVector()
{
    PageRank pageRank = new PageRank();
    float[] vector = pageRank.DoRank();

    // Separate the values with spaces; writing them back to back made the
    // individual numbers indistinguishable.
    Console.WriteLine(string.Join(" ", vector));
}