/// <summary>
/// Command-line entry point: indexes every <c>.sgm</c> file found directly in a folder,
/// writes the resulting index to a new file, then answers queries read from standard
/// input until the input ends.
/// </summary>
/// <param name="args">
/// Exactly two values: the folder to index and the path of the index file to create.
/// Any other argument count prints the usage line and returns.
/// </param>
public static void Main(string[] args)
{
    if (args.Length != 2)
    {
        Console.WriteLine("usage: Spimi <folderpath> <DestinationIndexFilePath>");
        return;
    }

    string directory = args[0];
    string indexFilePath = args[1];

    SpimiIndexer indexer = new SpimiIndexer(new BasicLexer());
    DirectoryInfo dir = new DirectoryInfo(directory);
    foreach (FileInfo file in dir.GetFiles().Where(f => f.Extension.Equals(".sgm")))
    {
        // Release each input file handle once it has been indexed instead of leaking it.
        // NOTE(review): assumes Index reads the stream before returning - confirm.
        using (FileStream input = file.Open(FileMode.Open))
        {
            indexer.Index(file.FullName, input);
        }
    }

    // FileMode.CreateNew throws if the destination already exists, so an existing
    // file is never overwritten.
    using (FileStream indexFileStream = File.Open(indexFilePath, FileMode.CreateNew))
    {
        indexer.CreateIndex(indexFileStream);
        FileIndex index = FileIndex.Open(indexFileStream);
        QueryEngine queryEngine = new QueryEngine(index);

        while (true)
        {
            Console.Write("> ");
            string query = Console.ReadLine();
            if (query == null)
            {
                // ReadLine returns null once standard input is exhausted (Ctrl+Z / Ctrl+D
                // or a redirected file ending); stop instead of dereferencing null.
                break;
            }

            foreach (string docId in queryEngine.Query(query.ToLower()))
            {
                Console.WriteLine(docId);
            }
        }
    }
}
/// <summary>
/// Queries the engine for "foo bar" using <c>RankingMode.TFIDF</c> and asserts that the
/// returned ids include the document id of every posting in <c>postingsWithFoo</c> or
/// <c>postingsWithBar</c>.
/// </summary>
public void testQuery()
{
    var queryEngine = new QueryEngine(index, metadata);
    IList<long> returnedIds = queryEngine.Query("foo bar", RankingMode.TFIDF);

    // Union yields each distinct posting from the two fixture lists once.
    IList<Posting> required = postingsWithFoo.Union(postingsWithBar).ToList();

    for (int k = 0; k < required.Count; k++)
    {
        bool wasReturned = returnedIds.Contains(required[k].DocumentId);
        Assert.IsTrue(wasReturned);
    }
}
/// <summary>
/// Runs <paramref name="query"/> against the index and metadata files and returns the
/// title, URI and score of at most the first 500 document ids the query engine reports.
/// </summary>
/// <param name="query">Query text; it is lower-cased before being passed to the query engine.</param>
/// <param name="rankingMode">Ranking mode forwarded unchanged to the query engine.</param>
/// <returns>
/// One <c>QueryResult</c> per reported document id that has metadata, in the order the
/// engine returned the ids. Ids without metadata are reported on the console and skipped.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="query"/> is null.</exception>
public IList<QueryResult> Query(string query, RankingMode rankingMode)
{
    if (query == null)
    {
        throw new ArgumentNullException("query");
    }

    // Open both files read-only and shareable: the two-argument File.Open overload asks
    // for read/write access with no sharing, which makes overlapping queries fail.
    // NOTE(review): assumes TermIndex and IndexMetadata only read their streams - confirm.
    using (FileStream indexFileStream = File.Open(indexFilePath, FileMode.Open, FileAccess.Read, FileShare.Read))
    using (FileStream metadataFileStream = File.Open(metadataFilePath, FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        IndexMetadata indexMetadata = new IndexMetadata(metadataFileStream);
        TermIndex index = new TermIndex(indexFileStream);
        QueryEngine queryEngine = new QueryEngine(index, indexMetadata);

        IList<long> results = queryEngine.Query(query.ToLower(), rankingMode);
        IList<QueryResult> queryResults = new List<QueryResult>();

        // Header line kept so existing console output is unchanged; this method
        // itself prints no result rows.
        Console.WriteLine("rank\tscore\ttitle");

        foreach (long docId in results.Take(500))
        {
            DocumentInfo docInfo;
            if (indexMetadata.TryGetDocumentInfo(docId, out docInfo))
            {
                queryResults.Add(new QueryResult()
                {
                    Title = docInfo.Title,
                    Uri = docInfo.Uri,
                    Score = queryEngine.Scores[docId]
                });
            }
            else
            {
                Console.WriteLine("Found document id in posting list that wasn't indexed in metadata: " + docId);
            }
        }

        return queryResults;
    }
}