/// <summary>
/// Runs a phrase query against the inverse index and returns the matching documents.
/// </summary>
/// <param name="query">Raw query text; it is split into phrases by a <c>PhraseDocumentTokenizer</c>.</param>
/// <returns>
/// The set accumulated over all query phrases, or a new empty set when no set was
/// accumulated (for example, when tokenizing yields no phrases).
/// </returns>
public HashSet<Document> ProcessQuery(string query)
{
    // Stays null until the helper assigns it through the ref parameter.
    HashSet<Document> matches = null;

    var tagger = new PartOfSpeechTagger();
    var queryTokenizer = new PhraseDocumentTokenizer(tagger);

    foreach (var queryPhrase in queryTokenizer.Tokenize(query))
    {
        // Each phrase's document set is folded into the accumulator with Operator.AND
        // (presumably set intersection; confirm against HashSetHelper.AddToSet).
        HashSetHelper.AddToSet(_inverceIndex.GetDocumentSet(queryPhrase), Operator.AND, ref matches);
    }

    if (matches == null)
    {
        return new HashSet<Document>();
    }

    return matches;
}
/// <summary>
/// Console entry point: indexes the documents found under the directory given as the
/// first command-line argument, then answers phrase queries read line by line from
/// standard input, printing the file path of every matching document.
/// </summary>
/// <param name="args">Command-line arguments; <c>args[0]</c> is the input directory.</param>
private static void Main(string[] args)
{
    // Without a directory argument there is nothing to index; report usage instead of
    // failing with an IndexOutOfRangeException on args[0].
    if (args == null || args.Length == 0 || string.IsNullOrWhiteSpace(args[0]))
    {
        Console.Error.WriteLine("Usage: <program> <inputDirectory>");
        Environment.ExitCode = 1;
        return;
    }

    var inputDirectory = args[0];
    var documents = DocumentProvider.GetDocuments(inputDirectory);

    var tokenizer = new PhraseDocumentTokenizer(new PartOfSpeechTagger());
    var inverseIndex = new InverceIndex<Phrase, Document>(tokenizer);
    inverseIndex.AddDocuments(documents);

    var searcher = new PhraseQueryExecutor(inverseIndex);

    // Console.ReadLine returns null once standard input is exhausted (EOF, closed pipe,
    // redirected file fully consumed). Stop there; otherwise the loop would spin forever
    // because a null line also counts as "null or white space".
    string input;
    while ((input = Console.ReadLine()) != null)
    {
        // Blank lines are not queries; wait for the next line.
        if (string.IsNullOrWhiteSpace(input))
        {
            continue;
        }

        var resultDocs = searcher.ProcessQuery(input);
        Console.WriteLine(string.Join("\n", resultDocs.Select(d => d.FilePath)));
    }
}