/// <summary>
        /// Runs a phrase query against the index and returns the resulting documents.
        /// </summary>
        /// <remarks>
        /// The query is tokenized into phrases; the document set of each phrase is fetched
        /// from the index and folded into a running result through
        /// <c>HashSetHelper.AddToSet</c> with <c>Operator.AND</c>
        /// (presumably an intersection - confirm against HashSetHelper).
        /// </remarks>
        /// <param name="query">Raw query text to tokenize.</param>
        /// <returns>
        /// The accumulated document set, or a new empty set when the accumulator was never
        /// assigned (for example, when tokenizing yields no phrases).
        /// </returns>
        public HashSet<Document> ProcessQuery(string query)
        {
            var queryTokenizer = new PhraseDocumentTokenizer(new PartOfSpeechTagger());

            // Starts out null; AddToSet receives it by reference and is responsible for
            // seeding and updating it.
            HashSet<Document> matches = null;

            foreach (var phrase in queryTokenizer.Tokenize(query))
            {
                var phraseDocuments = _inverceIndex.GetDocumentSet(phrase);
                HashSetHelper.AddToSet(phraseDocuments, Operator.AND, ref matches);
            }

            // Callers always get a set instance, never null.
            if (matches == null)
            {
                return new HashSet<Document>();
            }

            return matches;
        }
Пример #2
0
        /// <summary>
        /// Console entry point: loads the documents for the directory given as the first
        /// command-line argument, adds them to a phrase index, then reads queries from
        /// standard input one line at a time and prints the file path of every document
        /// returned for each query.
        /// </summary>
        /// <param name="args">Command-line arguments; <c>args[0]</c> is the input directory.</param>
        private static void Main(string[] args)
        {
            // A missing argument previously surfaced as an IndexOutOfRangeException;
            // report the problem and keep a failing exit code instead.
            if (args == null || args.Length == 0)
            {
                Console.Error.WriteLine("Usage: pass the input directory as the first argument.");
                Environment.ExitCode = 1;
                return;
            }

            var inputDirectory = args[0];

            var documents = DocumentProvider.GetDocuments(inputDirectory);

            var tokenizer = new PhraseDocumentTokenizer(new PartOfSpeechTagger());

            var inverseIndex = new InverceIndex<Phrase, Document>(tokenizer);

            inverseIndex.AddDocuments(documents);

            var searcher = new PhraseQueryExecutor(inverseIndex);

            while (true)
            {
                var input = Console.ReadLine();

                // ReadLine returns null once standard input is exhausted (EOF, closed or
                // redirected stream). Without this exit the loop would spin forever.
                if (input == null)
                {
                    break;
                }

                // Blank lines are not queries; wait for the next line.
                if (string.IsNullOrWhiteSpace(input))
                {
                    continue;
                }

                var resultDocs = searcher.ProcessQuery(input);
                Console.WriteLine(string.Join("\n", resultDocs.Select(d => d.FilePath)));
            }
        }