/// <summary>
/// Builds the full-text index for every file under <paramref name="contentPath"/>,
/// writing it to the configured index location. Any existing index there is
/// overwritten (the writer is opened with create = true).
/// </summary>
/// <param name="contentPath">Root directory whose files are indexed.</param>
/// <param name="foldersToExclude">Folder names to skip while indexing; may be empty but not null.</param>
/// <exception cref="ArgumentNullException">When <paramref name="foldersToExclude"/> is null.</exception>
public void Index(string contentPath, List<string> foldersToExclude)
{
    if (foldersToExclude == null)
    {
        // ArgumentNullException is the specific type for a null argument; it
        // derives from ArgumentException, so existing catch handlers still match.
        throw new ArgumentNullException("foldersToExclude", "Must not have null collection of folders");
    }

    var indexDirectory = new SimpleFSDirectory(new DirectoryInfo(_configuration.IndexPath));
    Log(string.Format("Beginning to index {0}. Index location: {1}", contentPath, indexDirectory.Directory.FullName));

    var stopWatch = Stopwatch.StartNew();

    var analyzer = AnalyzerBuilder.CreateAnalyzer();
    // create = true: rebuild the index from scratch rather than appending.
    using (var writer = new IndexWriter(indexDirectory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
    {
        IndexDirectory(writer, new DirectoryInfo(contentPath), foldersToExclude);
    }

    stopWatch.Stop();
    // hh:mm:ss — the original separated minutes and seconds with '.', which read
    // like a fractional part; ':' matches the Hours/Minutes/Seconds values used.
    Log(string.Format("Indexed {0:N0} files in {1:00}:{2:00}:{3:00}",
        _fileCount, stopWatch.Elapsed.Hours, stopWatch.Elapsed.Minutes, stopWatch.Elapsed.Seconds));
}
/// <summary>
/// Runs a full-text search over the index and returns one page of hits, each
/// decorated with term offsets and an HTML-highlighted best fragment.
/// </summary>
/// <param name="searchExpression">Lucene query expression; must be non-empty.</param>
/// <param name="filter">Optional single-field terms filter.</param>
/// <param name="sortField">Optional string field to sort by; relevance order when null or empty.</param>
/// <param name="page">1-based page number.</param>
/// <param name="hitsPerPage">Number of hits per page.</param>
/// <returns>The requested page of hits plus the total hit count.</returns>
/// <exception cref="SearchException">When the expression is empty or cannot be parsed.</exception>
public ReadOnlyHitCollection Search(string searchExpression, Model.Filter filter = null, string sortField = null, int page = 1, int hitsPerPage = 10)
{
    if (string.IsNullOrEmpty(searchExpression))
    {
        throw new SearchException("Must have searchExpression");
    }

    string defaultFieldName = Fields.Content;
    var analyzer = AnalyzerBuilder.CreateAnalyzer();
    Query query;
    try
    {
        // ToLowerInvariant: culture-independent lowering so wildcard/prefix terms
        // match the lowercased index regardless of the current thread culture
        // (plain ToLower breaks under e.g. the Turkish 'I').
        query = new QueryParser(Version.LUCENE_30, defaultFieldName, analyzer).Parse(searchExpression.ToLowerInvariant());
    }
    catch (ParseException ex)
    {
        throw new SearchException(string.Format("Sorry, '{0}' isn't something we can search for so far.", searchExpression), ex);
    }

    var indexDirectory = new SimpleFSDirectory(new DirectoryInfo(_configuration.IndexPath));
    List<Hit> onePageOfHits;
    int totalHits;
    using (var reader = IndexReader.Open(indexDirectory, true))
    using (var searcher = new IndexSearcher(reader)) // IndexSearcher is IDisposable; was never disposed
    {
        var termsFilter = filter != null && !string.IsNullOrEmpty(filter.Field)
            ? new FieldCacheTermsFilter(filter.Field, filter.Terms.ToArray())
            : null;
        var sort = !string.IsNullOrEmpty(sortField)
            ? new Sort(new SortField(sortField, SortField.STRING))
            : Sort.RELEVANCE;

        ScoreDoc[] scoreDocs = searcher.Search(query, termsFilter, MaxNumberOfHits, sort).ScoreDocs;
        // NOTE(review): this is the number of docs actually returned, capped at
        // MaxNumberOfHits — not the index-wide total. Kept as-is for compatibility.
        totalHits = scoreDocs.Length;

        // Get one page of hits
        var hits = new List<Hit>();
        foreach (var scoreDoc in scoreDocs)
        {
            int docId = scoreDoc.Doc;
            var document = searcher.Doc(docId); // fetch once (original loaded the stored doc twice per hit)
            string filePath = document.Get(Fields.Path);
            string language = document.Get(Fields.Language);
            hits.Add(new Hit(docId, _configuration.ContentRootPath, filePath, scoreDoc.Score, language));
        }
        onePageOfHits = hits.GetPage(page, hitsPerPage).ToList();

        // The rewritten query, its extracted terms, and the search field depend
        // only on the query and reader, so compute them once instead of once per
        // hit on the page (hoisted out of the loop below).
        var primitiveQuery = query.Rewrite(reader);
        var terms = new HashSet<Term>();
        primitiveQuery.ExtractTerms(terms);
        string searchField = string.Empty;
        if (terms.Count == 0)
        {
            // There can be all kinds of queries; a prefix query rewrites to no
            // terms, so recover the field from the prefix itself.
            var prefixQuery = query as PrefixQuery;
            if (prefixQuery != null)
            {
                searchField = prefixQuery.Prefix.Field;
                primitiveQuery = prefixQuery;
            }
        }
        else
        {
            // TODO: There can be multiple term fields, like code: and method:
            searchField = terms.First().Field;
        }

        // Get offsets and highlights on the page we are going to return
        foreach (var hit in onePageOfHits)
        {
            var termFreqVector = reader.GetTermFreqVector(hit.DocId, Fields.Content);
            // A null termFreqVector also fails this as-cast check, so the
            // original's separate null test was redundant.
            var termPositionVector = termFreqVector as TermPositionVector;
            if (termPositionVector == null)
            {
                throw new ArgumentException("Must have term frequencies and positions vectors");
            }

            // No offsets for prefix and other non-term based queries
            const int maxOffsetNumber = 10;
            foreach (var term in terms)
            {
                int termIndex = termFreqVector.IndexOf(term.Text); // -1 when the term is absent from this doc
                if (termIndex == -1)
                {
                    continue;
                }
                foreach (var offset in termPositionVector.GetOffsets(termIndex))
                {
                    if (hit.Offsets.Count < maxOffsetNumber)
                    {
                        hit.Offsets.Add(new Offset { StartOffset = offset.StartOffset, EndOffset = offset.EndOffset });
                    }
                }
            }

            // Highlighter from contrib package
            var tokenStream = TokenSources.GetTokenStream(termPositionVector);
            var scorer = new QueryScorer(primitiveQuery, searchField);
            var highlighter = new Highlighter(new SimpleHTMLFormatter("<kbd>", "</kbd>"), scorer)
            {
                TextFragmenter = new SimpleSpanFragmenter(scorer)
            };

            // File.ReadAllText has the same BOM-detecting UTF-8 default as the
            // original StreamReader/ReadToEnd pair.
            string text = File.ReadAllText(hit.FilePath);
            string bestFragment = highlighter.GetBestFragment(tokenStream, text);
            if (!string.IsNullOrEmpty(bestFragment))
            {
                hit.BestFragment = EscapeHtmlMarkup(bestFragment);
            }
        }
    }

    return new ReadOnlyHitCollection(onePageOfHits, totalHits);
}