/// <summary>
/// Rebuilds the Lucene index from the files returned by
/// <c>FileHelper.GetFilesFromRelativePath(inputDirectoryName)</c>.
/// </summary>
/// <remarks>
/// The index location is <c>Path.GetDirectoryName(outputDirectoryName)</c>, or the entry
/// assembly's directory when <c>outputDirectoryName</c> is null. The writer is opened in
/// "create" mode, so any index already at that location is replaced. Every input file becomes
/// one document with a single "content" field that stores term vectors with positions.
/// </remarks>
public void CreateIndexFromDirectory()
{
    string directoryPath = outputDirectoryName == null
        ? Path.GetDirectoryName(Assembly.GetEntryAssembly().Location)
        : Path.GetDirectoryName(outputDirectoryName);

    Analyzer analyzer = new TermAnalyzer();

    // Dispose the directory and the writer deterministically: an undisposed IndexWriter keeps
    // the index write lock, which makes a later rebuild in the same process fail.
    using (var directory = FSDirectory.Open(directoryPath))
    using (var writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
    {
        var files = FileHelper.GetFilesFromRelativePath(inputDirectoryName);
        writer.DeleteAll();

        foreach (var fileInfo in files)
        {
            var path = fileInfo.Directory + "\\" + fileInfo.Name;

            // A reader-backed field is consumed while the document is being added, so the
            // stream has to stay open until AddDocument returns and be closed even on failure.
            using (var contentReader = new StreamReader(path))
            {
                var doc = new Document();
                doc.Add(new Field("content", contentReader, Field.TermVector.WITH_POSITIONS));
                writer.AddDocument(doc);
            }
        }

        writer.Optimize();
        writer.Commit();
    }
}
/// <summary>
/// Runs <paramref name="stringQuery"/> against the "content" field of the index and returns
/// one <c>ArticleInfo</c> per hit, in the order Lucene returned the hits.
/// </summary>
/// <param name="stringQuery">Query text in Lucene query-parser syntax.</param>
/// <returns>
/// The matching articles with their Lucene score in <c>Rank</c>; empty when nothing matches.
/// </returns>
/// <remarks>
/// The time spent inside the Lucene search call alone is stored in <c>LuceneTime</c>.
/// </remarks>
public List<ArticleInfo> SearchForString(string stringQuery)
{
    // Upper bound on the number of hits requested from Lucene.
    const int MaxHits = 100000;

    var files = FileHelper.GetFilesFromRelativePath(inputDirectoryName);

    string directoryPath = outputDirectoryName == null
        ? Path.GetDirectoryName(Assembly.GetEntryAssembly().Location)
        : Path.GetDirectoryName(outputDirectoryName);

    Analyzer analyzer = new TermAnalyzer();
    QueryParser queryParser = new QueryParser(Version.LUCENE_30, "content", analyzer);
    Query query = queryParser.Parse(stringQuery);

    // The directory, reader and searcher all hold index file handles; release them as soon
    // as the search is finished instead of waiting for finalization.
    using (var dir = FSDirectory.Open(directoryPath))
    using (IndexReader reader = DirectoryReader.Open(dir, true))
    using (IndexSearcher indexSearcher = new IndexSearcher(reader))
    {
        Stopwatch searchStopwatch = Stopwatch.StartNew();
        var hits = indexSearcher.Search(query, null, MaxHits);
        searchStopwatch.Stop();
        this.LuceneTime = searchStopwatch.Elapsed;

        List<ArticleInfo> articles = new List<ArticleInfo>(hits.ScoreDocs.Length);
        foreach (var scoreDoc in hits.ScoreDocs)
        {
            // NOTE(review): indexing files by document id assumes the index was built from
            // this same file list in this same order - confirm the input directory has not
            // changed since the index was created.
            articles.Add(new ArticleInfo()
            {
                FileInfo = files[scoreDoc.Doc],
                Rank = scoreDoc.Score
            });
        }

        return articles;
    }
}