/// <summary>
/// Recursively walks <paramref name="startPath"/> and indexes every file whose
/// extracted text is not empty or whitespace.
/// </summary>
/// <param name="startPath">Path of the directory to start indexing from.</param>
/// <returns>A task that completes once this directory and all of its subdirectories have been processed.</returns>
/// <remarks>
/// If the directory does not exist, only the "Indexing Directory" debug message is
/// logged and nothing is indexed. Files in a directory are processed before its
/// subdirectories.
/// </remarks>
public async Task BuildIndex(string startPath)
{
    await _logger.Debug("Indexing Directory: " + startPath);

    var directory = new DirectoryInfo(startPath);
    if (!directory.Exists)
    {
        return;
    }

    // Use TikaOnDotNet to extract the contents of each document.
    var textExtractor = new TextExtractor();

    // Index all files directly inside this directory.
    foreach (var file in directory.GetFiles())
    {
        var contents = textExtractor.Extract(file.FullName);
        if (string.IsNullOrWhiteSpace(contents.Text))
        {
            // Nothing extractable; skip without indexing.
            continue;
        }

        await _logger.Debug("Indexing File " + file.FullName);

        // Only the name, full path and text are handed over; no id is computed here.
        await _elasticAccess.IndexDocument(file.Name, file.FullName, contents.Text);
    }

    // Then descend into each subdirectory.
    foreach (var dir in directory.GetDirectories())
    {
        await BuildIndex(dir.FullName);
    }
}
/// <summary>
/// Builds a <c>Doc</c> describing a file and submits it through a client
/// obtained from <c>CreateElasticClient()</c>.
/// </summary>
/// <param name="fileName">Value stored in the document's <c>FileName</c> property.</param>
/// <param name="filePath">Value stored in the document's <c>Path</c> property; also hashed to produce its <c>Id</c>.</param>
/// <param name="contents">Value stored in the document's <c>Text</c> property.</param>
/// <returns>A task that completes when the <c>IndexDocumentAsync</c> call has finished.</returns>
public async Task IndexDocument(string fileName, string filePath, string contents)
{
    // The document id is the hash of the file path.
    var documentId = HashUtility.Hash(filePath);

    var document = new Doc();
    document.Id = documentId;
    document.FileName = fileName;
    document.Path = filePath;
    document.Text = contents;

    var client = CreateElasticClient();
    await client.IndexDocumentAsync(document);
}