/// <summary>
/// Extracts the text to be indexed from the specified file.
/// </summary>
/// <param name="file">The file to read; its extension must be one of <c>SupportedExtensions</c>.</param>
/// <returns>The text returned by <c>MediaParser.ParseMediaText</c> for the file.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="file"/> is null.</exception>
/// <exception cref="NotSupportedException">
/// Thrown when the file's extension is not listed in <c>SupportedExtensions</c>.
/// </exception>
/// <remarks>
/// The metadata produced by the parser is written to the <c>_extractedMetaFromTika</c> field
/// as a side effect of this call.
/// </remarks>
protected virtual string ExtractTextFromFile(FileInfo file)
{
    if (file == null)
    {
        throw new ArgumentNullException("file");
    }

    // Ordinal, case-insensitive match: culture-sensitive ToUpper() breaks under cultures
    // such as tr-TR, where ".gif" upper-cases to ".GİF" and no longer equals ".GIF".
    if (!SupportedExtensions.Contains(file.Extension, StringComparer.OrdinalIgnoreCase))
    {
        throw new NotSupportedException("The file with the extension specified is not supported");
    }

    // Callback handed to the parser; it forwards the exception to OnIndexingError.
    // Presumably the parser invokes it when it fails to read the file (verify against MediaParser).
    Action<Exception> onError = e =>
        OnIndexingError(new IndexingErrorEventArgs("Could not read media item", -1, e));

    var mediaParser = new MediaParser();
    return mediaParser.ParseMediaText(file.FullName, onError, out _extractedMetaFromTika);
}
/// <summary>
/// Buffers the supplied stream in memory and runs the media parser over the resulting bytes.
/// </summary>
/// <param name="stream">The stream to read; its remaining content is copied into a byte array.</param>
/// <returns>
/// An <c>ExtractionResult</c>. <c>ExtractedText</c> and <c>MetaData</c> are assigned only when
/// the stream yielded at least one byte; otherwise the result is returned as constructed.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="stream"/> is null.</exception>
private ExtractionResult ExtractContentFromStream(Stream stream)
{
    if (stream == null)
    {
        throw new ArgumentNullException("stream");
    }

    var extractionResult = new ExtractionResult();

    byte[] data;
    using (var buffer = new MemoryStream())
    {
        stream.CopyTo(buffer);
        data = buffer.ToArray(); // ToArray never returns null, so only the length needs checking
    }

    if (data.Length == 0)
    {
        // Nothing to parse; leave the result untouched.
        return extractionResult;
    }

    // Callback handed to the parser; it forwards the exception to OnIndexingError.
    // Presumably the parser invokes it when it fails to read the data (verify against MediaParser).
    Action<Exception> onError = e =>
        OnIndexingError(new IndexingErrorEventArgs("Could not read media item", -1, e));

    // Assigned by the parser through the out parameter, so no up-front allocation is needed.
    Dictionary<string, string> metaData;
    var mediaParser = new MediaParser();
    extractionResult.ExtractedText = mediaParser.ParseMediaText(data, onError, out metaData);
    extractionResult.MetaData = metaData;

    return extractionResult;
}