/// <summary>
/// Extracts the text to be indexed from the specified file.
/// </summary>
/// <param name="file">
/// The file to read. Its extension must match an entry in <c>SupportedExtensions</c>;
/// the match ignores case.
/// </param>
/// <returns>The text produced by <c>PDFParserPdfBox.GetTextFromAllPages</c> for the file.</returns>
/// <exception cref="ArgumentNullException"><paramref name="file"/> is <c>null</c>.</exception>
/// <exception cref="NotSupportedException">
/// The extension of <paramref name="file"/> is not listed in <c>SupportedExtensions</c>.
/// </exception>
protected virtual string ExtractTextFromFile(FileInfo file)
{
    if (file == null)
    {
        throw new ArgumentNullException("file");
    }

    // File extensions are identifiers, not linguistic text: compare them ordinally and
    // case-insensitively. Upper-casing with the current culture can make a supported
    // extension fail to match under cultures with special casing rules (e.g. tr-TR).
    if (!SupportedExtensions.Contains(file.Extension, StringComparer.OrdinalIgnoreCase))
    {
        throw new NotSupportedException("The file with the extension specified is not supported");
    }

    var pdf = new PDFParserPdfBox();

    // Exceptions handed to this callback by the parser are forwarded to OnIndexingError.
    // NOTE(review): -1 presumably means "no associated node id" - confirm against IndexingErrorEventArgs.
    Action<Exception> onError = e => OnIndexingError(new IndexingErrorEventArgs("Could not read PDF", -1, e));

    return pdf.GetTextFromAllPages(file.FullName, onError);
}
/// <summary>
/// Extracts the text to be indexed from the specified file.
/// </summary>
/// <param name="file">
/// The file to read. Its extension must match an entry in <c>SupportedExtensions</c>;
/// the match ignores case.
/// </param>
/// <returns>The text produced by <c>PDFParserPdfBox.GetTextFromAllPages</c> for the file.</returns>
/// <exception cref="ArgumentNullException"><paramref name="file"/> is <c>null</c>.</exception>
/// <exception cref="NotSupportedException">
/// The extension of <paramref name="file"/> is not listed in <c>SupportedExtensions</c>.
/// </exception>
protected virtual string ExtractTextFromFile(FileInfo file)
{
    if (file == null)
    {
        throw new ArgumentNullException("file");
    }

    // Ordinal, case-insensitive comparison: culture-sensitive ToUpper() can map the same
    // extension to different strings depending on the current culture (e.g. 'i' under tr-TR),
    // which would wrongly reject a supported file.
    bool isSupported = SupportedExtensions.Contains(file.Extension, StringComparer.OrdinalIgnoreCase);
    if (!isSupported)
    {
        throw new NotSupportedException("The file with the extension specified is not supported");
    }

    var pdf = new PDFParserPdfBox();

    // Exceptions handed to this callback by the parser are forwarded to OnIndexingError.
    // NOTE(review): -1 presumably means "no associated node id" - confirm against IndexingErrorEventArgs.
    Action<Exception> onError = e => OnIndexingError(new IndexingErrorEventArgs("Could not read PDF", -1, e));

    return pdf.GetTextFromAllPages(file.FullName, onError);
}