// Checks that a PdfToTextConverter built from a file reports that same file through Original.
public void PdfToTextConverterConstructorTest()
{
    var source = Input;
    var converter = new PdfToTextConverter(source);

    Check.That(source).IsEqualTo(converter.Original);
}
/// <summary>
/// Asynchronously loads the text of this file by converting it with a
/// PdfToTextConverter and then reading the text of the converted file.
/// </summary>
/// <returns>
/// A <see cref="Task{TResult}"/> which, when awaited, yields the text loaded from the converted file.
/// </returns>
public override async Task<string> LoadTextAsync()
{
    // Neither await captures the synchronization context (ConfigureAwait(false)).
    var textFile = await new PdfToTextConverter(this).ConvertFileAsync().ConfigureAwait(false);
    return await textFile.LoadTextAsync().ConfigureAwait(false);
}
// Checks that PdfFile.LoadText yields the same text as explicitly converting the
// file with PdfToTextConverter and loading the text of the converted file.
public void LoadTextTest()
{
    var pdf = new PdfFile(TestPdfFilePath);

    var convertedFile = new PdfToTextConverter(pdf).ConvertFile();
    var expectedText = convertedFile.LoadText();

    var actualText = pdf.LoadText();

    Check.That(actualText).IsEqualTo(expectedText);
}
// Checks that the file produced by ConvertFileAsync loads the same text as the input file.
public void ConvertFileAsyncTest()
{
    var infile = Input;
    var target = new PdfToTextConverter(infile);
    var expected = infile.LoadText();

    // GetAwaiter().GetResult() blocks just like .Result did, but rethrows the original
    // exception rather than wrapping it in an AggregateException, so a failing
    // conversion is reported with its real exception type.
    string actual = target.ConvertFileAsync().GetAwaiter().GetResult().LoadText();

    Check.That(actual).IsEqualTo(expected);
}
/// <summary>
/// Extracts the text of the file named by the event's Umbraco property and stores it,
/// with the entries of IGNORE removed, in the event's Value.
/// </summary>
/// <param name="e">
/// The index field event. Its UmbracoProperty.Value supplies the file name and its
/// Value receives the space-joined words of the extracted text.
/// </param>
public void Execute(IndexFieldEvent e)
{
    var umbracoFileName = e.UmbracoProperty.Value != null
        ? e.UmbracoProperty.Value.ToString()
        : string.Empty;

    var docPath = ConfigurationManager.AppSettings["Dexter:DocumentPath"];

    // Substitute "~/media" and "/media" in a single pass. Chaining two Replace calls
    // would run the second replacement over the already substituted text and corrupt
    // the path whenever the configured document path itself contains "/media".
    // A MatchEvaluator is used so that '$' in the configured path is taken literally.
    var filePath = string.IsNullOrWhiteSpace(docPath)
        ? umbracoFileName
        : System.Text.RegularExpressions.Regex.Replace(umbracoFileName, "~?/media", match => docPath);

    // Normalise the extension so that e.g. ".PDF" and ".Docx" are indexed too.
    var extension = (System.IO.Path.GetExtension(filePath) ?? string.Empty).ToLowerInvariant();

    var text = string.Empty;
    switch (extension)
    {
        case ".pdf":
            text = new PdfToTextConverter().Convert(filePath);
            break;
        case ".doc":
            text = new DocToTextConverter().Convert(filePath);
            break;
        case ".xls":
            text = new XlsToTextConverter().Convert(filePath);
            break;
        case ".docx":
            text = new DocxToTextConverter().Convert(filePath);
            break;
        case ".xlsx":
            text = new XlsxToTextConverter().Convert(filePath);
            break;
        case ".pptx":
            text = new PptxToTextConverter().Convert(filePath);
            break;
        case ".ppt":
            // No converter is used for .ppt here; only the file name is indexed.
            text = System.IO.Path.GetFileName(filePath);
            break;
        case ".zip":
            text = new ZipToTextConverter().Convert(filePath);
            break;
    }

    // Guard against a converter handing back null. Except is a set operation, so
    // besides dropping the IGNORE entries it also collapses repeated words.
    var words = (text ?? string.Empty).Split(new[] { ' ' }).Except(IGNORE);
    e.Value = string.Join(" ", words);
}
/// <summary>
/// Builds a <see cref="Document"/> from a raw, untagged PdfFile.
/// </summary>
/// <param name="pdf">The raw, untagged PdfFile to parse.</param>
/// <returns>
/// The <see cref="Document"/> loaded by a TaggedSourceParser from the tagged form
/// of the text converted out of the PdfFile.
/// </returns>
public Document DocumentFromPdf(PdfFile pdf)
{
    // Step 1: convert the pdf to a text file.
    var textFile = new PdfToTextConverter(pdf).ConvertFile();

    // Step 2: run the tagger over the converted text file, using this instance's TaggerMode.
    var tagger = new SharpNLPTagger(this.TaggerMode, textFile.FullPath);
    var taggedFile = new TaggedFile(tagger.ProcessFile());

    // Step 3: parse the tagged file, passing the text file's extension-less name to LoadDocument.
    var parser = new TaggedSourceParser(taggedFile);
    return parser.LoadDocument(textFile.NameSansExt);
}