コード例 #1
0
        // Verifies that a PdfToTextConverter exposes, through its Original property,
        // the same input it was constructed with.
        public void PdfToTextConverterConstructorTest()
        {
            var infile = Input;
            var target = new PdfToTextConverter(infile);

            // The value under test goes into Check.That(...) and the expected value into
            // IsEqualTo(...), so that a failure message labels the two roles correctly.
            Check.That(target.Original).IsEqualTo(infile);
        }
コード例 #2
0
        /// <summary>
        /// Asynchronously extracts the complete text content of this PdfFile.
        /// </summary>
        /// <returns>
        /// A task that, when awaited, produces all of the text in the PdfFile.
        /// </returns>
        public override async Task<string> LoadTextAsync()
        {
            // Convert this PDF first, then read the text out of the conversion result.
            // Neither await needs to resume on the captured context.
            var conversionResult = await new PdfToTextConverter(this).ConvertFileAsync().ConfigureAwait(false);
            var contents = await conversionResult.LoadTextAsync().ConfigureAwait(false);

            return contents;
        }
コード例 #3
0
        // Checks that PdfFile.LoadText returns the same text as explicitly converting the
        // file with a PdfToTextConverter and loading the text of the conversion result.
        public void LoadTextTest()
        {
            var pdf = new PdfFile(TestPdfFilePath);

            // Reference value: run the conversion by hand and read the resulting text.
            var convertedFile    = new PdfToTextConverter(pdf).ConvertFile();
            var textViaConverter = convertedFile.LoadText();

            // Value under test: let the PdfFile load its own text.
            var textViaPdf = pdf.LoadText();

            Check.That(textViaPdf).IsEqualTo(textViaConverter);
        }
コード例 #4
0
        // Checks that the text loaded from the result of ConvertFileAsync matches the text
        // loaded directly from the input file.
        public void ConvertFileAsyncTest()
        {
            var infile   = Input;
            var target   = new PdfToTextConverter(infile);
            var expected = infile.LoadText();

            // GetAwaiter().GetResult() blocks just like .Result, but rethrows the original
            // exception instead of wrapping it in an AggregateException, so a failing
            // conversion is reported with its real exception type and message.
            string actual = target.ConvertFileAsync().GetAwaiter().GetResult().LoadText();

            Check.That(actual).IsEqualTo(expected);
        }
コード例 #5
0
        /// <summary>
        /// Extracts the text of the document referenced by the Umbraco property being indexed
        /// and stores it, minus the words listed in IGNORE, as the value of the index field.
        /// </summary>
        /// <param name="e">
        /// The index-field event. Its UmbracoProperty value supplies the file name and its
        /// Value receives the extracted text.
        /// </param>
        public void Execute(IndexFieldEvent e)
        {
            var umbracoFileName = e.UmbracoProperty.Value != null ? e.UmbracoProperty.Value.ToString() : string.Empty;

            // Read the setting once and reuse it for both substitutions below.
            var docPath = ConfigurationManager.AppSettings["Dexter:DocumentPath"];

            // When a document path is configured, substitute it for the media prefix.
            // NOTE(review): if the configured path itself contains "/media", the second
            // Replace will expand it again - confirm this cannot happen in practice.
            var filePath = string.IsNullOrWhiteSpace(docPath)
                ? umbracoFileName
                : umbracoFileName.Replace("~/media", docPath)
                                 .Replace("/media", docPath);

            // Normalize the extension so that files such as "REPORT.PDF" or "Notes.Docx"
            // are dispatched to a converter instead of silently producing no text.
            var extension = (System.IO.Path.GetExtension(filePath) ?? string.Empty).ToLowerInvariant();

            var text = string.Empty;

            switch (extension)
            {
            case ".pdf":
                text = new PdfToTextConverter().Convert(filePath);
                break;

            case ".doc":
                text = new DocToTextConverter().Convert(filePath);
                break;

            case ".xls":
                text = new XlsToTextConverter().Convert(filePath);
                break;

            case ".docx":
                text = new DocxToTextConverter().Convert(filePath);
                break;

            case ".xlsx":
                text = new XlsxToTextConverter().Convert(filePath);
                break;

            case ".pptx":
                text = new PptxToTextConverter().Convert(filePath);
                break;

            case ".ppt":
                // No converter is used for .ppt; only the file name is indexed.
                text = System.IO.Path.GetFileName(filePath);
                break;

            case ".zip":
                text = new ZipToTextConverter().Convert(filePath);
                break;
            }

            // Guard against a converter handing back null (their contracts are not visible
            // here); treat that the same as "no text" rather than failing on Split.
            // With LINQ's Except, words found in IGNORE are dropped and repeated words
            // are collapsed to a single occurrence.
            e.Value = string.Join(" ", (text ?? string.Empty).Split(new[] { ' ' }).Except(IGNORE));
        }
コード例 #6
0
        /// <summary>
        /// Builds a new Document instance from the contents of a raw, untagged PdfFile.
        /// </summary>
        /// <param name="pdf">The raw, untagged PdfFile whose contents are parsed.</param>
        /// <returns>
        /// A fully reified <see cref="Document"/> instance composed from the contents
        /// of the PdfFile.
        /// </returns>
        public Document DocumentFromPdf(PdfFile pdf)
        {
            // Convert the PDF; the result supplies both the path handed to the tagger
            // and the name passed to LoadDocument.
            var converted = new PdfToTextConverter(pdf).ConvertFile();

            // Run the tagger over the converted file and wrap its output.
            var tagger     = new SharpNLPTagger(this.TaggerMode, converted.FullPath);
            var taggedFile = new TaggedFile(tagger.ProcessFile());

            // Parse the tagged output into a Document.
            var parser = new TaggedSourceParser(taggedFile);

            return parser.LoadDocument(converted.NameSansExt);
        }