コード例 #1
0
        /// <summary>
        /// Parses the contents of a raw, untagged DocFile into a new Document instance.
        /// </summary>
        /// <remarks>
        /// The input is processed in stages: it is converted with a <c>DocToDocXConverter</c>,
        /// the result is converted with a <c>DocxToTextConverter</c>, the resulting text file is
        /// run through a <c>SharpNLPTagger</c> using this instance's <c>TaggerMode</c>, and the
        /// tagged output is loaded by a <c>TaggedSourceParser</c>.
        /// </remarks>
        /// <param name="doc">The raw, untagged DocFile to parse.</param>
        /// <returns>
        /// The contents of the DocFile composed into a fully reified
        /// <see cref="Document"/> instance.
        /// </returns>
        /// <exception cref="System.InvalidCastException">
        /// The first conversion step produced something other than a DocXFile.
        /// </exception>
        public Document DocumentFromDoc(DocFile doc)
        {
            // Use a direct cast instead of `as`: an unexpected converter result now fails right
            // here with an InvalidCastException instead of flowing on as a silent null.
            var docx = (DocXFile)new DocToDocXConverter(doc).ConvertFile();
            var txt = new DocxToTextConverter(docx).ConvertFile();

            // Tag the plain text, then parse the tagged output into a Document.
            var tagger = new SharpNLPTagger(this.TaggerMode, txt.FullPath);
            var tagged = new TaggedFile(tagger.ProcessFile());

            return new TaggedSourceParser(tagged).LoadDocument(txt.NameSansExt);
        }
コード例 #2
0
        /// <summary>
        /// Asynchronously extracts all of the text contained in this DocXFile.
        /// </summary>
        /// <returns>
        /// A Task&lt;string&gt; that, once awaited, produces all of the text in the DocXFile.
        /// </returns>
        public override async Task<string> LoadTextAsync()
        {
            // Convert this file first, then read the text of the converted file.
            var converted = await new DocxToTextConverter(this).ConvertFileAsync().ConfigureAwait(false);
            var contents = await converted.LoadTextAsync().ConfigureAwait(false);

            return contents;
        }
コード例 #3
0
        public void ConvertFileTest()
        {
            // Arrange
            var converter = new DocxToTextConverter(Input);

            // Act
            TxtFile converted = converter.ConvertFile();

            // Assert: the path reported by the converted file must exist on disk.
            Check.That(converted.FullPath).Satisfies(File.Exists);
        }
コード例 #4
0
        public async Task ConvertFileAsyncTest()
        {
            // Arrange
            var converter = new DocxToTextConverter(Input);

            // Act
            TxtFile converted = await converter.ConvertFileAsync();

            // Assert: the converted file must exist.
            // NOTE(review): FileInfo(...) is invoked without `new`; presumably this resolves to a
            // helper method in scope rather than the System.IO.FileInfo constructor - confirm.
            Check.That(FileInfo(converted.FullPath)).Satisfies(info => info.Exists);
        }
コード例 #5
0
        /// <summary>
        /// Extracts indexable text from the file referenced by the event's Umbraco property and
        /// stores it in <c>e.Value</c>.
        /// </summary>
        /// <remarks>
        /// The property value is treated as a file path. When the
        /// <c>Dexter:DocumentPath</c> app setting is non-blank, occurrences of <c>~/media</c>
        /// and <c>/media</c> in the path are replaced with that setting. The converter is then
        /// chosen by file extension (compared case-insensitively); unrecognised extensions yield
        /// empty text, and <c>.ppt</c> files contribute only their file name. Finally, words
        /// listed in <c>IGNORE</c> are removed from the space-separated result.
        /// </remarks>
        /// <param name="e">
        /// The index field event. Its <c>UmbracoProperty.Value</c> is read and its
        /// <c>Value</c> is overwritten with the extracted text.
        /// </param>
        public void Execute(IndexFieldEvent e)
        {
            var umbracoFileName = e.UmbracoProperty.Value != null
                ? e.UmbracoProperty.Value.ToString()
                : string.Empty;

            // Read the setting once and reuse it for both replacements.
            // NOTE(review): the second Replace also rewrites any "/media" that appears inside
            // docPath itself after the first substitution - confirm the configured value never
            // contains "/media".
            var docPath = ConfigurationManager.AppSettings["Dexter:DocumentPath"];
            var filePath = string.IsNullOrWhiteSpace(docPath)
                ? umbracoFileName
                : umbracoFileName.Replace("~/media", docPath)
                                 .Replace("/media", docPath);

            // Normalise the extension so that ".PDF" or ".Docx" select the same converter as
            // their lower-case forms. GetExtension returns null for a null path, hence the
            // fallback to an empty string before lower-casing.
            var extension = (System.IO.Path.GetExtension(filePath) ?? string.Empty).ToLowerInvariant();

            var text = string.Empty;

            switch (extension)
            {
                case ".pdf":
                    text = new PdfToTextConverter().Convert(filePath);
                    break;

                case ".doc":
                    text = new DocToTextConverter().Convert(filePath);
                    break;

                case ".xls":
                    text = new XlsToTextConverter().Convert(filePath);
                    break;

                case ".docx":
                    text = new DocxToTextConverter().Convert(filePath);
                    break;

                case ".xlsx":
                    text = new XlsxToTextConverter().Convert(filePath);
                    break;

                case ".pptx":
                    text = new PptxToTextConverter().Convert(filePath);
                    break;

                case ".ppt":
                    // No converter is used for legacy .ppt here; only the file name is indexed.
                    text = System.IO.Path.GetFileName(filePath);
                    break;

                case ".zip":
                    text = new ZipToTextConverter().Convert(filePath);
                    break;
            }

            // Guard against a converter handing back null so the split below cannot throw.
            var words = (text ?? string.Empty).Split(new[] { ' ' });

            e.Value = string.Join(" ", words.Except(IGNORE));
        }
コード例 #6
0
        /// <summary>
        /// Reads all of the text in this DocXFile and returns it as one string.
        /// </summary>
        /// <returns>A string containing all of the text in the DocXFile.</returns>
        public override string LoadText()
        {
            // Convert this file first, then read the text of the converted file.
            var converted = new DocxToTextConverter(this).ConvertFile();

            return converted.LoadText();
        }
コード例 #7
0
        public void DocxToTextConverterConstructorTest()
        {
            // Act
            var converter = new DocxToTextConverter(Input);

            // Assert: the converter must expose the same file it was constructed with.
            var originalPath = converter.Original.FullPath;
            Check.That(originalPath).IsEqualTo(Input.FullPath);
        }