/// <summary>
/// Extracts text from an uploaded PowerPoint (PresentationML) file, one part per slide.
/// </summary>
/// <param name="upload">The uploaded presentation; must not be null.</param>
/// <returns>
/// A <c>POCO.DocumentText</c> whose <c>parts</c> contains one entry per slide index,
/// numbered from zero, with the body set to the value returned by
/// <c>GetSlideIdAndText</c> and empty header/footer strings.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="upload"/> is null.</exception>
public static POCO.DocumentText ExtractText(IFormFile upload)
        {
            if (upload == null)
            {
                throw new ArgumentNullException(nameof(upload));
            }

            POCO.DocumentText text = new POCO.DocumentText();

            // Own the upload stream explicitly so it is released on every path,
            // including when the package cannot be opened.
            using (System.IO.Stream uploadStream = upload.OpenReadStream())
            // Open the presentation as read-only.
            using (PresentationDocument presentationDocument = PresentationDocument.Open(uploadStream, false))
            {
                int numSlides = CountSlides(presentationDocument);

                for (int i = 0; i < numSlides; i++)
                {
                    // Get the text for the slide
                    string slideText = GetSlideIdAndText(presentationDocument, i);

                    // One part per slide; header and footer are not extracted here
                    POCO.DocumentPart slide = new POCO.DocumentPart();
                    slide.partnumber = i;
                    slide.body       = slideText;
                    slide.header     = string.Empty;
                    slide.footer     = string.Empty;

                    // Add to our parts collection
                    text.parts.Add(slide);
                }
            }

            return(text);
        }
Beispiel #2
0
        /// <summary>
        /// Extracts the text of every non-empty paragraph from a Word document held in memory.
        /// </summary>
        /// <param name="memstream">Stream containing the WordprocessingML package.</param>
        /// <returns>
        /// A document text whose parts hold one entry per paragraph that produced text;
        /// <c>partnumber</c> is the 1-based position of the paragraph among all paragraphs
        /// (empty paragraphs are counted but not emitted).
        /// </returns>
        private static DocumentText ProcessFile(MemoryStream memstream)
        {
            return ProcessFileCore(memstream, true);
        }

        /// <summary>
        /// Performs the extraction for <see cref="ProcessFile"/>. When the package fails to open
        /// with an "Invalid Hyperlink" error and <paramref name="allowRepair"/> is true, the stream
        /// is passed through <c>FixInvalidUri</c> and extraction is retried exactly once.
        /// </summary>
        /// <param name="memstream">Stream containing the WordprocessingML package.</param>
        /// <param name="allowRepair">False on the retry, so a file that cannot be repaired does not recurse forever.</param>
        /// <returns>The extracted text; empty when the package could not be opened.</returns>
        private static DocumentText ProcessFileCore(MemoryStream memstream, bool allowRepair)
        {
            POCO.DocumentText text = new POCO.DocumentText();

            try
            {
                // Open the document as read-only.
                using (WordprocessingDocument wordDocument = WordprocessingDocument.Open(memstream, false))
                {
                    int paraCounter = 0;
                    foreach (var paragraph in wordDocument.MainDocumentPart.RootElement.Descendants<Paragraph>())
                    {
                        paraCounter++;

                        // Accumulate the text of every run that is a direct child of the paragraph
                        System.Text.StringBuilder paraText = new System.Text.StringBuilder();
                        foreach (var run in paragraph.Elements<Run>())
                        {
                            foreach (var texttype in run.Elements<TextType>())
                            {
                                paraText.Append(texttype.Text);
                            }
                        }

                        // Only paragraphs that yielded text become parts
                        if (paraText.Length > 0)
                        {
                            POCO.DocumentPart part = new POCO.DocumentPart();
                            part.body       = paraText.ToString();
                            part.partnumber = paraCounter;
                            text.parts.Add(part);
                        }
                    }
                }
            }
            catch (OpenXmlPackageException packageEx)
            {
                // NOTE(review): package errors other than "Invalid Hyperlink" are swallowed and
                // whatever was collected so far is returned, as in the original code; confirm
                // this best-effort behaviour is intended.
                if (allowRepair && packageEx.ToString().Contains("Invalid Hyperlink"))
                {
                    MemoryStream fixedMemStream = Castlepoint.Text.FileHandlers.Utils.FixInvalidUri(memstream);

                    // Retry once only: if the repaired stream still fails, give up instead of recursing
                    text = ProcessFileCore(fixedMemStream, false);
                }
            }

            return(text);
        }
Beispiel #3
0
        /// <summary>
        /// Buffers an uploaded Word document in memory and extracts its text via <c>ProcessFile</c>.
        /// </summary>
        /// <param name="upload">The uploaded Word file.</param>
        /// <param name="logger">Logger that receives an error entry when extraction fails.</param>
        /// <returns>The text returned by <c>ProcessFile</c> for the buffered upload.</returns>
        /// <remarks>Any exception is logged and then rethrown unchanged.</remarks>
        public static POCO.DocumentText ExtractText(IFormFile upload, ILogger logger)
        {
            try
            {
                // Dispose both the upload stream and the in-memory copy on every path
                using (System.IO.Stream uploadStream = upload.OpenReadStream())
                using (System.IO.MemoryStream memstream = new System.IO.MemoryStream())
                {
                    // Read the bytes from the stream
                    uploadStream.CopyTo(memstream);

                    return(ProcessFile(memstream));
                }
            }
            catch (Exception exWordExtractText)
            {
                // Pass the exception itself so the stack trace reaches the log sink
                logger.LogError(exWordExtractText, "Word Extract Text: " + exWordExtractText.Message);
                throw;
            }
        }
Beispiel #4
0
        /// <summary>
        /// Reads a spreadsheet from memory and returns one text part per sheet,
        /// with each row rendered as a tab-separated line.
        /// </summary>
        /// <param name="memstream">Stream containing the spreadsheet data.</param>
        /// <param name="fileName">File name, used only in log messages.</param>
        /// <param name="logger">Logger that receives progress messages.</param>
        /// <returns>A document text with one part per table in the data set, numbered from 1.</returns>
        private static DocumentText ProcessFile2(MemoryStream memstream, string fileName, ILogger logger)
        {
            POCO.DocumentText extracted = new POCO.DocumentText();

            // Auto-detect format, supports:
            //  - Binary Excel files (2.0-2003 format; *.xls)
            //  - OpenXml Excel files (2007 format; *.xlsx)
            using (var excelReader = ExcelReaderFactory.CreateReader(memstream))
            {
                var workbook = excelReader.AsDataSet();
                logger.LogInformation($"ProcessFile: tables={workbook.Tables.Count.ToString()} filename={fileName}");

                for (int sheetIndex = 0; sheetIndex < workbook.Tables.Count; sheetIndex++)
                {
                    // Part numbers are 1-based
                    int sheetNumber = sheetIndex + 1;
                    System.Data.DataTable sheet = workbook.Tables[sheetIndex];
                    logger.LogInformation($"ProcessFile: processing sheet#{sheetNumber.ToString()} filename={fileName}");

                    StringBuilder sheetLines = new StringBuilder();
                    foreach (System.Data.DataRow dataRow in sheet.Rows)
                    {
                        // Each row becomes one tab-separated line
                        sheetLines.AppendLine(string.Join("\t", dataRow.ItemArray));
                    }

                    // Every sheet is emitted as its own part, even when it has no rows
                    text_AddPart:
                    extracted.parts.Add(new POCO.DocumentPart
                    {
                        body       = sheetLines.ToString(),
                        partnumber = sheetNumber
                    });
                }
            }

            return(extracted);
        }
Beispiel #5
0
        /// <summary>
        /// Collects the inner text of every comment in a Word document.
        /// </summary>
        /// <param name="wordDocument">An open Word document.</param>
        /// <returns>
        /// A document text with one part per comment, numbered from 1 in enumeration order;
        /// empty when the document has no comments part or no comments element.
        /// </returns>
        public static POCO.DocumentText GetCommentsFromDocument(WordprocessingDocument wordDocument)
        {
            POCO.DocumentText collected = new POCO.DocumentText();

            WordprocessingCommentsPart part = wordDocument.MainDocumentPart.WordprocessingCommentsPart;

            // Nothing to read when the comments part or its root element is absent
            if (part == null || part.Comments == null)
            {
                return(collected);
            }

            int commentNumber = 0;
            foreach (Comment entry in part.Comments.Elements<Comment>())
            {
                commentNumber++;
                collected.parts.Add(new POCO.DocumentPart
                {
                    body       = entry.InnerText,
                    partnumber = commentNumber
                });
            }

            return(collected);
        }
Beispiel #6
0
        /// <summary>
        /// Accepts an uploaded Office document, extracts its text with the handler matching
        /// the declared mimetype, and returns the result serialized as JSON.
        /// </summary>
        /// <param name="upload">Form payload carrying the file, its name and its mimetype.</param>
        /// <returns>
        /// 400 when the payload or any of file/filename/mimetype is missing, when the file is
        /// larger than 50,000,000 bytes, or when the mimetype is not a supported Word,
        /// PowerPoint or Excel Open XML type; 500 when extraction throws; otherwise an
        /// <see cref="ObjectResult"/> wrapping the JSON string of the extracted text.
        /// </returns>
        public async Task<IActionResult> Post([FromForm] UploadFile upload)
        {
            // Largest upload accepted, in bytes
            const long MaxUploadBytes = 50000000;

            try
            {
                // Validate the upload; a missing payload is a client error, not a server fault
                if (upload == null ||
                    upload.file == null ||
                    upload.filename == null ||
                    upload.mimetype == null)
                {
                    return(new BadRequestResult());
                }

                long fileLength = upload.file.Length;
                if (fileLength > MaxUploadBytes)
                {
                    return(new BadRequestResult());
                }

                POCO.DocumentText text = new POCO.DocumentText();

                Stopwatch st = new Stopwatch();

                // Check the mimetype. Invariant lower-casing keeps the comparison independent of
                // the server culture (e.g. tr-TR maps 'I' to a dotless 'ı', which would never match).
                switch (upload.mimetype.ToLowerInvariant().Trim())
                {
                case "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
                case "application/vnd.openxmlformats-officedocument.wordprocessingml.template":
                    // DOCX
                    st.Start();
                    text = Word.ExtractText(upload.file, _logger);
                    st.Stop();
                    _logger.LogInformation("TEXT [" + st.ElapsedMilliseconds + "ms] DOCX: " + upload.filename);
                    break;

                case "application/vnd.openxmlformats-officedocument.presentationml.presentation":
                case "application/vnd.openxmlformats-officedocument.presentationml.template":
                case "application/vnd.openxmlformats-officedocument.presentationml.slideshow":
                    // PPTX
                    st.Start();
                    text = PowerPoint.ExtractText(upload.file);
                    st.Stop();
                    _logger.LogInformation("TEXT [" + st.ElapsedMilliseconds + "ms] PPTX: " + upload.filename);
                    break;

                case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
                case "application/vnd.openxmlformats-officedocument.spreadsheetml.template":
                    // XLSX
                    st.Start();
                    text = Excel.ExtractText(upload.file, _logger);
                    st.Stop();
                    _logger.LogInformation("TEXT [" + st.ElapsedMilliseconds + "ms] XLSX: " + upload.filename);
                    break;

                default:
                    // Unsupported document type
                    return(new BadRequestResult());
                }

                // Serialize the extract result
                string jsonText = JsonConvert.SerializeObject(text);

                ObjectResult result = new ObjectResult(jsonText);
                return(result);
            }
            catch (Exception exPost)
            {
                // Pass the exception itself so the stack trace reaches the log sink
                _logger.LogError(exPost, "ExtractTextController: " + exPost.Message);
                return(StatusCode((int)System.Net.HttpStatusCode.InternalServerError));
            }
        }