/// <summary>
/// Extracts the text of every slide in an uploaded PowerPoint (Open XML) presentation.
/// </summary>
/// <param name="upload">The uploaded presentation file.</param>
/// <returns>
/// A <c>POCO.DocumentText</c> holding one part per slide. Each part's <c>partnumber</c> is the
/// loop index passed to <c>GetSlideIdAndText</c>, <c>body</c> is the text that helper returns,
/// and <c>header</c>/<c>footer</c> are empty strings.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="upload"/> is null.</exception>
public static POCO.DocumentText ExtractText(IFormFile upload)
{
    if (upload == null)
    {
        throw new ArgumentNullException(nameof(upload));
    }

    POCO.DocumentText text = new POCO.DocumentText();

    // Own the upload stream explicitly so it is released even if opening or parsing fails.
    using (var uploadStream = upload.OpenReadStream())
    // Open the presentation as read-only. Open() either returns a document or throws,
    // so no null check on the result is needed.
    using (PresentationDocument presentationDocument = PresentationDocument.Open(uploadStream, false))
    {
        int numSlides = CountSlides(presentationDocument);
        for (int i = 0; i < numSlides; i++)
        {
            // Build one part per slide from the slide's text.
            POCO.DocumentPart slide = new POCO.DocumentPart();
            slide.partnumber = i;
            slide.body = GetSlideIdAndText(presentationDocument, i);
            slide.header = string.Empty;
            slide.footer = string.Empty;

            text.parts.Add(slide);
        }
    }

    return text;
}
/// <summary>
/// Extracts paragraph text from a Word (Open XML) document held in memory.
/// </summary>
/// <param name="memstream">Stream containing the document package.</param>
/// <returns>
/// A document text object with one part per non-empty paragraph. <c>partnumber</c> is the
/// 1-based position of the paragraph among all paragraphs (empty ones included in the count).
/// </returns>
/// <exception cref="OpenXmlPackageException">
/// The package could not be opened and the failure was not an "Invalid Hyperlink" error,
/// or the package still failed to open after one repair attempt.
/// </exception>
private static DocumentText ProcessFile(MemoryStream memstream)
{
    return ProcessFileCore(memstream, true);
}

/// <summary>
/// Does the actual extraction for <see cref="ProcessFile"/>. When <paramref name="allowRepair"/>
/// is true, an "Invalid Hyperlink" package error triggers a single repair-and-retry.
/// </summary>
private static DocumentText ProcessFileCore(MemoryStream memstream, bool allowRepair)
{
    POCO.DocumentText text = new POCO.DocumentText();

    try
    {
        // Open the document as read-only.
        using (WordprocessingDocument wordDocument = WordprocessingDocument.Open(memstream, false))
        {
            int paraCounter = 0;
            foreach (var paragraph in wordDocument.MainDocumentPart.RootElement.Descendants<Paragraph>())
            {
                paraCounter++;

                // Concatenate the text of every run that is a direct child of the paragraph.
                var paraText = new System.Text.StringBuilder();
                foreach (var run in paragraph.Elements<Run>())
                {
                    foreach (var texttype in run.Elements<TextType>())
                    {
                        paraText.Append(texttype.Text);
                    }
                }

                // Only paragraphs that produced some text become parts.
                if (paraText.Length > 0)
                {
                    POCO.DocumentPart part = new POCO.DocumentPart();
                    part.body = paraText.ToString();
                    part.partnumber = paraCounter;
                    text.parts.Add(part);
                }
            }
        }
    }
    catch (OpenXmlPackageException packageEx)
    {
        // Only the invalid-hyperlink failure is handled here. Anything else is rethrown,
        // as is a second failure after a repair, so a broken package is never reported as
        // an empty document and the retry cannot recurse without bound.
        if (!allowRepair || !packageEx.ToString().Contains("Invalid Hyperlink"))
        {
            throw;
        }

        MemoryStream fixedMemStream = Castlepoint.Text.FileHandlers.Utils.FixInvalidUri(memstream);
        return ProcessFileCore(fixedMemStream, false);
    }

    return text;
}
/// <summary>
/// Buffers an uploaded Word document into memory and extracts its text via <c>ProcessFile</c>.
/// </summary>
/// <param name="upload">The uploaded document.</param>
/// <param name="logger">Logger that receives an error entry when extraction fails.</param>
/// <returns>The document text produced by <c>ProcessFile</c>.</returns>
/// <remarks>
/// Any exception is logged with the "Word Extract Text: " prefix and then rethrown unchanged.
/// </remarks>
public static POCO.DocumentText ExtractText(IFormFile upload, ILogger logger)
{
    try
    {
        if (upload == null)
        {
            throw new ArgumentNullException(nameof(upload));
        }

        // Both streams are disposed deterministically, including on failure paths.
        using (var uploadStream = upload.OpenReadStream())
        using (var memstream = new System.IO.MemoryStream())
        {
            // Read the bytes from the upload into a memory-backed stream
            uploadStream.CopyTo(memstream);

            return ProcessFile(memstream);
        }
    }
    catch (Exception exWordExtractText)
    {
        logger.LogError("Word Extract Text: " + exWordExtractText.Message);
        throw;
    }
}
/// <summary>
/// Reads a spreadsheet from memory and turns each sheet into one document part whose body is
/// the sheet's rows, tab-separated, one row per line.
/// </summary>
/// <param name="memstream">Stream containing the spreadsheet.</param>
/// <param name="fileName">File name, used only in log messages.</param>
/// <param name="logger">Logger for progress messages.</param>
/// <returns>A document text object with one part per sheet, numbered from 1.</returns>
private static DocumentText ProcessFile2(MemoryStream memstream, string fileName, ILogger logger)
{
    POCO.DocumentText extracted = new POCO.DocumentText();

    // Auto-detect format, supports:
    //  - Binary Excel files (2.0-2003 format; *.xls)
    //  - OpenXml Excel files (2007 format; *.xlsx)
    using (var excelReader = ExcelReaderFactory.CreateReader(memstream))
    {
        var dataSet = excelReader.AsDataSet();
        logger.LogInformation("ProcessFile: tables=" + dataSet.Tables.Count.ToString() + " filename=" + fileName);

        for (int tableIndex = 0; tableIndex < dataSet.Tables.Count; tableIndex++)
        {
            System.Data.DataTable sheet = dataSet.Tables[tableIndex];
            int sheetNumber = tableIndex + 1;
            logger.LogInformation("ProcessFile: processing sheet#" + sheetNumber.ToString() + " filename=" + fileName);

            // Each row becomes one line of tab-separated cell values.
            StringBuilder sheetBody = new StringBuilder();
            foreach (System.Data.DataRow dataRow in sheet.Rows)
            {
                sheetBody.AppendLine(string.Join("\t", dataRow.ItemArray));
            }

            // Add this sheet as a new part
            extracted.parts.Add(new POCO.DocumentPart
            {
                body = sheetBody.ToString(),
                partnumber = sheetNumber
            });
        }
    }

    return extracted;
}
/// <summary>
/// Collects the review comments of a Word document, one part per comment.
/// </summary>
/// <param name="wordDocument">An already opened Word document.</param>
/// <returns>
/// A <c>POCO.DocumentText</c> whose parts carry each comment's <c>InnerText</c> as <c>body</c>,
/// numbered from 1 in enumeration order. The result has no parts when the document has no
/// main part, no comments part, or no comments element.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="wordDocument"/> is null.</exception>
public static POCO.DocumentText GetCommentsFromDocument(WordprocessingDocument wordDocument)
{
    if (wordDocument == null)
    {
        throw new ArgumentNullException(nameof(wordDocument));
    }

    POCO.DocumentText text = new POCO.DocumentText();

    // A missing main part is treated like a missing comments part: nothing to extract.
    var mainPart = wordDocument.MainDocumentPart;
    if (mainPart == null)
    {
        return text;
    }

    WordprocessingCommentsPart commentsPart = mainPart.WordprocessingCommentsPart;
    if (commentsPart == null || commentsPart.Comments == null)
    {
        return text;
    }

    int counterComments = 0;
    foreach (Comment comment in commentsPart.Comments.Elements<Comment>())
    {
        counterComments++;

        POCO.DocumentPart part = new POCO.DocumentPart();
        part.body = comment.InnerText;
        part.partnumber = counterComments;
        text.parts.Add(part);
    }

    return text;
}
/// <summary>
/// Accepts an uploaded Office document, extracts its text according to the declared MIME type
/// and returns the extraction result serialized as JSON.
/// </summary>
/// <param name="upload">Form payload carrying the file, its name and its MIME type.</param>
/// <returns>
/// 400 when the payload is missing or incomplete, the file exceeds the size limit, or the
/// MIME type is not one of the handled Word / PowerPoint / Excel Open XML types;
/// 500 when extraction throws; otherwise an <c>ObjectResult</c> wrapping the JSON string.
/// </returns>
public async Task<IActionResult> Post([FromForm] UploadFile upload)
{
    // Largest upload accepted, in bytes.
    const long MaxUploadBytes = 50000000;

    try
    {
        // Validate the upload. A missing payload is a client error, not a server fault.
        if (upload == null || upload.file == null || upload.filename == null || upload.mimetype == null)
        {
            return new BadRequestResult();
        }

        if (upload.file.Length > MaxUploadBytes)
        {
            return new BadRequestResult();
        }

        // MIME types are protocol tokens, so normalize with the invariant culture;
        // culture-sensitive lower-casing can fail to match under some server cultures.
        string mimetype = upload.mimetype.Trim().ToLowerInvariant();

        POCO.DocumentText text;
        string label;
        Stopwatch st = Stopwatch.StartNew();

        switch (mimetype)
        {
            case "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            case "application/vnd.openxmlformats-officedocument.wordprocessingml.template":
                label = "DOCX";
                text = Word.ExtractText(upload.file, _logger);
                break;

            case "application/vnd.openxmlformats-officedocument.presentationml.presentation":
            case "application/vnd.openxmlformats-officedocument.presentationml.template":
            case "application/vnd.openxmlformats-officedocument.presentationml.slideshow":
                label = "PPTX";
                text = PowerPoint.ExtractText(upload.file);
                break;

            case "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
            case "application/vnd.openxmlformats-officedocument.spreadsheetml.template":
                label = "XLSX";
                text = Excel.ExtractText(upload.file, _logger);
                break;

            default:
                return new BadRequestResult();
        }

        st.Stop();
        _logger.LogInformation("TEXT [" + st.ElapsedMilliseconds + "ms] " + label + ": " + upload.filename);

        // Serialize the extract result. The response shape (a JSON string wrapped in an
        // ObjectResult) is kept as-is because existing clients may rely on it.
        string jsonText = JsonConvert.SerializeObject(text);
        return new ObjectResult(jsonText);
    }
    catch (Exception exPost)
    {
        _logger.LogError("ExtractTextController: " + exPost.Message);
        return StatusCode((int)System.Net.HttpStatusCode.InternalServerError);
    }
}