/// <summary>
/// Extracts the text of each slide in an uploaded presentation and returns it
/// as one <c>POCO.DocumentPart</c> per slide.
/// </summary>
/// <param name="upload">The uploaded presentation file. Its content stream is opened read-only.</param>
/// <returns>
/// A <c>POCO.DocumentText</c> containing one part per slide index from 0 to
/// <c>CountSlides(...) - 1</c>. Each part's <c>partnumber</c> is that zero-based index,
/// <c>body</c> is the value returned by <c>GetSlideIdAndText</c>, and
/// <c>header</c>/<c>footer</c> are empty strings.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="upload"/> is null.</exception>
public static POCO.DocumentText ExtractText(IFormFile upload)
{
    if (upload == null)
    {
        throw new ArgumentNullException(nameof(upload));
    }

    POCO.DocumentText text = new POCO.DocumentText();

    // Dispose the stream opened here explicitly instead of relying on the
    // presentation package to close it. The document is disposed first
    // (reverse declaration order), then the stream.
    using (var uploadStream = upload.OpenReadStream())
    using (PresentationDocument presentationDocument = PresentationDocument.Open(uploadStream, false))
    {
        int numSlides = CountSlides(presentationDocument);

        for (int i = 0; i < numSlides; i++)
        {
            // Get the text for the slide at this index.
            string slideText = GetSlideIdAndText(presentationDocument, i);

            // Slides carry no separate header/footer text here, so both are left empty.
            POCO.DocumentPart slide = new POCO.DocumentPart();
            slide.partnumber = i;
            slide.body = slideText;
            slide.header = string.Empty;
            slide.footer = string.Empty;

            // Add to our parts collection.
            text.parts.Add(slide);
        }
    }

    return text;
}
/// <summary>
/// Reads a word-processing document from a stream and returns the text of each
/// non-empty paragraph as a separate <c>POCO.DocumentPart</c>.
/// </summary>
/// <param name="memstream">Stream containing the document package; opened read-only.</param>
/// <returns>
/// A <c>DocumentText</c> with one part per paragraph that contains text. Each part's
/// <c>partnumber</c> is the 1-based position of the paragraph among all paragraphs
/// (empty paragraphs are counted but produce no part). If the package cannot be
/// opened for a reason other than an invalid hyperlink, whatever was collected so
/// far (normally nothing) is returned.
/// </returns>
private static DocumentText ProcessFile(MemoryStream memstream)
{
    POCO.DocumentText text = new POCO.DocumentText();

    try
    {
        // Open the document as read-only.
        using (WordprocessingDocument wordDocument = WordprocessingDocument.Open(memstream, false))
        {
            // A package without a main document part (or with an empty one) has no paragraphs.
            var root = wordDocument.MainDocumentPart?.RootElement;
            if (root == null)
            {
                return text;
            }

            int paraCounter = 0;
            foreach (var paragraph in root.Descendants<Paragraph>())
            {
                paraCounter++;

                // Concatenate the text of every run directly under this paragraph.
                StringBuilder paraText = new StringBuilder();
                foreach (var run in paragraph.Elements<Run>())
                {
                    foreach (var texttype in run.Elements<TextType>())
                    {
                        paraText.Append(texttype.Text);
                    }
                }

                // Only paragraphs that yielded some text become parts.
                if (paraText.Length > 0)
                {
                    POCO.DocumentPart part = new POCO.DocumentPart();
                    part.body = paraText.ToString();
                    part.partnumber = paraCounter;
                    text.parts.Add(part);
                }
            }
        }
    }
    catch (OpenXmlPackageException packageEx)
    {
        // Other package errors are intentionally not rethrown: the caller receives
        // the text gathered so far instead of an exception.
        if (packageEx.ToString().Contains("Invalid Hyperlink"))
        {
            // NOTE(review): FixInvalidUri presumably rewrites the malformed hyperlink
            // targets so the retry can open the package - confirm it cannot return a
            // stream that fails the same way, otherwise this recursion never ends.
            MemoryStream fixedMemStream = Castlepoint.Text.FileHandlers.Utils.FixInvalidUri(memstream);
            text = ProcessFile(fixedMemStream);
        }
    }

    return text;
}
/// <summary>
/// Reads a spreadsheet from a stream and returns each sheet as one
/// <c>POCO.DocumentPart</c> whose body is the sheet's rows rendered as
/// tab-separated lines.
/// </summary>
/// <param name="memstream">Stream containing the spreadsheet; the reader factory auto-detects the format.</param>
/// <param name="fileName">File name, used only in log messages.</param>
/// <param name="logger">Logger that receives progress messages.</param>
/// <returns>A <c>DocumentText</c> with one part per sheet, numbered from 1 in table order.</returns>
private static DocumentText ProcessFile2(MemoryStream memstream, string fileName, ILogger logger)
{
    POCO.DocumentText extracted = new POCO.DocumentText();

    // Auto-detect format, supports:
    //  - Binary Excel files (2.0-2003 format; *.xls)
    //  - OpenXml Excel files (2007 format; *.xlsx)
    using (var excelReader = ExcelReaderFactory.CreateReader(memstream))
    {
        var dataSet = excelReader.AsDataSet();
        var sheets = dataSet.Tables;

        logger.LogInformation($"ProcessFile: tables={sheets.Count} filename={fileName}");

        for (int index = 0; index < sheets.Count; index++)
        {
            // Part numbers are 1-based.
            int sheetNumber = index + 1;
            System.Data.DataTable sheet = sheets[index];

            logger.LogInformation($"ProcessFile: processing sheet#{sheetNumber} filename={fileName}");

            // One line per row, cells separated by tabs.
            StringBuilder sheetBody = new StringBuilder();
            foreach (System.Data.DataRow dataRow in sheet.Rows)
            {
                sheetBody.AppendLine(string.Join("\t", dataRow.ItemArray));
            }

            // Each sheet becomes its own part.
            extracted.parts.Add(new POCO.DocumentPart
            {
                body = sheetBody.ToString(),
                partnumber = sheetNumber
            });
        }
    }

    return extracted;
}
/// <summary>
/// Collects the text of every comment in a word-processing document, one
/// <c>POCO.DocumentPart</c> per comment.
/// </summary>
/// <param name="wordDocument">An already opened word-processing document.</param>
/// <returns>
/// A <c>POCO.DocumentText</c> whose parts hold each comment's inner text, numbered
/// from 1 in the order the comments are enumerated. The result has no parts when the
/// document has no main part, no comments part, or no comments.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="wordDocument"/> is null.</exception>
public static POCO.DocumentText GetCommentsFromDocument(WordprocessingDocument wordDocument)
{
    if (wordDocument == null)
    {
        throw new ArgumentNullException(nameof(wordDocument));
    }

    POCO.DocumentText text = new POCO.DocumentText();

    // Any missing link in the chain (main part, comments part, comments root)
    // simply means there is nothing to extract.
    var comments = wordDocument.MainDocumentPart?.WordprocessingCommentsPart?.Comments;
    if (comments == null)
    {
        return text;
    }

    int counterComments = 0;
    foreach (Comment comment in comments.Elements<Comment>())
    {
        counterComments++;

        POCO.DocumentPart part = new POCO.DocumentPart();
        part.body = comment.InnerText;
        part.partnumber = counterComments;
        text.parts.Add(part);
    }

    return text;
}