/// <summary>
/// Handles the Export button: lets the user pick a folder, then writes the text of every
/// PowerPoint presentation (.ppt/.pptx) and PDF found directly in that folder into a single
/// Word document under the folder's "_output" sub-directory.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnExport_Click(object sender, EventArgs e)
{
    // Ask for the folder before touching the status label, so that cancelling the
    // dialog does not leave a stale "Parsing.." message on a disabled label.
    var result = this.folderBrowser.ShowDialog(this);
    if (result != DialogResult.OK)
    {
        return;
    }

    lblMessage.Enabled = false;
    lblMessage.Text = "Parsing..";

    var folderPath = FileIO.GetPath(folderBrowser.SelectedPath);

    // Enumerate the directory once and classify the files by extension.
    var allFiles = Directory.GetFiles(folderPath, "*.*", SearchOption.TopDirectoryOnly);
    var presos = FilterFilesByExtension(allFiles, new[] { ".ppt", ".pptx" });
    var pdfs = FilterFilesByExtension(allFiles, new[] { ".pdf" });

    if (presos.Count == 0 && pdfs.Count == 0)
    {
        lblMessage.Text = "no presentations or Pdfs found";
        return;
    }

    var tempDir = Directory.CreateDirectory(Path.Combine(folderPath, "_temp"));

    // PowerPoint is only started when there is at least one presentation to read;
    // for a PDF-only folder this stays null and the presentation pass is skipped.
    Microsoft.Office.Interop.PowerPoint.Application powerPointApp = null;
    if (presos.Count != 0)
    {
        try
        {
            powerPointApp = PowerPointEx.ConvertToPPTX(presos, tempDir.FullName);
        }
        catch (Exception)
        {
            MessageBox.Show("Error initializing, close Powerpoint if it is open");
            return;
        }
    }

    try
    {
        // The output document is named after the first input file (presentations take
        // precedence over PDFs) and must exist before any content is added.
        CreateOutputDocument(folderPath, presos.Count != 0 ? presos[0] : pdfs[0]);

        try
        {
            if (powerPointApp != null)
            {
                Microsoft.Office.Interop.PowerPoint.Presentations presentations = powerPointApp.Presentations;
                foreach (var preso in presos)
                {
                    ExportPresentation(presentations, preso, tempDir.FullName);
                }
            }
        }
        finally
        {
            // Shut PowerPoint down even when a presentation failed to export, and
            // always before the image/PDF passes below run.
            if (powerPointApp != null)
            {
                PowerPointEx.Close(powerPointApp);
            }
        }

        AddImages();
        AddPdfs(pdfs, tempDir.FullName);
        WordEx.Save();

        lblMessage.Text = "Created Doc: " + _fullFilePath;
        lblMessage.Enabled = true;
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
    }
}

/// <summary>
/// Returns the files whose path ends with one of the given extensions, ignoring case.
/// </summary>
private static List<string> FilterFilesByExtension(IEnumerable<string> files, IEnumerable<string> extensions)
{
    return files
        .Where(file => extensions.Any(ext => file.EndsWith(ext, StringComparison.OrdinalIgnoreCase)))
        .ToList();
}

/// <summary>
/// Removes everything up to and including the first underscore of a file name.
/// A name without an underscore is returned unchanged.
/// </summary>
private static string StripNamePrefix(string fileName)
{
    var underscoreIndex = fileName.IndexOf('_');
    return fileName.Remove(0, underscoreIndex + 1);
}

/// <summary>
/// Creates the "_output" directory and the Word document named after
/// <paramref name="sourceFile"/> (prefix and extension stripped), remembering its
/// full path in <c>_fullFilePath</c>.
/// </summary>
private void CreateOutputDocument(string folderPath, string sourceFile)
{
    var baseName = Path.GetFileNameWithoutExtension(StripNamePrefix(Path.GetFileName(sourceFile)));
    var outDir = Directory.CreateDirectory(Path.Combine(folderPath, "_output"));
    _fullFilePath = Path.Combine(outDir.FullName, baseName + ".docx");
    WordEx.CreateDoc(_fullFilePath);
}

/// <summary>
/// Opens one presentation without a window, writes its title, slide text and notes to
/// the Word document, and records a <c>SlideInfo</c> per slide pointing at the .pptx
/// copy in the temp directory.
/// </summary>
private void ExportPresentation(Microsoft.Office.Interop.PowerPoint.Presentations presentations, string preso, string tempDirPath)
{
    Microsoft.Office.Interop.PowerPoint.Presentation presentation =
        presentations.Open(preso, MsoTriState.msoFalse, MsoTriState.msoFalse, MsoTriState.msoFalse);
    try
    {
        WordEx.AddTitle(StripNamePrefix(Path.GetFileName(preso)));

        // Same for every slide of this presentation, so computed once.
        string pptxFile = Path.Combine(tempDirPath, Path.GetFileNameWithoutExtension(preso) + ".pptx");

        foreach (Microsoft.Office.Interop.PowerPoint.Slide slide in presentation.Slides)
        {
            _slideInfos.Add(new SlideInfo() { Id = slide.SlideNumber, path = pptxFile });
            ExportSlideShapes(slide);
            ExportSlideNotes(slide);
        }
    }
    finally
    {
        // Close the presentation even if reading one of its slides throws.
        presentation.Close();
    }
}

/// <summary>
/// Writes the text of every text-bearing shape on the slide. Bulleted shapes become
/// bullet lists; otherwise the first text-frame shape is written as a header and the
/// following ones as plain text.
/// </summary>
private void ExportSlideShapes(Microsoft.Office.Interop.PowerPoint.Slide slide)
{
    bool firstLine = true;
    foreach (var item in slide.Shapes)
    {
        var shape = (Microsoft.Office.Interop.PowerPoint.Shape)item;
        if (shape.HasTextFrame != MsoTriState.msoTrue)
        {
            continue;
        }

        if (shape.TextFrame2.HasText == MsoTriState.msoTrue)
        {
            TextRange2 range = shape.TextFrame2.TextRange;
            if (range.ParagraphFormat.Bullet.Type != MsoBulletType.msoBulletNone)
            {
                WordEx.AddBulletList(ReadParagraphTexts(range));
            }
            else
            {
                AddHeaderOrText(range.Text, firstLine);
            }
        }
        else if (shape.TextFrame.HasText == MsoTriState.msoTrue)
        {
            AddHeaderOrText(shape.TextFrame.TextRange.Text, firstLine);
        }

        // Only the very first text-frame shape may supply the header, even when it
        // turned out to be empty or bulleted.
        firstLine = false;
    }
}

/// <summary>
/// Writes the slide's speaker notes once per slide. Handling notes here rather than
/// inside the shape loop keeps them from being repeated for every shape and still
/// exports them for slides that have no shapes.
/// </summary>
private void ExportSlideNotes(Microsoft.Office.Interop.PowerPoint.Slide slide)
{
    if (slide.HasNotesPage != MsoTriState.msoTrue)
    {
        return;
    }

    var prevNoteText = "";
    foreach (var note in slide.NotesPage.Shapes)
    {
        var noteShape = (Microsoft.Office.Interop.PowerPoint.Shape)note;
        if (noteShape.HasTextFrame != MsoTriState.msoTrue)
        {
            continue;
        }

        if (noteShape.TextFrame2.HasText == MsoTriState.msoTrue)
        {
            TextRange2 range = noteShape.TextFrame2.TextRange;
            var text = range.Text;

            // Skip a repeat of the last bulleted note and purely numeric text.
            // NOTE(review): the numeric check presumably drops the slide-number
            // placeholder of the notes page - confirm.
            if (text == prevNoteText || WordEx.IsNumeric(text))
            {
                continue;
            }

            if (range.ParagraphFormat.Bullet.Type != MsoBulletType.msoBulletNone)
            {
                WordEx.AddBulletList(ReadParagraphTexts(range), true);
                prevNoteText = text;
            }
            else
            {
                WordEx.AddText(text, true);
            }
        }
        else if (noteShape.TextFrame.HasText == MsoTriState.msoTrue)
        {
            WordEx.AddText(noteShape.TextFrame.TextRange.Text, true);
        }
    }
}

/// <summary>
/// Collects the text of each paragraph in the given range, in order.
/// </summary>
private static List<string> ReadParagraphTexts(TextRange2 range)
{
    List<string> texts = new List<string>();
    foreach (TextRange2 para in range.Paragraphs)
    {
        texts.Add(para.Text);
    }
    return texts;
}

/// <summary>
/// Writes <paramref name="text"/> as a header when it is the first line of the slide,
/// otherwise as plain text.
/// </summary>
private void AddHeaderOrText(string text, bool isFirstLine)
{
    if (isFirstLine)
    {
        WordEx.AddHeader(text);
    }
    else
    {
        WordEx.AddText(text);
    }
}
/// <summary>
/// Appends one PDF to the Word document: a title derived from the file name, every
/// page image not already present in the document, and finally the PDF's full text.
/// </summary>
/// <param name="pdfFilePath">Path of the PDF file to read.</param>
/// <param name="tempDir">Not used by this method; kept for caller compatibility.</param>
public static void ConvertToDoc(string pdfFilePath, string tempDir)
{
    // Title is the file name with everything up to the first underscore removed
    // (unchanged when the name has no underscore).
    var pdfHeader = System.IO.Path.GetFileName(pdfFilePath);
    var underscoreIndex = pdfHeader.IndexOf('_');
    pdfHeader = pdfHeader.Remove(0, underscoreIndex + 1);
    WordEx.AddTitle(pdfHeader);

    try
    {
        _pdfDoc = PDDocument.load(pdfFilePath);
    }
    catch
    {
        // Nothing was opened, so leave the shared field in its idle (null) state.
        _pdfDoc = null;
        MessageBox.Show("Cant load pdf, try re-downloading:" + Environment.NewLine + pdfFilePath, "PDF Error");
        return;
    }

    try
    {
        var pagelist = _pdfDoc.getDocumentCatalog().getAllPages();
        for (int x = 0; x < pagelist.size(); x++)
        {
            PDPage page = (PDPage)pagelist.get(x);

            // A page may carry no resource dictionary of its own; treat that as "no images".
            PDResources pdResources = page.getResources();
            Map pageImages = pdResources != null ? pdResources.getImages() : null;
            if (pageImages == null)
            {
                continue;
            }

            Iterator imageIter = pageImages.keySet().iterator();
            while (imageIter.hasNext())
            {
                String key = (String)imageIter.next();
                PDXObjectImage pdxObjectImage = (PDXObjectImage)pageImages.get(key);
                var buffImage = pdxObjectImage.getRGBImage();
                Bitmap theImage = buffImage.getBitmap();
                if (!ContainsDocImage(theImage))
                {
                    WordEx.AddImage(theImage, pdfHeader, pdfHeader);
                }
                else
                {
                    // Duplicate of an image already seen; release it right away.
                    theImage.Dispose();
                }
            }
        }

        string docText = _stripper.getText(_pdfDoc);
        WordEx.AddText(docText);
    }
    finally
    {
        // Release the cached bitmaps and the PDF even when extraction fails part-way.
        foreach (Bitmap btmap in _docImages)
        {
            btmap.Dispose();
        }
        _docImages.Clear();
        _pdfDoc.close();
        _pdfDoc = null;
    }
}