/// <summary>
/// Opens the Word document at <paramref name="docFilePath"/> (not editable) and replaces, in the
/// in-memory DOM, its tables, formulas, images and heading/title paragraphs with ID placeholders.
/// The extracted content is stored in TableList, MathList, ImageList and PlainTextList, and the
/// remaining text is collected through ExtractTextAndCreatePlaceholderList into PlaceholderIDList.
/// </summary>
/// <param name="docFilePath">Path of the .docx file to process.</param>
public void ProcessWordDocument(string docFilePath)
{
    tableIndex = 1;
    mathIndex = 1;
    imageIndex = 1;
    textIndex = 1;

    using (WordprocessingDocument doc = WordprocessingDocument.Open(docFilePath, false))
    {
        var document = doc.MainDocumentPart.Document;

        // ---- Tables -------------------------------------------------------------------------
        // Every query below is snapshotted with ToList() because the loop bodies replace, remove
        // or append elements; editing the tree while lazily walking Descendants() breaks the walk.
        var tables = document.Descendants<DocumentFormat.OpenXml.Wordprocessing.Table>().ToList();
        foreach (var table in tables)
        {
            var tableRows = table.Descendants<DocumentFormat.OpenXml.Wordprocessing.TableRow>().ToList();
            int trows = tableRows.Count;
            // The column count is taken from the first row; a table without rows gets zero columns
            // instead of throwing from First().
            int tcols = trows > 0
                ? tableRows[0].Descendants<DocumentFormat.OpenXml.Wordprocessing.TableCell>().Count()
                : 0;

            // Copy the text of every cell into the table model, keeping row/column positions.
            WordTable wordTable = new WordTable(trows, tcols);
            for (int row = 0; row < tableRows.Count; row++)
            {
                int cell = 0;
                foreach (var tableCell in tableRows[row].Descendants<DocumentFormat.OpenXml.Wordprocessing.TableCell>())
                {
                    wordTable.AddText(row, cell, tableCell.InnerText);
                    cell++;
                }
            }

            // Swap the table for a paragraph that only carries the placeholder ID.
            string tablePlaceholder = "%Table-&" + tableIndex;
            var tablePara = new DocumentFormat.OpenXml.Wordprocessing.Paragraph();
            Run tableRun = tablePara.AppendChild(new Run());
            tableRun.AppendChild(new Text(tablePlaceholder));
            table.Parent.ReplaceChild(tablePara, table);
            tableIndex++;

            TableList.Add(tablePlaceholder, wordTable);
        }

        // ---- Formulas (OfficeMath -> MathML) ------------------------------------------------
        var formulas = document.Descendants<DocumentFormat.OpenXml.Math.OfficeMath>().ToList();
        if (formulas.Count > 0)
        {
            // Load the stylesheet once for all formulas, and only when it is actually needed.
            XslCompiledTransform xslTransform = new XslCompiledTransform();
            xslTransform.Load(officeMathMLSchemaFilePath);

            foreach (var formula in formulas)
            {
                // Emit an XML fragment without declaration; keep the MemoryStream open when the
                // writer is disposed so the result can be read back.
                XmlWriterSettings settings = xslTransform.OutputSettings.Clone();
                settings.ConformanceLevel = ConformanceLevel.Fragment;
                settings.OmitXmlDeclaration = true;
                settings.CloseOutput = false;

                string mmlFormula;
                using (TextReader tr = new StringReader(formula.OuterXml))
                using (XmlReader reader = XmlReader.Create(tr))
                using (MemoryStream ms = new MemoryStream())
                {
                    // Disposing the writer flushes buffered output before the stream is rewound.
                    using (XmlWriter xw = XmlWriter.Create(ms, settings))
                    {
                        xslTransform.Transform(reader, xw);
                    }

                    ms.Seek(0, SeekOrigin.Begin);
                    using (StreamReader sr = new StreamReader(ms, Encoding.UTF8))
                    {
                        mmlFormula = sr.ReadToEnd();
                    }
                }

                // Append the placeholder to the formula's parent, then drop the formula itself.
                string mathPlaceholder = "%Math-&" + mathIndex;
                var mathPara = formula.Parent.AppendChild(new DocumentFormat.OpenXml.Wordprocessing.Paragraph());
                Run mathRun = mathPara.AppendChild(new Run());
                mathRun.AppendChild(new Text(mathPlaceholder));
                mathIndex++;
                formula.Remove();

                MathList.Add(mathPlaceholder, mmlFormula);
            }
        }

        // ---- Images -------------------------------------------------------------------------
        var graphics = document.Descendants<DocumentFormat.OpenXml.Drawing.Graphic>().ToList();
        foreach (var graphic in graphics)
        {
            // Graphics without an embedded blip (no picture data referenced by relationship ID)
            // cannot be exported as an image file, so they are skipped instead of throwing.
            var graphicContent = graphic.FirstChild;
            var blip = graphicContent == null
                ? null
                : graphicContent.Descendants<DocumentFormat.OpenXml.Drawing.Blip>().FirstOrDefault();
            if (blip == null || blip.Embed == null || blip.Embed.Value == null)
            {
                continue;
            }

            ImagePart imagePart = doc.MainDocumentPart.GetPartById(blip.Embed.Value) as ImagePart;
            if (imagePart == null)
            {
                continue;
            }

            var uri = imagePart.Uri;
            var filename = uri.ToString().Split('/').Last();
            string imagePath = TempImageFolder + filename;

            // Dispose the bitmap before its source stream; both were previously leaked.
            using (var stream = doc.Package.GetPart(uri).GetStream())
            using (Bitmap b = new Bitmap(stream))
            {
                b.Save(imagePath);
            }

            // The placeholder goes four levels above the graphic.
            // NOTE(review): presumably graphic -> inline/anchor -> drawing -> run -> paragraph; confirm.
            string imagePlaceholder = "%Image-&" + imageIndex;
            var imagePara = graphic.Parent.Parent.Parent.Parent.AppendChild(new DocumentFormat.OpenXml.Wordprocessing.Paragraph());
            Run imageRun = imagePara.AppendChild(new Run());
            imageRun.AppendChild(new Text(imagePlaceholder));
            imageIndex++;

            ImageList.Add(imagePlaceholder, imagePath);
        }

        // Embedded videos are not extracted by this method.

        // ---- Headings, title and subtitle ---------------------------------------------------
        var paragraphs = document.Descendants<DocumentFormat.OpenXml.Wordprocessing.Paragraph>().ToList();
        foreach (var element in paragraphs)
        {
            try
            {
                // Paragraphs without an explicit style ID are ordinary text and are left alone.
                var properties = element.ParagraphProperties;
                if (properties == null || properties.ParagraphStyleId == null || properties.ParagraphStyleId.Val == null)
                {
                    continue;
                }

                string styleName = properties.ParagraphStyleId.Val.ToString();
                if (styleName == null)
                {
                    continue;
                }

                string type = null;
                switch (styleName.ToLowerInvariant())
                {
                    case "heading1": type = "h1-"; break;
                    case "heading2": type = "h2-"; break;
                    case "heading3": type = "h3-"; break;
                    case "heading4": type = "h4-"; break;
                    case "heading5": type = "h5-"; break;
                    case "title": type = "title-"; break;
                    case "subtitle": type = "subtitle-"; break;
                    default: break;
                }

                if (type == null)
                {
                    continue;
                }

                // Save the paragraph text under its ID, then replace the content with that ID.
                string id = "%" + type + "&" + textIndex;
                PlainTextList.Add(id, element.InnerText);
                textIndex++;

                element.RemoveAllChildren();
                // NOTE(review): this nests a paragraph inside the emptied paragraph. The nesting is
                // kept because ExtractTextAndCreatePlaceholderList may depend on it; confirm.
                var headingPara = element.AppendChild(new DocumentFormat.OpenXml.Wordprocessing.Paragraph());
                Run headingRun = headingPara.AppendChild(new Run());
                headingRun.AppendChild(new Text(id));
            }
            catch (System.Exception ex)
            {
                // Best effort: one bad paragraph must not abort the document, but the failure is
                // traced instead of being silently discarded.
                System.Diagnostics.Trace.TraceWarning("ProcessWordDocument: heading paragraph skipped: " + ex);
            }
        }

        PlaceholderIDList = ExtractTextAndCreatePlaceholderList(doc);

        // Flush whatever text is still buffered into the text list.
        if (textBuilder.Length > 0)
        {
            string s2 = AddTextToTextList();
            if (s2 != null)
            {
                PlaceholderIDList.Add(s2);
            }
        }
    }
}
/// <summary>
/// Opens the Word document at <paramref name="docFilePath"/> (not editable) and replaces, in the
/// in-memory DOM, its tables, formulas, images, videos and heading/title paragraphs with ID
/// placeholders. The extracted content is stored in TableList, MathList, ImageList and
/// PlainTextList, video files are written to the shared web folder, and the remaining text is
/// collected through ExtractTextAndCreatePlaceholderList into PlaceholderIDList.
/// </summary>
/// <param name="docFilePath">Path of the .docx file to process.</param>
public void ProcessWordDocument(string docFilePath)
{
    tableIndex = 1;
    mathIndex = 1;
    imageIndex = 1;
    videoIndex = 1;
    textIndex = 1;

    using (WordprocessingDocument doc = WordprocessingDocument.Open(docFilePath, false))
    {
        var document = doc.MainDocumentPart.Document;

        // ---- Tables -------------------------------------------------------------------------
        // Every query below is snapshotted with ToList() because the loop bodies replace, remove
        // or append elements; editing the tree while lazily walking Descendants() breaks the walk.
        var tables = document.Descendants<DocumentFormat.OpenXml.Wordprocessing.Table>().ToList();
        foreach (var table in tables)
        {
            var tableRows = table.Descendants<DocumentFormat.OpenXml.Wordprocessing.TableRow>().ToList();
            int trows = tableRows.Count;
            // The column count is taken from the first row; a table without rows gets zero columns
            // instead of throwing from First().
            int tcols = trows > 0
                ? tableRows[0].Descendants<DocumentFormat.OpenXml.Wordprocessing.TableCell>().Count()
                : 0;

            // Copy the text of every cell into the table model, keeping row/column positions.
            WordTable wordTable = new WordTable(trows, tcols);
            for (int row = 0; row < tableRows.Count; row++)
            {
                int cell = 0;
                foreach (var tableCell in tableRows[row].Descendants<DocumentFormat.OpenXml.Wordprocessing.TableCell>())
                {
                    wordTable.AddText(row, cell, tableCell.InnerText);
                    cell++;
                }
            }

            // Swap the table for a paragraph that only carries the placeholder ID.
            string tablePlaceholder = "%Table-&" + tableIndex;
            var tablePara = new DocumentFormat.OpenXml.Wordprocessing.Paragraph();
            Run tableRun = tablePara.AppendChild(new Run());
            tableRun.AppendChild(new Text(tablePlaceholder));
            table.Parent.ReplaceChild(tablePara, table);
            tableIndex++;

            TableList.Add(tablePlaceholder, wordTable);
        }

        // ---- Formulas (OfficeMath -> MathML) ------------------------------------------------
        var formulas = document.Descendants<DocumentFormat.OpenXml.Math.OfficeMath>().ToList();
        if (formulas.Count > 0)
        {
            // Load the stylesheet once for all formulas, and only when it is actually needed.
            XslCompiledTransform xslTransform = new XslCompiledTransform();
            xslTransform.Load(officeMathMLSchemaFilePath);

            foreach (var formula in formulas)
            {
                // Emit an XML fragment without declaration; keep the MemoryStream open when the
                // writer is disposed so the result can be read back.
                XmlWriterSettings settings = xslTransform.OutputSettings.Clone();
                settings.ConformanceLevel = ConformanceLevel.Fragment;
                settings.OmitXmlDeclaration = true;
                settings.CloseOutput = false;

                string mmlFormula;
                using (TextReader tr = new StringReader(formula.OuterXml))
                using (XmlReader reader = XmlReader.Create(tr))
                using (MemoryStream ms = new MemoryStream())
                {
                    // Disposing the writer flushes buffered output before the stream is rewound.
                    using (XmlWriter xw = XmlWriter.Create(ms, settings))
                    {
                        xslTransform.Transform(reader, xw);
                    }

                    ms.Seek(0, SeekOrigin.Begin);
                    using (StreamReader sr = new StreamReader(ms, Encoding.UTF8))
                    {
                        mmlFormula = sr.ReadToEnd();
                    }
                }

                // Append the placeholder to the formula's parent, then drop the formula itself.
                string mathPlaceholder = "%Math-&" + mathIndex;
                var mathPara = formula.Parent.AppendChild(new DocumentFormat.OpenXml.Wordprocessing.Paragraph());
                Run mathRun = mathPara.AppendChild(new Run());
                mathRun.AppendChild(new Text(mathPlaceholder));
                mathIndex++;
                formula.Remove();

                MathList.Add(mathPlaceholder, mmlFormula);
            }
        }

        // ---- Images -------------------------------------------------------------------------
        var graphics = document.Descendants<DocumentFormat.OpenXml.Drawing.Graphic>().ToList();
        foreach (var graphic in graphics)
        {
            // Graphics without an embedded blip (no picture data referenced by relationship ID)
            // cannot be exported as an image file, so they are skipped instead of throwing.
            var graphicContent = graphic.FirstChild;
            var blip = graphicContent == null
                ? null
                : graphicContent.Descendants<DocumentFormat.OpenXml.Drawing.Blip>().FirstOrDefault();
            if (blip == null || blip.Embed == null || blip.Embed.Value == null)
            {
                continue;
            }

            ImagePart imagePart = doc.MainDocumentPart.GetPartById(blip.Embed.Value) as ImagePart;
            if (imagePart == null)
            {
                continue;
            }

            var uri = imagePart.Uri;
            var filename = uri.ToString().Split('/').Last();
            string imagePath = TempImageFolder + filename;

            // Dispose the bitmap before its source stream; both were previously leaked.
            using (var stream = doc.Package.GetPart(uri).GetStream())
            using (Bitmap b = new Bitmap(stream))
            {
                b.Save(imagePath);
            }

            // The placeholder goes four levels above the graphic.
            // NOTE(review): presumably graphic -> inline/anchor -> drawing -> run -> paragraph; confirm.
            string imagePlaceholder = "%Image-&" + imageIndex;
            var imagePara = graphic.Parent.Parent.Parent.Parent.AppendChild(new DocumentFormat.OpenXml.Wordprocessing.Paragraph());
            Run imageRun = imagePara.AppendChild(new Run());
            imageRun.AppendChild(new Text(imagePlaceholder));
            imageIndex++;

            ImageList.Add(imagePlaceholder, imagePath);
        }

        // ---- Videos -------------------------------------------------------------------------
        var videos = document.Descendants<DocumentFormat.OpenXml.Drawing.VideoFromFile>().ToList();
        foreach (var video in videos)
        {
            // Each video is handled on its own, so one failure does not drop the remaining videos.
            try
            {
                // Extract the video bytes from the Word document via the blip's relationship ID.
                var videoContent = video.FirstChild;
                var blip = videoContent == null
                    ? null
                    : videoContent.Descendants<DocumentFormat.OpenXml.Drawing.Blip>().FirstOrDefault();
                if (blip == null || blip.Embed == null || blip.Embed.Value == null)
                {
                    continue;
                }

                var vid = doc.MainDocumentPart.GetPartById(blip.Embed.Value);
                var uri = vid.Uri;
                var filename = uri.ToString().Split('/').Last();

                //TODO set these up universally somewhere
                string videoFolder = @"C:\websites\RoboBraille.Web.Api\dist\";
                // Not used yet: public address of the written file, for the pending Amara request.
                string urlDistribution = @"http://2.109.50.18:5150/dist/" + filename;

                // Stream the part straight into the shared web folder; this avoids holding the
                // whole video in memory and the int cast of the stream length.
                using (var stream = doc.Package.GetPart(uri).GetStream())
                using (var output = File.Create(videoFolder + filename))
                {
                    stream.CopyTo(output);
                }

                //TODO send post video request to Amara

                //TODO check that video.Parent.Parent ...points to the right parent
                string videoPlaceholder = "%Video-&" + videoIndex;
                var videoPara = video.Parent.Parent.Parent.Parent.AppendChild(new DocumentFormat.OpenXml.Wordprocessing.Paragraph());
                Run videoRun = videoPara.AppendChild(new Run());
                videoRun.AppendChild(new Text(videoPlaceholder));
                videoIndex++;

                //TODO keep a list of the videos sent to Amara and their source location (maybe as jobs?)
            }
            catch (System.Exception ex)
            {
                // Best effort: video extraction must not abort the document, but the failure is
                // traced instead of being silently discarded.
                System.Diagnostics.Trace.TraceWarning("ProcessWordDocument: video skipped: " + ex);
            }
        }

        // ---- Headings, title and subtitle ---------------------------------------------------
        var paragraphs = document.Descendants<DocumentFormat.OpenXml.Wordprocessing.Paragraph>().ToList();
        foreach (var element in paragraphs)
        {
            try
            {
                // Paragraphs without an explicit style ID are ordinary text and are left alone.
                var properties = element.ParagraphProperties;
                if (properties == null || properties.ParagraphStyleId == null || properties.ParagraphStyleId.Val == null)
                {
                    continue;
                }

                string styleName = properties.ParagraphStyleId.Val.ToString();
                if (styleName == null)
                {
                    continue;
                }

                string type = null;
                switch (styleName.ToLowerInvariant())
                {
                    case "heading1": type = "h1-"; break;
                    case "heading2": type = "h2-"; break;
                    case "heading3": type = "h3-"; break;
                    case "heading4": type = "h4-"; break;
                    case "heading5": type = "h5-"; break;
                    case "title": type = "title-"; break;
                    case "subtitle": type = "subtitle-"; break;
                    default: break;
                }

                if (type == null)
                {
                    continue;
                }

                // Save the paragraph text under its ID, then replace the content with that ID.
                string id = "%" + type + "&" + textIndex;
                PlainTextList.Add(id, element.InnerText);
                textIndex++;

                element.RemoveAllChildren();
                // NOTE(review): this nests a paragraph inside the emptied paragraph. The nesting is
                // kept because ExtractTextAndCreatePlaceholderList may depend on it; confirm.
                var headingPara = element.AppendChild(new DocumentFormat.OpenXml.Wordprocessing.Paragraph());
                Run headingRun = headingPara.AppendChild(new Run());
                headingRun.AppendChild(new Text(id));
            }
            catch (System.Exception ex)
            {
                // Best effort: one bad paragraph must not abort the document, but the failure is
                // traced instead of being silently discarded.
                System.Diagnostics.Trace.TraceWarning("ProcessWordDocument: heading paragraph skipped: " + ex);
            }
        }

        PlaceholderIDList = ExtractTextAndCreatePlaceholderList(doc);

        // Flush whatever text is still buffered into the text list.
        if (textBuilder.Length > 0)
        {
            string s2 = AddTextToTextList();
            if (s2 != null)
            {
                PlaceholderIDList.Add(s2);
            }
        }
    }
}