コード例 #1
0
        /// <summary>
        /// Opens the Word document at <paramref name="docFilePath"/> (second argument of
        /// <c>WordprocessingDocument.Open</c> is <c>false</c>, i.e. not editable) and replaces
        /// tables, formulas, images and heading/title/subtitle paragraphs in the loaded tree
        /// with textual placeholders. The extracted content is stored under the placeholder id
        /// in <c>TableList</c>, <c>MathList</c>, <c>ImageList</c> and <c>PlainTextList</c>;
        /// finally <c>PlaceholderIDList</c> is rebuilt from the modified document.
        /// </summary>
        /// <param name="docFilePath">Path of the Word document to process.</param>
        public void ProcessWordDocument(string docFilePath)
        {
            tableIndex = 1;
            mathIndex = 1;
            imageIndex = 1;
            textIndex = 1;
            using (WordprocessingDocument doc = WordprocessingDocument.Open(docFilePath, false))
            {
                var document = doc.MainDocumentPart.Document;

                // Every loop below edits the tree it iterates over. Descendants<T>() walks the
                // tree lazily, so replacing or removing the current node can cut the walk short;
                // each element set is therefore snapshotted with ToList() before any edit.

                // ---- Tables: capture the cell text, then swap the table for a placeholder ----
                var tables = document.Descendants<DocumentFormat.OpenXml.Wordprocessing.Table>().ToList();
                foreach (var table in tables)
                {
                    // NOTE(review): Descendants also returns rows/cells of nested tables, exactly
                    // as the original code did - confirm whether that is intended.
                    var tableRows = table.Descendants<DocumentFormat.OpenXml.Wordprocessing.TableRow>().ToList();
                    int trows = tableRows.Count;

                    // The column count is taken from the first row; a table without rows gets 0
                    // columns instead of throwing from First().
                    int tcols = 0;
                    if (trows > 0)
                    {
                        tcols = tableRows[0].Descendants<DocumentFormat.OpenXml.Wordprocessing.TableCell>().Count();
                    }

                    WordTable wordTable = new WordTable(trows, tcols);
                    for (int row = 0; row < trows; row++)
                    {
                        int cell = 0;
                        foreach (var tableCell in tableRows[row].Descendants<DocumentFormat.OpenXml.Wordprocessing.TableCell>())
                        {
                            wordTable.AddText(row, cell, tableCell.InnerText);
                            cell++;
                        }
                    }

                    DocumentFormat.OpenXml.Wordprocessing.Paragraph tablePara = new DocumentFormat.OpenXml.Wordprocessing.Paragraph();
                    Run tableRun = tablePara.AppendChild(new Run());
                    string tablePlaceholder = "%Table-&" + tableIndex;
                    tableRun.AppendChild(new Text(tablePlaceholder));
                    table.Parent.ReplaceChild(tablePara, table);
                    tableIndex++;
                    TableList.Add(tablePlaceholder, wordTable);
                }

                // ---- Formulas: transform OfficeMath XML with the XSLT, then remove the node ----
                var formulas = document.Descendants<DocumentFormat.OpenXml.Math.OfficeMath>().ToList();
                if (formulas.Count > 0)
                {
                    // Load the stylesheet once per document (and only when it is needed)
                    // instead of once per formula.
                    XslCompiledTransform xslTransform = new XslCompiledTransform();
                    xslTransform.Load(officeMathMLSchemaFilePath);

                    // Emit a bare fragment without an XML declaration.
                    XmlWriterSettings settings = xslTransform.OutputSettings.Clone();
                    settings.ConformanceLevel = ConformanceLevel.Fragment;
                    settings.OmitXmlDeclaration = true;

                    foreach (var formula in formulas)
                    {
                        string mmlFormula;
                        using (TextReader tr = new StringReader(formula.OuterXml))
                        using (XmlReader reader = XmlReader.Create(tr))
                        using (MemoryStream ms = new MemoryStream())
                        {
                            XmlWriter xw = XmlWriter.Create(ms, settings);
                            xslTransform.Transform(reader, xw);

                            // Rewind and read the transformed output back as text.
                            ms.Seek(0, SeekOrigin.Begin);
                            using (StreamReader sr = new StreamReader(ms, Encoding.UTF8))
                            {
                                mmlFormula = sr.ReadToEnd();
                            }
                        }

                        DocumentFormat.OpenXml.Wordprocessing.Paragraph mathPara = formula.Parent.AppendChild(new DocumentFormat.OpenXml.Wordprocessing.Paragraph());
                        Run mathRun = mathPara.AppendChild(new Run());
                        string mathPlaceholder = "%Math-&" + mathIndex;
                        mathRun.AppendChild(new Text(mathPlaceholder));
                        mathIndex++;
                        formula.Remove();

                        // ReadToEnd never returns null, so the entry is always recorded.
                        MathList.Add(mathPlaceholder, mmlFormula);
                    }
                }

                // ---- Images: save the embedded picture to TempImageFolder, add a placeholder ----
                var graphics = document.Descendants<DocumentFormat.OpenXml.Drawing.Graphic>().ToList();
                foreach (var graphic in graphics)
                {
                    // Graphics without an embedded blip carry no picture to extract; skip them
                    // instead of throwing from First().
                    DocumentFormat.OpenXml.Drawing.Blip blip = graphic.Descendants<DocumentFormat.OpenXml.Drawing.Blip>().FirstOrDefault();
                    if (blip == null || blip.Embed == null)
                    {
                        continue;
                    }

                    ImagePart imagePart = doc.MainDocumentPart.GetPartById(blip.Embed.Value) as ImagePart;
                    if (imagePart == null)
                    {
                        continue;
                    }

                    var uri = imagePart.Uri;
                    var filename = uri.ToString().Split('/').Last();
                    string imagePath = TempImageFolder + filename;

                    // Dispose both the package stream and the GDI+ bitmap once the file is written.
                    using (var stream = doc.Package.GetPart(uri).GetStream())
                    using (Bitmap b = new Bitmap(stream))
                    {
                        b.Save(imagePath);
                    }

                    // NOTE(review): four levels up is presumably graphic -> inline/anchor ->
                    // drawing -> run -> paragraph; confirm for every drawing layout in use.
                    DocumentFormat.OpenXml.Wordprocessing.Paragraph imagePara = graphic.Parent.Parent.Parent.Parent.AppendChild(new DocumentFormat.OpenXml.Wordprocessing.Paragraph());
                    Run imageRun = imagePara.AppendChild(new Run());
                    string imagePlaceholder = "%Image-&" + imageIndex;
                    imageRun.AppendChild(new Text(imagePlaceholder));
                    imageIndex++;
                    ImageList.Add(imagePlaceholder, imagePath);
                }

                // ---- Headings / title / subtitle: move the text into PlainTextList ----
                var paragraphs = document.Descendants<DocumentFormat.OpenXml.Wordprocessing.Paragraph>().ToList();
                foreach (var element in paragraphs)
                {
                    // Paragraphs without a style id are skipped explicitly rather than by
                    // catching the resulting NullReferenceException.
                    var properties = element.ParagraphProperties;
                    var styleId = properties == null ? null : properties.ParagraphStyleId;
                    string styleName = (styleId == null || styleId.Val == null) ? null : styleId.Val.Value;
                    if (styleName == null)
                    {
                        continue;
                    }

                    string type = null;
                    switch (styleName.ToLowerInvariant())
                    {
                        case "heading1": type = "h1-"; break;
                        case "heading2": type = "h2-"; break;
                        case "heading3": type = "h3-"; break;
                        case "heading4": type = "h4-"; break;
                        case "heading5": type = "h5-"; break;
                        case "title": type = "title-"; break;
                        case "subtitle": type = "subtitle-"; break;
                        default: break;
                    }

                    if (type != null)
                    {
                        string id = "%" + type + "&" + textIndex;
                        PlainTextList.Add(id, element.InnerText);
                        textIndex++;

                        // Replace the paragraph's content with a nested paragraph holding the id.
                        // NOTE(review): the nesting is kept as in the original because
                        // ExtractTextAndCreatePlaceholderList may rely on it - confirm.
                        element.RemoveAllChildren();
                        DocumentFormat.OpenXml.Wordprocessing.Paragraph headingPara = element.AppendChild(new DocumentFormat.OpenXml.Wordprocessing.Paragraph());
                        Run headingRun = headingPara.AppendChild(new Run());
                        headingRun.AppendChild(new Text(id));
                    }
                }

                PlaceholderIDList = ExtractTextAndCreatePlaceholderList(doc);
                if (textBuilder.Length > 0)
                {
                    string s2 = AddTextToTextList();
                    if (s2 != null)
                    {
                        PlaceholderIDList.Add(s2);
                    }
                }
            }
        }
コード例 #2
0
        /// <summary>
        /// Opens the Word document at <paramref name="docFilePath"/> (second argument of
        /// <c>WordprocessingDocument.Open</c> is <c>false</c>, i.e. not editable) and replaces
        /// tables, formulas, images, videos and heading/title/subtitle paragraphs in the loaded
        /// tree with textual placeholders. The extracted content is stored under the placeholder
        /// id in <c>TableList</c>, <c>MathList</c>, <c>ImageList</c> and <c>PlainTextList</c>;
        /// video parts are written to a shared web folder. Finally <c>PlaceholderIDList</c> is
        /// rebuilt from the modified document.
        /// </summary>
        /// <param name="docFilePath">Path of the Word document to process.</param>
        public void ProcessWordDocument(string docFilePath)
        {
            tableIndex = 1;
            mathIndex = 1;
            imageIndex = 1;
            videoIndex = 1;
            textIndex = 1;
            using (WordprocessingDocument doc = WordprocessingDocument.Open(docFilePath, false))
            {
                var document = doc.MainDocumentPart.Document;

                // Every loop below edits the tree it iterates over. Descendants<T>() walks the
                // tree lazily, so replacing or removing the current node can cut the walk short;
                // each element set is therefore snapshotted with ToList() before any edit.

                // ---- Tables: capture the cell text, then swap the table for a placeholder ----
                var tables = document.Descendants<DocumentFormat.OpenXml.Wordprocessing.Table>().ToList();
                foreach (var table in tables)
                {
                    // NOTE(review): Descendants also returns rows/cells of nested tables, exactly
                    // as the original code did - confirm whether that is intended.
                    var tableRows = table.Descendants<DocumentFormat.OpenXml.Wordprocessing.TableRow>().ToList();
                    int trows = tableRows.Count;

                    // The column count is taken from the first row; a table without rows gets 0
                    // columns instead of throwing from First().
                    int tcols = 0;
                    if (trows > 0)
                    {
                        tcols = tableRows[0].Descendants<DocumentFormat.OpenXml.Wordprocessing.TableCell>().Count();
                    }

                    WordTable wordTable = new WordTable(trows, tcols);
                    for (int row = 0; row < trows; row++)
                    {
                        int cell = 0;
                        foreach (var tableCell in tableRows[row].Descendants<DocumentFormat.OpenXml.Wordprocessing.TableCell>())
                        {
                            wordTable.AddText(row, cell, tableCell.InnerText);
                            cell++;
                        }
                    }

                    DocumentFormat.OpenXml.Wordprocessing.Paragraph tablePara = new DocumentFormat.OpenXml.Wordprocessing.Paragraph();
                    Run tableRun = tablePara.AppendChild(new Run());
                    string tablePlaceholder = "%Table-&" + tableIndex;
                    tableRun.AppendChild(new Text(tablePlaceholder));
                    table.Parent.ReplaceChild(tablePara, table);
                    tableIndex++;
                    TableList.Add(tablePlaceholder, wordTable);
                }

                // ---- Formulas: transform OfficeMath XML with the XSLT, then remove the node ----
                var formulas = document.Descendants<DocumentFormat.OpenXml.Math.OfficeMath>().ToList();
                if (formulas.Count > 0)
                {
                    // Load the stylesheet once per document (and only when it is needed)
                    // instead of once per formula.
                    XslCompiledTransform xslTransform = new XslCompiledTransform();
                    xslTransform.Load(officeMathMLSchemaFilePath);

                    // Emit a bare fragment without an XML declaration.
                    XmlWriterSettings settings = xslTransform.OutputSettings.Clone();
                    settings.ConformanceLevel = ConformanceLevel.Fragment;
                    settings.OmitXmlDeclaration = true;

                    foreach (var formula in formulas)
                    {
                        string mmlFormula;
                        using (TextReader tr = new StringReader(formula.OuterXml))
                        using (XmlReader reader = XmlReader.Create(tr))
                        using (MemoryStream ms = new MemoryStream())
                        {
                            XmlWriter xw = XmlWriter.Create(ms, settings);
                            xslTransform.Transform(reader, xw);

                            // Rewind and read the transformed output back as text.
                            ms.Seek(0, SeekOrigin.Begin);
                            using (StreamReader sr = new StreamReader(ms, Encoding.UTF8))
                            {
                                mmlFormula = sr.ReadToEnd();
                            }
                        }

                        DocumentFormat.OpenXml.Wordprocessing.Paragraph mathPara = formula.Parent.AppendChild(new DocumentFormat.OpenXml.Wordprocessing.Paragraph());
                        Run mathRun = mathPara.AppendChild(new Run());
                        string mathPlaceholder = "%Math-&" + mathIndex;
                        mathRun.AppendChild(new Text(mathPlaceholder));
                        mathIndex++;
                        formula.Remove();

                        // ReadToEnd never returns null, so the entry is always recorded.
                        MathList.Add(mathPlaceholder, mmlFormula);
                    }
                }

                // ---- Images: save the embedded picture to TempImageFolder, add a placeholder ----
                var graphics = document.Descendants<DocumentFormat.OpenXml.Drawing.Graphic>().ToList();
                foreach (var graphic in graphics)
                {
                    // Graphics without an embedded blip carry no picture to extract; skip them
                    // instead of throwing from First().
                    DocumentFormat.OpenXml.Drawing.Blip blip = graphic.Descendants<DocumentFormat.OpenXml.Drawing.Blip>().FirstOrDefault();
                    if (blip == null || blip.Embed == null)
                    {
                        continue;
                    }

                    ImagePart imagePart = doc.MainDocumentPart.GetPartById(blip.Embed.Value) as ImagePart;
                    if (imagePart == null)
                    {
                        continue;
                    }

                    var uri = imagePart.Uri;
                    var filename = uri.ToString().Split('/').Last();
                    string imagePath = TempImageFolder + filename;

                    // Dispose both the package stream and the GDI+ bitmap once the file is written.
                    using (var stream = doc.Package.GetPart(uri).GetStream())
                    using (Bitmap b = new Bitmap(stream))
                    {
                        b.Save(imagePath);
                    }

                    // NOTE(review): four levels up is presumably graphic -> inline/anchor ->
                    // drawing -> run -> paragraph; confirm for every drawing layout in use.
                    DocumentFormat.OpenXml.Wordprocessing.Paragraph imagePara = graphic.Parent.Parent.Parent.Parent.AppendChild(new DocumentFormat.OpenXml.Wordprocessing.Paragraph());
                    Run imageRun = imagePara.AppendChild(new Run());
                    string imagePlaceholder = "%Image-&" + imageIndex;
                    imageRun.AppendChild(new Text(imagePlaceholder));
                    imageIndex++;
                    ImageList.Add(imagePlaceholder, imagePath);
                }

                // ---- Videos: best-effort copy of the video part to the shared web folder ----
                try
                {
                    var videos = document.Descendants<DocumentFormat.OpenXml.Drawing.VideoFromFile>().ToList();
                    foreach (var video in videos)
                    {
                        // Locate the package part that holds the video bytes.
                        DocumentFormat.OpenXml.Drawing.Blip blip = video.FirstChild.Descendants<DocumentFormat.OpenXml.Drawing.Blip>().First();
                        var vid = doc.MainDocumentPart.GetPartById(blip.Embed.Value);
                        var uri = vid.Uri;
                        var filename = uri.ToString().Split('/').Last();

                        //TODO set these up universally somewhere
                        string videoFolder = @"C:\websites\RoboBraille.Web.Api\dist\";
                        //TODO send post video request to Amara; the distribution URL would be
                        //     @"http://2.109.50.18:5150/dist/" + filename

                        // Stream the part straight to disk: avoids holding the whole video in a
                        // byte[] and the (int) cast of the stream length.
                        using (var source = doc.Package.GetPart(uri).GetStream())
                        using (FileStream target = File.Create(videoFolder + filename))
                        {
                            source.CopyTo(target);
                        }

                        //put placeholder
                        //TODO check that video.Parent.Parent ...points to the right parent
                        DocumentFormat.OpenXml.Wordprocessing.Paragraph videoPara = video.Parent.Parent.Parent.Parent.AppendChild(new DocumentFormat.OpenXml.Wordprocessing.Paragraph());
                        Run videoRun = videoPara.AppendChild(new Run());
                        string videoPlaceholder = "%Video-&" + videoIndex;
                        videoRun.AppendChild(new Text(videoPlaceholder));
                        videoIndex++;
                        //TODO keep a list of the videos sent to Amara and their source location (maybe as jobs?)
                    }
                }
                catch (System.Exception ex)
                {
                    // Video extraction stays best-effort (a failure must not abort the rest of
                    // the document), but the failure is traced instead of vanishing silently.
                    System.Diagnostics.Trace.TraceWarning("Video extraction skipped: " + ex.Message);
                }

                // ---- Headings / title / subtitle: move the text into PlainTextList ----
                var paragraphs = document.Descendants<DocumentFormat.OpenXml.Wordprocessing.Paragraph>().ToList();
                foreach (var element in paragraphs)
                {
                    // Paragraphs without a style id are skipped explicitly rather than by
                    // catching the resulting NullReferenceException.
                    var properties = element.ParagraphProperties;
                    var styleId = properties == null ? null : properties.ParagraphStyleId;
                    string styleName = (styleId == null || styleId.Val == null) ? null : styleId.Val.Value;
                    if (styleName == null)
                    {
                        continue;
                    }

                    string type = null;
                    switch (styleName.ToLowerInvariant())
                    {
                        case "heading1": type = "h1-"; break;
                        case "heading2": type = "h2-"; break;
                        case "heading3": type = "h3-"; break;
                        case "heading4": type = "h4-"; break;
                        case "heading5": type = "h5-"; break;
                        case "title": type = "title-"; break;
                        case "subtitle": type = "subtitle-"; break;
                        default: break;
                    }

                    if (type != null)
                    {
                        string id = "%" + type + "&" + textIndex;
                        PlainTextList.Add(id, element.InnerText);
                        textIndex++;

                        // Replace the paragraph's content with a nested paragraph holding the id.
                        // NOTE(review): the nesting is kept as in the original because
                        // ExtractTextAndCreatePlaceholderList may rely on it - confirm.
                        element.RemoveAllChildren();
                        DocumentFormat.OpenXml.Wordprocessing.Paragraph headingPara = element.AppendChild(new DocumentFormat.OpenXml.Wordprocessing.Paragraph());
                        Run headingRun = headingPara.AppendChild(new Run());
                        headingRun.AppendChild(new Text(id));
                    }
                }

                PlaceholderIDList = ExtractTextAndCreatePlaceholderList(doc);
                if (textBuilder.Length > 0)
                {
                    string s2 = AddTextToTextList();
                    if (s2 != null)
                    {
                        PlaceholderIDList.Add(s2);
                    }
                }
            }
        }