Пример #1
0
        /// <summary>
        /// Creates a copy of Blank-altchunk.docx that embeds <paramref name="html"/> as an altChunk, passes it to
        /// <c>WordAutomationUtilities.OpenAndSaveAs</c>, and then simplifies the markup of the re-saved document in place.
        /// </summary>
        /// <param name="newAltChunkBeforeFi">Destination of the blank-document copy that receives the altChunk.</param>
        /// <param name="newAltChunkAfterFi">Output path given to <c>OpenAndSaveAs</c>; this file is then post-processed.</param>
        /// <param name="html">XHTML whose serialized text is written into the alternative format import part.</param>
        /// <exception cref="IOException">
        /// The re-saved document still could not be opened and processed after the maximum number of attempts.
        /// </exception>
        public static void DoConversionViaWord(FileInfo newAltChunkBeforeFi, FileInfo newAltChunkAfterFi, XElement html)
        {
            // Upper bound on the retry loop below (600 * 50 ms = roughly 30 s). Without a bound, a missing
            // output file (FileNotFoundException derives from IOException) would spin forever.
            const int maxOpenAttempts = 600;
            const int retryDelayMilliseconds = 50;

            // The template is a file, so describe it with FileInfo rather than DirectoryInfo.
            var blankAltChunkFi = new FileInfo(Path.Combine(TestUtil.SourceDir.FullName, "Blank-altchunk.docx"));

            File.Copy(blankAltChunkFi.FullName, newAltChunkBeforeFi.FullName);
            using (WordprocessingDocument myDoc = WordprocessingDocument.Open(newAltChunkBeforeFi.FullName, true))
            {
                string altChunkId = "AltChunkId1";
                MainDocumentPart mainPart = myDoc.MainDocumentPart;
                AlternativeFormatImportPart chunk = mainPart.AddAlternativeFormatImportPart(
                    "application/xhtml+xml", altChunkId);

                // Write the serialized HTML into the newly added import part.
                using (Stream chunkStream = chunk.GetStream(FileMode.Create, FileAccess.Write))
                using (StreamWriter stringStream = new StreamWriter(chunkStream))
                {
                    stringStream.Write(html.ToString());
                }

                // Reference the import part from the very start of the document body.
                XElement altChunk = new XElement(W.altChunk,
                    new XAttribute(R.id, altChunkId));
                XDocument mainDocumentXDoc = mainPart.GetXDocument();
                mainDocumentXDoc.Root
                    .Element(W.body)
                    .AddFirst(altChunk);
                mainPart.PutXDocument();
            }

            WordAutomationUtilities.OpenAndSaveAs(newAltChunkBeforeFi.FullName, newAltChunkAfterFi.FullName);

            // Opening the re-saved file can fail with IOException (presumably while it is still locked
            // after OpenAndSaveAs - TODO confirm), so retry with a short pause, but give up eventually.
            for (int attempt = 1; ; attempt++)
            {
                try
                {
                    using (WordprocessingDocument wDoc = WordprocessingDocument.Open(newAltChunkAfterFi.FullName, true))
                    {
                        SimplifyMarkupSettings settings2 = new SimplifyMarkupSettings
                        {
                            RemoveMarkupForDocumentComparison = true,
                        };
                        MarkupSimplifier.SimplifyMarkup(wDoc, settings2);

                        XDocument mainXDoc = wDoc.MainDocumentPart.GetXDocument();
                        XElement newRoot = (XElement)RemoveDivTransform(mainXDoc.Root);
                        mainXDoc.Root.ReplaceWith(newRoot);
                        wDoc.MainDocumentPart.PutXDocumentWithFormatting();
                    }
                    break;
                }
                catch (IOException)
                {
                    if (attempt >= maxOpenAttempts)
                    {
                        // Surface the last failure instead of looping forever.
                        throw;
                    }
                    System.Threading.Thread.Sleep(retryDelayMilliseconds);
                }
            }
        }
        /// <summary>
        /// Copies a source DOCX to the temp directory, saves it as HTML via <c>SaveAsHtmlUsingHtmlConverter</c>,
        /// converts that HTML back to a <c>WmlDocument</c> with <c>HtmlToWmlConverter.ConvertHtmlToWml</c>,
        /// asserts the result is not null, and saves/validates it.
        /// </summary>
        /// <param name="name">File name of a .docx document, resolved against <c>TestUtil.SourceDir</c>.</param>
        public void HW010(string name)
        {
            var sourceDocxFi = new FileInfo(Path.Combine(TestUtil.SourceDir.FullName, name));

            // Numbered artifact names written next to each other in the temp directory.
            var sourceCopiedToDestDocxFi = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, name.Replace(".docx", "-2-Source.docx")));
            var sourceCopiedToDestHtmlFi = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, name.Replace(".docx", "-2-Source.html")));
            var destCssFi       = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, name.Replace(".docx", "-3.css")));
            var destDocxFi      = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, name.Replace(".docx", "-4-ConvertedByHtmlToWml.docx")));
            var annotatedHtmlFi = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, name.Replace(".docx", "-5-Annotated.txt")));

            File.Copy(sourceDocxFi.FullName, sourceCopiedToDestDocxFi.FullName);

            SaveAsHtmlUsingHtmlConverter(sourceCopiedToDestDocxFi.FullName, sourceCopiedToDestDocxFi.DirectoryName);
            XElement html = HtmlToWmlReadAsXElement.ReadAsXElement(sourceCopiedToDestHtmlFi);

            // Locate the first <style> element case-insensitively. An ordinal comparison avoids the
            // culture-dependent casing rules of ToLower().
            XElement styleElement = html.Descendants().FirstOrDefault(
                d => string.Equals(d.Name.LocalName, "style", System.StringComparison.OrdinalIgnoreCase));

            // The explicit XElement-to-string conversion yields null when no style element was found.
            string usedAuthorCss = HtmlToWmlConverter.CleanUpCss((string)styleElement);

            File.WriteAllText(destCssFi.FullName, usedAuthorCss);

            var settingsWmlDocument             = new WmlDocument(sourceCopiedToDestDocxFi.FullName);
            HtmlToWmlConverterSettings settings = HtmlToWmlConverter.GetDefaultSettings(settingsWmlDocument);

            // image references in HTML files contain the path to the subdir that contains the images, so base URI is the name of the directory
            // that contains the HTML files
            settings.BaseUriForImages = TestUtil.TempDir.FullName;

            WmlDocument doc = HtmlToWmlConverter.ConvertHtmlToWml(
                defaultCss,
                usedAuthorCss,
                userCss,
                html,
                settings,
                null,  // use the default EmptyDocument
                s_ProduceAnnotatedHtml ? annotatedHtmlFi.FullName : null);

            // Assert.NotNull fails the test on null, so no further null check is needed before saving.
            Assert.NotNull(doc);
            SaveValidateAndFormatMainDocPart(destDocxFi, doc);

#if DO_CONVERSION_VIA_WORD
            var newAltChunkBeforeFi = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, name.Replace(".docx", "-5-AltChunkBefore.docx")));
            var newAltChunkAfterFi  = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, name.Replace(".docx", "-6-ConvertedViaWord.docx")));
            WordAutomationUtilities.DoConversionViaWord(newAltChunkBeforeFi, newAltChunkAfterFi, html);
#endif
        }
Пример #3
0
        /// <summary>
        /// Produces HTML for a source document twice - once through <c>WordAutomationUtilities.SaveAsHtmlUsingWord</c>
        /// and once through <c>ConvertToHtml</c> - runs both results through <c>SaveHtmlAsXml</c>, and hands the
        /// two outputs to <c>CompareNumbering</c>.
        /// </summary>
        /// <param name="file">File name of the document, resolved against <c>TestUtil.SourceDir</c>.</param>
        public void LIR001(string file)
        {
            var sourceFi = new FileInfo(Path.Combine(TestUtil.SourceDir.FullName, file));

            // Constructed from the source path but not referenced again in this method.
            var sourceWml = new WmlDocument(sourceFi.FullName);

            // HTML as saved by Word, written to the temp directory.
            string wordHtmlName = sourceFi.Name.Replace(".docx", "-Word.html");
            FileInfo wordHtmlFi = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, wordHtmlName));
            WordAutomationUtilities.SaveAsHtmlUsingWord(sourceFi, wordHtmlFi);

            // HTML as produced by ConvertToHtml, then converted to XML.
            var convertedHtml = ConvertToHtml(sourceFi.FullName, TestUtil.TempDir.FullName);
            var convertedXml = SaveHtmlAsXml(convertedHtml);

            // Rewrite the Word-generated file as UTF-8 so that it carries a byte order mark.
            string wordHtmlPath = wordHtmlFi.FullName;
            File.WriteAllText(wordHtmlPath, File.ReadAllText(wordHtmlPath, Encoding.Default), Encoding.UTF8);

            var wordXml = SaveHtmlAsXml(wordHtmlFi);

            CompareNumbering(convertedXml, wordXml);
        }
        /// <summary>
        /// Copies a source HTML file (and its "_files" image directory, when present) to the temp directory,
        /// converts the HTML to a <c>WmlDocument</c> with <c>HtmlToWmlConverter.ConvertHtmlToWml</c>,
        /// asserts the result is not null, and saves/validates it.
        /// </summary>
        /// <remarks>
        /// The optional <c>htmlFilter</c> / <c>cssFilter</c> arrays (disabled via <c>#if false</c>) restrict the run
        /// to inputs whose HTML or cleaned-up CSS contains at least one of the listed strings; inputs that do not
        /// match have their copied artifacts deleted and the method returns early.
        /// </remarks>
        /// <param name="name">File name of an .html document, resolved against <c>TestUtil.SourceDir</c>.</param>
        public void HW001(string name)
        {
#if false
            string[] cssFilter = new[] {
                "text-indent",
                "margin-left",
                "margin-right",
                "padding-left",
                "padding-right",
            };
#else
            string[] cssFilter = null;
#endif

#if false
            string[] htmlFilter = new[] {
                "img",
            };
#else
            string[] htmlFilter = null;
#endif

            var sourceHtmlFi  = new FileInfo(Path.Combine(TestUtil.SourceDir.FullName, name));
            var sourceImageDi = new DirectoryInfo(Path.Combine(TestUtil.SourceDir.FullName, sourceHtmlFi.Name.Replace(".html", "_files")));

            // Numbered artifact names written next to each other in the temp directory.
            var destImageDi = new DirectoryInfo(Path.Combine(TestUtil.TempDir.FullName, sourceImageDi.Name));
            var sourceCopiedToDestHtmlFi = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, sourceHtmlFi.Name.Replace(".html", "-1-Source.html")));
            var destCssFi       = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, sourceHtmlFi.Name.Replace(".html", "-2.css")));
            var destDocxFi      = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, sourceHtmlFi.Name.Replace(".html", "-3-ConvertedByHtmlToWml.docx")));
            var annotatedHtmlFi = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, sourceHtmlFi.Name.Replace(".html", "-4-Annotated.txt")));

            if (!sourceCopiedToDestHtmlFi.Exists)
            {
                File.Copy(sourceHtmlFi.FullName, sourceCopiedToDestHtmlFi.FullName);
            }
            XElement html = HtmlToWmlReadAsXElement.ReadAsXElement(sourceCopiedToDestHtmlFi);

            // Skip (and clean up) inputs that contain none of the requested HTML fragments.
            string htmlString = html.ToString();
            if (htmlFilter != null && htmlFilter.Any() && !htmlFilter.Any(item => htmlString.Contains(item)))
            {
                sourceCopiedToDestHtmlFi.Delete();
                return;
            }

            // Locate the first <style> element case-insensitively. An ordinal comparison avoids the
            // culture-dependent casing rules of ToLower().
            XElement styleElement = html.Descendants().FirstOrDefault(
                d => string.Equals(d.Name.LocalName, "style", System.StringComparison.OrdinalIgnoreCase));

            // The explicit XElement-to-string conversion yields null when no style element was found.
            string usedAuthorCss = HtmlToWmlConverter.CleanUpCss((string)styleElement);
            File.WriteAllText(destCssFi.FullName, usedAuthorCss);

            // Skip (and clean up) inputs whose CSS contains none of the requested properties.
            if (cssFilter != null && cssFilter.Any() && !cssFilter.Any(item => usedAuthorCss.Contains(item)))
            {
                sourceCopiedToDestHtmlFi.Delete();
                destCssFi.Delete();
                return;
            }

            if (sourceImageDi.Exists)
            {
                destImageDi.Create();
                foreach (var file in sourceImageDi.GetFiles())
                {
                    // Overwrite so that a re-run into an already populated directory does not throw,
                    // in line with the existence guard used for the HTML copy above.
                    File.Copy(file.FullName, Path.Combine(destImageDi.FullName, file.Name), true);
                }
            }

            HtmlToWmlConverterSettings settings = HtmlToWmlConverter.GetDefaultSettings();
            // image references in HTML files contain the path to the subdir that contains the images, so base URI is the name of the directory
            // that contains the HTML files
            settings.BaseUriForImages = TestUtil.TempDir.FullName;

            WmlDocument doc = HtmlToWmlConverter.ConvertHtmlToWml(
                defaultCss,
                usedAuthorCss,
                userCss,
                html,
                settings,
                null,  // no explicit empty document supplied
                s_ProduceAnnotatedHtml ? annotatedHtmlFi.FullName : null);

            // Assert.NotNull fails the test on null, so no further null check is needed before saving.
            Assert.NotNull(doc);
            SaveValidateAndFormatMainDocPart(destDocxFi, doc);

#if DO_CONVERSION_VIA_WORD
            var newAltChunkBeforeFi = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, name.Replace(".html", "-5-AltChunkBefore.docx")));
            var newAltChunkAfterFi  = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, name.Replace(".html", "-6-ConvertedViaWord.docx")));
            WordAutomationUtilities.DoConversionViaWord(newAltChunkBeforeFi, newAltChunkAfterFi, html);
#endif
        }