/// <summary>
        /// Builds the <c>tableOfContent</c> element (in the <c>nsItj</c> namespace) and fills it
        /// with the output of <c>GenerateList</c> for the sections of <paramref name="result"/>.
        /// </summary>
        /// <param name="result">Table-of-content data; only its <c>Sections</c> are read here.</param>
        /// <returns>The root element of a newly created <see cref="XDocument"/>.</returns>
        private XElement GenerateToc(TableOfContentResult result)
        {
            XElement tocRoot = new XElement(nsItj + "tableOfContent");
            XDocument holder = new XDocument(tocRoot);

            // The element is returned while still attached to its wrapping document.
            tocRoot.Add(GenerateList(result.Sections));
            return holder.Root;
        }
        /// <summary>
        /// Builds the <c>headwordsTable</c> element (in the <c>nsItj</c> namespace) and fills it
        /// with the output of <c>GenerateTable</c> for the headwords of <paramref name="result"/>.
        /// </summary>
        /// <param name="result">Table-of-content data; only its <c>HeadwordsList</c> is read here.</param>
        /// <returns>The root element of a newly created <see cref="XDocument"/>.</returns>
        private XElement GenerateHwTable(TableOfContentResult result)
        {
            XElement tableRoot = new XElement(nsItj + "headwordsTable");
            XDocument holder   = new XDocument(tableRoot);

            // The element is returned while still attached to its wrapping document.
            tableRoot.Add(GenerateTable(result.HeadwordsList));
            return holder.Root;
        }
Beispiel #3
0
        /// <summary>
        /// Builds the <c>headwordsList</c> element (in the <c>nsItj</c> namespace) and fills it
        /// with the output of <c>GenerateList</c> for the headwords of <paramref name="result"/>.
        /// </summary>
        /// <param name="result">Table-of-content data; only its <c>HeadwordsList</c> is read here.</param>
        /// <returns>The root element of a newly created <see cref="XDocument"/>.</returns>
        private static XElement GenerateHwList(TableOfContentResult result)
        {
            XElement listRoot = new XElement(nsItj + "headwordsList");
            XDocument holder  = new XDocument(listRoot);

            // The element is returned while still attached to its wrapping document.
            listRoot.Add(GenerateList(result.HeadwordsList));
            return holder.Root;
        }
Beispiel #4
0
        /// <summary>
        /// Writes the element produced by <c>GenerateToc</c> to the console, followed by an empty line.
        /// </summary>
        /// <param name="result">Table-of-content data handed to <c>GenerateToc</c>.</param>
        private static void GenerateTocXml(TableOfContentResult result)
        {
            // Console.WriteLine(object) prints the element's ToString() output.
            Console.WriteLine(GenerateToc(result));
            Console.WriteLine();
        }
Beispiel #5
0
 /// <summary>
 /// Prints the table of content to the console as hand-written markup: fixed opening tags,
 /// one <c>WriteTocInfoXml</c> call per section, then the matching closing tags.
 /// </summary>
 /// <param name="result">Table-of-content data; only its <c>Sections</c> are enumerated.</param>
 private static void GenerateTocText(TableOfContentResult result)
 {
     // Opening wrapper.
     Console.WriteLine("<tableOfContent>");
     Console.WriteLine("<list>");

     foreach (TableOfContentItem section in result.Sections)
     {
         // The second argument is passed as 1 for every top-level section
         // (presumably the nesting level - confirm against WriteTocInfoXml).
         WriteTocInfoXml(section, 1);
     }

     // Closing wrapper, in reverse order of the opening tags.
     Console.WriteLine("</list>");
     Console.WriteLine("</tableOfContent>");
 }
Beispiel #6
0
        /// <summary>
        /// Builds the table of content for one XML file (starting at the <c>body</c> element) and
        /// saves it through <c>SaveToc</c> into a fixed metadata directory.
        /// </summary>
        /// <param name="file">
        /// Path of the XML file. The output name is the part after the last backslash with every
        /// occurrence of ".xml" replaced by ".cnt".
        /// </param>
        private static void TestDocument(string file)
        {
            ContentInfoBuilder builder = new ContentInfoBuilder
            {
                XmlFile         = file,
                StartingElement = "body"
            };
            TableOfContentResult result = builder.MakeTableOfContent();

            // Derive the output file name from the input path.
            string bareName   = file.Substring(file.LastIndexOf('\\') + 1);
            string outputName = bareName.Replace(".xml", ".cnt");
            string outputPath = Path.Combine(@"V:\Projekty\BitBucket\itjakub\UJCSystem\ITJakub.Xml.Conversion\Ujc.Ovj.Ooxml.Conversion.Test\Data\Metadata",
                                             outputName);

            // Alternative outputs (GenerateTocXml / GenerateTocText) are intentionally not invoked here.
            SaveToc(result, outputPath);
        }
        //private void GenerateConversionMetadataFile(SplittingResult splittingResult,
        //	string documentType,
        //	string finalOutputFileFullPath)
        //{
        //	GenerateConversionMetadataFile(splittingResult, new TableOfContentResult(), documentType, finalOutputFileFullPath);
        //}

        /// <summary>
        /// Creates the conversion metadata document and saves it to
        /// <paramref name="finalOutputMetadataFileName"/>.
        /// </summary>
        /// <remarks>
        /// The root <c>document</c> element receives, in this order: the first <c>teiHeader</c> found
        /// in the converted TEI file (if any), the optional <c>pages</c> element, the table of content,
        /// the headwords table, the headwords list and the <c>accessories</c> element.
        /// </remarks>
        /// <param name="splittingResult">Page split information; when <c>null</c>, no <c>pages</c> element is written.</param>
        /// <param name="tableOfContentResult">Source data for the table of content and the headwords list.</param>
        /// <param name="documentType">Value of the root <c>doctype</c> attribute.</param>
        /// <param name="finalOutputFileFullPath">Path of the converted TEI file that is loaded to obtain the header.</param>
        /// <param name="finalOutputFileName">File name recorded in the <c>accessories/file</c> element.</param>
        /// <param name="finalOutputMetadataFileName">Destination path of the metadata document.</param>
        private void GenerateConversionMetadataFile(SplittingResult splittingResult, TableOfContentResult tableOfContentResult,
                                                    string documentType, string finalOutputFileFullPath, string finalOutputFileName, string finalOutputMetadataFileName)
        {
            XDocument teiSource = XDocument.Load(finalOutputFileFullPath);

            // Root element with identification attributes and the namespace declarations.
            XElement documentElement = new XElement(
                nsItj + "document",
                new XAttribute("doctype", documentType),
                new XAttribute("versionId", _currentVersionInfoSkeleton.Id),
                new XAttribute(nsXml + "lang", "cs"),
                new XAttribute("n", _documentId),
                new XAttribute("xmlns", nsTei),
                new XAttribute(XNamespace.Xmlns + "itj", nsItj),
                new XAttribute(XNamespace.Xmlns + "nlp", nsNlp),
                new XAttribute(XNamespace.Xmlns + "xml", XNamespace.Xml));
            XDocument metadata = new XDocument(documentElement);

            // Adding null is a no-op, so a missing teiHeader simply leaves the root without a header.
            documentElement.Add(teiSource.Descendants(nsTei + "teiHeader").FirstOrDefault());

            XElement tocElement       = GenerateToc(tableOfContentResult);
            XElement headwordsList    = GenerateHwList(tableOfContentResult);
            XElement headwordsTable   = GenerateHwTable(headwordsList);
            XElement fileElement      = new XElement(nsItj + "file",
                                                     new XAttribute("type", "content"),
                                                     new XAttribute("name", finalOutputFileName));
            XElement accessoriesGroup = new XElement(nsItj + "accessories", fileElement);

            // Generate the pages element only when the document was actually split into pages.
            if (splittingResult != null)
            {
                documentElement.Add(
                    new XElement(nsItj + "pages",
                                 splittingResult.PageBreaksSplitInfo.Select(
                                     info => new XElement(nsItj + "page",
                                                          // Attributes are emitted only for values that are present.
                                                          info.Number == null ? null : new XAttribute("n", info.Number),
                                                          info.Id == null ? null : new XAttribute(nsXml + "id", info.Id),
                                                          info.FileName == null ? null : new XAttribute("resource", info.FileName),
                                                          info.Facsimile == null ? null : new XAttribute("facs", info.Facsimile)))));
            }

            // Note the order: the headwords table precedes the headwords list in the output.
            documentElement.Add(tocElement, headwordsTable, headwordsList, accessoriesGroup);
            metadata.Save(finalOutputMetadataFileName);
        }
Beispiel #8
0
        /// <summary>
        /// Saves a <c>metadata</c> document containing a <c>tableOfContent</c> element and a
        /// <c>headwordsList</c> element, each filled by <c>GenerateList</c>.
        /// </summary>
        /// <param name="result">Source of the <c>Sections</c> and <c>HeadwordsList</c> collections.</param>
        /// <param name="filepath">Destination path passed to <see cref="XDocument.Save(string)"/>.</param>
        private static void SaveToc(TableOfContentResult result, string filepath)
        {
            XElement sectionsElement = new XElement(nsItj + "tableOfContent");
            sectionsElement.Add(GenerateList(result.Sections));

            XElement headwordsElement = new XElement(nsItj + "headwordsList");
            headwordsElement.Add(GenerateList(result.HeadwordsList));

            // Table of content first, headwords second.
            XElement metadataRoot = new XElement(nsItj + "metadata", sectionsElement, headwordsElement);
            new XDocument(metadataRoot).Save(filepath);
        }
        /// <summary>
        /// Runs the whole conversion for the input files listed in <paramref name="settings"/>:
        /// looks up the document record, converts every input file with <c>ConvertDocxToXml</c>,
        /// runs the export module for the document type, copies the final XML to the output
        /// directory and generates the conversion metadata file.
        /// </summary>
        /// <param name="settings">Conversion settings; also stored in <c>ConverterSettings</c>.</param>
        /// <returns>
        /// The <c>_result</c> instance created by this call. Failures handled by this method are
        /// reported through <c>_result.Errors</c> and an early return rather than by throwing.
        /// </returns>
        public ConversionResult Convert(DocxToTeiConverterSettings settings)
        {
            ConverterSettings = settings;
            CheckIfDirectoryPathsExists(ConverterSettings);
            _result = new ConversionResult();
            string documentType = null;

            //get metadata only for first (by alphabet) uploaded file
            // NOTE(review): no sorting happens here; First() takes whatever InputFilesPath yields first
            // (and throws on an empty sequence) - presumably the caller provides sorted paths; confirm.
            var inputFileName = ConverterSettings.InputFilesPath.Select(filePath => new FileInfo(filePath)).Select(fileInfo => fileInfo.Name).First();

            ResolveDefaultSettingsValues(ConverterSettings);

            var prepis = GetPrepisy(ConverterSettings, inputFileName);

            if (prepis == null)
            {
                // The document does not exist in the registry; the caller should offer to register it.
                // Assigning a document type should be sufficient.
                _result.Errors.Add(new DocumentNotInEvidenceException(String.Format("Dokument s uvedeným jménem souboru '{0}' neexistuje v evidenci.", inputFileName)));
                return(_result);
            }

            // The record must have reached at least the Exportovat processing phase.
            if (prepis.FazeZpracovani < FazeZpracovani.Exportovat)
            {
                _result.Errors.Add(new DocumentNotInRequredStateException("Dokument s uvedeným jménem souboru není připraven pro export."));
                return(_result);
            }

            documentType = GetDocumentType(prepis.TypPrepisu);
            _documentId  = prepis.GUID;

            if (documentType == null)
            {
                // The document type assigned to the document in the registry is not supported.
                _result.Errors.Add(new NotSupportedFileFormatException("Dokument má v evidenci přiřazen typ dokumentu, který není podporován."));
                return(_result);
            }

            string tempDirectoryPath = ConverterSettings.TempDirectoryPath;
            // Create the directory structure (if it does not exist yet) for output and temporary files.
            AdresarovaStruktura ads = new AdresarovaStruktura(tempDirectoryPath, documentType);

            ads.VytvorStrukturu();


            string docxToXmlFilePath       = Path.Combine(GetDataDirectoryPath(), "AllStylesConvert.2xml");
            string xsltTemplatesPath       = GetXsltTemplatesPath();
            string xsltTransformationsPath = GetXsltTransformationsPath();

            string fileNameWithoutExtension = prepis.Soubor.NazevBezPripony;
            string xmlOutpuFileName         = fileNameWithoutExtension + XmlExtension;

            string finalOutputDirectory = ads.DejVystup;             // Path.Combine(ads.DejVystup, fileNameWithoutExtension);
            string finalOutputFileName  = Path.Combine(finalOutputDirectory, xmlOutpuFileName);

            // For now only the conversion from DOCX to the basic XML.
            IList <string> xmlOutputFiles = new List <string>();

            try
            {
                // One intermediate XML file per input file; filePart is the running index of the input.
                var filePart = 0;
                foreach (var inputFilePath in ConverterSettings.InputFilesPath)
                {
                    xmlOutputFiles.Add(GetDocxToXmlOutput(ads, prepis.Soubor.NazevBezPripony, filePart, ConverterSettings.InputFilesPath.Length > 1));

                    ConvertDocxToXml(inputFilePath, docxToXmlFilePath, xmlOutputFiles.Last());
                    filePart++;
                }
            }
            catch (Exception exception)
            {
                // Any conversion failure is recorded and ends the run.
                _result.Errors.Add(exception);
                return(_result);
            }

            if (!Directory.Exists(finalOutputDirectory))
            {
                Directory.CreateDirectory(finalOutputDirectory);
            }

            IExportNastaveni exportSettings = GetExportSettings(documentType, ConverterSettings, xsltTransformationsPath, xsltTemplatesPath, ads, prepis);
            ExportBase       export         = GetExportModule(documentType, exportSettings);

            if (export == null || exportSettings == null)
            {
                // The export object could not be created; the document type is not supported.
                // NOTE(review): this path returns without adding anything to _result.Errors.
                return(_result);
            }

            try
            {
                export.Exportuj(exportSettings.Prepis, xmlOutputFiles, ConverterSettings.UploadedFilesPath);
                _result.IsConverted = true;
            }
            catch (Exception exception)
            {
                _result.Errors.Add(exception);
                return(_result);
            }

            // Outside debug mode the intermediate XML files are removed once the export succeeded.
            if (!settings.Debug)
            {
                foreach (var xmlOutputFile in xmlOutputFiles.Where(File.Exists))
                {
                    File.Delete(xmlOutputFile);
                }
            }

            var versions = settings.GetVersionList(_documentId);

            // The last entry of the version list is used as the current version.
            _currentVersionInfoSkeleton = versions.Last();


            WriteListChange(finalOutputFileName, versions, _currentVersionInfoSkeleton);
            var xmlFinalOutputPath = Path.Combine(settings.OutputDirectoryPath, xmlOutpuFileName);

            // Copy the exported XML to the output directory, overwriting an existing file.
            File.Copy(finalOutputFileName, xmlFinalOutputPath, true);
            _result.MetadataFilePath = settings.OutputMetadataFilePath;
            //GetConversionMetadataFileFullPath(settings.OutputFilePath);

            if (export.UsePersonalizedXmdGenerator)
            {
                // The export module produces the metadata file itself.
                export.GenerateConversionMetadataFile(documentType, xmlFinalOutputPath, xmlOutpuFileName, settings.OutputMetadataFilePath);
            }
            else
            {
                SplittingResult splittingResult = null;
                if (settings.SplitDocumentByPageBreaks)
                {
                    splittingResult = SplitDocumentByPageBreaks(xmlFinalOutputPath, fileNameWithoutExtension);
                    if (!splittingResult.IsSplitted)
                    {
                        // A failed split marks the result as not converted, but metadata generation below still runs.
                        _result.IsConverted = false;
                        _result.Errors.Add(new DocumentSplittingException("Vyskytla se chyba při rozdělení souboru podle hranice stran."));
                    }
                }

                TableOfContentResult tocResult  = null;
                ContentInfoBuilder   tocBuilder = new ContentInfoBuilder();
                tocResult = tocBuilder.MakeTableOfContent(xmlFinalOutputPath, "body");

                GenerateConversionMetadataFile(splittingResult, tocResult, documentType, xmlFinalOutputPath, xmlOutpuFileName, settings.OutputMetadataFilePath);
            }

            if (!settings.Debug)
            {
                try
                {
                    Directory.Delete(settings.TempDirectoryPath, true);
                }
                catch (IOException exception)
                {
                    // One retry after an IOException; a second failure propagates to the caller.
                    Directory.Delete(settings.TempDirectoryPath, true);
                }
            }

            return(_result);
        }
        /// <summary>
        /// Opens a Word document through Word interop and reads the hyperlinks of its first
        /// table of contents, producing one item per hyperlink whose target bookmark is found.
        /// </summary>
        /// <param name="fileName">Path of the Word document to open.</param>
        /// <returns>
        /// The <see cref="TableOfContentResult"/> with the collected items. When the file does not
        /// exist or Word raises an error, <c>Result</c> is <c>false</c> and <c>LastError</c> holds the
        /// message. Returns <c>null</c> when resolving the bookmark of a hyperlink throws.
        /// </returns>
        public TableOfContentResult ExtractTableOfContent(string fileName)
        {
            var tableOfContentResult = new TableOfContentResult();

            if (!File.Exists(fileName))
            {
                tableOfContentResult.Result    = false;
                tableOfContentResult.LastError = "File not found!";
                return(tableOfContentResult);
            }
            Microsoft.Office.Interop.Word.Application wordApplication = null;
            Microsoft.Office.Interop.Word.Document    document        = null;

            try
            {
                object templatePath = fileName;
                object missingObj   = Missing.Value;
                wordApplication = new Microsoft.Office.Interop.Word.Application();
                document        = wordApplication.Documents.Open(ref templatePath, ref missingObj, ref missingObj, ref missingObj);

                // Resolve the hyperlink collection of the first TOC once instead of on every iteration.
                var hyperlinks      = document.TablesOfContents[1].Range.Hyperlinks;
                var hyperlinksCount = hyperlinks.Count;
                var content         = string.Empty;
                int NextStart       = 0;

                // Hyperlinks are visited from the last one to the first one so that the content range of
                // an entry can end where the previously visited (i.e. following) entry starts.
                for (var i = hyperlinksCount; i > 0; i--)
                {
                    var tableOfContentData = new TableOfContentData();
                    var myRange            = hyperlinks[i].Range;

                    // A TOC line has the form "text<TAB>page"; the page number is the last tab-separated part.
                    var splites = myRange.Text.Split('\t');
                    if (splites.Length > 1)
                    {
                        int page;
                        var result = int.TryParse(splites[splites.Length - 1], out page);
                        if (result)
                        {
                            for (var j = 0; j < splites.Length - 1; j++)
                            {
                                content += splites[j] + " ";
                            }
                            tableOfContentData.Page    = page;
                            tableOfContentData.Content = content.Trim().Replace("  ", " ");
                            content = string.Empty;
                        }
                        else
                        {
                            // No page number on this link: keep the text and carry it over to the next iteration.
                            content += myRange.Text;
                        }
                    }
                    else
                    {
                        content += myRange.Text;
                    }

                    // Range bookkeeping; the values are computed but not stored anywhere yet.
                    var rd = new RangeDetails();

                    rd.HyperLinkStart = myRange.Start;
                    rd.HyperLinkEnd   = myRange.End;
                    rd.LinkText       = myRange.Text;

                    try
                    {
                        var sadd = hyperlinks[i].SubAddress;
                        var wb   = document.Bookmarks[sadd];
                        if (wb != null)
                        {
                            Microsoft.Office.Interop.Word.Style style = wb.Range.get_Style();
                            tableOfContentData.StyleInfo = style.NameLocal;

                            rd.ContentRangeStart = wb.Range.End + 1; // content starts after the Heading so we take the end of heading
                            if (i == hyperlinksCount)
                            {
                                // then it is the last Range and the "End" will be the endofDoc.
                                object oEndOfDoc = @"\endofdoc";
                                rd.ContentRangeEnd = document.Bookmarks.get_Item(ref oEndOfDoc).Range.End;
                            }
                            else
                            {
                                rd.ContentRangeEnd = NextStart - 1;
                            }

                            NextStart = rd.ContentRangeStart;
                            tableOfContentResult.Items.Add(tableOfContentData);
                        }
                    }
                    catch (Exception)
                    {
                        // Existing contract: a failure while resolving a bookmark aborts the extraction
                        // with null. The finally block below still closes the document and quits Word.
                        return(null);
                    }
                    //contRange.Ranges.Add(rd);
                }

                // Regular shutdown; the references are cleared so that the finally block does not repeat it.
                document.Close();
                document = null;
                wordApplication.Quit();
                wordApplication = null;
            }
            catch (Exception ex)
            {
                tableOfContentResult.Result    = false;
                tableOfContentResult.LastError = ex.Message;
            }
            finally
            {
                // Quit Word even when closing the document throws; otherwise the Word process stays alive.
                try
                {
                    document?.Close();
                }
                finally
                {
                    wordApplication?.Quit();
                }
            }

            // Items were collected last-to-first; put them back into document order.
            // NOTE(review): this relies on Items being a List<T> (in-place Reverse). If Items is typed as
            // IList<T>/IEnumerable<T>, the LINQ Reverse result would be discarded - confirm.
            tableOfContentResult.Items.Reverse();
            return(tableOfContentResult);
        }