/// <summary>
/// Builds the <c>tableOfContent</c> element (in the <c>nsItj</c> namespace) for the given result.
/// </summary>
/// <param name="result">Result whose <c>Sections</c> are passed to <c>GenerateList</c>.</param>
/// <returns>The root element of a freshly created document; it contains the output of <c>GenerateList</c>.</returns>
private XElement GenerateToc(TableOfContentResult result)
{
    XElement root = new XElement(nsItj + "tableOfContent");

    // The element is deliberately owned by its own document, so the returned root has a parent document.
    XDocument owner = new XDocument(root);
    root.Add(GenerateList(result.Sections));

    return owner.Root;
}
/// <summary>
/// Builds the <c>headwordsTable</c> element (in the <c>nsItj</c> namespace) for the given result.
/// </summary>
/// <param name="result">Result whose <c>HeadwordsList</c> is passed to <c>GenerateTable</c>.</param>
/// <returns>The root element of a freshly created document; it contains the output of <c>GenerateTable</c>.</returns>
private XElement GenerateHwTable(TableOfContentResult result)
{
    XElement root = new XElement(nsItj + "headwordsTable");

    // The element is deliberately owned by its own document, so the returned root has a parent document.
    XDocument owner = new XDocument(root);
    root.Add(GenerateTable(result.HeadwordsList));

    return owner.Root;
}
/// <summary>
/// Builds the <c>headwordsList</c> element (in the <c>nsItj</c> namespace) for the given result.
/// </summary>
/// <param name="result">Result whose <c>HeadwordsList</c> is passed to <c>GenerateList</c>.</param>
/// <returns>The root element of a freshly created document; it contains the output of <c>GenerateList</c>.</returns>
private static XElement GenerateHwList(TableOfContentResult result)
{
    XElement root = new XElement(nsItj + "headwordsList");

    // The element is deliberately owned by its own document, so the returned root has a parent document.
    XDocument owner = new XDocument(root);
    root.Add(GenerateList(result.HeadwordsList));

    return owner.Root;
}
/// <summary>
/// Writes the table-of-content element produced by <c>GenerateToc</c> to the console,
/// followed by an empty line.
/// </summary>
/// <param name="result">Result forwarded to <c>GenerateToc</c>.</param>
private static void GenerateTocXml(TableOfContentResult result)
{
    Console.WriteLine(GenerateToc(result));
    Console.WriteLine();
}
/// <summary>
/// Writes the table of content to the console as hand-assembled markup: a
/// <c>tableOfContent</c>/<c>list</c> wrapper around one <c>WriteTocInfoXml</c> call per section.
/// </summary>
/// <param name="result">Result whose <c>Sections</c> are enumerated.</param>
private static void GenerateTocText(TableOfContentResult result)
{
    Console.WriteLine("<tableOfContent>");
    Console.WriteLine("<list>");

    foreach (TableOfContentItem section in result.Sections)
    {
        // Plain-text alternative, currently disabled: WriteTocInfo(section);
        // The second argument is presumably the starting nesting level - confirm against WriteTocInfoXml.
        WriteTocInfoXml(section, 1);
    }

    Console.WriteLine("</list>");
    Console.WriteLine("</tableOfContent>");
}
/// <summary>
/// Builds the table of content for one XML file (starting at the <c>body</c> element) and saves it
/// via <c>SaveToc</c> into a fixed metadata directory, under the source file name with
/// <c>.xml</c> replaced by <c>.cnt</c>.
/// </summary>
/// <param name="file">Path of the XML file; the part after the last backslash is used as the file name.</param>
private static void TestDocument(string file)
{
    ContentInfoBuilder builder = new ContentInfoBuilder
    {
        XmlFile = file,
        StartingElement = "body"
    };
    TableOfContentResult result = builder.MakeTableOfContent();

    // Other output variants, currently disabled: GenerateTocXml(result); GenerateTocText(result);
    const string metadataDirectory = @"V:\Projekty\BitBucket\itjakub\UJCSystem\ITJakub.Xml.Conversion\Ujc.Ovj.Ooxml.Conversion.Test\Data\Metadata";
    string sourceName = file.Substring(file.LastIndexOf('\\') + 1);
    string targetName = sourceName.Replace(".xml", ".cnt");

    SaveToc(result, Path.Combine(metadataDirectory, targetName));
}
//private void GenerateConversionMetadataFile(SplittingResult splittingResult,
//	string documentType,
//	string finalOutputFileFullPath)
//{
//	GenerateConversionMetadataFile(splittingResult, new TableOfContentResult(), documentType, finalOutputFileFullPath);
//}

/// <summary>
/// Assembles the conversion metadata document and saves it to
/// <paramref name="finalOutputMetadataFileName"/>.
/// </summary>
/// <remarks>
/// Children are added to the root in this order: the first <c>teiHeader</c> found in the converted
/// file, the optional <c>pages</c> element, the table of content, the headwords table, the
/// headwords list and the <c>accessories</c> element.
/// </remarks>
/// <param name="splittingResult">Page-splitting result; when <c>null</c> no <c>pages</c> element is written.</param>
/// <param name="tableOfContentResult">Source for the table of content and the headwords list.</param>
/// <param name="documentType">Value of the root <c>doctype</c> attribute.</param>
/// <param name="finalOutputFileFullPath">Path of the converted XML file that is loaded to obtain the header.</param>
/// <param name="finalOutputFileName">File name recorded in the <c>accessories</c> element.</param>
/// <param name="finalOutputMetadataFileName">Path the metadata document is saved to.</param>
private void GenerateConversionMetadataFile(SplittingResult splittingResult, TableOfContentResult tableOfContentResult, string documentType, string finalOutputFileFullPath, string finalOutputFileName, string finalOutputMetadataFileName)
{
    XDocument metadata = new XDocument();
    XDocument teiDocument = XDocument.Load(finalOutputFileFullPath);

    XElement root = new XElement(nsItj + "document",
        new XAttribute("doctype", documentType),
        new XAttribute("versionId", _currentVersionInfoSkeleton.Id),
        new XAttribute(nsXml + "lang", "cs"),
        new XAttribute("n", _documentId),
        new XAttribute("xmlns", nsTei),
        new XAttribute(XNamespace.Xmlns + "itj", nsItj),
        new XAttribute(XNamespace.Xmlns + "nlp", nsNlp),
        new XAttribute(XNamespace.Xmlns + "xml", XNamespace.Xml));
    metadata.Add(root);

    XElement header = teiDocument.Descendants(nsTei + "teiHeader").FirstOrDefault();
    root.Add(header);

    XElement toc = GenerateToc(tableOfContentResult);
    XElement hws = GenerateHwList(tableOfContentResult);
    XElement hwt = GenerateHwTable(hws);

    XElement contentFile = new XElement(nsItj + "file",
        new XAttribute("type", "content"),
        new XAttribute("name", finalOutputFileName));
    XElement accessories = new XElement(nsItj + "accessories", contentFile);

    // Generate the page list only when the document was actually split into pages.
    if (splittingResult != null)
    {
        XElement pages = new XElement(nsItj + "pages",
            splittingResult.PageBreaksSplitInfo.Select(info =>
                new XElement(nsItj + "page",
                    // Each attribute is emitted only when its source value is present.
                    info.Number == null ? null : new XAttribute("n", info.Number),
                    info.Id == null ? null : new XAttribute(nsXml + "id", info.Id),
                    info.FileName == null ? null : new XAttribute("resource", info.FileName),
                    info.Facsimile == null ? null : new XAttribute("facs", info.Facsimile))));
        root.Add(pages);
    }

    root.Add(toc);
    root.Add(hwt);
    root.Add(hws);
    root.Add(accessories);

    metadata.Save(finalOutputMetadataFileName);
}
/// <summary>
/// Saves a <c>metadata</c> document holding a <c>tableOfContent</c> element (built from
/// <c>result.Sections</c>) followed by a <c>headwordsList</c> element (built from
/// <c>result.HeadwordsList</c>).
/// </summary>
/// <param name="result">Source of the sections and the headwords.</param>
/// <param name="filepath">Path the document is saved to.</param>
private static void SaveToc(TableOfContentResult result, string filepath)
{
    XDocument document = new XDocument(
        new XElement(nsItj + "metadata",
            new XElement(nsItj + "tableOfContent", GenerateList(result.Sections)),
            new XElement(nsItj + "headwordsList", GenerateList(result.HeadwordsList))));

    document.Save(filepath);
}
/// <summary>
/// Runs the whole conversion: looks the document up in the registry, converts every input DOCX
/// file to intermediate XML, runs the export module for the document type, copies the final XML
/// to the output directory and writes the conversion metadata file.
/// </summary>
/// <param name="settings">Conversion settings; also stored in <c>ConverterSettings</c>.</param>
/// <returns>
/// The <see cref="ConversionResult"/> held in <c>_result</c>. Expected failures (unknown document,
/// document not ready for export, unsupported type, conversion or export exceptions, failed page
/// splitting) are reported through <c>Errors</c>; <c>IsConverted</c> is set to <c>true</c> once the
/// export has run and reset to <c>false</c> when page splitting fails.
/// </returns>
public ConversionResult Convert(DocxToTeiConverterSettings settings)
{
    ConverterSettings = settings;
    CheckIfDirectoryPathsExists(ConverterSettings);
    _result = new ConversionResult();

    // Metadata is looked up only for the first (alphabetically) uploaded file.
    var inputFileName = ConverterSettings.InputFilesPath
        .Select(filePath => new FileInfo(filePath).Name)
        .First();

    ResolveDefaultSettingsValues(ConverterSettings);

    var prepis = GetPrepisy(ConverterSettings, inputFileName);
    if (prepis == null)
    {
        // The document is not in the registry; the caller should offer to register it.
        // Assigning a document type should be enough.
        _result.Errors.Add(new DocumentNotInEvidenceException(String.Format("Dokument s uvedeným jménem souboru '{0}' neexistuje v evidenci.", inputFileName)));
        return _result;
    }

    if (prepis.FazeZpracovani < FazeZpracovani.Exportovat)
    {
        _result.Errors.Add(new DocumentNotInRequredStateException("Dokument s uvedeným jménem souboru není připraven pro export."));
        return _result;
    }

    string documentType = GetDocumentType(prepis.TypPrepisu);
    _documentId = prepis.GUID;
    if (documentType == null)
    {
        // The registry assigns this document a type that is not supported.
        _result.Errors.Add(new NotSupportedFileFormatException("Dokument má v evidenci přiřazen typ dokumentu, který není podporován."));
        return _result;
    }

    // Create the directory structure (if missing) for the output and temporary files.
    string tempDirectoryPath = ConverterSettings.TempDirectoryPath;
    AdresarovaStruktura ads = new AdresarovaStruktura(tempDirectoryPath, documentType);
    ads.VytvorStrukturu();

    string docxToXmlFilePath = Path.Combine(GetDataDirectoryPath(), "AllStylesConvert.2xml");
    string xsltTemplatesPath = GetXsltTemplatesPath();
    string xsltTransformationsPath = GetXsltTransformationsPath();

    string fileNameWithoutExtension = prepis.Soubor.NazevBezPripony;
    string xmlOutpuFileName = fileNameWithoutExtension + XmlExtension;
    // Previously: Path.Combine(ads.DejVystup, fileNameWithoutExtension)
    string finalOutputDirectory = ads.DejVystup;
    string finalOutputFileName = Path.Combine(finalOutputDirectory, xmlOutpuFileName);

    // For now only the conversion from DOCX to the basic XML.
    IList<string> xmlOutputFiles = new List<string>();
    try
    {
        var filePart = 0;
        foreach (var inputFilePath in ConverterSettings.InputFilesPath)
        {
            var partOutputPath = GetDocxToXmlOutput(ads, prepis.Soubor.NazevBezPripony, filePart, ConverterSettings.InputFilesPath.Length > 1);
            xmlOutputFiles.Add(partOutputPath);
            ConvertDocxToXml(inputFilePath, docxToXmlFilePath, partOutputPath);
            filePart++;
        }
    }
    catch (Exception conversionError)
    {
        _result.Errors.Add(conversionError);
        return _result;
    }

    if (!Directory.Exists(finalOutputDirectory))
    {
        Directory.CreateDirectory(finalOutputDirectory);
    }

    IExportNastaveni exportSettings = GetExportSettings(documentType, ConverterSettings, xsltTransformationsPath, xsltTemplatesPath, ads, prepis);
    ExportBase export = GetExportModule(documentType, exportSettings);
    if (export == null || exportSettings == null)
    {
        // The export object could not be created; the type is not supported.
        // Note: the result is returned without an error entry and with IsConverted untouched.
        return _result;
    }

    try
    {
        export.Exportuj(exportSettings.Prepis, xmlOutputFiles, ConverterSettings.UploadedFilesPath);
        _result.IsConverted = true;
    }
    catch (Exception exportError)
    {
        _result.Errors.Add(exportError);
        return _result;
    }

    // Intermediate XML files are kept only in debug mode.
    if (!settings.Debug)
    {
        foreach (var xmlOutputFile in xmlOutputFiles)
        {
            if (File.Exists(xmlOutputFile))
            {
                File.Delete(xmlOutputFile);
            }
        }
    }

    var versions = settings.GetVersionList(_documentId);
    _currentVersionInfoSkeleton = versions.Last();
    WriteListChange(finalOutputFileName, versions, _currentVersionInfoSkeleton);

    var xmlFinalOutputPath = Path.Combine(settings.OutputDirectoryPath, xmlOutpuFileName);
    File.Copy(finalOutputFileName, xmlFinalOutputPath, true);

    // Previously: GetConversionMetadataFileFullPath(settings.OutputFilePath)
    _result.MetadataFilePath = settings.OutputMetadataFilePath;

    if (export.UsePersonalizedXmdGenerator)
    {
        export.GenerateConversionMetadataFile(documentType, xmlFinalOutputPath, xmlOutpuFileName, settings.OutputMetadataFilePath);
    }
    else
    {
        SplittingResult splittingResult = null;
        if (settings.SplitDocumentByPageBreaks)
        {
            splittingResult = SplitDocumentByPageBreaks(xmlFinalOutputPath, fileNameWithoutExtension);
            if (!splittingResult.IsSplitted)
            {
                // A failed split is reported, but the metadata file is still generated below.
                _result.IsConverted = false;
                _result.Errors.Add(new DocumentSplittingException("Vyskytla se chyba při rozdělení souboru podle hranice stran."));
            }
        }

        ContentInfoBuilder tocBuilder = new ContentInfoBuilder();
        TableOfContentResult tocResult = tocBuilder.MakeTableOfContent(xmlFinalOutputPath, "body");

        GenerateConversionMetadataFile(splittingResult, tocResult, documentType, xmlFinalOutputPath, xmlOutpuFileName, settings.OutputMetadataFilePath);
    }

    if (!settings.Debug)
    {
        try
        {
            Directory.Delete(settings.TempDirectoryPath, true);
        }
        catch (IOException)
        {
            // The delete is retried exactly once; a second failure propagates to the caller.
            Directory.Delete(settings.TempDirectoryPath, true);
        }
    }

    return _result;
}
/// <summary>
/// Opens a Word document through Office interop and extracts the entries of its first table of
/// contents (page number, text and the style name of the bookmarked heading).
/// </summary>
/// <param name="fileName">Path of the Word document to read.</param>
/// <returns>
/// The <see cref="TableOfContentResult"/>. When the file does not exist, or Word raises an error
/// while the document is processed, <c>Result</c> is <c>false</c> and <c>LastError</c> holds the
/// message. Returns <c>null</c> when resolving the bookmark or heading style of an entry throws
/// (kept for backward compatibility with existing callers).
/// </returns>
public TableOfContentResult ExtractTableOfContent(string fileName)
{
    var tableOfContentResult = new TableOfContentResult();
    if (!File.Exists(fileName))
    {
        tableOfContentResult.Result = false;
        tableOfContentResult.LastError = "File not found!";
        return tableOfContentResult;
    }

    Microsoft.Office.Interop.Word.Application wordApplication = null;
    Microsoft.Office.Interop.Word.Document document = null;
    try
    {
        object templatePath = fileName;
        object missingObj = Missing.Value;
        wordApplication = new Microsoft.Office.Interop.Word.Application();
        document = wordApplication.Documents.Open(ref templatePath, ref missingObj, ref missingObj, ref missingObj);

        // Resolve the hyperlink collection once instead of on every loop iteration.
        var hyperlinks = document.TablesOfContents[1].Range.Hyperlinks;
        var hyperlinksCount = hyperlinks.Count;

        var content = string.Empty;
        int nextStart = 0;

        // Entries are walked from last to first so that each section can end where the
        // following one starts; the collected items are reversed at the end.
        for (var i = hyperlinksCount; i > 0; i--)
        {
            var hyperlink = hyperlinks[i];
            var myRange = hyperlink.Range;
            var linkText = myRange.Text;

            var tableOfContentData = new TableOfContentData();

            // A complete entry has the form "text<TAB>page"; anything else is treated as a
            // fragment and accumulated until an entry with a page number is reached.
            var splites = linkText.Split('\t');
            int page;
            if (splites.Length > 1 && int.TryParse(splites[splites.Length - 1], out page))
            {
                content += string.Join(" ", splites, 0, splites.Length - 1) + " ";
                tableOfContentData.Page = page;
                // NOTE(review): as written this replaces a space with a space; the original
                // probably collapsed double or non-breaking spaces - confirm against history.
                tableOfContentData.Content = content.Trim().Replace(" ", " ");
                content = string.Empty;
            }
            else
            {
                content += linkText;
            }

            // NOTE(review): rd is filled in but never stored; its consumer
            // (contRange.Ranges.Add(rd)) is commented out in the original.
            var rd = new RangeDetails();
            rd.HyperLinkStart = myRange.Start;
            rd.HyperLinkEnd = myRange.End;
            rd.LinkText = linkText;

            try
            {
                var sadd = hyperlink.SubAddress;
                var wb = document.Bookmarks[sadd];
                if (wb != null)
                {
                    Microsoft.Office.Interop.Word.Style style = wb.Range.get_Style();
                    tableOfContentData.StyleInfo = style.NameLocal;

                    // Content starts right after the heading, so take the end of the heading.
                    rd.ContentRangeStart = wb.Range.End + 1;
                    if (i == hyperlinksCount)
                    {
                        // The last entry runs to the end of the document.
                        object oEndOfDoc = @"\endofdoc";
                        rd.ContentRangeEnd = document.Bookmarks.get_Item(ref oEndOfDoc).Range.End;
                    }
                    else
                    {
                        rd.ContentRangeEnd = nextStart - 1;
                    }

                    nextStart = rd.ContentRangeStart;

                    // The discarded document.Range(start, end) reads were removed: they were
                    // debugging aids and could throw on an empty trailing section.
                    tableOfContentResult.Items.Add(tableOfContentData);
                }
            }
            catch (Exception)
            {
                // Existing contract: a failing bookmark/style lookup aborts with null.
                // The finally block below still closes the document and quits Word.
                return null;
            }
        }

        document.Close();
        document = null;
        wordApplication.Quit();
        wordApplication = null;
    }
    catch (Exception ex)
    {
        tableOfContentResult.Result = false;
        tableOfContentResult.LastError = ex.Message;
    }
    finally
    {
        // Close and Quit are attempted independently: a failing Close must neither skip
        // Quit (which would leave a Word process running) nor throw out of the finally block.
        if (document != null)
        {
            try
            {
                document.Close();
            }
            catch (Exception)
            {
                // Best effort - the primary error, if any, is already recorded above.
            }
        }

        if (wordApplication != null)
        {
            try
            {
                wordApplication.Quit();
            }
            catch (Exception)
            {
                // Best effort - nothing more can be done if Word refuses to quit.
            }
        }
    }

    // NOTE(review): this reverses in place only if Items is a List<T>; for an interface-typed
    // collection it would bind to LINQ's Reverse() and the result would be discarded - confirm.
    tableOfContentResult.Items.Reverse();
    return tableOfContentResult;
}