/// <summary>
/// Converts a Word <c>Paragraphs</c> collection to XML by materializing it as a
/// <c>List&lt;Paragraph&gt;</c> and delegating to the list-based overload.
/// </summary>
/// <param name="paragraphs">The Word paragraphs collection to convert.</param>
/// <param name="tableBuilder">Passed through unchanged to the list-based overload.</param>
/// <param name="builder">Passed through unchanged to the list-based overload.</param>
/// <returns>The element produced by the list-based overload.</returns>
public XElement ParagraphsToXml(Paragraphs paragraphs, TableBuilder tableBuilder, Object builder = null)
{
    // The interop collection is non-generic, so each item is cast while copying.
    List<Paragraph> paragraphList = new List<Paragraph>(paragraphs.Cast<Paragraph>());

    return ParagraphsToXml(paragraphList, tableBuilder, builder);
}
/// <summary>
/// Takes the list of paragraphs and returns the XML representation of these elements.
/// </summary>
/// <remarks>
/// Paragraphs are processed in order. Contiguous list paragraphs and contiguous
/// blockquote paragraphs are buffered and emitted as one element each; table
/// paragraphs are delegated to <paramref name="tableBuilder"/>; iframe paragraphs are
/// grouped under a generated group id; image paragraphs are collected into a
/// <c>section</c> element; everything else becomes a styled paragraph element.
/// Resets <c>SidebarArticleParser</c> and <c>Errors</c> on every call, and may show
/// confirmation dialogs when insecure or invalid iframe markup was found.
/// </remarks>
/// <param name="paragraphs">The list of paragraphs to parse. Must not be null; an empty list is accepted.</param>
/// <param name="tableBuilder">
/// Optional. When non-null it is asked which table (if any) a paragraph belongs to;
/// tables not yet retrieved are emitted through <c>ParseTable</c>, and paragraphs of
/// already-retrieved tables are skipped.
/// </param>
/// <param name="builder">
/// Optional. Compared by reference with the quick-facts sidebox parser; when it is that
/// same object, matching paragraphs are not diverted into the sidebox parser.
/// </param>
/// <returns>An XElement representation of the document (a <c>div</c> with class <c>root</c>).</returns>
/// <exception cref="ArgumentNullException"><paramref name="paragraphs"/> is null.</exception>
/// <exception cref="InsecureIFrameException">Insecure iframes were found and the user did not confirm the save.</exception>
/// <exception cref="InvalidHtmlException">Invalid iframe markup was found and the user did not confirm the save.</exception>
public XElement ParagraphsToXml(List<Paragraph> paragraphs, TableBuilder tableBuilder, Object builder = null)
{
    if (paragraphs == null)
    {
        throw new ArgumentNullException(nameof(paragraphs));
    }

    SidebarArticleParser = new SidebarArticleParser();

    // Seed with the first paragraph's list type. An empty input has no paragraph to
    // read it from, so fall back to "no numbering" instead of indexing out of range.
    WdListType previousListType = paragraphs.Count > 0
        ? paragraphs[0].Range.ListFormat.ListType
        : WdListType.wdListNoNumbering;

    bool previousWasList = false;
    bool previousWasBlockquote = false;
    var contiguousListElements = new List<Paragraph>();
    var contiguousBlockquoteElements = new List<Paragraph>();

    string previousIFrameStyle = string.Empty;
    bool previousWasIFrame = false;
    bool desktopCodeFound = false;
    bool currentIframeIsInsecure = false;
    bool currentContainsInvalidNodes = false;
    var insecureIFrames = new List<string>();
    bool containsInvalidNodes = false;
    string iframeGroupId = string.Empty;
    XElement iframeGroupElement = new XElement("div");
    iframeGroupElement.SetAttributeValue("class", "iframe-component");

    List<string> stylesToIgnore = ArticleDocumentMetadataParser.GetInstance().MetadataStyles;

    var xData = new XElement("div");
    xData.SetAttributeValue("class", "root");
    Errors = new List<string>();

    int imageTagCount = 0;
    XElement divElement = null;

    for (int i = 0; i < paragraphs.Count; i++)
    {
        Paragraph paragraph = paragraphs[i];
        int tIndex = tableBuilder == null ? -1 : tableBuilder.GetTableIndexFor(paragraph.Range);
        bool belongsToTable = tableBuilder != null && tIndex != -1;

        // Fetched once per paragraph; the original fetched the same style object twice.
        Style style = (Style)paragraph.get_Style();

        if (belongsToTable && tableBuilder.HasRetrieved(tIndex))
        {
            continue;
        }

        string styleName = style.NameLocal;
        if (stylesToIgnore.Contains(styleName))
        {
            continue;
        }

        bool isList = paragraph.Range.ListParagraphs.Count > 0;
        bool isBlockquote = styleName == BlockquoteName;
        WdListType currentListType = paragraph.Range.ListFormat.ListType;

        // Reaching this point with a table paragraph means the table has not been
        // retrieved yet (the retrieved case was skipped above), so emit it now.
        if (belongsToTable)
        {
            xData.Add(tableBuilder.ParseTable(tIndex));
            continue;
        }

        if (builder != _quickFactsSideboxParser && _quickFactsSideboxParser != null && _quickFactsSideboxParser.Match(styleName))
        {
            _quickFactsSideboxParser.Add(paragraph);
            continue;
        }

        // First non-matching paragraph after a run of quick-facts paragraphs: emit the sidebox.
        if (_quickFactsSideboxParser != null && !_quickFactsSideboxParser.Match(styleName) && !_quickFactsSideboxParser.IsEmpty())
        {
            xData.Add(_quickFactsSideboxParser.GetSidebox(this));
            _quickFactsSideboxParser.Clear();
        }

        if (styleName == SidebarArticleParser.SidebarStyle)
        {
            try
            {
                xData.Add(paragraph.Range.Text);
                SidebarArticleParser.RetrieveSidebarToken(paragraph);
            }
            catch (ArgumentException e)
            {
                Errors.Add(e.Message);
            }
            continue;
        }

        if (!isList && previousWasList)
        {
            xData.Add(GetListStyleElement(contiguousListElements));
            contiguousListElements = new List<Paragraph>();
            previousWasList = false;
        }

        if (!isBlockquote && previousWasBlockquote)
        {
            xData.Add(BlockquoteTransformer.Generate(contiguousBlockquoteElements, CharacterStyleTransformer));
            contiguousBlockquoteElements = new List<Paragraph>();
            previousWasBlockquote = false;
        }

        if (isBlockquote)
        {
            // Just forming contiguous blocks, no further processing.
            contiguousBlockquoteElements.Add(paragraph);
            previousWasBlockquote = true;
            continue;
        }

        if (isList)
        {
            // A change of list type closes the current block before the new one starts.
            if (previousListType != currentListType && contiguousListElements.Count > 0)
            {
                xData.Add(GetListStyleElement(contiguousListElements));
                contiguousListElements = new List<Paragraph>();
            }

            // Add the paragraph to the block even if a new list has just been started.
            contiguousListElements.Add(paragraph);
            previousWasList = true;
            previousListType = paragraph.Range.ListFormat.ListType;
            continue;
        }

        bool isDesktopCode = styleName == DocumentAndParagraphStyles.IFrameCodeStyle;
        bool isMobileCode = styleName == DocumentAndParagraphStyles.IFrameMobileCodeStyle;

        if (isDesktopCode || isMobileCode)
        {
            var iframeElement = new XElement("div");
            if (!previousWasIFrame)
            {
                iframeGroupId = Guid.NewGuid().ToString("N");
                previousWasIFrame = true;
                iframeGroupElement.SetAttributeValue("class", "iframe-component");
            }

            string cssStyle = string.Empty;
            if (isDesktopCode)
            {
                desktopCodeFound = true;
                cssStyle = $"ewf-desktop-iframe_{iframeGroupId}";
                iframeElement.SetAttributeValue("class", $"iframe-component__desktop {cssStyle}");
                iframeElement.SetAttributeValue("data-mediaid", iframeGroupId);
            }

            if (isMobileCode)
            {
                cssStyle = $"ewf-mobile-iframe_{iframeGroupId}";
                iframeElement.SetAttributeValue("class", $"iframe-component__mobile {cssStyle}");
            }

            string paragraphText = paragraph.Range.Text;

            var insecureIFramesInParagraph = HTMLTools.CheckForInsecureIFrames(paragraphText);
            currentIframeIsInsecure = insecureIFramesInParagraph.Any() || currentIframeIsInsecure;
            insecureIFrames.AddRange(insecureIFramesInParagraph);

            // NOTE(review): unlike currentIframeIsInsecure this flag is overwritten, not
            // accumulated, so a valid mobile paragraph clears an invalid desktop one in the
            // same group. Kept as-is; confirm whether that is intended.
            currentContainsInvalidNodes = HTMLTools.ContainsForExternalNodes(paragraphText);
            containsInvalidNodes = containsInvalidNodes || currentContainsInvalidNodes;

            XElement embedElement = IFrameEmbedBuilder.Parse(paragraph, cssStyle, true);
            if (embedElement != null)
            {
                iframeElement.SetAttributeValue("data-embed-link", "enabled");
                iframeElement.Add(embedElement);
                iframeGroupElement.Add(iframeElement);
            }

            // The mobile code paragraph closes the group: emit it and start a fresh one.
            if (isMobileCode)
            {
                xData.Add(iframeGroupElement);
                iframeGroupElement = new XElement("div");
                iframeGroupElement.SetAttributeValue("class", "iframe-component");
            }

            previousIFrameStyle = styleName;
            continue;
        }

        if (IFrameEmbedBuilder.IFrameStyles.Contains(styleName))
        {
            WordStyleStruct w = new WordStyleStruct();

            // Base styles are used because the parent level styles only exist in the plugin.
            var baseStyle = (Style)style.get_BaseStyle();
            if (baseStyle != null)
            {
                ParagraphStyles.TryGetValue(baseStyle.NameLocal, out w);
            }

            XElement curElement = new XElement("p");
            string stylesection = string.Empty;
            bool isNewIframe = false;

            if (styleName == DocumentAndParagraphStyles.IFrameHeaderStyle)
            {
                stylesection = "header";
                isNewIframe = true;
            }

            if (styleName == DocumentAndParagraphStyles.IFrameTitleStyle)
            {
                stylesection = "title";
                isNewIframe = previousIFrameStyle != DocumentAndParagraphStyles.IFrameHeaderStyle;
            }

            if (styleName == DocumentAndParagraphStyles.IFrameCaptionStyle)
            {
                stylesection = "caption";
                isNewIframe = previousIFrameStyle != DocumentAndParagraphStyles.IFrameMobileCodeStyle;
            }

            if (styleName == DocumentAndParagraphStyles.IFrameSourceStyle)
            {
                stylesection = "source";
                isNewIframe = previousIFrameStyle != DocumentAndParagraphStyles.IFrameCaptionStyle
                              && previousIFrameStyle != DocumentAndParagraphStyles.IFrameMobileCodeStyle;
            }

            if (!previousWasIFrame && isNewIframe)
            {
                iframeGroupId = Guid.NewGuid().ToString("N");
                previousWasIFrame = true;
            }

            string cssClass = w == null ? string.Empty : w.CssClass;
            string iframeIdClass = $"{IFrameEmbedBuilder.IFrameClassName}-{stylesection}_{iframeGroupId}";
            curElement.SetAttributeValue("class", $"{cssClass} {iframeIdClass}");
            curElement = CharacterStyleTransformer.GetCharacterStyledElement(curElement, paragraph, CharacterStyleFactory.GetCharacterStyles(), false);
            xData.Add(curElement);
            previousIFrameStyle = styleName;
            continue;
        }

        // An ordinary paragraph ends any open iframe group: drop the group if it failed
        // validation (or never had desktop code), then reset the per-group state.
        if (currentIframeIsInsecure || currentContainsInvalidNodes || (!desktopCodeFound && previousWasIFrame))
        {
            RemoveIFrameGroupElements(xData, iframeGroupId);
        }

        previousIFrameStyle = string.Empty;
        previousWasIFrame = false;
        currentIframeIsInsecure = false;
        currentContainsInvalidNodes = false;
        desktopCodeFound = false;

        // Parsed once and reused; the original parsed every image paragraph twice.
        var imageTag = ImageReferenceBuilder.Parse(paragraph);
        if (imageTag != null)
        {
            if (imageTagCount == 0)
            {
                divElement = new XElement("section");
                divElement.SetAttributeValue("class", "article-exhibit");
                xData.Add(divElement);
            }

            // Get the float value from the image hyperlink (if it is an image) and set it
            // on the enclosing section element.
            var hyprlnk = paragraph.Range.Hyperlinks.Cast<Hyperlink>().FirstOrDefault();
            if (hyprlnk != null && !string.IsNullOrEmpty(hyprlnk.ScreenTip))
            {
                string classValue;
                classValue = imageFloatDictionary.TryGetValue(hyprlnk.ScreenTip.ToLower(), out classValue)
                    ? classValue
                    : string.Empty;
                divElement.SetAttributeValue("class", classValue);
            }

            divElement.Add(imageTag);
            imageTagCount++;
            continue;
        }

        imageTagCount = 0;

        XElement xElement;
        WordStyleStruct styleStruct;
        if (ParagraphStyles.TryGetValue(styleName, out styleStruct))
        {
            // If there is a special configuration for the paragraph style, apply it.
            string element = styleStruct.CssElement;
            if (element.IsNullOrEmpty())
            {
                element = null;
            }

            string clas = styleStruct.CssClass;
            if (clas.IsNullOrEmpty())
            {
                clas = null;
            }

            XElement curElement = element != null ? new XElement(element) : new XElement("p");
            if (!string.IsNullOrWhiteSpace(clas))
            {
                curElement.SetAttributeValue("class", clas);
            }

            xElement = CharacterStyleTransformer.GetCharacterStyledElement(curElement, paragraph, CharacterStyleFactory.GetCharacterStyles(), false);

            // Group consecutive answer paragraphs under one answer element so the
            // front-end does not style a single answer several times.
            if (clas == "article-interview__answer")
            {
                while (paragraphs.Count > i + 1)
                {
                    Paragraph nextParagraph = paragraphs[i + 1];
                    var nextStyle = (Style)nextParagraph.get_Style();
                    WordStyleStruct nextStyleStruct;

                    // A next paragraph whose style has no configured entry is simply not
                    // an answer; previously this dereferenced the missing entry.
                    bool nextIsAnswer = ParagraphStyles.TryGetValue(nextStyle.NameLocal, out nextStyleStruct)
                                        && nextStyleStruct != null
                                        && nextStyleStruct.CssClass == clas;
                    if (!nextIsAnswer)
                    {
                        break;
                    }

                    // Fold the next paragraph's content into the current answer body.
                    xElement.Add(CharacterStyleTransformer.GetCharacterStyledElement(new XElement("p"), nextParagraph, CharacterStyleFactory.GetCharacterStyles(), false));
                    i++;
                }
            }
        }
        else
        {
            xElement = CharacterStyleTransformer.GetCharacterStyledElement(new XElement("p"), paragraph, CharacterStyleFactory.GetCharacterStyles(), false);
        }

        xData.Add(xElement);
        previousListType = paragraph.Range.ListFormat.ListType;
    }

    // The in-loop cleanup only runs when an ordinary paragraph follows an iframe group.
    // Apply the same rule to a group that is still open when the input ends, so content
    // the dialogs below describe as dropped is actually dropped.
    if (currentIframeIsInsecure || currentContainsInvalidNodes || (!desktopCodeFound && previousWasIFrame))
    {
        RemoveIFrameGroupElements(xData, iframeGroupId);
    }

    if (insecureIFrames.Any())
    {
        var confirmSave = MessageBox.Show(
            "You have inserted multimedia items using non-secure links. " +
            "Click 'OK' to continue saving the article without this content or 'Cancel'" +
            " to go back into the article and edit your multimedia items.",
            "Continue with save?",
            MessageBoxButtons.OKCancel,
            MessageBoxIcon.Exclamation);
        if (confirmSave != DialogResult.OK)
        {
            throw new InsecureIFrameException(insecureIFrames);
        }
    }

    if (containsInvalidNodes)
    {
        var confirmSave = MessageBox.Show(
            "You have inserted multimedia items with invalid or non-permitted HTML code. " +
            "Click 'OK' to continue saving the article without this content or 'Cancel' to " +
            "go back into the article and edit your multimedia items.",
            "Continue with save?",
            MessageBoxButtons.OKCancel,
            MessageBoxIcon.Exclamation);
        if (confirmSave != DialogResult.OK)
        {
            throw new InvalidHtmlException();
        }
    }

    // Flush whatever is still buffered. Keying on the buffers (not on the last
    // paragraph's style) also covers a list or blockquote followed only by paragraphs
    // that bypass the in-loop flush, e.g. a trailing sidebar token.
    if (contiguousListElements.Count > 0)
    {
        xData.Add(GetListStyleElement(contiguousListElements));
    }

    if (contiguousBlockquoteElements.Count > 0)
    {
        xData.Add(BlockquoteTransformer.Generate(contiguousBlockquoteElements, CharacterStyleTransformer));
    }

    if (_quickFactsSideboxParser != null && builder != _quickFactsSideboxParser && !_quickFactsSideboxParser.IsEmpty())
    {
        xData.Add(_quickFactsSideboxParser.GetSidebox(this));
        _quickFactsSideboxParser.Clear();
    }

    return xData;
}

/// <summary>
/// Removes every direct child of <paramref name="root"/> whose <c>class</c> attribute
/// contains <paramref name="groupId"/>.
/// </summary>
/// <remarks>
/// NOTE(review): only direct children are inspected, so elements nested inside the
/// <c>iframe-component</c> wrapper are not matched here; confirm that is intended.
/// </remarks>
private static void RemoveIFrameGroupElements(XElement root, string groupId)
{
    root.Elements()
        .Where(x => x.Attribute("class") != null && x.Attribute("class").Value.Contains(groupId))
        .Remove();
}
/// <summary>
/// Builds a <c>table</c> element (class <c>data</c>) for the Word table at
/// <paramref name="index"/> and marks that table as retrieved.
/// </summary>
/// <remarks>
/// Every row/column position is probed with <c>table.Cell(row, column)</c>. A
/// <c>COMException</c> is treated as "no cell at this position" (row or column spans);
/// in that case the width of the nearest cell found by walking up the rows is used to
/// keep the running width and column-span bookkeeping aligned. Each cell is selected
/// while it is processed, so the application's selection changes as a side effect.
/// A commented-out <c>TableWidth</c> helper that used to sit above this method noted
/// that <c>table.Rows</c> / <c>table.Columns</c> are not accessible when cells are
/// merged, which is why cells are addressed individually.
/// </remarks>
/// <param name="index">Index of the table, as understood by <c>GetTable</c>.</param>
/// <returns>The generated <c>table</c> element containing a single <c>tbody</c>.</returns>
public XElement ParseTable(int index)
{
    Table table = GetTable(index);
    _retrieved[index] = true;

    var tableElement = new XElement("table");
    tableElement.SetAttributeValue("class", "data");
    var bodyElement = new XElement("tbody");
    tableElement.Add(bodyElement);

    var wordUtils = new WordUtils();
    var analyzer = new TableAnalyzer(table);

    // Only emit an explicit width when the table is narrower than the maximum.
    const int maxTableWidth = 544;
    decimal totalWidth = Math.Truncate(analyzer.TableCellWidthSums.Last());
    if (totalWidth < maxTableWidth)
    {
        tableElement.SetAttributeValue("width", totalWidth);
    }

    // Cells of the first row are emitted as "th", all later rows as "td".
    bool isFirstRow = true;
    for (int rowNumber = 1; rowNumber <= table.Rows.Count; rowNumber++)
    {
        var rowElement = new XElement("tr");
        bodyElement.Add(rowElement);

        decimal runningWidth = 0;
        int columnsCovered = 0;

        for (int columnNumber = 1; columnNumber <= table.Columns.Count; columnNumber++)
        {
            try
            {
                var cell = table.Cell(rowNumber, columnNumber);
                cell.Select();
                var app = cell.Application;

                var cellElement = new XElement(isFirstRow ? "th" : "td");
                rowElement.Add(cellElement);

                // Convert the cell's own paragraphs (and nested tables) and copy the
                // resulting child nodes into the cell element.
                Paragraphs cellParagraphs = cell.Range.Paragraphs;
                var nestedTableBuilder = new TableBuilder(cell.Tables);
                XElement cellContent = wordUtils.ParagraphsToXml(cellParagraphs, nestedTableBuilder);
                cellElement.Add(cellContent.Nodes());

                // The running width locates the right edge of this cell among the
                // analyzer's cumulative widths; the distance from the columns already
                // covered is the column span.
                runningWidth += (decimal)cell.Width;
                int edgeIndex = analyzer.TableCellWidthSums.IndexOf(runningWidth);
                int columnSpan = edgeIndex + 1 - columnsCovered;
                cellElement.SetAttributeValue("colspan", columnSpan);
                cellElement.SetAttributeValue("rowspan", app.Selection.Information[WdInformation.wdEndOfRangeRowNumber] - app.Selection.Information[WdInformation.wdStartOfRangeRowNumber] + 1);

                // Sum the percentage widths of every column this cell spans.
                decimal widthPercent = 0;
                int firstSpannedIndex = edgeIndex - columnSpan + 1;
                for (int k = edgeIndex; k >= firstSpannedIndex; k--)
                {
                    widthPercent += analyzer.CellWidthPercentages[k];
                }

                cellElement.SetAttributeValue("width", $"{widthPercent}%");
                columnsCovered += columnSpan;
            }
            catch (System.Runtime.InteropServices.COMException)
            {
                // It seems that the only way to tell whether a cell exists at a given
                // [row, column] is to call table.Cell and see if it throws; it may be
                // missing because of rowspans and colspans. Walk up the rows until a
                // cell is found and account for its width.
                for (int probeRow = rowNumber; probeRow > 0; probeRow--)
                {
                    try
                    {
                        runningWidth += (decimal)table.Cell(probeRow, columnNumber).Width;
                        int edgeIndex = analyzer.TableCellWidthSums.IndexOf(runningWidth);
                        int columnSpan = edgeIndex + 1 - columnsCovered;
                        columnsCovered += columnSpan;
                        break;
                    }
                    catch (System.Runtime.InteropServices.COMException)
                    {
                        // No cell at this row either; try the row above.
                    }
                }
            }
        }

        isFirstRow = false;
    }

    return tableElement;
}