Esempio n. 1
0
        /// <summary>
        /// Convenience overload: materializes the Word <c>Paragraphs</c> collection into a list of
        /// <c>Paragraph</c> objects and forwards to the list-based overload.
        /// </summary>
        /// <param name="paragraphs">The Word paragraph collection to convert.</param>
        /// <param name="tableBuilder">Passed through unchanged to the list-based overload.</param>
        /// <param name="builder">Passed through unchanged to the list-based overload.</param>
        /// <returns>Whatever the list-based overload returns for the same paragraphs.</returns>
        public XElement ParagraphsToXml(Paragraphs paragraphs, TableBuilder tableBuilder, Object builder = null)
            => ParagraphsToXml(paragraphs.Cast<Paragraph>().ToList(), tableBuilder, builder);
Esempio n. 2
0
        /// <summary>
        /// Takes the list of paragraphs and returns the XML representation of these elements.
        /// </summary>
        /// <remarks>
        /// Paragraphs are processed in order. Consecutive list paragraphs and consecutive blockquote
        /// paragraphs are buffered and emitted as one element per run; iframe-styled paragraphs share a
        /// generated group id; image references are collected into a <c>section</c> element; and
        /// consecutive "article-interview__answer" paragraphs are merged into one answer element.
        /// The method replaces <c>Errors</c> and <c>SidebarArticleParser</c> with fresh instances and shows
        /// a confirmation dialog when insecure iframes or non-permitted HTML were detected.
        /// </remarks>
        /// <param name="paragraphs">The list of paragraphs to parse. May be empty; must not be null.</param>
        /// <param name="tableBuilder">
        /// Optional. When supplied, a paragraph that belongs to a table is replaced by the parsed table the
        /// first time that table is seen and skipped afterwards.
        /// </param>
        /// <param name="builder">
        /// Optional. When this is the quick-facts sidebox parser itself, paragraphs matching that parser are
        /// not collected into it and no trailing sidebox is emitted.
        /// </param>
        /// <returns>A <c>div</c> element with class "root" holding the converted content.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="paragraphs"/> is null.</exception>
        /// <exception cref="InsecureIFrameException">The user cancelled the insecure-iframe confirmation.</exception>
        /// <exception cref="InvalidHtmlException">The user cancelled the invalid-HTML confirmation.</exception>
        public XElement ParagraphsToXml(List<Paragraph> paragraphs, TableBuilder tableBuilder, Object builder = null)
        {
            if (paragraphs == null)
            {
                throw new ArgumentNullException(nameof(paragraphs));
            }

            SidebarArticleParser = new SidebarArticleParser();

            // Seed the list type from the first paragraph; an empty list has no paragraph to read it from.
            WdListType previouslistType = paragraphs.Count > 0
                ? paragraphs[0].Range.ListFormat.ListType
                : WdListType.wdListNoNumbering;
            bool previousWasList = false;

            string previousIFrameStyle = string.Empty;

            bool previousWasIFrame = false;
            bool desktopCodeFound = false;
            bool currentIframeIsInsecure = false;
            bool currentContainsInvalidNodes = false;
            List<string> insecureIFrames = new List<string>();
            bool containsInvalidNodes = false;

            string iframeGroupId = String.Empty;
            XElement iframeGroupElement = new XElement("div");
            iframeGroupElement.SetAttributeValue("class", "iframe-component");

            bool previousWasBlockquote = false;
            var contiguousListElements = new List<Paragraph>();
            var contiguousBlockquoteElements = new List<Paragraph>();
            List<string> stylesToIgnore = ArticleDocumentMetadataParser.GetInstance().MetadataStyles;

            var xData = new XElement("div");
            xData.SetAttributeValue("class", "root");
            Errors = new List<string>();
            int imageTagCount = 0;
            XElement divElement = null;

            for (int i = 0; i < paragraphs.Count; i++)
            {
                Paragraph paragraph = paragraphs[i];
                int tIndex = tableBuilder == null ? -1 : tableBuilder.GetTableIndexFor(paragraph.Range);
                bool belongsToTable = tableBuilder != null && tIndex != -1;

                // The style is read once per paragraph and reused for every check below.
                var currentStyle = (Style)paragraph.get_Style();

                // The table this paragraph belongs to has already been emitted.
                if (belongsToTable && tableBuilder.HasRetrieved(tIndex))
                {
                    continue;
                }

                string styleName = currentStyle.NameLocal;
                if (stylesToIgnore.Contains(styleName))
                {
                    continue;
                }

                XElement xElement = null;
                bool isList = paragraph.Range.ListParagraphs.Count > 0;
                bool isBlockquote = styleName == BlockquoteName;
                WdListType currentListType = paragraph.Range.ListFormat.ListType;

                // First paragraph of a not-yet-emitted table: emit the whole table in its place.
                if (belongsToTable && !tableBuilder.HasRetrieved(tIndex))
                {
                    xData.Add(tableBuilder.ParseTable(tIndex));
                    continue;
                }

                // Collect quick-facts paragraphs unless the caller is the quick-facts parser itself.
                if (builder != _quickFactsSideboxParser && _quickFactsSideboxParser != null &&
                    _quickFactsSideboxParser.Match(styleName))
                {
                    _quickFactsSideboxParser.Add(paragraph);
                    continue;
                }
                // A non-matching paragraph ends the quick-facts run: emit what was collected.
                if (_quickFactsSideboxParser != null && !_quickFactsSideboxParser.Match(styleName) &&
                    !_quickFactsSideboxParser.IsEmpty())
                {
                    xData.Add(_quickFactsSideboxParser.GetSidebox(this));
                    _quickFactsSideboxParser.Clear();
                }

                if (styleName == SidebarArticleParser.SidebarStyle)
                {
                    try
                    {
                        xData.Add(paragraph.Range.Text);
                        SidebarArticleParser.RetrieveSidebarToken(paragraph);
                    }
                    catch (ArgumentException e)
                    {
                        Errors.Add(e.Message);
                    }
                    continue;
                }

                // A non-list paragraph closes the pending list run.
                if (!isList && previousWasList)
                {
                    xData.Add(GetListStyleElement(contiguousListElements));
                    contiguousListElements = new List<Paragraph>();
                    previousWasList = false;
                }
                // A non-blockquote paragraph closes the pending blockquote run.
                if (!isBlockquote && previousWasBlockquote)
                {
                    xData.Add(BlockquoteTransformer.Generate(contiguousBlockquoteElements, CharacterStyleTransformer));
                    contiguousBlockquoteElements = new List<Paragraph>();
                    previousWasBlockquote = false;
                }

                if (isBlockquote)
                {
                    // Just forming contiguous blocks, no further processing.
                    contiguousBlockquoteElements.Add(paragraph);
                    previousWasBlockquote = true;
                    continue;
                }
                if (isList)
                {
                    // A change of list type closes the pending run before the new one starts.
                    if (previouslistType != currentListType && contiguousListElements.Count > 0)
                    {
                        xElement = GetListStyleElement(contiguousListElements);

                        xData.Add(xElement);
                        contiguousListElements = new List<Paragraph>();
                    }
                    // The paragraph is buffered even when it starts a new list.
                    contiguousListElements.Add(paragraph);
                    previousWasList = true;
                    previouslistType = paragraph.Range.ListFormat.ListType;
                    continue;
                }

                bool isDesktopIFrameCode = styleName == DocumentAndParagraphStyles.IFrameCodeStyle;
                bool isMobileIFrameCode = styleName == DocumentAndParagraphStyles.IFrameMobileCodeStyle;
                if (isDesktopIFrameCode || isMobileIFrameCode)
                {
                    var iframeElement = new XElement("div");

                    if (!previousWasIFrame)
                    {
                        iframeGroupId = Guid.NewGuid().ToString("N");

                        previousWasIFrame = true;
                        iframeGroupElement.SetAttributeValue("class", "iframe-component");
                    }
                    string cssStyle = string.Empty;

                    if (isDesktopIFrameCode)
                    {
                        desktopCodeFound = true;
                        cssStyle = String.Format("ewf-desktop-iframe_{0}", iframeGroupId);
                        iframeElement.SetAttributeValue("class", $"iframe-component__desktop {cssStyle}");
                        iframeElement.SetAttributeValue("data-mediaid", iframeGroupId);
                    }

                    if (isMobileIFrameCode)
                    {
                        cssStyle = String.Format("ewf-mobile-iframe_{0}", iframeGroupId);
                        iframeElement.SetAttributeValue("class", $"iframe-component__mobile {cssStyle}");
                    }

                    string paragraphText = paragraph.Range.Text;
                    var insecureIFramesInParagraph = HTMLTools.CheckForInsecureIFrames(paragraphText);
                    currentIframeIsInsecure = insecureIFramesInParagraph.Any() || currentIframeIsInsecure;
                    insecureIFrames.AddRange(insecureIFramesInParagraph);

                    currentContainsInvalidNodes = HTMLTools.ContainsForExternalNodes(paragraphText);
                    containsInvalidNodes = containsInvalidNodes || currentContainsInvalidNodes;

                    XElement embedElement = IFrameEmbedBuilder.Parse(paragraph, cssStyle, true);
                    if (embedElement != null)
                    {
                        iframeElement.SetAttributeValue("data-embed-link", "enabled");
                        iframeElement.Add(embedElement);
                        iframeGroupElement.Add(iframeElement);
                    }
                    // The mobile code paragraph completes the group: emit it and start a new one.
                    if (isMobileIFrameCode)
                    {
                        xData.Add(iframeGroupElement);
                        iframeGroupElement = new XElement("div");
                        iframeGroupElement.SetAttributeValue("class", "iframe-component");
                    }

                    previousIFrameStyle = styleName;
                    continue;
                }

                if (IFrameEmbedBuilder.IFrameStyles.Contains(styleName))
                {
                    WordStyleStruct w = new WordStyleStruct();
                    // Base styles are used because the parent level styles only exist in the plugin.
                    var baseStyle = (Style)currentStyle.get_BaseStyle();
                    if (baseStyle != null)
                    {
                        ParagraphStyles.TryGetValue(baseStyle.NameLocal, out w);
                    }

                    XElement curElement = new XElement("p");
                    string stylesection = String.Empty;
                    bool isNewIframe = false;
                    if (styleName == DocumentAndParagraphStyles.IFrameHeaderStyle)
                    {
                        stylesection = "header";
                        isNewIframe = true;
                    }
                    if (styleName == DocumentAndParagraphStyles.IFrameTitleStyle)
                    {
                        stylesection = "title";
                        isNewIframe = previousIFrameStyle != DocumentAndParagraphStyles.IFrameHeaderStyle;
                    }
                    if (styleName == DocumentAndParagraphStyles.IFrameCaptionStyle)
                    {
                        stylesection = "caption";
                        isNewIframe = previousIFrameStyle != DocumentAndParagraphStyles.IFrameMobileCodeStyle;
                    }
                    if (styleName == DocumentAndParagraphStyles.IFrameSourceStyle)
                    {
                        stylesection = "source";
                        isNewIframe = previousIFrameStyle != DocumentAndParagraphStyles.IFrameCaptionStyle &&
                                      previousIFrameStyle != DocumentAndParagraphStyles.IFrameMobileCodeStyle;
                    }

                    if (!previousWasIFrame && isNewIframe)
                    {
                        iframeGroupId = Guid.NewGuid().ToString("N");
                        previousWasIFrame = true;
                    }

                    string cssClass = w == null ? string.Empty : w.CssClass;
                    string iframeIdClass = String.Format("{0}-{1}_{2}", IFrameEmbedBuilder.IFrameClassName, stylesection,
                                                         iframeGroupId);

                    curElement.SetAttributeValue("class", String.Format("{0} {1}", cssClass, iframeIdClass));

                    curElement = CharacterStyleTransformer.GetCharacterStyledElement(curElement, paragraph,
                                                                                     CharacterStyleFactory.GetCharacterStyles(), false);
                    xData.Add(curElement);

                    previousIFrameStyle = styleName;
                    continue;
                }

                // Reached only by paragraphs that are not iframe-styled: the iframe group (if any) has ended.
                // Top-level elements tagged with the group id are removed when the group was flagged or
                // had no desktop code. This cleanup only runs when such a paragraph follows the group.
                if (currentIframeIsInsecure || currentContainsInvalidNodes ||
                    (!desktopCodeFound && previousWasIFrame))
                {
                    xData.Elements()
                         .Where(x => x.Attribute("class") != null &&
                                     x.Attribute("class").Value.Contains(iframeGroupId))
                         .Remove();
                }
                previousIFrameStyle = String.Empty;
                previousWasIFrame = false;
                currentIframeIsInsecure = false;
                currentContainsInvalidNodes = false;
                desktopCodeFound = false;

                // Parse the image reference once and reuse the result.
                var imageTag = ImageReferenceBuilder.Parse(paragraph);
                if (imageTag != null)
                {
                    // The first image of a run opens a new exhibit section; later ones join it.
                    if (imageTagCount == 0)
                    {
                        divElement = new XElement("section");
                        divElement.SetAttributeValue("class", "article-exhibit");
                        xData.Add(divElement);
                    }

                    // Get the float value from the image hyperlink's screen tip and set it on the section.
                    var hyperlink = paragraph.Range.Hyperlinks.Cast<Hyperlink>().FirstOrDefault();
                    if (hyperlink != null && !string.IsNullOrEmpty(hyperlink.ScreenTip))
                    {
                        // Invariant lower-casing keeps the lookup key independent of the current culture.
                        string classValue;
                        if (!imageFloatDictionary.TryGetValue(hyperlink.ScreenTip.ToLowerInvariant(), out classValue))
                        {
                            classValue = string.Empty;
                        }
                        divElement.SetAttributeValue("class", classValue);
                    }

                    divElement.Add(imageTag);
                    imageTagCount++;
                    continue;
                }

                imageTagCount = 0;

                WordStyleStruct styleStruct;
                if (ParagraphStyles.TryGetValue(styleName, out styleStruct))
                {
                    // If there is a special configuration for the paragraph style, apply it.
                    string element = styleStruct.CssElement;
                    if (element.IsNullOrEmpty())
                    {
                        element = null;
                    }
                    string clas = styleStruct.CssClass;
                    if (clas.IsNullOrEmpty())
                    {
                        clas = null;
                    }
                    XElement curElement = element != null ? new XElement(element) : new XElement("p");
                    if (!string.IsNullOrWhiteSpace(clas))
                    {
                        curElement.SetAttributeValue("class", clas);
                    }

                    xElement = CharacterStyleTransformer.GetCharacterStyledElement(curElement, paragraph,
                                                                                   CharacterStyleFactory.GetCharacterStyles(), false);

                    // Group consecutive answer paragraphs under one answer element so the front-end
                    // does not style a single answer several times.
                    if (clas == "article-interview__answer")
                    {
                        while (paragraphs.Count > i + 1)
                        {
                            var nextStyle = (Style)paragraphs[i + 1].get_Style();
                            WordStyleStruct nextStyleStruct;
                            // A style without a configured entry cannot be an answer paragraph.
                            if (!ParagraphStyles.TryGetValue(nextStyle.NameLocal, out nextStyleStruct) ||
                                nextStyleStruct == null ||
                                nextStyleStruct.CssClass != clas)
                            {
                                break;
                            }

                            // Append the next paragraph's content to the current answer body and consume it.
                            xElement.Add(CharacterStyleTransformer.GetCharacterStyledElement(new XElement("p"), paragraphs[i + 1], CharacterStyleFactory.GetCharacterStyles(), false));
                            i++;
                        }
                    }
                }
                else
                {
                    xElement = new XElement("p");
                    xElement = CharacterStyleTransformer.GetCharacterStyledElement(xElement, paragraph,
                                                                                   CharacterStyleFactory.GetCharacterStyles(), false);
                }
                xData.Add(xElement);
                previouslistType = paragraph.Range.ListFormat.ListType;
            }

            if (insecureIFrames.Any())
            {
                var confirmSave =
                    MessageBox.Show("You have inserted multimedia items using non-secure links. " +
                                    "Click 'OK' to continue saving the article without this content or 'Cancel'" +
                                    " to go back into the article and edit your multimedia items.",
                                    "Continue with save?",
                                    MessageBoxButtons.OKCancel, MessageBoxIcon.Exclamation);
                if (confirmSave != DialogResult.OK)
                {
                    throw new InsecureIFrameException(insecureIFrames);
                }
            }
            if (containsInvalidNodes)
            {
                var confirmSave =
                    MessageBox.Show("You have inserted multimedia items with invalid or non-permitted HTML code. " +
                                    "Click 'OK' to continue saving the article without this content or 'Cancel' to " +
                                    "go back into the article and edit your multimedia items.", "Continue with save?",
                                    MessageBoxButtons.OKCancel, MessageBoxIcon.Exclamation);
                if (confirmSave != DialogResult.OK)
                {
                    throw new InvalidHtmlException();
                }
            }

            // Flush whatever is still buffered. Checking the buffers themselves (rather than the style of
            // the last processed paragraph) keeps a trailing list or blockquote from being dropped when
            // the document ends with a paragraph that bypassed the in-loop flush.
            if (contiguousListElements.Count > 0)
            {
                xData.Add(GetListStyleElement(contiguousListElements));
            }
            if (contiguousBlockquoteElements.Count > 0)
            {
                xData.Add(BlockquoteTransformer.Generate(contiguousBlockquoteElements, CharacterStyleTransformer));
            }
            if (_quickFactsSideboxParser != null && builder != _quickFactsSideboxParser &&
                !_quickFactsSideboxParser.IsEmpty())
            {
                xData.Add(_quickFactsSideboxParser.GetSidebox(this));
                _quickFactsSideboxParser.Clear();
            }
            return xData;
        }
Esempio n. 3
0
        //private float TableWidth(Table table)
        //{
        //    int colIndex = 1;
        //    float width = 0;
        //    try
        //    {
        //        while (true)
        //        {	//major hacks because documentation is lacking
        //            and i can't find another solution

        //            table.Rows is inaccessible if there are merged cells
        //            across rows

        //            table.Columns is inaccessible if there are merged
        //            cells across columns

        //            so how do i find out the number of cells in a table row?
        //            WE KEEP GOING TILL WE BUST
        //            width += table.Cell(1, colIndex).Width;
        //            colIndex++;
        //        }
        //    } catch
        //    {
        //        return width;
        //    }

        //}

        /// <summary>
        /// Builds a <c>table</c> element (class "data") for the table at the given index and marks
        /// that index as retrieved.
        /// </summary>
        /// <param name="index">Index of the table, as accepted by <c>GetTable</c>.</param>
        /// <returns>
        /// The <c>table</c> element, containing one <c>tbody</c> with a <c>tr</c> per table row. Cells of
        /// the first row are written as <c>th</c>, all others as <c>td</c>.
        /// </returns>
        public XElement ParseTable(int index)
        {
            Table sourceTable = GetTable(index);

            _retrieved[index] = true;

            var tableElement = new XElement("table");
            tableElement.SetAttributeValue("class", "data");

            var bodyElement = new XElement("tbody");
            tableElement.Add(bodyElement);

            var paragraphConverter = new WordUtils();
            var analyzer = new TableAnalyzer(sourceTable);
            const int widthLimit = 544;
            decimal totalWidth = Math.Truncate(analyzer.TableCellWidthSums.Last());

            // Only tables narrower than the limit receive an explicit width attribute.
            if (totalWidth < widthLimit)
            {
                tableElement.SetAttributeValue("width", totalWidth);
            }

            for (int rowIndex = 1; rowIndex <= sourceTable.Rows.Count; rowIndex++)
            {
                var rowElement = new XElement("tr");
                bodyElement.Add(rowElement);

                // The first row is the header row.
                string cellTag = rowIndex == 1 ? "th" : "td";
                decimal runningWidth = 0;
                int columnsConsumed = 0;

                for (int columnIndex = 1; columnIndex <= sourceTable.Columns.Count; columnIndex++)
                {
                    try
                    {
                        var cell = sourceTable.Cell(rowIndex, columnIndex);
                        // The cell is selected so the application's Selection can report its row extent below.
                        cell.Select();
                        var wordApp = cell.Application;

                        var cellElement = new XElement(cellTag);
                        rowElement.Add(cellElement);

                        Paragraphs cellParagraphs = cell.Range.Paragraphs;
                        var nestedTables = new TableBuilder(cell.Tables);

                        // Copy every top-level node of the converted cell content into the cell element.
                        for (XNode node = paragraphConverter.ParagraphsToXml(cellParagraphs, nestedTables).FirstNode;
                             node != null;
                             node = node.NextNode)
                        {
                            cellElement.Add(node);
                        }

                        // The running width sum is looked up in the analyzer's width sums to find the
                        // last column this cell covers, from which the column span is derived.
                        runningWidth += (decimal)cell.Width;
                        int lastCoveredIndex = analyzer.TableCellWidthSums.IndexOf(runningWidth);
                        int span = lastCoveredIndex + 1 - columnsConsumed;
                        cellElement.SetAttributeValue("colspan", span);
                        cellElement.SetAttributeValue("rowspan",
                                                      wordApp.Selection.Information[WdInformation.wdEndOfRangeRowNumber] -
                                                      wordApp.Selection.Information[WdInformation.wdStartOfRangeRowNumber] + 1);

                        // The cell's width is the sum of the percentages of the columns it spans.
                        decimal widthPercent = 0;
                        int firstCoveredIndex = lastCoveredIndex - span + 1;
                        for (int k = lastCoveredIndex; k >= firstCoveredIndex; k--)
                        {
                            widthPercent += analyzer.CellWidthPercentages[k];
                        }
                        cellElement.SetAttributeValue("width", widthPercent + "%");

                        columnsConsumed += span;
                    }
                    catch (System.Runtime.InteropServices.COMException)
                    {
                        // It seems that the only way to tell whether a cell exists at [row, col] is to call
                        // Cell(row, col) and see if it throws; it may be missing because of rowspans and
                        // colspans. Walk upwards through the rows until a cell in this column is found
                        // and account for its width.
                        for (int probeRow = rowIndex; probeRow > 0; probeRow--)
                        {
                            try
                            {
                                runningWidth += (decimal)sourceTable.Cell(probeRow, columnIndex).Width;
                                int lastCoveredIndex = analyzer.TableCellWidthSums.IndexOf(runningWidth);
                                int span = lastCoveredIndex + 1 - columnsConsumed;
                                columnsConsumed += span;
                                break;
                            }
                            catch (System.Runtime.InteropServices.COMException)
                            {
                                // No cell at this row either; try the row above.
                            }
                        }
                    }
                }
            }

            return tableElement;
        }