/// <summary>
/// Converts one html node into a xaml block-level element and adds the result to
/// <paramref name="xamlParentElement"/>.
/// Comments are passed to DefineInlineFragmentParent, text nodes are wrapped into an implicit paragraph,
/// and elements from the XHTML namespace are dispatched on their lowercased local name.
/// In some cases several following siblings of the given node are consumed too (e.g. LIs encountered
/// without a wrapping UL/OL, which are collected together and wrapped into one implicit List element).
/// </summary>
/// <param name="xamlParentElement">
/// Parent xaml element, to which the new converted element will be added.
/// </param>
/// <param name="htmlNode">
/// Html node to convert: a comment, a text node or an element. Any other node kind is returned untouched.
/// </param>
/// <param name="inheritedProperties">
/// Properties inherited from an outer context.
/// </param>
/// <param name="stylesheet">Stylesheet forwarded to the element-specific converters.</param>
/// <param name="sourceContext">
/// Stack of html elements currently being converted; the element is pushed for the duration of its
/// conversion and popped afterwards.
/// </param>
/// <returns>
/// Last processed html node. Normally it is the same node that was passed in, but in some irregular cases
/// it can be one of its following siblings. The caller must use this node to get to the next sibling.
/// </returns>
private static XmlNode AddBlock(
    XmlElement xamlParentElement,
    XmlNode htmlNode,
    Hashtable inheritedProperties,
    CssStylesheet stylesheet,
    List<XmlElement> sourceContext)
{
    var htmlComment = htmlNode as XmlComment;
    if (htmlComment != null)
    {
        DefineInlineFragmentParent(htmlComment, /*xamlParentElement:*/null);
        return htmlNode;
    }

    if (htmlNode is XmlText)
    {
        return AddImplicitParagraph(xamlParentElement, htmlNode, inheritedProperties, stylesheet, sourceContext);
    }

    var htmlElement = htmlNode as XmlElement;
    if (htmlElement == null)
    {
        // Neither comment, text nor element: nothing to convert.
        return htmlNode;
    }

    if (htmlElement.NamespaceURI != HtmlParser.XhtmlNamespace)
    {
        // Non-html element. Skip it.
        // Isn't it too aggressive? What if this is just an error in the html tag name?
        // TODO: Consider skipping just the wrapper and recursing into the element tree,
        // which may produce some garbage though, coming from xml fragments.
        return htmlElement;
    }

    // Put source element to the stack
    sourceContext.Add(htmlElement);

    // Html element names are case-insensitive. The invariant culture is used so that the result does
    // not depend on the current thread culture (a Turkish culture lowercases 'I' to a dotless 'ı',
    // which would make "DIV", "LI" or "IMG" miss every case label below).
    string htmlElementName = htmlElement.LocalName.ToLowerInvariant();

    XmlNode lastProcessedNode = htmlNode;

    // Switch to an appropriate kind of processing depending on html element name
    switch (htmlElementName)
    {
        // Sections:
        case "html":
        case "body":
        case "div":
        case "form": // not a block according to xhtml spec
        case "pre": // Renders text in a fixed-width font
        case "blockquote":
        case "caption":
        case "center":
        case "cite":
            AddSection(xamlParentElement, htmlElement, inheritedProperties, stylesheet, sourceContext);
            break;

        // Paragraphs:
        case "p":
        case "h1":
        case "h2":
        case "h3":
        case "h4":
        case "h5":
        case "h6":
        case "nsrtitle":
        case "textarea":
        case "dd": // ???
        case "dl": // ???
        case "dt": // ???
        case "tt": // ???
            AddParagraph(xamlParentElement, htmlElement, inheritedProperties, stylesheet, sourceContext);
            break;

        case "ol":
        case "ul":
        case "dir": // treat as UL element
        case "menu": // treat as UL element
            // List element conversion
            AddList(xamlParentElement, htmlElement, inheritedProperties, stylesheet, sourceContext);
            break;

        case "li":
            // LI outside of OL/UL.
            // Collect all sibling LIs, wrap them into a List and then proceed with the element
            // following the last of the LIs.
            lastProcessedNode = AddOrphanListItems(
                xamlParentElement, htmlElement, inheritedProperties, stylesheet, sourceContext);
            break;

        case "img":
            // TODO: Add image processing
            AddImage(xamlParentElement, htmlElement, inheritedProperties, stylesheet, sourceContext);
            break;

        case "table":
            // Hand off to the table parsing function which will perform special table syntax checks
            AddTable(xamlParentElement, htmlElement, inheritedProperties, stylesheet, sourceContext);
            break;

        case "tbody":
        case "tfoot":
        case "thead":
        case "tr":
        case "td":
        case "th":
            // Table stuff without table wrapper.
            // TODO: add special-case processing here for elements that should be within tables when the
            // parent element is NOT a table. If the parent element is a table they can be processed normally.
            // We need to compare against the parent element here, we can't just break on a switch.
            // Until then the element is treated as unknown, but we still recurse into it.
            goto default;

        case "style": // We already pre-processed all style elements. Ignore it now
        case "meta":
        case "head":
        case "title":
        case "script":
            // Ignore these elements
            break;

        default:
            // Wrap a sequence of inlines into an implicit paragraph
            lastProcessedNode = AddImplicitParagraph(
                xamlParentElement, htmlElement, inheritedProperties, stylesheet, sourceContext);
            break;
    }

    // Remove the element from the stack
    Debug.Assert(sourceContext.Count > 0 && sourceContext[sourceContext.Count - 1] == htmlElement);
    sourceContext.RemoveAt(sourceContext.Count - 1);

    // Return last processed node
    return lastProcessedNode;
}
/// <summary>
/// Walks the children of a tbody element and asks AnalyzeTrStructure to analyze every tr child,
/// keeping track of the widest row seen. Active row spans are cleared before and after the walk,
/// so row spans crossing tbody boundaries are ignored.
/// </summary>
/// <param name="htmlTbodyElement">
/// Node representing the html tbody whose structure is to be analyzed.
/// </param>
/// <param name="columnStarts">
/// List of column start positions; forwarded to AnalyzeTrStructure for every row.
/// </param>
/// <param name="activeRowSpans">
/// Row span bookkeeping list; cleared here and forwarded to AnalyzeTrStructure for every row.
/// </param>
/// <param name="stylesheet">Stylesheet forwarded to AnalyzeTrStructure.</param>
/// <returns>
/// The largest width reported for any tr child. Returns 0 when the tbody has no children or when a td
/// is found directly under the tbody (the structure is then considered non-analyzable).
/// </returns>
private static double AnalyzeTbodyStructure(
    XmlNode htmlTbodyElement,
    ArrayList columnStarts,
    ArrayList activeRowSpans,
    CssStylesheet stylesheet)
{
    // Parameter validation
    Debug.Assert(htmlTbodyElement.LocalName.ToLower() == "tbody");
    Debug.Assert(columnStarts != null);

    double widestRow = 0;

    if (!htmlTbodyElement.HasChildNodes)
    {
        return widestRow;
    }

    // Reset active row spans - row spans crossing tbody boundaries are ignored
    ClearActiveRowSpans(activeRowSpans);

    bool analyzable = true;
    for (XmlNode child = htmlTbodyElement.FirstChild; child != null && analyzable; child = child.NextSibling)
    {
        string childName = child.LocalName.ToLower();
        if (childName == "tr")
        {
            double rowWidth = AnalyzeTrStructure(child, columnStarts, activeRowSpans, stylesheet);
            if (rowWidth > widestRow)
            {
                widestRow = rowWidth;
            }
        }
        else if (childName == "td")
        {
            // A td directly under tbody interrupts the analysis
            analyzable = false;
        }
    }

    // Reset active row spans again so nothing leaks into the next tbody
    ClearActiveRowSpans(activeRowSpans);

    if (!analyzable)
    {
        return 0;
    }

    return widestRow;
}
/// <summary>
/// Performs a parsing pass over a tr element to read information about column width and rowspan attributes.
/// </summary>
/// <param name="htmlTrElement">
/// Node representing the html tr element whose structure is to be analyzed.
/// </param>
/// <param name="columnStarts">
/// List of doubles holding the starting positions of the columns found so far, ordered from left to right.
/// New starting positions discovered in this row are inserted into it.
/// </param>
/// <param name="activeRowSpans">
/// List of ints parallel to <paramref name="columnStarts"/>. An entry greater than 0 means the column is
/// currently covered by a row span started in an earlier row and must not receive data in this row;
/// 0 means the column is free.
/// </param>
/// <param name="stylesheet">Must not be null; it is not otherwise used by this method.</param>
/// <returns>
/// The position at which the last analyzed td of the row ends, or 0 when the row has no children or
/// its column widths cannot be analyzed.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="stylesheet"/> is null.</exception>
private static double AnalyzeTrStructure(
    XmlNode htmlTrElement,
    ArrayList columnStarts,
    ArrayList activeRowSpans,
    CssStylesheet stylesheet)
{
    if (stylesheet == null)
    {
        throw new ArgumentNullException("stylesheet");
    }

    // Parameter validation
    Debug.Assert(htmlTrElement.LocalName.ToLowerInvariant() == "tr");
    Debug.Assert(columnStarts != null);
    Debug.Assert(activeRowSpans != null);
    Debug.Assert(columnStarts.Count == activeRowSpans.Count);

    if (!htmlTrElement.HasChildNodes)
    {
        return 0;
    }

    bool columnWidthsAvailable = true;
    double columnStart = 0; // starting position of current column
    int columnIndex = 0;

    // Skip spanned columns to get to the real start of the first cell
    SkipRowSpannedColumns(columnStarts, activeRowSpans, ref columnIndex, ref columnStart);

    XmlNode htmlChildNode = htmlTrElement.FirstChild;
    while (htmlChildNode != null && columnWidthsAvailable)
    {
        Debug.Assert(columnStarts.Count == activeRowSpans.Count);
        VerifyColumnStartsAscendingOrder(columnStarts);

        if (htmlChildNode.LocalName.ToLowerInvariant() == "td")
        {
            Debug.Assert(columnIndex <= columnStarts.Count);
            if (columnIndex < columnStarts.Count)
            {
                Debug.Assert(columnStart <= (double) columnStarts[columnIndex]);
                if (columnStart < (double) columnStarts[columnIndex])
                {
                    columnStarts.Insert(columnIndex, columnStart);

                    // There can be no row spans now - the column data will appear here.
                    // Row spans may appear only during the column analysis.
                    activeRowSpans.Insert(columnIndex, 0);
                }
            }
            else
            {
                // Column start is greater than all previous starts. Row span must still be 0 because
                // we are either adding after another column of the same row, in which case it should not
                // inherit the previous column's span. Otherwise we are adding after the last column of some
                // previous row, and assuming the table widths line up, we should not be spanned by it.
                // If there is an incorrect table structure where a column starts in the middle of a
                // row span, we do not guarantee correct output.
                columnStarts.Add(columnStart);
                activeRowSpans.Add(0);
            }

            double columnWidth = GetColumnWidth((XmlElement) htmlChildNode);

            // A width of -1 is treated as "incorrect column width" (see the else branch below).
            // The Math.Abs form is kept so that a NaN width is rejected as well.
            if (Math.Abs(columnWidth - -1) > 0)
            {
                int rowSpan = GetRowSpan((XmlElement) htmlChildNode);
                int nextColumnIndex = GetNextColumnIndex(columnIndex, columnWidth, columnStarts, activeRowSpans);
                if (nextColumnIndex != -1)
                {
                    // Entire column width can be processed without hitting a conflicting row span.
                    // This means that column widths line up and we can process them.
                    Debug.Assert(nextColumnIndex <= columnStarts.Count);

                    // Apply row span to affected columns
                    for (int spannedColumnIndex = columnIndex;
                        spannedColumnIndex < nextColumnIndex;
                        spannedColumnIndex++)
                    {
                        activeRowSpans[spannedColumnIndex] = rowSpan - 1;
                        Debug.Assert((int) activeRowSpans[spannedColumnIndex] >= 0);
                    }

                    columnIndex = nextColumnIndex;

                    // Calculate columnStart for the next cell
                    columnStart = columnStart + columnWidth;

                    // If the next cell lands exactly on an existing column it may be inside a spanned area.
                    // Otherwise it starts in the middle of another column, which has been checked already
                    // by the GetNextColumnIndex function.
                    SkipRowSpannedColumns(columnStarts, activeRowSpans, ref columnIndex, ref columnStart);
                }
                else
                {
                    // Full column width cannot be processed without a pre-existing row span.
                    // We cannot analyze widths.
                    columnWidthsAvailable = false;
                }
            }
            else
            {
                // Incorrect column width, stop processing
                columnWidthsAvailable = false;
            }
        }

        htmlChildNode = htmlChildNode.NextSibling;
    }

    // The width of the tr element is the position at which its last td element ends, which is
    // calculated in the columnStart value after each td element is processed.
    return columnWidthsAvailable ? columnStart : 0;
}

/// <summary>
/// Advances past columns that are still covered by a row span from an earlier row, consuming one row
/// of each span on the way. Nothing happens unless the current position coincides exactly with the
/// start of an existing column.
/// </summary>
/// <param name="columnStarts">List of doubles with the known column start positions.</param>
/// <param name="activeRowSpans">List of ints with the remaining row span per column.</param>
/// <param name="columnIndex">Index of the current column; moved forward past spanned columns.</param>
/// <param name="columnStart">
/// Start position of the current column; updated to the start of the column that
/// <paramref name="columnIndex"/> ends up on, when such a column exists.
/// </param>
private static void SkipRowSpannedColumns(
    ArrayList columnStarts,
    ArrayList activeRowSpans,
    ref int columnIndex,
    ref double columnStart)
{
    if (columnIndex >= activeRowSpans.Count)
    {
        return;
    }

    Debug.Assert((double) columnStarts[columnIndex] >= columnStart);

    // Exact comparison is intended: the caller classifies a start as either strictly less than the
    // existing one (a new column is inserted) or identical to it (existing column). The previous
    // "Math.Abs(difference) < 0" test could never be true, so spanned columns were never skipped.
    if ((double) columnStarts[columnIndex] != columnStart)
    {
        return;
    }

    // The new column may be in a spanned area
    while (columnIndex < activeRowSpans.Count && (int) activeRowSpans[columnIndex] > 0)
    {
        activeRowSpans[columnIndex] = (int) activeRowSpans[columnIndex] - 1;
        Debug.Assert((int) activeRowSpans[columnIndex] >= 0);
        columnIndex++;

        // When the last known column is spanned there is no following start to move to;
        // keep the current position instead of reading past the end of the list.
        if (columnIndex < columnStarts.Count)
        {
            columnStart = (double) columnStarts[columnIndex];
        }
    }
}
// .............................................................
//
// Inline Elements
//
// .............................................................

/// <summary>
/// Converts one html node into xaml inline content and adds it to <paramref name="xamlParentElement"/>.
/// Comments are passed to DefineInlineFragmentParent, text nodes become text runs, and elements from the
/// XHTML namespace are dispatched on their lowercased local name. Elements outside the XHTML namespace
/// and elements that are neither inline, block, image nor break are ignored.
/// </summary>
/// <param name="xamlParentElement">Parent xaml element to which the converted content is added.</param>
/// <param name="htmlNode">Html node to convert.</param>
/// <param name="inheritedProperties">Properties inherited from an outer context.</param>
/// <param name="stylesheet">Stylesheet forwarded to the element-specific converters.</param>
/// <param name="sourceContext">
/// Stack of html elements currently being converted; the element is pushed for the duration of its
/// conversion and popped afterwards.
/// </param>
private static void AddInline(
    XmlElement xamlParentElement,
    XmlNode htmlNode,
    Hashtable inheritedProperties,
    CssStylesheet stylesheet,
    List<XmlElement> sourceContext)
{
    var htmlComment = htmlNode as XmlComment;
    if (htmlComment != null)
    {
        DefineInlineFragmentParent(htmlComment, xamlParentElement);
        return;
    }

    if (htmlNode is XmlText)
    {
        AddTextRun(xamlParentElement, htmlNode.Value);
        return;
    }

    var htmlElement = htmlNode as XmlElement;
    if (htmlElement == null)
    {
        return;
    }

    // Check whether this is an html element
    if (htmlElement.NamespaceURI != HtmlParser.XhtmlNamespace)
    {
        return; // Skip non-html elements
    }

    // Identify element name. The invariant culture keeps the result independent of the current
    // thread culture (a Turkish culture would turn "IMG" into "ımg" and miss the case label below).
    string htmlElementName = htmlElement.LocalName.ToLowerInvariant();

    // Put source element to the stack
    sourceContext.Add(htmlElement);

    switch (htmlElementName)
    {
        case "a":
            AddHyperlink(xamlParentElement, htmlElement, inheritedProperties, stylesheet, sourceContext);
            break;

        case "img":
            AddImage(xamlParentElement, htmlElement, inheritedProperties, stylesheet, sourceContext);
            break;

        case "br":
        case "hr":
            // The label used to be "SelectHR", which a lowercased name can never equal, so horizontal
            // rules never reached AddBreak.
            // NOTE(review): confirm that AddBreak recognizes "hr" for its rule-specific handling.
            AddBreak(xamlParentElement, htmlElementName);
            break;

        default:
            if (HtmlSchema.IsInlineElement(htmlElementName) || HtmlSchema.IsBlockElement(htmlElementName))
            {
                // Note: actually we do not expect block elements here,
                // but if one happens to be here, we will treat it as a Span.
                AddSpanOrRun(xamlParentElement, htmlElement, inheritedProperties, stylesheet, sourceContext);
            }

            // Ignore all other elements non-(block/inline/image)
            break;
    }

    // Remove the element from the stack
    Debug.Assert(sourceContext.Count > 0 && sourceContext[sourceContext.Count - 1] == htmlElement);
    sourceContext.RemoveAt(sourceContext.Count - 1);
}
/// <summary>
/// Handles li elements found without a parent ul/ol element. If the element most recently added to
/// <paramref name="xamlParentElement"/> is a List, the items are added to that list; otherwise a new
/// List element is created, appended to the parent and used instead. Items are added for as long as
/// li elements appear sequentially; the first sibling that is not an li element (including a text node)
/// stops the addition.
/// </summary>
/// <param name="xamlParentElement">
/// Parent element for the list.
/// </param>
/// <param name="htmlLiElement">
/// First html li element without a parent list.
/// </param>
/// <param name="inheritedProperties">
/// Properties inherited from parent context; passed unchanged to every list item.
/// </param>
/// <param name="stylesheet">Stylesheet forwarded to AddListItem.</param>
/// <param name="sourceContext">Source context forwarded to AddListItem.</param>
/// <returns>
/// The last li element that was processed, or null when none was. The caller continues with the next
/// sibling of the returned element.
/// </returns>
private static XmlElement AddOrphanListItems(
    XmlNode xamlParentElement,
    XmlNode htmlLiElement,
    Hashtable inheritedProperties,
    CssStylesheet stylesheet,
    List<XmlElement> sourceContext)
{
    Debug.Assert(htmlLiElement.LocalName.ToLowerInvariant() == "li");

    // Find out the last element attached to the xamlParentElement, which is the previous sibling
    // of the list items we are about to add.
    XmlNode previousXamlSibling = xamlParentElement.LastChild;

    XmlElement xamlListElement = null;
    if (previousXamlSibling != null && previousXamlSibling.LocalName == XamlList)
    {
        // Previously added xaml element was a list. We will add the new li to it.
        xamlListElement = (XmlElement) previousXamlSibling;
    }
    else if (xamlParentElement.OwnerDocument != null)
    {
        // No list element near. Create our own.
        xamlListElement = xamlParentElement.OwnerDocument.CreateElement(null, XamlList, XamlNamespace);
        xamlParentElement.AppendChild(xamlListElement);
    }

    // Current element properties missed here.
    //currentProperties = GetElementProperties(htmlLIElement, inheritedProperties, out localProperties, stylesheet);

    // Add li elements to the list for as long as they appear sequentially.
    // Properties inherited from xamlParentElement are used for context.
    XmlElement lastProcessedListItemElement = null;
    XmlNode htmlChildNode = htmlLiElement;

    // The invariant culture is used for lowercasing: under a Turkish culture "LI" lowercases to
    // "lı", which would end the run of list items prematurely.
    while (htmlChildNode != null && htmlChildNode.LocalName.ToLowerInvariant() == "li")
    {
        var htmlListItem = htmlChildNode as XmlElement;
        if (htmlListItem == null)
        {
            // A non-element node that merely carries the name "li" is not a list item.
            break;
        }

        AddListItem(xamlListElement, htmlListItem, inheritedProperties, stylesheet, sourceContext);
        lastProcessedListItemElement = htmlListItem;
        htmlChildNode = htmlChildNode.NextSibling;
    }

    return lastProcessedListItemElement;
}
/// <summary>
/// Fills a xaml TableCell with converted content by running AddBlock over
/// <paramref name="htmlDataStartNode"/> and each of the siblings that follow it.
/// </summary>
/// <param name="xamlTableCellElement">
/// Xaml TableCell element that receives the converted data.
/// </param>
/// <param name="htmlDataStartNode">
/// First html node of the data to convert; may be null, in which case nothing is added.
/// </param>
/// <param name="currentProperties">
/// Current properties of the html td/th element corresponding to the cell; handed to AddBlock as
/// inherited properties.
/// </param>
/// <param name="stylesheet">Stylesheet forwarded to AddBlock.</param>
/// <param name="sourceContext">Source context forwarded to AddBlock.</param>
private static void AddDataToTableCell(
    XmlElement xamlTableCellElement,
    XmlNode htmlDataStartNode,
    Hashtable currentProperties,
    CssStylesheet stylesheet,
    List<XmlElement> sourceContext)
{
    // Parameter validation
    Debug.Assert(xamlTableCellElement.LocalName == XamlTableCell);
    Debug.Assert(currentProperties != null);

    XmlNode currentNode = htmlDataStartNode;
    while (currentNode != null)
    {
        // AddBlock may consume several siblings; continue after the last node it reports
        XmlNode lastProcessedNode = AddBlock(
            xamlTableCellElement, currentNode, currentProperties, stylesheet, sourceContext);

        if (lastProcessedNode == null)
        {
            currentNode = null;
        }
        else
        {
            currentNode = lastProcessedNode.NextSibling;
        }
    }
}
// .............................................................
//
// Images
//
// .............................................................

/// <summary>
/// Placeholder for image conversion. At present it only validates its arguments; nothing is added
/// to <paramref name="xamlParentElement"/>.
/// </summary>
/// <param name="xamlParentElement">Xaml parent element; must not be null.</param>
/// <param name="htmlElement">Html element to convert; must not be null.</param>
/// <param name="inheritedProperties">Properties inherited from the outer context; must not be null.</param>
/// <param name="stylesheet">Stylesheet; must not be null.</param>
/// <param name="sourceContext">Source context; must not be null.</param>
/// <exception cref="ArgumentNullException">
/// Any argument is null. Arguments are checked in declaration order.
/// </exception>
private static void AddImage(
    XmlElement xamlParentElement,
    XmlElement htmlElement,
    Hashtable inheritedProperties,
    CssStylesheet stylesheet,
    List<XmlElement> sourceContext)
{
    // Argument guards, checked in declaration order
    if (xamlParentElement == null)
    {
        throw new ArgumentNullException("xamlParentElement");
    }

    if (htmlElement == null)
    {
        throw new ArgumentNullException("htmlElement");
    }

    if (inheritedProperties == null)
    {
        throw new ArgumentNullException("inheritedProperties");
    }

    if (stylesheet == null)
    {
        throw new ArgumentNullException("stylesheet");
    }

    if (sourceContext == null)
    {
        throw new ArgumentNullException("sourceContext");
    }

    // Implement images
}
/// <summary>
/// Adds TableCell elements to <paramref name="xamlTableRowElement"/>, one for every td/th element found
/// among <paramref name="htmlTdStartNode"/> and its following siblings, stopping at the first
/// tr, tbody, thead or tfoot sibling. Cells that end up without content are not added to the row.
/// </summary>
/// <param name="xamlTableRowElement">
/// Xaml TableRow element to which the converted cells should be added.
/// </param>
/// <param name="htmlTdStartNode">
/// Child of an html tr or tbody element from which we should start adding td elements.
/// </param>
/// <param name="currentProperties">
/// Properties of the current html tr element to which cells are to be added.
/// </param>
/// <param name="columnStarts">
/// Column start positions used to compute column spans, or null when no column information is
/// available (no column span is written in that case).
/// </param>
/// <param name="activeRowSpans">
/// List of ints with the remaining row span per column; read and updated only when
/// <paramref name="columnStarts"/> is not null.
/// </param>
/// <param name="stylesheet">Stylesheet forwarded to the property and cell content converters.</param>
/// <param name="sourceContext">
/// Stack of html elements currently being converted; each cell is pushed for the duration of its
/// conversion and popped afterwards.
/// </param>
/// <returns>
/// The sibling at which processing stopped (a tr, tbody, thead or tfoot node), or null when all
/// siblings were consumed.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="xamlTableRowElement"/> is null.</exception>
private static XmlNode AddTableCellsToTableRow(
    XmlNode xamlTableRowElement,
    XmlNode htmlTdStartNode,
    Hashtable currentProperties,
    ArrayList columnStarts,
    IList activeRowSpans,
    CssStylesheet stylesheet,
    List<XmlElement> sourceContext)
{
    if (xamlTableRowElement == null)
    {
        throw new ArgumentNullException("xamlTableRowElement");
    }

    // Parameter validation
    Debug.Assert(xamlTableRowElement.LocalName == XamlTableRow);
    Debug.Assert(currentProperties != null);
    if (columnStarts != null)
    {
        Debug.Assert(activeRowSpans.Count == columnStarts.Count);
    }

    XmlDocument xamlDocument = xamlTableRowElement.OwnerDocument;
    XmlNode htmlChildNode = htmlTdStartNode;
    int columnIndex = 0;

    while (htmlChildNode != null)
    {
        // Lowercase once per node instead of once per comparison
        string htmlChildName = htmlChildNode.LocalName.ToLowerInvariant();

        if (htmlChildName == "tr" || htmlChildName == "tbody" || htmlChildName == "thead" ||
            htmlChildName == "tfoot")
        {
            // Start of the next row or row group: hand control back to the caller
            break;
        }

        // Anything that is not a td/th element is ignored.
        // TODO: Consider better recovery
        if ((htmlChildName == "td" || htmlChildName == "th") && xamlDocument != null)
        {
            var htmlCellElement = (XmlElement) htmlChildNode;
            XmlElement xamlTableCellElement = xamlDocument.CreateElement(null, XamlTableCell, XamlNamespace);

            // Push and pop are kept together inside this branch. Previously the pop ran even when
            // the push had been skipped because the row had no owner document, which removed an
            // unrelated entry from the stack.
            sourceContext.Add(htmlCellElement);

            Hashtable tdElementLocalProperties;
            Hashtable tdElementCurrentProperties = GetElementProperties(
                htmlCellElement, currentProperties, out tdElementLocalProperties, stylesheet, sourceContext);

            // TODO: determine if localProperties can be used instead of htmlChildNode in this call,
            // and if they can, make necessary changes and use them instead.
            ApplyPropertiesToTableCellElement(htmlCellElement, xamlTableCellElement);

            if (columnStarts != null)
            {
                Debug.Assert(columnIndex < columnStarts.Count - 1);

                // Skip columns still covered by a row span from an earlier row, consuming one row of
                // each span on the way
                while (columnIndex < activeRowSpans.Count && (int) activeRowSpans[columnIndex] > 0)
                {
                    activeRowSpans[columnIndex] = (int) activeRowSpans[columnIndex] - 1;
                    Debug.Assert((int) activeRowSpans[columnIndex] >= 0);
                    columnIndex++;
                }

                Debug.Assert(columnIndex < columnStarts.Count - 1);

                double columnWidth = GetColumnWidth(htmlCellElement);
                int columnSpan = CalculateColumnSpan(columnIndex, columnWidth, columnStarts);
                int rowSpan = GetRowSpan(htmlCellElement);

                // Column cannot have no span
                Debug.Assert(columnSpan > 0);
                Debug.Assert(columnIndex + columnSpan < columnStarts.Count);

                xamlTableCellElement.SetAttribute(
                    XamlTableCellColumnSpan, columnSpan.ToString(CultureInfo.InvariantCulture));

                // Apply row span
                for (int spannedColumnIndex = columnIndex;
                    spannedColumnIndex < columnIndex + columnSpan;
                    spannedColumnIndex++)
                {
                    Debug.Assert(spannedColumnIndex < activeRowSpans.Count);
                    activeRowSpans[spannedColumnIndex] = rowSpan - 1;
                    Debug.Assert((int) activeRowSpans[spannedColumnIndex] >= 0);
                }

                columnIndex = columnIndex + columnSpan;
            }

            AddDataToTableCell(
                xamlTableCellElement,
                htmlCellElement.FirstChild,
                tdElementCurrentProperties,
                stylesheet,
                sourceContext);

            if (xamlTableCellElement.HasChildNodes)
            {
                xamlTableRowElement.AppendChild(xamlTableCellElement);
            }

            Debug.Assert(sourceContext.Count > 0 && sourceContext[sourceContext.Count - 1] == htmlCellElement);
            sourceContext.RemoveAt(sourceContext.Count - 1);
        }

        htmlChildNode = htmlChildNode.NextSibling;
    }

    return htmlChildNode;
}
/// <summary>
/// Creates an empty xaml TableColumn element for <paramref name="htmlColElement"/> and appends it to
/// <paramref name="xamlTableElement"/>. Nothing is appended when the parent has no owner document.
/// </summary>
/// <param name="xamlTableElement">Xaml node that receives the new TableColumn element.</param>
/// <param name="htmlColElement">
/// Html col element to be converted.
/// </param>
/// <param name="inheritedProperties">
/// Properties inherited from parent context.
/// </param>
/// <param name="stylesheet">Stylesheet forwarded to GetElementProperties.</param>
/// <param name="sourceContext">Source context forwarded to GetElementProperties.</param>
private static void AddTableColumn(
    XmlNode xamlTableElement,
    XmlElement htmlColElement,
    Hashtable inheritedProperties,
    CssStylesheet stylesheet,
    List<XmlElement> sourceContext)
{
    // The properties are computed but not applied yet.
    // TODO: process local properties for TableColumn element
    Hashtable localProperties;
    GetElementProperties(htmlColElement, inheritedProperties, out localProperties, stylesheet, sourceContext);

    XmlDocument xamlDocument = xamlTableElement.OwnerDocument;
    if (xamlDocument != null)
    {
        // Col is an empty element, with no subtree
        XmlElement xamlColumn = xamlDocument.CreateElement(null, XamlTableColumn, XamlNamespace);
        xamlTableElement.AppendChild(xamlColumn);
    }
}
/// <summary>
/// Converts an inline html element into a xaml Span or Run and appends it to
/// <paramref name="xamlParentElement"/>. A Span is used when the element has at least one child element
/// that is an inline, a block, an image or a break; otherwise a Run is used. The children are then
/// converted with AddInline into the new element.
/// </summary>
/// <param name="xamlParentElement">Xaml node that receives the new Span or Run.</param>
/// <param name="htmlElement">Html element to convert.</param>
/// <param name="inheritedProperties">Properties inherited from the outer context.</param>
/// <param name="stylesheet">Stylesheet forwarded to the property and inline converters.</param>
/// <param name="sourceContext">Source context forwarded to the property and inline converters.</param>
private static void AddSpanOrRun(
    XmlNode xamlParentElement,
    XmlElement htmlElement,
    Hashtable inheritedProperties,
    CssStylesheet stylesheet,
    List<XmlElement> sourceContext)
{
    // Decide what XAML element to use for this inline element.
    // Check whether it contains any nested inlines.
    bool elementHasChildren = false;
    for (XmlNode htmlNode = htmlElement.FirstChild; htmlNode != null; htmlNode = htmlNode.NextSibling)
    {
        if (!(htmlNode is XmlElement))
        {
            continue;
        }

        // Invariant lowercasing keeps "IMG" recognizable regardless of the current thread culture
        string htmlChildName = htmlNode.LocalName.ToLowerInvariant();

        // "hr" replaces the former "SelectHR" literal, which a lowercased name could never equal
        if (HtmlSchema.IsInlineElement(htmlChildName) ||
            HtmlSchema.IsBlockElement(htmlChildName) ||
            htmlChildName == "img" ||
            htmlChildName == "br" ||
            htmlChildName == "hr")
        {
            elementHasChildren = true;
            break;
        }
    }

    string xamlElementName = elementHasChildren ? XamlSpan : XamlRun;

    // Create currentProperties as a compilation of local and inheritedProperties, set localProperties
    Hashtable localProperties;
    Hashtable currentProperties = GetElementProperties(
        htmlElement, inheritedProperties, out localProperties, stylesheet, sourceContext);

    // Create a XAML element corresponding to this html element
    if (xamlParentElement.OwnerDocument == null)
    {
        return;
    }

    XmlElement xamlElement = xamlParentElement.OwnerDocument.CreateElement(
        /*prefix:*/ null, /*localName:*/ xamlElementName, XamlNamespace);
    ApplyLocalProperties(xamlElement, localProperties, /*isBlock:*/ false);

    // Recurse into element subtree
    for (XmlNode htmlChildNode = htmlElement.FirstChild;
        htmlChildNode != null;
        htmlChildNode = htmlChildNode.NextSibling)
    {
        AddInline(xamlElement, htmlChildNode, currentProperties, stylesheet, sourceContext);
    }

    // Add the new element to the parent.
    xamlParentElement.AppendChild(xamlElement);
}
// .............................................................
//
// Tables
//
// .............................................................

/// <summary>
/// Converts htmlTableElement to a xaml Table element. Rows found directly under the table are wrapped
/// into a row group so that the resulting xaml Table element is properly formed. A table recognized by
/// GetCellFromSingleCellTable as having a single cell is not converted to a Table at all: the content
/// of that cell is added directly to the parent.
/// </summary>
/// <param name="xamlParentElement">
/// Parent xaml element to which a converted table must be added.
/// </param>
/// <param name="htmlTableElement">
/// Html table element to be converted.
/// </param>
/// <param name="inheritedProperties">
/// Hashtable representing properties inherited from parent context.
/// </param>
/// <param name="stylesheet">Stylesheet forwarded to the structure analysis and the converters.</param>
/// <param name="sourceContext">
/// Stack of html elements currently being converted; skipped table elements and row groups are pushed
/// for the duration of their conversion and popped afterwards.
/// </param>
private static void AddTable(
    XmlElement xamlParentElement,
    XmlElement htmlTableElement,
    Hashtable inheritedProperties,
    CssStylesheet stylesheet,
    List<XmlElement> sourceContext)
{
    // Parameter validation
    Debug.Assert(htmlTableElement.LocalName.ToLowerInvariant() == "table");
    Debug.Assert(xamlParentElement != null);
    Debug.Assert(inheritedProperties != null);

    // Create current properties to be used by children as inherited properties, set local properties
    Hashtable localProperties;
    Hashtable currentProperties = GetElementProperties(
        htmlTableElement, inheritedProperties, out localProperties, stylesheet, sourceContext);

    // TODO: process localProperties for tables to override defaults, decide cell spacing defaults

    // Check if the table contains only one cell - we want to take only its content
    XmlElement singleCell = GetCellFromSingleCellTable(htmlTableElement);
    if (singleCell != null)
    {
        // Need to push skipped table elements onto sourceContext
        sourceContext.Add(singleCell);

        // Add the cell's content directly to parent
        XmlNode htmlCellChild = singleCell.FirstChild;
        while (htmlCellChild != null)
        {
            // AddBlock may consume several siblings; continue after the last node it reports
            htmlCellChild = AddBlock(
                xamlParentElement, htmlCellChild, currentProperties, stylesheet, sourceContext);
            if (htmlCellChild != null)
            {
                htmlCellChild = htmlCellChild.NextSibling;
            }
        }

        Debug.Assert(sourceContext.Count > 0 && sourceContext[sourceContext.Count - 1] == singleCell);
        sourceContext.RemoveAt(sourceContext.Count - 1);
        return;
    }

    XmlDocument xamlDocument = xamlParentElement.OwnerDocument;
    if (xamlDocument == null)
    {
        return;
    }

    // Create xamlTableElement
    XmlElement xamlTableElement = xamlDocument.CreateElement(null, XamlTable, XamlNamespace);

    // Analyze table structure for column widths and rowspan attributes
    ArrayList columnStarts = AnalyzeTableStructure(htmlTableElement, stylesheet);

    // Process COLGROUP & COL elements
    AddColumnInformation(
        htmlTableElement, xamlTableElement, columnStarts, currentProperties, stylesheet, sourceContext);

    // Process table body - TBODY and TR elements
    XmlNode htmlChildNode = htmlTableElement.FirstChild;
    while (htmlChildNode != null)
    {
        switch (htmlChildNode.LocalName.ToLowerInvariant())
        {
            case "tfoot":
            case "thead":
            case "tbody":
            {
                // Add more special processing for TableHeader and TableFooter
                var htmlRowGroupElement = (XmlElement) htmlChildNode;
                XmlElement xamlTableBodyElement =
                    xamlDocument.CreateElement(null, XamlTableRowGroup, XamlNamespace);

                sourceContext.Add(htmlRowGroupElement);

                // Get properties of Html tbody element
                Hashtable tbodyElementLocalProperties;
                Hashtable tbodyElementCurrentProperties = GetElementProperties(
                    htmlRowGroupElement,
                    currentProperties,
                    out tbodyElementLocalProperties,
                    stylesheet,
                    sourceContext);

                // TODO: apply local properties for tbody

                // Process children of htmlChildNode, which is tbody, for tr elements
                AddTableRowsToTableBody(
                    xamlTableBodyElement,
                    htmlRowGroupElement.FirstChild,
                    tbodyElementCurrentProperties,
                    columnStarts,
                    stylesheet,
                    sourceContext);

                // The row group is attached only after its rows are converted. If there are no TRs in
                // this TBody, we simply ignore it. (It used to be appended up front as well, which
                // kept empty row groups in the output.)
                if (xamlTableBodyElement.HasChildNodes)
                {
                    xamlTableElement.AppendChild(xamlTableBodyElement);
                }

                Debug.Assert(
                    sourceContext.Count > 0 && sourceContext[sourceContext.Count - 1] == htmlRowGroupElement);
                sourceContext.RemoveAt(sourceContext.Count - 1);

                htmlChildNode = htmlChildNode.NextSibling;
                break;
            }

            case "tr":
            {
                // Tbody is not present, but tr element is present. Tr is wrapped in tbody
                XmlElement xamlTableBodyElement =
                    xamlDocument.CreateElement(null, XamlTableRowGroup, XamlNamespace);

                // We use currentProperties of xamlTableElement when adding rows since the tbody element
                // is artificially created and has no properties of its own.
                // The returned node is where row processing stopped; the loop continues from there.
                htmlChildNode = AddTableRowsToTableBody(
                    xamlTableBodyElement,
                    htmlChildNode,
                    currentProperties,
                    columnStarts,
                    stylesheet,
                    sourceContext);

                if (xamlTableBodyElement.HasChildNodes)
                {
                    xamlTableElement.AppendChild(xamlTableBodyElement);
                }

                break;
            }

            default:
                // Element should not occur directly in table. Ignore it.
                htmlChildNode = htmlChildNode.NextSibling;
                break;
        }
    }

    if (xamlTableElement.HasChildNodes)
    {
        xamlParentElement.AppendChild(xamlTableElement);
    }
}
// .............................................................
//
// Text Flow Elements
//
// .............................................................

/// <summary>
///     Converts a section-like html element (DIV and the other names routed here by AddBlock).
///     When none of its child elements is a block element according to HtmlSchema.IsBlockElement,
///     the element is handed to AddParagraph. Otherwise every child is converted with AddBlock;
///     the converted blocks are wrapped in a xaml Section only when the element's local properties
///     produced at least one attribute, and are added directly to the parent otherwise.
/// </summary>
/// <param name="xamlParentElement">
///     XmlElement representing Xaml parent to which the converted element should be added
/// </param>
/// <param name="htmlElement">
///     XmlElement representing Html element to be converted
/// </param>
/// <param name="inheritedProperties">
///     properties inherited from parent context
/// </param>
/// <param name="stylesheet">Passed through to GetElementProperties and to the child converters.</param>
/// <param name="sourceContext">Passed through to GetElementProperties and to the child converters.</param>
private static void AddSection(
    XmlElement xamlParentElement,
    XmlElement htmlElement,
    Hashtable inheritedProperties,
    CssStylesheet stylesheet,
    List<XmlElement> sourceContext)
{
    // Analyze the content of htmlElement to decide what xaml element to choose - Section or Paragraph.
    // One block child is enough to require a Section.
    bool htmlElementContainsBlocks = false;
    for (XmlNode htmlChildNode = htmlElement.FirstChild;
        htmlChildNode != null;
        htmlChildNode = htmlChildNode.NextSibling)
    {
        if (!(htmlChildNode is XmlElement))
        {
            continue;
        }

        // Html names are case-insensitive. Lower-case them with the invariant culture: the
        // culture-sensitive ToLower() maps 'I' to a dotless 'ı' under Turkish cultures, which
        // would make names such as "DIV" or "LI" miss the schema lookup.
        string htmlChildName = htmlChildNode.LocalName.ToLowerInvariant();
        if (HtmlSchema.IsBlockElement(htmlChildName))
        {
            htmlElementContainsBlocks = true;
            break;
        }
    }

    if (!htmlElementContainsBlocks)
    {
        // No block children, so the element can be treated as a Paragraph
        AddParagraph(xamlParentElement, htmlElement, inheritedProperties, stylesheet, sourceContext);
        return;
    }

    // The element has nested blocks, so it is treated as a Section.
    // Create currentProperties as a compilation of local and inheritedProperties, set localProperties
    Hashtable localProperties;
    Hashtable currentProperties = GetElementProperties(
        htmlElement,
        inheritedProperties,
        out localProperties,
        stylesheet,
        sourceContext);

    if (xamlParentElement.OwnerDocument == null)
    {
        // Without an owner document no xaml element can be created; nothing is added.
        return;
    }

    // Create a XAML element corresponding to this html element
    XmlElement xamlElement = xamlParentElement.OwnerDocument.CreateElement(
        /*prefix:*/ null,
        /*localName:*/ XamlSection,
        XamlNamespace);
    ApplyLocalProperties(xamlElement, localProperties, /*isBlock:*/ true);

    // Decide whether we can unwrap this element as not having any formatting significance.
    if (!xamlElement.HasAttributes)
    {
        // A group of blocks without any additional formatting: add the blocks directly to
        // xamlParentElement and avoid creating unnecessary Section nesting.
        xamlElement = xamlParentElement;
    }

    // Recurse into element subtree. AddBlock returns the last html node it consumed (it may
    // consume following siblings too), so iteration continues from that node; a null result
    // ends the loop.
    for (XmlNode htmlChildNode = htmlElement.FirstChild;
        htmlChildNode != null;
        htmlChildNode = htmlChildNode != null ? htmlChildNode.NextSibling : null)
    {
        htmlChildNode = AddBlock(xamlElement, htmlChildNode, currentProperties, stylesheet, sourceContext);
    }

    // Add the new element to the parent, unless the blocks were added to the parent directly.
    if (xamlElement != xamlParentElement)
    {
        xamlParentElement.AppendChild(xamlElement);
    }
}
/// <summary>
///     Converts an html string into a xaml string.
/// </summary>
/// <param name="htmlString">
///     Input html, which may be badly formatted xml.
/// </param>
/// <param name="asFlowDocument">
///     true makes the root element a FlowDocument;
///     false makes it a Section, which ExtractInlineFragment may then replace
///     depending on StartFragment/EndFragment comment locations.
/// </param>
/// <returns>
///     Well-formed xml representing the XAML equivalent of the input html string.
/// </returns>
public static string ConvertHtmlToXaml(
    string htmlString,
    bool asFlowDocument)
{
    // Turn the html text into an xml tree
    XmlElement htmlRoot = HtmlParser.ParseHtml(htmlString);

    // Pick the name of the xaml root element
    string xamlRootName;
    if (asFlowDocument)
    {
        xamlRootName = XamlFlowDocument;
    }
    else
    {
        xamlRootName = XamlSection;
    }

    // The xaml tree is built inside its own XmlDocument
    var xamlDocument = new XmlDocument();
    XmlElement xamlRoot = xamlDocument.CreateElement(null, xamlRootName, XamlNamespace);

    // Stylesheet built from the parsed html tree
    var stylesheet = new CssStylesheet(htmlRoot);

    // Source context: the stack of html ancestors of the element being converted
    var sourceContext = new List<XmlElement>(10);

    // Forget any fragment parent left over from a previous conversion
    _inlineFragmentParentElement = null;

    // Convert the html tree, starting from its root
    AddBlock(xamlRoot, htmlRoot, new Hashtable(), stylesheet, sourceContext);

    // When no FlowDocument was requested, ExtractInlineFragment decides what the root is
    if (!asFlowDocument)
    {
        xamlRoot = ExtractInlineFragment(xamlRoot);
    }

    // Serialize the root with whitespace preservation switched on
    xamlRoot.SetAttribute("xml:space", "preserve");
    return xamlRoot.OuterXml;
}
/// <summary>
///     Creates a xaml Paragraph for the given html element, converts every child of the html
///     element into the paragraph through AddInline, and appends the paragraph to the xaml parent.
/// </summary>
/// <param name="xamlParentElement">
///     Xaml parent node which receives the new Paragraph
/// </param>
/// <param name="htmlElement">
///     Html element to be converted
/// </param>
/// <param name="inheritedProperties">
///     properties inherited from the parent context
/// </param>
/// <param name="stylesheet">Passed through to GetElementProperties and AddInline.</param>
/// <param name="sourceContext">Passed through to GetElementProperties and AddInline.</param>
private static void AddParagraph(
    XmlNode xamlParentElement,
    XmlElement htmlElement,
    Hashtable inheritedProperties,
    CssStylesheet stylesheet,
    List<XmlElement> sourceContext)
{
    // Properties defined by the element itself, and their merge with the inherited ones
    Hashtable ownProperties;
    Hashtable mergedProperties = GetElementProperties(
        htmlElement,
        inheritedProperties,
        out ownProperties,
        stylesheet,
        sourceContext);

    XmlDocument xamlDocument = xamlParentElement.OwnerDocument;
    if (xamlDocument != null)
    {
        // The xaml counterpart of the html element
        XmlElement paragraph = xamlDocument.CreateElement(
            /*prefix:*/ null,
            /*localName:*/ XamlParagraph,
            XamlNamespace);
        ApplyLocalProperties(paragraph, ownProperties, /*isBlock:*/ true);

        // Every html child becomes inline content of the paragraph
        XmlNode inlineSource = htmlElement.FirstChild;
        while (inlineSource != null)
        {
            AddInline(paragraph, inlineSource, mergedProperties, stylesheet, sourceContext);
            inlineSource = inlineSource.NextSibling;
        }

        // The paragraph is appended even when it ended up empty
        xamlParentElement.AppendChild(paragraph);
    }
}
/// <summary>
///     Analyzes the tag of the htmlElement and infers its associated formatting properties.
///     After that HtmlCssParser.GetElementPropertiesFromCssAttributes is given the chance to
///     add to or override them. The resulting properties are collected in the output
///     parameter localProperties.
/// </summary>
/// <param name="htmlElement">Html element whose formatting properties are determined.</param>
/// <param name="inheritedProperties">
///     Set of properties inherited from ancestor elements. They are copied into the returned
///     table; the dictionary itself is not modified.
/// </param>
/// <param name="localProperties">
///     Receives a new table with the formatting properties defined by this element: the defaults
///     implied by its tag plus whatever the css step adds.
/// </param>
/// <param name="stylesheet">Passed through to HtmlCssParser.GetElementPropertiesFromCssAttributes.</param>
/// <param name="sourceContext">Passed through to HtmlCssParser.GetElementPropertiesFromCssAttributes.</param>
/// <returns>
///     A new table holding the inherited properties overridden by the local properties.
/// </returns>
private static Hashtable GetElementProperties(
    XmlElement htmlElement,
    IDictionary inheritedProperties,
    out Hashtable localProperties,
    CssStylesheet stylesheet,
    List<XmlElement> sourceContext)
{
    // Start with context formatting properties.
    // IDictionaryEnumerator is used on purpose: it exposes Key/Value for any IDictionary
    // implementation, regardless of the element type its plain enumerator yields.
    var currentProperties = new Hashtable();
    IDictionaryEnumerator propertyEnumerator = inheritedProperties.GetEnumerator();
    while (propertyEnumerator.MoveNext())
    {
        currentProperties[propertyEnumerator.Key] = propertyEnumerator.Value;
    }

    // Identify element name. Html names are case-insensitive; the invariant culture is used
    // because the culture-sensitive ToLower() turns 'I' into a dotless 'ı' under Turkish
    // cultures, so "I", "ITALIC", "DIV" ... would not match the cases below.
    string elementName = htmlElement.LocalName.ToLowerInvariant();

    // Update current formatting properties depending on element tag
    localProperties = new Hashtable();
    switch (elementName)
    {
        // Character formatting
        case "i":
        case "italic":
        case "em":
            localProperties["font-style"] = "italic";
            break;
        case "b":
        case "bold":
        case "strong":
        case "dfn":
            localProperties["font-weight"] = "bold";
            break;
        case "u":
        case "underline":
            localProperties["text-decoration-underline"] = "true";
            break;
        case "font":
            string attributeValue = GetAttribute(htmlElement, "face");
            if (attributeValue != null)
            {
                localProperties["font-family"] = attributeValue;
            }

            attributeValue = GetAttribute(htmlElement, "size");
            if (attributeValue != null)
            {
                // The attribute comes from arbitrary html: parse it culture-independently and
                // skip values that are not numbers (e.g. "3px") instead of letting a
                // FormatException abort the whole conversion.
                double fontSize;
                if (double.TryParse(
                        attributeValue,
                        NumberStyles.Float,
                        CultureInfo.InvariantCulture,
                        out fontSize)
                    && !double.IsNaN(fontSize))
                {
                    // Html font size N is scaled by 12/3 and clamped to [1, 1000]
                    fontSize *= 12.0 / 3.0;
                    if (fontSize < 1.0)
                    {
                        fontSize = 1.0;
                    }
                    else if (fontSize > 1000.0)
                    {
                        fontSize = 1000.0;
                    }

                    localProperties["font-size"] = fontSize.ToString(CultureInfo.InvariantCulture);
                }
            }

            attributeValue = GetAttribute(htmlElement, "color");
            if (attributeValue != null)
            {
                localProperties["color"] = attributeValue;
            }

            break;
        case "samp":
            // Same defaults as "pre" below
            localProperties["font-family"] = "Courier New";
            localProperties["font-size"] = XamlFontSizeXxSmall;
            localProperties["text-align"] = "Left";
            break;
        case "sub":
            break;
        case "sup":
            break;

        // Hyperlinks
        case "a": // href, hreflang, urn, methods, rel, rev, title
            // Set default hyperlink properties
            break;
        case "acronym":
            break;

        // Paragraph formatting:
        case "p":
            // Set default paragraph properties
            break;
        case "div":
            // Set default div properties
            break;
        case "pre":
            localProperties["font-family"] = "Courier New"; // renders text in a fixed-width font
            localProperties["font-size"] = XamlFontSizeXxSmall;
            localProperties["text-align"] = "Left";
            break;
        case "blockquote":
            localProperties["margin-left"] = "16";
            break;

        case "h1":
            localProperties["font-size"] = XamlFontSizeXxLarge;
            break;
        case "h2":
            localProperties["font-size"] = XamlFontSizeXLarge;
            break;
        case "h3":
            localProperties["font-size"] = XamlFontSizeLarge;
            break;
        case "h4":
            localProperties["font-size"] = XamlFontSizeMedium;
            break;
        case "h5":
            localProperties["font-size"] = XamlFontSizeSmall;
            break;
        case "h6":
            localProperties["font-size"] = XamlFontSizeXSmall;
            break;

        // List properties
        case "ul":
            // "disc" is the css list-style-type keyword for bullets; the previous literal
            // "diStockControl" was not a css value.
            // NOTE(review): confirm ApplyLocalProperties expects "disc" here.
            localProperties["list-style-type"] = "disc";
            break;
        case "ol":
            localProperties["list-style-type"] = "decimal";
            break;

        case "table":
        case "body":
        case "html":
            break;
    }

    // Override html defaults by css attributes - from stylesheets and inline settings
    HtmlCssParser.GetElementPropertiesFromCssAttributes(
        htmlElement,
        elementName,
        stylesheet,
        localProperties,
        sourceContext);

    // Combine local properties with context to create new current properties
    propertyEnumerator = localProperties.GetEnumerator();
    while (propertyEnumerator.MoveNext())
    {
        currentProperties[propertyEnumerator.Key] = propertyEnumerator.Value;
    }

    return currentProperties;
}
/// <summary>
///     Processes an html colgroup element: every child element named "col" is handed to
///     AddTableColumn together with the xaml table element. Other children are skipped.
/// </summary>
/// <param name="xamlTableElement">
///     Xaml Table element which is passed to AddTableColumn for every col child
/// </param>
/// <param name="htmlColgroupElement">
///     Html colgroup element to be converted
/// </param>
/// <param name="inheritedProperties">
///     Properties inherited from parent context
/// </param>
/// <param name="stylesheet">Passed through to GetElementProperties and AddTableColumn.</param>
/// <param name="sourceContext">Passed through to GetElementProperties and AddTableColumn.</param>
private static void AddTableColumnGroup(
    XmlElement xamlTableElement,
    XmlElement htmlColgroupElement,
    Hashtable inheritedProperties,
    CssStylesheet stylesheet,
    List<XmlElement> sourceContext)
{
    Hashtable colgroupLocalProperties;
    Hashtable colgroupProperties = GetElementProperties(
        htmlColgroupElement,
        inheritedProperties,
        out colgroupLocalProperties,
        stylesheet,
        sourceContext);

    // TODO: process local properties for colgroup

    // Walk the children of colgroup; only col elements are converted.
    XmlNode candidate = htmlColgroupElement.FirstChild;
    while (candidate != null)
    {
        var htmlColElement = candidate as XmlElement;
        if (htmlColElement != null && htmlColElement.LocalName.ToLower() == "col")
        {
            AddTableColumn(xamlTableElement, htmlColElement, colgroupProperties, stylesheet, sourceContext);
        }

        candidate = candidate.NextSibling;
    }
}
/// <summary>
///     Adds column information to the xaml table, either from precomputed column start
///     coordinates or from the COLGROUP and COL elements of the html table.
/// </summary>
/// <param name="htmlTableElement">
///     Node representing the source html table.
/// </param>
/// <param name="xamlTableElement">
///     XmlElement representing the resulting xaml table.
/// </param>
/// <param name="columnStartsAllRows">
///     List of doubles - column start coordinates, whose last item is the end of the table.
///     When it is not null, one xaml column is created per pair of neighbouring coordinates
///     and the source colgroup/col elements are ignored.
///     When it is null, the leading colgroup/col children of the html table are converted.
/// </param>
/// <param name="currentProperties">Passed through to AddTableColumnGroup and AddTableColumn.</param>
/// <param name="stylesheet">Passed through to AddTableColumnGroup and AddTableColumn.</param>
/// <param name="sourceContext">Passed through to AddTableColumnGroup and AddTableColumn.</param>
private static void AddColumnInformation(
    XmlNode htmlTableElement,
    XmlElement xamlTableElement,
    IList columnStartsAllRows,
    Hashtable currentProperties,
    CssStylesheet stylesheet,
    List<XmlElement> sourceContext)
{
    if (columnStartsAllRows == null)
    {
        // No coordinates derived from table cells: translate colgroups/cols from html as they are.
        XmlNode htmlChildNode = htmlTableElement.FirstChild;
        while (htmlChildNode != null)
        {
            string htmlChildName = htmlChildNode.LocalName.ToLower();
            if (htmlChildName == "colgroup")
            {
                // TODO: add column width information to this function as a parameter and process it
                AddTableColumnGroup(
                    xamlTableElement,
                    (XmlElement) htmlChildNode,
                    currentProperties,
                    stylesheet,
                    sourceContext);
            }
            else if (htmlChildName == "col")
            {
                AddTableColumn(
                    xamlTableElement,
                    (XmlElement) htmlChildNode,
                    currentProperties,
                    stylesheet,
                    sourceContext);
            }
            else if (htmlChildNode is XmlElement)
            {
                // Any other element belongs to the table body; column definitions are over.
                break;
            }

            htmlChildNode = htmlChildNode.NextSibling;
        }

        return;
    }

    // Coordinates are available: the width of a column is the distance between two
    // neighbouring starts. Without an owner document no column element can be created.
    XmlDocument xamlDocument = xamlTableElement.OwnerDocument;
    if (xamlDocument == null)
    {
        return;
    }

    for (int endIndex = 1; endIndex < columnStartsAllRows.Count; endIndex++)
    {
        XmlElement xamlColumnElement = xamlDocument.CreateElement(null, XamlTableColumn, XamlNamespace);
        double columnWidth = (double) columnStartsAllRows[endIndex] - (double) columnStartsAllRows[endIndex - 1];
        xamlColumnElement.SetAttribute(XamlWidth, columnWidth.ToString(CultureInfo.InvariantCulture));
        xamlTableElement.AppendChild(xamlColumnElement);
    }
}
/// <summary>
///     Adds TableRow elements to xamlTableBodyElement. The rows are converted from Html tr elements
///     that may be the children of an Html tbody element or of an Html table element with tbody
///     missing. A run of td nodes found without a tr is wrapped into one implicit row.
///     Iteration starts at htmlTrStartNode and stops at the end of the sibling list or at the
///     first node named "tbody".
/// </summary>
/// <param name="xamlTableBodyElement">
///     Node representing Xaml TableRowGroup element to which the converted rows should be added
/// </param>
/// <param name="htmlTrStartNode">
///     Html node from which iteration over the sibling list starts
/// </param>
/// <param name="currentProperties">
///     Properties used as inheritedProperties when converting tr elements
/// </param>
/// <param name="columnStarts">
///     Column start coordinates, or null. When not null, an active-row-span list is initialized
///     with the same count and both are passed to AddTableCellsToTableRow for tr elements.
/// </param>
/// <param name="stylesheet">Passed through to GetElementProperties and AddTableCellsToTableRow.</param>
/// <param name="sourceContext">
///     Stack of html ancestors; each tr is pushed while its cells are converted and popped afterwards.
/// </param>
/// <returns>
///     The node at which iteration stopped: null, or the first following node named "tbody"
/// </returns>
private static XmlNode AddTableRowsToTableBody(
    XmlNode xamlTableBodyElement,
    XmlNode htmlTrStartNode,
    Hashtable currentProperties,
    ArrayList columnStarts,
    CssStylesheet stylesheet,
    List<XmlElement> sourceContext)
{
    // Parameter validation
    Debug.Assert(xamlTableBodyElement.LocalName == XamlTableRowGroup);
    Debug.Assert(currentProperties != null);

    // Initialize child node for iterating through children to the first tr element
    XmlNode htmlChildNode = htmlTrStartNode;
    ArrayList activeRowSpans = null;
    if (columnStarts != null)
    {
        activeRowSpans = new ArrayList();
        InitializeActiveRowSpans(activeRowSpans, columnStarts.Count);
    }

    // Rows can only be created through the owner document of the row group.
    XmlDocument xamlDocument = xamlTableBodyElement.OwnerDocument;

    while (htmlChildNode != null)
    {
        // Html names are case-insensitive; compare culture-independently.
        string htmlChildName = htmlChildNode.LocalName.ToLowerInvariant();
        if (htmlChildName == "tbody")
        {
            break;
        }

        switch (htmlChildName)
        {
            case "tr":
                if (xamlDocument != null)
                {
                    XmlElement xamlTableRowElement = xamlDocument.CreateElement(
                        null,
                        XamlTableRow,
                        XamlNamespace);

                    sourceContext.Add((XmlElement) htmlChildNode);

                    // Get tr element properties
                    Hashtable trElementLocalProperties;
                    Hashtable trElementCurrentProperties = GetElementProperties(
                        (XmlElement) htmlChildNode,
                        currentProperties,
                        out trElementLocalProperties,
                        stylesheet,
                        sourceContext);

                    // TODO: apply local properties to tr element

                    AddTableCellsToTableRow(
                        xamlTableRowElement,
                        htmlChildNode.FirstChild,
                        trElementCurrentProperties,
                        columnStarts,
                        activeRowSpans,
                        stylesheet,
                        sourceContext);
                    if (xamlTableRowElement.HasChildNodes)
                    {
                        xamlTableBodyElement.AppendChild(xamlTableRowElement);
                    }

                    // Pop only what was pushed above. The pop is kept inside this branch so the
                    // context stack stays balanced when no owner document is available.
                    Debug.Assert(
                        sourceContext.Count > 0 && sourceContext[sourceContext.Count - 1] == htmlChildNode);
                    sourceContext.RemoveAt(sourceContext.Count - 1);
                }

                // Advance
                htmlChildNode = htmlChildNode.NextSibling;
                break;

            case "td":
                // Tr element is not present. We create one and add td elements to it
                if (xamlDocument != null)
                {
                    XmlElement xamlTableRowElement = xamlDocument.CreateElement(
                        null,
                        XamlTableRow,
                        XamlNamespace);

                    // This is incorrect formatting and the column starts should not be set in this case
                    Debug.Assert(columnStarts == null);

                    // AddTableCellsToTableRow returns the node from which iteration continues
                    htmlChildNode = AddTableCellsToTableRow(
                        xamlTableRowElement,
                        htmlChildNode,
                        currentProperties,
                        null,
                        null,
                        stylesheet,
                        sourceContext);
                    if (xamlTableRowElement.HasChildNodes)
                    {
                        xamlTableBodyElement.AppendChild(xamlTableRowElement);
                    }
                }
                else
                {
                    // Nothing can be created; still advance, otherwise the loop never ends.
                    htmlChildNode = htmlChildNode.NextSibling;
                }

                break;

            default:
                // Not a tr or td: skip it
                htmlChildNode = htmlChildNode.NextSibling;
                break;
        }
    }

    return htmlChildNode;
}
/// <summary>
///     Converts an html anchor element. Without an href attribute the element is handed to
///     AddSpanOrRun. Otherwise a xaml Hyperlink is created, the href is split at '#' into a
///     navigate uri and a target name, the html children are converted through AddInline, and
///     the Hyperlink is appended to the xaml parent.
/// </summary>
/// <param name="xamlParentElement">Xaml parent node which receives the converted element.</param>
/// <param name="htmlElement">Html anchor element to be converted.</param>
/// <param name="inheritedProperties">Properties inherited from the parent context.</param>
/// <param name="stylesheet">Passed through to the helpers called here.</param>
/// <param name="sourceContext">Passed through to the helpers called here.</param>
private static void AddHyperlink(
    XmlNode xamlParentElement,
    XmlElement htmlElement,
    Hashtable inheritedProperties,
    CssStylesheet stylesheet,
    List<XmlElement> sourceContext)
{
    string href = GetAttribute(htmlElement, "href");
    if (href == null)
    {
        // No href: the element is not converted as a hyperlink
        AddSpanOrRun(xamlParentElement, htmlElement, inheritedProperties, stylesheet, sourceContext);
        return;
    }

    // Properties defined by the element itself, and their merge with the inherited ones
    Hashtable localProperties;
    Hashtable currentProperties = GetElementProperties(
        htmlElement,
        inheritedProperties,
        out localProperties,
        stylesheet,
        sourceContext);

    XmlDocument xamlDocument = xamlParentElement.OwnerDocument;
    if (xamlDocument == null)
    {
        return;
    }

    XmlElement xamlHyperlink = xamlDocument.CreateElement(
        /*prefix:*/ null,
        /*localName:*/ XamlHyperlink,
        XamlNamespace);
    ApplyLocalProperties(xamlHyperlink, localProperties, /*isBlock:*/ false);

    // The part before '#' becomes NavigateUri
    string[] hrefParts = href.Split('#');
    if (hrefParts.Length > 0)
    {
        string navigateUri = hrefParts[0].Trim();
        if (navigateUri.Length > 0)
        {
            xamlHyperlink.SetAttribute(XamlHyperlinkNavigateUri, navigateUri);
        }
    }

    // The part after '#' becomes TargetName, but only when there is exactly one '#'
    if (hrefParts.Length == 2)
    {
        string targetName = hrefParts[1].Trim();
        if (targetName.Length > 0)
        {
            xamlHyperlink.SetAttribute(XamlHyperlinkTargetName, targetName);
        }
    }

    // Every html child becomes inline content of the hyperlink
    XmlNode inlineSource = htmlElement.FirstChild;
    while (inlineSource != null)
    {
        AddInline(xamlHyperlink, inlineSource, currentProperties, stylesheet, sourceContext);
        inlineSource = inlineSource.NextSibling;
    }

    xamlParentElement.AppendChild(xamlHyperlink);
}
// ReSharper restore FunctionRecursiveOnAllPaths
/// <summary>
///     Performs a parsing pass over a table to read information about column width and rowspan
///     attributes. This information is used to determine the starting point of each column.
///     The work is delegated to the three-argument overload.
/// </summary>
/// <remarks>
///     This overload used to call itself with a null table argument, which made it throw
///     ArgumentNullException, swallow it, and return null for every input, so the analysis
///     never ran.
/// </remarks>
/// <param name="htmlTableElement">
///     Html table to analyze. Anything that is not an XmlNode (including null) yields null.
/// </param>
/// <param name="stylesheet">Passed through to the analysis; null yields null.</param>
/// <returns>
///     ArrayList of type double which contains the function output. If analysis is successful, this
///     ArrayList contains all the points which are the starting position of any column in the table,
///     ordered from left to right. If analysis was impossible, null is returned.
/// </returns>
// ReSharper disable FunctionRecursiveOnAllPaths
private static ArrayList AnalyzeTableStructure(
    object htmlTableElement,
    CssStylesheet stylesheet)
{
    // Invalid arguments mean "no column information", as before; no exception escapes.
    var htmlTableNode = htmlTableElement as XmlNode;
    if (htmlTableNode == null || stylesheet == null)
    {
        return null;
    }

    try
    {
        // double.Epsilon is the smallest positive double, so the overload's
        // "Math.Abs(width - 0) < epsilon" test only matches a width of exactly zero.
        // NOTE(review): confirm that an exact-zero width is the intended failure signal.
        return AnalyzeTableStructure(htmlTableNode, stylesheet, double.Epsilon);
    }
    catch (Exception)
    {
        // The analysis is best-effort and this method has never let an exception escape;
        // null tells the caller that column information is not available.
        return null;
    }
}
/// <summary>
///     Creates a Paragraph element and adds all nodes starting from htmlNode
///     converted to appropriate Inlines. The run ends at the end of the sibling list or at the
///     first block element (per HtmlSchema.IsBlockElement), which is not consumed.
/// </summary>
/// <param name="xamlParentElement">
///     Xaml parent node to which the Paragraph is added when it received any content
/// </param>
/// <param name="htmlNode">
///     XmlNode starting a collection of implicitly wrapped inlines.
/// </param>
/// <param name="inheritedProperties">
///     properties inherited from parent context
/// </param>
/// <param name="stylesheet">Passed through to AddInline.</param>
/// <param name="sourceContext">Passed through to AddInline.</param>
/// <returns>
///     The last html node consumed by the implicit paragraph. Null when no node was consumed
///     or when the xaml parent has no owner document.
/// </returns>
private static XmlNode AddImplicitParagraph(
    XmlNode xamlParentElement,
    XmlNode htmlNode,
    Hashtable inheritedProperties,
    CssStylesheet stylesheet,
    List<XmlElement> sourceContext)
{
    if (xamlParentElement.OwnerDocument == null)
    {
        // Without an owner document no Paragraph can be created
        return null;
    }

    // Collect all non-block elements and wrap them into implicit Paragraph
    XmlElement xamlParagraph = xamlParentElement.OwnerDocument.CreateElement(
        /*prefix:*/ null,
        /*localName:*/ XamlParagraph,
        XamlNamespace);
    XmlNode lastNodeProcessed = null;
    while (htmlNode != null)
    {
        var node = htmlNode as XmlComment;
        if (node != null)
        {
            DefineInlineFragmentParent(node, /*xamlParentElement:*/ null);
        }
        else if (htmlNode is XmlText)
        {
            // Whitespace-only text does not produce a run
            if (htmlNode.Value.Trim().Length > 0)
            {
                AddTextRun(xamlParagraph, htmlNode.Value);
            }
        }
        else if (htmlNode is XmlElement)
        {
            // Html names are case-insensitive. Lower-case with the invariant culture: the
            // culture-sensitive ToLower() maps 'I' to a dotless 'ı' under Turkish cultures,
            // so "DIV" or "LI" would not be recognized as blocks and would be swallowed here.
            string htmlChildName = htmlNode.LocalName.ToLowerInvariant();
            if (HtmlSchema.IsBlockElement(htmlChildName))
            {
                // The sequence of non-block inlines ended. Stop implicit loop here.
                break;
            }

            AddInline(xamlParagraph, htmlNode, inheritedProperties, stylesheet, sourceContext);
        }

        // Store last processed node to return it at the end
        lastNodeProcessed = htmlNode;
        htmlNode = htmlNode.NextSibling;
    }

    // Add the Paragraph to the parent.
    // If only whitespace and comments have been encountered,
    // then there is nothing to add in the implicit paragraph; forget it.
    if (xamlParagraph.FirstChild != null)
    {
        xamlParentElement.AppendChild(xamlParagraph);
    }

    // The caller continues from the sibling after this node
    return lastNodeProcessed;
}
/// <summary>
///     Walks the direct children of an html table and collects the column start coordinates.
///     Children named "tbody" are measured by AnalyzeTbodyStructure and children named "tr" by
///     AnalyzeTrStructure; the widest result becomes the table width.
/// </summary>
/// <param name="htmlTableElement">
///     Node representing the Html table whose structure is to be analyzed
/// </param>
/// <param name="stylesheet">Passed through to AnalyzeTbodyStructure and AnalyzeTrStructure.</param>
/// <param name="epsilon">
///     Tolerance for the zero test: a measured width that does not exceed the current table width
///     and whose absolute value is below epsilon makes the analysis fail.
/// </param>
/// <returns>
///     ArrayList of doubles with the column starts collected by the helpers, followed by the table
///     width as the last item. Null when the table has no children, when a "td" is found directly
///     inside the table, or when a width fails the zero test.
/// </returns>
private static ArrayList AnalyzeTableStructure(
    XmlNode htmlTableElement,
    CssStylesheet stylesheet,
    double epsilon)
{
    if (htmlTableElement == null)
    {
        throw new ArgumentNullException("htmlTableElement");
    }

    // Parameter validation
    Debug.Assert(htmlTableElement.LocalName.ToLower() == "table");
    if (!htmlTableElement.HasChildNodes)
    {
        return null;
    }

    bool analysisFailed = false;
    var columnStarts = new ArrayList();
    var activeRowSpans = new ArrayList();
    Debug.Assert(columnStarts.Count == activeRowSpans.Count);

    // Width of the widest row (or row group) seen so far
    double tableWidth = 0;

    // Visit tbody and tr children until the list ends or the analysis fails
    for (XmlNode htmlChildNode = htmlTableElement.FirstChild;
        htmlChildNode != null && !analysisFailed;
        htmlChildNode = htmlChildNode.NextSibling)
    {
        Debug.Assert(columnStarts.Count == activeRowSpans.Count);

        string htmlChildName = htmlChildNode.LocalName.ToLower();
        if (htmlChildName == "tbody" || htmlChildName == "tr")
        {
            // A tbody is measured through its rows, a tr directly
            double measuredWidth;
            if (htmlChildName == "tbody")
            {
                measuredWidth = AnalyzeTbodyStructure(htmlChildNode, columnStarts, activeRowSpans, stylesheet);
            }
            else
            {
                measuredWidth = AnalyzeTrStructure(htmlChildNode, columnStarts, activeRowSpans, stylesheet);
            }

            if (measuredWidth > tableWidth)
            {
                // The table must be at least as wide as its widest row
                tableWidth = measuredWidth;
            }
            else if (Math.Abs(measuredWidth) < epsilon)
            {
                // A (near-)zero width is treated as a failed measurement; give up
                analysisFailed = true;
            }
        }
        else if (htmlChildName == "td")
        {
            // Incorrect formatting, too deep to analyze at this level.
            // TODO: implement analysis at this level, possibly by creating a new tr
            analysisFailed = true;
        }

        // Any other node is not expected directly in a table and is ignored.
    }

    if (analysisFailed)
    {
        return null;
    }

    // The last item stands for the whole table width
    columnStarts.Add(tableWidth);
    VerifyColumnStartsAscendingOrder(columnStarts);
    return columnStarts;
}
// .............................................................
//
// Lists
//
// .............................................................

/// <summary>
///     Converts Html ul or ol element into Xaml list element. Children of the ul/ol element
///     that are not li elements are ignored and not added to the list element. The list is
///     appended to the parent only when at least one item was added.
/// </summary>
/// <param name="xamlParentElement">
///     Xaml parent node to which the converted element should be added
/// </param>
/// <param name="htmlListElement">
///     XmlElement representing Html ul/ol element to be converted
/// </param>
/// <param name="inheritedProperties">
///     properties inherited from parent context
/// </param>
/// <param name="stylesheet">Passed through to GetElementProperties and AddListItem.</param>
/// <param name="sourceContext">
///     Stack of html ancestors; each li is pushed while it is converted and popped afterwards.
/// </param>
private static void AddList(
    XmlNode xamlParentElement,
    XmlElement htmlListElement,
    Hashtable inheritedProperties,
    CssStylesheet stylesheet,
    List<XmlElement> sourceContext)
{
    // Html names are case-insensitive. Lower-case with the invariant culture: the
    // culture-sensitive ToLower() maps 'I' to a dotless 'ı' under Turkish cultures.
    string htmlListElementName = htmlListElement.LocalName.ToLowerInvariant();

    Hashtable localProperties;
    Hashtable currentProperties = GetElementProperties(
        htmlListElement,
        inheritedProperties,
        out localProperties,
        stylesheet,
        sourceContext);

    // Create Xaml List element
    if (xamlParentElement.OwnerDocument == null)
    {
        return;
    }

    XmlElement xamlListElement = xamlParentElement.OwnerDocument.CreateElement(
        null,
        XamlList,
        XamlNamespace);

    // Set default list markers: decimal for ol, disc for everything else
    xamlListElement.SetAttribute(
        XamlListMarkerStyle,
        htmlListElementName == "ol" ? XamlListMarkerStyleDecimal : XamlListMarkerStyleDisc);

    // Apply local properties to list to set marker attribute if specified
    // TODO: Should we have separate list attribute processing function?
    ApplyLocalProperties(xamlListElement, localProperties, /*isBlock:*/ true);

    // Recurse into list subtree
    for (XmlNode htmlChildNode = htmlListElement.FirstChild;
        htmlChildNode != null;
        htmlChildNode = htmlChildNode.NextSibling)
    {
        var htmlLiElement = htmlChildNode as XmlElement;
        if (htmlLiElement == null || htmlLiElement.LocalName.ToLowerInvariant() != "li")
        {
            // Not an li element: ignored.
            // TODO: append such content to the end of the previous list item instead.
            continue;
        }

        sourceContext.Add(htmlLiElement);
        AddListItem(xamlListElement, htmlLiElement, currentProperties, stylesheet, sourceContext);
        Debug.Assert(sourceContext.Count > 0 && sourceContext[sourceContext.Count - 1] == htmlChildNode);
        sourceContext.RemoveAt(sourceContext.Count - 1);
    }

    // Add the List element to xaml tree - if it is not empty
    if (xamlListElement.HasChildNodes)
    {
        xamlParentElement.AppendChild(xamlListElement);
    }
}
/// <summary>
///     Creates a Xaml ListItem for an html li element, converts the children of the li into it
///     through AddBlock, and appends the item to the xaml list.
/// </summary>
/// <param name="xamlListElement">
///     Xaml List node which receives the new ListItem
/// </param>
/// <param name="htmlLiElement">
///     Html li element to be converted
/// </param>
/// <param name="inheritedProperties">
///     Properties inherited from parent context
/// </param>
/// <param name="stylesheet">Passed through to GetElementProperties and AddBlock.</param>
/// <param name="sourceContext">Passed through to GetElementProperties and AddBlock.</param>
private static void AddListItem(
    XmlNode xamlListElement,
    XmlElement htmlLiElement,
    Hashtable inheritedProperties,
    CssStylesheet stylesheet,
    List<XmlElement> sourceContext)
{
    // Debug-only sanity checks on the arguments
    Debug.Assert(xamlListElement != null);
    Debug.Assert(xamlListElement.LocalName == XamlList);
    Debug.Assert(htmlLiElement != null);
    Debug.Assert(htmlLiElement.LocalName.ToLower() == "li");
    Debug.Assert(inheritedProperties != null);

    Hashtable itemLocalProperties;
    Hashtable itemProperties = GetElementProperties(
        htmlLiElement,
        inheritedProperties,
        out itemLocalProperties,
        stylesheet,
        sourceContext);

    XmlDocument xamlDocument = xamlListElement.OwnerDocument;
    if (xamlDocument != null)
    {
        XmlElement xamlListItem = xamlDocument.CreateElement(null, XamlListItem, XamlNamespace);

        // TODO: process local properties for li element

        // Convert the content of the li. AddBlock returns the last html node it consumed, so
        // the walk continues from the sibling after that node; a null result ends the walk.
        XmlNode htmlContentNode = htmlLiElement.FirstChild;
        while (htmlContentNode != null)
        {
            htmlContentNode = AddBlock(xamlListItem, htmlContentNode, itemProperties, stylesheet, sourceContext);
            if (htmlContentNode != null)
            {
                htmlContentNode = htmlContentNode.NextSibling;
            }
        }

        // The item is appended even when it ended up empty
        xamlListElement.AppendChild(xamlListItem);
    }
}