WriteTo() public method

Saves the current node to a string.
public WriteTo ( ) : string
return string
    /// <summary>
    /// Saves an <see cref="HtmlNode"/> to the specified path using the given encoding.
    /// </summary>
    /// <remarks>
    /// What gets written depends on the node:
    /// <list type="bullet">
    /// <item><description>If the node has a parent, the parent's content is written (this includes the node's siblings).</description></item>
    /// <item><description>If the node has no parent and no children, the node itself is written.</description></item>
    /// <item><description>If the node has no parent and is named <c>HtmlNode.HtmlNodeTypeNameDocument</c>, its content is written.</description></item>
    /// <item><description>Any other parent-less node with children is not supported and is reported through <c>DebugBreakOrThrow</c>.</description></item>
    /// </list>
    /// The target file is created, or overwritten if it already exists.
    /// </remarks>
    /// <param name="path">The file to write to.</param>
    /// <param name="node">The <see cref="HtmlNode"/> to save.</param>
    /// <param name="encoding">The encoding to use when writing the file.</param>
    /// <seealso cref="HtmlNode"/>
    /// <seealso cref="HtmlDocument"/>
    public static void Save(string path, HtmlNode node, Encoding encoding)
    {
        if (path == null || node == null)
        {
            // Work out the offending argument once so the message and the exception always agree.
            string nullArgument = path == null ? "path" : "node";

            // NOTE(review): DebugBreakOrThrow is defined elsewhere; if it can return (e.g. after a
            // debugger break) execution continues below with a null argument - confirm intended.
            DebugBreakOrThrow("Figure out why " + nullArgument + " is null.", new ArgumentNullException(nullArgument));
        }

        // Per the original author: only expected to trigger when the caller is not another Save() overload.
        if (encoding == null)
        {
            DebugBreakOrThrow("Figure out why encoding is null.", new ArgumentNullException("encoding"));
        }

        using (FileStream fileStream = new FileStream(path, FileMode.Create, FileAccess.Write, FileShare.Read, 1024))
        {
            using (StreamWriter streamWriter = new StreamWriter(fileStream, encoding))
            {
                // StreamWriter is a TextWriter, so it can be handed to WriteTo/WriteContentTo.
                // WriteTo is only used for a node without children; WriteContentTo writes a node's
                // children. For a node with a parent we therefore write the parent's content,
                // which also emits all of the node's siblings.
                if (node.ParentNode != null)
                {
                    node.ParentNode.WriteContentTo(streamWriter);
                }
                else if (node.HasChildNodes == false)
                {
                    node.WriteTo(streamWriter);
                }
                else if (node.Name == HtmlNode.HtmlNodeTypeNameDocument)
                {
                    node.WriteContentTo(streamWriter);
                }
                else
                {
                    // TODO: Properly save parent-less node with children.
                    DebugBreakOrThrow("Properly save parent-less node with children. Inspect 'node'.", new InvalidOperationException("Don't know how to save the node, and it's children!"));
                }

                // Explicit flush kept from the original; disposing the writer would flush as well.
                streamWriter.Flush();
            }
        }
    }
        /// <summary>
        /// Recursively writes a filtered copy of <paramref name="node"/> to <paramref name="writer"/>.
        /// </summary>
        /// <remarks>
        /// Text nodes are written as-is. For any other node the begin/end tags are written only
        /// when the node name is in <paramref name="tagsWhiteList"/>; children are still visited
        /// for non-whitelisted nodes, so their text survives. When the node name is in
        /// <paramref name="tagsBlackListDeleteContent"/> its children are skipped entirely and an
        /// entry is appended to <paramref name="removals"/>.
        /// Tag names are compared case-insensitively using ordinal rules, so the result does not
        /// depend on the current culture.
        /// </remarks>
        /// <param name="node">The node to process.</param>
        /// <param name="writer">Destination for the filtered markup.</param>
        /// <param name="removals">Receives a { description, tag name } pair for every node whose content was deleted.</param>
        /// <param name="tagsWhiteList">Names of tags whose begin/end tags are kept.</param>
        /// <param name="tagsBlackListDeleteContent">Names of tags whose child content is dropped.</param>
        private static void GetSafeHtmlIter(HtmlNode node, TextWriter writer, List<string[]> removals, string[] tagsWhiteList, string[] tagsBlackListDeleteContent)
        {
            bool found = false;
            bool deleteTagFound = false;

            if (node.NodeType == HtmlNodeType.Text)
            {
                node.WriteTo(writer);
            }
            else
            {
                foreach (string tagName in tagsWhiteList)
                {
                    // Ordinal comparison: ToLower() is culture-sensitive (e.g. "TITLE" does not
                    // lower-case to "title" under tr-TR) and allocates two strings per check.
                    if (string.Equals(node.Name, tagName, System.StringComparison.OrdinalIgnoreCase))
                    {
                        found = true;
                        break;
                    }
                }
            }

            if (found)
            {
                WriteBeginTag(node, writer);
            }

            // The blacklist is checked for every node, including whitelisted ones: a tag present
            // in both lists keeps its begin/end tags but loses its content.
            foreach (string tagName in tagsBlackListDeleteContent)
            {
                if (string.Equals(node.Name, tagName, System.StringComparison.OrdinalIgnoreCase))
                {
                    deleteTagFound = true;
                    removals.Add(new string[] { "Deleted tag and child content", node.Name });
                    break;
                }
            }

            if (!deleteTagFound)
            {
                foreach (HtmlNode childNode in node.ChildNodes)
                {
                    GetSafeHtmlIter(childNode, writer, removals, tagsWhiteList, tagsBlackListDeleteContent);
                }
            }

            if (found)
            {
                WriteEndTag(node, writer);
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Converts an HtmlNode into a string, dropping a leading XML declaration if present.
        /// </summary>
        /// <remarks>
        /// Only a declaration at the very start of the output (optionally preceded by whitespace)
        /// is removed. A "?&gt;" sequence that appears later in the markup is left untouched, so
        /// content in front of it is no longer cut off.
        /// </remarks>
        /// <param name="node">HtmlNode</param>
        /// <returns>string of the HTML</returns>
        public static string Output(HtmlNode node)
        {
            string output = null;

            using (StringWriter sw = new StringWriter())
            {
                node.WriteTo(sw);
                output = sw.ToString();

                // strip off XML doc header
                if (!string.IsNullOrEmpty(output))
                {
                    string candidate = output.TrimStart();

                    // Ordinal comparisons: this is markup, not linguistic text.
                    if (candidate.StartsWith("<?xml", System.StringComparison.OrdinalIgnoreCase))
                    {
                        int at = candidate.IndexOf("?>", System.StringComparison.Ordinal);
                        if (at >= 0)
                        {
                            output = candidate.Substring(at + 2);
                        }
                    }
                }
            }

            return output;
        }
Ejemplo n.º 4
0
    /// <summary>
    /// Builds a BSON document describing a list node: its origin, XPath, raw HTML and the
    /// non-empty inner text of every <c>li</c> found beneath it.
    /// </summary>
    /// <remarks>
    /// When <c>is_open_mongo</c> is set, the document is also inserted through
    /// <c>MongoHelper.insert_bson("web", ...)</c> before being returned.
    /// </remarks>
    /// <param name="node_input">The node to extract list items from.</param>
    /// <returns>The populated document; the "ul" array is empty when no <c>li</c> matches.</returns>
    public static BsonDocument select_ul(HtmlNode node_input)
    {
        // Sample the clock once so both parts of doc_id come from the same instant.
        DateTime now = DateTime.Now;

        BsonDocument doc_result = new BsonDocument();
        doc_result.Add("doc_id", now.ToString("yyyyMMddHHmmss") + now.Millisecond.ToString());
        doc_result.Add("from_url", global_url);
        doc_result.Add("from_html_type", "ul");
        doc_result.Add("html_path", node_input.XPath);
        doc_result.Add("original_html", node_input.WriteTo());

        HtmlNodeCollection ul_nodes = node_input.SelectNodes(node_input.XPath + @"//li");

        BsonArray ul_array = new BsonArray();

        // SelectNodes yields null (not an empty collection) when nothing matches.
        if (ul_nodes != null)
        {
            foreach (HtmlNode node in ul_nodes)
            {
                if (!string.IsNullOrEmpty(node.InnerText))
                {
                    ul_array.Add(node.InnerText);
                }
            }
        }
        doc_result.Add("ul", ul_array);

        if (is_open_mongo)
        {
            MongoHelper.insert_bson("web", doc_result);
        }

        return doc_result;
    }
Ejemplo n.º 5
0
    /// <summary>
    /// Builds a BSON document describing a table node: its origin, XPath, raw HTML, a "header"
    /// array taken from the first row and one array per subsequent non-empty row (keyed "0", "1", ...).
    /// </summary>
    /// <remarks>
    /// Cells are laid out on a scratch grid pre-filled with a placeholder value so that
    /// <c>rowspan</c>/<c>colspan</c> cells can reserve neighbouring slots. For each row, <c>td</c>
    /// cells are placed first and <c>th</c> cells afterwards fill the remaining free slots.
    /// Span values that are not valid integers are ignored, and spans that would reach outside
    /// the grid are clipped instead of throwing. When <c>is_open_mongo</c> is set, the document
    /// is also inserted through <c>MongoHelper.insert_bson("web", ...)</c>.
    /// NOTE(review): a cell carrying both rowspan and colspan only reserves its row and its
    /// column, not the full rectangle - behaviour kept from the original, confirm intended.
    /// </remarks>
    /// <param name="node_input">The node to extract table rows from.</param>
    /// <returns>The populated document; "header" is empty when no row matches.</returns>
    public static BsonDocument select_table(HtmlNode node_input)
    {
        const int GridSize = 500;               // number of grid columns and minimum number of grid rows
        const string EmptyCell = "X000000X";    // placeholder marking a grid slot as unused

        // Sample the clock once so both parts of doc_id come from the same instant.
        DateTime now = DateTime.Now;

        BsonDocument doc_result = new BsonDocument();
        doc_result.Add("doc_id", now.ToString("yyyyMMddHHmmss") + now.Millisecond.ToString());
        doc_result.Add("from_url", global_url);
        doc_result.Add("from_html_type", "table");
        doc_result.Add("html_path", node_input.XPath);
        doc_result.Add("original_html", node_input.WriteTo());

        HtmlNodeCollection tr_nodes = node_input.SelectNodes(node_input.XPath + @"//tr");
        string[] cells = new string[] { @"//td", @"//th" };

        // SelectNodes yields null (not an empty collection) when nothing matches.
        int tr_count = tr_nodes == null ? 0 : tr_nodes.Count;

        // Grow the grid when the table has more rows than the default size, so long tables
        // no longer index past the last grid row.
        int row_count = Math.Max(GridSize, tr_count);

        DataTable table = new DataTable();
        for (int i = 0; i < GridSize; i++)
        {
            table.Columns.Add("C" + i.ToString());
        }
        for (int i = 0; i < row_count; i++)
        {
            DataRow row_new = table.NewRow();
            for (int j = 0; j < GridSize; j++)
            {
                row_new[j] = EmptyCell;
            }
            table.Rows.Add(row_new);
        }

        for (int i = 0; i < tr_count; i++)
        {
            foreach (string cell in cells)
            {
                HtmlNodeCollection td_nodes = node_input.SelectNodes(tr_nodes[i].XPath + cell);
                if (td_nodes == null)
                {
                    continue;
                }

                int start = 0;
                for (int k = 0; k < td_nodes.Count; k++)
                {
                    // Skip slots already reserved by an earlier span or an earlier cell kind.
                    while (start < GridSize && table.Rows[i][start].ToString() != EmptyCell)
                    {
                        start = start + 1;
                    }
                    if (start >= GridSize)
                    {
                        // Row is full; remaining cells cannot be placed.
                        break;
                    }

                    HtmlNode cell_node = td_nodes[k];
                    string cell_text = cell_node.InnerText;

                    foreach (HtmlAttribute attr in cell_node.Attributes)
                    {
                        bool is_rowspan = string.Equals(attr.Name, "rowspan", StringComparison.OrdinalIgnoreCase);
                        bool is_colspan = string.Equals(attr.Name, "colspan", StringComparison.OrdinalIgnoreCase);
                        if (!is_rowspan && !is_colspan)
                        {
                            continue;
                        }

                        int span_count;
                        if (!int.TryParse(attr.Value, out span_count))
                        {
                            // Malformed span (empty, "2px", ...): treat the cell as unspanned.
                            continue;
                        }

                        for (int j = 1; j < span_count; j++)
                        {
                            if (is_rowspan)
                            {
                                if (i + j >= row_count)
                                {
                                    break;
                                }
                                table.Rows[i + j][start] = cell_text;
                            }
                            else
                            {
                                if (start + j >= GridSize)
                                {
                                    break;
                                }
                                table.Rows[i][start + j] = cell_text;
                            }
                        }
                    }

                    table.Rows[i][start] = cell_text;
                    start = start + 1;
                }
            }
        }

        //add table to doc
        BsonArray header_array = new BsonArray();
        for (int i = 0; i < GridSize; i++)
        {
            string header_value = table.Rows[0][i].ToString();
            if (header_value != EmptyCell)
            {
                header_array.Add(header_value);
            }
        }
        doc_result.Add("header", header_array);

        for (int i = 1; i < row_count; i++)
        {
            BsonArray td_array = new BsonArray();
            for (int j = 0; j < GridSize; j++)
            {
                string cell_value = table.Rows[i][j].ToString();
                if (cell_value != EmptyCell)
                {
                    td_array.Add(cell_value);
                }
            }
            if (td_array.Count != 0)
            {
                doc_result.Add((i - 1).ToString(), td_array);
            }
        }

        if (is_open_mongo)
        {
            MongoHelper.insert_bson("web", doc_result);
        }
        return doc_result;
    }
Ejemplo n.º 6
0
 /// <summary>
 /// Decodes the given HTML node into the corresponding web shape.
 /// </summary>
 /// <param name="htmlNode">The node to decode.</param>
 /// <returns>A web shape corresponding to the given HTML content.</returns>
 /// <remarks>
 /// <para>The node is serialized with <c>WriteTo()</c> and handed, together with its tag name,
 /// to the <c>DecodeWebShape(tagname, reader)</c> overload through an <c>XmlReader</c>.</para>
 /// <para>If the tagname cannot be found or the content cannot be deserialized, <c>null</c> is returned.</para>
 /// </remarks>
 public static IWebShape DecodeWebShape(HtmlNode htmlNode)
 {
     string shapeTag = htmlNode.Name;
     string markup = htmlNode.WriteTo();

     // Both readers are disposed, innermost first, once the overload has returned.
     using (StringReader markupReader = new StringReader(markup))
     using (XmlReader xmlReader = XmlReader.Create(markupReader))
     {
         return DecodeWebShape(shapeTag, xmlReader);
     }
 }
Ejemplo n.º 7
0
    /// <summary>
    /// Appends the markup of <paramref name="_node"/> to the file named by <c>fileName</c>,
    /// encoded with <see cref="Encoding.Default"/>.
    /// </summary>
    /// <param name="_node">The node to write.</param>
    public void saveNodeToFile(HtmlNode _node)
    {
        Encoding locEncoding = Encoding.Default;

        // 'using' guarantees the writer (and the underlying file handle) is released even
        // when WriteTo throws; the second argument 'true' opens the file in append mode.
        using (StreamWriter swriter = new StreamWriter(fileName, true, locEncoding))
        {
            _node.WriteTo(swriter);
        }
    }