public static void ParseHtmlDom(TextSource snapSource, IHtmlDocument htmldoc, WebDom.DomElement parentElement)
 {
     // Obtain a parser instance; it is handed back through FreeHtmlParser once parsing is done.
     var htmlParser = GetHtmlParser();

     // Parse() is called with the concrete document implementation, so the
     // interface reference is downcast (throws InvalidCastException on mismatch).
     var concreteDoc = (LayoutFarm.WebDom.Impl.HtmlDocument)htmldoc;
     htmlParser.Parse(snapSource, concreteDoc, parentElement);

     FreeHtmlParser(htmlParser);
 }
 /// <summary>
 /// Parses the given html source into a newly created <see cref="HtmlDocument"/>.
 /// </summary>
 /// <param name="snapSource">the html source to parse</param>
 /// <returns>the new document; parsing starts at its root node</returns>
 public static HtmlDocument ParseDocument(TextSource snapSource)
 {
     var htmlParser = GetHtmlParser();

     // Parse straight into an empty document, starting at its root node.
     HtmlDocument result = new HtmlDocument();
     var root = result.RootNode;
     htmlParser.Parse(snapSource, result, root);

     // Hand the parser back before returning the populated document.
     FreeHtmlParser(htmlParser);
     return result;
 }
示例#3
0
 /// <summary>
 /// Parses the text of richTextBox1 as html and shows the resulting DOM in treeView2.
 /// </summary>
 private void button1_Click(object sender, EventArgs e)
 {
     // Parse the current editor text into a fresh document.
     var htmlParser = HtmlParser.CreateHtmlParser(ParseEngineKind.MyHtmlParser);
     var doc = new LayoutFarm.WebDom.Impl.HtmlDocument();
     char[] inputChars = this.richTextBox1.Text.ToCharArray();
     htmlParser.Parse(new TextSource(inputChars), doc, doc.RootNode);

     // Replace the tree view content with a description of the parsed DOM.
     var tree = this.treeView2;
     tree.Nodes.Clear();
     var top = new TreeNode("root");
     DescibeNode(doc.RootNode, top);
     tree.Nodes.Add(top);
     tree.ExpandAll();
 }
示例#4
0
        /// <summary>
        /// Tokenizes the snapshot text with <c>HtmlTokenizer</c> and builds DOM nodes from the
        /// tokens, attaching them below <paramref name="currentNode"/>.
        /// </summary>
        /// <param name="textSnapshot">source text; its ActualSnapshot is copied in full before tokenizing</param>
        /// <param name="htmldoc">document used to create the text nodes, elements and attributes</param>
        /// <param name="currentNode">node that receives the first children; reassigned locally while descending into / leaving elements</param>
        /// <exception cref="NotSupportedException">
        /// thrown when an end tag does not match the current node and the recovery loop below cannot resolve it
        /// </exception>
        public override void Parse(TextSource textSnapshot, WebDocument htmldoc, DomElement currentNode)
        {
            this._resultHtmlDoc = htmldoc;
            // Copy the whole snapshot into a char array ...
            char[] copyBuffer = textSnapshot.ActualSnapshot.Copy(0, textSnapshot.ActualSnapshot.Length);
            // ... then encode it as UTF-8 bytes so the tokenizer can consume it through a StreamReader.
            using (var ms = new System.IO.MemoryStream(System.Text.Encoding.UTF8.GetBytes(copyBuffer)))
            using (var textReader = new System.IO.StreamReader(ms))
            {
                var tokenizer = new HtmlTokenizer(textReader);
                HtmlToken token;
                while (tokenizer.ReadNextToken(out token))
                {
                    switch (token.Kind)
                    {
                        case HtmlTokenKind.Data:
                            {
                                // Character data becomes a text node under the current node.
                                var text = (HtmlDataToken)token;
                                currentNode.AddChild(_resultHtmlDoc.CreateTextNode(text.Data.ToCharArray()));
                            }
                            break;
                        case HtmlTokenKind.Tag:
                            {
                                var tag = (HtmlTagToken)token;
                                if (!tag.IsEndTag)
                                {
                                    //open tag: create the element, attach it, then copy its attributes
                                    DomElement elem = this._resultHtmlDoc.CreateElement(null, tag.Name);
                                    currentNode.AddChild(elem);
                                    foreach (var attribute in tag.Attributes)
                                    {
                                        var attr = this._resultHtmlDoc.CreateAttribute(null, attribute.Name);
                                        // Value is assigned only when the token carries one.
                                        if (attribute.Value != null)
                                        {
                                            attr.Value = attribute.Value;
                                        }
                                        elem.AddAttribute(attr);
                                    }
                                    if (!tag.IsEmptyElement)
                                    {
                                        // Remember the parent on openEltStack (declared outside this block)
                                        // and descend into the new element.
                                        openEltStack.Push(currentNode);
                                        currentNode = elem;
                                    }
                                }
                                else
                                {
                                    //this is end tag
                                    //check end tag match or not
                                    // NOTE(review): tagNameIndex is never read in this method; the call is
                                    // presumably kept for registering the name in the document - confirm.
                                    int tagNameIndex = _resultHtmlDoc.AddStringIfNotExists(tag.Name);
                                    // NOTE(review): this compares Name, while the recovery loop below compares
                                    // LocalName - confirm both are meant to be equivalent here.
                                    if (currentNode.Name == tag.Name)
                                    {
                                        // Matching end tag: go back to the parent.
                                        // NOTE(review): no Count check before this Pop, unlike the pops below - verify
                                        // the stack cannot be empty here.
                                        currentNode = openEltStack.Pop();
                                    }
                                    else
                                    {
                                        //if not equal then check if current node need close tag or not
                                        // At most three recovery attempts; the reason for the value 3 is not documented.
                                        int count = 3;//?
                                        bool ok = false;
                                        while (count > 0)
                                        {
                                            // The IsSingleTag and CanAutoClose branches perform the same steps:
                                            // step up one level, and if that node matches the end tag, step up again.
                                            if (HtmlTagMatching.IsSingleTag(currentNode.LocalNameIndex))
                                            {
                                                if (openEltStack.Count > 0)
                                                {
                                                    currentNode = openEltStack.Pop();
                                                }
                                                if (currentNode.LocalName == tag.Name)
                                                {
                                                    // Only counts as resolved when a parent is available to return to.
                                                    if (openEltStack.Count > 0)
                                                    {
                                                        currentNode = openEltStack.Pop();
                                                        ok = true;
                                                        break;
                                                    }
                                                }
                                            }
                                            else if (HtmlTagMatching.CanAutoClose(currentNode.LocalNameIndex))
                                            {
                                                if (openEltStack.Count > 0)
                                                {
                                                    currentNode = openEltStack.Pop();
                                                }
                                                if (currentNode.LocalName == tag.Name)
                                                {
                                                    if (openEltStack.Count > 0)
                                                    {
                                                        currentNode = openEltStack.Pop();
                                                        ok = true;
                                                        break;
                                                    }
                                                }
                                            }
                                            else
                                            {
                                                //implement err handling here!
                                                // The current node is neither a single tag nor auto-closable: give up.
                                                throw new NotSupportedException();
                                            }
                                            count--;
                                        }
                                        // All attempts used without resolving the end tag.
                                        if (!ok)
                                        {
                                            throw new NotSupportedException();
                                        }
                                    }
                                }
                            }
                            break;
                        case HtmlTokenKind.Comment:
                            // Comment tokens are ignored.

                            break;
                        case HtmlTokenKind.DocType:
                            // DocType tokens are ignored.

                            break;
                        default:
                            {
                                // Any other token kind is ignored.
                            }
                            break;
                    }
                }
            }
        }
示例#5
0
        /// <summary>
        /// Forwards to another Parse overload, passing the ActualSnapshot of <paramref name="textSnapshot"/>.
        /// </summary>
        public void Parse(TextSource textSnapshot, WebDocument htmldoc, DomElement currentNode)
        {
            var snapshot = textSnapshot.ActualSnapshot;
            Parse(snapshot, htmldoc, currentNode);
        }
示例#6
0
 /// <summary>
 /// Parses <paramref name="textSnapshot"/>; the behavior is supplied by the overriding class.
 /// </summary>
 /// <param name="textSnapshot">the source text to parse</param>
 /// <param name="htmldoc">the document taking part in the parse (presumably the owner/factory of the created nodes - confirm against implementations)</param>
 /// <param name="currentNode">the DOM element the parse is performed against (presumably the parent of the parsed content - confirm against implementations)</param>
 public abstract void Parse(TextSource textSnapshot, WebDocument htmldoc, DomElement currentNode);
示例#7
0
        /// <summary>
        /// Tokenizes the snapshot text with <c>HtmlTokenizer</c> and builds DOM nodes from the
        /// tokens, attaching them below <paramref name="currentNode"/>.
        /// </summary>
        /// <param name="textSnapshot">source text; its ActualSnapshot is copied in full before tokenizing</param>
        /// <param name="htmldoc">document used to create the text nodes, elements and attributes</param>
        /// <param name="currentNode">node that receives the first children; reassigned locally while descending into / leaving elements</param>
        /// <exception cref="NotSupportedException">
        /// thrown when an end tag does not match the current node and the recovery loop below cannot resolve it
        /// </exception>
        public override void Parse(TextSource textSnapshot, WebDocument htmldoc, DomElement currentNode)
        {
            this._resultHtmlDoc = htmldoc;
            // Copy the whole snapshot into a char array ...
            char[] copyBuffer = textSnapshot.ActualSnapshot.Copy(0, textSnapshot.ActualSnapshot.Length);
            // ... then encode it as UTF-8 bytes so the tokenizer can consume it through a StreamReader.
            using (var ms = new System.IO.MemoryStream(System.Text.Encoding.UTF8.GetBytes(copyBuffer)))
                using (var textReader = new System.IO.StreamReader(ms))
                {
                    var       tokenizer = new HtmlTokenizer(textReader);
                    HtmlToken token;
                    while (tokenizer.ReadNextToken(out token))
                    {
                        switch (token.Kind)
                        {
                        case HtmlTokenKind.Data:
                        {
                            // Character data becomes a text node under the current node.
                            var text = (HtmlDataToken)token;
                            currentNode.AddChild(_resultHtmlDoc.CreateTextNode(text.Data.ToCharArray()));
                        }
                        break;

                        case HtmlTokenKind.Tag:
                        {
                            var tag = (HtmlTagToken)token;
                            if (!tag.IsEndTag)
                            {
                                //open tag: create the element, attach it, then copy its attributes
                                DomElement elem = this._resultHtmlDoc.CreateElement(null, tag.Name);
                                currentNode.AddChild(elem);
                                foreach (var attribute in tag.Attributes)
                                {
                                    var attr = this._resultHtmlDoc.CreateAttribute(null, attribute.Name);
                                    // Value is assigned only when the token carries one.
                                    if (attribute.Value != null)
                                    {
                                        attr.Value = attribute.Value;
                                    }
                                    elem.AddAttribute(attr);
                                }
                                if (!tag.IsEmptyElement)
                                {
                                    // Remember the parent on openEltStack (declared outside this block)
                                    // and descend into the new element.
                                    openEltStack.Push(currentNode);
                                    currentNode = elem;
                                }
                            }
                            else
                            {
                                //this is end tag
                                //check end tag match or not
                                // NOTE(review): tagNameIndex is never read in this method; the call is
                                // presumably kept for registering the name in the document - confirm.
                                int tagNameIndex = _resultHtmlDoc.AddStringIfNotExists(tag.Name);
                                // NOTE(review): this compares Name, while the recovery loop below compares
                                // LocalName - confirm both are meant to be equivalent here.
                                if (currentNode.Name == tag.Name)
                                {
                                    // Matching end tag: go back to the parent.
                                    // NOTE(review): no Count check before this Pop, unlike the pops below - verify
                                    // the stack cannot be empty here.
                                    currentNode = openEltStack.Pop();
                                }
                                else
                                {
                                    //if not equal then check if current node need close tag or not
                                    // At most three recovery attempts; the reason for the value 3 is not documented.
                                    int  count = 3;   //?
                                    bool ok    = false;
                                    while (count > 0)
                                    {
                                        // The IsSingleTag and CanAutoClose branches perform the same steps:
                                        // step up one level, and if that node matches the end tag, step up again.
                                        if (HtmlTagMatching.IsSingleTag(currentNode.LocalNameIndex))
                                        {
                                            if (openEltStack.Count > 0)
                                            {
                                                currentNode = openEltStack.Pop();
                                            }
                                            if (currentNode.LocalName == tag.Name)
                                            {
                                                // Only counts as resolved when a parent is available to return to.
                                                if (openEltStack.Count > 0)
                                                {
                                                    currentNode = openEltStack.Pop();
                                                    ok          = true;
                                                    break;
                                                }
                                            }
                                        }
                                        else if (HtmlTagMatching.CanAutoClose(currentNode.LocalNameIndex))
                                        {
                                            if (openEltStack.Count > 0)
                                            {
                                                currentNode = openEltStack.Pop();
                                            }
                                            if (currentNode.LocalName == tag.Name)
                                            {
                                                if (openEltStack.Count > 0)
                                                {
                                                    currentNode = openEltStack.Pop();
                                                    ok          = true;
                                                    break;
                                                }
                                            }
                                        }
                                        else
                                        {
                                            //implement err handling here!
                                            // The current node is neither a single tag nor auto-closable: give up.
                                            throw new NotSupportedException();
                                        }
                                        count--;
                                    }
                                    // All attempts used without resolving the end tag.
                                    if (!ok)
                                    {
                                        throw new NotSupportedException();
                                    }
                                }
                            }
                        }
                        break;

                        case HtmlTokenKind.Comment:
                            // Comment tokens are ignored.

                            break;

                        case HtmlTokenKind.DocType:
                            // DocType tokens are ignored.

                            break;

                        default:
                        {
                            // Any other token kind is ignored.
                        }
                        break;
                        }
                    }
                }
        }
示例#8
0
 /// <summary>
 /// Unwraps the ActualSnapshot of <paramref name="textSnapshot"/> and passes it,
 /// together with the remaining arguments, to another Parse overload.
 /// </summary>
 public override void Parse(TextSource textSnapshot, WebDocument htmldoc, DomElement currentNode)
 {
     var actual = textSnapshot.ActualSnapshot;
     Parse(actual, htmldoc, currentNode);
 }
示例#9
0
 /// <summary>
 /// Builds a string from the buffer that TextSnapshot.UnsafeGetInternalBuffer
 /// returns for the snapshot held by <paramref name="textsource"/>.
 /// </summary>
 public static string GetInternalText(TextSource textsource)
 {
     var rawBuffer = TextSnapshot.UnsafeGetInternalBuffer(textsource.actualSnapshot);
     return new string(rawBuffer);
 }
示例#10
0
 /// <summary>
 /// Returns the content of the snapshot's internal buffer as a new string.
 /// </summary>
 /// <param name="textsource">the source whose actualSnapshot buffer is read</param>
 public static string GetInternalText(TextSource textsource)
 {
     var snapshot = textsource.actualSnapshot;
     var chars = TextSnapshot.UnsafeGetInternalBuffer(snapshot);
     string text = new string(chars);
     return text;
 }