Exemplo n.º 1
0
        /// <summary>
        /// Appends the textual content of <paramref name="node"/> to <paramref name="result"/>.
        /// </summary>
        /// <remarks>
        /// If the node has non-empty inner text, the trimmed text is appended.
        /// Otherwise, if the node is contained in <paramref name="inputs"/> and has a
        /// <c>value</c> attribute, a single space followed by the trimmed attribute
        /// value is appended. In every other case nothing is appended.
        /// </remarks>
        /// <param name="node">The node whose text should be collected.</param>
        /// <param name="result">The buffer that receives the text.</param>
        /// <param name="inputs">
        /// The nodes whose <c>value</c> attribute is used as a fallback text.
        /// May be <c>null</c>, which is treated the same as an empty collection.
        /// </param>
        private void GetNodeText(HtmlNode node, StringBuilder result, HtmlNodeCollection inputs)
        {
            var text = node.InnerText;

            if (!string.IsNullOrEmpty(text))
            {
                result.Append(text.Trim());
                return;
            }

            // Callers hand over the raw result of an XPath query, which is null
            // (not an empty collection) when the document contains no matches.
            if (inputs == null || !inputs.Contains(node))
            {
                return;
            }

            var att = node.Attributes["value"];
            if (att != null)
            {
                result.Append(" " + att.Value.Trim());
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Converts an HTML string to plain text.
        /// </summary>
        /// <remarks>
        /// Every descendant of the root obtained from <c>GetRoot</c> is visited. Leaf
        /// nodes contribute their text via <c>GetNodeText</c>, and a line break is
        /// appended each time a <c>br</c> or <c>div</c> node is encountered.
        /// </remarks>
        /// <param name="html">The HTML markup to convert.</param>
        /// <returns>The collected text.</returns>
        public string Parse(string html)
        {
            HtmlNode root   = GetRoot(html);
            var      result = new StringBuilder();

            // SelectNodes yields null when nothing matches. Substitute an empty
            // collection so the text extraction below never receives null.
            HtmlNodeCollection inputs = root.SelectNodes("//input") ?? new HtmlNodeCollection(null);
            HtmlNodeCollection brs    = root.SelectNodes("//br");
            HtmlNodeCollection divs   = root.SelectNodes("//div");

            // Gather the nodes that force a line break into one hash set so the
            // membership test per visited node is O(1) instead of a linear scan
            // over two collections.
            var lineBreakNodes = new System.Collections.Generic.HashSet<HtmlNode>();
            if (brs != null)
            {
                lineBreakNodes.UnionWith(brs);
            }
            if (divs != null)
            {
                lineBreakNodes.UnionWith(divs);
            }

            foreach (HtmlNode node in root.DescendantNodes())
            {
                if (!node.HasChildNodes)
                {
                    GetNodeText(node, result, inputs);
                }

                if (lineBreakNodes.Contains(node))
                {
                    result.AppendLine();
                }
            }

            return result.ToString();
        }
Exemplo n.º 3
0
        /// <summary>
        /// Populates <paramref name="bookInfo"/> from the HTML of a book page.
        /// </summary>
        /// <remarks>
        /// The following pieces are extracted, in order: the book name, the
        /// key/value pairs of the <c>div#info</c> block, the average score and vote
        /// count, up to five star percentages, the two intro texts, and the tags.
        /// If the book name element is missing, the method returns without
        /// touching <paramref name="bookInfo"/>.
        /// </remarks>
        /// <param name="bookInfo">The object that receives the parsed values.</param>
        /// <param name="html">The HTML markup of the book page.</param>
        private void ParseBookHtml(BookInfo bookInfo, string html)
        {
            HtmlDocument doc = new HtmlDocument();

            doc.LoadHtml(html);

            #region BookName
            var nodeList = doc.DocumentNode.SelectNodes("//span[@property='v:itemreviewed']");

            // SelectNodes returns null (not an empty collection) when nothing
            // matches, so both cases mean "no book name on this page".
            if (nodeList == null || nodeList.Count < 1)
            {
                return;
            }

            bookInfo.BookName = nodeList[0].InnerText;
            #endregion

            #region Info
            nodeList = doc.DocumentNode.SelectNodes("//div[@id='info']");
            HtmlNodeCollection infoes = null;
            if (nodeList != null && nodeList.Count >= 1)
            {
                // The leading "." keeps the query inside the info block; a bare
                // "//" would search the whole document regardless of context node.
                infoes = nodeList[0].SelectNodes(".//span[@class='pl']");
            }

            if (infoes != null && infoes.Count > 0)
            {
                foreach (var info in infoes)
                {
                    string key   = info.InnerText.Trim();
                    string value = string.Empty;

                    // The value of a label is the concatenated, trimmed text of all
                    // following siblings up to (but excluding) the next label.
                    HtmlNode nextNode = info.NextSibling;
                    while (nextNode != null && !infoes.Contains(nextNode))
                    {
                        string innerContent = nextNode.InnerText.Trim();
                        if (!string.IsNullOrEmpty(innerContent))
                        {
                            value += innerContent;
                        }
                        nextNode = nextNode.NextSibling;
                    }

                    switch (key)
                    {
                    case "作者":        // author (label has no colon; it leads the value instead)
                        bookInfo.Author = value.TrimStart(':');
                        break;

                    case "出版社:":     // publisher
                        bookInfo.Publisher = value;
                        break;

                    case "原作名:":     // original title
                        // NOTE(review): the original title is used as a fallback for
                        // Author when no author has been set yet - confirm this is intended.
                        if (string.IsNullOrEmpty(bookInfo.Author))
                        {
                            bookInfo.Author = value;
                        }
                        break;

                    case "译者":        // translator (recognized but not stored)
                        break;

                    case "出版年:":     // publication year
                        bookInfo.PublishDate = value;
                        break;

                    case "页数:":       // page count
                        bookInfo.PageNum = value;
                        break;

                    case "定价:":       // price
                        bookInfo.Price = value;
                        break;

                    case "ISBN:":
                        bookInfo.ISBN = value;
                        break;

                    default:
                        break;
                    }
                }
            }
            #endregion

            #region Score
            // Page numbers use "." as the decimal separator, so parse with the
            // invariant culture; the current culture could misread e.g. "8.5".
            // The NumberStyles values are the defaults of the two-argument TryParse.
            System.Globalization.CultureInfo  invariant  = System.Globalization.CultureInfo.InvariantCulture;
            System.Globalization.NumberStyles floatStyle =
                System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands;

            HtmlNode averageNode = doc.DocumentNode.SelectSingleNode("//strong[@property='v:average']");
            if (averageNode != null)
            {
                float average;
                if (float.TryParse(averageNode.InnerText, floatStyle, invariant, out average))
                {
                    bookInfo.AverageScore = average;
                }
            }

            HtmlNode voteNode = doc.DocumentNode.SelectSingleNode("//span[@property='v:votes']");
            if (voteNode != null)
            {
                int vote;
                if (int.TryParse(voteNode.InnerText, System.Globalization.NumberStyles.Integer, invariant, out vote))
                {
                    bookInfo.RatingNum = vote;
                }
            }

            var starNodeList = doc.DocumentNode.SelectNodes("//span[@class='rating_per']");
            int starNum      = 0;
            if (starNodeList != null)
            {
                foreach (HtmlNode starNode in starNodeList)
                {
                    // Only the first five percentage entries are consumed.
                    if (starNum >= 5)
                    {
                        break;
                    }

                    // Strip surrounding whitespace first so a trailing '%' is
                    // actually at the end of the string when it is removed.
                    string star = starNode.InnerText.Trim().TrimEnd('%');

                    float dStar;
                    if (float.TryParse(star, floatStyle, invariant, out dStar))
                    {
                        // Percentage text -> fraction.
                        SetStar(bookInfo, starNum, dStar / 100f);
                    }

                    // The index advances even when parsing fails, so later
                    // entries keep their position.
                    starNum++;
                }
            }
            #endregion

            #region Intro
            // The first intro block is taken as the content description and the
            // second as the author description; with fewer than two blocks
            // neither is set.
            var introNodeList = doc.DocumentNode.SelectNodes("//div[@class='intro']");
            if (introNodeList != null && introNodeList.Count >= 2)
            {
                bookInfo.ContentDescription = introNodeList[0].InnerText;
                bookInfo.AuthorDescription  = introNodeList[1].InnerText;
            }
            #endregion

            #region Tag

            // NOTE(review): the two leading spaces in the class value look
            // deliberate (an exact match of the page's literal attribute) - confirm
            // against the current markup before changing.
            HtmlNodeCollection tagNodeLists     = doc.DocumentNode.SelectNodes("//a[@class='  tag']");
            StringBuilder      tagStringBuilder = new StringBuilder();
            if (tagNodeLists != null)
            {
                foreach (var tagNode in tagNodeLists)
                {
                    string tag = tagNode.InnerText.Trim();

                    if (!string.IsNullOrEmpty(tag))
                    {
                        tagStringBuilder.AppendFormat("{0},", tag);
                    }
                }
            }

            // Tags are stored comma-separated without a trailing comma.
            bookInfo.Tags = tagStringBuilder.ToString().TrimEnd(',');
            #endregion
        }