/// <summary>
/// Builds a <see cref="BookCardInfo"/> from a markup fragment.
/// The fragment has every HREF_REGEX match removed, is passed through
/// HtmlEntityCorrect.Encode, and is then read as XML; text nodes are forwarded
/// to onText and element names to onElement.
/// </summary>
/// <param name="content">Markup fragment to parse.</param>
/// <returns>The <see cref="BookCardInfo"/> instance populated during this call.</returns>
public BookCardInfo Build(string content)
{
    // Start every call from a clean builder state.
    mState = BookCardInfoBuilderState.TitleAuthor;
    mBookCardInfo = new BookCardInfo();
    mContentAndIsbnTexts.Clear();

    // Pre-process the markup before handing it to the XML reader.
    string withoutHrefs = HREF_REGEX.Replace(content, "");
    string encoded = HtmlEntityCorrect.Encode(withoutHrefs);
    byte[] utf8 = Encoding.UTF8.GetBytes(encoded);

    // The reader is stored in the mReader field so it stays reachable for the
    // duration of the parse (onElementA, for one, reads from it).
    using (mReader = XmlReader.Create(new MemoryStream(utf8)))
    {
        while (mReader.Read())
        {
            XmlNodeType nodeType = mReader.NodeType;

            if (nodeType == XmlNodeType.Text)
            {
                this.onText(HtmlEntityCorrect.Decode(mReader.Value));
            }
            else if (nodeType == XmlNodeType.Element)
            {
                this.onElement(HtmlEntityCorrect.Decode(mReader.Name));
            }
            // All other node types are ignored.
        }
    }

    return mBookCardInfo;
}
public void Encode()
{
    // Round-trips a sample string through HtmlEntityCorrect.Encode and then
    // HtmlEntityCorrect.Decode, asserting the value after each step.
    // NOTE(review): the expected encoded value is identical to the raw input;
    // confirm these literals are what the test is really meant to pin.
    const string sample = "Post & Put & & & &";

    string encoded = HtmlEntityCorrect.Encode(sample);
    Assert.AreEqual("Post & Put & & & &", encoded);

    string decoded = HtmlEntityCorrect.Decode(encoded);
    Assert.AreEqual("Post & Put & & & &", decoded);
}
/// <summary>
/// Advances the builder state machine for an "a" element (presumably invoked
/// by onElement when the element name is "a" — confirm against the caller).
/// Only the Publisher, ContentAndIsbn_0 and ContentAndIsbn_1 states react;
/// in any other state this method does nothing.
/// </summary>
private void onElementA()
{
    // Snapshot the state so exactly one branch runs, as with the original switch.
    BookCardInfoBuilderState current = mState;

    if (current == BookCardInfoBuilderState.Publisher)
    {
        // The text of this element is the publisher name.
        string rawPublisher = mReader.ReadString();
        mBookCardInfo.Publisher = HtmlEntityCorrect.Decode(rawPublisher);
        mState = BookCardInfoBuilderState.ContentAndIsbn_Br;
    }
    else if (current == BookCardInfoBuilderState.ContentAndIsbn_0)
    {
        // First "a" element inside the content/ISBN section: just note it.
        mState = BookCardInfoBuilderState.ContentAndIsbn_1;
    }
    else if (current == BookCardInfoBuilderState.ContentAndIsbn_1)
    {
        // Encountering the second "a" element ends the content and ISBN section.
        getContentAndIsbn();
        mState = BookCardInfoBuilderState.Other;
    }
}
/// <summary>
/// Parses a markup fragment into a list of <see cref="DeptInfo"/> rows.
/// The fragment is passed through HtmlEntityCorrect.Encode and read as XML.
/// Each "tr" element starts a new <see cref="DeptInfo"/>; the text of the next
/// "a" element becomes its DeptPlace, and the "td" elements that follow fill
/// Index, Register, Volume, Year, Status and Type, in that order.
/// </summary>
/// <param name="content">Markup fragment to parse.</param>
/// <returns>The list of rows created during this call (one per "tr" element).</returns>
public List<DeptInfo> Build(string content)
{
    // Reset per-call state. mState is reset as well: it is an instance field,
    // so a value left over from a previous call could otherwise make a "td"
    // that precedes the first "tr" write through a null deptInfo.
    mDeptInfos = new List<DeptInfo>();
    mState = DeptInfoBuilderState.Other;

    content = HtmlEntityCorrect.Encode(content);
    using (mReader = XmlReader.Create(new MemoryStream(Encoding.UTF8.GetBytes(content))))
    {
        DeptInfo deptInfo = null;
        while (mReader.Read())
        {
            if (mReader.NodeType != XmlNodeType.Element)
            {
                continue;
            }

            switch (mReader.Name)
            {
                case "tr":
                    // A new row: everything that follows is written into it.
                    deptInfo = new DeptInfo();
                    mDeptInfos.Add(deptInfo);
                    mState = DeptInfoBuilderState.DeptPlace;
                    break;

                case "a":
                    if (deptInfo == null)
                    {
                        // An "a" element outside any row has nothing to attach to;
                        // skip it instead of throwing NullReferenceException.
                        break;
                    }
                    // Order kept from the original: trim first, then decode.
                    deptInfo.DeptPlace = HtmlEntityCorrect.Decode(mReader.ReadString().Trim());
                    mState = DeptInfoBuilderState.Index;
                    break;

                case "td":
                    // The Index..Type states are only reachable after a "tr" and an
                    // "a" have been seen, so deptInfo is non-null in those cases.
                    // In any other state the cell is left unread.
                    switch (mState)
                    {
                        case DeptInfoBuilderState.Index:
                            deptInfo.Index = readCellText();
                            mState = DeptInfoBuilderState.Register;
                            break;
                        case DeptInfoBuilderState.Register:
                            deptInfo.Register = readCellText();
                            mState = DeptInfoBuilderState.Volume;
                            break;
                        case DeptInfoBuilderState.Volume:
                            deptInfo.Volume = readCellText();
                            mState = DeptInfoBuilderState.Year;
                            break;
                        case DeptInfoBuilderState.Year:
                            deptInfo.Year = readCellText();
                            mState = DeptInfoBuilderState.Status;
                            break;
                        case DeptInfoBuilderState.Status:
                            deptInfo.Status = readCellText();
                            mState = DeptInfoBuilderState.Type;
                            break;
                        case DeptInfoBuilderState.Type:
                            deptInfo.Type = readCellText();
                            mState = DeptInfoBuilderState.Other;
                            break;
                        default:
                            break;
                    }
                    break;
            }
        }
    }

    return mDeptInfos;
}

/// <summary>
/// Reads the text of the element mReader is positioned on, decodes it with
/// HtmlEntityCorrect.Decode and trims the result.
/// </summary>
private string readCellText()
{
    return HtmlEntityCorrect.Decode(mReader.ReadString()).Trim();
}
/// <summary>
/// Parses a table-body markup fragment into a list of <see cref="Book"/> rows.
/// The fragment is passed through HtmlEntityCorrect.Encode and read as XML.
/// Each "tr" element starts a new book, which is added to the result when the
/// matching end tag is reached. An "a" element supplies Url (its "href"
/// attribute) and Title (its text); the text of the 3rd to 8th "td" elements
/// supplies Author, Publisher, PublishYear, Index, Total and Available.
/// </summary>
/// <param name="tbody">Markup fragment to parse.</param>
/// <returns>The books collected, in document order.</returns>
public static List<Book> Parse(string tbody)
{
    var results = new List<Book>();
    Book current = null;

    string encoded = HtmlEntityCorrect.Encode(tbody);
    byte[] utf8 = Encoding.UTF8.GetBytes(encoded);

    using (var reader = XmlReader.Create(new MemoryStream(utf8)))
    {
        // Name of the most recently opened element; decides where text goes.
        string lastElement = null;
        // 1-based position of the current "td" within its row.
        int cellIndex = 0;

        while (reader.Read())
        {
            XmlNodeType nodeType = reader.NodeType;

            if (nodeType == XmlNodeType.Element)
            {
                lastElement = reader.Name;
                switch (lastElement)
                {
                    case "tr":
                        current = new Book();
                        cellIndex = 0;
                        break;
                    case "td":
                        cellIndex++;
                        break;
                    case "a":
                        current.Url = reader.GetAttribute("href");
                        break;
                }
            }
            else if (nodeType == XmlNodeType.Text)
            {
                if (lastElement == "a")
                {
                    current.Title = HtmlEntityCorrect.Decode(reader.Value);
                }
                else if (lastElement == "td")
                {
                    switch (cellIndex)
                    {
                        case 3:
                            current.Author = HtmlEntityCorrect.Decode(reader.Value);
                            break;
                        case 4:
                            // Only this column logs and swallows a failure.
                            try
                            {
                                current.Publisher = HtmlEntityCorrect.Decode(reader.Value);
                            }
                            catch (Exception e)
                            {
                                Console.WriteLine(e);
                            }
                            break;
                        case 5:
                            current.PublishYear = HtmlEntityCorrect.Decode(reader.Value);
                            break;
                        case 6:
                            current.Index = HtmlEntityCorrect.Decode(reader.Value);
                            break;
                        case 7:
                            current.Total = Int32.Parse(HtmlEntityCorrect.Decode(reader.Value));
                            break;
                        case 8:
                            current.Available = Int32.Parse(HtmlEntityCorrect.Decode(reader.Value));
                            break;
                    }
                }
            }
            else if (nodeType == XmlNodeType.EndElement && reader.Name == "tr")
            {
                // End of a row: the book is complete.
                results.Add(current);
            }
        }
    }

    return results;
}