/// <summary>
/// Parses the exported chat log at <c>tmpDir + "/index.html"</c> with HtmlAgilityPack and appends one
/// <c>QQMessage</c> (date, time, nickname, QQ number or e-mail, and content) to <c>messages</c>
/// for every message row found.
/// </summary>
/// <remarks>
/// The document is scanned cell by cell (<c>//td</c>):
/// a styled cell whose text starts with "日期" is a date header and sets the date used for the
/// message rows that follow it; a cell whose first child carries the sender-header style
/// (contains "color" and "padding-left:10px;") is a message row. In a message row the first
/// child holds the sender label followed by the time, and the last child holds the message body.
/// The sender label is either the nickname followed by an e-mail in angle brackets, or the
/// nickname followed by the QQ number in parentheses.
/// A message row that appears before any date header makes <c>DateTime.ParseExact</c> throw,
/// because no date is known yet.
/// </remarks>
private void parseHtmlToMessages()
{
    var doc = new HtmlAgilityPack.HtmlDocument();
    doc.Load(tmpDir + "/index.html", new System.Text.UTF8Encoding());

    // SelectNodes returns null (not an empty collection) when the XPath matches nothing.
    var cells = doc.DocumentNode.SelectNodes("//td");
    if (cells == null)
    {
        return;
    }

    // The export is machine-written in fixed formats, so parse and format it culture-independently:
    // with the user's culture the "/" and ":" specifiers map to that culture's separators and
    // ToString may use a non-Gregorian calendar.
    var culture = CultureInfo.InvariantCulture;
    string date = null;

    foreach (HtmlAgilityPack.HtmlNode td in cells)
    {
        // Date header cell, e.g. text "日期: 2012/4/11".
        var style = td.Attributes["style"];
        if (style != null && style.Value != null && td.InnerText != null
            && td.InnerText.StartsWith("日期", StringComparison.Ordinal))
        {
            date = td.InnerText.SubstringAfter("日期: ");
            continue;
        }

        // Message cell: recognised by the style of its first child (the sender header).
        var header = td.FirstChild;
        string msgStyle = null;
        if (header != null)
        {
            var styleAttr = header.Attributes["style"];
            if (styleAttr != null)
            {
                msgStyle = styleAttr.Value;
            }
        }

        if (msgStyle == null || !msgStyle.Contains("color") || !msgStyle.Contains("padding-left:10px;"))
        {
            continue;
        }

        QQMessage message = new QQMessage();

        // InnerText keeps HTML entities, so decode the ones the export uses.
        // "&amp;" is decoded last so that an escaped entity such as "&amp;lt;" is not decoded twice.
        var name = header.FirstChild.InnerText
            .Replace("&lt;", "<")
            .Replace("&gt;", ">")
            .Replace("&nbsp;", " ")
            .Replace("&amp;", "&");

        if (name.EndsWith(">", StringComparison.Ordinal))
        {
            // Sender identified by e-mail: nickname followed by the address in angle brackets.
            message.nickname = name.SubstringBefore("<");
            message.qqEmail = name.SubstringBetween("<", ">");
        }
        else
        {
            // Sender identified by QQ number: nickname followed by the number in parentheses.
            message.nickname = name.SubstringBefore("(");
            message.qqNumber = name.SubstringBetween("(", ")");
        }

        // Date such as 2012/4/11 is stored as the integer 20120411.
        message.date = Int32.Parse(
            DateTime.ParseExact(date, new string[] { "yyyy/M/d", "yyyy-M-d", "M/d/yyyy" }, culture, DateTimeStyles.None)
                .ToString("yyyyMMdd", culture),
            culture);

        // Time such as 12:28:18 is stored as the integer 122818.
        message.time = Int32.Parse(
            DateTime.ParseExact(header.LastChild.InnerText, new string[] { "H:m:s" }, culture, DateTimeStyles.None)
                .ToString("HHmmss", culture),
            culture);

        message.content = td.LastChild.InnerHtml;
        messages.Add(message);
    }
}
/// <summary>
/// Determines whether this message and <paramref name="another"/> represent the same chat message:
/// same sender, same date, same time and same content.
/// </summary>
/// <param name="another">The message to compare with; may be null.</param>
/// <returns>
/// <c>true</c> when date, time and content are equal and the senders match; otherwise <c>false</c>
/// (including when <paramref name="another"/> is null).
/// </returns>
/// <remarks>
/// Senders match when they share a non-empty QQ number or a non-empty e-mail. A missing identifier
/// on both sides is not counted as a match, otherwise two different QQ numbers would compare equal
/// merely because neither message carries an e-mail. Messages with no sender identifier at all on
/// either side are still compared by date, time and content only.
/// </remarks>
public bool Equals(QQMessage another)
{
    if (object.ReferenceEquals(another, null))
    {
        return false;
    }

    if (date != another.date || time != another.time || content != another.content)
    {
        return false;
    }

    bool sameNumber = !string.IsNullOrEmpty(qqNumber) && qqNumber == another.qqNumber;
    bool sameEmail = !string.IsNullOrEmpty(qqEmail) && qqEmail == another.qqEmail;

    // Keeps Equals reflexive for messages that carry neither identifier.
    bool neitherIdentified =
        string.IsNullOrEmpty(qqNumber) && string.IsNullOrEmpty(qqEmail)
        && string.IsNullOrEmpty(another.qqNumber) && string.IsNullOrEmpty(another.qqEmail);

    return sameNumber || sameEmail || neitherIdentified;
}
/// <summary>
/// Scans <paramref name="messages"/> forward from <paramref name="fromIndex"/> looking for a
/// message equal to <paramref name="checkingMsg"/>.
/// </summary>
/// <param name="messages">The list to scan.</param>
/// <param name="fromIndex">Index at which the scan starts.</param>
/// <param name="checkingMsg">The message being checked.</param>
/// <param name="index">
/// On return, the position where the scan stopped: the index of the equal message when the result
/// is <c>false</c>; otherwise the index of the first message with a later date/time than
/// <paramref name="checkingMsg"/>, or <c>messages.Count</c> if the end of the list was reached.
/// </param>
/// <returns>
/// <c>false</c> if an equal message was found before reaching a later one; <c>true</c> otherwise.
/// </returns>
private bool checkMessage(List<QQMessage> messages, int fromIndex, QQMessage checkingMsg, out int index)
{
    index = fromIndex;
    while (index < messages.Count)
    {
        QQMessage candidate = messages[index];

        // Stop as soon as the candidate is strictly later than the message being checked.
        bool earlierDay = checkingMsg.date < candidate.date;
        bool sameDayEarlierTime = checkingMsg.date == candidate.date && checkingMsg.time < candidate.time;
        if (earlierDay || sameDayEarlierTime)
        {
            return true;
        }

        if (checkingMsg.Equals(candidate))
        {
            return false;
        }

        index++;
    }

    return true;
}