/// <summary>
/// Searches Baidu Zhidao for a sentence and returns an answer taken from the
/// first search hit whose page yields one.
/// </summary>
/// <param name="sentence">The sentence to search for.</param>
/// <param name="num">
/// Requested number of answers. Currently not honored: the lookup stops as soon
/// as one answer has been collected.
/// </param>
/// <returns>
/// An array with at most one answer; empty when the page could not be fetched,
/// no hit was found, or following the hits failed.
/// </returns>
public static string[] getBaiduZhidaoAnswers(string sentence, int num = 10)
{
    string url = string.Format("https://zhidao.baidu.com/search?word={0}", WebConnectHelper.UrlEncode(sentence));
    List<string> res = new List<string>();

    string html = WebConnectHelper.getData(url, Encoding.GetEncoding("gb2312"));
    if (string.IsNullOrEmpty(html))
    {
        // Nothing was downloaded; LoadHtml would throw on null, so bail out early.
        return res.ToArray();
    }

    HtmlDocument hdoc = new HtmlDocument();
    hdoc.LoadHtml(html);

    // SelectNodes returns null (not an empty collection) when nothing matches.
    var urls = hdoc.DocumentNode.SelectNodes("//a[@class=\"ti\"]");
    if (urls == null)
    {
        return res.ToArray();
    }

    try
    {
        foreach (var aurl in urls)
        {
            string dw = ItemParser.removeBlank(aurl.GetAttributeValue("href", ""), true);
            var areslist = getBaiduZhidaoAnswersByUrl(dw);

            // For now only the first available answer is used.
            if (areslist != null && areslist.Length > 0)
            {
                res.Add(areslist[0]);
                break;
            }
        }
    }
    catch
    {
        // Deliberately best-effort: a failure while following a hit (network,
        // parsing) must not surface to the caller; whatever was collected is returned.
    }

    return res.ToArray();
}
/// <summary>
/// Extracts the text fragments from an HTML snippet.
/// </summary>
/// <param name="html">
/// The snippet to process. Input that is blank or does not start with '&lt;'
/// is treated as plain text rather than parsed.
/// </param>
/// <returns>
/// The fragments in document order, each passed through
/// <c>ItemParser.removeUnText</c>. Plain-text input yields a single-element array.
/// </returns>
public static string[] getText(string html)
{
    // Only input that opens with a tag is parsed as markup.
    bool looksLikeMarkup = !string.IsNullOrWhiteSpace(html) && html[0] == '<';
    if (!looksLikeMarkup)
    {
        return new string[] { ItemParser.removeUnText(html) };
    }

    HtmlDocument document = new HtmlDocument();
    document.LoadHtml(html);

    List<string> pieces = new List<string>();
    foreach (var child in document.DocumentNode.ChildNodes)
    {
        switch (child.NodeType)
        {
            case HtmlNodeType.Text:
                pieces.Add(ItemParser.removeUnText(child.InnerText));
                break;

            case HtmlNodeType.Element:
                // Descend into the element's content and keep its fragments in order.
                pieces.AddRange(getText(child.InnerHtml));
                break;

            // Other node types (e.g. comments) contribute nothing.
        }
    }

    return pieces.ToArray();
}
/// <summary>
/// Looks for an answer in Baidu's knowledge-graph results: the keywords are sent
/// to Baidu web search and the result page is scanned for the "smart answer"
/// widgets Baidu may embed (statistics, facts, stocks, hotlines, translation,
/// arithmetic, exchange rates, unit conversion, postal codes, Zhidao answers,
/// calendar dates).
/// </summary>
/// <param name="words">The keywords to search for.</param>
/// <returns>
/// The text of every widget that was found, in the order listed above; empty
/// when the page could not be fetched or no widget was present.
/// </returns>
public static string[] getBaiduKGResult(string words)
{
    List<string> reslist = new List<string>();
    string askUrl = "http://www.baidu.com/s?wd=" + WebConnectHelper.UrlEncode(words);
    string html = WebConnectHelper.getData(askUrl, Encoding.UTF8);
    if (string.IsNullOrEmpty(html))
    {
        // Nothing was downloaded; LoadHtml would throw on null, so bail out early.
        return reslist.ToArray();
    }

    HtmlDocument hdoc = new HtmlDocument();
    hdoc.LoadHtml(html);

    // Statistics figures
    tryAddNodeText(reslist, hdoc, "//*[@class=\"op_gdp_subtitle\"]");

    // Knowledge-graph facts
    tryAddNodeText(reslist, hdoc, "//*[@class=\"op_exactqa_s_answer\"]");

    // Stocks: the change info is only reported together with the current price.
    // The price selector previously read "c-gap - right - small" (spaces around
    // the hyphens), which could never match a class attribute.
    if (tryAddNodeText(reslist, hdoc, "//*[@class=\"op_stockweakdemand_cur_num c-gap-right-small\"]"))
    {
        tryAddNodeText(reslist, hdoc, "//*[@class=\"op_stockweakdemand_cur_info c-gap-icon-right-small\"]");
    }

    // Hotline phone numbers
    tryAddNodeText(reslist, hdoc, "//*[@class=\"op_kefupoly_td2\"]");

    // Translation
    tryAddNodeText(reslist, hdoc, "//*[@class=\"op_sp_fanyi_line_two\"]");

    // Arithmetic: this widget is matched by its inline style.
    tryAddNodeText(reslist, hdoc, "//*[@style=\"font-size:1.4em;line-height:22px;padding-bottom:2px;width:474px;\"]");

    // Exchange-rate conversion
    tryAddNodeText(reslist, hdoc, "//*[@class=\"op_exrate_result\"]");

    // Unit conversion
    tryAddNodeText(reslist, hdoc, "//*[@class=\"op-unit-result-single c-gap-left c-gap-right\"]");

    // Postal codes (the trailing space is part of the matched class value)
    tryAddNodeText(reslist, hdoc, "//*[@class=\"op_post_content \"]");

    // Baidu Zhidao best answer: follow the link and take the answer from there.
    try
    {
        var bestLink = hdoc.DocumentNode.SelectSingleNode("//*[@class=\"op_best_answer_question_link\"]");
        if (bestLink != null)
        {
            string dw = ItemParser.removeUnText(bestLink.GetAttributeValue("href", ""));
            // NOTE(review): dw is a link, yet it is passed to the sentence search
            // rather than getBaiduZhidaoAnswersByUrl as in the branch below —
            // confirm which one is intended.
            var answers = getBaiduZhidaoAnswers(dw);
            if (answers != null && answers.Length > 0)
            {
                reslist.Add(answers[0]);
            }
        }
    }
    catch
    {
        // Best-effort: a failed follow-up request must not discard earlier results.
    }

    // First of Baidu Zhidao's recommended answers: follow the link and take the
    // answer from there.
    try
    {
        var firstAnswer = hdoc.DocumentNode.SelectSingleNode("//*[@class=\"op_generalqa_answer c-gap-bottom-small op_generalqa_answer_first\"]");
        // The link is expected on the first child of the widget's fourth child node.
        if (firstAnswer != null && firstAnswer.ChildNodes.Count > 3 && firstAnswer.ChildNodes[3].FirstChild != null)
        {
            string dw = ItemParser.removeUnText(firstAnswer.ChildNodes[3].FirstChild.GetAttributeValue("href", ""));
            var answers = getBaiduZhidaoAnswersByUrl(dw);
            if (answers != null && answers.Length > 0)
            {
                reslist.Add(answers[0]);
            }
        }
    }
    catch
    {
        // Best-effort: a failed follow-up request must not discard earlier results.
    }

    // Baidu calendar date: the calendar is rendered by JavaScript, so the
    // selected day is pulled out of the script with a regular expression.
    string date = Regex.Match(html, "\"selectday\":\"([^\"]*?)\"", RegexOptions.None).Groups[1].Value;
    if (!string.IsNullOrWhiteSpace(date))
    {
        reslist.Add(date);
    }

    return reslist.ToArray();
}

/// <summary>
/// Appends the cleaned inner text of the first node matching an XPath
/// expression to a result list, if such a node exists.
/// </summary>
/// <param name="reslist">The list that receives the text.</param>
/// <param name="hdoc">The parsed result page.</param>
/// <param name="xpath">XPath expression selecting the widget node.</param>
/// <returns><c>true</c> when a node matched and its text was added; otherwise <c>false</c>.</returns>
private static bool tryAddNodeText(List<string> reslist, HtmlDocument hdoc, string xpath)
{
    try
    {
        // SelectSingleNode returns null when nothing matches.
        var node = hdoc.DocumentNode.SelectSingleNode(xpath);
        if (node == null)
        {
            return false;
        }

        reslist.Add(ItemParser.removeUnText(node.InnerText));
        return true;
    }
    catch
    {
        // Best-effort: one unreadable widget must not prevent the others from being read.
        return false;
    }
}