コード例 #1
0
        /// <summary>
        /// Searches Baidu Zhidao for <paramref name="sentence"/> and returns the top answer of
        /// the first search hit that yields one.
        /// </summary>
        /// <param name="sentence">The sentence to search for.</param>
        /// <param name="num">
        /// Requested number of answers. Currently not honored: the scan stops as soon as one
        /// answer has been collected, so at most one answer is returned.
        /// </param>
        /// <returns>
        /// An array with at most one answer. It is empty when the search page could not be
        /// fetched, contained no result links, or following a result link failed.
        /// </returns>
        public static string[] getBaiduZhidaoAnswers(string sentence, int num = 10)
        {
            List<string> res = new List<string>();

            string url  = string.Format("https://zhidao.baidu.com/search?word={0}", WebConnectHelper.UrlEncode(sentence));
            string html = WebConnectHelper.getData(url, Encoding.GetEncoding("gb2312"));
            if (string.IsNullOrEmpty(html))
            {
                // Nothing to parse; also avoids handing a null string to LoadHtml.
                return res.ToArray();
            }

            HtmlDocument hdoc = new HtmlDocument();
            hdoc.LoadHtml(html);

            // SelectNodes yields null (not an empty collection) when nothing matches.
            var urls = hdoc.DocumentNode.SelectNodes("//a[@class=\"ti\"]");
            if (urls == null)
            {
                return res.ToArray();
            }

            try
            {
                foreach (var aurl in urls)
                {
                    string dw = ItemParser.removeBlank(aurl.GetAttributeValue("href", ""), true);
                    if (string.IsNullOrEmpty(dw))
                    {
                        // Result link without a usable target: try the next one.
                        continue;
                    }

                    var areslist = getBaiduZhidaoAnswersByUrl(dw);
                    if (areslist != null && areslist.Length > 0)
                    {
                        // For now only the first available answer is used.
                        res.Add(areslist[0]);
                        break;
                    }
                }
            }
            catch (System.Exception ex)
            {
                // Best effort: a failed lookup ends the scan and whatever was collected so far
                // is returned. The failure is traced instead of being dropped silently.
                System.Diagnostics.Trace.WriteLine(ex);
            }

            return res.ToArray();
        }
コード例 #2
0
        /// <summary>
        /// Extracts the text pieces contained in an HTML fragment.
        /// </summary>
        /// <param name="html">
        /// HTML fragment or plain text. A value that is null, empty, whitespace-only, or that
        /// does not begin with '&lt;' is treated as plain text.
        /// </param>
        /// <returns>
        /// For plain text: a single-element array holding the result of
        /// <c>ItemParser.removeUnText</c>. For markup: one entry per top-level text node, plus
        /// the entries obtained by recursively processing the inner HTML of each top-level
        /// element, in document order.
        /// </returns>
        public static string[] getText(string html)
        {
            bool startsWithTag = !string.IsNullOrWhiteSpace(html) && html[0] == '<';
            if (!startsWithTag)
            {
                return new string[] { ItemParser.removeUnText(html) };
            }

            HtmlDocument parsed = new HtmlDocument();
            parsed.LoadHtml(html);

            List<string> pieces = new List<string>();
            foreach (var child in parsed.DocumentNode.ChildNodes)
            {
                switch (child.NodeType)
                {
                    case HtmlNodeType.Text:
                        pieces.Add(ItemParser.removeUnText(child.InnerText));
                        break;

                    case HtmlNodeType.Element:
                        // Every element (including <br>) is handled by recursing into its inner HTML.
                        pieces.AddRange(getText(child.InnerHtml));
                        break;

                    // Any other node type (e.g. comments) contributes nothing.
                }
            }

            return pieces.ToArray();
        }
コード例 #3
0
        /// <summary>
        /// Looks for an answer in Baidu's knowledge-graph widgets: the keywords are sent to the
        /// regular Baidu web search and the result page is scanned for the instant-answer
        /// elements Baidu may have rendered.
        /// </summary>
        /// <param name="words">Keywords or question to search for.</param>
        /// <returns>
        /// The text of every recognised widget that was found, in the order they are probed
        /// below. Empty when the page could not be fetched or no widget matched.
        /// </returns>
        public static string[] getBaiduKGResult(string words)
        {
            List<string> reslist = new List<string>();
            string       askUrl  = "http://www.baidu.com/s?wd=" + WebConnectHelper.UrlEncode(words);
            string       html    = WebConnectHelper.getData(askUrl, Encoding.UTF8);

            if (string.IsNullOrEmpty(html))
            {
                // Nothing to scan; also avoids handing a null string to LoadHtml.
                return reslist.ToArray();
            }

            HtmlDocument hdoc = new HtmlDocument();
            hdoc.LoadHtml(html);

            // Statistical figures
            addBaiduKGNodeText(hdoc, "//*[@class=\"op_gdp_subtitle\"]", reslist);

            // Knowledge-graph facts
            addBaiduKGNodeText(hdoc, "//*[@class=\"op_exactqa_s_answer\"]", reslist);

            // Stock quote: current price first, then the change info. The change info is only
            // looked up when the price element is present.
            if (addBaiduKGNodeText(hdoc, "//*[@class=\"op_stockweakdemand_cur_num c-gap-right-small\"]", reslist))
            {
                addBaiduKGNodeText(hdoc, "//*[@class=\"op_stockweakdemand_cur_info c-gap-icon-right-small\"]", reslist);
            }

            // Hotline phone number
            addBaiduKGNodeText(hdoc, "//*[@class=\"op_kefupoly_td2\"]", reslist);

            // Translation
            addBaiduKGNodeText(hdoc, "//*[@class=\"op_sp_fanyi_line_two\"]", reslist);

            // Arithmetic result (this element carries no class, so it is matched by its inline style)
            addBaiduKGNodeText(hdoc, "//*[@style=\"font-size:1.4em;line-height:22px;padding-bottom:2px;width:474px;\"]", reslist);

            // Currency conversion
            addBaiduKGNodeText(hdoc, "//*[@class=\"op_exrate_result\"]", reslist);

            // Unit conversion
            addBaiduKGNodeText(hdoc, "//*[@class=\"op-unit-result-single c-gap-left c-gap-right\"]", reslist);

            // Postal code (the trailing space is part of the attribute value being matched)
            addBaiduKGNodeText(hdoc, "//*[@class=\"op_post_content \"]", reslist);

            // Baidu Zhidao best answer: follow the question link and take the answer from the
            // page it points to.
            var bestAnswerLink = hdoc.DocumentNode.SelectSingleNode("//*[@class=\"op_best_answer_question_link\"]");
            if (bestAnswerLink != null)
            {
                addBaiduZhidaoAnswerByLink(bestAnswerLink.GetAttributeValue("href", ""), reslist);
            }

            // First recommended Baidu Zhidao answer: the link is the first child of the
            // block's fourth child node; follow it the same way.
            var firstQa = hdoc.DocumentNode.SelectSingleNode("//*[@class=\"op_generalqa_answer c-gap-bottom-small op_generalqa_answer_first\"]");
            if (firstQa != null && firstQa.ChildNodes.Count > 3 && firstQa.ChildNodes[3].FirstChild != null)
            {
                addBaiduZhidaoAnswerByLink(firstQa.ChildNodes[3].FirstChild.GetAttributeValue("href", ""), reslist);
            }

            // Baidu calendar: the calendar is generated by JavaScript, so the selected day is
            // pulled out of the embedded script data with a regular expression.
            string date = Regex.Match(html, "\"selectday\":\"([^\"]*?)\"").Groups[1].Value;
            if (!string.IsNullOrWhiteSpace(date))
            {
                reslist.Add(date);
            }

            return reslist.ToArray();
        }

        /// <summary>
        /// Appends the cleaned inner text of the first node matching <paramref name="xpath"/>
        /// to <paramref name="reslist"/>, if such a node exists.
        /// </summary>
        /// <param name="hdoc">Parsed result page.</param>
        /// <param name="xpath">XPath expression selecting the widget element.</param>
        /// <param name="reslist">List the text is appended to.</param>
        /// <returns>True when a node matched and its text was appended; otherwise false.</returns>
        private static bool addBaiduKGNodeText(HtmlDocument hdoc, string xpath, List<string> reslist)
        {
            // SelectSingleNode yields null when nothing matches; that is the normal case for
            // most widgets on any given page.
            var node = hdoc.DocumentNode.SelectSingleNode(xpath);
            if (node == null)
            {
                return false;
            }

            reslist.Add(ItemParser.removeUnText(node.InnerText));
            return true;
        }

        /// <summary>
        /// Follows a Baidu Zhidao link and appends the first answer found on the target page
        /// to <paramref name="reslist"/>.
        /// </summary>
        /// <param name="href">Raw href attribute value of the link.</param>
        /// <param name="reslist">List the answer is appended to.</param>
        private static void addBaiduZhidaoAnswerByLink(string href, List<string> reslist)
        {
            try
            {
                string dw = ItemParser.removeUnText(href);
                if (string.IsNullOrWhiteSpace(dw))
                {
                    return;
                }

                var answers = getBaiduZhidaoAnswersByUrl(dw);
                if (answers != null && answers.Length > 0)
                {
                    reslist.Add(answers[0]);
                }
            }
            catch (System.Exception ex)
            {
                // Best effort: a failed follow-up request must not discard the results already
                // collected from the search page. The failure is traced instead of being
                // dropped silently.
                System.Diagnostics.Trace.WriteLine(ex);
            }
        }