/// <summary>
/// Searches Baidu Tieba for <paramref name="sentence"/> and collects the text of the posts found.
/// </summary>
/// <param name="sentence">Text to search for; it is URL-encoded into the query string.</param>
/// <param name="num">
/// Collection stops once this many posts have been kept. The check runs after a post is added,
/// so the first acceptable post is always kept.
/// </param>
/// <returns>The kept post texts in page order; an empty array when no post node is found.</returns>
public string[] getBaiduTiebaAnswers(string sentence, int num = 10)
{
    string searchUrl = string.Format("http://tieba.baidu.com/f/search/res?ie=utf-8&qw=%20{0}", WebConnectActor.UrlEncode(sentence));
    List<string> kept = new List<string>();
    string page = WebConnectActor.getData(searchUrl, Encoding.GetEncoding("gb2312"));
    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(page);

    try
    {
        HtmlNodeCollection postNodes = doc.DocumentNode.SelectNodes("//div[@class=\"p_content\"]");
        if (postNodes != null)
        {
            foreach (var postNode in postNodes)
            {
                string text = removeReplyWords(postNode.InnerText);
                text = string.Join("\r\n", getText(text)).Trim();

                // Count the characters of the query that never occur in the post.
                int missing = 0;
                foreach (char ch in sentence)
                {
                    if (!text.Contains(ch))
                    {
                        missing++;
                    }
                }

                // A post is accepted when at most two query characters are absent from it.
                if (missing > 2)
                {
                    continue;
                }

                kept.Add(text);
                if (kept.Count >= num)
                {
                    break;
                }
            }
        }
    }
    catch
    {
        // Best effort: whatever was collected before the failure is still returned.
    }

    return kept.ToArray();
}
/// <summary>
/// Looks up answers on Baidu Zhidao: runs a search for <paramref name="sentence"/>, then opens each
/// result link through <c>getBaiduZhidaoAnswersByUrl</c> and keeps the first answer of every page.
/// </summary>
/// <param name="sentence">The sentence to search for.</param>
/// <param name="num">
/// Number of answers to collect. The limit is checked after an answer is added, so at least one
/// answer is returned when any page yields one.
/// </param>
/// <returns>The collected answers (trimmed); empty when nothing was found or the lookup failed.</returns>
public string[] getBaiduZhidaoAnswers(string sentence, int num = 10)
{
    List<string> res = new List<string>();
    try
    {
        string url = $"https://zhidao.baidu.com/search?word={WebConnectActor.UrlEncode(sentence)}";
        string html = WebConnectActor.getData(url, Encoding.GetEncoding("gb2312"), cookie);
        HtmlDocument hdoc = new HtmlDocument();
        hdoc.LoadHtml(html);

        // Links are gathered in a plain list so the parsed document's own node collection
        // is never mutated, and so a missing result list does not discard the featured link.
        List<HtmlNode> links = new List<HtmlNode>();

        // The featured entry, when present, is tried first.
        try
        {
            var node = hdoc.DocumentNode.SelectSingleNode("//dt[@class=\"dt mb-8\"]");
            if (node != null && node.ChildNodes.Count > 1)
            {
                links.Add(node.ChildNodes[1]);
            }
        }
        catch (Exception ex)
        {
            FileIOActor.log(ex);
        }

        // SelectNodes yields null (not an empty collection) when nothing matches.
        var urls = hdoc.DocumentNode.SelectNodes("//a[@class=\"ti\"]");
        if (urls != null)
        {
            links.AddRange(urls);
        }

        foreach (var aurl in links)
        {
            string dw = ItemParser.removeBlank(aurl.GetAttributeValue("href", ""), true);
            var areslist = getBaiduZhidaoAnswersByUrl(dw);
            if (areslist.Length > 0)
            {
                res.Add(areslist[0].Trim());
            }

            // ">=" so that exactly num answers are returned (the old ">" returned num + 1).
            if (res.Count >= num)
            {
                break;
            }
        }
    }
    catch (Exception ex)
    {
        FileIOActor.log(ex);
    }

    return res.ToArray();
}
/// <summary>
/// Downloads the Google Translate landing page and stores the value extracted by
/// <c>GetTkk</c> in <c>tkk</c>. On failure <c>tkk</c> is left unchanged and the error is logged.
/// </summary>
void InitTkk()
{
    // NOTE(review): the previous version built a platform-specific User-Agent table here but
    // never passed it to the request, so it had no effect and has been removed.
    const string url = "https://translate.google.cn";
    try
    {
        string tkkHtml = WebConnectActor.getData(url, Encoding.UTF8, "", true);
        tkk = GetTkk(tkkHtml);
    }
    catch (Exception ex)
    {
        // Best effort: the caller proceeds with whatever tkk currently holds.
        FileIOActor.log(ex);
    }
}
/// <summary>
/// Translates <paramref name="src"/> through the web translate endpoint.
/// </summary>
/// <param name="src">Text to translate.</param>
/// <param name="to">Target language, forwarded to <c>getTranslateUrl</c>.</param>
/// <param name="from">Source language, forwarded to <c>getTranslateUrl</c>.</param>
/// <returns>
/// The first element of every entry in the first array of the JSON response, one per line and
/// trimmed; the raw HTTP response when it cannot be processed that way.
/// </returns>
public string Translation(string src, string to = "简体中文", string from = "自动")
{
    InitTkk();
    string token = GetTK(src);
    string endpoint = getTranslateUrl(from, to);
    string requestUrl = endpoint + "&tk=" + token + "&q=" + UrlEncode(src);

    string httpresult = WebConnectActor.getData(requestUrl, Encoding.UTF8, "", true);
    FileIOActor.log(httpresult);

    try
    {
        JArray root = (JArray)JsonConvert.DeserializeObject(httpresult);
        StringBuilder lines = new StringBuilder();
        foreach (var segment in root[0])
        {
            lines.Append(segment[0].ToString()).Append(Environment.NewLine);
        }
        return lines.ToString().Trim();
    }
    catch (Exception e)
    {
        FileIOActor.log(e.Message + "\r\n" + e.StackTrace);
    }

    // Fall back to the unprocessed response body.
    return httpresult;
}
/// <summary>
/// Pages through the Bilibili live room list of an area (sorted by "online") and collects the rooms.
/// </summary>
/// <param name="parea">Value sent as <c>parent_area_id</c>.</param>
/// <param name="area">Value sent as <c>area_id</c>.</param>
/// <param name="maxpage">
/// Maximum number of pages to fetch; zero or a negative value (the default) means no limit.
/// </param>
/// <returns>The rooms read so far; entries that fail to parse are logged and skipped.</returns>
public List<LiveInfo> getLiveInfos(int parea, int area, int maxpage = -1)
{
    const string sort = "online";
    List<LiveInfo> infos = new List<LiveInfo>();
    int page = 1;
    int sum;
    int sumindex = 0;

    do
    {
        string url = $"https://api.live.bilibili.com/room/v3/area/getRoomList?platform=web&parent_area_id={parea}&cate_id=0&area_id={area}&sort_type={sort}&page={page}&page_size=30&tag_version=1";
        string resstr = WebConnectActor.getData(url, Encoding.UTF8);
        JObject jo = JObject.Parse(resstr);
        sum = int.Parse(jo["data"]["count"].ToString());

        var list = jo["data"]["list"];
        int num = list.Count();
        if (num == 0)
        {
            // An empty page while count is still larger would otherwise loop forever.
            break;
        }

        foreach (var room in list)
        {
            try
            {
                LiveInfo info = new LiveInfo();
                info.roomid = int.Parse(room["roomid"].ToString());
                info.uid = int.Parse(room["uid"].ToString());
                info.uname = room["uname"].ToString();
                info.online = int.Parse(room["online"].ToString());
                info.title = room["title"].ToString();
                infos.Add(info);
            }
            catch (Exception ex)
            {
                // A single malformed entry must not abort the whole listing.
                FileIOActor.log(ex);
            }
        }

        sumindex += num;

        // Checked before advancing, so maxpage = N fetches exactly N pages
        // (the old post-increment equality test fetched N - 1 and ignored maxpage = 1).
        if (maxpage > 0 && page >= maxpage)
        {
            break;
        }
        page += 1;
    } while (sumindex < sum);

    return infos;
}
/// <summary>
/// Looks a question up on asklib.com: searches, follows the first result link and returns the
/// non-blank text lines of that page's "listtip" block. (Marked by the author as temporarily unavailable.)
/// </summary>
/// <param name="question">The question text; it is URL-encoded into the search URL.</param>
/// <returns>The extracted text with "\r\n" line ends, or an empty string when any step fails.</returns>
public string getAsklibResult(string question)
{
    string searchUrl = string.Format("http://www.asklib.com/s/{0}", WebConnectActor.UrlEncode(question));
    string searchHtml = WebConnectActor.getData(searchUrl, Encoding.UTF8);
    HtmlDocument searchDoc = new HtmlDocument();
    searchDoc.LoadHtml(searchHtml);

    try
    {
        // First hit on the result page.
        HtmlNode link = searchDoc.DocumentNode.SelectSingleNode("//div[@class=\"p15 right\"]").ChildNodes[1];
        string detailUrl = "http://www.asklib.com/" + ItemParser.removeBlank(link.GetAttributeValue("href", ""), true);

        HtmlDocument detailDoc = new HtmlDocument();
        detailDoc.LoadHtml(WebConnectActor.getData(detailUrl, Encoding.UTF8));

        var pieces = getText(detailDoc.DocumentNode.SelectSingleNode("//div[@class=\"listtip\"]").InnerHtml);
        StringBuilder text = new StringBuilder();
        foreach (var piece in pieces)
        {
            if (string.IsNullOrWhiteSpace(piece.Trim()))
            {
                continue;
            }
            text.Append(piece).Append("\r\n");
        }
        text.Replace("\r\n\r\n", "\r\n");
        return text.ToString();
    }
    catch
    {
        // Any missing node or download problem simply yields no answer.
    }

    return "";
}
/// <summary>
/// Builds a short status line for a Bilibili live room from the JSON embedded in the room page
/// after the <c>__NEPTUNE_IS_MY_WAIFU__</c> marker.
/// </summary>
/// <param name="roomid">Room id appended to <c>https://live.bilibili.com/</c>.</param>
/// <returns>
/// A description (area, title, popularity, elapsed time) when <c>live_status</c> is "1";
/// "没播" for any other status; an empty string when the page holds no usable data.
/// </returns>
public string getRoomInfo(string roomid)
{
    const string marker = "__NEPTUNE_IS_MY_WAIFU__";

    string url = "https://live.bilibili.com/" + roomid;
    string html = WebConnectActor.getData(url, Encoding.UTF8);
    if (string.IsNullOrEmpty(html))
    {
        return "";
    }

    try
    {
        int markerPos = html.LastIndexOf(marker, StringComparison.Ordinal);
        if (markerPos < 0)
        {
            return "";
        }

        // Skip the marker plus the one character that follows it; the JSON runs to the last '}'.
        int begin = markerPos + marker.Length + 1;
        int end = html.LastIndexOf('}') + 1;
        if (begin >= end)
        {
            return "";
        }

        JObject j = JObject.Parse(html.Substring(begin, end - begin));
        string status = j["roomInitRes"]["data"]["live_status"].ToString();
        if (status != "1")
        {
            // Not live: none of the remaining fields are needed.
            return "没播";
        }

        string title = j["baseInfoRes"]["data"]["title"].ToString();
        string areaName = j["baseInfoRes"]["data"]["area_name"].ToString();
        int online = int.Parse(j["baseInfoRes"]["data"]["online"].ToString());

        string timeSpendStr;
        long beginTimelong = long.Parse(j["roomInitRes"]["data"]["live_time"].ToString());
        if (beginTimelong > 1000000000)
        {
            // NOTE(review): assumes SecondsToDateTime yields local time, matching DateTime.Now - confirm.
            timeSpendStr = describeLiveDuration(DateTime.Now - SecondsToDateTime(beginTimelong));
        }
        else
        {
            // Values this small are treated as "start time unknown".
            timeSpendStr = "不知道多长时间";
        }

        return $"正在{areaName}区播 {title},人气{online},播了{timeSpendStr}";
    }
    catch (Exception e)
    {
        FileIOActor.log(e);
    }

    return "";
}

/// <summary>Formats an elapsed time as days/hours/minutes, omitting components that are not positive.</summary>
private static string describeLiveDuration(TimeSpan elapsed)
{
    StringBuilder text = new StringBuilder();
    if (elapsed.Days > 0)
    {
        text.Append($"{elapsed.Days}天");
    }
    if (elapsed.Hours > 0)
    {
        text.Append($"{elapsed.Hours}小时");
    }
    if (elapsed.Minutes > 0)
    {
        text.Append($"{elapsed.Minutes}分钟");
    }
    return text.Length > 0 ? text.ToString() : "刚不到一分钟";
}
/// <summary>
/// Tries to answer a question from Baidu: first from the "smart answer" widgets on the web search
/// result page, then, if none is present, from the first Baidu Zhidao search hit.
/// </summary>
/// <param name="question">The question text.</param>
/// <returns>
/// The answers found. When the Zhidao fallback is taken, the first element is always the
/// progress message "正在百度问题:" followed by the question.
/// </returns>
public string[] getWebsiteAnswer(string question)
{
    // Pages are matched with regexes written against markup without line breaks or spaces.
    Func<string, string> squeeze = s => (s ?? "").Replace("\n", "").Replace("\r", "").Replace(" ", "");

    List<string> answer = new List<string>();
    string askUrl = "http://www.baidu.com/s?wd=" + WebConnectActor.UrlEncode(question);
    string res = squeeze(WebConnectActor.getData(askUrl, Encoding.UTF8, cookie));

    // 1. Exact-answer widget on the search page.
    Match m = Regex.Match(res, "class=\"op_exactqa_s_answer\">(.*?)</div>");
    if (m.Success)
    {
        m = Regex.Match(m.Groups[1].Value, "target=\"_blank\">(.*?)</a>");
        if (m.Success)
        {
            answer.Add(m.Groups[1].Value);
        }
        return answer.ToArray();
    }

    // 2. Statistics widget. "\s*" because spaces were stripped above, so a literal
    //    "<p class=" could never match.
    m = Regex.Match(res, "<p\\s*class='op_gdp_subtitle'>(.*?)</p>");
    if (m.Success)
    {
        answer.Add(m.Groups[1].Value);
        return answer.ToArray();
    }

    // 3. Calculator widget.
    m = Regex.Match(res, "line-height:22px;padding-bottom:2px;width:474px;\">(.*?)</div>");
    if (m.Success)
    {
        // NOTE(review): as written this replaces a space with a space; presumably it was meant
        // to normalise a non-breaking space - confirm against the original file.
        answer.Add(m.Groups[1].Value.Replace(" ", " "));
        return answer.ToArray();
    }

    // 4. Fall back to Baidu Zhidao.
    answer.Add("正在百度问题:" + question);

    // The question is URL-encoded here as it is for the web search above, and the page is
    // decoded as gb2312 like the other Zhidao requests in this class.
    askUrl = "http://zhidao.baidu.com/search?word=" + WebConnectActor.UrlEncode(question);
    res = squeeze(WebConnectActor.getData(askUrl, Encoding.GetEncoding("gb2312"), cookie));

    // The former data-rank check was removed: its result was never used, but parsing the
    // rank could throw and abort the whole lookup.

    m = Regex.Match(res, "data-log-area=\"list\">(.*?)</a>");
    if (!m.Success)
    {
        return answer.ToArray();
    }

    // URL of the closest question on the Zhidao result page.
    m = Regex.Match(m.Groups[1].Value, "href=\"(.*?)\"");
    if (!m.Success)
    {
        return answer.ToArray();
    }

    askUrl = m.Groups[1].Value;
    res = squeeze(WebConnectActor.getData(askUrl, Encoding.GetEncoding("gb2312"), cookie));

    string[] answerPatterns =
    {
        // accepted answer
        "wgt-best(.*?)i-quality-icon",
        "wgt-best(.*?)answer-share-widget",
        // high-quality answer
        "quality-content-detailcontent\">(.*?)</div>",
        // answer recommended by other users
        "wgt-recommend(.*?)i-quality-icon"
    };

    string answerHtml = null;
    foreach (string pattern in answerPatterns)
    {
        Match hit = Regex.Match(res, pattern);
        if (hit.Success)
        {
            answerHtml = hit.Groups[1].Value;
            break;
        }
    }

    if (answerHtml != null)
    {
        m = Regex.Match(answerHtml, "<pre(.*?)>(.*?)</pre>");
        if (m.Success)
        {
            string text = m.Groups[2].Value.Replace("<br>", "\r\n").Replace("<br/>", "\r\n");
            answer.Add(replaceImageWords(text));
        }
    }

    return answer.ToArray();
}
/// <summary>
/// Opens a Baidu Zhidao question page and extracts its best answer
/// (the node with class "best-text mb-10"), with watermark fragments removed.
/// </summary>
/// <param name="url">Address of the Zhidao question page.</param>
/// <returns>
/// A one-element array holding the cleaned answer text, or an empty array when the page has no
/// best answer or could not be processed (the error is logged).
/// </returns>
public string[] getBaiduZhidaoAnswersByUrl(string url)
{
    List<string> res = new List<string>();
    try
    {
        // The page is decoded as gb2312.
        string html = WebConnectActor.getData(url, Encoding.GetEncoding("gb2312"), cookie);
        HtmlDocument hdoc = new HtmlDocument();
        hdoc.LoadHtml(html);

        // New-style best answer block.
        var node = hdoc.DocumentNode.SelectSingleNode("//*[@class=\"best-text mb-10\"]");
        if (node == null)
        {
            return res.ToArray();
        }

        string dw = replaceImageWords(node.InnerHtml).Trim();
        StringBuilder sb = new StringBuilder();
        foreach (var t in getText(dw))
        {
            if (!string.IsNullOrWhiteSpace(t))
            {
                sb.Append(t).Append("\r\n");
            }
        }

        // Watermark fragments that appear as lines of their own between the real text lines.
        string[] watermark = new string[] { "百", "度", "知", "道", "问", "答", "来", "自", "内", "容", "版", "权", "专", "属", "zhidao", "源", "copy", "抄", "袭", "zd" };
        foreach (var wm in watermark)
        {
            sb.Replace("\r\n" + wm + "\r\n", "");
        }
        sb.Replace("\r\n\r\n", "\r\n");
        sb.Replace("\r\n\r\n", "\r\n");

        // Strip 50-character lowercase hex runs and return the CLEANED text
        // (previously the cleaned string was computed and then discarded).
        string r = Regex.Replace(sb.ToString(), "[a-f0-9]{50}", "");
        res.Add(r);
    }
    catch (Exception ex)
    {
        FileIOActor.log(ex);
    }

    return res.ToArray();
}
/// <summary>
/// Looks for "instant answer" widgets on the Baidu web search result page for
/// <paramref name="words"/> and returns the text of every widget found.
/// </summary>
/// <param name="words">Search keywords; they are URL-encoded into the query.</param>
/// <returns>
/// Texts in a fixed order: statistics, exact answer, stock price and currency, hotline number,
/// calculator, exchange rate, unit conversion, postcode, Zhidao best answer, first recommended
/// Zhidao answer, calendar date. Widgets that are absent contribute nothing.
/// </returns>
public string[] getBaiduKGResult(string words)
{
    List<string> reslist = new List<string>();
    string askUrl = "https://www.baidu.com/s?ie=utf-8&wd=" + WebConnectActor.UrlEncode(words);
    string html = WebConnectActor.getData(askUrl, Encoding.UTF8, cookie);
    HtmlDocument hdoc = new HtmlDocument();
    hdoc.LoadHtml(html);

    // Statistics value.
    addKgNodeText(hdoc, "//*[@class=\"op_gdp_subtitle\"]", reslist, false);
    // Knowledge-graph exact answer.
    addKgNodeText(hdoc, "//*[@class=\"op_exactqa_s_answer\"]", reslist, false);

    // Stock: current price, then the currency when the price block names one.
    try
    {
        bool hasPrice = addKgNodeText(hdoc, "//*[@class=\"op-stockdynamic-moretab-cur-num c-gap-right-small\"]", reslist, false);
        if (hasPrice && hdoc.DocumentNode.SelectSingleNode("//*[@class=\"op-stockdynamic-moretab-cur-unit\"]") != null)
        {
            HtmlNode cur = hdoc.DocumentNode.SelectSingleNode("//*[@class=\"op-stockdynamic-moretab-cur\"]");
            if (cur != null)
            {
                string gpzf = cur.InnerText.Trim();
                foreach (var s in new string[] { "美元", "元", "镑" })
                {
                    if (gpzf.Contains(s))
                    {
                        gpzf = s;
                        break;
                    }
                }
                reslist.Add(gpzf);
            }
        }
    }
    catch (Exception ex)
    {
        FileIOActor.log(ex);
    }

    // Hotline phone number.
    addKgNodeText(hdoc, "//*[@class=\"op_kefupoly_td2\"]", reslist, false);
    // Calculator result.
    addKgNodeText(hdoc, "//*[@class=\"op_new_val_screen_result\"]", reslist, true);
    // Exchange-rate conversion.
    addKgNodeText(hdoc, "//*[@class=\"op_exrate_result\"]", reslist, false);

    // Unit conversion: the value and unit are read from the page's script data.
    try
    {
        var res = Regex.Match(html, " tab\\:(.*?),\\\n rank", RegexOptions.Singleline).Groups[1].Value.Trim();
        string num = Regex.Match(res, "numres\\\":\\\"(.*?)\\\",").Groups[1].Value.Trim();
        if (num.Length > 0)
        {
            string dw = Regex.Match(res, "to_syn\\\":\\\"(.*?)\\\",").Groups[1].Value.Trim();
            reslist.Add(num + Regex.Unescape(dw));
        }
    }
    catch (Exception ex)
    {
        FileIOActor.log(ex);
    }

    // Postcode.
    addKgNodeText(hdoc, "//*[@class=\"op_post_content \"]", reslist, false);

    // Zhidao best answer: follow the link and read the answer from the target page.
    try
    {
        HtmlNode tnode = hdoc.DocumentNode.SelectSingleNode("//*[@class=\"op_best_answer_question_link\"]");
        if (tnode != null)
        {
            string dw = parser.removeUnText(tnode.GetAttributeValue("href", ""));
            // dw is a page URL, so it must go to the by-URL lookup rather than the sentence search.
            var answers = getBaiduZhidaoAnswersByUrl(dw);
            if (answers.Length > 0)
            {
                reslist.Add(answers[0]);
            }
        }
    }
    catch (Exception ex)
    {
        FileIOActor.log(ex);
    }

    // First recommended Zhidao answer: follow the link and read the answer from the target page.
    try
    {
        HtmlNode tnode = hdoc.DocumentNode.SelectSingleNode("//*[@class=\"op_generalqa_answer c-gap-bottom-small op_generalqa_answer_first\"]");
        if (tnode != null)
        {
            string dw = parser.removeUnText(tnode.ChildNodes[3].FirstChild.GetAttributeValue("href", ""));
            var answers = getBaiduZhidaoAnswersByUrl(dw);
            if (answers.Length > 0)
            {
                reslist.Add(answers[0]);
            }
        }
    }
    catch (Exception ex)
    {
        FileIOActor.log(ex);
    }

    // Calendar widget: the date only exists in the page's script data, so it is matched by regex.
    try
    {
        string date = Regex.Match(html, "\"selectday\":\"([^\"]*?)\"", RegexOptions.None).Groups[1].Value;
        if (!string.IsNullOrWhiteSpace(date))
        {
            reslist.Add(date);
        }
    }
    catch (Exception ex)
    {
        FileIOActor.log(ex);
    }

    return reslist.ToArray();
}

/// <summary>
/// Appends the cleaned inner text of the first node matching <paramref name="xpath"/> to
/// <paramref name="sink"/>; returns whether such a node was found and added.
/// </summary>
private bool addKgNodeText(HtmlDocument hdoc, string xpath, List<string> sink, bool trim)
{
    try
    {
        HtmlNode node = hdoc.DocumentNode.SelectSingleNode(xpath);
        if (node == null)
        {
            return false;
        }
        string text = parser.removeUnText(node.InnerText);
        sink.Add(trim ? text.Trim() : text);
        return true;
    }
    catch (Exception ex)
    {
        // One broken widget must not prevent the remaining ones from being read.
        FileIOActor.log(ex);
        return false;
    }
}