/// <summary>
/// Builds the search URLs for the given search keyword / business keyword pair via
/// <c>get_urls</c> and crawls every one of them with <c>GetLinks</c>.
/// </summary>
/// <param name="tsk">The search keyword task being processed.</param>
/// <param name="businessKeyword">The business keyword combined with the task keyword to form the queries.</param>
/// <param name="businessKeywords">All business keywords, passed through to <c>GetLinks</c>.</param>
/// <param name="excludedKeywords">Excluded keywords, passed through to <c>GetLinks</c>.</param>
/// <returns>
/// <c>null</c> when no search URL could be built; otherwise a list this method never adds to
/// (<c>GetLinks</c> returns nothing, so the list is always empty).
/// </returns>
public List <level1link> Query(searchkeyword tsk, keyword businessKeyword, List <keyword> businessKeywords, List <keyword> excludedKeywords)
{
    var searchUrls = get_urls(tsk, businessKeyword);
    bool hasUrls = searchUrls != null && searchUrls.Count > 0;
    if (!hasUrls)
    {
        return null;
    }

    for (int index = 0; index < searchUrls.Count; index++)
    {
        try
        {
            GetLinks(searchUrls[index], tsk, businessKeyword, businessKeywords, excludedKeywords);
        }
        catch (Exception error)
        {
            // A failure on one URL is logged and must not stop the remaining URLs.
            log(error.Message + error.StackTrace);
        }
    }

    return new List <level1link>();
}
/// <summary>
/// Maps a <c>keyword</c> onto a new <c>KeywordModel</c>.
/// </summary>
/// <param name="item">The keyword to convert; its <c>Id</c> and <c>KeywordName</c> are read.</param>
/// <returns>A new model whose <c>Id</c> and <c>Name</c> are copied from <paramref name="item"/>.</returns>
internal static KeywordModel ConvertToKeywordModel(keyword item) =>
    new KeywordModel
    {
        Id = item.Id,
        Name = item.KeywordName
    };
/// <summary>
/// Renders the keyword cloud from the full-text index of table <c>offer</c> in database
/// <c>ServicesData</c>: the 30 entries with the highest document count for column id 6.
/// </summary>
/// <returns>
/// The default view with a <c>keyword</c> model whose <c>document_count</c> and <c>terms</c>
/// collections are filled in the same row order.
/// </returns>
public ActionResult Keyword_cloud()
{
    // Counts and terms are fetched by two separate statements and stored in two parallel
    // collections (presumably paired by position in the view - verify against the view).
    // Ordering by document_count alone leaves rows with equal counts in an unspecified order,
    // which may differ between the two statements; display_term is added as a tie-breaker and
    // the tail is shared so both statements always sort identically.
    const string filterAndOrder =
        " from sys.dm_fts_index_keywords( DB_ID('ServicesData'), OBJECT_ID('offer') )" +
        " where display_term!='End of file' and column_id=6" +
        " order by document_count desc, display_term";

    string countQuery = "select top 30 document_count" + filterAndOrder;
    string termQuery = "select top 30 display_term" + filterAndOrder;

    var doc_count = db.Database.SqlQuery <Int64>(countQuery).ToList();
    var terms = db.Database.SqlQuery <string>(termQuery).ToList();

    keyword keywords = new keyword();
    keywords.document_count.AddRange(doc_count);
    keywords.terms.AddRange(terms);
    return View(keywords);
}
/// <summary>
/// Keywords: returns the fixed list of keyword entries as JSON.
/// </summary>
/// <returns>A JSON array with the keyword entries; GET requests are allowed.</returns>
public ActionResult keywords()
{
    // NOTE(review): a "Scratch" entry used to be constructed here but was never added to the
    // list, so it never reached the response; the dead local has been dropped. Add it to the
    // initializer below if it was meant to be returned.
    List <keyword> lists = new List <keyword>
    {
        new keyword { id = 0, type = "kw", title = "人工智能", biaozhi = "lesson" },
        new keyword { id = 0, type = "kw", title = "信息学奥赛", biaozhi = "lesson" },
        new keyword { id = 1, type = "detail", title = "实战京东物流车", biaozhi = "lesson" }
    };

    return Json(lists, JsonRequestBehavior.AllowGet);
}
/// <summary>
/// Handles a submitted form. When the keyword text contains '@' it is trimmed and handed to
/// <c>AsyncProcess</c>; otherwise a new <c>keyword</c> row is built from the form and saved.
/// </summary>
/// <param name="formData">The submitted form values.</param>
/// <returns>
/// "All sent" when <c>AsyncProcess</c> completes, the exception message when it throws,
/// and "waiting" after a keyword row has been saved.
/// </returns>
public async Task <string> GetForm(FormData formData)
{
    if (!formData.kw.Contains('@'))
    {
        // Assignments follow the original order so a malformed number fails at the same field.
        var entity = new keyword();
        entity.Id = Guid.NewGuid().ToString();
        entity.kw = formData.kw;
        entity.nosignkw = removeVietnameseSign(formData.kw).ToLower();
        entity.volume = Int32.Parse(formData.vl);
        entity.landingpage = formData.lp;
        entity.currentranking = Int32.Parse(formData.cr);
        entity.kd = Int32.Parse(formData.kd);
        entity.cost = Int32.Parse(formData.cost);
        entity.sessionid = formData.sessionid;
        entity.stt = Int32.Parse(formData.stt);

        db.keywords.Add(entity);
        db.SaveChanges();
        return "waiting";
    }

    try
    {
        await AsyncProcess(formData.kw.Trim(), "*****@*****.**", formData.sessionid).ConfigureAwait(false);
    }
    catch (Exception ex)
    {
        // The failure is reported to the caller as the result text instead of being rethrown.
        return ex.Message;
    }

    return "All sent";
}
/// <summary>
/// Builds the Baidu search URLs for a task: the task keyword (spaces removed, lower-cased) is
/// split on ';', each non-empty part is concatenated with the business keyword text, duplicates
/// are dropped, and the remainder is URL-encoded into the search URL format.
/// </summary>
/// <param name="tsk">The search task whose <c>Keyword</c> holds the ';'-separated search terms.</param>
/// <param name="businessKeywords">The business keyword whose <c>Txt</c> is appended to every term.</param>
/// <returns>One URL per distinct query, in first-occurrence order.</returns>
List <string> get_urls(searchkeyword tsk, keyword businessKeywords)
{
    string baiduUrlFormat = "http://www.baidu.com/s?ie=utf-8&wd={0}";
    var parts = tsk.Keyword.RemoveSpace().GetLower().Split(';');

    var urls = new List <string>();
    var seenQueries = new HashSet <string>();
    foreach (var part in parts)
    {
        if (string.IsNullOrEmpty(part))
        {
            continue;
        }

        string query = "{0}{1}".FormatStr(part, businessKeywords.Txt);
        if (string.IsNullOrEmpty(query.GetTrimed()))
        {
            continue;
        }

        // De-duplicate on the raw query text (before encoding), keeping the first occurrence.
        if (!seenQueries.Add(query))
        {
            continue;
        }

        string encoded = query.GetUrlEncodedString("utf-8");
        urls.Add(baiduUrlFormat.FormatStr(encoded));
    }

    return urls;
}
/// <summary>
/// Rewrites the <c>return</c> statements of the given code (called from <c>ParseLua</c>) so that
/// returned values are first stored in generated <c>Profiler_Return_*</c> names and only then
/// returned on a separate line.
/// </summary>
/// <param name="allCode">The code to process; it is read line by line.</param>
/// <returns>The rewritten code, one entry per processed line appended through <c>ApeendCrLine</c>.</returns>
/// <remarks>
/// Reads and writes <c>returnFunName</c> and <c>returnVarName</c>, which are not declared in this
/// method (presumably static fields of the enclosing class - confirm).
/// </remarks>
private static string LocalReturnFun(string allCode)
{
    StringBuilder result = new StringBuilder();
    Stack <keyword> keywordStack = new Stack <keyword>();
    StringReader sr = new StringReader(allCode);
    int localFunCount = 0;
    int localVarCount = 0;
    //int localTBCount = 0;
    // True only while inside a function that was returned directly ("return function ...");
    // the keyword stack is maintained only during that time.
    bool canStackPush = false;
    while (sr.Peek() != -1)
    {
        string line = sr.ReadLine();
        #region key word
        bool containEnd = CheckIsEndLine(line);
        bool containFunction = CheckIsFunctionLine(line);
        bool containFor = CheckIsForLine(line);
        bool containIf = CheckIsIfLine(line);
        bool containWhile = CheckIsWhileLine(line);
        bool containReturn = CheckIsReturnLine(line);
        bool containLeft = CheckIsLeftTable(line);
        #endregion
        // Default that is never equal to kfunction, so the "end of returned function" branch
        // below can only fire after a real pop.
        keyword lastPop = keyword.kif;
        if (canStackPush)
        {
            if (containFunction)
            {
                keywordStack.Push(keyword.kfunction);
            }
            if (containFor)
            {
                keywordStack.Push(keyword.kfor);
            }
            if (containIf)
            {
                keywordStack.Push(keyword.kif);
            }
            if (containWhile)
            {
                keywordStack.Push(keyword.kwhile);
            }
            if (containEnd && keywordStack.Count > 0)
            {
                lastPop = keywordStack.Pop();
            }
        }
        if (containReturn)
        {
            if (containFunction)
            {
                // "return function ...": reserve a generated name, start tracking nesting and let
                // ReplaceReturnFun rewrite the standalone "return" token.
                // NOTE(review): if canStackPush was already true, kfunction has been pushed above
                // for this same line and is pushed again here - confirm this is intended.
                returnFunName = "Profiler_Return_FunVar" + localFunCount++;
                canStackPush = true;
                keywordStack.Push(keyword.kfunction);
                line = Regex.Replace(line, @"(?<=(^|\s))return(?=(\s|\(|$))", ReplaceReturnFun);
            }
            else
            {
                // A bare "return" has no value to capture and is left untouched.
                if (line.Trim() != "return")
                {
                    returnVarName = "Profiler_Return_Var" + localVarCount++ + "," + "Profiler_Return_Var" + localVarCount++ + "," + "Profiler_Return_Var" + localVarCount++ + "," + "Profiler_Return_Var" + localVarCount++ + "," + "Profiler_Return_Var" + localVarCount++;
                    // There is no way to know how many values a function returns, so five return
                    // variables are always generated.
                    line = Regex.Replace(line, @"(?<=(^|\s))return(?=(\s|\(|$))", ReplaceReturnVar).TrimEnd();
                    if (containLeft)
                    {
                        // GetReturnTable receives the shared reader; presumably it consumes the
                        // remaining lines of the table literal - confirm.
                        line += "\r\n" + GetReturnTable(sr);
                    }
                    line += "\r\nreturn " + returnVarName;
                    returnVarName = string.Empty;
                }
            }
        }
        else if (containEnd && lastPop == keyword.kfunction && keywordStack.Count <= 0)
        {
            // The "end" that closes the directly returned function: emit the deferred return of
            // the generated function name and stop tracking nesting.
            line = line + "\r\nreturn " + returnFunName;
            returnFunName = string.Empty;
            canStackPush = false;
        }
        ApeendCrLine(result, line);
    }
    return(result.ToString());
}
/// <summary>
/// Re-indents the given code. Every non-blank line is trimmed and prefixed with four spaces per
/// block that is open at that point; <c>else</c>/<c>elseif</c> lines are shifted one level left.
/// Each <c>line:%d</c> placeholder is replaced by <c>line:</c> plus the output line number.
/// </summary>
/// <param name="allCode">The code to format; blank lines are dropped.</param>
/// <returns>The formatted code, with an empty line after each block-closing <c>end</c> that did not close an <c>if</c>.</returns>
private static string PrettySpace(string allCode)
{
    var output = new StringBuilder();
    var reader = new StringReader(allCode);
    var openBlocks = new Stack <keyword>();
    int lineNumber = 0;

    string raw;
    while ((raw = reader.ReadLine()) != null)
    {
        string text = raw.Trim();
        if (text.Length == 0)
        {
            continue;
        }
        lineNumber++;

        bool isFunction = CheckIsFunctionLine(text);
        bool isFor = CheckIsForLine(text);
        bool isIf = CheckIsIfLine(text);
        bool isWhile = CheckIsWhileLine(text);
        bool isEnd = CheckIsEndLine(text);
        bool opensTable = CheckIsLeftTable(text);
        bool closesTable = CheckIsRightTable(text);

        // A closing line leaves its block before it is indented, so it lines up with the opener.
        keyword popped = keyword.knull;
        bool closesSomething = isEnd || (closesTable && !opensTable);
        if (closesSomething && !isIf && openBlocks.Count > 0)
        {
            popped = openBlocks.Pop();
        }

        int depth = openBlocks.Count;
        bool isElseBranch = Regex.IsMatch(text, @"(?<=(^|\s))else(?=(\s|$))")
                            || Regex.IsMatch(text, @"(?<=(^|\s))elseif(?=(\s|$))");
        if (isElseBranch)
        {
            depth--;
        }

        string indented = new string(' ', 4 * Math.Max(0, depth)) + text;
        ApeendCrLine(output, indented.Replace("line:%d", string.Format("line:{0}", lineNumber)));

        if (isEnd && !isIf && popped != keyword.kif)
        {
            // The separating empty line occupies an output line of its own.
            ApeendCrLine(output, "");
            lineNumber++;
        }

        // Openers take effect only for the lines that follow.
        if (isFunction)
        {
            openBlocks.Push(keyword.kfunction);
        }
        if (isFor)
        {
            openBlocks.Push(keyword.kfor);
        }
        if (isIf && !isEnd)
        {
            openBlocks.Push(keyword.kif);
        }
        if (isWhile)
        {
            openBlocks.Push(keyword.kwhile);
        }
        if (opensTable && !closesTable)
        {
            openBlocks.Push(keyword.kleftTable);
        }
    }

    return output.ToString();
}
/// <summary>
/// Instruments Lua code with <c>CS.MikuLuaProfiler.LuaProfiler.BeginSample</c> /
/// <c>EndSample</c> calls: one sample named <c>require &lt;fileName&gt;</c> around the whole file and
/// one sample per function line, named by <c>GetFunName</c>.
/// </summary>
/// <param name="fileName">Used in the file-level sample name and passed to <c>GetFunName</c>.</param>
/// <param name="allCode">The code to instrument.</param>
/// <returns>The instrumented code after <c>RollBackString</c> and <c>PrettySpace</c>.</returns>
private static string ParseLua(string fileName, string allCode)
{
    #region file profiler
    StringBuilder sb = new StringBuilder();
    string format = "";
    #endregion
    // Normalisation passes, applied in this fixed order before any instrumentation happens.
    allCode = TrimComment(allCode);
    allCode = NewLineFunction(allCode);
    allCode = PrettyReturn(allCode);
    allCode = PrettyTable(allCode);
    allCode = PrettyOrAnd(allCode);
    allCode = PrettyCalSig(allCode);
    allCode = PrettyThen(allCode);
    allCode = PrettyEnd(allCode);
    allCode = LocalReturnFun(allCode);
    StringReader sr = new StringReader(allCode);
    Stack <keyword> keywordStack = new Stack <keyword>();
    // beginSample: a function-level sample is currently open.
    bool beginSample = false;
    // needEndSample: the closing "end" of the function still has to emit EndSample.
    bool needEndSample = true;
    format = "CS.MikuLuaProfiler.LuaProfiler.BeginSample(\"{0}\")";
    ApeendCrLine(sb, string.Format(format, "require " + fileName));
    // Cleared when a return outside any function already closed the file-level sample.
    bool needEndFileSample = true;
    while (sr.Peek() != -1)
    {
        string line = sr.ReadLine();
        #region key word
        bool containEnd = CheckIsEndLine(line);
        bool containFunction = CheckIsFunctionLine(line);
        bool containFor = CheckIsForLine(line);
        bool containIf = CheckIsIfLine(line);
        bool containWhile = CheckIsWhileLine(line);
        bool containReturn = CheckIsReturnLine(line);
        if (containFunction)
        {
            keywordStack.Push(keyword.kfunction);
        }
        if (containFor)
        {
            keywordStack.Push(keyword.kfor);
        }
        if (containIf)
        {
            keywordStack.Push(keyword.kif);
        }
        if (containWhile)
        {
            keywordStack.Push(keyword.kwhile);
        }
        if (containReturn)
        {
            // Stack<T>.ToArray lists the most recently pushed entry first, so this walks outwards
            // from the innermost block; for/while entries are skipped. A return whose nearest
            // function/if ancestor is the function itself suppresses the EndSample at "end";
            // a return nested in an "if" keeps it.
            var keyArray = keywordStack.ToArray();
            for (int i = 0, imax = keyArray.Length; i < imax; i++)
            {
                if (keyArray[i] == keyword.kfunction)
                {
                    needEndSample = false;
                    break;
                }
                else if (keyArray[i] == keyword.kif)
                {
                    needEndSample = true;
                    break;
                }
            }
        }
        // Default that is never equal to kfunction when nothing is popped.
        keyword lastPop = keyword.kif;
        if (containEnd && keywordStack.Count > 0)
        {
            lastPop = keywordStack.Pop();
        }
        #endregion
        #region add profiler
        if (containFunction)
        {
            // Function line: emit it, then open a sample named after the function.
            needEndSample = true;
            beginSample = true;
            format = "CS.MikuLuaProfiler.LuaProfiler.BeginSample(\"{0}\")";
            string funName = GetFunName(line, fileName);
            ApeendCrLine(sb, line);
            ApeendCrLine(sb, string.Format(format, funName));
        }
        else if (containEnd && lastPop == keyword.kfunction && beginSample && needEndSample)
        {
            // "end" closing a function: close its sample just before the line.
            ApeendCrLine(sb, "CS.MikuLuaProfiler.LuaProfiler.EndSample()");
            ApeendCrLine(sb, line);
            beginSample = keywordStack.Count > 0;
        }
        else if (containReturn)
        {
            // NOTE(review): string.Replace rewrites every occurrence of the substring "return",
            // including inside longer identifiers, unlike the whole-word regex used in
            // LocalReturnFun - confirm CheckIsReturnLine/earlier passes make that impossible.
            line = line.Replace("return", "CS.MikuLuaProfiler.LuaProfiler.EndSample()\r\nreturn");
            ApeendCrLine(sb, line);
            if (!keywordStack.Contains(keyword.kfunction))
            {
                needEndFileSample = false;
            }
        }
        else
        {
            ApeendCrLine(sb, line);
        }
        if (containEnd)
        {
            ApeendCrLine(sb, "");
        }
        #endregion
    }
    if (needEndFileSample)
    {
        ApeendCrLine(sb, "\r\nCS.MikuLuaProfiler.LuaProfiler.EndSample()");
    }
    // Re-join braces that ended up separated from their content by a line break.
    string code = sb.ToString().Replace("{\r\n", "{");
    code = code.Replace("\r\n}", "}");
    code = RollBackString(code);
    return(PrettySpace(code));
}
/// <summary>
/// Crawls Baidu result pages starting at <paramref name="link"/>: for every result entry it
/// extracts title, abstract and domain, downloads the target page, builds keyword-centred
/// abstracts and a score, and saves a <c>level1link</c> through <c>BotMng.save_level1_links</c>.
/// It then follows the next-page link until paging ends or a stop condition is hit.
/// </summary>
/// <param name="link">URL of the first result page.</param>
/// <param name="tsk">The search task; its keyword, user id, app type and id are read.</param>
/// <param name="businessKeyword">The business keyword combined with the task keyword for this query.</param>
/// <param name="businessKeywords">All business keywords; their text, score and biz type form the match patterns.</param>
/// <param name="excludedKeywords">Passed through to <c>save_level1_links</c>.</param>
void GetLinks(string link, searchkeyword tsk, keyword businessKeyword, List <keyword> businessKeywords, List <keyword> excludedKeywords)
{
    BotMng botmng = BotMng.Instance;
    // NOTE(review): img is never read in this method.
    AppType img = (AppType)tsk.AppType;
    string[] searchKeywords = tsk.Keyword.GetLower().RemoveSpace().Split(';');
    List <KeywordScore> patterns = businessKeywords.Select(x => new KeywordScore { Keyword = x.Txt, Score = x.Score, BizType = x.BizType }).ToList();
    string[] bizPatterns = businessKeywords.Select(x => x.Txt).ToArray();
    // The task keyword itself is also a pattern, with a fixed score of 50 and no biz type.
    patterns.Add(new KeywordScore { Keyword = tsk.Keyword, Score = 50, BizType = 0 });
    //List<level1link> result = new List<level1link>();
    int nohist_pages = 0;
    int quried_pages = 0;
    // Search at most 60 pages.
    // NOTE(review): the condition admits quried_pages values 0 through 60, i.e. up to 61 pages.
    while (!string.IsNullOrEmpty(link) && quried_pages <= 60)
    {
        log(link);
        var html = get_html(link);
        if (html == null)
        {
            break;
        }
        var tags = html.SubAfter("content_left").SplitWith("c-container");
        if (tags == null || tags.Length == 0)
        {
            log("BLOCKED " + tsk.Keyword);
            break;
        }
        bool nohit = true;
        foreach (var tag in tags)
        {
            var a = tag.SubAfter("h3").SubAfter("a");
            string title = RemoveInivalidChar( a.RemoveSpace().GetLower().SubBefore("</h3>").GetTxtFromHtml2().RemoveSpace().GetLower());
            string href = a.GetFirstHref2();
            string abs = RemoveInivalidChar(tag.SubAfter("abstract").SubBefore("</div").GetTxtFromHtml2().RemoveSpace().GetLower());
            string domain = tag.SubLastStringAfter("\"f13").SubBefore("</span").GetTxtFromHtml2();
            domain = GetDomain(domain);
            int maxScore = 0;
            // Filter out entries that contain none of the information for the item to protect.
            string txt = "{0}{1}".FormatStr(title, abs);
            if (string.IsNullOrEmpty(txt))
            {
                continue;
            }
            string realUrl = null, detailHtml = null, abstracts = null;
            byte appType = 0;
            if (!string.IsNullOrWhiteSpace(href))
            {
                //Encoding enc = Encoding.UTF8;
                //detailHtml = HtmlQueryHelper.GetContent(href, 8000, ref enc, out realUrl);
                // Item1 is used as the final URL and Item2 as the page HTML.
                var tuplehtml = get_htmlUrl(href);
                if (tuplehtml != null && !string.IsNullOrEmpty(tuplehtml.Item1))
                {
                    realUrl = tuplehtml.Item1;
                }
                if (tuplehtml != null && !string.IsNullOrEmpty(tuplehtml.Item2))
                {
                    detailHtml = tuplehtml.Item2;
                }
                if (!string.IsNullOrEmpty(realUrl) && string.IsNullOrEmpty(domain))
                {
                    domain = GetDomain(realUrl);
                }
            }
            // The fetched page is a script-driven redirect stub: follow its first href once more.
            if (!string.IsNullOrEmpty(detailHtml) && detailHtml.Contains("document.getElementById(\"link\").click()"))
            {
                var gourl = detailHtml.GetFirstHref2();
                if (!string.IsNullOrEmpty(gourl))
                {
                    var tuplehtml = get_htmlUrl(gourl);
                    if (tuplehtml != null && !string.IsNullOrEmpty(tuplehtml.Item1))
                    {
                        realUrl = tuplehtml.Item1;
                    }
                    if (tuplehtml != null && !string.IsNullOrEmpty(tuplehtml.Item2))
                    {
                        detailHtml = tuplehtml.Item2;
                    }
                    if (!string.IsNullOrEmpty(realUrl) && string.IsNullOrEmpty(domain))
                    {
                        domain = GetDomain(realUrl);
                    }
                }
            }
            if (string.IsNullOrEmpty(realUrl))
            {
                realUrl = href;
            }
            List <KeywordScore> matchpatterns = new List <KeywordScore>();
            if (string.IsNullOrEmpty(detailHtml))
            {
                continue;
            }
            else
            {
                // The page must contain the task keyword and match the business keyword texts.
                if (!detailHtml.Contains(tsk.Keyword) || !detailHtml.IsContains2(bizPatterns))
                {
                    continue;
                }
                // NOTE(review): hrefs is never read in this method.
                var hrefs = detailHtml.GetDescendents("a", "href");
                StringBuilder sbabstracts = new StringBuilder();
                List <string> abstractlist = new List <string>();
                StringBuilder sbabstractlist = new StringBuilder();
                foreach (KeywordScore pattern in patterns)
                {
                    string[] splitDetailHtmls = detailHtml.SplitWith(pattern.Keyword);
                    if (splitDetailHtmls.Length > 1)
                    {
                        matchpatterns.Add(pattern);
                    }
                    StringBuilder sbpatternStr = new StringBuilder();
                    // One iteration per boundary between two pieces, i.e. per keyword occurrence.
                    for (int i = 0; i < splitDetailHtmls.Length - 1; i++)
                    {
                        string splitDetailHtml1 = splitDetailHtmls[i];
                        // NOTE(review): for the last occurrence (i == Length - 2) the following
                        // piece exists but "" is used, so no trailing context is collected for
                        // it - confirm whether "Length - 1" was intended.
                        string splitDetailHtml2 = i < splitDetailHtmls.Length - 2 ? splitDetailHtmls[i + 1] : "";
                        // Collect the text before the keyword, walking backwards until a
                        // split_bef_commas character that is not preceded by a split_num_commas
                        // character; the characters are gathered in reverse order.
                        for (int j = splitDetailHtml1.Length - 1; j >= 0; j--)
                        {
                            if (split_bef_commas.Contains(splitDetailHtml1[j]) && j - 1 >= 0 && !split_num_commas.Contains(splitDetailHtml1[j - 1]))
                            {
                                break;
                            }
                            sbpatternStr.Append(splitDetailHtml1[j]);
                        }
                        // Undo the reversal while copying into the abstract.
                        for (int q = sbpatternStr.Length - 1; q >= 0; q--)
                        {
                            sbabstracts.Append(sbpatternStr[q]);
                        }
                        sbabstracts.Append(pattern.Keyword);
                        sbpatternStr.Clear();
                        // Collect the text after the keyword up to a split_aft_commas character
                        // that is not followed by a split_num_commas character.
                        for (int j = 0; j < splitDetailHtml2.Length; j++)
                        {
                            if (split_aft_commas.Contains(splitDetailHtml2[j]) && j + 1 < splitDetailHtml2.Length && !split_num_commas.Contains(splitDetailHtml2[j + 1]))
                            {
                                break;
                            }
                            sbpatternStr.Append(splitDetailHtml2[j]);
                        }
                        sbabstracts.Append(sbpatternStr);
                        sbpatternStr.Clear();
                        string tmpsbabstracts = sbabstracts.ToString();
                        tmpsbabstracts = BaiduQuery.RemoveInivalidChar(tmpsbabstracts.GetTxtFromHtml2().RemoveSpace().GetLower());
                        // Each distinct snippet is kept once; snippets are joined with a space.
                        if (!abstractlist.Contains(tmpsbabstracts))
                        {
                            abstractlist.Add(tmpsbabstracts);
                            sbabstractlist.Append(tmpsbabstracts).Append(" ");
                        }
                        sbabstracts.Clear();
                    }
                }
                abstracts = sbabstractlist.ToString();
                if (!string.IsNullOrEmpty(abstracts) && matchpatterns.Count > 0)
                {
                    // Score = highest pattern score (missing scores count as 50) plus a tenth of
                    // every matched pattern's score, minus the largest such tenth.
                    maxScore = matchpatterns.Max(x => x.Score ?? 50);
                    appType = matchpatterns.Where(x => x.BizType > 0).OrderByDescending(x => x.Score).Select(x => x.BizType).FirstOrDefault();
                    maxScore += matchpatterns.Sum(x => (x.Score ?? 50) / 10);
                    maxScore -= matchpatterns.Max(x => (x.Score ?? 50) / 10);
                }
            }
            // Fallback: no abstract could be built from the page, so score against the result
            // page's own abstract text instead.
            if (string.IsNullOrEmpty(abstracts) && !string.IsNullOrEmpty(abs))
            {
                matchpatterns = patterns.Where(x => abs.Contains(x.Keyword)).ToList();
                // NOTE(review): Max over an empty sequence throws InvalidOperationException, so
                // this assumes at least one pattern occurs in abs - TODO confirm or guard.
                maxScore = matchpatterns.Max(x => x.Score ?? 50);
                appType = matchpatterns.Where(x => x.BizType > 0).OrderByDescending(x => x.Score).Select(x => x.BizType).FirstOrDefault();
                maxScore += matchpatterns.Sum(x => (x.Score ?? 50) / 10);
                maxScore -= matchpatterns.Max(x => (x.Score ?? 50) / 10);
            }
            if (maxScore > 100)
            {
                maxScore = 100;
            }
            bool is_bus_matched = txt.IsContains2(businessKeyword.Txt);
            bool is_title_matched = title.GetLower().IsContains2(searchKeywords);
            bool is_abstr_matched = abs.IsContains2(searchKeywords);
            BaiduItemPart part = is_title_matched && is_abstr_matched ? BaiduItemPart.TitleAbstract : is_title_matched ? BaiduItemPart.Title : is_abstr_matched ? BaiduItemPart.Abstract : BaiduItemPart.None;
            bool is_itm_title_matched = txt.GetLower().IsContains2(searchKeywords);
            // CreatedAt is the current UTC time shifted by eight hours.
            level1link l1 = new level1link { UsrId = tsk.UsrId, Domain = domain, TopDomain = GetLevel1Domain(domain), Keywords = string.Format("{0} + {1}", tsk.Keyword, businessKeyword.Txt), LinkUrl = realUrl, MatchAt = (byte)part, Html = detailHtml, MatchType = (byte)((is_bus_matched ? 1 : 0) + (is_itm_title_matched ? 2 : 0)), AppType = appType, BizId = IDHelper.GetGuid("{0}/{1}/{2}".FormatStr(realUrl, tsk.UsrId, tsk.Keyword)), SearchkeywordId = tsk._id.ToString(), CreatedAt = DateTime.UtcNow.AddHours(8), Description = abs, Title = title, Score = maxScore, Abstract = abstracts };
            // NOTE(review): this local is never read, and its 10/30 weights differ from the 1/2
            // weights stored in l1.MatchType above.
            byte MatchType = (byte)((is_bus_matched ? 10 : 0) + (is_itm_title_matched ? 30 : 0));
            botmng.save_level1_links(new List <level1link> { l1 }, tsk, excludedKeywords);
            nohit = false;
            nohist_pages = 0;
        }
        if (nohit)
        {
            nohist_pages++;
        }
        // If several consecutive pages yield no result, stop.
        // NOTE(review): the original comment says 3 pages, but "> 3" only breaks once the counter
        // reaches 4.
        if (nohist_pages > 3)
        {
            break;
        }
        quried_pages++;
        // pages is not declared in this method (presumably a counter field - confirm).
        pages++;
        // Take the last href between the current-page marker and the next-page label; relative
        // links are made absolute against the Baidu host.
        link = html.SubAfter("fkfk_cur").SubBefore("下一页").GetLastHref2();
        if (!string.IsNullOrEmpty(link) && !link.IsStartWith("http"))
        {
            if (link.IsStartWith("/"))
            {
                link = link.SubAfter("/");
            }
            link = "http://www.baidu.com/".GetContact(link);
        }
    }
    //return result;
}
new SyntaxToken(keyword switch {