Exemple #1
0
        /// <summary>
        /// Builds the search URLs for the given search keyword / business keyword pair
        /// and runs <c>GetLinks</c> on each of them.
        /// </summary>
        /// <param name="tsk">The search keyword task passed to <c>get_urls</c> and <c>GetLinks</c>.</param>
        /// <param name="businessKeyword">The business keyword combined with the search keyword.</param>
        /// <param name="businessKeywords">All business keywords, forwarded to <c>GetLinks</c>.</param>
        /// <param name="excludedKeywords">Excluded keywords, forwarded to <c>GetLinks</c>.</param>
        /// <returns>
        /// <c>null</c> when <c>get_urls</c> produces no URL; otherwise a list that this method
        /// never adds to (<c>GetLinks</c> returns nothing), so it is always empty.
        /// </returns>
        public List <level1link> Query(searchkeyword tsk, keyword businessKeyword, List <keyword> businessKeywords, List <keyword> excludedKeywords)
        {
            var collected = new List <level1link>();
            var urls      = get_urls(tsk, businessKeyword);

            bool nothingToCrawl = urls == null || urls.Count == 0;
            if (nothingToCrawl)
            {
                return null;
            }

            foreach (var url in urls)
            {
                // A failure on one URL is logged and must not stop the remaining URLs.
                try
                {
                    GetLinks(url, tsk, businessKeyword, businessKeywords, excludedKeywords);
                }
                catch (Exception failure)
                {
                    log(failure.Message + failure.StackTrace);
                }
            }

            return collected;
        }
 /// <summary>
 /// Creates a <c>KeywordModel</c> from a <c>keyword</c>, copying <c>Id</c> to <c>Id</c>
 /// and <c>KeywordName</c> to <c>Name</c>.
 /// </summary>
 /// <param name="item">The keyword to copy from.</param>
 /// <returns>A new model carrying the keyword's id and name.</returns>
 internal static KeywordModel ConvertToKeywordModel(keyword item)
 {
     var model = new KeywordModel();

     model.Id   = item.Id;
     model.Name = item.KeywordName;
     return model;
 }
        /// <summary>
        /// Reads the 30 full-text index terms with the highest document count (column 6 of
        /// the <c>offer</c> index in <c>ServicesData</c>) and passes the counts and the terms
        /// to the view in two parallel lists.
        /// </summary>
        /// <returns>The view, with a <c>keyword</c> model holding the counts and terms.</returns>
        public ActionResult Keyword_cloud()
        {
            // The counts and the terms are fetched by two separate queries and stored in two
            // separate lists, so both queries must return their rows in the same order.
            // Ordering by document_count alone leaves rows with equal counts in an undefined
            // order; display_term is added as a tie-breaker so both result sets line up.
            string query  = "select top 30 document_count from sys.dm_fts_index_keywords( DB_ID('ServicesData'), OBJECT_ID('offer') ) where display_term!='End of file' and column_id=6 order by document_count desc, display_term";
            string query1 = "select top 30 display_term from sys.dm_fts_index_keywords( DB_ID('ServicesData'), OBJECT_ID('offer') ) where display_term!='End of file' and column_id=6 order by document_count desc, display_term";

            var     doc_count = db.Database.SqlQuery <Int64>(query).ToList();
            var     terms     = db.Database.SqlQuery <string>(query1).ToList();
            keyword keywords  = new keyword();

            keywords.document_count.AddRange(doc_count);
            keywords.terms.AddRange(terms);

            return(View(keywords));
        }
        /// <summary>
        /// Keywords: returns a fixed list of keyword entries as JSON (GET allowed).
        /// </summary>
        /// <returns>A JSON result containing the three hard-coded entries, in this order.</returns>
        public ActionResult keywords()
        {
            // The previous version also constructed a "Scratch" entry (id 0, type "kw") but
            // never added it to the list. That dead object is dropped here; the response is
            // unchanged. NOTE(review): confirm "Scratch" was left out on purpose.
            var lists = new List <keyword>
            {
                new keyword { id = 0, type = "kw",     title = "人工智能",    biaozhi = "lesson" },
                new keyword { id = 0, type = "kw",     title = "信息学奥赛",   biaozhi = "lesson" },
                new keyword { id = 1, type = "detail", title = "实战京东物流车", biaozhi = "lesson" }
            };

            return(Json(lists, JsonRequestBehavior.AllowGet));
        }
        /// <summary>
        /// Handles a submitted form. When <c>formData.kw</c> contains an '@' it is handed to
        /// <c>AsyncProcess</c>; otherwise the form is stored as a new <c>keyword</c> row.
        /// </summary>
        /// <param name="formData">The submitted form fields.</param>
        /// <returns>
        /// "All sent" after <c>AsyncProcess</c> completes, the exception message if it throws,
        /// or "waiting" after a keyword row has been saved.
        /// </returns>
        public async Task <string> GetForm(FormData formData)
        {
            bool looksLikeAddress = formData.kw.Contains('@');

            if (!looksLikeAddress)
            {
                // Numeric fields arrive as text; a malformed value makes Int32.Parse throw
                // and nothing is saved.
                var entry = new keyword()
                {
                    Id             = Guid.NewGuid().ToString(),
                    kw             = formData.kw,
                    nosignkw       = removeVietnameseSign(formData.kw).ToLower(),
                    volume         = Int32.Parse(formData.vl),
                    landingpage    = formData.lp,
                    currentranking = Int32.Parse(formData.cr),
                    kd             = Int32.Parse(formData.kd),
                    cost           = Int32.Parse(formData.cost),
                    sessionid      = formData.sessionid,
                    stt            = Int32.Parse(formData.stt)
                };

                db.keywords.Add(entry);
                db.SaveChanges();
                return "waiting";
            }

            try
            {
                await AsyncProcess(formData.kw.Trim(), "*****@*****.**", formData.sessionid).ConfigureAwait(false);
            }
            catch (Exception failure)
            {
                // The caller receives the error text instead of an exception.
                return failure.Message;
            }

            return "All sent";
        }
Exemple #6
0
        /// <summary>
        /// Builds one Baidu search URL per ';'-separated term in <c>tsk.Keyword</c>, each term
        /// being suffixed with the business keyword text.
        /// </summary>
        /// <param name="tsk">Search task whose <c>Keyword</c> holds the ';'-separated terms.</param>
        /// <param name="businessKeywords">Business keyword whose <c>Txt</c> is appended to every term.</param>
        /// <returns>The distinct, URL-encoded search URLs; empty when there is no usable term.</returns>
        List <string> get_urls(searchkeyword tsk, keyword businessKeywords)
        {
            const string searchUrlTemplate = "http://www.baidu.com/s?ie=utf-8&wd={0}";

            string[] terms = tsk.Keyword.RemoveSpace().GetLower().Split(';');

            return terms
                   .Where(term => !string.IsNullOrEmpty(term))
                   .Select(term => "{0}{1}".FormatStr(term, businessKeywords.Txt))
                   .Where(phrase => !string.IsNullOrEmpty(phrase.GetTrimed()))
                   .Distinct()
                   .Select(phrase => searchUrlTemplate.FormatStr(phrase.GetUrlEncodedString("utf-8")))
                   .ToList();
        }
Exemple #7
0
        /// <summary>
        /// Rewrites the <c>return</c> statements of the given source text, line by line.
        /// A line that is both a return line and a function line gets a generated
        /// "Profiler_Return_FunVar" name; any other non-bare return line gets five generated
        /// "Profiler_Return_Var" names and an extra <c>return</c> line listing them.
        /// </summary>
        /// <param name="allCode">The complete source text to process.</param>
        /// <returns>The rewritten text, built by passing every line to <c>ApeendCrLine</c>.</returns>
        /// <remarks>
        /// <c>returnFunName</c> and <c>returnVarName</c> are declared outside this method;
        /// presumably they are read by the <c>ReplaceReturnFun</c> / <c>ReplaceReturnVar</c>
        /// callbacks - confirm against their definitions.
        /// </remarks>
        private static string LocalReturnFun(string allCode)
        {
            StringBuilder   result        = new StringBuilder();
            Stack <keyword> keywordStack  = new Stack <keyword>();
            StringReader    sr            = new StringReader(allCode);
            int             localFunCount = 0;
            int             localVarCount = 0;
            //int localTBCount = 0;

            // Block tracking is only active between a "return function" line and the
            // 'end' that closes that function (see the branches below).
            bool canStackPush = false;

            while (sr.Peek() != -1)
            {
                string line = sr.ReadLine();
                #region key word
                bool containEnd      = CheckIsEndLine(line);
                bool containFunction = CheckIsFunctionLine(line);
                bool containFor      = CheckIsForLine(line);
                bool containIf       = CheckIsIfLine(line);
                bool containWhile    = CheckIsWhileLine(line);
                bool containReturn   = CheckIsReturnLine(line);
                bool containLeft     = CheckIsLeftTable(line);
                #endregion
                // Default used when nothing is popped on this line; it only has to differ
                // from keyword.kfunction for the check further down.
                keyword lastPop = keyword.kif;
                if (canStackPush)
                {
                    if (containFunction)
                    {
                        keywordStack.Push(keyword.kfunction);
                    }
                    if (containFor)
                    {
                        keywordStack.Push(keyword.kfor);
                    }
                    if (containIf)
                    {
                        keywordStack.Push(keyword.kif);
                    }
                    if (containWhile)
                    {
                        keywordStack.Push(keyword.kwhile);
                    }

                    if (containEnd && keywordStack.Count > 0)
                    {
                        lastPop = keywordStack.Pop();
                    }
                }

                if (containReturn)
                {
                    if (containFunction)
                    {
                        // Return line that also contains a function: pick a fresh name,
                        // start tracking blocks and let ReplaceReturnFun rewrite the keyword.
                        returnFunName = "Profiler_Return_FunVar" + localFunCount++;
                        canStackPush  = true;
                        keywordStack.Push(keyword.kfunction);
                        line = Regex.Replace(line, @"(?<=(^|\s))return(?=(\s|\(|$))", ReplaceReturnFun);
                    }
                    else
                    {
                        // A bare "return" carries no value and is left untouched.
                        if (line.Trim() != "return")
                        {
                            returnVarName = "Profiler_Return_Var" + localVarCount++ + ","
                                            + "Profiler_Return_Var" + localVarCount++ + ","
                                            + "Profiler_Return_Var" + localVarCount++ + ","
                                            + "Profiler_Return_Var" + localVarCount++ + ","
                                            + "Profiler_Return_Var" + localVarCount++; // There is no way to know how many values a function returns, so five return variables are used.
                            line = Regex.Replace(line, @"(?<=(^|\s))return(?=(\s|\(|$))", ReplaceReturnVar).TrimEnd();
                            if (containLeft)
                            {
                                // GetReturnTable reads further lines from the same reader,
                                // so those lines are not seen by this loop.
                                line += "\r\n" + GetReturnTable(sr);
                            }
                            line         += "\r\nreturn " + returnVarName;
                            returnVarName = string.Empty;
                        }
                    }
                }
                else if (containEnd && lastPop == keyword.kfunction && keywordStack.Count <= 0)
                {
                    // The 'end' that emptied the stack closes the tracked function:
                    // emit the deferred return and stop tracking.
                    line          = line + "\r\nreturn " + returnFunName;
                    returnFunName = string.Empty;
                    canStackPush  = false;
                }

                ApeendCrLine(result, line);
            }

            return(result.ToString());
        }
Exemple #8
0
        /// <summary>
        /// Re-indents the given source text: blank lines are dropped, every remaining line is
        /// trimmed and indented by four spaces per currently open block, and the placeholder
        /// "line:%d" is replaced by the number of the emitted line.
        /// </summary>
        /// <param name="allCode">The source text to re-indent.</param>
        /// <returns>The re-indented text, built with <c>ApeendCrLine</c>.</returns>
        private static string PrettySpace(string allCode)
        {
            var output     = new StringBuilder();
            var reader     = new StringReader(allCode);
            var openBlocks = new Stack <keyword>();
            int lineNumber = 0;

            while (reader.Peek() != -1)
            {
                string text = reader.ReadLine().Trim();
                if (text.Length == 0)
                {
                    continue;
                }
                lineNumber++;

                bool isFunction = CheckIsFunctionLine(text);
                bool isFor      = CheckIsForLine(text);
                bool isIf       = CheckIsIfLine(text);
                bool isWhile    = CheckIsWhileLine(text);
                bool isEnd      = CheckIsEndLine(text);
                bool opensTable = CheckIsLeftTable(text);
                bool endsTable  = CheckIsRightTable(text);

                // A closing line is dedented itself, so the pop happens before the indent is computed.
                bool closesBlock = (isEnd || (endsTable && !opensTable)) && !isIf && openBlocks.Count > 0;
                keyword closed   = closesBlock ? openBlocks.Pop() : keyword.knull;

                // else / elseif lines sit one level left of the block they belong to.
                int  depth      = openBlocks.Count;
                bool isElseLike = Regex.IsMatch(text, @"(?<=(^|\s))else(?=(\s|$))")
                                  || Regex.IsMatch(text, @"(?<=(^|\s))elseif(?=(\s|$))");
                if (isElseLike)
                {
                    depth--;
                }

                string indented = new string(' ', 4 * Math.Max(0, depth)) + text;
                ApeendCrLine(output, indented.Replace("line:%d", string.Format("line:{0}", lineNumber)));

                // An empty line (which is counted as well) follows every 'end' except one that closed an 'if'.
                if (isEnd && !isIf && closed != keyword.kif)
                {
                    ApeendCrLine(output, "");
                    lineNumber++;
                }

                // Blocks opened on this line only affect the lines that follow.
                if (isFunction)
                {
                    openBlocks.Push(keyword.kfunction);
                }
                if (isFor)
                {
                    openBlocks.Push(keyword.kfor);
                }
                if (isIf && !isEnd)
                {
                    openBlocks.Push(keyword.kif);
                }
                if (isWhile)
                {
                    openBlocks.Push(keyword.kwhile);
                }
                if (opensTable && !endsTable)
                {
                    openBlocks.Push(keyword.kleftTable);
                }
            }

            return output.ToString();
        }
Exemple #9
0
        /// <summary>
        /// Instruments the given source text with <c>CS.MikuLuaProfiler.LuaProfiler</c>
        /// <c>BeginSample</c> / <c>EndSample</c> calls. The text first goes through a fixed
        /// chain of normalising passes, is then processed line by line, and the result is
        /// finally passed through <c>RollBackString</c> and <c>PrettySpace</c>.
        /// </summary>
        /// <param name="fileName">Name used in the file-level sample ("require " + fileName) and passed to <c>GetFunName</c>.</param>
        /// <param name="allCode">The complete source text to instrument.</param>
        /// <returns>The instrumented and re-indented source text.</returns>
        private static string ParseLua(string fileName, string allCode)
        {
            #region file profiler
            StringBuilder sb     = new StringBuilder();
            string        format = "";
            #endregion

            // Normalising passes, applied in this order before any instrumentation.
            allCode = TrimComment(allCode);
            allCode = NewLineFunction(allCode);
            allCode = PrettyReturn(allCode);
            allCode = PrettyTable(allCode);
            allCode = PrettyOrAnd(allCode);
            allCode = PrettyCalSig(allCode);
            allCode = PrettyThen(allCode);
            allCode = PrettyEnd(allCode);
            allCode = LocalReturnFun(allCode);

            StringReader    sr            = new StringReader(allCode);
            Stack <keyword> keywordStack  = new Stack <keyword>();
            bool            beginSample   = false;
            bool            needEndSample = true;

            // The whole file is wrapped in one sample that is opened here.
            format = "CS.MikuLuaProfiler.LuaProfiler.BeginSample(\"{0}\")";
            ApeendCrLine(sb, string.Format(format, "require " + fileName));
            // Cleared when a return outside of any function is seen (that return already ends the sample).
            bool needEndFileSample = true;
            while (sr.Peek() != -1)
            {
                string line = sr.ReadLine();

                #region key word
                bool containEnd      = CheckIsEndLine(line);
                bool containFunction = CheckIsFunctionLine(line);
                bool containFor      = CheckIsForLine(line);
                bool containIf       = CheckIsIfLine(line);
                bool containWhile    = CheckIsWhileLine(line);
                bool containReturn   = CheckIsReturnLine(line);

                if (containFunction)
                {
                    keywordStack.Push(keyword.kfunction);
                }
                if (containFor)
                {
                    keywordStack.Push(keyword.kfor);
                }
                if (containIf)
                {
                    keywordStack.Push(keyword.kif);
                }
                if (containWhile)
                {
                    keywordStack.Push(keyword.kwhile);
                }

                if (containReturn)
                {
                    // Stack.ToArray yields the most recently pushed entry first, so this scans
                    // from the innermost open block outwards and stops at the first function
                    // or 'if' it meets: a return directly in the function body means the
                    // function's 'end' needs no EndSample, a return inside an 'if' means it still does.
                    var keyArray = keywordStack.ToArray();
                    for (int i = 0, imax = keyArray.Length; i < imax; i++)
                    {
                        if (keyArray[i] == keyword.kfunction)
                        {
                            needEndSample = false;
                            break;
                        }
                        else if (keyArray[i] == keyword.kif)
                        {
                            needEndSample = true;
                            break;
                        }
                    }
                }
                // Default used when nothing is popped on this line; it only has to differ
                // from keyword.kfunction for the check below.
                keyword lastPop = keyword.kif;
                if (containEnd && keywordStack.Count > 0)
                {
                    lastPop = keywordStack.Pop();
                }

                #endregion

                #region add profiler
                if (containFunction)
                {
                    // Open a sample right after the function line.
                    needEndSample = true;
                    beginSample   = true;
                    format        = "CS.MikuLuaProfiler.LuaProfiler.BeginSample(\"{0}\")";
                    string funName = GetFunName(line, fileName);
                    ApeendCrLine(sb, line);
                    ApeendCrLine(sb, string.Format(format, funName));
                }
                else if (containEnd && lastPop == keyword.kfunction && beginSample && needEndSample)
                {
                    // Close the function's sample just before its 'end'.
                    ApeendCrLine(sb, "CS.MikuLuaProfiler.LuaProfiler.EndSample()");
                    ApeendCrLine(sb, line);
                    beginSample = keywordStack.Count > 0;
                }
                else if (containReturn)
                {
                    // End the sample before returning.
                    // NOTE(review): this is a plain substring replace, so every lower-case
                    // "return" on the line is rewritten, not only the keyword - confirm that
                    // the earlier passes guarantee this is safe.
                    line = line.Replace("return",
                                        "CS.MikuLuaProfiler.LuaProfiler.EndSample()\r\nreturn");
                    ApeendCrLine(sb, line);
                    if (!keywordStack.Contains(keyword.kfunction))
                    {
                        needEndFileSample = false;
                    }
                }
                else
                {
                    ApeendCrLine(sb, line);
                }
                if (containEnd)
                {
                    ApeendCrLine(sb, "");
                }
                #endregion
            }

            if (needEndFileSample)
            {
                // Close the file-level sample opened before the loop.
                ApeendCrLine(sb, "\r\nCS.MikuLuaProfiler.LuaProfiler.EndSample()");
            }
            // Remove the line break directly after '{' and directly before '}'.
            string code = sb.ToString().Replace("{\r\n", "{");
            code = code.Replace("\r\n}", "}");
            code = RollBackString(code);

            return(PrettySpace(code));
        }
Exemple #10
0
        /// <summary>
        /// Crawls a Baidu result listing starting at <paramref name="link"/> and following the
        /// "next page" link. For every result whose detail page contains the search keyword
        /// and at least one business keyword, a scored <c>level1link</c> is built and saved
        /// through <c>BotMng.save_level1_links</c>.
        /// </summary>
        /// <param name="link">URL of the first result page.</param>
        /// <param name="tsk">The search task; supplies the keyword, user id and task id.</param>
        /// <param name="businessKeyword">The business keyword this search was built with.</param>
        /// <param name="businessKeywords">All business keywords; used as scoring patterns.</param>
        /// <param name="excludedKeywords">Excluded keywords, forwarded to <c>save_level1_links</c>.</param>
        void GetLinks(string link, searchkeyword tsk, keyword businessKeyword, List <keyword> businessKeywords, List <keyword> excludedKeywords)
        {
            BotMng botmng = BotMng.Instance;

            string[] searchKeywords = tsk.Keyword.GetLower().RemoveSpace().Split(';');

            List <KeywordScore> patterns = businessKeywords.Select(x => new KeywordScore {
                Keyword = x.Txt, Score = x.Score, BizType = x.BizType
            }).ToList();

            string[] bizPatterns = businessKeywords.Select(x => x.Txt).ToArray();
            // The search keyword itself is scored as well, with a fixed score of 50.
            patterns.Add(new KeywordScore {
                Keyword = tsk.Keyword, Score = 50, BizType = 0
            });

            int nohist_pages = 0;
            int quried_pages = 0;

            // Search at most 60 pages.
            // NOTE(review): the counter runs from 0 to 60 inclusive, i.e. up to 61 pages - confirm the intended limit.
            while (!string.IsNullOrEmpty(link) && quried_pages <= 60)
            {
                log(link);
                var html = get_html(link);
                if (html == null)
                {
                    break;
                }
                var tags = html.SubAfter("content_left").SplitWith("c-container");

                if (tags == null || tags.Length == 0)
                {
                    log("BLOCKED " + tsk.Keyword);
                    break;
                }
                bool nohit = true;
                foreach (var tag in tags)
                {
                    var    a     = tag.SubAfter("h3").SubAfter("a");
                    string title = RemoveInivalidChar(
                        a.RemoveSpace().GetLower().SubBefore("</h3>").GetTxtFromHtml2().RemoveSpace().GetLower());
                    string href = a.GetFirstHref2();

                    string abs    = RemoveInivalidChar(tag.SubAfter("abstract").SubBefore("</div").GetTxtFromHtml2().RemoveSpace().GetLower());
                    string domain = tag.SubLastStringAfter("\"f13").SubBefore("</span").GetTxtFromHtml2();
                    domain = GetDomain(domain);

                    int maxScore = 0;
                    // Skip results that have neither a title nor an abstract.
                    string txt = "{0}{1}".FormatStr(title, abs);
                    if (string.IsNullOrEmpty(txt))
                    {
                        continue;
                    }

                    string realUrl = null, detailHtml = null, abstracts = null;
                    byte   appType = 0;

                    if (!string.IsNullOrWhiteSpace(href))
                    {
                        ResolveDetailPage(href, ref realUrl, ref detailHtml, ref domain);
                    }
                    // The fetched page is a script redirect stub: follow the link it would click.
                    if (!string.IsNullOrEmpty(detailHtml) && detailHtml.Contains("document.getElementById(\"link\").click()"))
                    {
                        var gourl = detailHtml.GetFirstHref2();
                        if (!string.IsNullOrEmpty(gourl))
                        {
                            ResolveDetailPage(gourl, ref realUrl, ref detailHtml, ref domain);
                        }
                    }
                    if (string.IsNullOrEmpty(realUrl))
                    {
                        realUrl = href;
                    }

                    // Without a detail page, or with one that lacks the search keyword or
                    // every business keyword, the result is dropped.
                    if (string.IsNullOrEmpty(detailHtml))
                    {
                        continue;
                    }
                    if (!detailHtml.Contains(tsk.Keyword) || !detailHtml.IsContains2(bizPatterns))
                    {
                        continue;
                    }

                    List <KeywordScore> matchpatterns  = new List <KeywordScore>();
                    StringBuilder       sbabstracts    = new StringBuilder();
                    List <string>       abstractlist   = new List <string>();
                    StringBuilder       sbabstractlist = new StringBuilder();

                    foreach (KeywordScore pattern in patterns)
                    {
                        string[] splitDetailHtmls = detailHtml.SplitWith(pattern.Keyword);
                        if (splitDetailHtmls.Length > 1)
                        {
                            matchpatterns.Add(pattern);
                        }
                        StringBuilder sbpatternStr = new StringBuilder();
                        // For each occurrence of the pattern, collect the text around it up to
                        // the nearest separator on both sides.
                        for (int i = 0; i < splitDetailHtmls.Length - 1; i++)
                        {
                            string splitDetailHtml1 = splitDetailHtmls[i];
                            string splitDetailHtml2 = i < splitDetailHtmls.Length - 2 ? splitDetailHtmls[i + 1] : "";
                            // Walk backwards from the occurrence; the characters are collected in reverse order.
                            for (int j = splitDetailHtml1.Length - 1; j >= 0; j--)
                            {
                                if (split_bef_commas.Contains(splitDetailHtml1[j]) && j - 1 >= 0 && !split_num_commas.Contains(splitDetailHtml1[j - 1]))
                                {
                                    break;
                                }
                                sbpatternStr.Append(splitDetailHtml1[j]);
                            }
                            // Restore reading order for the leading context.
                            for (int q = sbpatternStr.Length - 1; q >= 0; q--)
                            {
                                sbabstracts.Append(sbpatternStr[q]);
                            }
                            sbabstracts.Append(pattern.Keyword);
                            sbpatternStr.Clear();
                            // Walk forwards from the occurrence for the trailing context.
                            for (int j = 0; j < splitDetailHtml2.Length; j++)
                            {
                                if (split_aft_commas.Contains(splitDetailHtml2[j]) && j + 1 < splitDetailHtml2.Length && !split_num_commas.Contains(splitDetailHtml2[j + 1]))
                                {
                                    break;
                                }
                                sbpatternStr.Append(splitDetailHtml2[j]);
                            }
                            sbabstracts.Append(sbpatternStr);
                            sbpatternStr.Clear();

                            string tmpsbabstracts = sbabstracts.ToString();
                            tmpsbabstracts = BaiduQuery.RemoveInivalidChar(tmpsbabstracts.GetTxtFromHtml2().RemoveSpace().GetLower());
                            if (!abstractlist.Contains(tmpsbabstracts))
                            {
                                abstractlist.Add(tmpsbabstracts);
                                sbabstractlist.Append(tmpsbabstracts).Append(" ");
                            }
                            sbabstracts.Clear();
                        }
                    }
                    abstracts = sbabstractlist.ToString();
                    if (!string.IsNullOrEmpty(abstracts) && matchpatterns.Count > 0)
                    {
                        // Best score plus a tenth of every other matched score.
                        maxScore  = matchpatterns.Max(x => x.Score ?? 50);
                        appType   = matchpatterns.Where(x => x.BizType > 0).OrderByDescending(x => x.Score).Select(x => x.BizType).FirstOrDefault();
                        maxScore += matchpatterns.Sum(x => (x.Score ?? 50) / 10);
                        maxScore -= matchpatterns.Max(x => (x.Score ?? 50) / 10);
                    }

                    if (string.IsNullOrEmpty(abstracts) && !string.IsNullOrEmpty(abs))
                    {
                        // Fallback: score against the abstract from the listing.
                        matchpatterns = patterns.Where(x => abs.Contains(x.Keyword)).ToList();
                        // Max throws InvalidOperationException on an empty sequence, so when no
                        // pattern occurs in the abstract the score and app type stay at 0.
                        if (matchpatterns.Count > 0)
                        {
                            maxScore = matchpatterns.Max(x => x.Score ?? 50);
                            appType  = matchpatterns.Where(x => x.BizType > 0).OrderByDescending(x => x.Score).Select(x => x.BizType).FirstOrDefault();

                            maxScore += matchpatterns.Sum(x => (x.Score ?? 50) / 10);
                            maxScore -= matchpatterns.Max(x => (x.Score ?? 50) / 10);
                        }
                    }
                    if (maxScore > 100)
                    {
                        maxScore = 100;
                    }

                    bool is_bus_matched = txt.IsContains2(businessKeyword.Txt);

                    bool          is_title_matched = title.GetLower().IsContains2(searchKeywords);
                    bool          is_abstr_matched = abs.IsContains2(searchKeywords);
                    BaiduItemPart part             = is_title_matched && is_abstr_matched ? BaiduItemPart.TitleAbstract :
                                                     is_title_matched ? BaiduItemPart.Title :
                                                     is_abstr_matched ? BaiduItemPart.Abstract : BaiduItemPart.None;
                    bool is_itm_title_matched = txt.GetLower().IsContains2(searchKeywords);

                    level1link l1 = new level1link
                    {
                        UsrId           = tsk.UsrId,
                        Domain          = domain,
                        TopDomain       = GetLevel1Domain(domain),
                        Keywords        = string.Format("{0} + {1}", tsk.Keyword, businessKeyword.Txt),
                        LinkUrl         = realUrl,
                        MatchAt         = (byte)part,
                        Html            = detailHtml,
                        MatchType       = (byte)((is_bus_matched ? 1 : 0) + (is_itm_title_matched ? 2 : 0)),
                        AppType         = appType,
                        BizId           = IDHelper.GetGuid("{0}/{1}/{2}".FormatStr(realUrl, tsk.UsrId, tsk.Keyword)),
                        SearchkeywordId = tsk._id.ToString(),
                        CreatedAt       = DateTime.UtcNow.AddHours(8),
                        Description     = abs,
                        Title           = title,
                        Score           = maxScore,
                        Abstract        = abstracts
                    };

                    botmng.save_level1_links(new List <level1link> {
                        l1
                    }, tsk, excludedKeywords);
                    nohit        = false;
                    nohist_pages = 0;
                }

                if (nohit)
                {
                    nohist_pages++;
                }
                // Stop when 3 consecutive pages produced no result.
                // NOTE(review): the check is "> 3", so the loop stops after the 4th empty page - confirm the intended limit.
                if (nohist_pages > 3)
                {
                    break;
                }

                quried_pages++;
                pages++;
                // The next-page link is the last href between the current-page marker and the "下一页" (next page) label.
                link = html.SubAfter("fkfk_cur").SubBefore("下一页").GetLastHref2();
                if (!string.IsNullOrEmpty(link) && !link.IsStartWith("http"))
                {
                    if (link.IsStartWith("/"))
                    {
                        link = link.SubAfter("/");
                    }
                    link = "http://www.baidu.com/".GetContact(link);
                }
            }
        }

        /// <summary>
        /// Fetches <paramref name="url"/> with <c>get_htmlUrl</c> and copies the non-empty parts
        /// of the answer into the caller's variables: Item1 into <paramref name="realUrl"/> and
        /// Item2 into <paramref name="detailHtml"/>. When a real URL is known and
        /// <paramref name="domain"/> is still empty, the domain is derived from that URL.
        /// </summary>
        private void ResolveDetailPage(string url, ref string realUrl, ref string detailHtml, ref string domain)
        {
            var fetched = get_htmlUrl(url);
            if (fetched != null && !string.IsNullOrEmpty(fetched.Item1))
            {
                realUrl = fetched.Item1;
            }
            if (fetched != null && !string.IsNullOrEmpty(fetched.Item2))
            {
                detailHtml = fetched.Item2;
            }
            if (!string.IsNullOrEmpty(realUrl) && string.IsNullOrEmpty(domain))
            {
                domain = GetDomain(realUrl);
            }
        }
         new SyntaxToken(keyword switch
 {