Ejemplo n.º 1
0
            /// <summary>
            /// Processes the current <c>file</c> line by line, replacing Chinese text with
            /// entries from the language dictionary <c>dic</c>, and appends every resulting
            /// line to the output file.
            /// </summary>
            /// <remarks>
            /// The output path is <c>file.FullName</c> with <c>path</c> replaced by <c>outpath</c>.
            /// Results are reported through <c>errInfo</c>:
            /// exact dictionary hits are appended; fuzzy hits (prefixed with <c>^</c>) and
            /// misses (prefixed with <c>^^</c>) are inserted at the front. Any exception is
            /// caught and recorded in <c>errInfo</c> instead of being propagated.
            /// </remarks>
            public void RepFile()
            {
                try {
                    List <string> list = FileAction.ReadToArr(file.FullName);
                    string        p    = file.FullName.Replace(path, outpath);

                    // Pattern used to strip comments ("//..." or "*...") from a line.
                    string regNotes = "([/]{2,}|[*]+).*";
                    // Pattern used to detect and extract runs of Chinese text.
                    string regChinese = @"([\u4e00-\u9fa5]{1,}[\s,,‘“;(()):、:.&\\-a-zA-Z0-9\u4e00-\u9fa5]{0,}[。”’!0-9\u4e00-\u9fa5]{1,})|([\u4e00-\u9fa5]{1})";
                    // 1-based number of the line being processed; only used in report entries.
                    int    index      = 0;

                    foreach (var item in list)
                    {
                        index++;
                        // Work on the trimmed line with its comment part removed.
                        string str = DataCheck.RepStr(item.Trim(), regNotes, "");

                        // Lines without Chinese text are copied through unchanged.
                        if (!DataCheck.CheckReg(str, regChinese))
                        {
                            FileAction.AppendStr(p, item + "\n");
                            continue;
                        }

                        // Extract the Chinese fragments and replace them one by one in a copy.
                        string[] strArr = DataCheck.GetRegStrArr(str, regChinese);
                        string   temp   = str;

                        foreach (var chinese in strArr)
                        {
                            // Skip fragments that contain no Chinese characters.
                            if (!DataCheck.CheckReg(chinese, "[\u4e00-\u9fa5]+"))
                            {
                                continue;
                            }

                            // Exact hit: a single lookup fetches the translation.
                            string translation;
                            if (dic.TryGetValue(chinese, out translation))
                            {
                                temp = temp.Replace(chinese, DataCheck.RepLanguage(translation, false));
                                errInfo.Add($"{chinese}\t{translation}\t{file.FullName}");
                                continue;
                            }

                            // No exact hit: look for a similar key, i.e. one whose length is
                            // within two characters of the fragment and that contains it.
                            int  min = chinese.Length - 2;
                            int  max = chinese.Length + 2;
                            bool bl  = false;

                            foreach (var key in dic.Keys)
                            {
                                // Key length outside the allowed window: not a candidate.
                                if (max < key.Length || key.Length < min)
                                {
                                    continue;
                                }

                                if (key.Contains(chinese))
                                {
                                    string candidate = dic[key];
                                    // NOTE(review): the loop deliberately keeps going, so every
                                    // qualifying key is reported; confirm whether only the first
                                    // candidate should be applied and logged.
                                    temp = temp.Replace(chinese, DataCheck.RepLanguage(candidate, false));
                                    errInfo.Insert(0, $"^{chinese}:{index}行\t{candidate}\t{file.FullName}");
                                    bl = true;
                                }
                            }

                            // Nothing similar found: report the untranslated fragment.
                            if (!bl)
                            {
                                errInfo.Insert(0, $"^^{chinese}:{index}行\t{file.FullName}");
                            }
                        }

                        // Write the original line with its comment-free part swapped for the
                        // translated version.
                        FileAction.AppendStr(p, item.Replace(str, temp) + "\n");
                    }
                }
                catch (Exception e) {
                    errInfo.Add("错误:" + file.FullName + "\t" + e.Message);
                }
            }
Ejemplo n.º 2
0
        /// <summary>
        /// Handles a completed page fetch. If the page contains the article container
        /// (<c>//div[@id='endText']</c>) it is stored as a news item, unless its URL is
        /// already stored; otherwise the page's links are crawled one level deeper.
        /// </summary>
        /// <param name="e">Completion data of the fetched page (URI, page source, elapsed milliseconds, thread id).</param>
        /// <param name="layer">Depth of the fetched page; links are no longer followed once this reaches 4.</param>
        /// <param name="Title">Title stored with the news item; recursive calls pass the inner text of the anchor that led to the page.</param>
        private void Crawler_OnCompleted(OnCompletedEventArgs e, int layer, string Title = "")
        {
            string pageUrl = e.Uri.ToString();

            SetLog($"读取网站:{e.Uri.ToString()}\n\t\t\t\t深度:{layer}\t用时:{e.Milliseconds} 毫秒\t线程ID:{e.TreadID}", Color.Gray);
            UpAll(1);
            // Remember this page so links pointing back to it are not crawled again.
            urlList.Add(pageUrl);
            HtmlDocument doc = new HtmlDocument();

            doc.LoadHtml(e.PageSource);
            ConReg = "//div[@id='endText']";
            // Check whether the page has an article content node.
            HtmlNode conNode = doc.DocumentNode.SelectSingleNode(ConReg);

            if (conNode != null)
            {
                // Skip pages whose URL has already been stored.
                if (_GSQ_NewsService.Exists(c => c.url == pageUrl))
                {
                    return;
                }

                if (!string.IsNullOrEmpty(conNode.InnerHtml))
                {
                    GSQ_News _News = new GSQ_News();
                    _News.title         = Title;
                    _News.url           = pageUrl;
                    _News.sourcewebsite = conNode.InnerHtml;
                    _News.num           = 0;
                    _News.CreateDate    = DateTime.Now;
                    _GSQ_NewsService.AddEntity(_News);
                    UpCon(1);
                    UpNum(1);
                    SetLog($"抓取新闻《{Title}》,用时:{e.Milliseconds} 毫秒", Color.Gray);
                }
                // Article pages are never scanned for further links.
                return;
            }
            // Depth limit reached: do not follow links from this page.
            if (layer >= 4)
            {
                return;
            }

            // The page URL passed through Utils.DelLastChar; identical for every link, so
            // it is computed once. Links must contain it to be followed.
            string baseUrl = Utils.DelLastChar(pageUrl, "/", 0);

            // Walk all anchor tags of the page.
            foreach (var item in doc.DocumentNode.Descendants("a"))
            {
                string href = item.Attributes["href"]?.Value;
                // Follow only non-empty, not yet visited hrefs that match the URL pattern
                // and contain the base URL.
                if (string.IsNullOrEmpty(href) || urlList.Contains(href) ||
                    !DataCheck.CheckReg(href, DataCheck.Reg_Url) ||
                    !href.Contains(baseUrl))
                {
                    continue;
                }

                // Build the crawler only for links that are actually followed.
                Crawler   crawler   = new Crawler();
                Operation operation = new Operation()
                {
                    Action    = (x) => { },
                    Condition = (x) => { return(true); },
                    timeout   = 5000
                };
                crawler.OnError     += Crawler_OnError;
                crawler.OnCompleted += (s, ex) => {
                    Crawler_OnCompleted(ex, layer + 1, item.InnerText);
                };
                // Blocks until this link's crawl has finished before moving to the next one.
                crawler.Start(href, operation, null).Wait();
            }
        }