/// <summary>
/// Hands <paramref name="url"/> and <paramref name="json"/> to
/// <c>WebRequestProcessor.WebRequestPost</c> on a freshly created processor
/// and returns the awaited result.
/// </summary>
/// <param name="url">Address passed through to the POST call.</param>
/// <param name="json">Payload passed through to the POST call.</param>
/// <returns>The value produced by <c>WebRequestPost</c>.</returns>
private async Task<bool> PostRepositories(string url, string json)
{
    var processor = new WebRequestProcessor();
    return await processor.WebRequestPost(url, json);
}
/// <summary>
/// Hands <paramref name="url"/> to <c>WebRequestProcessor.WebRequestGet</c> on a
/// freshly created processor and returns the awaited response.
/// </summary>
/// <param name="url">Address passed through to the GET call.</param>
/// <returns>The value produced by <c>WebRequestGet</c>.</returns>
private async Task<string> RetrieveRepositories(string url)
{
    var processor = new WebRequestProcessor();
    return await processor.WebRequestGet(url);
}
/// <summary>
/// Stores <c>target</c> (cast to <c>WebRequestProcessor</c>) in <c>_target</c> and fills
/// <c>_bundles</c> with the names of every configured bundle whose type is not
/// <c>BundleType.Static</c>.
/// </summary>
void OnEnable()
{
    _target = target as WebRequestProcessor;

    var configuredBundles = Settings.instance.runtimeSettings.bundles;
    _bundles = (from bundle in configuredBundles
                where bundle.type != BundleType.Static
                select bundle.name).ToArray();
}
/// <summary>
/// Downloads every URL in <paramref name="categoryUrls"/>, scans each page with
/// <c>baiduRegex</c>, and writes one worksheet row for every match whose PubDate group
/// contains "前". The workbook is saved after all URLs have been processed.
/// </summary>
/// <param name="dailyWorksheet">Worksheet that receives the rows.</param>
/// <param name="dailybook">Workbook saved to the fixed daily-report path at the end.</param>
/// <param name="dailyStartRow">Index of the next row to fill; incremented once per written row.</param>
/// <param name="categoryName">Written to column 2 of the first row produced by this call.</param>
/// <param name="categoryUrls">Pages to download and scan.</param>
private void CrawlDailyReport(Worksheet dailyWorksheet, Workbook dailybook, ref int dailyStartRow, string categoryName, string[] categoryUrls)
{
    RegexOptions scanOptions = RegexOptions.IgnoreCase | RegexOptions.Multiline;
    bool categoryPending = true;

    foreach (string pageUrl in categoryUrls)
    {
        var pageHtml = WebRequestProcessor.DownloadHTTPString(pageUrl);

        // Two-second pause after each download.
        Thread.Sleep(2000);

        foreach (Match hit in Regex.Matches(pageHtml, baiduRegex, scanOptions))
        {
            // Only matches whose publication date contains "前" are reported.
            if (!hit.Groups["PubDate"].Value.Contains("前"))
            {
                continue;
            }

            int row = dailyStartRow;

            // The category label goes on the first reported row only.
            if (categoryPending)
            {
                dailyWorksheet.Cells[row, 2].PutValue(categoryName);
                categoryPending = false;
            }

            var link = hit.Groups["Url"].Value;

            try
            {
                // Match the media name from the link's host.
                dailyWorksheet.Cells[row, 1].PutValue(GetUrlDomain(new Uri(link).Host));
            }
            catch (Exception)
            {
                // Best effort: the media column is left untouched when this fails.
            }

            var headline = TextCleaner.FullClean(hit.Groups["Title"].Value);
            var summary = TextCleaner.FullClean(hit.Groups["Text"].Value);
            var caption = headline + Environment.NewLine + summary;

            var linkStyle = dailyWorksheet.Cells[row, 6].GetDisplayStyle();
            linkStyle.Font.Color = Color.Blue;

            dailyWorksheet.Cells[row, 0].PutValue(link);
            // The formula addresses the sheet row one past the zero-based cell index.
            dailyWorksheet.Cells[row, 5].Formula = string.Format("=VLOOKUP(B{0},Sheet2!A:B,2,FALSE)", row + 1);
            dailyWorksheet.Cells[row, 6].SetStyle(linkStyle);
            dailyWorksheet.Cells[row, 6].PutValue(caption);
            dailyWorksheet.Hyperlinks.Add(row, 6, 1, 1, link);
            dailyWorksheet.Cells[row, 7].PutValue(DateTime.Now.ToString("yyyy-MM-dd"));
            dailyWorksheet.Cells[row, 8].PutValue("负面舆情");

            dailyStartRow++;
        }
    }

    dailybook.Save(@"D:\dailyreport\日报.xlsx");
}
/// <summary>
/// Downloads the page at <paramref name="url"/> with whichever downloader is selected by the
/// radio buttons, runs <c>PageAutoAnalyzer.AnalyzeContent</c> on the downloaded content and
/// copies the analysed fields into the form's text boxes. Does nothing further when the
/// analyser returns null.
/// </summary>
/// <param name="title">Title used to seed a new <c>PageElement</c> when <paramref name="pageElement"/> is null.</param>
/// <param name="url">Address of the page to download; also the base used to expand the result URL.</param>
/// <param name="pageElement">Element handed to the analyser; built from <paramref name="title"/> and <paramref name="url"/> when null.</param>
/// <exception cref="NotSupportedException">Neither download radio button is checked.</exception>
private void ParsePage(string title, string url, PageElement pageElement = null)
{
    if (pageElement == null)
    {
        pageElement = new PageElement { Title = title, Url = url };
    }

    var xpath = new ItemPageXPaths();
    List<SubItemElement> subList;

    // Only the download step differs between the two modes; the analysis call is shared.
    string content;
    if (GeckoDownRd.Checked)
    {
        CrawlResponse resp = GeckoRequestProcessor.DoRequest(BuildFakeRequest(url), BuildFakeSiteEntity(), null, null, null, true, 1000);
        content = resp.Content;
    }
    else if (HttpdownRd.Checked)
    {
        content = WebRequestProcessor.DownloadHTTPString(url, 30);
    }
    else
    {
        // NotSupportedException derives from Exception, so existing catch blocks still apply.
        throw new NotSupportedException("不支持该方式分析正文");
    }

    PageElement result = PageAutoAnalyzer.AnalyzeContent(content, pageElement, DeterminedMode(), new IdentityContentElement(), ref xpath, out subList, 86400, ExcludeTxt.Text);
    if (result == null)
    {
        return;
    }

    PageUrlTxt.Text = HtmlUtility.ExpandRelativePath(url, result.Url);
    TitleTxt.Text = result.Title;
    ContentTxt.Text = result.Content;
    ViewTxt.Text = result.View.ToString();
    ReplyTxt.Text = result.Reply.ToString();
    PubdateTxt.Text = result.Pubdate == null ? "" : result.Pubdate.ToString();
    AuthorTxt.Text = result.Author;
    MediaTxt.Text = result.MediaName;
    ElementXPathTxt.Text = result.ElementXPath;
    ElementBlockTxt.Text = result.ElementBlock;
    NextpageXPathTxt.Text = result.NextPageXPath;
}
/// <summary>
/// Click handler: downloads the page named in <c>InputUrlTxt</c> with the selected downloader,
/// runs <c>PageAutoAnalyzer.AnalyzeArticleList</c> on it, expands each returned element's URL
/// against the input URL and binds the elements to <c>ListGridView</c>. Shows a message box
/// and returns when the analyser yields no list.
/// </summary>
/// <exception cref="NotSupportedException">Neither download radio button is checked.</exception>
private void ParseListBtn_Click(object sender, EventArgs e)
{
    string url = InputUrlTxt.Text;
    RecogniseMode mode = DeterminedMode();
    var xpath = new ListPageXPaths();
    PageElement[] items = null;

    if (GeckoDownRd.Checked)
    {
        CrawlResponse resp = GeckoRequestProcessor.DoRequest(BuildFakeRequest(url), BuildFakeSiteEntity(), null, null, null, true, 1000);
        string html = resp.Content;
        // This mode analyses against the URL reported by the response.
        var analysis = PageAutoAnalyzer.AnalyzeArticleList(resp.Url, html, mode, new IdentityPageElement(), ref xpath, 86400);
        if (analysis != null)
        {
            items = analysis.List;
        }
    }
    else if (HttpdownRd.Checked)
    {
        string html = WebRequestProcessor.DownloadHTTPString(url, 30);
        var analysis = PageAutoAnalyzer.AnalyzeArticleList(url, html, mode, new IdentityPageElement(), ref xpath, 86400);
        if (analysis != null)
        {
            items = analysis.List;
        }
    }
    else
    {
        throw new NotSupportedException("不支持当前项抓取");
    }

    if (items == null)
    {
        MessageBox.Show("解析不出数据");
        return;
    }

    // Expand every element URL against the address the user typed.
    for (int i = 0; i < items.Length; i++)
    {
        items[i].Url = HtmlUtility.ExpandRelativePath(url, items[i].Url);
    }

    ListGridView.DataSource = items;
}
/// <summary>
/// Click handler that builds two reports. First it downloads <c>DsgUrl</c>, inserts one row
/// into the DSG workbook for every <c>baiduRegex</c> match whose PubDate group contains "前",
/// fills those rows and saves the workbook. Then it opens the daily workbook and calls
/// <c>CrawlDailyReport</c> once per category, and finally shows a completion message.
/// </summary>
private void CrawlBtn_Click(object sender, EventArgs e)
{
    const string dsgReportPath = @"D:\dailyreport\DSG.xlsx";
    const string dailyReportPath = @"D:\dailyreport\日报.xlsx";

    // ---- DSG report ----
    var dsgHtml = WebRequestProcessor.DownloadHTTPString(DsgUrl);
    var dsgHits = Regex.Matches(dsgHtml, baiduRegex, RegexOptions.Multiline | RegexOptions.IgnoreCase);

    Workbook dsgBook = new Workbook();
    dsgBook.Open(dsgReportPath);
    var dsgSheet = dsgBook.Worksheets[0];
    int row = 7;

    // Make room first: one inserted row per match that will be reported.
    foreach (Match hit in dsgHits)
    {
        if (hit.Groups["PubDate"].Value.Contains("前"))
        {
            dsgSheet.Cells.InsertRow(row);
        }
    }

    foreach (Match hit in dsgHits)
    {
        if (!hit.Groups["PubDate"].Value.Contains("前"))
        {
            continue;
        }

        var link = hit.Groups["Url"].Value;

        try
        {
            // Match the media name from the link's host.
            dsgSheet.Cells[row, 1].PutValue(GetUrlDomain(new Uri(link).Host));
        }
        catch (Exception)
        {
            // Best effort: the media column is left untouched when this fails.
        }

        var headline = TextCleaner.FullClean(hit.Groups["Title"].Value);
        var summary = TextCleaner.FullClean(hit.Groups["Text"].Value);
        var caption = headline + Environment.NewLine + summary;

        dsgSheet.Cells[row, 0].PutValue(link);
        // The formula addresses the sheet row one past the zero-based cell index.
        dsgSheet.Cells[row, 5].Formula = string.Format("=VLOOKUP(B{0},Sheet2!A:B,2,FALSE)", row + 1);
        dsgSheet.Cells[row, 6].PutValue(caption);
        dsgSheet.Hyperlinks.Add(row, 6, 1, 1, link);
        dsgSheet.Cells[row, 7].PutValue(DateTime.Now.ToString("yyyy-MM-dd"));
        dsgSheet.Cells[row, 8].PutValue("负面舆情");

        row++;
    }

    dsgBook.Save(dsgReportPath);

    // ---- Daily report: one pass per category, all writing into the same sheet ----
    Workbook dailybook = new Workbook();
    dailybook.Open(dailyReportPath);
    var dailySheet = dailybook.Worksheets[0];
    int nextDailyRow = 6;

    CrawlDailyReport(dailySheet, dailybook, ref nextDailyRow, "大众-POLO", poloUrls);
    CrawlDailyReport(dailySheet, dailybook, ref nextDailyRow, "大众-朗逸", langyiUrls);
    CrawlDailyReport(dailySheet, dailybook, ref nextDailyRow, "大众-途安", turanUrls);
    CrawlDailyReport(dailySheet, dailybook, ref nextDailyRow, "大众-帕萨特", pasateUrls);
    CrawlDailyReport(dailySheet, dailybook, ref nextDailyRow, "大众-桑塔纳", santanaUrls);
    CrawlDailyReport(dailySheet, dailybook, ref nextDailyRow, "大众-途观", tuguanUrls);

    MessageBox.Show("抓取完成");
}