/// <summary>
/// Downloads the resource at <paramref name="url"/> as text and records how many
/// matches <c>AnchorRegEx</c> finds in it.
/// </summary>
/// <param name="url">Address to download; its string form is stored on the result.</param>
/// <returns>
/// A model carrying the URL and either the match count (when the downloaded body is
/// non-empty) or the message of the <see cref="ArgumentException"/> /
/// <see cref="WebException"/> raised while downloading or matching.
/// </returns>
private async Task<UrlResultModel> ParseUrlAsync(Uri url)
{
    var model = new UrlResultModel();
    model.Url = url.ToString();

    using (var client = new WebClient())
    {
        try
        {
            string html = await client.DownloadStringTaskAsync(url);
            bool hasContent = !string.IsNullOrEmpty(html);
            if (hasContent)
            {
                // AnchorRegEx is defined outside this method; presumably it matches anchor tags.
                var matches = AnchorRegEx.Matches(html);
                model.AnchorCount = matches.Count;
            }
        }
        catch (ArgumentException argumentError)
        {
            model.Error = argumentError.Message;
        }
        catch (WebException webError)
        {
            model.Error = webError.Message;
        }
    }

    return model;
}
/// <summary>
/// Prompts on the console for a search term, pairs it with the Google Brazil search
/// URL prefix, and passes the resulting model to <c>ConnectEngine</c>.
/// </summary>
public static void SelectSearchEngine()
{
    const string searchPrefix = "https://www.google.com.br/search?q=";

    var request = new UrlResultModel();

    Console.Write("Search:");
    string typedTerm = Console.ReadLine();

    request.content = typedTerm;
    request.engineUrl = searchPrefix;

    ConnectEngine(request);
}
/// <summary>
/// Saves an uploaded Office document under the application's <c>uploads</c> folder,
/// converts it to PDF, stamps a watermark on it, and reports the relative path of the
/// watermarked PDF.
/// </summary>
/// <param name="file">The uploaded file. Supported extensions: .ppt, .pptx, .doc, .docx, .xls, .xlsx.</param>
/// <param name="watermark">
/// Watermark text; when null, empty, or whitespace the built-in default text is used.
/// </param>
/// <returns>
/// A result whose <c>Url</c> is the relative path of the generated PDF on success, or
/// whose <c>HasError</c>/<c>ErrorMessage</c> describe the failure.
/// </returns>
public UrlResultModel Upload(HttpPostedFileBase file, string watermark)
{
    UrlResultModel result = new UrlResultModel();
    result.HasError = false;

    if (file == null || string.IsNullOrWhiteSpace(file.FileName))
    {
        result.HasError = true;
        result.ErrorMessage = "No file was uploaded.";
        return result;
    }

    // The client-supplied name may be a full or rooted path. Path.Combine discards the
    // base directory when its second argument is rooted, so keep only the leaf name to
    // stay inside the uploads folder.
    string safeName = Path.GetFileName(file.FileName);
    if (string.IsNullOrWhiteSpace(safeName))
    {
        result.HasError = true;
        result.ErrorMessage = "The uploaded file name is not valid.";
        return result;
    }

    string path = AppDomain.CurrentDomain.BaseDirectory + "uploads\\";
    string sourcePath = Path.Combine(path, safeName);
    file.SaveAs(sourcePath);

    Guid id = Guid.NewGuid();
    // Declared outside the try block so the finally block can always clean it up.
    string tempPath = path + "_" + Guid.NewGuid() + ".pdf";

    try
    {
        string filename = Path.GetFileNameWithoutExtension(sourcePath);
        // Invariant lower-casing: extension matching must not depend on the current culture.
        string extension = Path.GetExtension(sourcePath).Trim().ToLowerInvariant();
        string targetPath = path + $"{id}_{filename}.pdf";

        switch (extension)
        {
            case ".ppt":
            case ".pptx":
                AsposeTransPdf.AsposePPTToPDF(sourcePath, tempPath);
                break;
            case ".doc":
            case ".docx":
                AsposeTransPdf.AsposeWordToPDF(sourcePath, tempPath);
                break;
            case ".xls":
            case ".xlsx":
                AsposeTransPdf.AsposeExcelToPDF(sourcePath, tempPath);
                break;
            default:
                // Without a conversion there is no temp PDF to watermark; fail with a
                // clear message instead of an unrelated file-not-found error.
                throw new NotSupportedException($"Unsupported file type '{extension}'.");
        }

        ITextSharpUtils.setWatermark(tempPath, targetPath, String.IsNullOrWhiteSpace(watermark) ? "金 风 科 技" : watermark);
        result.Url = $"uploads\\{id}_{filename}.pdf";
    }
    catch (Exception e)
    {
        result.HasError = true;
        result.ErrorMessage = e.Message;
    }
    finally
    {
        // Best-effort removal of the intermediate PDF on both success and failure;
        // a cleanup problem must not mask the outcome already recorded in result.
        try
        {
            System.IO.File.Delete(tempPath);
        }
        catch (IOException)
        {
        }
        catch (UnauthorizedAccessException)
        {
        }

        System.IO.File.Delete(sourcePath);
    }

    return result;
}
/// <summary>
/// Loads the search results page for <paramref name="url"/>, saves a copy to
/// <c>ScraperTexto.txt</c> on the current user's desktop, re-parses that copy, and
/// prints the tag-stripped inner HTML of every <c>&lt;a&gt;</c> element to the console.
/// Waits for a key press before returning.
/// </summary>
/// <param name="url">
/// Model whose <c>engineUrl</c> is the search URL prefix and whose <c>content</c> is the
/// raw (unescaped) search term.
/// </param>
public static void ConnectEngine(UrlResultModel url)
{
    // Escape the term so spaces, '&', '#', '+' and similar characters cannot corrupt the query string.
    string query = Uri.EscapeDataString(url.content ?? string.Empty);

    HtmlWeb web = new HtmlWeb();
    HtmlDocument document = web.Load(url.engineUrl + query);

    string snapshotPath = Environment.GetFolderPath(Environment.SpecialFolder.Desktop) + @"\ScraperTexto.txt";
    document.Save(snapshotPath);

    HtmlDocument documentParse = new HtmlDocument();
    documentParse.Load(snapshotPath);

    // SelectNodes yields null (not an empty collection) when no element matches.
    var found = documentParse.DocumentNode.SelectNodes("//a");
    HtmlNode[] nodes = found == null ? new HtmlNode[0] : found.ToArray();

    foreach (HtmlNode anchor in nodes)
    {
        // Strip any nested markup so only the link text is printed.
        anchor.InnerHtml = Regex.Replace(anchor.InnerHtml, @"<[^>]*>", String.Empty);
        Console.WriteLine(anchor.InnerHtml + " ");
    }

    Console.Read();
}