/// <summary>
/// Downloads the page at <c>webPageModel.urlAdress</c> and, when the server answers
/// HTTP 200, parses the response into <c>webPageModel.htmlDocument</c>.
/// </summary>
/// <param name="webPageModel">Model carrying the address to fetch; receives the parsed document.</param>
/// <returns>
/// The same <paramref name="webPageModel"/> instance. <c>htmlDocument</c> is not assigned
/// when the request fails or the status is anything other than OK.
/// </returns>
public WebPageModel GetDataFromUrl(WebPageModel webPageModel)
{
    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(webPageModel.urlAdress);

        // The response and its stream hold a network connection; dispose them on every
        // path (non-OK status and parse failures included), not only on success.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            if (response.StatusCode == HttpStatusCode.OK)
            {
                using (Stream receiveStream = response.GetResponseStream())
                {
                    webPageModel.htmlDocument = new HtmlAgilityPack.HtmlDocument();
                    // Second argument: detect the encoding from the byte order marks.
                    webPageModel.htmlDocument.Load(receiveStream, true);
                }
            }
        }
    }
    catch (WebException)
    {
        // Any transport/HTTP failure is reported to the user as "page does not exist".
        MessageBox.Show("Strona o podanym adresie url nie istnieje");
    }
    catch (Exception e)
    {
        MessageBox.Show("Wystąpił błąd: " + e.Message);
    }

    return webPageModel;
}
/// <summary>
/// Overload that takes the payload as a plain string, wraps it in a
/// <c>PassingData</c> and forwards to the <c>PassingData</c>-based overload.
/// </summary>
/// <param name="monitor">Monitor passed through unchanged.</param>
/// <param name="page">Page passed through unchanged.</param>
/// <param name="handTask">Response handler passed through unchanged.</param>
/// <param name="repeat">Repeat settings passed through unchanged.</param>
/// <param name="passingData">Raw payload; may be null.</param>
public void request(IRequestMonitor monitor, WebPageModel page, HandWebDataTask handTask, RepeatInfo repeat, string passingData = null)
{
    PassingData wrapped = new PassingData();
    wrapped.Data = passingData;

    request(monitor, page, handTask, repeat, wrapped);
}
/// <summary>
/// Creates the "send custom data" and "abort" commands and wires their
/// can-execute and execute handlers.
/// </summary>
protected override void OnInitCommands()
{
    SendCustomDataCommand = new DelegateCommand();
    AbortCommand = new DelegateCommand();

    // Sending is only allowed while idle; aborting only while a task is running.
    SendCustomDataCommand.CanExecuteCommand += o => IsTaskEmpty;
    AbortCommand.CanExecuteCommand += o => !IsTaskEmpty;

    SendCustomDataCommand.ExecuteCommand += o =>
    {
        WebPageModel pageTemp;
        try
        {
            pageTemp = new WebPageModel("subTask", CustomUrl, CustomTypeOfPost, httpRequestConfig.Encoding.EncodingName, null, null, null);
        }
        catch (Exception ex)
        {
            // Still best-effort (no request is sent), but leave a trace instead of
            // discarding the failure without any diagnostic.
            System.Diagnostics.Debug.WriteLine("Could not build the custom request page: " + ex);
            return;
        }

        if (CustomRepeatEnable)
        {
            requester.request(this, pageTemp, customMatchHtmlTask, CustomRepeatInfo, CustomData);
        }
        else
        {
            requester.request(this, pageTemp, customMatchHtmlTask, CustomData);
        }
    };

    AbortCommand.ExecuteCommand += o => { requester.cancel(); };
}
// GET: WebPageController/Create
// Renders the creation form bound to a freshly constructed, empty WebPageModel.
public ActionResult Create()
{
    // Theme list population is currently disabled:
    //model.AvailableThemes = _themeService.GetThemes().Select(x => new SelectListItem { Text = x });
    WebPageModel emptyModel = new WebPageModel();
    return View(emptyModel);
}
//webpage
/// <summary>
/// Builds a <c>WebPageModel</c> from a <c>WebPage</c> entity: copies the id, queries the
/// current user's rights, lists the available sections and converts every section.
/// </summary>
/// <param name="entity">Entity to convert; null yields null.</param>
/// <param name="webPageService">Used for the admin / edit / create rights checks.</param>
/// <param name="sectionService">Supplies the names offered as available sections.</param>
/// <returns>The populated model, or null when <paramref name="entity"/> is null.</returns>
public static WebPageModel ToModel(this WebPage entity, IWebPageService webPageService, ISectionService sectionService)
{
    if (entity == null)
    {
        return null;
    }

    bool isAdmin = webPageService.IsAdminCurrentUser();
    bool canEdit = webPageService.HasEditRights(entity.Id);
    bool canCreate = webPageService.HasCreateRights(entity.Id);

    WebPageModel result = new WebPageModel();
    result.Id = entity.Id;
    //SidebarVisible = entity.SidebarEnabled,
    result.UserHasCreateRights = canCreate;
    result.AvailableSections = sectionService.GetAvailableSections().Select(x => new SelectListItem { Text = x });

    foreach (var section in entity.Sections)
    {
        var sectionModel = GetSectionModel(section);
        sectionModel.Id = section.Id;
        sectionModel.WebPage = section.WebPage;
        // Rights are evaluated once for the page and stamped on every section.
        sectionModel.UserHasEditRights = canEdit;
        sectionModel.UserIsAdmin = isAdmin;
        result.Sections.Add(sectionModel);
    }

    return result;
}
// Disabled population steps kept for reference:
//model.AvailableParentWebPages = _webPageService.GetAll().Where(x => x.Id != page.Id).Select(x => new SelectListItem { Text = x.NavigationName, Value = x.Id.ToString(), Selected = page.ParentId == x.Id });
//model.Roles = GetAllRoles(page);
//model.AvailableThemes = _themeService.GetThemes().Select(x => new SelectListItem { Text = x, Selected = page.Theme == x });
//model.CustomAttributes = CustomAttributeHelper.PrepareCustomAttributes(page, _customAttributeService, _customAttributeParser);
/// <summary>
/// Fills the parts of <paramref name="model"/> that are not posted back; currently that
/// is only <c>Roles</c>, taken from <c>GetAllRoles(entity)</c>.
/// </summary>
/// <param name="entity">Page whose roles are looked up.</param>
/// <param name="model">Model that receives the roles.</param>
private void PrepareWebPageModel(WebPage entity, WebPageModel model) => model.Roles = GetAllRoles(entity);
/// <summary>
/// Formats <paramref name="datas"/> into the page's <c>PostDataFormat</c> template and
/// issues a request with the formatted payload and the supplied additions.
/// </summary>
/// <param name="monitor">Monitor passed through to <c>request</c>.</param>
/// <param name="page">Page whose <c>PostDataFormat</c> is the format string.</param>
/// <param name="handTask">Response handler passed through to <c>request</c>.</param>
/// <param name="addition">Extra objects stored in <c>PassingData.Addition</c>.</param>
/// <param name="repeat">Repeat settings passed through to <c>request</c>.</param>
/// <param name="datas">Values substituted into the format string.</param>
public void formatRequest(IRequestMonitor monitor, WebPageModel page, HandWebDataTask handTask, object[] addition, RepeatInfo repeat, params string[] datas)
{
    string formatted = string.Format(page.PostDataFormat, datas);

    PassingData payload = new PassingData()
    {
        Data = formatted,
        Addition = addition
    };

    request(monitor, page, handTask, repeat, payload);
}
/// <summary>
/// Fills in keyword statistics: for every entry in <c>listKeyWords</c>, counts its
/// literal occurrences in <c>body</c> and appends the pair to <c>listKeyWordWithStats</c>.
/// </summary>
/// <param name="webPageModel">Model providing the keywords and body; receives the statistics.</param>
/// <returns>The same <paramref name="webPageModel"/> instance.</returns>
public WebPageModel CompletePageStats(WebPageModel webPageModel)
{
    foreach (string keyWord in webPageModel.listKeyWords)
    {
        // Keywords are plain text, not patterns: escape them so entries such as "c++"
        // or "(x)" neither throw nor match something else. An empty pattern matches
        // at every position, so an empty keyword is reported as zero hits instead.
        int amountOfKeyWords = string.IsNullOrEmpty(keyWord)
            ? 0
            : Regex.Matches(webPageModel.body, Regex.Escape(keyWord)).Count;

        webPageModel.listKeyWordWithStats.Add(new KeyValuePair<string, int>(keyWord, amountOfKeyWords));
    }

    return webPageModel;
}
// Earlier, disabled implementation kept for reference:
//var roles = _webPageService.GetRolesByPageId(entity.Id).ToList();
//var roleModelList = new List<WebPageModel.WebPageRoleModel>();
//roles.ForEach(x =>
//{
//    roleModelList.Add(new WebPageModel.WebPageRoleModel { Id = x.Id, Name = x.Name });
//});
//model.Roles = roleModelList;
/// <summary>
/// Assigns <c>model.Roles</c> from <c>GetAllRoles(entity)</c>.
/// </summary>
/// <param name="entity">Page whose roles are looked up.</param>
/// <param name="model">Model that receives the roles.</param>
private void PrepareModels(WebPage entity, WebPageModel model) => model.Roles = GetAllRoles(entity);
/// <summary>
/// Response handler for the logout request: publishes the returned HTML, replaces the
/// shared cookie container with an empty one and flags the session as logged out.
/// </summary>
/// <param name="page">Requested page (not used here).</param>
/// <param name="passing">Data passed with the request (not used here).</param>
/// <param name="res">Response whose <c>Data</c> is shown as the current HTML.</param>
/// <returns>A task that always yields <c>HandResult.HandComplete</c>.</returns>
private Task <HandResult> handLogoutData(WebPageModel page, PassingData passing, ResponseData <string> res)
{
    return Task.Run(() =>
    {
        string html = res.Data;
        HtmlValue = html;

        // A brand-new container means no cookies from the old session are kept.
        CookieContainer emptyCookies = new CookieContainer();
        httpRequestConfig.HoleCookieContainer = emptyCookies;

        updateLogin(false);
        HandDebugMessage("Logout.");

        return HandResult.HandComplete;
    });
}
// Creates a new web page from the request body.
// Returns 400 with the model-state errors when validation fails, otherwise 204.
public IActionResult Post([FromBody] WebPageModel model)
{
    if (ModelState.IsValid)
    {
        var newPage = model.ToEntity();
        // Any id supplied by the client is discarded before the insert.
        newPage.Id = 0;
        _webPageService.Insert(newPage);
        return NoContent();
    }

    return BadRequest(ModelState);
}
/// <summary>
/// Response handler that publishes the received HTML, or asks for a resend when the
/// response carries no data.
/// </summary>
/// <param name="page">Requested page (not used here).</param>
/// <param name="passing">Data passed with the request (not used here).</param>
/// <param name="res">Response to inspect.</param>
/// <returns>
/// A task yielding <c>HandResult.RequestResend</c> for null/empty data, otherwise
/// <c>HandResult.HandComplete</c>.
/// </returns>
private Task <HandResult> handHtmlTask(WebPageModel page, PassingData passing, ResponseData <string> res)
{
    return Task.Run(() =>
    {
        bool hasContent = !string.IsNullOrEmpty(res.Data);
        if (hasContent)
        {
            this.HtmlValue = res.Data;
        }

        return hasContent ? HandResult.HandComplete : HandResult.RequestResend;
    });
}
// Updates the web page with the given id from the request body.
// Returns 400 when validation fails, 404 when no page has that id, otherwise 204.
public IActionResult Put(int id, [FromBody] WebPageModel model)
{
    if (!ModelState.IsValid)
    {
        return BadRequest(ModelState);
    }

    var entity = _webPageService.GetById(id);
    if (entity == null)
    {
        // Without this guard a missing page would be mapped onto null and then
        // handed to Update.
        return NotFound();
    }

    // Map the posted values onto the loaded entity and persist it.
    entity = model.ToEntity(entity);
    _webPageService.Update(entity);

    return NoContent();
}
/// <summary>
/// Starts a request for <paramref name="page"/> on a background task after cancelling
/// the previous one, and reports the outcome through <paramref name="monitor"/>.
/// </summary>
/// <param name="monitor">Receives state changes and results; a <c>RequestMonitor</c> is created when null.</param>
/// <param name="page">Page to request; the call is a no-op when null.</param>
/// <param name="handTask">Handler for the received data.</param>
/// <param name="repeatInfo">Repeat settings stored in the request info.</param>
/// <param name="data">Payload stored in the request info.</param>
public void request(IRequestMonitor monitor, WebPageModel page, HandWebDataTask handTask, RepeatInfo repeatInfo, PassingData data)
{
    if (page == null)
    {
        return;
    }

    // Cancel the previous request, then create a new token source for this one.
    ensureCancelCTS();
    cacheCTS = new CancellationTokenSource();
    var cancelToken = cacheCTS.Token;

    monitor = monitor ?? new RequestMonitor();

    // Describe the request.
    var sender = provideRequestTask(page.IsPost);
    var job = new HttpRequestInfo();
    job.RequestTask = sender;
    job.HttpRequestConfig = httpConfig;
    job.RequestMonitor = monitor;
    job.HandDataTask = handTask;
    job.Page = page;
    job.Passing = data;
    job.RepeatInfo = repeatInfo;
    job.Token = cancelToken;

    monitor.IsTaskEmpty = false;
    monitor.IsTaskWaiting = false;

    Task.Run(() => loopingRequest(job).ContinueWith(finished =>
    {
        // Runs however the loop ended: release the token source and reset the flags.
        ensureCancelCTS();
        monitor.IsTaskWaiting = false;
        monitor.IsTaskEmpty = true;
        monitor.IsRequesting = false;

        if (finished.IsCanceled)
        {
            monitor.OnTaskCanceled();
            return;
        }

        if (finished.IsFaulted)
        {
            var rootCause = finished.Exception.GetBaseException();
            if (rootCause != null)
            {
                monitor.OnFaulted(rootCause);
            }
        }
    }));
}
//[ValidateAntiForgeryToken]
// Persists a new page and its slug, then redirects to Index.
// An invalid model re-renders the form with the posted values.
public ActionResult Create(WebPageModel model)
{
    if (ModelState.IsValid)
    {
        var newPage = model.ToEntity();
        _webPageService.Insert(newPage);
        _urlRecordService.SaveSlug(newPage, newPage.VirtualPath, 0);
        return RedirectToAction(nameof(Index));
    }

    // Theme list population is currently disabled:
    //model.AvailableThemes = _themeService.GetThemes().Select(x => new SelectListItem { Text = x });
    return View(model);
}
/// <summary>
/// Loads <c>model.Url</c>, extracts the wiki table, dumps the parsed rows to
/// <c>RawData\text.txt</c>, stores the rows grouped by <c>Row</c> as indented JSON in the
/// database and returns that JSON.
/// </summary>
/// <param name="model">Carries the URL of the page to scrape.</param>
/// <returns>
/// 404 when no table is found, 500 on any exception, otherwise a 200 response carrying
/// the formatted JSON.
/// </returns>
public HttpResponseMessage ScrapePage(WebPageModel model)
{
    try
    {
        var watch = System.Diagnostics.Stopwatch.StartNew();
        //var url = "https://ro.wikipedia.org/wiki/Lista_ora%C8%99elor_din_Rom%C3%A2nia";
        var htmlWeb = new HtmlWeb()
        {
            AutoDetectEncoding = true,
            UserAgent = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"
        };
        var htmlDocument = htmlWeb.Load(model.Url);
        var dataTable = GetWikiTable(htmlDocument);
        if (null == dataTable)
        {
            return new HttpResponseMessage(HttpStatusCode.NotFound);
        }

        var header = GetTableHeaders(dataTable);
        var contentList = GetTableContent(dataTable);

        // Parse once; the result feeds both the raw dump and the grouped output.
        var parsedRows = ParseToJson(header, contentList);

        // The dump file is opened only when there is something to write and is always
        // closed, so an early return or an exception can no longer leave it locked.
        using (var file = new System.IO.StreamWriter(AppDomain.CurrentDomain.BaseDirectory + "RawData\\text.txt"))
        {
            file.Write(new JavaScriptSerializer().Serialize(parsedRows));
        }

        var rowGroupData = parsedRows
            .GroupBy(g => g.Row, (key, value) => new { key, value });
        var list = new JavaScriptSerializer().Serialize(rowGroupData);
        var dataFormatted = JToken.Parse(list).ToString(Formatting.Indented);

        _db.Data.Add(new DataEntity()
        {
            CreatedOn = DateTime.Now,
            IdCollectionType = (int)CollectionTypeEnum.Wiki,
            JsonObject = dataFormatted
        });
        _db.SaveChanges();

        watch.Stop();
        var elapsedMs = watch.ElapsedMilliseconds;
        System.Diagnostics.Debug.WriteLine("Timp wiki scraper: " + elapsedMs);

        // NOTE(review): CreateErrorResponse is used for a success status; kept as-is so
        // the response body shape seen by existing clients does not change.
        return Request.CreateErrorResponse(HttpStatusCode.OK, dataFormatted);
    }
    catch (Exception ex)
    {
        // Callers still get a bare 500, but the cause is no longer discarded.
        System.Diagnostics.Debug.WriteLine("Wiki scraper failed: " + ex);
        return new HttpResponseMessage(HttpStatusCode.InternalServerError);
    }
}
/// <summary>
/// Response handler for the login request: matches the returned HTML against the page's
/// regex and compares the captured name with the user name passed in
/// <c>passing.Addition[0]</c>.
/// </summary>
/// <param name="page">Page whose <c>RegexPattern</c> identifies a successful login.</param>
/// <param name="passing">Request data; <c>Addition[0]</c> is read as the expected user name.</param>
/// <param name="res">Response whose <c>Data</c> is inspected.</param>
/// <returns>
/// A task yielding <c>HandComplete</c> on a matching name, <c>RequestResend</c> when the
/// response is blank or shorter than 10 characters, and <c>ForceStopCurrent</c> otherwise.
/// </returns>
private Task <HandResult> handLoginData(WebPageModel page, PassingData passing, ResponseData <string> res)
{
    return Task.Run(() =>
    {
        HtmlValue = res.Data;

        if (passing.Addition == null || passing.Addition.Length <= 0)
        {
            return HandResult.ForceStopCurrent;
        }

        string userName = passing.Addition[0].ToString();
        string rightPattern = page.RegexPattern;
        string wrongPattern = "<p.+?><font.+?><big><b>(.+?)</b></big></font></p>;";
        string webData = res.Data;
        // updateLogin(false);

        if (string.IsNullOrWhiteSpace(webData) || webData.Length < 10)
        {
            // Blank or too-short responses are treated as a transport error.
            HandDebugMessage("Received data error\nCheckBkjwLogin().");
            return HandResult.RequestResend;
        }

        MatchCollection matches = webData.clearHTMLHead().Matches(rightPattern);
        if (matches.Count != 1)
        {
            HandDebugMessage(Regex.IsMatch(res.Data, wrongPattern)
                ? "Wrong user name or password\nCheckBkjwLogin()."
                : "Data no match wrong pattern\nCheckBkjwLogin().");
            return HandResult.ForceStopCurrent;
        }

        // Group 2 is the value the login decision is based on.
        // NOTE(review): the message below used to print group 1 — confirm group 2 is the name.
        string receivedName = matches[0].Groups[2].Value;
        if (receivedName == userName)
        {
            updateLogin(true);
            HandDebugMessage("Login successful!");
            return HandResult.HandComplete;
        }

        // Log the value that was actually compared, with a real line break ("\n", not "/n").
        HandDebugMessage("User name no match, Request name : " + receivedName + " Input name : " + userName + "\nCheckBkjwLogin()");
        return HandResult.ForceStopCurrent;
    });
}
// Inserts the posted model for AJAX requests and returns the "Details" partial;
// non-AJAX requests are redirected to "Details". Any exception renders the "Error"
// view with the exception message as its model.
public ActionResult Create(WebPageModel model)
{
    try
    {
        if (Request.IsAjaxRequest())
        {
            _unitOfWorkAsync.Repository <WebPageModel>().Insert(model);
            _unitOfWorkAsync.SaveChanges();
            return PartialView("Details", model);
        }

        // NOTE(review): this branch redirects without persisting the model — confirm
        // that is intended.
        return RedirectToAction("Details", model);
    }
    catch (Exception ex)
    {
        // Cast to object: a string second argument binds to View(viewName, masterName)
        // in ASP.NET MVC, which would treat the message as a layout name.
        return View("Error", (object)ex.Message);
    }
}
//[ValidateAntiForgeryToken]
// Applies the posted values and role permission levels to an existing page, saves the
// page and its slug, then redirects back to Edit.
// Returns 404 when the page does not exist; an invalid model re-renders the form.
public ActionResult Edit(WebPageModel model, IFormCollection form)
{
    var page = _webPageService.GetById(model.Id);
    if (page == null)
    {
        // Everything below dereferences the page; an unknown id is a 404, not a crash.
        return NotFound();
    }

    if (!ModelState.IsValid)
    {
        PrepareWebPageModel(page, model);
        //model.AvailableThemes = _themeService.GetThemes().Select(x => new SelectListItem { Text = x });
        return View(model);
    }

    if (model.Roles != null)
    {
        // One timestamp so CreatedOn and UpdatedOn of a new role are identical.
        var now = DateTime.Now;

        foreach (var role in model.Roles)
        {
            // Roles are matched by name; unknown names are added to the page.
            var found = page.Roles.FirstOrDefault(x => x.Name == role.Name);
            if (found == null)
            {
                found = new WebPageRole();
                found.Name = role.Name;
                found.CreatedOn = now;
                found.UpdatedOn = now;
                page.Roles.Add(found);
            }

            found.PermissionLevel = role.PermissionLevel;
        }
    }

    page = model.ToEntity(page);

    //var webPageAttributes = ParseCustomWebPageAttributes(page, form);
    //_genericAttributeService.SaveAttribute(page, SystemWebPageAttributeNames.CustomWebPageAttributes, webPageAttributes);
    ////custom address attributes
    //var customAttributes = form.ParseCustomAddressAttributes(_customAttributeParser, _customAttributeService);
    //var customAttributeWarnings = _customAttributeParser.GetAttributeWarnings(customAttributes);
    //page.CustomAttributes = customAttributes;

    _webPageService.Update(page);
    _urlRecordService.SaveSlug(page, page.VirtualPath, 0);

    return RedirectToAction("Edit", new { pageId = page.Id });
}
// Deletes the posted model and redirects to Index. Any exception renders the "Error"
// view with the exception message as its model.
public ActionResult Delete(WebPageModel model)
{
    try
    {
        // The AJAX and non-AJAX branches were identical, so the check was dropped.
        _unitOfWorkAsync.Repository <WebPageModel>().Delete(model);
        _unitOfWorkAsync.SaveChanges();
        return RedirectToAction("Index");
    }
    catch (Exception ex)
    {
        // Cast to object: a string second argument binds to View(viewName, masterName)
        // in ASP.NET MVC, which would treat the message as a layout name.
        return View("Error", (object)ex.Message);
    }
}
/// <summary>
/// Collects basic page data: appends the keywords from every
/// <c>&lt;meta name="keywords"&gt;</c> element to <c>listKeyWords</c> and stores the
/// document's inner text in <c>body</c>.
/// </summary>
/// <param name="webPageModel">Model holding the parsed document; receives keywords and body.</param>
/// <returns>
/// The same <paramref name="webPageModel"/> instance, unchanged when it has no document.
/// </returns>
public WebPageModel CompleteMainKeyWordsData(WebPageModel webPageModel)
{
    // htmlDocument is only assigned after a successful download; nothing to read otherwise.
    if (webPageModel.htmlDocument == null)
    {
        return webPageModel;
    }

    HtmlAgilityPack.HtmlNode root = webPageModel.htmlDocument.DocumentNode;

    // SelectNodes yields null, not an empty collection, when nothing matches.
    HtmlAgilityPack.HtmlNodeCollection nodesKeyWords = root.SelectNodes("//meta[@name='keywords']");
    if (nodesKeyWords != null)
    {
        foreach (HtmlAgilityPack.HtmlNode nodeKeyWords in nodesKeyWords)
        {
            foreach (HtmlAgilityPack.HtmlAttribute content in nodeKeyWords.Attributes.AttributesWithName("content"))
            {
                foreach (string rawKeyWord in content.Value.Split(','))
                {
                    // "a, b, c" would otherwise produce " b" and " c"; blank entries
                    // (trailing comma, empty attribute) are not keywords.
                    string keyWord = rawKeyWord.Trim();
                    if (keyWord.Length > 0)
                    {
                        webPageModel.listKeyWords.Add(keyWord);
                    }
                }
            }
        }
    }

    webPageModel.body = root.InnerText;
    return webPageModel;
}
/// <summary>
/// Response handler that turns the received text into a table using the page's regex
/// and publishes it through <c>updateDataTable</c>.
/// </summary>
/// <param name="page">Supplies <c>RegexPattern</c> and <c>DataHeaders</c>.</param>
/// <param name="passing">Data passed with the request (not used here).</param>
/// <param name="res">Response whose <c>Data</c> is parsed.</param>
/// <returns>
/// A task yielding <c>ForceStopCurrent</c> when the page has no pattern,
/// <c>HandedButWrong</c> when no table was produced, otherwise <c>HandComplete</c>.
/// </returns>
protected Task <HandResult> handTableData(WebPageModel page, PassingData passing, ResponseData <string> res)
{
    return Task.Run(() =>
    {
        // The current table is cleared before the pattern is even checked.
        EmptyTableWithoutMonitoring();

        if (string.IsNullOrEmpty(page.RegexPattern))
        {
            return HandResult.ForceStopCurrent;
        }

        DataTable parsed = res.Data.RegexToTable("WebData", page.RegexPattern, page.DataHeaders, this.maxMatches);

        // Published even when null; the null case is reported through the result.
        updateDataTable(parsed);

        return parsed == null ? HandResult.HandedButWrong : HandResult.HandComplete;
    });
}
//public static FileModel GoogleFileToModel(Google.Apis.Drive.v3.Data.File file)
//{
//    FileModel fileModel = new FileModel();
//    fileModel.Id = Guid.NewGuid();
//    fileModel.FileName = file.Name;
//    if (file.HasThumbnail.HasValue && file.HasThumbnail.Value)
//        fileModel.AvatarPath = file.ThumbnailLink;
//    fileModel.FullPath = file.Id;
//    fileModel.SizeInB = file.Size.Value;
//    fileModel.Description = file.Description;
//    fileModel.CreatedBy = file.Owners[0].DisplayName;
//    return fileModel;
//}

/// <summary>
/// Maps a <c>WebPageModel</c> to an <c>ArticleModel</c>: copies id, creation data and
/// title, takes the content from <c>model.Page.HtmlContent</c>, and uses the first word
/// of the title as the tag.
/// </summary>
/// <param name="model">Source page model.</param>
/// <returns>
/// The populated article; <c>Tags</c> is empty when the title is null or yields no words.
/// </returns>
public ArticleModel ToArticle(WebPageModel model)
{
    ArticleModel _view = new ArticleModel();
    _view.Id = model.Id;
    _view.CreatedDate = model.CreatedDate;
    _view.CreatedBy = model.CreatedBy;
    _view.Title = model.Title;
    _view.Content = model.Page.HtmlContent;

    // FirstOrDefault rather than First: a title with no words (e.g. an empty string)
    // must give an empty tag, not an InvalidOperationException.
    _view.Tags = model.Title != null
        ? (model.Title.ToWords().FirstOrDefault() ?? string.Empty)
        : string.Empty;

    return _view;
}
/// <summary>
/// Builds a text dump of every site and its web page models, writes it to the debug
/// output and returns it.
/// </summary>
/// <param name="models">Sites to describe, keyed by site identifier.</param>
/// <returns>The text that was written to the debug output.</returns>
public static string debugOutput(Dictionary <string, SiteModel> models)
{
    StringBuilder dump = new StringBuilder("ModelDictionary count " + models.Count);

    foreach (var site in models)
    {
        Dictionary <string, WebPageModel> pages = site.Value.WebPageModels;

        dump.Append("\nRequestDict count ").Append(pages.Count)
            .Append("\nSite -> ").Append(site.Key)
            .Append("\n Site Name -> ").Append(site.Value.SiteName)
            .Append("\n Encoding -> ").Append(site.Value.EncodingName);

        foreach (var entry in pages)
        {
            WebPageModel pageModel = entry.Value;

            dump.Append("\n key -> ").Append(entry.Key)
                .Append("\n Url -> ").Append(pageModel.Url)
                .Append("\n RegexPattern -> ").Append(pageModel.RegexPattern);

            // Headers and post format are optional; each section is emitted only when present.
            string[] headers = pageModel.DataHeaders;
            if (headers != null)
            {
                dump.Append("\n DataHeaders");
                foreach (var header in headers)
                {
                    dump.Append("\n > ").Append(header);
                }
            }

            if (!string.IsNullOrEmpty(pageModel.PostDataFormat))
            {
                dump.Append("\n PostDataFormat");
                dump.Append("\n > ").Append(pageModel.PostDataFormat);
            }
        }
    }

    string text = dump.ToString();
    System.Diagnostics.Debug.WriteLine(text);
    return text;
}
/// <summary>
/// Response handler for custom requests: optionally matches the received HTML against
/// <c>CustomPattern</c>, sends a notification dialog depending on the notify setting,
/// and publishes the HTML.
/// </summary>
/// <param name="page">Requested page (not used here).</param>
/// <param name="passing">Data passed with the request (not used here).</param>
/// <param name="res">Response whose <c>Data</c> is inspected.</param>
/// <returns>
/// A task yielding <c>RequestResend</c> for null/empty data, otherwise <c>HandComplete</c>.
/// </returns>
private Task <HandResult> customMatchHtmlTask(WebPageModel page, PassingData passing, ResponseData <string> res)
{
    return Task.Run(() =>
    {
        string html = res.Data;
        if (string.IsNullOrEmpty(html))
        {
            return HandResult.RequestResend;
        }

        bool hasPattern = !string.IsNullOrEmpty(CustomPattern);
        if (hasPattern)
        {
            // With notifications off the handler completes here, before the HTML is published.
            if (NotifySettingValue == NotifyNoneValue)
            {
                return HandResult.HandComplete;
            }

            MatchCollection found = html.clearHTMLHeadBody().Matches(CustomPattern);
            bool matched = found.Count > 0;

            if (matched && NotifySettingValue == NotifyOnMatchValue)
            {
                Messenger.Default.Send(new DialogContent()
                {
                    Title = "Notify",
                    Content = "New web data match pattern"
                }, Token);
            }
            else if (!matched && NotifySettingValue == NotifyNotMatchValue)
            {
                Messenger.Default.Send(new DialogContent()
                {
                    Title = "Notify",
                    Content = "New web data not match pattern"
                }, Token);
            }
        }

        this.HtmlValue = html;
        return HandResult.HandComplete;
    });
}
// Deletes a page together with its sections and its url record, then redirects to Index.
// Returns 404 when the page does not exist; any exception falls back to the default view.
public ActionResult Delete(WebPageModel model)
{
    try
    {
        var page = _webPageService.GetById(model.Id);
        if (page == null)
        {
            // Nothing to delete; previously this surfaced as a swallowed exception.
            return NotFound();
        }

        var sections = _webPageService.GetSectionsByPageId(model.Id);
        foreach (var section in sections)
        {
            _sectionService.Delete(section);
        }

        _webPageService.Delete(page);

        // A page may have no slug record; only delete one that was actually found.
        var urlRecord = _urlRecordService.GetBySlug(page.VirtualPath);
        if (urlRecord != null)
        {
            _urlRecordService.Delete(urlRecord);
        }

        return RedirectToAction("Index");
    }
    catch (Exception ex)
    {
        // Same fallback as before, but the failure now leaves a trace.
        System.Diagnostics.Debug.WriteLine("Deleting web page failed: " + ex);
        return View();
    }
}
/// <summary>
/// Performs one request described by <paramref name="info"/> and hands the response to
/// its data handler, retrying on recoverable web errors, on empty responses and when
/// the handler asks for a resend.
/// </summary>
/// <param name="info">Request task, configuration, page, payload, monitor, handler and cancellation token.</param>
/// <returns>A task that completes when the request has been handled or abandoned.</returns>
/// <exception cref="OperationCanceledException">The token was cancelled.</exception>
/// <exception cref="WebException">
/// The error has no response, the status is 404/500/403, or the web-error retries are used up.
/// </exception>
public async Task basicRequest(HttpRequestInfo info)
{
    //Jump out for request cancel
    info.Token.ThrowIfCancellationRequested();

    //Basic data
    WebPageModel page = info.Page;
    ResponseData <string> received = null;
    var monitor = info.RequestMonitor;
    bool requestResend;
    int wrongDataRetryTimes = WrongReceiveRetryTimes;
    int webErrorRetryTimes = WebErrorRetryTimes;

    do
    {
        //Init
        requestResend = false;

        //Requesting
        monitor.IsRequesting = true;
        try
        {
            received = await info.RequestTask(info.HttpRequestConfig, page.Url, info.Passing.Data, info.Token);
        }
        catch (AggregateException)
        {
            // "throw;" keeps the original stack trace ("throw e;" reset it).
            throw;
        }
        catch (WebException e)
        {
            // Out of retries, or no response to inspect: propagate. Previously an
            // exhausted budget swallowed the error and continued with no response.
            if (webErrorRetryTimes <= 0 || e.Response == null)
            {
                throw;
            }

            HttpWebResponse response = e.Response as HttpWebResponse;
            if (response != null)
            {
                switch (response.StatusCode)
                {
                    // These statuses are not retried.
                    case HttpStatusCode.NotFound:
                    case HttpStatusCode.InternalServerError:
                    case HttpStatusCode.Forbidden:
                        throw;
                    default:
                        break;
                }
                response.Dispose();
            }

            // Decrement the counter that guards this branch so retries are bounded.
            --webErrorRetryTimes;
            requestResend = true;
            if (monitor.OnRetryRequest(e.Message) == RequestReceipt.Cancel)
            {
                break;
            }
        }
        finally
        {
            monitor.IsRequesting = false;
        }

        //Jump out for request cancel
        info.Token.ThrowIfCancellationRequested();

        //Hand result
        if (!requestResend)
        {
            if (string.IsNullOrEmpty(received.Data))
            {
                // Empty response: retry while the budget lasts and the monitor agrees.
                if (wrongDataRetryTimes > 0)
                {
                    if (monitor.OnRetryRequest(received.ErrorMessage) == RequestReceipt.Cancel)
                    {
                        break;
                    }
                    --wrongDataRetryTimes;
                    requestResend = true;
                }
            }
            else
            {
                HandResult handResult = await info.HandDataTask(page, info.Passing, received);
                if (handResult == HandResult.RequestResend)
                {
                    requestResend = true;
                }
            }
        }
    } while (requestResend);
}
/// <summary>
/// Maps the model onto an existing <c>WebPage</c> via <c>MapTo</c>.
/// </summary>
/// <param name="model">Source model.</param>
/// <param name="destination">Entity passed to <c>MapTo</c> as the mapping target.</param>
/// <returns>Whatever <c>MapTo</c> returns for the pair.</returns>
public static WebPage ToEntity(this WebPageModel model, WebPage destination) => model.MapTo(destination);
/// <summary>
/// Overload without repeat settings; forwards to the full overload with
/// <c>RepeatInfo.Once</c>.
/// </summary>
/// <param name="monitor">Monitor passed through unchanged.</param>
/// <param name="page">Page passed through unchanged.</param>
/// <param name="handTask">Response handler passed through unchanged.</param>
/// <param name="data">Payload passed through unchanged.</param>
public void request(IRequestMonitor monitor, WebPageModel page, HandWebDataTask handTask, PassingData data)
{
    RepeatInfo runOnce = RepeatInfo.Once;
    request(monitor, page, handTask, runOnce, data);
}
/// <summary>
/// Sends a request for <paramref name="pageModel"/> with this object as the monitor and
/// <c>handHtmlTask</c> as the response handler.
/// </summary>
/// <param name="pageModel">Page to request.</param>
/// <param name="data">Optional payload string.</param>
private void sendRequestOnly(WebPageModel pageModel, string data = null)
{
    HandWebDataTask htmlHandler = handHtmlTask;
    requester.request(this, pageModel, htmlHandler, data);
}
/// <summary>
/// Overload without repeat settings; forwards to the full <c>formatRequest</c> with
/// <c>RepeatInfo.Once</c>.
/// </summary>
/// <param name="monitor">Monitor passed through unchanged.</param>
/// <param name="page">Page passed through unchanged.</param>
/// <param name="handTask">Response handler passed through unchanged.</param>
/// <param name="addition">Extra objects passed through unchanged.</param>
/// <param name="datas">Format arguments passed through unchanged.</param>
public void formatRequest(IRequestMonitor monitor, WebPageModel page, HandWebDataTask handTask, object[] addition, params string[] datas)
{
    RepeatInfo runOnce = RepeatInfo.Once;
    formatRequest(monitor, page, handTask, addition, runOnce, datas);
}
/// <summary>
/// Loads <c>model.Url</c>, picks the link whose text is closest to <c>model.Location</c>,
/// follows it via <c>SeekPath</c> and collects the latest-data URL of either the entry
/// matching <c>model.Key</c> or of all entries. The result is stored as JSON in the
/// database and returned.
/// </summary>
/// <param name="model">Supplies the start URL, the location text and the key.</param>
/// <returns>
/// 200 with a <c>CrawlerResult</c> (empty and unmatched when no link or path is found),
/// or 500 on any exception.
/// </returns>
public HttpResponseMessage Crawl(WebPageModel model)
{
    try
    {
        var watch = System.Diagnostics.Stopwatch.StartNew();
        var result = new CrawlerResult()
        {
            IsMatch = false,
            Urls = new List <KeyValuePair <string, string> >()
        };
        var htmlWeb = new HtmlWeb()
        {
            AutoDetectEncoding = true,
            UserAgent = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"
        };
        var htmlDocument = htmlWeb.Load(model.Url);

        // SelectNodes returns null when the page has no <a href> at all; treat that
        // like "no candidate links" instead of failing with a 500.
        var anchors = htmlDocument.DocumentNode.SelectNodes("//a[@href]");
        if (anchors == null)
        {
            return Request.CreateResponse(HttpStatusCode.OK, result);
        }

        // Candidates: link text within edit distance 1 of the requested location.
        var dataLinks = anchors
            .Where(w => LevenshteinDistance(w.InnerText, model.Location) < 2).ToList();
        if (!dataLinks.Any())
        {
            return Request.CreateResponse(HttpStatusCode.OK, result);
        }

        // Prefer an exact text match, otherwise the first near match.
        var aElement = dataLinks.FirstOrDefault(w => w.InnerText == model.Location) ?? dataLinks.First();
        var hrefValue = aElement.Attributes.Single(w => w.Name == "href").Value;
        var parsedUrl = new Uri(model.Url);
        var link = BuildUrl(parsedUrl, hrefValue);
        var results = SeekPath(link);
        if (!results.Any())
        {
            return Request.CreateResponse(HttpStatusCode.OK, result);
        }

        var matchUrl = MatchKey(results, model.Key);
        if (null == matchUrl)
        {
            // No key match: report every discovered path with its latest-data URL.
            foreach (var r in results)
            {
                result.Urls.Add(new KeyValuePair <string, string>(r, GetLatestDataUrl(BuildUrl(parsedUrl, r))));
            }
        }
        else
        {
            result.IsMatch = true;
            result.Urls.Add(new KeyValuePair <string, string>(null, GetLatestDataUrl(BuildUrl(parsedUrl, matchUrl))));
        }

        var list = new JavaScriptSerializer().Serialize(result);
        var dataFormatted = JToken.Parse(list).ToString(Formatting.Indented);
        _db.Data.Add(new DataEntity()
        {
            CreatedOn = DateTime.Now,
            IdCollectionType = (int)CollectionTypeEnum.Nav,
            JsonObject = dataFormatted
        });
        _db.SaveChanges();

        watch.Stop();
        var elapsedMs = watch.ElapsedMilliseconds;
        System.Diagnostics.Debug.WriteLine("Timp crawler: " + elapsedMs);

        return Request.CreateResponse(HttpStatusCode.OK, result);
    }
    catch (Exception ex)
    {
        // Callers still get a bare 500, but the cause is no longer discarded.
        System.Diagnostics.Debug.WriteLine("Crawler failed: " + ex);
        return new HttpResponseMessage(HttpStatusCode.InternalServerError);
    }
}
// Renders the default view bound to a WebPageModel constructed from the given url.
public ActionResult Create(string url)
{
    return View(new WebPageModel(url));
}