// GET: Click
/// <summary>
/// Opens <paramref name="url"/> in a new <c>BrowserDriver</c>, clicks the element identified by
/// <paramref name="target"/>, and returns the resulting page as JSON.
/// </summary>
/// <param name="url">Address the browser navigates to before clicking.</param>
/// <param name="target">Value handed to <c>BrowserDriver.Click</c> to locate the element.</param>
/// <returns>
/// JSON <c>false</c> when the click did not succeed; otherwise a JSON object with
/// <c>Url</c> (the browser's URL after the click) and <c>Src</c> (the page source after it has
/// been passed through <c>UpdateBase</c>).
/// </returns>
// NOTE(review): url comes straight from the request and is loaded by a server-side browser;
// confirm that callers are trusted or that the URL is validated elsewhere.
public JsonResult Click(string url, string target)
{
    BrowserDriver browser = new BrowserDriver();
    try
    {
        browser.GoTo(url);

        bool good = browser.Click(target);
        if (!good)
        {
            return Json(good, JsonRequestBehavior.AllowGet);
        }

        string source = browser.PageSource;
        // UpdateBase is defined elsewhere; it receives the source by reference and may rewrite it.
        UpdateBase(ref source, url);

        var result = new { Url = browser.Url, Src = source };
        return Json(result, JsonRequestBehavior.AllowGet);
    }
    finally
    {
        // Always release the browser, including when navigation or the click throws.
        browser.Close();
    }
}
/// <summary>
/// Visits every link in <c>LinksField</c> with a <c>BrowserDriver</c>, applies the filters stored
/// for the website whose domain equals <c>HostField</c>, and writes the extracted values to
/// <c>~/Downloads/{GUID}/Output.csv</c>.
/// </summary>
/// <remarks>
/// The output folder is deleted and recreated on every call. Filter actions handled here:
/// 0 = extract the element's content into a CSV cell, 1 = no-op in the filter loop (action-1
/// selectors are used to strip content from extracted cells), 2 = click the element.
/// A link's row is dropped entirely when a filter flagged <c>Required</c> yields nothing or
/// cannot be clicked.
/// </remarks>
/// <returns>Always 0.</returns>
public int BeginScrape()
{
    List<Models.Website> websites = websiteDb.Websites.Where(x => x.Domain == HostField).ToList();
    // NOTE(review): throws ArgumentOutOfRangeException when no website matches HostField;
    // left as-is so callers see the same failure as before.
    string guid = websites[0].GUID;
    List<Models.Filter> filters = filterDb.Filters.Where(x => x.GUID == guid).ToList();
    BrowserDriver driver = new BrowserDriver();

    try
    {
        // Ordering is currently disabled:
        //filters = filters.OrderBy(x => x.Id)
        //.ThenBy(x => x.Column).ToList();
        // Orders the list so that the primary key is first; if not specified it will organize by column.

        string folderPath = "~/Downloads/" + guid;
        DirectoryInfo directory = new DirectoryInfo(HttpContext.Current.Server.MapPath(folderPath));

        if (directory.Exists)
        {
            // Contents are removed explicitly before the recursive delete; kept as in the
            // original in case it works around intermittent recursive-delete failures.
            foreach (FileInfo file in directory.GetFiles())
            {
                file.Delete();
            }
            foreach (DirectoryInfo dir in directory.GetDirectories())
            {
                dir.Delete(true);
            }
            directory.Delete(true);
            System.Threading.Thread.Sleep(1000);
        }

        directory.Create();
        System.Threading.Thread.Sleep(1000);

        // Loaded once instead of once per extracted cell.
        // NOTE(review): unlike `filters`, this is not restricted to the current GUID, so
        // ignore-selectors of every website are applied — confirm that this is intended.
        List<Models.Filter> ignoreFilters = filterDb.Filters.Where(x => x.Action == 1).ToList();

        using (CsvFileWriter writer = new CsvFileWriter(directory.ToString() + "/Output.csv"))
        {
            // Header row: one column per extracting (action 0) filter.
            CsvRow row = new CsvRow();
            foreach (Models.Filter filter in filters.Where(x => x.Action == 0))
            {
                row.Add(filter.Column);
            }
            writer.WriteRow(row);

            // Passed to DownloadFile; starts at 2, presumably because the header occupies row 1.
            int rowindex = 2;

            foreach (string link in LinksField)
            {
                driver.GoTo(link);
                row = new CsvRow();
                bool skipRow = false;

                foreach (Models.Filter filter in filters)
                {
                    switch (filter.Action)
                    {
                        case 0:
                            string result = driver.GetElement(filter.Selector, false);
                            if (result == null)
                            {
                                if (filter.Required)
                                {
                                    skipRow = true;
                                }
                                else
                                {
                                    row.Add("");
                                }
                                break;
                            }

                            // isFile/DownloadFile are defined elsewhere; a non-null return from
                            // isFile is treated as a link to download for this row and column.
                            string downloadlink = isFile(result);
                            if (downloadlink != null)
                            {
                                DownloadFile(downloadlink, guid, rowindex, filter.Column);
                            }

                            // Remove the content of every ignore-selector found on the page.
                            foreach (Models.Filter igfilter in ignoreFilters)
                            {
                                string ignoreexists = driver.GetElement(igfilter.Selector, true);
                                if (ignoreexists != null)
                                {
                                    result = result.Replace(ignoreexists, "");
                                }
                            }

                            // Trim whitespace surrounding <tag>...</tag> spans, then drop tabs and line breaks.
                            result = Regex.Replace(result, @"\s*(?<capture><(?<markUp>\w+)>.*<\/\k<markUp>>)\s*", "${capture}", RegexOptions.Singleline);
                            result = result.Replace("\t", "");
                            result = result.Replace("\r", "");
                            result = result.Replace("\n", "");
                            row.Add(result);
                            break;

                        case 1:
                            // TODO: make this work for the new engine
                            break;

                        case 2:
                            bool didClick = driver.Click(filter.Selector);
                            if (!didClick && filter.Required)
                            {
                                skipRow = true;
                            }
                            break;

                        default:
                            break;
                    }

                    if (skipRow)
                    {
                        // A required filter failed: abandon this link without writing a row.
                        break;
                    }
                }

                if (!skipRow)
                {
                    writer.WriteRow(row);
                    rowindex++;
                }
            }
        }
    }
    finally
    {
        // Always release the browser, including when scraping or file I/O throws.
        driver.Close();
    }

    return 0;
}
// GET: Goto
/// <summary>
/// Opens <paramref name="url"/> in a new <c>BrowserDriver</c> and returns the page source as JSON.
/// </summary>
/// <param name="url">Address the browser navigates to.</param>
/// <returns>
/// The page source as a JSON string, after it has been passed through <c>UpdateBase</c>.
/// </returns>
// NOTE(review): url comes straight from the request and is loaded by a server-side browser;
// confirm that callers are trusted or that the URL is validated elsewhere.
public JsonResult Goto(string url)
{
    BrowserDriver browser = new BrowserDriver();
    try
    {
        browser.GoTo(url);

        string source = browser.PageSource;
        // UpdateBase is defined elsewhere; it receives the source by reference and may rewrite it.
        UpdateBase(ref source, url);

        return Json(source, JsonRequestBehavior.AllowGet);
    }
    finally
    {
        // Always release the browser, including when navigation throws.
        browser.Close();
    }
}