/// <summary>
/// Creates a new <see cref="ScanHistory"/> row describing a scan over the given
/// categories, adds it to the repository and saves it.
/// </summary>
/// <param name="repository">
/// Data context used to create and persist the row. When null, a new
/// <see cref="WebscrapEntities"/> is created and handed back through the ref parameter.
/// </param>
/// <param name="categories">Categories covered by the scan; their ids are serialized as JSON into <c>command</c>.</param>
/// <param name="step">Initial value stored in the <c>step</c> column.</param>
/// <param name="status">Initial status; defaults to <c>SCAN_RUNNING</c>.</param>
/// <param name="statusText">Optional status text.</param>
/// <returns>The persisted history entity.</returns>
private ScanHistory ScanHistoryInsert(ref WebscrapEntities repository, List<Category> categories, string step, int status = SCAN_RUNNING, string statusText = null)
{
    // The context must exist BEFORE it is first used. The original performed this
    // check only after calling repository.ScanHistories.Create(), so a null
    // repository crashed before the fallback could run.
    if (repository == null)
    {
        repository = new WebscrapEntities();
    }

    // create command: the JSON array of all category ids taking part in the scan
    List<int> commandList = new List<int>();
    foreach (Category category in categories)
    {
        commandList.Add(category.id);
    }

    string command = JsonConvert.SerializeObject(
        commandList,
        Newtonsoft.Json.Formatting.None,
        new JsonSerializerSettings { NullValueHandling = NullValueHandling.Ignore });

    // The history row points at the first category; FirstOrDefault yields 0 for an empty list.
    // NOTE(review): 0 is presumably not a valid category id - callers should avoid passing an empty list; confirm.
    int categoryId = commandList.FirstOrDefault();

    // One timestamp so insertDate and updateDate are identical on creation.
    DateTime now = DateTime.Now;

    ScanHistory history = repository.ScanHistories.Create();
    history.command = command;
    history.idCategory = categoryId;
    history.step = step;
    history.status = status;
    history.statusText = statusText;
    history.insertDate = now;
    history.updateDate = now;

    repository.ScanHistories.Add(history);
    repository.SaveChanges();

    return history;
}
/// <summary>
/// Appends a row for <paramref name="item"/> to <c>listView</c>: the first column is
/// the item's date (converted via <c>FromUnixEpoch</c> and formatted with "G"),
/// the second column is the item's name. The item itself is stored in the row's Tag.
/// </summary>
/// <param name="item">History entry to display.</param>
private void AddToListView(ScanHistory item)
{
    var timestampText = item.date.FromUnixEpoch().ToString("G");

    var row = listView.Items.Add(timestampText);
    row.Tag = item;
    row.SubItems.Add(item.name);
}
/// <summary>
/// Updates the in-memory fields of a scan history entry. Nothing is saved here;
/// this method only assigns properties.
/// </summary>
/// <param name="scanHistory">Entry to modify.</param>
/// <param name="categoryId">Category id to record.</param>
/// <param name="step">Step value to record.</param>
/// <param name="status">New status value.</param>
/// <param name="errorText">
/// New status text. When null, the existing <c>statusText</c> is left untouched.
/// </param>
private void ScanHistoryUpdate(ref ScanHistory scanHistory, int categoryId, string step, int status, string errorText = null)
{
    scanHistory.updateDate = DateTime.Now;
    scanHistory.idCategory = categoryId;
    scanHistory.step = step;
    scanHistory.status = status;

    // A null text means "keep whatever status text is already there".
    if (errorText == null)
    {
        return;
    }

    scanHistory.statusText = errorText;
}
/// <summary>
/// Checks for an unstable network: collects the <c>Avg</c> of every result in
/// <paramref name="history"/>, computes their standard deviation via
/// <c>Probability.ProbabilityOP.StandardDeviation</c> and compares it to the threshold.
/// </summary>
/// <param name="history">History whose results are sampled.</param>
/// <param name="threshold">Largest standard deviation still considered stable.</param>
/// <returns>False when the deviation is greater than the threshold, true otherwise.</returns>
private bool CheckStability(ScanHistory history, long threshold)
{
    // ---------------- TEMP -------------------- //
    List<double> averages = new List<double>();
    foreach (ScanResult result in history.Results)
    {
        averages.Add(result.Avg);
    }
    // ---------------- TEMP -------------------- //

    double deviation = Probability.ProbabilityOP.StandardDeviation(averages);
    Debug.WriteLine("[stabiliy: std_dev: " + deviation);

    // Written as a negated "greater than" so a NaN deviation still counts as stable.
    return !(deviation > threshold);
}
/// <summary>
/// Checks the last result set: accumulates, per result, how far its gateway-adjusted
/// latency deviates from the history's gateway-adjusted average, averages that over
/// the number of results, and also sums the growth in failed pings relative to the
/// first result.
/// </summary>
/// <param name="history">History under test.</param>
/// <param name="DefaultGatewayHistory">History of the default gateway; its <c>Avg</c> is subtracted as a baseline.</param>
/// <param name="latency_threshold">Largest acceptable mean latency delta.</param>
/// <param name="dev_threshold">Threshold passed on to <c>CheckStability</c>.</param>
/// <returns>
/// False when the mean delta exceeds <paramref name="latency_threshold"/>, when
/// <c>CheckStability</c> reports instability, or when the accumulated failure count
/// is greater than 1; true otherwise.
/// </returns>
private bool CheckLatency(ScanHistory history, ScanHistory DefaultGatewayHistory, long latency_threshold, long dev_threshold)
{
    double deltaSum = 0;
    int failureGrowth = 0;

    foreach (ScanResult result in history.Results)
    {
        double adjustedLatency = result.Avg - DefaultGatewayHistory.Avg;
        double adjustedAverage = history.Avg - DefaultGatewayHistory.Avg;
        deltaSum += adjustedLatency - adjustedAverage;

        // First() stays inside the loop so an empty result set never evaluates it.
        failureGrowth += result.Failed - history.Results.First().Failed;
    }

    double meanDelta = deltaSum / history.Results.Count;

    // Stability is always evaluated, even when the latency check alone would fail.
    bool stable = CheckStability(history, dev_threshold);

    if (meanDelta > latency_threshold)
    {
        return false;
    }

    if (!stable)
    {
        return false;
    }

    return failureGrowth <= 1;
}
/// <summary>
/// Scans every active website: for each active category the paged category link is
/// scraped page by page until a page yields no products, an error is reported, or a
/// stop signal is detected. New products are inserted, active existing products get
/// their price/photo/site id refreshed plus a new <c>ProductScan</c> row, and progress
/// is tracked in a <c>ScanHistory</c> row so a paused or failed scan can be resumed.
/// </summary>
/// <returns>
/// The error text reported by <c>SmartScrapPage</c> when a page fails; null when all
/// sites were processed or when the scan was paused by the stop signal.
/// </returns>
public string ProcessTask()
{
    // NOTE(review): the context is never disposed here; if WebscrapEntities is disposable, consider disposing it - confirm.
    WebscrapEntities repository = new WebscrapEntities();

    // get all active websites
    var websites = repository.Sites.Where(item => item.active.HasValue && item.active == true).ToList();

    Random randomGenerator = new Random();
    ColorConverter converter = new ColorConverter();
    string error = null;

    foreach (var site in websites)
    {
        Color websiteColor = (Color)converter.ConvertFromString(site.color ?? "#FFFFFF");
        LogCallback("Starting scan for website " + site.name + "(" + site.link + ")", websiteColor);

        // get all categories of website
        var categories = site.Categories.Where(item => item.active.HasValue && item.active == true).ToList();

        // Without active categories there is nothing to scan. Continuing would either
        // index into an empty list when resuming, or finish the site with a null or
        // stale category from a previous site, so skip the site instead.
        if (categories.Count == 0)
        {
            LogCallback("No active categories found for website " + site.name + ", skipping.", Color.Orange);
            continue;
        }

        // Deliberately left as a deferred query: it is re-evaluated on every lookup below.
        var products = categories.SelectMany(item => item.Products);

        // Scoped per site so state from a previous site can never leak into this one.
        Category category = null;
        int step = 0;

        // get last paused or error scan for that site!
        ScanHistory scanHistory = repository.ScanHistories.FirstOrDefault(item => item.Category.idSite == site.id && (item.status == SCAN_PAUSED || item.status == SCAN_ERROR));
        int categoryIndex = 0;
        int stepIndex = 1;

        if (scanHistory == null)
        {
            scanHistory = ScanHistoryInsert(ref repository, categories, "0");
        }
        else
        {
            LogCallback("Detected unfinished scan! Status:" + scanHistory.status + " text:" + scanHistory.statusText + " At category:" + scanHistory.Category.name + "(" + scanHistory.idCategory + ") step:" + scanHistory.step, Color.Orange);

            Category historyCategory = categories.FirstOrDefault(item => item.id == (scanHistory.idCategory ?? 0));
            if (historyCategory != null)
            {
                categoryIndex = categories.IndexOf(historyCategory);
                stepIndex = scanHistory.step.Convert<int>();
            }

            LogCallback("Scanning will resume from category:" + categories[categoryIndex].name + "(" + categories[categoryIndex].id + ") at step:" + stepIndex.ToString(), Color.Orange);
        }

        // foreach category scan pages until error found or no product found
        for (int j = categoryIndex; j < categories.Count; j++)
        {
            category = categories[j];
            LogCallback("Scanning category " + category.name + "(" + category.link + ")", websiteColor);

            step = stepIndex;
            // set the stepindex back to 1: the resumed step only applies to the first category scanned
            stepIndex = 1;

            string webPage = category.link;
            bool continueScan = true;
            List<Product> productList = new List<Product>();

            do
            {
                LogCallback("\tProcess at page " + step.ToString(), websiteColor);
                int scannedObjects = 0;
                string link = webPage.Replace(_pageParam, step.ToString());

                productList = CompareProductLists(SmartScrapPage(site.id, link, ref error), productList);
                if (error != null)
                {
                    // Record where the failure happened so the next run can resume from this page.
                    ScanHistoryUpdate(ref scanHistory, category.id, step.ToString(), SCAN_ERROR, error);
                    repository.SaveChanges();
                    LogCallback("Error Encountered in website: " + site.name + " at category:" + category.name + "(" + category.id.ToString() + ") on page" + step.ToString() + "! ERR:" + error, Color.Red);
                    return error;
                }

                // some sleep before stepping forward
                int sleepTime = randomGenerator.Next(_minSleepTime, _maxSleepTime);
                Thread.Sleep(sleepTime);

                if (productList != null && productList.Count > 0)
                {
                    foreach (Product product in productList)
                    {
                        Product existingProduct = products.FirstOrDefault(item => item.link == product.link);
                        DateTime now = DateTime.Now;
                        ProductScan pScan = new ProductScan() { date = now };

                        if (existingProduct == null)
                        {
                            // save product if not found in database!
                            product.insertDate = now;
                            product.updateDate = now;
                            product.idCategory = category.id;
                            product.active = true;
                            pScan.price = product.price;
                            product.ProductScans.Add(pScan);
                            repository.Products.Add(product);
                            scannedObjects++;
                        }
                        else if (existingProduct.active.GetValueOrDefault(false))
                        {
                            // save scan to database (inactive products are ignored)
                            existingProduct.updateDate = now;
                            existingProduct.price = product.price;
                            if (product.photoLink != null)
                            {
                                existingProduct.photoLink = product.photoLink;
                            }
                            if (product.siteId != null)
                            {
                                existingProduct.siteId = product.siteId;
                            }
                            pScan.price = product.price;
                            pScan.idProduct = existingProduct.id;
                            repository.ProductScans.Add(pScan);
                            scannedObjects++;
                        }
                    }
                }
                else
                {
                    // An empty page marks the end of the category.
                    continueScan = false;
                }

                LogCallback("\tPage " + step.ToString() + " finished with " + scannedObjects + " objects scanned.", websiteColor);

                if (CheckStopSignal())
                {
                    // Store the NEXT page so a resumed scan does not repeat this one.
                    ScanHistoryUpdate(ref scanHistory, category.id, (step + 1).ToString(), SCAN_PAUSED, "Paused");
                    repository.SaveChanges();
                    LogCallback("Scan with id: " + scanHistory.id.ToString() + " has paused at category:" + category.name + "(" + category.id.ToString() + ") on page" + (step + 1).ToString() + "" + error, Color.Orange);
                    return null;
                }

                if (continueScan)
                {
                    ScanHistoryUpdate(ref scanHistory, category.id, (step + 1).ToString(), SCAN_RUNNING, "Running");
                    repository.SaveChanges();
                }
                else
                {
                    LogCallback("Category " + category.name + " finished with " + step.ToString() + " pages scanned.", websiteColor);
                }

                step++;
            } while (continueScan);
        }

        // category is non-null here: categories is non-empty and categoryIndex is a valid index.
        ScanHistoryUpdate(ref scanHistory, category.id, step.ToString(), SCAN_FINISHED, "Finished!");
        repository.SaveChanges();
    }

    return null;
}