/// <summary>
/// Opens an SSD product page, maps its specification table rows onto a new
/// <c>SSD</c> entity and saves that entity through <c>ssdRepo</c>.
/// </summary>
/// <param name="productUrl">Address of the product page to open and parse.</param>
/// <returns>
/// A status message: "Invalid Product." for a blank URL or one containing "Combo",
/// "Already exists." when a stored SSD already has the same model,
/// "Invalid Model." when no "Model" row was found, otherwise
/// "Successfully added {model}.".
/// </returns>
public async Task<string> ScrapeFromProductPageAsync(string productUrl)
{
    // A blank URL cannot be opened, so it is reported the same way as a combo listing.
    if (string.IsNullOrWhiteSpace(productUrl) || productUrl.Contains("Combo"))
    {
        var message = "Invalid Product.";
        this.logger.LogWarning(message);
        return message;
    }

    var document = await this.Context.OpenAsync(productUrl);
    var ssdDataTableRows = this.GetAllTablesRows(document);
    var ssd = new SSD
    {
        Price = this.GetPrice(document),
        ImageUrl = this.GetImageUrl(document),
        Category = this.GetCategoryFromUrl(productUrl),
    };

    this.logger.LogInformation(productUrl);

    foreach (var tableRow in ssdDataTableRows)
    {
        var nameCell = tableRow.FirstChild;
        var valueCell = tableRow.LastElementChild;
        if (nameCell == null || valueCell == null)
        {
            // A row without both a name and a value cell carries no usable data.
            continue;
        }

        var rowName = nameCell.TextContent.Trim();

        // Line-break tags are stored as the "{n}" placeholder.
        var rowValue = valueCell.InnerHtml
            .Replace("<br><br>", "{n}")
            .Replace("<br>", "{n}")
            .Trim();

        switch (rowName)
        {
            case "Model":
                if (this.ssdRepo.AllAsNoTracking().Any(x => x.Model == rowValue))
                {
                    var message = "Already exists.";
                    this.logger.LogWarning(message);
                    return message;
                }

                ssd.Model = rowValue;
                break;
            case "Brand":
                ssd.Brand = this.GetOrCreateBrand(this.brandRepo, rowValue);
                break;
            case "Series":
                ssd.Series = this.GetOrCreateSeries(this.seriesRepo, rowValue);
                break;
            case "Used For":
                ssd.Usage = this.usageRepo.All().FirstOrDefault(x => x.Name == rowValue)
                    ?? new DiskForUsage { Name = rowValue, };
                break;
            case "Form Factor":
                ssd.FormFactor = this.formFactorRepo.All().FirstOrDefault(x => x.Name == rowValue)
                    ?? new FormFactor { Name = rowValue, };
                break;
            case "Capacity":
                var capacityGb = this.ParseSsdCapacityGb(rowValue);
                if (capacityGb.HasValue)
                {
                    ssd.CapacityGb = capacityGb.Value;
                }

                break;
            case "Memory Components":
                ssd.MemoryComponent = this.memoryComponentRepo.All().FirstOrDefault(x => x.Name == rowValue)
                    ?? new MemoryComponent { Name = rowValue, };
                break;
            case "Interface":
                ssd.Interface = this.interfaceRepo.All().FirstOrDefault(x => x.Name == rowValue)
                    ?? new Interface { Name = rowValue, };
                break;
            case "Cache":
                var cacheKb = this.ParseSsdCacheKb(rowValue);
                if (cacheKb.HasValue)
                {
                    ssd.CacheKb = cacheKb.Value;
                }

                break;
            case "Max Sequential Read":
                var seqRead = this.MatchSsdShort(rowValue);
                if (seqRead.HasValue)
                {
                    ssd.MaxSequentialReadMBps = seqRead.Value;
                }

                break;
            case "Max Sequential Write":
                var seqWrite = this.MatchSsdShort(rowValue);
                if (seqWrite.HasValue)
                {
                    ssd.MaxSequentialWriteMBps = seqWrite.Value;
                }

                break;
            case "4KB Random Read":
                ssd.FourKBRandomRead = rowValue;
                break;
            case "4KB Random Write":
                ssd.FourKBRandomWrite = rowValue;
                break;
            case "MTBF":
                // Thousands separators are removed so the digits form a single run.
                var mtbf = this.MatchSsdInt(rowValue.Replace(",", string.Empty));
                if (mtbf.HasValue)
                {
                    ssd.MeanTimeBetweenFailures = mtbf.Value;
                }

                break;
            case "Features":
                ssd.Features = rowValue;
                break;
            case "Height":
                ssd.Height = this.MatchAndParseFloat(rowValue);
                break;
            case "Width":
                ssd.Width = this.MatchAndParseFloat(rowValue);
                break;
            case "Depth":
                // The page's "Depth" row is stored in the entity's Length property.
                ssd.Length = this.MatchAndParseFloat(rowValue);
                break;
            case "Date First Available":
                // Parsed culture-independently; an unparsable date is skipped like any
                // other unparsable field instead of aborting the whole scrape.
                // NOTE(review): assumes the page uses a format the invariant culture accepts - confirm.
                DateTime firstAvailable;
                if (DateTime.TryParse(
                    rowValue,
                    System.Globalization.CultureInfo.InvariantCulture,
                    System.Globalization.DateTimeStyles.None,
                    out firstAvailable))
                {
                    ssd.FirstAvailable = firstAvailable;
                }

                break;
        }
    }

    if (ssd.Model == null)
    {
        var message = "Invalid Model.";
        this.logger.LogWarning(message);
        return message;
    }

    await this.ssdRepo.AddAsync(ssd);
    await this.ssdRepo.SaveChangesAsync();

    var successMessage = $"Successfully added {ssd.Model}.";
    this.logger.LogInformation(successMessage);
    return successMessage;
}

// Returns the first digit run in the text as an int, or null when there is none or it does not fit.
private int? MatchSsdInt(string text)
{
    var match = this.MatchOneOrMoreDigits.Match(text);
    int value;
    if (match.Success
        && int.TryParse(
            match.Value,
            System.Globalization.NumberStyles.None,
            System.Globalization.CultureInfo.InvariantCulture,
            out value))
    {
        return value;
    }

    return null;
}

// Returns the first digit run in the text as a short, or null when there is none or it exceeds short.MaxValue.
private short? MatchSsdShort(string text)
{
    var value = this.MatchSsdInt(text);
    if (value.HasValue && value.Value <= short.MaxValue)
    {
        return (short)value.Value;
    }

    return null;
}

// Converts a capacity row to gigabytes (x1024 when it mentions "tb"); null when absent or too large for a short.
private short? ParseSsdCapacityGb(string rowValue)
{
    var amount = this.MatchSsdInt(rowValue);
    if (!amount.HasValue)
    {
        return null;
    }

    // Computed in a wider type so the terabyte multiplication cannot wrap around.
    long capacityGb = amount.Value;
    if (rowValue.IndexOf("tb", StringComparison.OrdinalIgnoreCase) >= 0)
    {
        capacityGb *= 1024;
    }

    if (capacityGb > short.MaxValue)
    {
        var message = "Capacity is out of range.";
        this.logger.LogWarning(message);
        return null;
    }

    return (short)capacityGb;
}

// Converts a cache row to kilobytes (x1024 for "mb", x1024x1024 for "gb"); null when absent or too large for an int.
private int? ParseSsdCacheKb(string rowValue)
{
    var amount = this.MatchSsdInt(rowValue);
    if (!amount.HasValue)
    {
        return null;
    }

    // Computed in a wider type so the unit multiplication cannot wrap around.
    long cacheKb = amount.Value;
    if (rowValue.IndexOf("mb", StringComparison.OrdinalIgnoreCase) >= 0)
    {
        cacheKb *= 1024;
    }
    else if (rowValue.IndexOf("gb", StringComparison.OrdinalIgnoreCase) >= 0)
    {
        cacheKb *= 1024L * 1024L;
    }

    if (cacheKb > int.MaxValue)
    {
        var message = "Cache is out of range.";
        this.logger.LogWarning(message);
        return null;
    }

    return (int)cacheKb;
}
/// <summary>
/// Opens an HDD product page, maps its specification table rows onto a new
/// <c>HDD</c> entity and saves that entity through <c>hddRepo</c>.
/// </summary>
/// <param name="productUrl">Address of the product page to open and parse.</param>
/// <returns>
/// A status message: "Invalid Product." for a blank URL or one containing "Combo",
/// "Already exists." when a stored HDD already has the same model,
/// "Invalid Model." when no "Model" row was found, otherwise
/// "Successfully added {model}.".
/// </returns>
public async Task<string> ScrapeFromProductPageAsync(string productUrl)
{
    // A blank URL cannot be opened, so it is reported the same way as a combo listing.
    if (string.IsNullOrWhiteSpace(productUrl) || productUrl.Contains("Combo"))
    {
        var message = "Invalid Product.";
        this.logger.LogWarning(message);
        return message;
    }

    var document = await this.Context.OpenAsync(productUrl);
    var hddDataTableRows = this.GetAllTablesRows(document);
    var hdd = new HDD
    {
        Price = this.GetPrice(document),
        ImageUrl = this.GetImageUrl(document),
        Category = this.GetCategoryFromUrl(productUrl),
    };

    this.logger.LogInformation(productUrl);

    foreach (var tableRow in hddDataTableRows)
    {
        var nameCell = tableRow.FirstChild;
        var valueCell = tableRow.LastElementChild;
        if (nameCell == null || valueCell == null)
        {
            // A row without both a name and a value cell carries no usable data.
            continue;
        }

        var rowName = nameCell.TextContent.Trim();

        // Line-break tags are stored as the "{n}" placeholder.
        var rowValue = valueCell.InnerHtml
            .Replace("<br><br>", "{n}")
            .Replace("<br>", "{n}")
            .Trim();

        switch (rowName)
        {
            case "Model":
                if (this.hddRepo.AllAsNoTracking().Any(x => x.Model == rowValue))
                {
                    var message = "Already exists.";
                    this.logger.LogWarning(message);
                    return message;
                }

                hdd.Model = rowValue;
                break;
            case "Brand":
                hdd.Brand = this.GetOrCreateBrand(this.brandRepo, rowValue);
                break;
            case "Series":
                hdd.Series = this.GetOrCreateSeries(this.seriesRepo, rowValue);
                break;
            case "Interface":
                hdd.Interface = this.interfaceRepo.All().FirstOrDefault(x => x.Name == rowValue)
                    ?? new Interface { Name = rowValue, };
                break;
            case "Capacity":
                var capacityGb = this.ParseHddCapacityGb(rowValue);
                if (capacityGb.HasValue)
                {
                    hdd.CapacityGb = capacityGb.Value;
                }

                break;
            case "RPM":
                var rpm = this.MatchHddInt(rowValue);
                if (rpm.HasValue && rpm.Value <= short.MaxValue)
                {
                    hdd.RevolutionsPerMinute = (short)rpm.Value;
                }

                break;
            case "Cache":
                var cacheKb = this.ParseHddCacheKb(rowValue);
                if (cacheKb.HasValue)
                {
                    hdd.CacheKb = cacheKb.Value;
                }

                break;
            case "Features":
                hdd.Features = rowValue;
                break;
            case "Usage":
                hdd.Usage = this.usageRepo.All().FirstOrDefault(x => x.Name == rowValue)
                    ?? new DiskForUsage { Name = rowValue, };
                break;
            case "Form Factor":
                hdd.FormFactor = this.formFactorRepo.All().FirstOrDefault(x => x.Name == rowValue)
                    ?? new FormFactor { Name = rowValue, };
                break;
            case "Height (maximum)":
                hdd.Height = this.MatchAndParseFloat(rowValue);
                break;
            case "Width (maximum)":
                hdd.Width = this.MatchAndParseFloat(rowValue);
                break;
            case "Length (maximum)":
                hdd.Length = this.MatchAndParseFloat(rowValue);
                break;
            case "Date First Available":
                // Parsed culture-independently; an unparsable date is skipped like any
                // other unparsable field instead of aborting the whole scrape.
                // NOTE(review): assumes the page uses a format the invariant culture accepts - confirm.
                DateTime firstAvailable;
                if (DateTime.TryParse(
                    rowValue,
                    System.Globalization.CultureInfo.InvariantCulture,
                    System.Globalization.DateTimeStyles.None,
                    out firstAvailable))
                {
                    hdd.FirstAvailable = firstAvailable;
                }

                break;
        }
    }

    if (hdd.Model == null)
    {
        var message = "Invalid Model.";
        this.logger.LogWarning(message);
        return message;
    }

    await this.hddRepo.AddAsync(hdd);
    await this.hddRepo.SaveChangesAsync();

    var successMessage = $"Successfully added {hdd.Model}.";
    this.logger.LogInformation(successMessage);
    return successMessage;
}

// Returns the first digit run in the text as an int, or null when there is none or it does not fit.
private int? MatchHddInt(string text)
{
    var match = this.MatchOneOrMoreDigits.Match(text);
    int value;
    if (match.Success
        && int.TryParse(
            match.Value,
            System.Globalization.NumberStyles.None,
            System.Globalization.CultureInfo.InvariantCulture,
            out value))
    {
        return value;
    }

    return null;
}

// Converts a capacity row to gigabytes (x1024 when it mentions "tb"); null when absent or too large for a short.
private short? ParseHddCapacityGb(string rowValue)
{
    var amount = this.MatchHddInt(rowValue);
    if (!amount.HasValue)
    {
        return null;
    }

    // Computed in a wider type so the terabyte multiplication cannot wrap around
    // (32 TB already exceeds short.MaxValue once expressed in gigabytes).
    long capacityGb = amount.Value;
    if (rowValue.IndexOf("tb", StringComparison.OrdinalIgnoreCase) >= 0)
    {
        capacityGb *= 1024;
    }

    if (capacityGb > short.MaxValue)
    {
        var message = "Capacity is out of range.";
        this.logger.LogWarning(message);
        return null;
    }

    return (short)capacityGb;
}

// Converts a cache row to kilobytes (x1024 for "mb", x1024x1024 for "gb"); null when absent or too large for an int.
private int? ParseHddCacheKb(string rowValue)
{
    var amount = this.MatchHddInt(rowValue);
    if (!amount.HasValue)
    {
        return null;
    }

    // Computed in a wider type so the unit multiplication cannot wrap around.
    long cacheKb = amount.Value;
    if (rowValue.IndexOf("mb", StringComparison.OrdinalIgnoreCase) >= 0)
    {
        cacheKb *= 1024;
    }
    else if (rowValue.IndexOf("gb", StringComparison.OrdinalIgnoreCase) >= 0)
    {
        cacheKb *= 1024L * 1024L;
    }

    if (cacheKb > int.MaxValue)
    {
        var message = "Cache is out of range.";
        this.logger.LogWarning(message);
        return null;
    }

    return (int)cacheKb;
}