/// <summary>
/// Creates one <c>RetailerShop</c> record for every crawled <c>ProductInfo</c> whose
/// <c>WebsiteId</c> is 4, filling it from that product's <c>ProductKeyword</c> rows.
/// </summary>
/// <remarks>
/// Keyword types are resolved by name through <c>keywordList</c> positions 0-5 and 7-11;
/// position 6 is not mapped here. A keyword type that does not exist makes <c>First</c>
/// throw before any shop is processed. A failure while mapping a single shop is written
/// to the console and the loop continues. A "+" is written after every shop, whether it
/// succeeded or not.
/// </remarks>
public void Mapping()
{
    List<KeywordType> keywordTypes;
    List<int> shopIds;
    using (var db = new NCrawlerEntitiesDbServices())
    {
        shopIds = db.ProductInfoes.Where(m => m.WebsiteId == 4).Select(m => m.ProductInfoId).ToList();
        keywordTypes = db.KeywordTypes.ToList();
    }

    var shopNameId = keywordTypes.First(m => m.Name == keywordList[0]).KeywordTypeId;
    var shopAddressId = keywordTypes.First(m => m.Name == keywordList[1]).KeywordTypeId;
    var shopTelId = keywordTypes.First(m => m.Name == keywordList[2]).KeywordTypeId;
    var shopOpenTimeId = keywordTypes.First(m => m.Name == keywordList[3]).KeywordTypeId;
    var shopCloseDayId = keywordTypes.First(m => m.Name == keywordList[4]).KeywordTypeId;
    var shopServiceId = keywordTypes.First(m => m.Name == keywordList[5]).KeywordTypeId;
    // keywordList[6] is deliberately skipped: the sale-product-category mapping is disabled.
    var shopPayId = keywordTypes.First(m => m.Name == keywordList[7]).KeywordTypeId;
    var shopStateId = keywordTypes.First(m => m.Name == keywordList[8]).KeywordTypeId;
    var shopCityId = keywordTypes.First(m => m.Name == keywordList[9]).KeywordTypeId;
    var shopLatId = keywordTypes.First(m => m.Name == keywordList[10]).KeywordTypeId;
    var shopLongId = keywordTypes.First(m => m.Name == keywordList[11]).KeywordTypeId;

    // Coordinates are machine-generated text, so they must not be parsed with the
    // current UI culture (a culture using ',' as decimal separator would misread them).
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    foreach (var shopId in shopIds)
    {
        try
        {
            var repoManager = new RepositoryManager();
            using (var db = new NCrawlerEntitiesDbServices())
            {
                var retailShop = new RetailerShop();

                // Load this shop's keywords once; every lookup below runs in memory
                // instead of issuing a separate database query per field.
                var keywords = db.ProductKeywords.Where(m => m.ProductId == shopId).ToList();

                var name = keywords.FirstOrDefault(m => m.KeywordTypeId == shopNameId);
                if (name != null)
                {
                    retailShop.Name = name.Value;
                }

                var address = keywords.FirstOrDefault(m => m.KeywordTypeId == shopAddressId);
                if (address != null)
                {
                    retailShop.Address = address.Value;
                }

                var tel = keywords.FirstOrDefault(m => m.KeywordTypeId == shopTelId);
                if (tel != null)
                {
                    retailShop.Phone = tel.Value;
                }

                var openTime = keywords.FirstOrDefault(m => m.KeywordTypeId == shopOpenTimeId);
                if (openTime != null)
                {
                    retailShop.OpenTime = openTime.Value;
                }

                var closeDays = keywords.Where(m => m.KeywordTypeId == shopCloseDayId).Select(m => m.Value).ToList();
                if (closeDays.Any())
                {
                    retailShop.CloseDay = string.Join(",", closeDays);
                }

                // The service and payment lists are not stored; they only drive two flags.
                var taxFree = keywords
                    .Where(m => m.KeywordTypeId == shopServiceId)
                    .Any(m => m.Value != null && m.Value.Contains("免税"));
                var unionPay = keywords
                    .Where(m => m.KeywordTypeId == shopPayId)
                    .Any(m => m.Value != null && m.Value.Contains("銀聯"));

                var lat = keywords.FirstOrDefault(m => m.KeywordTypeId == shopLatId);
                if (lat != null)
                {
                    retailShop.Latitude = float.Parse(lat.Value, invariant);
                }

                var longt = keywords.FirstOrDefault(m => m.KeywordTypeId == shopLongId);
                if (longt != null)
                {
                    retailShop.Longitude = float.Parse(longt.Value, invariant);
                }

                // The city is only resolved when a state keyword exists, because a newly
                // created city needs the state's id.
                var state = keywords.FirstOrDefault(m => m.KeywordTypeId == shopStateId);
                if (state != null)
                {
                    var stateName = state.Value;
                    var stateI = repoManager.StateRepository.Table.FirstOrDefault(m => m.Name == stateName);
                    if (stateI == null)
                    {
                        stateI = new State() { Name = stateName };
                        repoManager.StateRepository.Create(stateI);
                        repoManager.StateRepository.Save();
                    }

                    var city = keywords.FirstOrDefault(m => m.KeywordTypeId == shopCityId);
                    if (city != null)
                    {
                        var cityName = city.Value;
                        var cityI = repoManager.CityRepository.Table.FirstOrDefault(m => m.Name == cityName);
                        if (cityI == null)
                        {
                            cityI = new City() { Name = cityName, StateId = stateI.StateId };
                            repoManager.CityRepository.Create(cityI);
                            repoManager.CityRepository.Save();
                        }

                        retailShop.CityId = cityI.CityId;
                    }
                }

                retailShop.Enabled = true;
                retailShop.TaxFree = taxFree;
                retailShop.Unionpay = unionPay;
                // NOTE(review): every shop is attached to retailer 1 - confirm this is intended.
                retailShop.RetailerId = 1;
                repoManager.RetailerShopRepository.Create(retailShop);
                repoManager.RetailerShopRepository.Save();
            }
        }
        catch (Exception ex)
        {
            // Best effort: report the failing shop and carry on with the next one.
            Console.WriteLine(ex);
        }

        Console.Write("+");
    }
}
/// <summary>
/// Parses one downloaded shop page and stores its details as <c>ProductKeyword</c> rows
/// attached to a <c>ProductInfo</c> created for the page's URL.
/// </summary>
/// <param name="filePath">
/// Path of the saved HTML file. The file name without extension must be the integer id of
/// the matching <c>CrawlHistory</c> row. Nothing happens when the file does not exist.
/// </param>
/// <param name="webSiteId">Website id passed to <c>AddProduct</c> for the new product.</param>
/// <remarks>
/// The page is skipped silently when the shop name, address, phone or opening-time node is
/// missing, and with a console message when the URL is already present in
/// <c>ProductInfoes</c>. All mandatory values are extracted and validated before
/// <c>AddProduct</c> is called, because <c>AddProduct</c> saves through its own context:
/// creating the product first would leave a row without keywords behind for rejected
/// pages and make later runs report them as duplicates. Any exception is caught and
/// written to the console after a "-" marker; success writes "+".
/// </remarks>
private void ProcessSingleFile(string filePath, int webSiteId)
{
    try
    {
        if (!File.Exists(filePath))
        {
            return;
        }

        var info = new HtmlDocument();
        info.LoadHtml(File.ReadAllText(filePath));
        var fileId = int.Parse(Path.GetFileNameWithoutExtension(filePath));

        using (var db = new NCrawlerEntitiesDbServices())
        {
            var url = db.CrawlHistory.Single(m => m.Id == fileId).Key;
            if (db.ProductInfoes.Any(m => m.Url == url))
            {
                Console.WriteLine("Duplicate Url:" + url);
                return;
            }

            // ---- Mandatory fields: validate everything before touching the database ----
            var nameNode = info.DocumentNode.SelectSingleNode("//table//tr//th[contains(text(),'店舗名')]/following-sibling::td//p");
            var addressNode = info.DocumentNode.SelectSingleNode("//table//tr//th[contains(text(),'住所')]/following-sibling::td//p");
            var telNode = info.DocumentNode.SelectSingleNode("//table//tr//th[contains(text(),'電話番号')]/following-sibling::td//p");
            var openTimeNode = info.DocumentNode.SelectSingleNode("//table//tr//th[contains(text(),'営業時間')]/following-sibling::td//p[@class='opentime']");
            if (nameNode == null || addressNode == null || telNode == null || openTimeNode == null)
            {
                return;
            }

            var address = addressNode.InnerText;

            // The first two pieces of the address are stored as 都道府県 and 市.
            var addressParts = address.Split(add12.ToArray(), StringSplitOptions.RemoveEmptyEntries);
            if (addressParts.Length < 2)
            {
                throw new InvalidOperationException("Address cannot be split into two parts: " + address);
            }

            // The coordinates are read from the first statement of the body's onload
            // handler, after stripping the "ZdcEmapInit" call name, quotes and parentheses.
            var bodyNode = info.DocumentNode.SelectSingleNode("//body");
            var onload = bodyNode == null ? null : bodyNode.Attributes["onload"];
            if (onload == null)
            {
                throw new InvalidOperationException("Page has no body onload attribute to read the location from.");
            }

            var location = onload.Value.Split(';')[0]
                .Replace("ZdcEmapInit", "")
                .Replace("'", "")
                .Replace("(", "")
                .Replace(")", "")
                .Split(',');
            if (location.Length < 2)
            {
                throw new InvalidOperationException("Body onload attribute does not contain two location values: " + onload.Value);
            }

            // ---- Persist ----
            var product = AddProduct(url, webSiteId);

            // Queues one keyword row for the product; the keyword type is looked up by name.
            Action<string, string> addKeyword = (typeName, value) =>
                db.ProductKeywords.AddObject(new ProductKeyword()
                {
                    ProductId = product.ProductInfoId,
                    KeywordTypeId = keywordTypeList.Single(m => m.Name == typeName).KeywordTypeId,
                    Value = value
                });

            addKeyword("店铺名", nameNode.InnerText);
            addKeyword("地址", address);
            addKeyword("都道府県", addressParts[0]);
            addKeyword("市", addressParts[1]);
            addKeyword("lat", location[0]);
            addKeyword("long", location[1]);
            addKeyword("电话", telNode.InnerText);
            addKeyword("营业时间", openTimeNode.InnerText);

            // ---- Optional, multi-valued fields ----
            var closeDayNode = info.DocumentNode.SelectSingleNode("//table//tr//th[contains(text(),'定休日')]/following-sibling::td");
            if (closeDayNode != null)
            {
                var closeDays = closeDayNode.InnerHtml.Split(new string[] { "・" }, StringSplitOptions.RemoveEmptyEntries);
                foreach (var day in closeDays)
                {
                    addKeyword("休息日", day.StripHtml().Trim());
                }
            }

            var serviceNodes = info.DocumentNode.SelectNodes("//table//tr//th[contains(text(),'施設・サービス')]/following-sibling::td//p");
            if (serviceNodes != null)
            {
                foreach (var node in serviceNodes)
                {
                    addKeyword("设施服务", node.InnerHtml.StripHtml().Trim());
                }
            }

            var productNodes = info.DocumentNode.SelectNodes("//table//tr//th[contains(text(),'取扱商品')]/following-sibling::td//p");
            if (productNodes != null)
            {
                foreach (var node in productNodes)
                {
                    addKeyword("商品类型", node.InnerHtml.StripHtml().Trim());
                }
            }

            var payNodes = info.DocumentNode.SelectNodes("//table//tr//th[contains(text(),'決済方法')]/following-sibling::td//p");
            if (payNodes != null)
            {
                foreach (var node in payNodes)
                {
                    addKeyword("结算方式", node.InnerHtml.StripHtml().Trim());
                }
            }

            db.SaveChanges();
            Console.Write("+");
        }
    }
    catch (Exception ex)
    {
        Console.Write("-");
        Console.WriteLine("Single product Error:" + ex);
    }
}
/// <summary>
/// Returns the <c>Website</c> whose <c>GroupId</c> equals <paramref name="groupId"/>,
/// creating and saving it first when no such row exists.
/// </summary>
/// <param name="groupId">Group id used to look the website up, and stored on a new row.</param>
/// <param name="domain">Stored as <c>Url</c> on a newly created row; ignored when a row already exists.</param>
/// <returns>The existing or newly saved website.</returns>
private Website AddWebSite(int groupId, string domain)
{
    using (var context = new NCrawlerEntitiesDbServices())
    {
        // SingleOrDefault throws when more than one website shares the group id.
        var existing = context.Websites.SingleOrDefault(site => site.GroupId == groupId);
        if (existing != null)
        {
            return existing;
        }

        var created = new Website();
        created.GroupId = groupId;
        created.Url = domain;
        context.Websites.AddObject(created);
        context.SaveChanges();
        return created;
    }
}
/// <summary>
/// Returns the <c>ProductInfo</c> stored for <paramref name="url"/>, creating and saving
/// it first when the URL is not known yet.
/// </summary>
/// <param name="url">URL used to look the product up, and stored on a new row.</param>
/// <param name="siteId">Stored as <c>WebsiteId</c> on a newly created row; ignored when a row already exists.</param>
/// <returns>The existing or newly saved product.</returns>
private ProductInfo AddProduct(string url, int siteId)
{
    using (var context = new NCrawlerEntitiesDbServices())
    {
        // SingleOrDefault throws when the same URL is stored more than once.
        var existing = context.ProductInfoes.SingleOrDefault(item => item.Url == url);
        if (existing != null)
        {
            return existing;
        }

        var created = new ProductInfo();
        created.WebsiteId = siteId;
        created.Url = url;
        context.ProductInfoes.AddObject(created);
        context.SaveChanges();
        return created;
    }
}
/// <summary>
/// Makes sure a <c>KeywordType</c> row exists for every name in <c>keywordList</c> and
/// returns all keyword types stored afterwards.
/// </summary>
/// <returns>Every row of <c>KeywordTypes</c>, read after the missing names were saved.</returns>
private List<KeywordType> InitKeyword()
{
    using (var context = new NCrawlerEntitiesDbServices())
    {
        foreach (var keywordName in keywordList)
        {
            // The existence check runs against the database, so it does not see rows
            // queued earlier in this loop but not yet saved.
            var alreadyStored = context.KeywordTypes.Any(type => type.Name == keywordName);
            if (alreadyStored)
            {
                continue;
            }

            var missingType = new KeywordType();
            missingType.Name = keywordName;
            context.KeywordTypes.AddObject(missingType);
        }

        context.SaveChanges();

        var allTypes = context.KeywordTypes.ToList();
        return allTypes;
    }
}
/// <summary>
/// Parses one downloaded news page into an in-memory <c>NArticle</c>.
/// </summary>
/// <param name="filePath">
/// Path of the saved HTML file. The file name without extension must be the integer id of
/// the matching <c>CrawlHistory</c> row. Nothing happens when the file does not exist.
/// </param>
/// <remarks>
/// Persistence is currently switched off: the article and its images are built and read
/// but never handed to a repository. The page is skipped silently when the title,
/// og:image meta tag or body node is missing, and with a console message when the URL is
/// already present in <c>ProductInfoes</c>. Any exception is caught and written to the
/// console after a "-" marker; success writes "+".
/// </remarks>
private void ProcessSingleFile(string filePath)
{
    try
    {
        var repo = new RepositoryManager();
        if (!File.Exists(filePath))
        {
            return;
        }

        var document = new HtmlDocument();
        document.LoadHtml(File.ReadAllText(filePath));
        var historyId = int.Parse(Path.GetFileNameWithoutExtension(filePath));

        using (var db = new NCrawlerEntitiesDbServices())
        {
            var url = db.CrawlHistory.Single(m => m.Id == historyId).Key;

            // Disabled duplicate check against stored articles:
            //   if (repo.NArticleRepository.Table.Any(m => m.SourceUrl == url)) return;
            var alreadyCrawled = db.ProductInfoes.Any(m => m.Url == url);
            if (alreadyCrawled)
            {
                Console.WriteLine("Duplicate Url:" + url);
                return;
            }

            // TODO: add article (var product = AddArticle(url);)

            var titleNodes = document.DocumentNode.SelectNodes("//head//title");
            if (titleNodes == null)
            {
                return;
            }
            var title = titleNodes.First().InnerText;

            var coverNode = document.DocumentNode.SelectSingleNode("//meta[@property='og:image']");
            if (coverNode == null)
            {
                return;
            }
            var coverUrl = coverNode.Attributes["content"].Value;

            var bodyNode = document.DocumentNode.SelectSingleNode("//article[@class='news-detail']//div[@class='body']//section//div[@class='inner']");
            if (bodyNode == null)
            {
                return;
            }

            var article = new NArticle();
            article.Body = bodyNode.InnerText;
            article.BodyHtml = bodyNode.InnerHtml;
            article.CorverUrl = coverUrl;
            article.CreateDate = DateTime.Now;
            article.PublicshDate = DateTime.Now;
            article.SourceUrl = url;
            article.Title = title;
            // Disabled: repo.NArticleRepository.Create(article); repo.NArticleRepository.Save();

            var imageNodes = document.DocumentNode.SelectNodes("//article[@class='news-detail']//div[@class='body']//img");
            if (imageNodes != null && imageNodes.Any())
            {
                foreach (var imageNode in imageNodes)
                {
                    // Reading the attribute still fails the whole page when an <img> has no src.
                    var imageUrl = imageNode.Attributes["src"].Value;
                    // Disabled: repo.NArticleImageRepository.Create(new NArticleImage() { NArticleId = article.NArticleId, Url = imageUrl });
                }
                // Disabled: repo.NArticleRepository.Save();
            }

            Console.Write("+");
        }
    }
    catch (Exception ex)
    {
        Console.Write("-");
        Console.WriteLine("Single product Error:" + ex);
    }
}
/// <summary>
/// Parses one downloaded article page, saves it as an <c>Article</c> and downloads the
/// article's .jpg images into <c>article/{ArticleId}</c> and
/// <c>article/{ArticleId}/thumbnails</c>.
/// </summary>
/// <param name="filePath">
/// Path of the saved HTML file. The file name without extension must be the integer id of
/// the matching <c>CrawlHistory</c> row. Nothing happens when the file does not exist.
/// </param>
/// <remarks>
/// The page is skipped silently when the title or body node is missing, and with a console
/// message when the URL is already present in <c>ProductInfoes</c>. An image failure is
/// logged and rethrown, which aborts the remaining images; the article itself has already
/// been saved at that point. Any exception is finally caught and written to the console
/// after a "-" marker; success writes "+".
/// </remarks>
private void ProcessSingleFile(string filePath)
{
    try
    {
        var repo = new RepositoryManager();
        if (!File.Exists(filePath))
        {
            return;
        }

        var info = new HtmlDocument();
        info.LoadHtml(File.ReadAllText(filePath));
        var fileId = int.Parse(Path.GetFileNameWithoutExtension(filePath));

        using (var db = new NCrawlerEntitiesDbServices())
        {
            var url = db.CrawlHistory.Single(m => m.Id == fileId).Key;

            // NOTE(review): the duplicate check looks at ProductInfoes, but this method
            // never writes there, so it presumably cannot detect an already imported
            // article - confirm against the other importers.
            if (db.ProductInfoes.Any(m => m.Url == url))
            {
                Console.WriteLine("Duplicate Url:" + url);
                return;
            }

            var titleNode = info.DocumentNode.SelectNodes("//h1[@class='title']");
            if (titleNode == null)
            {
                return;
            }
            var title = titleNode.First().InnerText;

            var bodyNode = info.DocumentNode.SelectSingleNode("//article[@class='entry']");
            if (bodyNode == null)
            {
                return;
            }
            var bodyHtml = bodyNode.InnerHtml;

            var article = new Article()
            {
                Body = bodyHtml,
                Title = title,
                ArticleTypeId = 4,
                CreatedDate = DateTime.Now,
                ModifiedBy = "Admin",
                CreatedBy = "Admin",
                ModifiedDate = DateTime.Now,
                IsPublished = true
            };
            repo.ArticleRepository.Create(article);
            repo.ArticleRepository.Save();

            // Creating the thumbnails folder also creates the article folder above it.
            var articleFolder = "article";
            var articleFolderS = articleFolder + "/" + article.ArticleId;
            var articleFolderST = articleFolderS + "/thumbnails";
            Directory.CreateDirectory(articleFolderST);

            var imagesNode = info.DocumentNode.SelectNodes("//article[@class='entry']//img[contains(@src,'.jpg')]");
            if (imagesNode != null && imagesNode.Any())
            {
                // One client for all images, disposed when done (WebClient is IDisposable).
                using (var client = new WebClient())
                {
                    foreach (var image in imagesNode)
                    {
                        try
                        {
                            var imageUrl = image.Attributes["src"].Value;
                            var imageName = Guid.NewGuid();
                            if (imageUrl != null)
                            {
                                var extension = Path.GetExtension(imageUrl);
                                var storePath = article.ArticleId + "/" + imageName + extension;
                                var storeTPath = article.ArticleId + "/thumbnails/" + imageName + extension;

                                // The thumbnail is a byte-identical copy of the image, so
                                // download once and copy locally instead of fetching twice.
                                client.DownloadFile(imageUrl, articleFolder + "/" + storePath);
                                File.Copy(articleFolder + "/" + storePath, articleFolder + "/" + storeTPath, true);

                                // NOTE(review): every image is flagged as the cover image -
                                // confirm whether only the first one should be.
                                repo.ArticleImageRepository.Create(new ArticleImage()
                                {
                                    ArticleId = article.ArticleId,
                                    Url = storePath,
                                    ThumbnailUrl = storeTPath,
                                    IsCoverImage = true
                                });
                            }
                        }
                        catch (Exception ex)
                        {
                            Console.WriteLine(ex);
                            throw;
                        }
                    }
                }

                repo.ArticleImageRepository.Save();
            }

            Console.Write("+");
        }
    }
    catch (Exception ex)
    {
        Console.Write("-");
        Console.WriteLine("Single product Error:" + ex);
    }
}
/// <summary>
/// Parses one downloaded product page and stores its details as <c>ProductKeyword</c>
/// rows attached to a <c>ProductInfo</c> created for the page's URL.
/// </summary>
/// <param name="filePath">
/// Path of the saved HTML file. The file name without extension must be the integer id of
/// the matching <c>CrawlHistory</c> row. Nothing happens when the file does not exist.
/// </param>
/// <param name="webSiteId">Website id passed to <c>AddProduct</c> for the new product.</param>
/// <remarks>
/// The JAN cell is mandatory: a page without it is skipped silently, and it is checked
/// before <c>AddProduct</c> is called. <c>AddProduct</c> saves through its own context, so
/// creating the product first would leave a row without keywords behind and make later
/// runs report the page as a duplicate. All other fields are optional. Any exception is
/// caught and written to the console after a "-" marker; success writes "+".
/// </remarks>
private void ProcessSingleFile(string filePath, int webSiteId)
{
    try
    {
        if (!File.Exists(filePath))
        {
            return;
        }

        var info = new HtmlDocument();
        info.LoadHtml(File.ReadAllText(filePath));
        var fileId = int.Parse(Path.GetFileNameWithoutExtension(filePath));

        using (var db = new NCrawlerEntitiesDbServices())
        {
            var url = db.CrawlHistory.Single(m => m.Id == fileId).Key;
            if (db.ProductInfoes.Any(m => m.Url == url))
            {
                Console.WriteLine("Duplicate Url:" + url);
                return;
            }

            // Validate the mandatory field before anything is written to the database.
            var barcodeNode = info.DocumentNode.SelectSingleNode("//table//tr//td[contains(text(),'JAN')]/following-sibling::td");
            if (barcodeNode == null)
            {
                return;
            }

            var product = AddProduct(url, webSiteId);

            // Queues one keyword row for the product; the keyword type is looked up by name.
            Action<string, string> addKeyword = (typeName, value) =>
                db.ProductKeywords.AddObject(new ProductKeyword()
                {
                    ProductId = product.ProductInfoId,
                    KeywordTypeId = keywordTypeList.Single(m => m.Name == typeName).KeywordTypeId,
                    Value = value
                });

            // Returns the text of the cell that follows the cell containing the given label.
            Func<string, HtmlNode> valueCell = label =>
                info.DocumentNode.SelectSingleNode("//table//tr//td[contains(text(),'" + label + "')]/following-sibling::td");

            var listSeparators = new string[] { "<br>", "、" };

            addKeyword("条形码", barcodeNode.InnerText);

            var functionNode = valueCell("効能・効果");
            if (functionNode != null)
            {
                var functions = functionNode.InnerHtml.Split(new string[] { "<br>", "、", "・", "●", ",", "。" }, StringSplitOptions.RemoveEmptyEntries);
                foreach (var fragment in functions)
                {
                    var cleaned = fragment.StripHtml().Trim();

                    // A fragment made only of markup or whitespace is empty after cleaning;
                    // storing it would produce an empty keyword.
                    if (string.IsNullOrWhiteSpace(cleaned))
                    {
                        continue;
                    }

                    addKeyword("効能効果", cleaned);
                }
            }

            var usageNode = valueCell("用法・用量");
            if (usageNode != null)
            {
                addKeyword("用法用量", usageNode.InnerText);
            }

            var distinguishNode = valueCell("商品区分");
            if (distinguishNode != null)
            {
                addKeyword("商品区分", distinguishNode.InnerText);
            }

            var shapeNode = valueCell("剤形");
            if (shapeNode != null)
            {
                addKeyword("剂形", shapeNode.InnerText);
            }

            // NOTE(review): unlike 効能効果, additive and component fragments are stored
            // without StripHtml()/Trim() - confirm whether that is intended.
            var additiveNode = valueCell("添加物");
            if (additiveNode != null)
            {
                foreach (var fragment in additiveNode.InnerHtml.Split(listSeparators, StringSplitOptions.RemoveEmptyEntries))
                {
                    addKeyword("添加剂", fragment);
                }
            }

            var componentNode = valueCell("成分・分量");
            if (componentNode != null)
            {
                foreach (var fragment in componentNode.InnerHtml.Split(listSeparators, StringSplitOptions.RemoveEmptyEntries))
                {
                    addKeyword("成分分量", fragment);
                }
            }

            var saleCompanyNode = valueCell("製造販売会社");
            if (saleCompanyNode != null)
            {
                addKeyword("生产销售公司", saleCompanyNode.InnerText);
            }

            db.SaveChanges();
            Console.Write("+");
        }
    }
    catch (Exception ex)
    {
        Console.Write("-");
        Console.WriteLine("Single product Error:" + ex);
    }
}
/// <summary>
/// Tags existing products with the crawled keyword values of type 2 ("functions" in this
/// method), matching crawled products to repository products by barcode.
/// </summary>
/// <remarks>
/// A crawled barcode is the first 13 characters of a type-1 keyword value; only barcodes
/// that also exist in the product repository are processed. Values of 20 characters or
/// more are not turned into tags. Console output: the number of matched barcodes, then
/// "+" for every tag created, "-" for every tag that already existed and "!" after each
/// barcode.
/// </remarks>
public void Mapping()
{
    // Keyword type ids as used by the queries below.
    const int BarcodeKeywordTypeId = 1;
    const int FunctionKeywordTypeId = 2;
    const int MaxTagLength = 20;

    var repoManager = new RepositoryManager();
    List<string> punchedBarcode;
    using (var db = new NCrawlerEntitiesDbServices())
    {
        var productBarcode = repoManager.ProductRepository.Table.Select(m => m.BarCode.ToString()).ToList();
        var crawlBarcode = db.ProductKeywords
            .Where(m => m.KeywordTypeId == BarcodeKeywordTypeId)
            .Select(m => m.Value.Substring(0, 13))
            .ToList();
        punchedBarcode = crawlBarcode.Intersect(productBarcode).ToList();
        Console.WriteLine(punchedBarcode.Count);
    }

    foreach (var barcode in punchedBarcode)
    {
        using (var db = new NCrawlerEntitiesDbServices())
        {
            var productId = db.ProductKeywords
                .First(m => m.KeywordTypeId == BarcodeKeywordTypeId && m.Value.Substring(0, 13) == barcode)
                .ProductId;
            var functions = db.ProductKeywords
                .Where(m => m.KeywordTypeId == FunctionKeywordTypeId && m.ProductId == productId)
                .ToList();
            var pId = repoManager.ProductRepository.Table.First(m => m.BarCode == barcode).ProductId;

            foreach (var f in functions)
            {
                if (f.Value.Length >= MaxTagLength)
                {
                    continue;
                }

                var tagName = f.Value;
                var tag = repoManager.TagRepository.Table.FirstOrDefault(m => m.Name == tagName);
                if (tag == null)
                {
                    tag = new Tag() { Name = tagName };
                    repoManager.TagRepository.Create(tag);
                    repoManager.TagRepository.Save();
                    Console.Write("+");
                }
                else
                {
                    Console.Write("-");
                }

                var tagId = tag.TagId;
                if (!repoManager.ProductTagRepository.Table.Any(m => m.ProductId == pId && m.TagId == tagId))
                {
                    repoManager.ProductTagRepository.Create(new ProductTag() { ProductId = pId, TagId = tagId });
                    // Save through the repository that received the new row; the previous
                    // code saved ProductRepository here.
                    repoManager.ProductTagRepository.Save();
                }
            }

            Console.WriteLine("!");
        }
    }
}