예제 #1
0
        /// <summary>
        /// Parses one saved store page and queues its fields as ProductKeyword rows,
        /// saving them in a single SaveChanges call at the end.
        /// </summary>
        /// <remarks>
        /// Store name, address, phone number and opening hours are required: when one of
        /// those cells is missing the method returns silently without saving any keyword.
        /// The address must split into at least two parts and the body "onload" attribute
        /// must yield two coordinates; otherwise an error line is written to the console
        /// and nothing is saved. Closed days, services, product types and payment methods
        /// are optional. Every exception is caught and written to the console.
        /// </remarks>
        /// <param name="filePath">Path of the saved HTML file; its name without extension is parsed as the CrawlHistory id.</param>
        /// <param name="webSiteId">Website id handed to AddProduct for the new product.</param>
        private void ProcessSingleFile(string filePath, int webSiteId)
        {
            try
            {
                if (!File.Exists(filePath))
                {
                    return;
                }

                var info = new HtmlDocument();
                info.LoadHtml(File.ReadAllText(filePath));
                var fileId = int.Parse(Path.GetFileNameWithoutExtension(filePath));

                using (var db = new NCrawlerEntitiesDbServices())
                {
                    var url = db.CrawlHistory.Single(m => m.Id == fileId).Key;
                    if (db.ProductInfoes.Any(m => m.Url == url))
                    {
                        Console.WriteLine("Duplicate Url:" + url);
                        return;
                    }

                    // NOTE(review): the product is created before the page is validated, so if
                    // AddProduct persists it immediately, the early returns below leave a product
                    // without keywords that later runs report as a duplicate - confirm this is intended.
                    var product = AddProduct(url, webSiteId);

                    // Queues one keyword of the named type for the current product.
                    Action<string, string> addKeyword = (typeName, value) =>
                        db.ProductKeywords.AddObject(new ProductKeyword()
                        {
                            ProductId = product.ProductInfoId,
                            KeywordTypeId = keywordTypeList.Single(m => m.Name == typeName).KeywordTypeId,
                            Value = value
                        });

                    // Builds the XPath of the value cell that follows the header cell containing the given text.
                    Func<string, string> cellXPath = header =>
                        "//table//tr//th[contains(text(),'" + header + "')]/following-sibling::td";

                    // Queues one keyword per paragraph found in the value cell of the given header.
                    Action<string, string> addParagraphKeywords = (header, typeName) =>
                    {
                        var paragraphs = info.DocumentNode.SelectNodes(cellXPath(header) + "//p");
                        if (paragraphs == null)
                        {
                            return;
                        }
                        foreach (var paragraph in paragraphs)
                        {
                            addKeyword(typeName, paragraph.InnerHtml.StripHtml().Trim());
                        }
                    };

                    // Reports malformed page data in the same console format as the catch block.
                    Action<string> reportInvalid = reason =>
                    {
                        Console.Write("-");
                        Console.WriteLine("Single product Error:" + reason + " File:" + filePath);
                    };

                    var nameNode = info.DocumentNode.SelectSingleNode(cellXPath("店舗名") + "//p");
                    if (nameNode == null)
                    {
                        return;
                    }
                    addKeyword("店铺名", nameNode.InnerText);

                    var addressNode = info.DocumentNode.SelectSingleNode(cellXPath("住所") + "//p");
                    if (addressNode == null)
                    {
                        return;
                    }
                    var address = addressNode.InnerText;
                    addKeyword("地址", address);

                    // The first two parts of the split address are stored as prefecture and city.
                    var addressParts = address.Split(add12.ToArray(), StringSplitOptions.RemoveEmptyEntries);
                    if (addressParts.Length < 2)
                    {
                        reportInvalid("Address could not be split into two parts: " + address);
                        return;
                    }
                    addKeyword("都道府県", addressParts[0]);
                    addKeyword("市", addressParts[1]);

                    // The coordinates are taken from the first statement of the body "onload" script.
                    var bodyNode = info.DocumentNode.SelectSingleNode("//body");
                    var onloadAttribute = bodyNode == null ? null : bodyNode.Attributes["onload"];
                    if (onloadAttribute == null)
                    {
                        reportInvalid("Page has no body onload attribute to read the location from.");
                        return;
                    }
                    var coordinates = onloadAttribute.Value
                        .Split(';')[0]
                        .Replace("ZdcEmapInit", "")
                        .Replace("'", "")
                        .Replace("(", "")
                        .Replace(")", "")
                        .Split(',');
                    if (coordinates.Length < 2)
                    {
                        reportInvalid("Location could not be parsed from: " + onloadAttribute.Value);
                        return;
                    }
                    addKeyword("lat", coordinates[0]);
                    addKeyword("long", coordinates[1]);

                    var telNode = info.DocumentNode.SelectSingleNode(cellXPath("電話番号") + "//p");
                    if (telNode == null)
                    {
                        return;
                    }
                    addKeyword("电话", telNode.InnerText);

                    var openTimeNode = info.DocumentNode.SelectSingleNode(cellXPath("営業時間") + "//p[@class='opentime']");
                    if (openTimeNode == null)
                    {
                        return;
                    }
                    addKeyword("营业时间", openTimeNode.InnerText);

                    // Closed days share one cell and are separated by "・".
                    var closeDayNode = info.DocumentNode.SelectSingleNode(cellXPath("定休日"));
                    if (closeDayNode != null)
                    {
                        var closeDays = closeDayNode.InnerHtml.Split(new string[] { "・" }, StringSplitOptions.RemoveEmptyEntries);
                        foreach (var closeDay in closeDays)
                        {
                            addKeyword("休息日", closeDay.StripHtml().Trim());
                        }
                    }

                    addParagraphKeywords("施設・サービス", "设施服务");
                    addParagraphKeywords("取扱商品", "商品类型");
                    addParagraphKeywords("決済方法", "结算方式");

                    db.SaveChanges();
                    Console.Write("+");
                }
            }
            catch (Exception ex)
            {
                Console.Write("-");
                Console.WriteLine("Single product Error:" + ex);
            }
        }
예제 #2
0
 /// <summary>
 /// Returns the product stored for the given URL, inserting and saving a new one when none exists.
 /// </summary>
 /// <param name="url">URL used for the lookup and stored on a newly created product.</param>
 /// <param name="siteId">Website id assigned to a newly created product.</param>
 /// <returns>The existing product, or the newly saved one.</returns>
 private ProductInfo AddProduct(string url, int siteId)
 {
     using (var context = new NCrawlerEntitiesDbServices())
     {
         var existing = context.ProductInfoes.SingleOrDefault(m => m.Url == url);
         if (existing != null)
         {
             return existing;
         }

         // Nothing stored for this URL yet: create the row and persist it right away.
         var created = new ProductInfo();
         created.WebsiteId = siteId;
         created.Url = url;
         context.ProductInfoes.AddObject(created);
         context.SaveChanges();
         return created;
     }
 }
예제 #3
0
 /// <summary>
 /// Makes sure every name in keywordList exists as a KeywordType and returns all keyword types.
 /// </summary>
 /// <returns>All KeywordType rows, read back after the missing ones were saved.</returns>
 private List<KeywordType> InitKeyword()
 {
     using (var db = new NCrawlerEntitiesDbServices())
     {
         // The Any() check is a database query, so it presumably does not see rows added
         // earlier in this loop before SaveChanges runs. Iterating distinct names keeps a
         // name repeated in keywordList from being inserted twice.
         foreach (var keyword in keywordList.Distinct())
         {
             if (!db.KeywordTypes.Any(m => m.Name == keyword))
             {
                 db.KeywordTypes.AddObject(new KeywordType() { Name = keyword });
             }
         }
         db.SaveChanges();
         return db.KeywordTypes.ToList();
     }
 }
예제 #4
0
 /// <summary>
 /// Returns the website stored for the given group, inserting and saving a new one when none exists.
 /// </summary>
 /// <param name="groupId">Group id used for the lookup and stored on a newly created website.</param>
 /// <param name="domain">URL stored on a newly created website; not used when a website already exists.</param>
 /// <returns>The existing website, or the newly saved one.</returns>
 private Website AddWebSite(int groupId, string domain)
 {
     using (var context = new NCrawlerEntitiesDbServices())
     {
         var existing = context.Websites.SingleOrDefault(m => m.GroupId == groupId);
         if (existing != null)
         {
             return existing;
         }

         // No website for this group yet: create the row and persist it right away.
         var created = new Website();
         created.GroupId = groupId;
         created.Url = domain;
         context.Websites.AddObject(created);
         context.SaveChanges();
         return created;
     }
 }
예제 #5
0
        /// <summary>
        /// Parses one saved medicine page and queues its fields as ProductKeyword rows,
        /// saving them in a single SaveChanges call at the end.
        /// </summary>
        /// <remarks>
        /// The JAN barcode cell is required: when it is missing the method returns without
        /// saving any keyword. All other fields are optional. Every exception is caught and
        /// written to the console.
        /// </remarks>
        /// <param name="filePath">Path of the saved HTML file; its name without extension is parsed as the CrawlHistory id.</param>
        /// <param name="webSiteId">Website id handed to AddProduct for the new product.</param>
        private void ProcessSingleFile(string filePath, int webSiteId)
        {
            try
            {
                if (!File.Exists(filePath))
                {
                    return;
                }

                var page = new HtmlDocument();
                page.LoadHtml(File.ReadAllText(filePath));
                var historyId = int.Parse(Path.GetFileNameWithoutExtension(filePath));

                using (var db = new NCrawlerEntitiesDbServices())
                {
                    var url = db.CrawlHistory.Single(m => m.Id == historyId).Key;
                    if (db.ProductInfoes.Any(m => m.Url == url))
                    {
                        Console.WriteLine("Duplicate Url:" + url);
                        return;
                    }

                    var product = AddProduct(url, webSiteId);

                    // Queues one keyword of the named type for the current product.
                    Action<string, string> store = (typeName, value) =>
                        db.ProductKeywords.AddObject(new ProductKeyword()
                        {
                            ProductId = product.ProductInfoId,
                            KeywordTypeId = keywordTypeList.Single(m => m.Name == typeName).KeywordTypeId,
                            Value = value
                        });

                    // Queues the plain text of the first node matching the XPath, if there is one.
                    Action<string, string> storeText = (xpath, typeName) =>
                    {
                        var cell = page.DocumentNode.SelectSingleNode(xpath);
                        if (cell != null)
                        {
                            store(typeName, cell.InnerText);
                        }
                    };

                    // Queues each fragment of the matching cell's HTML, unmodified, split on "<br>" and "、".
                    Action<string, string> storeRawParts = (xpath, typeName) =>
                    {
                        var cell = page.DocumentNode.SelectSingleNode(xpath);
                        if (cell == null)
                        {
                            return;
                        }
                        var parts = cell.InnerHtml.Split(new string[2] { "<br>", "、" }, StringSplitOptions.RemoveEmptyEntries);
                        foreach (var part in parts)
                        {
                            store(typeName, part);
                        }
                    };

                    // The barcode is the only mandatory field.
                    var barcodeCell = page.DocumentNode.SelectSingleNode("//table//tr//td[contains(text(),'JAN')]/following-sibling::td");
                    if (barcodeCell == null)
                    {
                        return;
                    }
                    store("条形码", barcodeCell.InnerText);

                    // Effects are split on several separators and stored with markup stripped and trimmed.
                    var effectCell = page.DocumentNode.SelectSingleNode("//table//tr//td[contains(text(),'効能・効果')]/following-sibling::td");
                    if (effectCell != null)
                    {
                        var effects = effectCell.InnerHtml.Split(new string[] { "<br>", "、", "・", "●", ",", "。" }, StringSplitOptions.RemoveEmptyEntries);
                        foreach (var effect in effects)
                        {
                            db.ProductKeywords.AddObject(new ProductKeyword()
                            {
                                ProductId = product.ProductInfoId,
                                KeywordTypeId = keywordTypeList.Single(m => m.Name == "効能効果").KeywordTypeId,
                                Value = effect.StripHtml().Trim()
                            });
                        }
                    }

                    storeText("//table//tr//td[contains(text(),'用法・用量')]/following-sibling::td", "用法用量");
                    storeText("//table//tr//td[contains(text(),'商品区分')]/following-sibling::td", "商品区分");
                    storeText("//table//tr//td[contains(text(),'剤形')]/following-sibling::td", "剂形");

                    storeRawParts("//table//tr//td[contains(text(),'添加物')]/following-sibling::td", "添加剂");
                    storeRawParts("//table//tr//td[contains(text(),'成分・分量')]/following-sibling::td", "成分分量");

                    storeText("//table//tr//td[contains(text(),'製造販売会社')]/following-sibling::td", "生产销售公司");

                    db.SaveChanges();
                    Console.Write("+");
                }
            }
            catch (Exception ex)
            {
                Console.Write("-");
                Console.WriteLine("Single product Error:" + ex);
            }
        }