예제 #1
0
 /// <summary>
 /// Downloads every ceneo.pl page whose link is produced by a
 /// <c>CeneoLinkProvider</c> built from <c>m_downloadString</c> and appends the
 /// raw page content to <c>pagesContent</c>.
 /// </summary>
 /// <param name="statistic">Receives each link via <c>addDowlodedPage</c> before it is downloaded.</param>
 /// <param name="product">Product being processed; it is not modified here.</param>
 /// <returns>The same <paramref name="product"/> instance that was passed in.</returns>
 public Product getPagesContent(IStatisctics statistic, Product product)
 {
     // WebClient is IDisposable; release it as soon as the last page is fetched.
     using (System.Net.WebClient client = new System.Net.WebClient())
     {
         client.Encoding = Encoding.UTF8;
         ILinkProvider provider = new CeneoLinkProvider(m_downloadString);

         string link;
         // An empty link marks the end of the sequence. A null link is treated
         // the same way so it never reaches DownloadString, which rejects null.
         while (!string.IsNullOrEmpty(link = provider.getLink()))
         {
             statistic.addDowlodedPage(link);
             string pageContent = client.DownloadString(link);
             pagesContent.Add(pageContent);
         }
     }

     return product;
 }
예제 #2
0
 /// <summary>
 /// Downloads the comment pages for a product from ceneo.pl and, when a matching
 /// product link is found, from skapiec.pl as well.
 /// </summary>
 /// <param name="productId">Product identifier appended to the ceneo.pl base address.</param>
 /// <param name="statistic">Statistics sink handed to both crawlers.</param>
 public HttpCommentGeter(string productId, IStatisctics statistic)
 {
     product = new Product();
     string pageName = "http://www.ceneo.pl/" + productId + "#tab=reviews";
     fillProductPropertis(product, pageName);

     m_webCrawlerCeneo = new CeneoWebCrawler(pageName);
     m_webCrawlerCeneo.getPagesContent(statistic, product);

     ILinkToProductFinder productFinder = new SkapiecLinkToProductFinder();
     string foundProduct = productFinder.getLinkToProduct(product);
     if (foundProduct != null)
     {
         // Reuse the link found above instead of calling the finder a second
         // time for the same product.
         m_webCrawlerSkapiec = new SkapiecWebCrawler("http://www.skapiec.pl" + foundProduct + "#opinie");
         m_webCrawlerSkapiec.getPagesContent(statistic, product);
     }
 }
        /// <summary>
        /// Searches skapiec.pl for the product's brand and model and returns the link
        /// of the search result whose title is closest (by Levenshtein distance) to
        /// the search phrase.
        /// </summary>
        /// <param name="product">Product whose <c>Brand</c> and <c>Model</c> form the search phrase.</param>
        /// <returns>
        /// The <c>href</c> of the best matching result, or <c>null</c> when no result
        /// has a distance below 20.
        /// </returns>
        public string getLinkToProduct(Product product)
        {
            // Build the search phrase: "brand model", trailing whitespace removed,
            // spaces replaced by '+', lower-cased.
            string productName = product.Brand + " " + product.Model;
            productName = productName.TrimEnd();
            productName = productName.Replace(' ', '+');
            productName = productName.ToLower();

            string pageContent;
            using (System.Net.WebClient client = new System.Net.WebClient())
            {
                client.Encoding = Encoding.UTF8;
                Uri uriAddres = new Uri("http://www.skapiec.pl/szukaj/w_calym_serwisie/" + productName);
                client.Headers.Add("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0");
                client.Headers.Add("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
                client.Headers.Add("Accept-Language", "pl,en-US;q=0.7,en;q=0.3");
                client.Headers.Add("Accept-Encoding", "gzip, deflate");
                // NOTE(review): hard-coded session cookie / CSRF token captured from a browser
                // session; consider moving it to configuration.
                client.Headers.Add("Cookie", "PHPSESSID=e82ovdm91g5vobf0700n6k6dk6; skapiec_track=MTE5MTc0NzIyNg%3D%3D; YII_CSRF_TOKEN=8cc33c83714d40df25451e3b10a93f8e675eeae4; _ga=GA1.2.288452357.1451829433; __utmx=197911341.0T-zQrfuTne0iqXdL--tYQ$73259467-63:.DHy8J0MdR82UaXE7-wwR2w$73259467-66:; __utmxx=197911341.0T-zQrfuTne0iqXdL--tYQ$73259467-63:1451829432:15552000.DHy8J0MdR82UaXE7-wwR2w$73259467-66:1451829719:15552000; __utma=197911341.288452357.1451829433.1451829435.1451829435.1; __utmb=197911341.6.9.1451829720200; __utmc=197911341; __utmz=197911341.1451829435.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __cktest=123; groki_uuid=f73327cb-113d-4372-8657-75d9eb124fd2; groki_usid=318888af-58b9-40b8-93ec-281d1d92dd73; __utmv=197911341.|2=UID=Brak=1; __gfp_64b=.XiwTpeFNPvRQK5GN82_ZmEnZo8ft2afgYYTqbCJuTT.07; ea_uuid=201601031457157309300828; SkaPaginationSearchPagination=20");

                // NOTE(review): the response is always decoded as gzip although the request
                // also advertises "deflate" - confirm the server never answers with another encoding.
                using (var rawStream = client.OpenRead(uriAddres))
                using (var responseStream = new System.IO.Compression.GZipStream(rawStream, System.IO.Compression.CompressionMode.Decompress))
                using (var reader = new System.IO.StreamReader(responseStream))
                {
                    pageContent = reader.ReadToEnd();
                }
            }

            HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
            htmlDoc.LoadHtml(pageContent);
            HtmlAgilityPack.HtmlNodeCollection nodes = htmlDoc.DocumentNode.SelectNodes("//div[@class=\"partial products js results\"]/div[@class=\"partial box-row js groki click\"]/div[@class =\"box\"] | //div[@class=\"partial products js results\"]/div[@class=\"partial box-row js groki click\"]/div[@class =\"box mono-offer\"]");

            string bestLink = null;
            int bestDistance = int.MaxValue;
            if (nodes != null)
            {
                foreach (HtmlAgilityPack.HtmlNode node in nodes)
                {
                    // SelectNodes yields null when nothing matches; a result box without
                    // any anchor is skipped instead of crashing.
                    HtmlAgilityPack.HtmlNodeCollection anchors = node.SelectNodes(".//a[1]");
                    if (anchors == null || anchors.Count == 0)
                    {
                        continue;
                    }

                    // Only the first anchor of each result box is considered.
                    HtmlAgilityPack.HtmlNode anchor = anchors[0];
                    string title = anchor.InnerHtml.ToLower();
                    int distance = Levenshtein.CalculateDistance(productName, title, 1);
                    if (bestDistance > distance)
                    {
                        bestDistance = distance;
                        bestLink = anchor.GetAttributeValue("href", "");
                    }
                }
            }

            // Matches with a distance of 20 or more are rejected.
            return bestDistance < 20 ? bestLink : null;
        }
예제 #4
0
 /// <summary>
 /// Parses <paramref name="pageContent"/> as HTML into the <c>htmlDoc</c> field and,
 /// when the document parsed without errors, fills <paramref name="product"/> via
 /// <c>fillProductInfo</c>.
 /// </summary>
 /// <param name="pageContent">Raw HTML of a single page.</param>
 /// <param name="product">Product passed on to <c>fillProductInfo</c>.</param>
 /// <exception cref="System.ExecutionEngineException">The parser reported at least one parse error.</exception>
 public void getCommentsContentFromPage(string pageContent, Product product)
 {
     htmlDoc = new HtmlAgilityPack.HtmlDocument();
     htmlDoc.LoadHtml(pageContent);

     bool hasParseErrors = htmlDoc.ParseErrors != null && htmlDoc.ParseErrors.Any();
     if (hasParseErrors)
     {
         // NOTE(review): ExecutionEngineException is marked obsolete in .NET; kept
         // because callers may rely on this exact exception type.
         throw new System.ExecutionEngineException();
     }

     if (htmlDoc.DocumentNode == null)
     {
         return;
     }

     fillProductInfo(product);
 }
예제 #5
0
 /// <summary>
 /// Fills the product's brand and model from the breadcrumb navigation of
 /// <c>htmlDoc</c>. The first space-separated word becomes the brand; the remaining
 /// words, each followed by a single space, become the model.
 /// </summary>
 /// <param name="product">Product whose <c>Brand</c> and <c>Model</c> are set.</param>
 private void fillBrandAndModel(Product product)
 {
     HtmlAgilityPack.HtmlNodeCollection breadcrumbNodes = htmlDoc.DocumentNode.SelectNodes("//nav[@class=\"breadcrumbs\"]//dl//strong");
     if (breadcrumbNodes == null)
     {
         return;
     }

     // When several nodes match, values from the last one win.
     foreach (HtmlAgilityPack.HtmlNode breadcrumb in breadcrumbNodes)
     {
         string[] words = breadcrumb.InnerHtml.Split(' ');
         product.Brand = words[0];

         // Everything after the brand is the model. The trailing space after the
         // last word is kept on purpose so the stored value stays unchanged.
         string modelName = words.Length > 1
             ? string.Join(" ", words, 1, words.Length - 1) + " "
             : "";
         product.Model = modelName;
     }
 }
예제 #6
0
        /// <summary>
        /// Checks whether the comment text held by <paramref name="node"/> is already
        /// present among the product's comments coming from the Skapiec portal.
        /// </summary>
        /// <param name="product">Product whose <c>Comments</c> are searched.</param>
        /// <param name="node">HTML node of a single opinion.</param>
        /// <returns><c>true</c> when an identical Skapiec comment already exists; otherwise <c>false</c>.</returns>
        private bool isCommentExistInProduct(Product product, HtmlNode node)
        {
            HtmlAgilityPack.HtmlNodeCollection paragraphs = node.SelectNodes(".//div[@class=\"opinion-container\"]//p");

            // The comment text is the concatenation of all paragraph texts.
            System.Text.StringBuilder collected = new System.Text.StringBuilder();
            if (paragraphs != null)
            {
                foreach (HtmlAgilityPack.HtmlNode paragraph in paragraphs)
                {
                    collected.Append(paragraph.InnerText);
                }
            }
            string candidateText = collected.ToString();

            foreach (CommentDb stored in product.Comments)
            {
                // Comments without text can never match.
                bool sameText = stored.Comment != null && stored.Comment.Equals(candidateText);
                if (sameText && stored.PortalName.Contains("Skapiec"))
                {
                    return true;
                }
            }

            return false;
        }
예제 #7
0
 /// <summary>
 /// Fills the product's type from the breadcrumb navigation of <c>htmlDoc</c>.
 /// </summary>
 /// <param name="product">Product whose <c>Type</c> is set.</param>
 private void fillType(Product product)
 {
     HtmlAgilityPack.HtmlNodeCollection typeNodes = htmlDoc.DocumentNode.SelectNodes("//nav[@class=\"breadcrumbs\"]//dd//span[last()]//span");
     if (typeNodes == null)
     {
         return;
     }

     // Every match overwrites the previous one, so the last node decides the type.
     for (int i = 0; i < typeNodes.Count; i++)
     {
         product.Type = typeNodes[i].InnerText;
     }
 }
예제 #8
0
 /// <summary>
 /// Collects the information available for the given product; here that means
 /// only its comments.
 /// </summary>
 /// <param name="product">Product handed to <c>fillComments</c>.</param>
 private void fillProductInfo(Product product)
 {
     this.fillComments(product);
 }
예제 #9
0
        /// <summary>
        /// Adds to the product every opinion found in <c>htmlDoc</c> that is not
        /// already among its comments. New comments get the portal name "Skapiec".
        /// </summary>
        /// <param name="product">Product whose <c>Comments</c> collection is extended.</param>
        void fillComments(Product product)
        {
            HtmlAgilityPack.HtmlNodeCollection opinionNodes = htmlDoc.DocumentNode.SelectNodes("//ul[@class=\"opinion-list\"]/li");
            if (opinionNodes == null)
            {
                return;
            }

            foreach (HtmlAgilityPack.HtmlNode opinionNode in opinionNodes)
            {
                // Skip opinions that are already stored for this product.
                if (isCommentExistInProduct(product, opinionNode))
                {
                    continue;
                }

                CommentDb comment = new CommentDb();
                comment.PortalName = "Skapiec";
                fillComment(comment, opinionNode);
                product.Comments.Add(comment);
            }
        }
예제 #10
0
 /// <summary>
 /// Feeds every downloaded page stored in <c>pagesContent</c> to the ceneo parser
 /// so that the comments end up in <paramref name="product"/>.
 /// </summary>
 /// <param name="product">Product passed to the parser for each page.</param>
 public void fillProduct(Product product)
 {
     foreach (string downloadedPage in pagesContent)
     {
         ceneoParser.getCommentsContentFromPage(downloadedPage, product);
     }
 }
예제 #11
0
        /// <summary>
        /// Checks whether the review text held by <paramref name="node"/> is already
        /// present among the product's comments.
        /// </summary>
        /// <param name="product">Product whose <c>Comments</c> are searched.</param>
        /// <param name="node">HTML node of a single review.</param>
        /// <returns><c>true</c> when a comment with identical text already exists; otherwise <c>false</c>.</returns>
        private bool isCommentExistInProduct(Product product, HtmlNode node)
        {
            HtmlAgilityPack.HtmlNodeCollection reviewBodies = node.SelectNodes(".//p[@class=\"product-review-body\"]");

            // The review text is the concatenation of all matching paragraph texts.
            System.Text.StringBuilder reviewText = new System.Text.StringBuilder();
            if (reviewBodies != null)
            {
                foreach (HtmlAgilityPack.HtmlNode reviewBody in reviewBodies)
                {
                    reviewText.Append(reviewBody.InnerText);
                }
            }
            string candidateText = reviewText.ToString();

            foreach (CommentDb stored in product.Comments)
            {
                // Comments without text can never match.
                if (stored.Comment != null && stored.Comment.Equals(candidateText))
                {
                    return true;
                }
            }

            return false;
        }
예제 #12
0
 /// <summary>
 /// Collects the information available for the given product: its comments,
 /// its type, and its brand and model, in that order.
 /// </summary>
 /// <param name="product">Product handed to each of the fill helpers.</param>
 private void fillProductInfo(Product product)
 {
     this.fillComments(product);
     this.fillType(product);
     this.fillBrandAndModel(product);
 }
예제 #13
0
        /// <summary>
        /// Adds to the product every review found in <c>htmlDoc</c> that is not
        /// already among its comments. New comments get the portal name "Ceneo".
        /// </summary>
        /// <param name="product">Product whose <c>Comments</c> collection is extended.</param>
        void fillComments(Product product)
        {
            HtmlAgilityPack.HtmlNodeCollection reviewNodes = htmlDoc.DocumentNode.SelectNodes("//ol[@class=\"product-reviews js_product-reviews js_reviews-hook\"]/li");
            if (reviewNodes == null)
            {
                return;
            }

            foreach (HtmlAgilityPack.HtmlNode reviewNode in reviewNodes)
            {
                // Reviews that are already stored for this product are skipped.
                if (isCommentExistInProduct(product, reviewNode))
                {
                    continue;
                }

                CommentDb review = new CommentDb { PortalName = "Ceneo" };
                fillComment(review, reviewNode);
                product.Comments.Add(review);
            }
        }
예제 #14
0
        /// <summary>
        /// Saves a downloaded product to the database. When products with the same
        /// brand, model and type already exist, only the comments they do not yet
        /// contain are added to them; otherwise the whole product is inserted.
        /// Every added comment gets the current time as its <c>LoadDate</c> and is
        /// reported to <paramref name="statistic"/>.
        /// </summary>
        /// <param name="product">Downloaded product together with its comments.</param>
        /// <param name="statistic">Receives the text of every comment that is added.</param>
        /// <exception cref="System.Data.Entity.Validation.DbEntityValidationException">
        /// Entity validation failed; the message lists the individual validation
        /// errors and the original exception is attached as the inner exception.
        /// </exception>
        private void addProductToDatabase(Product product, IStatisctics statistic)
        {
            try
            {
                using (var db = new DatabaseContext())
                {
                    // Run the query exactly once and close its reader before the
                    // loops below touch navigation properties of the results.
                    var productsInDb = (from p in db.Product
                                        where
                                            p.Brand.Equals(product.Brand) &&
                                            p.Model.Equals(product.Model) &&
                                            p.Type.Equals(product.Type)
                                        select p).ToList();

                    // One timestamp for everything added by this call.
                    DateTime time = DateTime.Now;

                    if (productsInDb.Count == 0)
                    {
                        // Unknown product: insert it with all of its comments.
                        foreach (CommentDb com in product.Comments)
                        {
                            com.LoadDate = time;
                            statistic.addAddedComment(com.Comment);
                        }

                        db.Product.Add(product);
                    }
                    else
                    {
                        foreach (Product productInDb in productsInDb)
                        {
                            foreach (CommentDb dowloadedProd in product.Comments)
                            {
                                CommentDb candidate = dowloadedProd;
                                bool contains = productInDb.Comments.Any(x => isSameComment(candidate, x));
                                if (contains)
                                {
                                    continue;
                                }

                                candidate.LoadDate = time;
                                statistic.addAddedComment(candidate.Comment);
                                productInDb.Comments.Add(candidate);
                            }
                        }
                    }

                    db.SaveChanges();
                }
            }
            catch (System.Data.Entity.Validation.DbEntityValidationException ex)
            {
                // Retrieve the error messages as a list of strings.
                var errorMessages = ex.EntityValidationErrors
                        .SelectMany(x => x.ValidationErrors)
                        .Select(x => x.ErrorMessage);

                // Join the list to a single string.
                var fullErrorMessage = string.Join("; ", errorMessages);

                // Combine the original exception message with the new one.
                var exceptionMessage = string.Concat(ex.Message, " The validation errors are: ", fullErrorMessage);

                // Throw a new DbEntityValidationException with the improved message,
                // keeping the original exception (and its stack trace) as the inner exception.
                throw new System.Data.Entity.Validation.DbEntityValidationException(exceptionMessage, ex.EntityValidationErrors, ex);
            }
        }

        /// <summary>
        /// Decides whether a downloaded comment duplicates a stored one. Advantages,
        /// Disadvantages, Comment and Date are compared only when both sides are
        /// non-null; Recommend, Stars, Usability, UsabilityVotes and Author must
        /// always be equal.
        /// </summary>
        /// <param name="downloaded">Freshly downloaded comment.</param>
        /// <param name="stored">Comment already attached to the stored product.</param>
        /// <returns><c>true</c> when all compared fields are equal.</returns>
        private static bool isSameComment(CommentDb downloaded, CommentDb stored)
        {
            if (downloaded.Advantages != null && stored.Advantages != null
                && !downloaded.Advantages.Equals(stored.Advantages))
            {
                return false;
            }
            if (downloaded.Disadvantages != null && stored.Disadvantages != null
                && !downloaded.Disadvantages.Equals(stored.Disadvantages))
            {
                return false;
            }
            if (downloaded.Comment != null && stored.Comment != null
                && !downloaded.Comment.Equals(stored.Comment))
            {
                return false;
            }
            if (downloaded.Date != null && stored.Date != null
                && !downloaded.Date.Equals(stored.Date))
            {
                return false;
            }

            // object.Equals tolerates null on either side, so a comment without an
            // author (or any other null value) no longer causes a NullReferenceException.
            return object.Equals(downloaded.Recommend, stored.Recommend)
                && object.Equals(downloaded.Stars, stored.Stars)
                && object.Equals(downloaded.Usability, stored.Usability)
                && object.Equals(downloaded.UsabilityVotes, stored.UsabilityVotes)
                && object.Equals(downloaded.Author, stored.Author);
        }
예제 #15
0
 /// <summary>
 /// Downloads the page at <paramref name="pageName"/>, parses it into the
 /// <c>htmlDoc</c> field and fills the product's brand, model and type from it.
 /// </summary>
 /// <param name="product">Product whose brand, model and type are filled.</param>
 /// <param name="pageName">Address of the product page to download.</param>
 /// <exception cref="System.ExecutionEngineException">The parser reported at least one parse error.</exception>
 private void fillProductPropertis(Product product, string pageName )
 {
     string pageContent;
     // WebClient is IDisposable; release it right after the download.
     using (System.Net.WebClient client = new System.Net.WebClient())
     {
         client.Encoding = Encoding.UTF8;
         pageContent = client.DownloadString(pageName);
     }

     htmlDoc = new HtmlAgilityPack.HtmlDocument();
     htmlDoc.LoadHtml(pageContent);

     if (htmlDoc.ParseErrors != null && htmlDoc.ParseErrors.Any())
     {
         // NOTE(review): ExecutionEngineException is marked obsolete in .NET; kept
         // because callers may rely on this exact exception type.
         throw new System.ExecutionEngineException();
     }

     if (htmlDoc.DocumentNode != null)
     {
         fillBrandAndModel(product);
         fillType(product);
     }
 }