/// <summary>
/// Extracts a distro's description and image URL from the HTML of its detail page
/// and stores them on <paramref name="distro"/>.
/// </summary>
/// <param name="data">Raw HTML of the detail page; null is treated as an empty document.</param>
/// <param name="name">Distro name, matched against the <c>title="..."</c> text of each image tag.</param>
/// <param name="distro">Model whose <c>Description</c> (always) and <c>ImageURL</c> (when found) are assigned.</param>
static void ParseDataSub(string data, string name, distroTrend.Model.Distro distro)
        {
            string description = string.Empty;

            var doc = new HtmlAgilityPack.HtmlDocument();

            // LoadHtml rejects null, so fall back to an empty document.
            doc.LoadHtml(data ?? string.Empty);

            // SelectNodes yields null (not an empty collection) when nothing matches.
            var nodesMain = doc.DocumentNode.SelectNodes(@"//td[@class='TablesTitle']/a");

            if (nodesMain != null)
            {
                foreach (HtmlNode node in nodesMain)
                {
                    // The description is read from the 11th child of the anchor's parent cell;
                    // when several anchors qualify, the last one wins.
                    if (node.ParentNode.ChildNodes.Count > 10)
                    {
                        description = node.ParentNode.ChildNodes[10].InnerText;
                    }
                }
            }

            // The image scan does not depend on the title anchors, so it runs once
            // instead of once per anchor.
            var nodesImg = doc.DocumentNode.SelectNodes(@"//img");

            if (nodesImg != null)
            {
                string search = "title=" + "\"" + name + "\"";

                foreach (HtmlNode nodeImg in nodesImg)
                {
                    if (!nodeImg.OuterHtml.Contains(search))
                    {
                        continue;
                    }

                    // An <img> without a src attribute is skipped rather than dereferenced.
                    HtmlAttribute img = nodeImg.Attributes.FirstOrDefault(i => i.Name == "src");
                    if (img != null)
                    {
                        distro.ImageURL = img.Value;
                    }
                }
            }

            distro.Description = description;
        }
        /// <summary>
        /// Compares each scraped distro with its database record (looked up by code) and
        /// updates the record when the description or image URL differs.
        /// </summary>
        /// <param name="listDistro">Freshly scraped distros to reconcile with the database.</param>
        /// <remarks>
        /// NOTE(review): when no record exists, an "Inserting..." message is logged but this
        /// method issues no insert call itself.
        /// </remarks>
        static void UpdateDB(List <distroTrend.Model.Distro> listDistro)
        {
            BLL.Distro distroService = new BLL.Distro();

            string connectionString = System.Configuration.ConfigurationManager.AppSettings["dbConnection"];

            foreach (distroTrend.Model.Distro scraped in listDistro)
            {
                distroTrend.Model.Distro stored = distroService.GetDistro(scraped.Code, connectionString);
                string status;

                if (stored == null)
                {
                    status = scraped.Name + " is not found in DB. Inserting...";
                }
                else if (stored.Description == scraped.Description && stored.ImageURL == scraped.ImageURL)
                {
                    // Record is already up to date: nothing to write and nothing to report.
                    continue;
                }
                else
                {
                    status = scraped.Name + " found in DB but details are outdated. Updating...";
                    distroService.Update(connectionString, stored.Id, scraped);
                }

                logger.Info(status);
                Console.WriteLine(status);
            }
        }
        /// <summary>
        /// Downloads the DistroWatch detail page for <paramref name="dwCode"/> and fills
        /// <paramref name="distro"/> from it via <c>ParseDataSub</c>.
        /// </summary>
        /// <param name="dwCode">Value appended to the <c>distribution=</c> query parameter.</param>
        /// <param name="name">Distro name forwarded to <c>ParseDataSub</c> for image matching.</param>
        /// <param name="distro">Model that receives the parsed details.</param>
        static void SetSubData(string dwCode, string name, distroTrend.Model.Distro distro)
        {
            string url  = "https://distrowatch.com/table.php?distribution=" + dwCode;
            string data = GetWebSiteData(url);

            ParseDataSub(data, name, distro);
        }
        /// <summary>
        /// Downloads the DistroWatch popularity page, parses it into distros, downloads each
        /// distro's image, and exports the list to a CSV file.
        /// </summary>
        /// <returns>The parsed distros (unmodified; CSV escaping is applied to copies only).</returns>
        static List <distroTrend.Model.Distro> CrawlData()
        {
            string   url  = "https://distrowatch.com/dwres.php?resource=popularity";
            DateTime dt   = DateTime.Now;
            string   data = GetWebSiteData(url);

            Console.WriteLine("Parsing data started.");
            //logger.Debug(data);
            List <distroTrend.Model.Distro> listDistro = ParseData(data);

            Console.WriteLine("Parsing data completed.");

            string fileName = @"C:\temp\distro.csv";

            List <distroTrend.Model.Distro> listDistroCSV = new List <distroTrend.Model.Distro>();

            //Transform list for CSV.
            foreach (distroTrend.Model.Distro distro in listDistro)
            {
                distroTrend.Model.Distro distroCSV = new distroTrend.Model.Distro();
                distroCSV.Id          = distro.Id;
                distroCSV.Code        = distro.Code;
                distroCSV.Name        = distro.Name;
                // Quote the description and double embedded quotes; a missing description
                // becomes an empty quoted field instead of throwing.
                distroCSV.Description = "\"" + (distro.Description ?? string.Empty).Replace("\"", "\"\"") + "\"";
                distroCSV.HomePage    = distro.HomePage;
                distroCSV.ImageURL    = distro.ImageURL;

                //Download Image.
                if (!string.IsNullOrEmpty(distro.ImageURL))
                {
                    string urlImage = "https://distrowatch.com/" + distro.ImageURL;

                    // The image is saved under the same relative path it has on the site.
                    string directoryName = Path.GetDirectoryName(distro.ImageURL);

                    // GetDirectoryName yields an empty string for a bare file name, and
                    // CreateDirectory rejects that; CreateDirectory is already a no-op
                    // when the directory exists.
                    if (!string.IsNullOrEmpty(directoryName))
                    {
                        System.IO.Directory.CreateDirectory(directoryName);
                    }

                    using (WebClient client = new WebClient())
                    {
                        client.DownloadFile(new Uri(urlImage), distro.ImageURL);
                    }
                }

                listDistroCSV.Add(distroCSV);
            }

            Helper.Utility.WriteCSV(listDistroCSV, fileName);

            Console.WriteLine("Data Exported to " + fileName);

            // TotalSeconds, not Seconds: Seconds is only the 0-59 component of the elapsed time.
            int    elapsedSeconds = (int)(DateTime.Now - dt).TotalSeconds;
            string message        = "Data was extracted in " + elapsedSeconds + " secs.";

            logger.Info(message);
            Console.WriteLine(message);

            return(listDistro);
        }
        /// <summary>
        /// Builds a list of distros from the anchors inside <c>td.phr2</c> cells of the given
        /// HTML, fetching each distro's detail page through <c>SetSubData</c>.
        /// </summary>
        /// <param name="data">Raw HTML to parse; null is treated as an empty document.</param>
        /// <returns>
        /// Distros with unique names, in document order. Returns an empty list when no anchors
        /// match. Parsing currently stops once the list holds more than 30 entries.
        /// </returns>
        static List <distroTrend.Model.Distro> ParseData(string data)
        {
            List <distroTrend.Model.Distro> listDistro = new List <distroTrend.Model.Distro>();
            var doc = new HtmlAgilityPack.HtmlDocument();

            doc.LoadHtml(data ?? string.Empty);

            // SelectNodes yields null (not an empty collection) when nothing matches.
            var nodes = doc.DocumentNode.SelectNodes(@"//td[@class='phr2']/a");
            if (nodes == null)
            {
                return(listDistro);
            }

            HashSet <string> seenNames = new HashSet <string>();
            int counter = 0;

            foreach (HtmlNode node in nodes)
            {
                counter++;
                string title = node.InnerText;

                // Skip names already collected before doing any work: SetSubData performs a
                // web request whose result would be thrown away for a duplicate.
                if (!seenNames.Add(title))
                {
                    continue;
                }

                distroTrend.Model.Distro distro = new distroTrend.Model.Distro();

                // The second double-quote-delimited piece of the anchor markup is used as the
                // DistroWatch code (presumably the href value — confirm against the page markup).
                string[] token  = node.OuterHtml.Split(new char[] { '\"' }, StringSplitOptions.RemoveEmptyEntries);
                string   dwCode = string.Empty;
                if (token.Length > 1)
                {
                    dwCode = token[1];
                }

                distro.Code = GetCode(title);
                distro.Name = title;
                SetSubData(dwCode, name: title, distro);

                logger.Debug("Code=" + distro.Code + ", Name=" + distro.Name + ", Desc=" + distro.Description);

                listDistro.Add(distro);

                string message = counter + ". data Parsed for " + distro.Name;
                Console.WriteLine(message);

                //TODO: Temporary Break.
                if (listDistro.Count > 30)
                {
                    break;
                }
            }

            return(listDistro);
        }
        // Example #6
        // 0
        /// <summary>
        /// Updates the <c>Description</c> and <c>ImageURL</c> columns of the
        /// <c>tbl_Distro</c> row identified by <paramref name="id"/>.
        /// </summary>
        /// <param name="connString">Connection string passed through to <c>DBConn.UpdateData</c>.</param>
        /// <param name="id">Value bound to the <c>@Id</c> parameter of the WHERE clause.</param>
        /// <param name="distro">Source of the new description and image URL.</param>
        /// <returns>The value returned by <c>DBConn.UpdateData</c>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="distro"/> is null.</exception>
        public int Update(string connString, int id, distroTrend.Model.Distro distro)
        {
            if (distro == null)
            {
                throw new ArgumentNullException(nameof(distro));
            }

            DBConn conn  = new DBConn();
            String query = "UPDATE [dbo].[tbl_Distro] SET [Description] = @Description, ImageURL = @ImageURL WHERE Id = @Id";

            // A CLR null parameter value is treated by ADO.NET as "parameter not supplied";
            // database NULL has to be sent as DBNull.Value.
            List <SqlParameter> sp = new List <SqlParameter>()
            {
                new SqlParameter()
                {
                    ParameterName = "@Id", SqlDbType = SqlDbType.Int, Value = id
                },
                new SqlParameter()
                {
                    ParameterName = "@Description", SqlDbType = SqlDbType.NVarChar, Value = (object)distro.Description ?? DBNull.Value
                },
                new SqlParameter()
                {
                    ParameterName = "@ImageURL", SqlDbType = SqlDbType.NVarChar, Value = (object)distro.ImageURL ?? DBNull.Value
                }
            };

            return(conn.UpdateData(connString, query, sp));
        }
        /// <summary>
        /// Forwards the update request to a new <c>DAL.Distro</c> instance.
        /// </summary>
        /// <param name="connString">Connection string handed to the data-access call.</param>
        /// <param name="id">Identifier handed to the data-access call.</param>
        /// <param name="distro">Distro handed to the data-access call.</param>
        /// <returns>Whatever <c>DAL.Distro.Update</c> returns.</returns>
        public int Update(string connString, int id, distroTrend.Model.Distro distro)
        {
            return new DAL.Distro().Update(connString, id, distro);
        }
        /// <summary>
        /// Reads a points value for each anchor inside <c>td.phr2</c> cells of the given HTML
        /// and pairs it with the matching distro loaded from the database.
        /// </summary>
        /// <param name="data">Raw HTML to parse; null is treated as an empty document.</param>
        /// <param name="connString">Connection string used to load the known distros.</param>
        /// <returns>
        /// One <c>Points</c> entry per matched distro id; when a distro appears more than once,
        /// the points of the last occurrence are kept. Empty when no anchors match.
        /// </returns>
        static List <distroTrend.Model.Points> ParseDataDWPoints(string data, string connString)
        {
            BLL.Distro distroBL = new BLL.Distro();
            List <distroTrend.Model.Distro> listDistro       = distroBL.GetDistro(connString);
            List <distroTrend.Model.Points> listDistroPoints = new List <distroTrend.Model.Points>();
            var doc = new HtmlAgilityPack.HtmlDocument();

            doc.LoadHtml(data ?? string.Empty);

            // SelectNodes yields null (not an empty collection) when nothing matches.
            var nodes = doc.DocumentNode.SelectNodes(@"//td[@class='phr2']/a");
            if (nodes == null)
            {
                return(listDistroPoints);
            }

            foreach (HtmlNode node in nodes)
            {
                string  title        = node.InnerText;
                string  code         = GetCode(title);
                Decimal points       = 0;
                string  pointsString = string.Empty;

                // The points text is taken from the 6th child of the anchor's grandparent
                // (presumably the table row — confirm against the page markup), so at least
                // six children are required.
                HtmlNode nodeTr = node.ParentNode?.ParentNode;
                if (nodeTr != null && nodeTr.ChildNodes.Count > 5)
                {
                    pointsString = nodeTr.ChildNodes[5].InnerText;
                }

                // Scraped text is machine data: parse it independently of the machine's culture.
                // A failed parse leaves points at 0.
                Decimal.TryParse(
                    pointsString,
                    System.Globalization.NumberStyles.Number,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out points);
                logger.Debug("Points in string=" + pointsString + ", and after convertion points=" + points);

                distroTrend.Model.Distro distro = listDistro.FirstOrDefault(x => x.Code != null && x.Code.Trim() == code);

                if (distro == null)
                {
                    continue;
                }

                distroTrend.Model.Points pointE = listDistroPoints.FirstOrDefault(p => p.distroId == distro.Id);
                if (pointE != null)
                {
                    // Only the points are refreshed; the existing entry keeps its original Date.
                    pointE.DistroWatchPoints = points;
                    logger.Debug("Updated in list, Id=" + distro.Id + ", Points=" + points);
                }
                else
                {
                    distroTrend.Model.Points objPoints = new distroTrend.Model.Points();
                    objPoints.distroId          = distro.Id;
                    objPoints.DistroWatchPoints = points;
                    objPoints.Date = DateTime.Now;

                    listDistroPoints.Add(objPoints);
                    logger.Debug("Added to list, Id=" + objPoints.distroId + ", Points=" + objPoints.DistroWatchPoints);
                }
            }

            return(listDistroPoints);
        }