/// <summary>
/// Parses a distribution detail page and fills in <paramref name="distro"/>'s
/// Description and ImageURL.
/// </summary>
/// <param name="data">Raw HTML of the detail page.</param>
/// <param name="name">Distribution name; an image is selected when its markup contains title="name".</param>
/// <param name="distro">Object that receives the parsed values.</param>
/// <remarks>
/// Description is always assigned; it is an empty string when no suitable title cell is found.
/// ImageURL is only assigned when a matching image with a src attribute exists.
/// </remarks>
static void ParseDataSub(string data, string name, distroTrend.Model.Distro distro)
{
    string description = string.Empty;

    var doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(data);

    // SelectNodes yields null (not an empty collection) when nothing matches,
    // so every result has to be checked before it is enumerated.
    var nodesMain = doc.DocumentNode.SelectNodes(@"//td[@class='TablesTitle']/a");
    if (nodesMain != null)
    {
        foreach (HtmlNode node in nodesMain)
        {
            // The description text is taken from the 11th child of the title cell.
            if (node.ParentNode.ChildNodes.Count > 10)
            {
                description = node.ParentNode.ChildNodes[10].InnerText;
            }
        }
    }

    // The image scan does not depend on the title nodes, so it runs once
    // instead of once per title node.
    var nodesImg = doc.DocumentNode.SelectNodes(@"//img");
    if (nodesImg != null)
    {
        string search = "title=" + "\"" + name + "\"";
        foreach (HtmlNode nodeImg in nodesImg)
        {
            if (!nodeImg.OuterHtml.Contains(search))
            {
                continue;
            }

            // An <img> without a src attribute is skipped rather than dereferenced.
            HtmlAttribute img = nodeImg.Attributes.FirstOrDefault(i => i.Name == "src");
            if (img != null)
            {
                // As before, the last matching image wins.
                distro.ImageURL = img.Value;
            }
        }
    }

    distro.Description = description;
}
/// <summary>
/// Compares each crawled distribution with its stored record (looked up by Code)
/// and updates the record when Description or ImageURL differ.
/// Any resulting status message is logged and echoed to the console.
/// </summary>
/// <param name="listDistro">Freshly crawled distributions.</param>
static void UpdateDB(List<distroTrend.Model.Distro> listDistro)
{
    var distroService = new BLL.Distro();
    string connectionString = System.Configuration.ConfigurationManager.AppSettings["dbConnection"];

    foreach (distroTrend.Model.Distro crawled in listDistro)
    {
        string status = string.Empty;
        distroTrend.Model.Distro stored = distroService.GetDistro(crawled.Code, connectionString);

        if (stored == null)
        {
            // NOTE(review): this branch only reports; no insert call is made here.
            status = crawled.Name + " is not found in DB. Inserting...";
        }
        else if (stored.Description != crawled.Description || stored.ImageURL != crawled.ImageURL)
        {
            status = crawled.Name + " found in DB but details are outdated. Updating...";
            distroService.Update(connectionString, stored.Id, crawled);
        }

        if (string.IsNullOrEmpty(status))
        {
            continue;
        }

        logger.Info(status);
        Console.WriteLine(status);
    }
}
/// <summary>
/// Downloads the DistroWatch detail page for one distribution and lets
/// ParseDataSub copy the parsed details into <paramref name="distro"/>.
/// </summary>
/// <param name="dwCode">Value appended to the "distribution=" query parameter.</param>
/// <param name="name">Distribution name, forwarded to ParseDataSub.</param>
/// <param name="distro">Object that receives the parsed details.</param>
static void SetSubData(string dwCode, string name, distroTrend.Model.Distro distro)
{
    string url = "https://distrowatch.com/table.php?distribution=" + dwCode;
    string data = GetWebSiteData(url);
    ParseDataSub(data, name, distro);
}
/// <summary>
/// Downloads the DistroWatch popularity page, parses it into distributions,
/// downloads each distribution's image, and exports a CSV-escaped copy of the
/// list to C:\temp\distro.csv.
/// </summary>
/// <returns>The parsed distributions (not the CSV-escaped copies).</returns>
static List<distroTrend.Model.Distro> CrawlData()
{
    string url = "https://distrowatch.com/dwres.php?resource=popularity";
    DateTime dt = DateTime.Now;
    string data = GetWebSiteData(url);

    Console.WriteLine("Parsing data started.");
    //logger.Debug(data);
    List<distroTrend.Model.Distro> listDistro = ParseData(data);
    Console.WriteLine("Parsing data completed.");

    string fileName = @"C:\temp\distro.csv";
    List<distroTrend.Model.Distro> listDistroCSV = new List<distroTrend.Model.Distro>();

    //Transform list for CSV.
    foreach (distroTrend.Model.Distro distro in listDistro)
    {
        distroTrend.Model.Distro distroCSV = new distroTrend.Model.Distro();
        distroCSV.Id = distro.Id;
        distroCSV.Code = distro.Code;
        distroCSV.Name = distro.Name;
        // Quote the description and double embedded quotes; a missing
        // description is exported as an empty quoted field instead of throwing.
        distroCSV.Description = "\"" + (distro.Description ?? string.Empty).Replace("\"", "\"\"") + "\"";
        distroCSV.HomePage = distro.HomePage;
        distroCSV.ImageURL = distro.ImageURL;

        //Download Image.
        if (!string.IsNullOrEmpty(distro.ImageURL))
        {
            string urlImage = "https://distrowatch.com/" + distro.ImageURL;

            // GetDirectoryName returns an empty string for a bare file name and
            // CreateDirectory rejects that, so only create a directory when
            // there is one. CreateDirectory is a no-op for existing directories.
            string directoryName = Path.GetDirectoryName(distro.ImageURL);
            if (!string.IsNullOrEmpty(directoryName))
            {
                System.IO.Directory.CreateDirectory(directoryName);
            }

            // The image is saved locally under the same path value as ImageURL.
            using (WebClient client = new WebClient())
            {
                client.DownloadFile(new Uri(urlImage), distro.ImageURL);
            }
        }

        listDistroCSV.Add(distroCSV);
    }

    Helper.Utility.WriteCSV(listDistroCSV, fileName);
    Console.WriteLine("Date Exported to " + fileName);

    // TotalSeconds is the whole elapsed time; Seconds is only the 0-59
    // component and under-reports any run longer than a minute.
    string message = "Data was extracted in " + (int)(DateTime.Now - dt).TotalSeconds + " secs.";
    logger.Info(message);
    Console.WriteLine(message);

    return listDistro;
}
/// <summary>
/// Parses the popularity page into a list of distributions, fetching each
/// distribution's detail page through SetSubData along the way.
/// </summary>
/// <param name="data">Raw HTML of the popularity page.</param>
/// <returns>
/// Distributions with unique names, in page order; an empty list when the
/// page contains no matching links.
/// </returns>
static List<distroTrend.Model.Distro> ParseData(string data)
{
    List<distroTrend.Model.Distro> listDistro = new List<distroTrend.Model.Distro>();

    var doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(data);

    // SelectNodes yields null (not an empty collection) when nothing matches.
    var nodes = doc.DocumentNode.SelectNodes(@"//td[@class='phr2']/a");
    if (nodes == null)
    {
        return listDistro;
    }

    int counter = 0;
    foreach (HtmlNode node in nodes)
    {
        counter++;
        string title = node.InnerText;
        distroTrend.Model.Distro distro = new distroTrend.Model.Distro();

        // The second token after splitting the link markup on double quotes
        // is used as the DistroWatch code (the first quoted value in the tag).
        string[] token = node.OuterHtml.Split(new char[] { '\"' }, StringSplitOptions.RemoveEmptyEntries);
        string dwCode = string.Empty;
        if (token.Length > 1)
        {
            dwCode = token[1];
        }

        distro.Code = GetCode(title);
        distro.Name = title;
        SetSubData(dwCode, name: title, distro);
        logger.Debug("Code=" + distro.Code + ", Name=" + distro.Name + ", Desc=" + distro.Description);

        // The same name can appear more than once on the page; keep the first.
        if (!listDistro.Exists(x => x.Name == title))
        {
            listDistro.Add(distro);
        }

        string message = counter + ". data Parsed for " + distro.Name;
        Console.WriteLine(message);

        //TODO: Temporary Break.
        if (listDistro.Count > 30)
        {
            break;
        }
    }

    return listDistro;
}
/// <summary>
/// Updates the Description and ImageURL columns of the [dbo].[tbl_Distro] row
/// with the given Id.
/// </summary>
/// <param name="connString">Connection string passed through to DBConn.UpdateData.</param>
/// <param name="id">Id of the row to update.</param>
/// <param name="distro">Source of the new Description and ImageURL values.</param>
/// <returns>The value returned by DBConn.UpdateData.</returns>
public int Update(string connString, int id, distroTrend.Model.Distro distro)
{
    DBConn conn = new DBConn();
    String query = "UPDATE [dbo].[tbl_Distro] SET [Description] = @Description, ImageURL = @ImageURL WHERE Id = @Id";

    // A SqlParameter whose Value is a null reference is not sent to the server,
    // which makes the command fail; DBNull.Value is the explicit SQL NULL.
    List<SqlParameter> sp = new List<SqlParameter>()
    {
        new SqlParameter() { ParameterName = "@Id", SqlDbType = SqlDbType.Int, Value = id },
        new SqlParameter() { ParameterName = "@Description", SqlDbType = SqlDbType.NVarChar, Value = (object)distro.Description ?? DBNull.Value },
        new SqlParameter() { ParameterName = "@ImageURL", SqlDbType = SqlDbType.NVarChar, Value = (object)distro.ImageURL ?? DBNull.Value }
    };

    return conn.UpdateData(connString, query, sp);
}
/// <summary>
/// Forwards the update request unchanged to a new DAL.Distro instance.
/// </summary>
/// <param name="connString">Connection string handed to the data layer.</param>
/// <param name="id">Id of the record to update.</param>
/// <param name="distro">Object carrying the new values.</param>
/// <returns>Whatever DAL.Distro.Update returns.</returns>
public int Update(string connString, int id, distroTrend.Model.Distro distro)
{
    return new DAL.Distro().Update(connString, id, distro);
}
/// <summary>
/// Parses the popularity page and builds one Points entry per distribution
/// that is already known to the database (matched by code).
/// </summary>
/// <param name="data">Raw HTML of the popularity page.</param>
/// <param name="connString">Connection string used to load the known distributions.</param>
/// <returns>
/// One entry per matched distribution; when a distribution appears more than
/// once on the page, the last points value seen replaces the earlier one.
/// Empty when the page contains no matching links.
/// </returns>
static List<distroTrend.Model.Points> ParseDataDWPoints(string data, string connString)
{
    BLL.Distro distroBL = new BLL.Distro();
    List<distroTrend.Model.Distro> listDistro = distroBL.GetDistro(connString);
    List<distroTrend.Model.Points> listDistroPoints = new List<distroTrend.Model.Points>();

    var doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(data);

    // SelectNodes yields null (not an empty collection) when nothing matches.
    var nodes = doc.DocumentNode.SelectNodes(@"//td[@class='phr2']/a");
    if (nodes == null)
    {
        return listDistroPoints;
    }

    foreach (HtmlNode node in nodes)
    {
        string title = node.InnerText;
        string code = GetCode(title);

        // The points text is read from child index 5 of the link's grandparent
        // node, so that node needs at least six children.
        Decimal points = 0;
        string pointsString = string.Empty;
        HtmlNode nodeTr = node.ParentNode.ParentNode;
        if (nodeTr != null && nodeTr.ChildNodes.Count > 5)
        {
            pointsString = nodeTr.ChildNodes[5].InnerText;
        }

        // Scraped text is machine data: parse it independently of the culture
        // the process happens to run under. points stays 0 when parsing fails.
        Decimal.TryParse(
            pointsString,
            System.Globalization.NumberStyles.Number,
            System.Globalization.CultureInfo.InvariantCulture,
            out points);
        logger.Debug("Points in string=" + pointsString + ", and after convertion points=" + points);

        // Records without a code can never match and must not be dereferenced.
        distroTrend.Model.Distro distro = listDistro.FirstOrDefault(x => x.Code != null && x.Code.Trim() == code);
        if (distro == null)
        {
            continue;
        }

        distroTrend.Model.Points objPoints = new distroTrend.Model.Points();
        objPoints.distroId = distro.Id;
        objPoints.DistroWatchPoints = points;
        objPoints.Date = DateTime.Now;

        distroTrend.Model.Points pointE = listDistroPoints.FirstOrDefault(p => p.distroId == objPoints.distroId);
        if (pointE != null)
        {
            // Already collected for this distribution: only refresh the points.
            pointE.DistroWatchPoints = points;
            logger.Debug("Updated in list, Id=" + objPoints.distroId + ", Points=" + objPoints.DistroWatchPoints);
        }
        else
        {
            listDistroPoints.Add(objPoints);
            logger.Debug("Added to list, Id=" + objPoints.distroId + ", Points=" + objPoints.DistroWatchPoints);
        }
    }

    return listDistroPoints;
}