/// <summary>
/// Compares every pair of rows in the "rssdeals" table of the data set returned by
/// <c>GetRtDealsDataSet</c> and writes one console line for each pair whose titles
/// score above the given similarity threshold, followed by the total number of pairs found.
/// </summary>
/// <param name="SimiPecentage">
/// Threshold in percent. A pair is reported only when its similarity score
/// (the value returned by <c>Similarity.sim</c> multiplied by 100) is strictly greater than this value.
/// </param>
/// <remarks>
/// Nothing is written when the data set is null. A "dealsID" value that is not a valid
/// integer makes <c>Convert.ToInt32</c> throw while the rows are being read.
/// </remarks>
public static void DuplicateResult(int SimiPecentage)
{
    DataSet ds = GetRtDealsDataSet();
    if (ds == null)
    {
        return;
    }

    // A missing table is treated the same as an empty one.
    DataTable deals = ds.Tables["rssdeals"];
    int rowCount = (deals == null) ? 0 : deals.Rows.Count;

    // Read each row once so the pairwise loop below does no string conversion or parsing.
    string[] titles = new string[rowCount];
    string[] sourceIds = new string[rowCount];
    string[] dealIdTexts = new string[rowCount];
    int[] dealIds = new int[rowCount];
    for (int r = 0; r < rowCount; r++)
    {
        DataRow row = deals.Rows[r];
        titles[r] = row["Title"].ToString();
        sourceIds[r] = row["SourceID"].ToString();
        dealIdTexts[r] = row["dealsID"].ToString();
        dealIds[r] = Convert.ToInt32(dealIdTexts[r]);
    }

    Similarity s = new Similarity();
    int Total = 0;

    for (int i = 0; i < rowCount; i++)
    {
        for (int j = 0; j < rowCount; j++)
        {
            // Only the ordering "lower id first" is compared, so each pair is scored once
            // and a row is never compared with itself.
            if (dealIds[i] >= dealIds[j])
            {
                continue;
            }

            int Percentage = Convert.ToInt16(s.sim(titles[i], titles[j]) * 100);
            if (Percentage > SimiPecentage)
            {
                Total++;
                Console.WriteLine(string.Format("{0}-{1} found duplicate at {2}-{3} Similarity:{4}%", sourceIds[i], dealIdTexts[i], sourceIds[j], dealIdTexts[j], Percentage));
            }
        }
    }

    Console.WriteLine("Total Duplicate Found: " + Total.ToString());
}
/// <summary>
/// Reads every RSS feed listed in the "sourcerssseed" table, classifies each feed item,
/// skips items that already exist or whose title is too similar to a stored deal from a
/// different source, inserts the remaining items (deal plus history record), and prints
/// per-source and overall counters to the console.
/// </summary>
/// <param name="PercentageLimitation">
/// Similarity threshold in percent. An item whose title scores at or above this value
/// against a stored deal from another source is recorded as a duplicate instead of being inserted.
/// </param>
/// <remarks>
/// Each "Additional" cell is split on ',' into entries of the form <c>type*url</c>.
/// Any exception raised while processing one entry is reported by e-mail and the scan
/// continues with the next entry. When the data set cannot be loaded, no feeds are read
/// and the summary is printed with zero counts.
/// </remarks>
public static void GetRSSDeals(int PercentageLimitation)
{
    DateTime starttime = DateTime.Now;
    DataSet ds = GetRtDealsDataSet();
    Similarity s = new Similarity();

    int TotalInsert = 0;
    int TotalFailed = 0;
    int TotalDuplicate = 0;
    int TotalSimilar = 0;
    int TotalExsitDuplicate = 0;
    int TotalExsitSameSource = 0;

    // Capture the source rows once, after the null check, so that neither the counter
    // arrays nor the final summary depend on later refreshes of ds.
    DataRowCollection sourceRows = null;
    if (ds != null && ds.Tables["sourcerssseed"] != null)
    {
        sourceRows = ds.Tables["sourcerssseed"].Rows;
    }
    int sourceCount = (sourceRows == null) ? 0 : sourceRows.Count;

    // Per-source counters, indexed by the position of the source row.
    string[] SubSourceName = new string[sourceCount];
    int[] SubInsert = new int[sourceCount];
    int[] SubDuplicate = new int[sourceCount];
    int[] SubFailed = new int[sourceCount];

    for (int k = 0; k < sourceCount; k++)
    {
        DataRow dr = sourceRows[k];
        // ToInt32 rather than ToInt16: the value is stored in an int, so there is no
        // reason to overflow on ids above 32767.
        int id = Convert.ToInt32(dr["SourceID"]);
        string[] AddiURL = dr["Additional"].ToString().Split(',');
        SubSourceName[k] = id.ToString();

        for (int i = 0; i < AddiURL.Length; i++)
        {
            if (AddiURL[i] == "")
            {
                continue;
            }

            try
            {
                string[] temp = AddiURL[i].Split('*');
                string RssType = temp[0];
                string RssUrl = temp[1];
                bool isHotFeed = RssType.ToLower().Contains("hot");

                // The cut-off date is produced by a round trip through the short date string,
                // which drops the time of day. Kept exactly as written because RSS.Feed's
                // handling of the value is not visible here.
                RSS.Feed feed = new RSS.Feed(RssUrl, DateTime.Parse(System.DateTime.Now.AddDays(-3).ToShortDateString()));
                feed.Read();

                for (int j = 0; j < feed.Channel.Items.Count; j++)
                {
                    string title = feed.Channel.Items[j].title;
                    string link = feed.Channel.Items[j].link;

                    // The original assembled an HTML snippet for the item and then reset it
                    // to "" before any use, so every insert received an empty string.
                    // That observable behavior is preserved.
                    string strHtml = "";

                    int isAppliances = 0;
                    int isFinance = 0;
                    int isTravel = 0;
                    int isDrug = 0;
                    int isElectronic = 0;
                    int isBeauty = 0;
                    int isOfficeSupplies = 0;
                    int isRestaurant = 0;
                    int isJewelry = 0;
                    int isOthers = 0;
                    int isAppeal = 0;
                    int isHot = isHotFeed ? 1 : 0;

                    switch (GetDealsCatetogry(title, ds))
                    {
                        case "Appeal": isAppeal = 1; break;
                        case "Appliances": isAppliances = 1; break;
                        case "Beauty": isBeauty = 1; break;
                        // Fixed: this case used to set isBeauty, leaving isDrug permanently 0.
                        case "Drug": isDrug = 1; break;
                        case "Electronics": isElectronic = 1; break;
                        case "Finance": isFinance = 1; break;
                        case "Jewelry & Watches": isJewelry = 1; break;
                        case "Office Supplies": isOfficeSupplies = 1; break;
                        case "Restaurant & Food": isRestaurant = 1; break;
                        case "Travels": isTravel = 1; break;
                        default: isOthers = 1; break;
                    }

                    // Kept as the unique parameter: the link before any rewrite below.
                    string OriginalLink = link;

                    // When the loaded deals table is empty the item is inserted straight away;
                    // the result of this insert is not inspected (as in the original).
                    if (ds.Tables["rssdeals"].Rows.Count == 0)
                    {
                        InsertRssDeals(id, title, link, 0, OriginalLink, strHtml, feed.Channel.Items[j].pubDate, isAppliances, isFinance, isTravel, isDrug, isElectronic, isBeauty, isOfficeSupplies, isRestaurant, isJewelry, isOthers, isAppeal, isHot);
                    }

                    bool isSimilar = false;
                    bool isExist = false;
                    bool isDuplicateExsit = false;
                    string SourceID = "";
                    string DealsID = "";
                    int Percentage = 0;

                    bool isExsitSameSource = isRssTitleExsit(title);
                    if (!isExsitSameSource)
                    {
                        isExist = isRssDealExist(link);
                    }
                    if (!isExist)
                    {
                        isDuplicateExsit = isRssDealDuplicateExist(link);
                    }

                    // The similarity scan runs only when none of the existence checks matched.
                    if (!isExist && !isDuplicateExsit && !isExsitSameSource)
                    {
                        // Reload so deals inserted earlier in this run take part in the comparison.
                        // A failed reload is reported through the catch below without
                        // overwriting ds, so later items are not poisoned by a null data set.
                        DataSet refreshed = GetRtDealsDataSet();
                        if (refreshed == null)
                        {
                            throw new InvalidOperationException("Deals data set could not be reloaded.");
                        }
                        ds = refreshed;

                        foreach (DataRow drr in ds.Tables["rssdeals"].Rows)
                        {
                            string rowSourceId = drr["SourceID"].ToString();
                            if (SubSourceName[k] == rowSourceId)
                            {
                                continue; // never compare an item with deals from its own source
                            }

                            Percentage = Convert.ToInt16(s.sim(title, drr["Title"].ToString()) * 100);
                            if (Percentage >= PercentageLimitation)
                            {
                                isSimilar = true;
                                SourceID = rowSourceId;
                                DealsID = drr["dealsID"].ToString();
                                break; // first match is enough
                            }
                        }
                    }

                    if (isSimilar || isExist || isDuplicateExsit || isExsitSameSource)
                    {
                        // The title is passed as a format argument, not concatenated into the
                        // format string, so '{' or '}' in a title cannot raise FormatException.
                        string SimilarDuplicateResult = "";
                        if (isExist)
                        {
                            Console.ForegroundColor = ConsoleColor.DarkCyan;
                            SimilarDuplicateResult = string.Format("{0} from {2} Found Duplicate Record At {1}", title, DateTime.Now.ToString(), SubSourceName[k]);
                        }
                        else if (isSimilar)
                        {
                            TotalSimilar++;
                            Console.ForegroundColor = ConsoleColor.Cyan;
                            SimilarDuplicateResult = string.Format("{0} from {3} Found Duplicate Record {4}% At {1}-{2}: {5}", title, SourceID, DealsID, SubSourceName[k], Percentage, DateTime.Now.ToString());
                            InsertRssDealsDuplicate(id, title, link, 0, OriginalLink, strHtml, feed.Channel.Items[j].pubDate, isAppliances, isFinance, isTravel, isDrug, isElectronic, isBeauty, isOfficeSupplies, isRestaurant, isJewelry, isOthers, isAppeal, isHot);
                        }
                        else if (isDuplicateExsit)
                        {
                            TotalExsitDuplicate++;
                            Console.ForegroundColor = ConsoleColor.Magenta;
                            SimilarDuplicateResult = string.Format("{0} from {2} Found Duplicate Record At {1}", title, DateTime.Now.ToString(), SubSourceName[k]);
                        }
                        else if (isExsitSameSource)
                        {
                            TotalExsitSameSource++;
                            Console.ForegroundColor = ConsoleColor.Yellow;
                            // Fixed: the source and the timestamp were passed in swapped order,
                            // unlike the three sibling messages.
                            SimilarDuplicateResult = string.Format("{0} From {1} found Duplicate Record at {2}", title, SubSourceName[k], DateTime.Now.ToString());
                        }

                        TotalDuplicate++;
                        SubDuplicate[k]++;
                        Console.WriteLine(SimilarDuplicateResult);
                        continue; // next feed item
                    }

                    // Prefix the link when AlterDealURL returns a non-empty value for this source.
                    string tempURLReasult = AlterDealURL(link, id, ds);
                    if (tempURLReasult != "")
                    {
                        link = tempURLReasult + link;
                    }

                    // Replace is a no-op when the text is absent, so the former
                    // ToLower().Contains() pre-check was redundant.
                    title = title.Replace("’s", "");

                    string result = InsertRssDeals(id, title, link, 0, OriginalLink, strHtml, feed.Channel.Items[j].pubDate, isAppliances, isFinance, isTravel, isDrug, isElectronic, isBeauty, isOfficeSupplies, isRestaurant, isJewelry, isOthers, isAppeal, isHot);
                    InsertRssDealsHistory(id, title, link, 0, OriginalLink, strHtml, feed.Channel.Items[j].pubDate, isAppliances, isFinance, isTravel, isDrug, isElectronic, isBeauty, isOfficeSupplies, isRestaurant, isJewelry, isOthers, isAppeal, isHot);

                    // An empty result string is treated as success; anything else is reported as an error.
                    if (result == "")
                    {
                        TotalInsert++;
                        SubInsert[k]++;
                        Console.ForegroundColor = ConsoleColor.White;
                        Console.WriteLine(title + " Inserted Successfully! from " + SubSourceName[k] + " At " + DateTime.Now.ToShortTimeString());
                    }
                    else
                    {
                        TotalFailed++;
                        SubFailed[k]++;
                        SendEmail.SendDealsEmail("*****@*****.**", "*****@*****.**", "Scaner Error @ " + DateTime.Now.ToString(), result);
                        Console.ForegroundColor = ConsoleColor.Red;
                        Console.WriteLine(result + " At " + DateTime.Now.ToShortTimeString());
                    }
                }
            }
            catch (Exception ex)
            {
                // Best effort: report the failure and carry on with the next feed entry.
                SendEmail.SendDealsEmail("*****@*****.**", "*****@*****.**", "Scaner Error @ " + DateTime.Now.ToString(), ex.Message);
            }
        }
    }

    Console.ForegroundColor = ConsoleColor.White;
    Console.WriteLine(string.Format("Total Inserted: {0}, Total Duplicate: {1}, Total Failed: {2}", TotalInsert, TotalDuplicate, TotalFailed));

    // Bounded by the counter arrays, not by the (possibly refreshed) data set.
    for (int m = 0; m < SubSourceName.Length; m++)
    {
        Console.WriteLine(string.Format("{0} Inserted: {1}, Duplicate: {2}, Failed: {3}", SubSourceName[m], SubInsert[m], SubDuplicate[m], SubFailed[m]));
    }

    Console.WriteLine("Total similar deals found : " + TotalSimilar.ToString());
    Console.WriteLine("Total Exsit Duplicate deals found : " + TotalExsitDuplicate.ToString());
    Console.WriteLine("Total Exsit Duplicate in same source found : " + TotalExsitSameSource.ToString());

    // Sample the clock once, and use TotalMinutes so runs longer than an hour are not
    // reported modulo 60.
    TimeSpan elapsed = DateTime.Now - starttime;
    Console.WriteLine("Scan Time is : " + (int)elapsed.TotalMinutes + " Minutes " + elapsed.Seconds + " Seconds ");
}