sim() public method

public sim ( String str1, String str2 ) : double
Parameters: str1 (String), str2 (String)
Returns: double
コード例 #1
0
        /// <summary>
        /// Compares the titles of the rows in the "rssdeals" table pairwise and prints every
        /// pair whose similarity percentage is above the given threshold, followed by the
        /// number of pairs reported. Prints nothing when no data set is available.
        /// </summary>
        /// <param name="SimiPecentage">
        /// Threshold in percent; a pair is reported only when its rounded similarity
        /// percentage is strictly greater than this value.
        /// </param>
        public static void DuplicateResult(int SimiPecentage)
        {
            DataSet ds = GetRtDealsDataSet();
            Similarity s = new Similarity();
            if (ds == null)
            {
                return;
            }

            DataRowCollection rows = ds.Tables["rssdeals"].Rows;
            int rowCount = rows.Count;

            // Read and parse each row once up front instead of on every inner iteration.
            string[] titles = new string[rowCount];
            string[] sourceIds = new string[rowCount];
            string[] dealIds = new string[rowCount];
            int[] dealNumbers = new int[rowCount];
            for (int r = 0; r < rowCount; r++)
            {
                DataRow row = rows[r];
                titles[r] = row["Title"].ToString();
                sourceIds[r] = row["SourceID"].ToString();
                dealIds[r] = row["dealsID"].ToString();
                dealNumbers[r] = Convert.ToInt32(dealIds[r]);
            }

            int Total = 0;
            for (int i = 0; i < rowCount; i++)
            {
                for (int j = 0; j < rowCount; j++)
                {
                    // Only compare a deal against deals with a higher id, so each pair is
                    // evaluated once and a row is never compared with itself.
                    if (dealNumbers[i] >= dealNumbers[j])
                    {
                        continue;
                    }

                    int Percentage = Convert.ToInt16(s.sim(titles[i], titles[j]) * 100);
                    if (Percentage > SimiPecentage)
                    {
                        Total++;
                        Console.WriteLine(string.Format("{0}-{1} found duplicate at {2}-{3} Similarity:{4}%", sourceIds[i], dealIds[i], sourceIds[j], dealIds[j], Percentage));
                    }
                }
            }

            Console.WriteLine("Total Duplicate Found: " + Total.ToString());
        }
コード例 #2
0
ファイル: ScanDeals.cs プロジェクト: jediskywalker/RTDeals
        /// <summary>
        /// Reads every RSS feed listed in the "sourcerssseed" table, skips feed items that the
        /// existence checks or the title-similarity comparison flag as duplicates, inserts the
        /// remaining items, and prints per-source and overall totals to the console.
        /// </summary>
        /// <param name="PercentageLimitation">
        /// Similarity threshold in percent. A feed item whose title similarity to a stored deal
        /// from a different source is at or above this value is recorded as a duplicate instead
        /// of being inserted.
        /// </param>
        public static void GetRSSDeals(int PercentageLimitation)
        {
            DateTime starttime = DateTime.Now;
            DataSet ds = GetRtDealsDataSet();
            Similarity s = new Similarity();
            int TotalInsert = 0;
            int TotalFailed = 0;
            int TotalDuplicate = 0;
            int TotalSimilar = 0;
            int TotalExsitDuplicate = 0;
            int TotalExsitSameSource = 0;
            int k = 0;  // index of the current source in the per-source arrays

            // Size the per-source arrays from the data set only when there is one, so a missing
            // data set yields an empty summary instead of a NullReferenceException.
            int sourceCount = (ds != null) ? ds.Tables["sourcerssseed"].Rows.Count : 0;
            string[] SubSourceName = new string[sourceCount];
            int[] SubInsert = new int[sourceCount];
            int[] SubDuplicate = new int[sourceCount];
            int[] SubFailed = new int[sourceCount];
            string SimilarDuplicateResult = "";
            // Collected per-feed error text; it is accumulated here but not printed by this method.
            string SourceFailedResult = "";

            if (ds != null)
            {
                foreach (DataRow dr in ds.Tables["sourcerssseed"].Rows)
                {
                    // Int32 rather than Int16 so source ids above 32767 do not overflow.
                    int id = Convert.ToInt32(dr["SourceID"]);

                    // "Additional" holds comma-separated entries of the form "<type>*<url>".
                    string Additional = dr["Additional"].ToString();
                    string[] AddiURL = Additional.Split(',');
                    SubSourceName[k] = id.ToString();

                    for (int i = 0; i < AddiURL.Length; i++)
                    {
                        try
                        {
                            if (AddiURL[i] == "")
                                continue;

                            string[] temp = AddiURL[i].Split('*');
                            string RssType = temp[0];
                            string RssUrl = temp[1];

                            // The "hot" flag depends only on the feed type, so compute it once per feed.
                            int isHot = RssType.ToLower().Contains("hot") ? 1 : 0;

                            // Midnight three days ago, taken directly instead of via a formatted string.
                            RSS.Feed feed = new RSS.Feed(RssUrl, DateTime.Now.AddDays(-3).Date);
                            feed.Read();

                            for (int j = 0; j < feed.Channel.Items.Count; j++)
                            {
                                // No HTML summary is stored for a deal: the description argument
                                // passed to the insert helpers is always empty.
                                string strHtml = "";

                                int isAppliances = 0;
                                int isFinance = 0;
                                int isTravel = 0;
                                int isDrug = 0;
                                int isElectronic = 0;
                                int isBeauty = 0;
                                int isOfficeSupplies = 0;
                                int isRestaurant = 0;
                                int isJewelry = 0;
                                int isOthers = 0;
                                int isAppeal = 0;

                                // Exactly one category flag is set from the category name.
                                switch (GetDealsCatetogry(feed.Channel.Items[j].title, ds))
                                {
                                    case "Appeal":
                                        isAppeal = 1;
                                        break;
                                    case "Appliances":
                                        isAppliances = 1;
                                        break;
                                    case "Beauty":
                                        isBeauty = 1;
                                        break;
                                    case "Drug":
                                        isDrug = 1;
                                        break;
                                    case "Electronics":
                                        isElectronic = 1;
                                        break;
                                    case "Finance":
                                        isFinance = 1;
                                        break;
                                    case "Jewelry & Watches":
                                        isJewelry = 1;
                                        break;
                                    case "Office Supplies":
                                        isOfficeSupplies = 1;
                                        break;
                                    case "Restaurant & Food":
                                        isRestaurant = 1;
                                        break;
                                    case "Travels":
                                        isTravel = 1;
                                        break;
                                    default:
                                        isOthers = 1;
                                        break;
                                }

                                // Unmodified link, passed to the insert helpers separately from
                                // the link that may be rewritten further down.
                                string OriginalLink = feed.Channel.Items[j].link;

                                // NOTE(review): when the loaded "rssdeals" snapshot is empty the item is
                                // inserted here, the result is ignored, and processing still continues
                                // with the duplicate checks below - confirm this is intended.
                                if (ds.Tables["rssdeals"].Rows.Count == 0)
                                {
                                    InsertRssDeals(id, feed.Channel.Items[j].title, feed.Channel.Items[j].link, 0, OriginalLink, strHtml, feed.Channel.Items[j].pubDate, isAppliances, isFinance, isTravel, isDrug, isElectronic, isBeauty, isOfficeSupplies, isRestaurant, isJewelry, isOthers, isAppeal, isHot);
                                }

                                bool isSimilar = false;
                                bool isExist = false;
                                bool isDuplicateExsit = false;
                                string SourceID = "";
                                string DealsID = "";
                                int Percentage = 0;

                                bool isExsitSameSource = isRssTitleExsit(feed.Channel.Items[j].title);
                                if (!isExsitSameSource)
                                    isExist = isRssDealExist(feed.Channel.Items[j].link);
                                if (!isExist)
                                    isDuplicateExsit = isRssDealDuplicateExist(feed.Channel.Items[j].link);

                                // Only run the title comparison when none of the existence checks matched.
                                if (!isExist && !isDuplicateExsit && !isExsitSameSource)
                                {
                                    ds = GetRtDealsDataSet();
                                    foreach (DataRow drr in ds.Tables["rssdeals"].Rows)
                                    {
                                        string Title = drr["Title"].ToString();
                                        SourceID = drr["SourceID"].ToString();
                                        DealsID = drr["dealsID"].ToString();

                                        // Deals from the item's own source are not compared.
                                        if (SubSourceName[k] == SourceID)
                                            continue;

                                        Percentage = Convert.ToInt16(s.sim(feed.Channel.Items[j].title, Title) * 100);
                                        if (Percentage >= PercentageLimitation)
                                        {
                                            // SourceID, DealsID and Percentage keep the matching row's values
                                            // for the message below.
                                            isSimilar = true;
                                            break;
                                        }
                                    }
                                }

                                if (isSimilar || isExist || isDuplicateExsit || isExsitSameSource)
                                {
                                    // The title is passed as a format argument, never as part of the format
                                    // string, so titles containing '{' or '}' cannot raise FormatException.
                                    if (isExist)
                                    {
                                        Console.ForegroundColor = ConsoleColor.DarkCyan;
                                        SimilarDuplicateResult = string.Format("{0} from {1} Found Duplicate Record At {2}", feed.Channel.Items[j].title, SubSourceName[k], DateTime.Now.ToString());
                                    }
                                    else if (isSimilar)
                                    {
                                        TotalSimilar++;
                                        Console.ForegroundColor = ConsoleColor.Cyan;
                                        SimilarDuplicateResult = string.Format("{0} from {1} Found Duplicate Record {2}% At {3}-{4}: {5}", feed.Channel.Items[j].title, SubSourceName[k], Percentage, SourceID, DealsID, DateTime.Now.ToString());
                                        InsertRssDealsDuplicate(id, feed.Channel.Items[j].title, feed.Channel.Items[j].link, 0, OriginalLink, strHtml, feed.Channel.Items[j].pubDate, isAppliances, isFinance, isTravel, isDrug, isElectronic, isBeauty, isOfficeSupplies, isRestaurant, isJewelry, isOthers, isAppeal, isHot);
                                    }
                                    else if (isDuplicateExsit)
                                    {
                                        TotalExsitDuplicate++;
                                        Console.ForegroundColor = ConsoleColor.Magenta;
                                        SimilarDuplicateResult = string.Format("{0} from {1} Found Duplicate Record  At {2}", feed.Channel.Items[j].title, SubSourceName[k], DateTime.Now.ToString());
                                    }
                                    else if (isExsitSameSource)
                                    {
                                        TotalExsitSameSource++;
                                        Console.ForegroundColor = ConsoleColor.Yellow;
                                        // Source follows "From" and the timestamp follows "at", matching the
                                        // other duplicate messages.
                                        SimilarDuplicateResult = string.Format("{0} From {1} found Duplicate Record at {2}", feed.Channel.Items[j].title, SubSourceName[k], DateTime.Now.ToString());
                                    }

                                    TotalDuplicate++;
                                    SubDuplicate[k]++;

                                    Console.WriteLine(SimilarDuplicateResult);
                                    continue;  // move on to the next feed item
                                }

                                // Prefix the link with whatever AlterDealURL returns for this source, if anything.
                                string tempURLReasult = AlterDealURL(feed.Channel.Items[j].link, id, ds);
                                if (tempURLReasult != "")
                                    feed.Channel.Items[j].link = tempURLReasult + feed.Channel.Items[j].link;

                                // Strip the typographic possessive suffix before storing the title.
                                if (feed.Channel.Items[j].title.Contains("’s"))
                                    feed.Channel.Items[j].title = feed.Channel.Items[j].title.Replace("’s", "");

                                string result = InsertRssDeals(id, feed.Channel.Items[j].title, feed.Channel.Items[j].link, 0, OriginalLink, strHtml, feed.Channel.Items[j].pubDate, isAppliances, isFinance, isTravel, isDrug, isElectronic, isBeauty, isOfficeSupplies, isRestaurant, isJewelry, isOthers, isAppeal, isHot);
                                // The history row is written regardless of the insert result above.
                                InsertRssDealsHistory(id, feed.Channel.Items[j].title, feed.Channel.Items[j].link, 0, OriginalLink, strHtml, feed.Channel.Items[j].pubDate, isAppliances, isFinance, isTravel, isDrug, isElectronic, isBeauty, isOfficeSupplies, isRestaurant, isJewelry, isOthers, isAppeal, isHot);

                                // An empty result string is treated as success; anything else is the error text.
                                if (result == "")
                                {
                                    TotalInsert++;
                                    SubInsert[k]++;
                                    Console.ForegroundColor = ConsoleColor.White;
                                    Console.WriteLine(feed.Channel.Items[j].title + " Inserted Successfully! from " + SubSourceName[k] + " At " + DateTime.Now.ToShortTimeString());
                                }
                                else
                                {
                                    TotalFailed++;
                                    SubFailed[k]++;
                                    SendEmail.SendDealsEmail("*****@*****.**", "*****@*****.**", "Scaner Error @ " + DateTime.Now.ToString(), result);
                                    Console.ForegroundColor = ConsoleColor.Red;
                                    Console.WriteLine(result + " At " + DateTime.Now.ToShortTimeString());
                                }
                            }
                        }
                        catch (Exception ex)
                        {
                            // Any failure while reading or processing a feed abandons the rest of that
                            // feed, reports it by email, and continues with the next feed URL.
                            SourceFailedResult += string.Format("{0} {1} At {2}*", SubSourceName[k], ex.Message, DateTime.Now.ToString());
                            SendEmail.SendDealsEmail("*****@*****.**", "*****@*****.**", "Scaner Error @ " + DateTime.Now.ToString(), ex.Message);
                        }
                    }

                    k++;
                }
            }

            Console.ForegroundColor = ConsoleColor.White;
            Console.WriteLine(string.Format("Total Inserted: {0}, Total Duplicate: {1}, Total Failed: {2}", TotalInsert, TotalDuplicate, TotalFailed));

            // Bound the summary by the array length: ds may have been re-fetched during the scan
            // and its row count may no longer match the arrays sized at the start.
            for (int m = 0; m < SubSourceName.Length; m++)
            {
                Console.WriteLine(string.Format("{0} Inserted: {1}, Duplicate: {2}, Failed: {3}", SubSourceName[m], SubInsert[m], SubDuplicate[m], SubFailed[m]));
            }

            Console.WriteLine("Total similar deals found : " + TotalSimilar.ToString());
            Console.WriteLine("Total Exsit Duplicate deals found : " + TotalExsitDuplicate.ToString());
            Console.WriteLine("Total Exsit Duplicate in same source found : " + TotalExsitSameSource.ToString());

            // Sample the clock once, and use whole elapsed minutes so scans of an hour or more
            // are not reported modulo 60.
            TimeSpan elapsed = DateTime.Now - starttime;
            Console.WriteLine("Scan Time is : " + (int)elapsed.TotalMinutes + " Minutes " + elapsed.Seconds + " Seconds ");
        }