/// <summary>
/// Initializes a new <see cref="HelperClass"/> by assigning freshly constructed
/// <c>SQLClass</c> and <c>HTMLCriteriaClass</c> instances to the members of the same
/// names (those members are declared outside this excerpt).
/// </summary>
public HelperClass()
 {
     SQLClass          = new SQLClass();
     HTMLCriteriaClass = new HTMLCriteriaClass();
 }
Beispiel #2
0
 /// <summary>
 /// Runs one scraping pass over every active row of <c>TABLE_SEARCH_MASTER</c>: for each
 /// search it downloads the result pages one by one, bulk-inserts the models built from
 /// each page into <c>TABLE_ADVERT</c>, hands the stored and freshly seen advert ID lists
 /// to <c>HelperClass.MarkAsDeleted</c>, and finally calls <c>SendNotification</c>.
 /// </summary>
 /// <returns>
 /// A trace of the run: step markers with the last reported SQL error text, every requested
 /// URL, the raw, trimmed and cleaned content of every page, and a closing "Done in N seconds."
 /// entry. If a downloaded page contains "too-many-requests" the run stops immediately and
 /// the returned sequence holds only "We are banned :)". If the search-master query yields
 /// no table, the trace collected up to that point is returned.
 /// </returns>
 /// <remarks>
 /// The HTTP calls are awaited synchronously via <c>.Result</c> because this method has a
 /// synchronous signature; failures therefore surface wrapped in <see cref="AggregateException"/>.
 /// </remarks>
 public IEnumerable<string> Scrapper()
 {
     // A page whose content splits into fewer entries than this ends paging for the search.
     const int minEntriesForNextPage = 20;

     var watch = System.Diagnostics.Stopwatch.StartNew();

     // These locals intentionally keep the names of their types: member accesses such as
     // HTMLCriteriaClass.AdvertTrimCriteria then compile whether the member is an instance
     // or a static one, which is not visible from this method.
     SQLClass SQLClass = new SQLClass();
     HelperClass HelperClass = new HelperClass();
     HTMLCriteriaClass HTMLCriteriaClass = new HTMLCriteriaClass();

     List<string> result = new List<string>();
     result.Add("Step 1");

     DataTable dtSearchMaster = SQLClass.GetDataTable("SELECT ID, ADVERTTYPEID FROM TABLE_SEARCH_MASTER (NOLOCK) WHERE ISACTIVE = 1", out string error);
     result.Add("Step 2: " + error);

     if (dtSearchMaster == null)
     {
         // NOTE(review): assumes GetDataTable can hand back null when it reports an error —
         // confirm. The "Step 2" entry above already carries the error text, so return the
         // trace instead of failing on dtSearchMaster.Rows below.
         return result;
     }

     // One client for the whole run instead of one per page request: creating and disposing
     // an HttpClient per request leaves sockets behind and can exhaust them on long runs.
     // NOTE(review): a shared client also keeps its cookie container across page requests,
     // whereas the per-request clients each started with an empty one — confirm that is fine.
     using HttpClient client = new HttpClient();
     client.DefaultRequestHeaders.Add("user-agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36");

     foreach (DataRow item in dtSearchMaster.Rows)
     {
         result.Add("Step 3: " + error);
         int searchMasterID = Convert.ToInt32(item["ID"]);

         // searchMasterID is an int, so concatenating it into the query text cannot inject SQL.
         DataTable dtAdvert = SQLClass.GetDataTable("SELECT AdvertID FROM TABLE_ADVERT (NOLOCK) WHERE SearchMasterID = " + searchMasterID, out error);
         result.Add("Step 4: " + error);

         List<int> advertDBList = HelperClass.DataTabletoIntList(dtAdvert);
         List<int> advertWebList = new List<int>();
         int advertTypeID = Convert.ToInt32(item["ADVERTTYPEID"]);

         int currentPage = 1;
         bool continueOnNextPage = true;
         while (continueOnNextPage)
         {
             string siteAddress = SQLClass.GetSingleCellDataComplex($"SP_GETSEARCHURL {searchMasterID}, {currentPage}");
             result.Add(siteAddress);

             string siteContent;
             // Disposing the response also disposes its content.
             using (HttpResponseMessage response = client.GetAsync(siteAddress).Result)
             {
                 siteContent = response.Content.ReadAsStringAsync().Result;
             }
             result.Add(siteContent);

             if (siteContent.Contains("too-many-requests"))
             {
                 // The collected trace is deliberately discarded; callers only get this marker.
                 return new List<string> { "We are banned :)" };
             }

             // A page containing "forceLoginPageMessage" is not handled specially: the
             // automated login (AutomatedUILogin.SahibindenLogin) that was sketched for this
             // case is disabled, so such a page is parsed below like any other.

             string trimmedSiteContent = HelperClass.TrimHelper(HTMLCriteriaClass.AdvertTrimCriteria, siteContent);
             result.Add(trimmedSiteContent);

             string cleanedSiteContent = WebUtility.HtmlDecode(HelperClass.ReplaceNonAnsiChars(HelperClass.CleanData(trimmedSiteContent)));
             result.Add(cleanedSiteContent);

             List<string> splittedInput = HelperClass.SplitDivisionHelper(HTMLCriteriaClass.AdvertSplitDivisionCriteria, cleanedSiteContent, false);
             List<ResultModel> resultModels = HelperClass.PopulateResultModel(splittedInput, advertTypeID, searchMasterID, advertDBList, out List<int> pageAdvertIds);

             using (DataTable dataTable = HelperClass.ConvertListToDataTable(resultModels))
             {
                 SQLClass.BulkInsert(dataTable, "TABLE_ADVERT");
             }

             advertWebList.AddRange(pageAdvertIds);
             continueOnNextPage = splittedInput.Count >= minEntriesForNextPage;
             currentPage++;
         }

         // Skip the call when no advert ID was collected from any page of this search.
         if (advertWebList.Count > 0)
         {
             HelperClass.MarkAsDeleted(advertDBList, advertWebList);
         }

         SendNotification(searchMasterID);
     }

     watch.Stop();
     // Integer division: the elapsed time is reported in whole seconds.
     result.Add($"Done in {watch.ElapsedMilliseconds / 1000} seconds.");
     return result;
 }