/// <summary>
/// Creates a helper and assigns newly constructed <see cref="SQLClass"/> and
/// <see cref="HTMLCriteriaClass"/> instances to the members of the same names.
/// </summary>
public HelperClass()
{
    // Assignment order is kept as-is: the constructors of both types are defined elsewhere.
    SQLClass = new SQLClass();
    HTMLCriteriaClass = new HTMLCriteriaClass();
}
/// <summary>
/// Runs one scraping pass over every row of TABLE_SEARCH_MASTER with ISACTIVE = 1.
/// For each search it downloads the result pages one by one, turns each page into
/// result models via <c>HelperClass.PopulateResultModel</c>, bulk-inserts them into
/// TABLE_ADVERT, then calls <c>HelperClass.MarkAsDeleted</c> with the advert ids read
/// from the database and the ids collected from the web, and finally calls
/// <c>SendNotification</c> for the search.
/// </summary>
/// <returns>
/// A diagnostic log of the run: step markers, SQL error strings, requested URLs and the
/// raw / trimmed / cleaned page content, ending with the elapsed time. If a downloaded
/// page contains "too-many-requests" the run stops immediately and the log is replaced
/// by the single entry "We are banned :)". If the initial search query yields no table,
/// the log collected so far (including the "Step 2" error entry) is returned.
/// </returns>
/// <remarks>
/// HTTP calls are awaited synchronously via <c>Task.Result</c>, so network failures
/// surface wrapped in an <see cref="AggregateException"/>.
/// </remarks>
public IEnumerable<string> Scrapper()
{
    // A page yielding fewer split entries than this is treated as the last page of a search.
    const int FullPageEntryCount = 20;

    var watch = System.Diagnostics.Stopwatch.StartNew();

    // These locals deliberately keep the same names as their types. With that naming,
    // accesses such as SQLClass.GetDataTable(...) compile whether the member is static
    // or instance; renaming the locals could break the build if any member is static.
    SQLClass SQLClass = new SQLClass();
    HelperClass HelperClass = new HelperClass();
    HTMLCriteriaClass HTMLCriteriaClass = new HTMLCriteriaClass();

    List<string> result = new List<string>();

    result.Add("Step 1");
    DataTable dtSearchMaster = SQLClass.GetDataTable("SELECT ID, ADVERTTYPEID FROM TABLE_SEARCH_MASTER (NOLOCK) WHERE ISACTIVE = 1", out string Error);
    result.Add("Step 2: " + Error);

    if (dtSearchMaster == null)
    {
        // Nothing to iterate. Whatever GetDataTable reported is already in the
        // "Step 2" entry, so return the log instead of failing with a
        // NullReferenceException on dtSearchMaster.Rows.
        return result;
    }

    // One client for the whole run instead of one per page request: creating and
    // disposing an HttpClient per request churns sockets.
    // NOTE(review): a shared client also keeps cookies between page requests of the
    // same run (default handler behaviour) - confirm this is acceptable for the site.
    using HttpClient client = new HttpClient();
    client.DefaultRequestHeaders.Add("user-agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36");

    foreach (DataRow item in dtSearchMaster.Rows)
    {
        // Error still holds the message of the most recent query at this point.
        result.Add("Step 3: " + Error);

        int searchMasterID = Convert.ToInt32(item["ID"]);
        int advertTypeID = Convert.ToInt32(item["ADVERTTYPEID"]);

        // searchMasterID is an integer read from the database, so concatenating it
        // into the statement cannot inject SQL.
        DataTable dtAdvert = SQLClass.GetDataTable("SELECT AdvertID FROM TABLE_ADVERT (NOLOCK) WHERE SearchMasterID = " + searchMasterID, out Error);
        result.Add("Step 4: " + Error);

        List<int> advertDBList = HelperClass.DataTabletoIntList(dtAdvert);
        List<int> advertWebList = new List<int>();

        bool continueOnNextPage = true;
        int currentPage = 1;

        while (continueOnNextPage)
        {
            string siteAddress = SQLClass.GetSingleCellDataComplex("SP_GETSEARCHURL " + searchMasterID.ToString() + ", " + currentPage.ToString());
            result.Add(siteAddress);

            string siteContent = DownloadPage(siteAddress);
            result.Add(siteContent);

            if (siteContent.Contains("too-many-requests"))
            {
                // The site is throttling us: abandon the run and discard the log.
                result = new List<string>() { "We are banned :)" };
                return result;
            }

            // TODO: pages containing "forceLoginPageMessage" are not handled. An automated
            // UI login was sketched here earlier but never enabled, so such a page is
            // processed like any other.

            string trimmedSiteContent = HelperClass.TrimHelper(HTMLCriteriaClass.AdvertTrimCriteria, siteContent);
            result.Add(trimmedSiteContent);

            string cleanedSiteContent = WebUtility.HtmlDecode(HelperClass.ReplaceNonAnsiChars(HelperClass.CleanData(trimmedSiteContent)));
            result.Add(cleanedSiteContent);

            List<string> splittedInput = HelperClass.SplitDivisionHelper(HTMLCriteriaClass.AdvertSplitDivisionCriteria, cleanedSiteContent, false);
            List<ResultModel> resultModelList = HelperClass.PopulateResultModel(splittedInput, advertTypeID, searchMasterID, advertDBList, out List<int> pageAdvertIds);

            using (DataTable dataTable = HelperClass.ConvertListToDataTable(resultModelList))
            {
                SQLClass.BulkInsert(dataTable, "TABLE_ADVERT");
            }

            if (splittedInput.Count < FullPageEntryCount)
            {
                continueOnNextPage = false;
            }

            currentPage++;
            advertWebList.AddRange(pageAdvertIds);
        }

        // Skipped when no advert id was collected from the web for this search.
        if (advertWebList.Count > 0)
        {
            HelperClass.MarkAsDeleted(advertDBList, advertWebList);
        }

        SendNotification(searchMasterID);
    }

    watch.Stop();
    // Integer division: the elapsed time is reported in whole seconds.
    result.Add("Done in " + (watch.ElapsedMilliseconds / 1000).ToString() + " seconds.");
    return result;

    // Downloads one page synchronously and returns its body as text.
    string DownloadPage(string address)
    {
        using (HttpResponseMessage response = client.GetAsync(address).Result)
        {
            // Disposing the response also disposes its content.
            return response.Content.ReadAsStringAsync().Result;
        }
    }
}