public static ScrappingExecutionResult SearchViaScrapping(ScrappingSearch scrappingSearch, int SearchTripProviderId) { ScrappingExecutionResult result = new ScrappingExecutionResult(); try { int nbMaxAttempts = 50; bool continueProcess = true; int attemtNumber = 0; while (continueProcess) { attemtNumber = attemtNumber + 1; Console.WriteLine(DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss") + " ** START SearchViaScrapping ** : " + attemtNumber); result = Run(scrappingSearch, SearchTripProviderId); if (scrappingSearch.ProxiesList == null || scrappingSearch.ProxiesList.Count == 0 || (!result.Success && (attemtNumber == 10 || attemtNumber == 20 || attemtNumber == 30))) { scrappingSearch.ProxiesList = ProxyHelper.GetProxies(); } else { ProxyItem proxyItemToModify = scrappingSearch.ProxiesList.Find(p => p.Proxy == scrappingSearch.Proxy); if (proxyItemToModify != null) { scrappingSearch.ProxiesList.Find(p => p.Proxy == scrappingSearch.Proxy).UseNumber = proxyItemToModify.UseNumber + 1; if (!result.Success) { scrappingSearch.ProxiesList.Find(p => p.Proxy == scrappingSearch.Proxy).Failure = proxyItemToModify.Failure + 1; } } } if (!result.Success) { scrappingSearch.Proxy = ProxyHelper.GetBestProxy(scrappingSearch.ProxiesList); scrappingSearch.NewProxy = true; } continueProcess = !result.Success && attemtNumber < nbMaxAttempts; } result.LastProxy = scrappingSearch.Proxy; result.AttemptsNumber = attemtNumber; result.ProxiesList = scrappingSearch.ProxiesList; } catch (Exception e) { result.Success = false; result.Error = e.ToString(); FlightsEngine.Utils.Logger.GenerateError(e, System.Reflection.MethodBase.GetCurrentMethod().DeclaringType, "Provider = " + scrappingSearch.Provider + " and Proxy = " + scrappingSearch.Proxy + " and url = " + scrappingSearch.Url); } finally { Console.WriteLine(DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss") + " ** END SearchViaScrapping **"); } return(result); }
public static ScrappingExecutionResult Run(ScrappingSearch scrappingSearch, int SearchTripProviderId) { Console.WriteLine(DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss") + " *** START Scrapping *** : " + (scrappingSearch?.Provider ?? "") + " | " + (scrappingSearch?.Proxy ?? "")); ScrappingExecutionResult result = new ScrappingExecutionResult(); bool success = false; System.Diagnostics.Process cmd = new System.Diagnostics.Process(); try { #region preparation and purge //1) PREPARATION // model => D:\DEV\FlightEngine\Batch\WebScraperBash\PrepareScrapping.cmd "D:\DEV\FlightEngine\Batch\WebScraperBash" "126" "83.166.99.11" 54457 string args = "\"" + scrappingSearch.ScrappingFolder + "\" \"" + SearchTripProviderId + "\""; if (!scrappingSearch.NewProxy || String.IsNullOrWhiteSpace(scrappingSearch.Proxy)) { args = args + " \"\" -1"; } else { args = args + " \"" + scrappingSearch.Proxy.Split(':')[0] + "\" " + scrappingSearch.Proxy.Split(':')[1].Split(' ')[0]; } // string args = "\"" + scrappingSearch.ScrappingFolder + "\" \"" + scrappingSearch.SearchTripProviderId + "\" \"\" \"\" "; System.Diagnostics.ProcessStartInfo startInfoPreparation = new System.Diagnostics.ProcessStartInfo(); startInfoPreparation.WindowStyle = System.Diagnostics.ProcessWindowStyle.Hidden; startInfoPreparation.FileName = "cmd.exe"; startInfoPreparation.Arguments = string.Format("/C {0} {1}", scrappingSearch.ScrappingPreparationScript, args); startInfoPreparation.RedirectStandardInput = true; startInfoPreparation.RedirectStandardOutput = true; cmd.StartInfo.CreateNoWindow = false; startInfoPreparation.UseShellExecute = false;; cmd.StartInfo = startInfoPreparation; cmd.Start(); string strResult = ""; List <string> resultList = new List <string>(); while (!cmd.StandardOutput.EndOfStream) { strResult = cmd.StandardOutput.ReadLine(); resultList.Add(strResult); } if (!String.IsNullOrWhiteSpace(strResult)) { if (strResult.StartsWith("OK")) { success = true; result.Error = null; } } #endregion #region Scrapping // Check if preparation is OK if (success) { Console.WriteLine(DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss") + " *** END Preparation / Call AutoHotKey ***"); // 2 SCRAPPING // "D:\DEV\FlightEngine\Batch\WebScraperBash\Scrapper.exe" "https://www.edreams.com/#results/type=R;dep=2018-10-22;from=YVR;to=LON;ret=2018-11-19;collectionmethod=false;airlinescodes=false;internalSearch=true" "126" "C:\Users\franc\AppData\Local\Mozilla Firefox\firefox.exe" "eDreams" // string url = "https://www.edreams.com/#results/type=R;dep=2018-10-22;from=YVR;to=LON;ret=2018-11-19;collectionmethod=false;airlinescodes=false;internalSearch=true"; args = "\"" + scrappingSearch.Url + "\" \"" + SearchTripProviderId + "\" \"" + scrappingSearch.FirefoxExeFolder + "\" \"" + scrappingSearch.Provider + "\""; // args= ""C:\DEV\FlightEngine\Batch\WebScraperBash\Scrapper.exe" "https://www.edreams.com/#results/type=R;dep=2018-10-22;from=YVR;to=LON;ret=2018-11-19;collectionmethod=false;airlinescodes=false;internalSearch=true" "16" "C:\Program Files\Mozilla Firefox\firefox.exe" "Edreams"" System.Diagnostics.ProcessStartInfo startInfoScrapping = new System.Diagnostics.ProcessStartInfo(); startInfoScrapping.WindowStyle = System.Diagnostics.ProcessWindowStyle.Hidden; startInfoScrapping.FileName = "cmd.exe"; startInfoScrapping.Arguments = string.Format("/C {0} {1}", scrappingSearch.ScrappingExeScript, args); startInfoScrapping.RedirectStandardInput = true; startInfoScrapping.RedirectStandardOutput = true; cmd.StartInfo.CreateNoWindow = false; startInfoScrapping.UseShellExecute = false;; cmd.StartInfo = startInfoScrapping; cmd.Start(); strResult = ""; resultList = new List <string>(); while (!cmd.StandardOutput.EndOfStream) { strResult = cmd.StandardOutput.ReadLine(); resultList.Add(strResult); } int i = 0; string HtmlFile = "D:\\Html\\search_" + SearchTripProviderId + ".html"; string HtmlErrorFile = "D:\\Html\\search_" + SearchTripProviderId + ".xht"; string StopSearchFile = "D:\\Html\\stopsearch_" + SearchTripProviderId + ".txt"; success = false; while (!File.Exists(HtmlFile) && !File.Exists(HtmlErrorFile) && !File.Exists(StopSearchFile) && i <= 20) //limit the time to whait to 30 sec { Thread.Sleep(500); i++; } if (File.Exists(HtmlFile)) { if (scrappingSearch.Provider == Providers.ToString(Providers.Kayak)) { success = IsKayakSuccessfullSearch(HtmlFile); } else { success = true; } if (!success) { Task.Factory.StartNew(() => { Thread.Sleep(500); File.Delete(HtmlFile); }); } } } #endregion } catch (Exception e) { success = false; result.Error = e.ToString(); FlightsEngine.Utils.Logger.GenerateError(e, System.Reflection.MethodBase.GetCurrentMethod().DeclaringType, "Provider = " + scrappingSearch.Provider + " and Proxy = " + scrappingSearch.Proxy + " and url = " + scrappingSearch.Url); } finally { cmd.StandardInput.WriteLine("exit"); cmd.WaitForExit(); cmd.Close(); result.Success = success; } if (success) { Console.WriteLine(DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss") + " *** END Scrapping *** : SUCCESS "); } else { Console.WriteLine(DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss") + " *** END Scrapping *** : FAILURE => " + (result.Error ?? "")); } return(result); }