/// <summary>
/// Loads the page behind <paramref name="curLink"/> in a new <c>WebDriverController</c>, extracts the links
/// found on it and hands them to <c>mDbController.EliminateLinksWhoDoesntHaveDomainInfo</c>.
/// A one-line status string is built along the way and written to the console.
/// </summary>
/// <param name="curLink">Link whose <c>Url</c> is navigated to.</param>
/// <param name="taskNumber">
/// Number of this task inside the current batch; shown in the status line and passed to <c>UpdateTaskCount</c>.
/// </param>
/// <returns>
/// The tuple produced by the elimination step, or <c>null</c> when the run was stopped by the user,
/// the page could not be loaded, or an unexpected exception was thrown.
/// </returns>
/// <remarks>
/// All pauses use <c>Task.Delay</c> instead of <c>Thread.Sleep</c> so that the tasks of one batch do not
/// block a thread (and each other) while waiting. No <c>await</c> is placed inside a <c>catch</c> block,
/// which keeps the method compilable with C# 5.
/// </remarks>
private async Task<Tuple<List<string>, List<DomainInfo>>> GetCustomerShoppingPointsAsync(CustomLink curLink, int taskNumber)
{
    string driverResult = "Driver :" + taskNumber.ToString("0000") + " WebPage : " + curLink.Url.ToString(20, ' ', true);
    Exception failure = null;

    try
    {
        if (mFlagStop)
        {
            btnStop.BeginInvoke((MethodInvoker) delegate { btnStop.Enabled = false; });
            UpdateTaskCount(taskNumber, true);
            // NOTE(review): plain ++ is not atomic; confirm whether tasks of a batch can reach this concurrently.
            mProcessedUrlCount++;
            driverResult += "\t operation canceled by user";
            await Task.Delay(100);
            WriteToConsole(driverResult);
            return null;
        }

        // NOTE(review): the controller is never closed or disposed in this method; if WebDriverController
        // owns a browser process or implements IDisposable it should be released here - confirm.
        var webDriver = new WebDriverController();

        bool pageLoaded;
        try
        {
            await webDriver.Navigate(curLink.Url, mPageLoadTimeout);
            // Presumably gives the page time to finish rendering after navigation - confirm.
            await Task.Delay(mPageLoadTimeout);
            pageLoaded = true;
        }
        catch (Exception)
        {
            // Any navigation failure is treated as "page could not be loaded"; handled below,
            // outside the catch block, so that the delay can be awaited.
            pageLoaded = false;
        }

        if (!pageLoaded)
        {
            await Task.Delay(100);
            driverResult += "\t webpage could not load. Ignoring this url";
            WriteToConsole(driverResult);
            UpdateTaskCount(taskNumber, true);
            return null;
        }

        await Task.Delay(200);
        List<string> extractedLinks = await webDriver.ExtractLinks();
        await Task.Delay(200);
        driverResult += "\t total extracted links :" + extractedLinks.Count.ToString("00000");

        var remainingLinkTuple = await mDbController.EliminateLinksWhoDoesntHaveDomainInfo(extractedLinks);
        await Task.Delay(200);

        // The remaining-link count is only reported when the elimination step actually returned a list;
        // previously a null result was hidden by an empty catch block.
        if (remainingLinkTuple != null && remainingLinkTuple.Item1 != null)
        {
            driverResult += "\t remaining url count after elimination :" + remainingLinkTuple.Item1.Count.ToString("00000");
        }

        await Task.Delay(200);
        try
        {
            WriteToConsole(driverResult);
        }
        catch (Exception)
        {
            // Status reporting is best-effort: a failure to print must not discard the extracted links.
        }

        UpdateTaskCount(taskNumber, true);
        return remainingLinkTuple;
    }
    catch (Exception ex)
    {
        // Remember the failure and report it after the try/catch so the delay can be awaited.
        failure = ex;
    }

    UpdateTaskCount(taskNumber, true);
    mProcessedUrlCount++;
    // Appended (not assigned) so the console line still identifies the driver and URL that crashed,
    // like every other status fragment in this method.
    driverResult += "\t task is chrashed with exception : " + failure.Message;
    await Task.Delay(100);
    WriteToConsole(driverResult);
    return null;
}
/// <summary>
/// Starts a link-extraction run: clears the console and process info, switches the buttons to the
/// "running" state, loads the input links from <c>mSelectedFilePath</c>, registers them as candidates
/// and then launches the extraction loop in the background.
/// </summary>
/// <remarks>
/// If the input file cannot be read, or it does not contain the "lastly processed link" line, a message
/// box is shown, the buttons are put back into their idle state and the run is not started.
/// </remarks>
private void StartProcessing()
{
    ClearConsole();
    ClearProcessInfo();
    this.BeginInvoke((MethodInvoker) delegate
    {
        btnStart.Enabled = false;
        btnStop.Enabled = true;
        btnFix.Enabled = false;
    });

    // The links in the input file are read first; they seed the run.
    List<string> inputLinks;
    bool isRead = mDbController.TryReadAllLines(mSelectedFilePath, out inputLinks);
    if (!isRead)
    {
        MessageBox.Show("Girilen input dosyasındaki linkler okunamadı! İşlem iptal ediliyor.");
        // Without this the Start/Fix buttons stayed disabled after an input error.
        RestoreIdleButtons();
        return;
    }

    // Find the lastly processed url among the lines that were read; it is used later, while the
    // initial links are processed, to decide which links still have to be handled.
    bool isStartUrlFound = mDbController.ExtractLastlyProcessedLink(ref inputLinks);
    if (!isStartUrlFound)
    {
        MessageBox.Show("Girilen input dosyasında son işlenen linke ait satır bulunamadı, işlem iptal edilecektir." + Environment.NewLine
                        + "Lütfen referans linkler dosyasının başına hangi urlden devam etmek istediğinizi belirtmek için : " + Environment.NewLine
                        + "## Lastly Processed link :www.exampleurl.com.tr" + Environment.NewLine
                        + "ifadesini ekleyiniz."
                        );
        RestoreIdleButtons();
        return;
    }

    // Add to the candidate list.
    mDbController.AddRangeCandidates(inputLinks);
    // Total number of links read from the text file at start-up.
    initialLinkCount = inputLinks.Count;
    // Free the memory held by the temporary list.
    inputLinks.Clear();

    mFlagStop = false;

    // The loop is an async Task (not an async-void thread body) so that its exceptions are
    // observed inside RunExtractionLoopAsync instead of tearing down the process.
    Task.Run(() => RunExtractionLoopAsync());
}

/// <summary>
/// Background part of a run: processes the initial links, then repeatedly takes a batch of candidate
/// links, extracts links from every page of the batch concurrently, stores the resulting candidates
/// and saves the results after each iteration. The loop ends when no candidates are left, when
/// <c>mDbController.IsSearchCompleted</c> is set, or when <c>mFlagStop</c> is raised.
/// </summary>
/// <returns>A task that completes when the run has ended and the buttons have been restored.</returns>
private async Task RunExtractionLoopAsync()
{
    try
    {
        WriteToConsole("Going to process initial links to create candidate urls");
        // Awaited here rather than blocked on with .Result, so the caller of StartProcessing is not stalled.
        await mDbController.ProcessInitialLinks();
        WriteToConsole("Candidate urls are initialized");
        mProcessedUrlCount = 0;

        while (mDbController.GetCandidatesCount() > 0 && !mDbController.IsSearchCompleted)
        {
            SetTaskCount(0);
            if (mFlagStop)
            {
                WriteToConsole(":( Operation is terminated by the user. Going to save the current results.");
                btnStop.BeginInvoke((MethodInvoker) delegate { btnStop.Enabled = false; });
                break;
            }

            List<CustomLink> currentLinks = await mDbController.TakeMax();
            var tasks = new List<Task<Tuple<List<string>, List<DomainInfo>>>>();
            driverStatues = new List<string>();
            taskResults = new List<bool>();
            SetTaskCount(currentLinks.Count);

            // One extraction task per link of the batch; the per-task bookkeeping lists get one slot each.
            for (int i = 0; i < currentLinks.Count; i++)
            {
                taskResults.Add(false);
                driverStatues.Add("");
                tasks.Add(GetCustomerShoppingPointsAsync(currentLinks[i], i));
            }

            var batchResults = await Task.WhenAll(tasks);
            ClearConsole();
            SetTaskCount(0);
            WriteToConsole("#### Whole tasks are finished for this loop ###");

            // A null result means the task was cancelled or failed; such tasks contribute nothing.
            List<string> candidateUrls = new List<string>();
            List<DomainInfo> candidateDomains = new List<DomainInfo>();
            foreach (var batchResult in batchResults)
            {
                if (batchResult != null)
                {
                    candidateDomains.AddRange(batchResult.Item2);
                    candidateUrls.AddRange(batchResult.Item1);
                }
            }

            TaskHelper.RemoveTaskList(ref tasks);

            WriteToConsole("#### Going to insert candidates into the database ###");
            await mDbController.InsertCandidatesToDatabase(new Tuple<List<string>, List<DomainInfo>>(candidateUrls, candidateDomains));

            var candidateCount = mDbController.GetCandidatesCount();
            int totalProcessed = mDbController.GetCount();
            SetLastIteratioInformation("", totalProcessed, candidateCount, initialLinkCount);

            // UPDATED: 13.12.2019
            // Added : saving results after each iteration.
            mDbController.Save(mOutputFilePath, currentLinks);
            ClearConsole();
        }

        WriteToConsole("#### URL extraction is Finished ###");
        // Results were already saved at the end of every iteration above.
        WriteToConsole("#### URLs are saved ###");
    }
    catch (Exception ex)
    {
        WriteToConsole("#### URL extraction is aborted with exception : " + ex.Message + " ###");
    }
    finally
    {
        RestoreIdleButtons();
    }
}

/// <summary>
/// Puts the buttons back into the state used while no run is active:
/// Start and Fix enabled, Stop disabled. The change is marshalled through <c>BeginInvoke</c>.
/// </summary>
private void RestoreIdleButtons()
{
    btnStart.BeginInvoke((MethodInvoker) delegate
    {
        btnStart.Enabled = true;
        btnFix.Enabled = true;
        btnStop.Enabled = false;
    });
}