Exemple #1
0
        /// <summary>
        /// Navigates a new <c>WebDriverController</c> to the URL of <paramref name="curLink"/>,
        /// extracts the links found there and hands them to
        /// <c>mDbController.EliminateLinksWhoDoesntHaveDomainInfo</c>.
        /// A one-line status message is written to the console for every outcome.
        /// </summary>
        /// <param name="curLink">Link whose <c>Url</c> is loaded.</param>
        /// <param name="taskNumber">
        /// Number of this task; it is printed in the status line and passed to <c>UpdateTaskCount</c>.
        /// </param>
        /// <returns>
        /// The tuple returned by the elimination step, or <c>null</c> when the stop flag is set,
        /// the page could not be loaded, or any other exception occurred.
        /// </returns>
        private async Task<Tuple<List<string>, List<DomainInfo>>> GetCustomerShoppingPointsAsync(CustomLink curLink, int taskNumber)
        {
            string driverResult = "Driver :" + taskNumber.ToString("0000") + " WebPage : " + curLink.Url.ToString(20, ' ', true);
            string crashMessage;

            try
            {
                if (mFlagStop)
                {
                    btnStop.BeginInvoke((MethodInvoker) delegate
                    {
                        btnStop.Enabled = false;
                    });
                    UpdateTaskCount(taskNumber, true);
                    // NOTE(review): this increment is not atomic; confirm whether several of these
                    // tasks can run at the same time and whether Interlocked is required.
                    mProcessedUrlCount++;
                    driverResult += "\t  operation canceled by user";
                    // Task.Delay instead of Thread.Sleep: this path runs before the first await,
                    // so a blocking sleep would stall whoever is starting the tasks.
                    await Task.Delay(100);
                    WriteToConsole(driverResult);
                    return null;
                }

                var webDriver = new WebDriverController();

                bool pageLoaded;
                try
                {
                    await webDriver.Navigate(curLink.Url, mPageLoadTimeout);

                    // Wait the page-load timeout once more after navigation, without blocking a thread.
                    await Task.Delay(mPageLoadTimeout);
                    pageLoaded = true;
                }
                catch (Exception)
                {
                    // Any failure while loading only means this URL is skipped. The handling is
                    // done below because awaiting inside a catch block requires C# 6.
                    pageLoaded = false;
                }

                if (!pageLoaded)
                {
                    await Task.Delay(100);
                    driverResult += "\t  webpage could not load. Ignoring this url";
                    WriteToConsole(driverResult);
                    UpdateTaskCount(taskNumber, true);
                    return null;
                }

                await Task.Delay(200);
                List<string> extractedLinks = await webDriver.ExtractLinks();

                await Task.Delay(200);
                driverResult += "\t  total extracted links :" + extractedLinks.Count.ToString("00000");

                var remainingLinkTuple = await mDbController.EliminateLinksWhoDoesntHaveDomainInfo(extractedLinks);

                await Task.Delay(200);

                // Status reporting is best-effort: a failure to build or print the status line
                // must not throw away the links that were already extracted.
                try
                {
                    // A missing tuple or url list is skipped explicitly instead of being
                    // turned into a swallowed NullReferenceException.
                    if (remainingLinkTuple != null && remainingLinkTuple.Item1 != null)
                    {
                        driverResult += "\t  remaining url count after elimination :" + remainingLinkTuple.Item1.Count.ToString("00000");
                        await Task.Delay(200);
                        WriteToConsole(driverResult);
                    }
                }
                catch (Exception)
                {
                    // Deliberately ignored, see the comment above the try block.
                }

                UpdateTaskCount(taskNumber, true);

                return remainingLinkTuple;
            }
            catch (Exception ex)
            {
                crashMessage = ex.Message;
            }

            // Only reached when the try block above threw; every other path has already returned.
            UpdateTaskCount(taskNumber, true);
            // NOTE(review): non-atomic increment, see the note in the stop-flag branch.
            mProcessedUrlCount++;
            // Appended (not assigned) so the line still says which driver and page crashed.
            driverResult += "\t  task is chrashed with exception : " + crashMessage;
            await Task.Delay(100);
            WriteToConsole(driverResult);
            return null;
        }
Exemple #2
0
        /// <summary>
        /// Reads the input link file, seeds the candidate list, processes the initial links and
        /// then starts a background loop that crawls candidate links batch by batch until no
        /// candidates are left, the search is reported as completed, or the stop flag is set.
        /// </summary>
        /// <remarks>
        /// The start/fix buttons are disabled while the run is active and are restored when the
        /// run ends for any reason, including an aborted start and an unexpected error.
        /// </remarks>
        private void StartProcessing()
        {
            ClearConsole();
            ClearProcessInfo();

            this.BeginInvoke((MethodInvoker) delegate
            {
                btnStart.Enabled = false;
                btnStop.Enabled  = true;
                btnFix.Enabled   = false;
            });

            // First, read the links from the input file; they are the starting point of the run.
            List<string> tempList;
            bool isRead = mDbController.TryReadAllLines(mSelectedFilePath, out tempList);

            if (!isRead)
            {
                MessageBox.Show("Girilen input dosyasındaki linkler okunamadı! İşlem iptal ediliyor.");
                // Without this the start button would stay disabled after an aborted start.
                RestoreIdleButtons();
                return;
            }

            // Find the "lastly processed url" entry among the lines that were read; it is used
            // later, while processing the initial links, to decide which links get processed.
            bool isStartUrlFound = mDbController.ExtractLastlyProcessedLink(ref tempList);

            if (!isStartUrlFound)
            {
                MessageBox.Show("Girilen input dosyasında son işlenen linke ait satır bulunamadı, işlem iptal edilecektir." + Environment.NewLine
                                + "Lütfen referans linkler dosyasının başına hangi urlden devam etmek istediğinizi belirtmek için : " + Environment.NewLine
                                + "## Lastly Processed link :www.exampleurl.com.tr" + Environment.NewLine
                                + "ifadesini ekleyiniz."
                                );
                RestoreIdleButtons();
                return;
            }

            // Add the links to the candidate list.
            mDbController.AddRangeCandidates(tempList);
            // Total number of links initially read from the text file.
            initialLinkCount = tempList.Count;
            // Release the memory held by the temporary list.
            tempList.Clear();

            mFlagStop = false;

            try
            {
                WriteToConsole("Going to process initial links to create candidate urls");
                // NOTE(review): this blocks the calling thread until the initial links are
                // processed, and the boolean outcome is ignored (as it was before) - confirm
                // that both are intended.
                mDbController.ProcessInitialLinks().Wait();

                WriteToConsole("Candidate urls are initialized");
                mProcessedUrlCount = 0;
            }
            catch (Exception)
            {
                // Do not leave the form locked when the initialisation fails.
                RestoreIdleButtons();
                throw;
            }

            // Task.Run understands async lambdas. Handing an async lambda to a Thread makes it
            // "async void": the thread ends at the first await and exceptions go unhandled.
            Task.Run(async () =>
            {
                try
                {
                    while (mDbController.GetCandidatesCount() > 0 && !mDbController.IsSearchCompleted)
                    {
                        SetTaskCount(0);
                        if (mFlagStop)
                        {
                            // The stop button is disabled by RestoreIdleButtons in the finally block.
                            WriteToConsole(":( Operation is terminated by the user. Going to save the current results.");
                            break;
                        }

                        List<CustomLink> currentLinks = await mDbController.TakeMax();

                        var tasks     = new List<Task<Tuple<List<string>, List<DomainInfo>>>>();
                        driverStatues = new List<string>();
                        taskResults   = new List<bool>();

                        SetTaskCount(currentLinks.Count);
                        for (int i = 0; i < currentLinks.Count; i++)
                        {
                            taskResults.Add(false);
                            driverStatues.Add("");
                            tasks.Add(GetCustomerShoppingPointsAsync(currentLinks[i], i));
                        }

                        Tuple<List<string>, List<DomainInfo>>[] batchResults = await Task.WhenAll(tasks);

                        ClearConsole();
                        SetTaskCount(0);
                        WriteToConsole("#### Whole tasks are finished for this loop ###");

                        var candidateUrls    = new List<string>();
                        var candidateDomains = new List<DomainInfo>();
                        foreach (var batchResult in batchResults)
                        {
                            // A task yields null when its link was skipped.
                            if (batchResult == null)
                            {
                                continue;
                            }

                            // AddRange throws on a null argument, so each half is checked.
                            if (batchResult.Item2 != null)
                            {
                                candidateDomains.AddRange(batchResult.Item2);
                            }
                            if (batchResult.Item1 != null)
                            {
                                candidateUrls.AddRange(batchResult.Item1);
                            }
                        }

                        TaskHelper.RemoveTaskList(ref tasks);

                        WriteToConsole("#### Going to insert candidates into the database  ###");
                        await mDbController.InsertCandidatesToDatabase(new Tuple<List<string>, List<DomainInfo>>(candidateUrls, candidateDomains));

                        var candidateCount = mDbController.GetCandidatesCount();
                        int totalProcessed = mDbController.GetCount();
                        SetLastIteratioInformation("", totalProcessed, candidateCount, initialLinkCount);

                        // UPDATED: 13.12.2019 - results are saved after each iteration.
                        mDbController.Save(mOutputFilePath, currentLinks);

                        ClearConsole();
                    }

                    WriteToConsole("#### URL extraction is Finished ###");

                    WriteToConsole("#### URLs are saved ###");
                }
                catch (Exception ex)
                {
                    // Report the failure instead of letting it escape unobserved.
                    WriteToConsole("#### URL extraction stopped because of an error : " + ex.Message);
                }
                finally
                {
                    RestoreIdleButtons();
                }
            });
        }

        /// <summary>
        /// Queues a UI update that puts the buttons back into their not-running state:
        /// start and fix enabled, stop disabled.
        /// </summary>
        private void RestoreIdleButtons()
        {
            btnStart.BeginInvoke((MethodInvoker) delegate
            {
                btnStart.Enabled = true;
                btnFix.Enabled   = true;
                btnStop.Enabled  = false;
            });
        }