Beispiel #1
0
        /// <summary>
        /// Requests cancellation and waits until every task tracked in <c>mTasks</c> has finished.
        /// </summary>
        /// <remarks>
        /// Returns immediately unless <c>Status</c> is <c>CrawlerStatus.RUNNING</c>. The status is reset to
        /// <c>CrawlerStatus.STOPPED</c> even when waiting on the tasks throws, so the crawler is never left
        /// reporting RUNNING with an already disposed cancellation token source.
        /// </remarks>
        /// <returns>A task that completes when the tracked tasks have finished.</returns>
        /// <exception cref="AggregateException">
        /// Propagated from <c>Task.WaitAll</c> when a tracked task was cancelled or faulted.
        /// </exception>
        public async Task StopAsync()
        {
            lock (mStatusSyncLock)
            {
                if (Status != CrawlerStatus.RUNNING)
                {
                    return;
                }
            }

            mLogger.Log(nameof(Crawler), "Stopping");

            mCancellationTokenSource.Cancel();
            await Task.Run(() =>
            {
                try
                {
                    Task.WaitAll(mTasks.Keys.ToArray());
                }
                finally
                {
                    // WaitAll reports cancelled/faulted tasks only after all of them have completed,
                    // so the crawler counts as stopped on both the normal and the exceptional path.
                    lock (mStatusSyncLock)
                    {
                        Status = CrawlerStatus.STOPPED;
                    }

                    mCancellationTokenSource.Dispose();
                }
            });

            // Not reached when WaitAll threw; the exception propagates to the caller as before.
            mLogger.Log(nameof(Crawler), "Stopped");
        }
Beispiel #2
0
        /// <summary>
        /// Applies a textual command to the crawler state.
        /// </summary>
        /// <param name="argument">
        /// "GoUp" locks the lower gear, releases the upper gear and extends the pistons;
        /// "GoDown" does the same with the gears swapped; "Stop" only records the new status.
        /// Anything else is reported through <c>Echo</c>.
        /// </param>
        void HandleTrigger(string argument)
        {
            if (argument == "GoUp")
            {
                _crawlerStatus = CrawlerStatus.GoUp;
                _lowerLandingGear.Lock();
                _upperLandingGear.Unlock();
                ExtendPistons();
                return;
            }

            if (argument == "GoDown")
            {
                _crawlerStatus = CrawlerStatus.GoDown;
                _upperLandingGear.Lock();
                _lowerLandingGear.Unlock();
                ExtendPistons();
                return;
            }

            if (argument == "Stop")
            {
                _crawlerStatus = CrawlerStatus.Stop;
                return;
            }

            // Unknown command: leave the state untouched and tell the user.
            Echo($"Invalid option: {argument}\n");
        }
Beispiel #3
0
        /// <summary>
        /// Marks the crawler as running and pumps URLs from the frontier until cancellation is requested.
        /// </summary>
        /// <param name="restart">
        /// When <c>true</c>, the URL frontier is replaced with a new <c>SimpleUrlFrontier</c> built from
        /// <c>mConfig</c> before crawling begins.
        /// </param>
        /// <returns>A task that completes when the dispatch loop exits.</returns>
        public async Task StartAsync(bool restart = false)
        {
            mLogger.Log(nameof(Crawler), "Running");

            lock (mStatusSyncLock)
            {
                // A second start while already running is a no-op.
                if (Status == CrawlerStatus.RUNNING)
                {
                    return;
                }

                Status = CrawlerStatus.RUNNING;
            }

            if (restart)
            {
                mUrlFrontier = new SimpleUrlFrontier(mConfig);
            }

            mTasks.Clear();
            mFetchSemaphore = new SemaphoreSlim(mConfig.MaxFetchingConcurrency);
            mCancellationTokenSource = new CancellationTokenSource();

            // Capture the token locally so the loop keeps observing this run's token source.
            var cancellation = mCancellationTokenSource.Token;

            await Task.Run(() =>
            {
                while (!cancellation.IsCancellationRequested)
                {
                    string nextUrl = null;

                    try
                    {
                        nextUrl = mUrlFrontier.PopUrl();
                    }
                    catch (Exception popError)
                    {
                        // A failed pop is logged to both logs and then treated like "no URL available".
                        mLogger.LogException(nameof(Crawler), "Failed to pop url", popError);
                        mErrorLogger.LogException(nameof(Crawler), "Failed to pop url", popError);
                    }

                    if (nextUrl != null)
                    {
                        // Started without waiting on the result.
                        FetchUrlAsync(nextUrl);
                        continue;
                    }

                    if (mTasks.Keys.Any())
                    {
                        // Nothing to dispatch: block until one tracked task finishes.
                        Task.WaitAny(mTasks.Keys.ToArray());
                        continue;
                    }

                    // No URLs and no tracked tasks: back off before polling again.
                    Thread.Sleep(5000);
                }
            }, cancellation);
        }
 /// <summary>
 /// Creates a crawler for <paramref name="server"/> with its status set to <c>CrawlerStatus.Idle</c>.
 /// </summary>
 /// <param name="dictionary">Dictionary manager stored for the crawl.</param>
 /// <param name="server">Server address stored for the crawl.</param>
 /// <param name="concurrent_tasks">Value stored in the <c>concurrent_tasks</c> field.</param>
 /// <param name="force_crawl">Value stored in the <c>force_crawl</c> field.</param>
 /// <param name="uppercase">Value stored in the <c>uppercase</c> field.</param>
 public XtreamCrawler(DictionaryManager dictionary, string server, int concurrent_tasks, bool force_crawl, bool uppercase)
 {
     // Caller-supplied settings.
     this.dictionary = dictionary;
     this.server = server;
     this.concurrent_tasks = concurrent_tasks;
     this.force_crawl = force_crawl;
     this.uppercase = uppercase;

     // Internal state.
     this.client = new WebClient();
     this.status = CrawlerStatus.Idle;
 }
Beispiel #5
0
 /// <summary>
 /// Initializes the crawler with its settings, a new <c>WebClient</c> and the
 /// <c>CrawlerStatus.Idle</c> status.
 /// </summary>
 /// <param name="dictionary">Dictionary manager kept in the <c>dictionary</c> field.</param>
 /// <param name="server">Server address kept in the <c>server</c> field.</param>
 /// <param name="concurrent_tasks">Value kept in the <c>concurrent_tasks</c> field.</param>
 /// <param name="force_crawl">Value kept in the <c>force_crawl</c> field.</param>
 /// <param name="uppercase">Value kept in the <c>uppercase</c> field.</param>
 public XtreamCrawler(DictionaryManager dictionary, string server, int concurrent_tasks, bool force_crawl, bool uppercase)
 {
     // Internal state first, then the caller-supplied settings.
     this.status = CrawlerStatus.Idle;
     this.client = new WebClient();

     this.server = server;
     this.dictionary = dictionary;
     this.uppercase = uppercase;
     this.force_crawl = force_crawl;
     this.concurrent_tasks = concurrent_tasks;
 }
Beispiel #6
0
 /// <summary>
 /// Wires up the crawler's collaborators, sets the status to <c>CrawlerStatus.STOPPED</c> and
 /// creates the runtime and error loggers under <c>config.LogDirectory</c>.
 /// </summary>
 /// <param name="config">Crawler configuration; its <c>LogDirectory</c> is where the log files are placed.</param>
 /// <param name="urlFrontier">Source of URLs to crawl.</param>
 /// <param name="fetcher">Fetcher stored in <c>mFetcher</c>.</param>
 /// <param name="similarContentManager">Stored in <c>mSimilarContentJudger</c>.</param>
 /// <param name="urlFilters">Filters stored in <c>mUrlFilters</c>.</param>
 public Crawler(CrawlerConfig config,
                IUrlFrontier urlFrontier,
                IFetcher fetcher,
                ISimilarContentManager similarContentManager,
                List<IUrlFilter> urlFilters)
 {
     mConfig = config;
     Status = CrawlerStatus.STOPPED;

     mUrlFrontier = urlFrontier;
     mFetcher = fetcher;
     mSimilarContentJudger = similarContentManager;
     mUrlFilters = urlFilters;

     // Both log files live side by side in the configured log directory.
     string runtimeLogPath = Path.Combine(config.LogDirectory, "Crawler.Log");
     mLogger = new RuntimeLogger(runtimeLogPath, true);

     string errorLogPath = Path.Combine(config.LogDirectory, "Crawler Error.Log");
     mErrorLogger = new RuntimeLogger(errorLogPath, false);
 }
        /// <summary>
        /// Maps a crawler status to the Russian label returned for it.
        /// </summary>
        /// <param name="status">Status to describe.</param>
        /// <returns>
        /// The label for <c>Ready</c>, <c>Run</c> or <c>Done</c>; a "status undefined" label for any other value.
        /// </returns>
        public static string ToAppStateName(this CrawlerStatus status)
        {
            if (status == CrawlerStatus.Ready)
            {
                return "Не запущен";
            }

            if (status == CrawlerStatus.Run)
            {
                return "Запущен";
            }

            if (status == CrawlerStatus.Done)
            {
                return "Работа выполнена";
            }

            // Fallback for values without a dedicated label.
            return "Статус не определен";
        }
        /// <summary>
        /// Crawls the server with every dictionary entry, in batches of background workers, and
        /// records the outcome in <c>status</c>.
        /// </summary>
        /// <remarks>
        /// Unless <c>force_crawl</c> is set, the server page is downloaded first and must contain
        /// "Xtream Codes". Each non-empty worker result is appended to
        /// <c>output/tv_channels_N.m3u</c>, where N is the running result count. Any exception
        /// is written to the console and reported as <c>CrawlerStatus.InvalidSite</c>.
        /// </remarks>
        private void Crawl_Manager()
        {
            bool found_result = false;
            progress = 0;
            num_results = 0;
            try
            {
                // With force_crawl the landing-page probe is skipped entirely.
                bool valid_site = force_crawl;
                if (!valid_site)
                {
                    string page = client.DownloadString(server.Trim());
                    valid_site = page.Contains("Xtream Codes");
                }

                if (!valid_site || dictionary.entries.Count <= 0)
                {
                    status = CrawlerStatus.InvalidSite;
                    return;
                }

                // A batch size below 1 would never advance 'progress' and loop forever.
                int batch_size = Math.Max(1, concurrent_tasks);

                while (progress < dictionary.entries.Count)
                {
                    // The last batch may be smaller than the configured batch size.
                    int increment = Math.Min(batch_size, dictionary.entries.Count - progress);

                    crawl_tasks = new CrawlTaskData[increment];

                    for (int j = 0; j < crawl_tasks.Length; j++)
                    {
                        string entry = dictionary.entries[progress + j];

                        // NOTE(review): when 'uppercase' is set the entry is used unchanged, otherwise
                        // its first character is upper-cased - confirm this is the intended meaning.
                        // Empty entries are passed through as-is; indexing [0] on them used to throw
                        // and abort the whole crawl as InvalidSite.
                        string search_string;
                        if (uppercase || string.IsNullOrEmpty(entry))
                        {
                            search_string = entry;
                        }
                        else
                        {
                            search_string = char.ToUpper(entry[0]) + entry.Substring(1);
                        }

                        crawl_tasks[j] = new CrawlTaskData();
                        crawl_tasks[j].task.WorkerSupportsCancellation = true;
                        crawl_tasks[j].task.WorkerReportsProgress = true;
                        crawl_tasks[j].task.DoWork += new DoWorkEventHandler(Crawl_Method);

                        // The worker receives its search string together with its slot index.
                        var arguments = Tuple.Create<string, int>(search_string, j);
                        crawl_tasks[j].task.RunWorkerAsync(arguments);
                    }

                    WaitWorkers();

                    foreach (CrawlTaskData crawl_task in crawl_tasks)
                    {
                        if (crawl_task.result == string.Empty)
                        {
                            continue;
                        }

                        found_result = true;
                        num_results++;

                        // CreateDirectory does nothing when the directory already exists.
                        Directory.CreateDirectory("output");
                        string output = "output/tv_channels_" + num_results + ".m3u";

                        // 'using' flushes and closes the file even if the write throws.
                        using (StreamWriter outputFile = new StreamWriter(output, true))
                        {
                            outputFile.WriteLine(crawl_task.result);
                        }
                    }

                    progress += increment;
                }

                status = found_result
                    ? CrawlerStatus.CompletedWithResults
                    : CrawlerStatus.CompletedWithoutResults;
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
                status = CrawlerStatus.InvalidSite;
            }
        }
 /// <summary>
 /// <c>DoWork</c>-style entry point: flags the crawler as working, then runs the crawl synchronously.
 /// </summary>
 /// <param name="sender">Event source; not used.</param>
 /// <param name="e">Event arguments; not used.</param>
 public void DoCrawl(object sender, DoWorkEventArgs e)
 {
     // The status is set before the crawl starts; Crawl_Manager assigns the final status itself.
     this.status = CrawlerStatus.Working;
     this.Crawl_Manager();
 }
Beispiel #10
0
        /// <summary>
        /// Crawls every category in <c>Variables.CategoryQueue</c> on verkkokauppa.com and stores
        /// the products that parsed without problems through <c>Repository.VerkkItems.Insert</c>.
        /// </summary>
        /// <remarks>
        /// Does nothing unless <c>Status</c> is <c>CrawlerStatus.Ready</c>. For each category the first
        /// page is loaded to read the page count; categories without a readable page count are skipped.
        /// Products with parse problems are not stored; their problems are logged through
        /// <c>Repository.Logger.Log</c> together with the listing page URL.
        /// NOTE(review): the method is <c>async void</c>, so callers cannot await it or observe
        /// exceptions; the signature is kept for compatibility.
        /// </remarks>
        public async void Run()
        {
            if (Status != CrawlerStatus.Ready)
            {
                return;
            }

            // Known vendor codes, lower-cased with the culture-independent conversion so the
            // lookup below does not depend on the machine's current culture.
            List<string> codesList = Repository.EuroMade.GetAllVendorCodes().ToList();
            HashSet<string> codes = new HashSet<string>(codesList.Select(x => x.ToLowerInvariant()));

            status = CrawlerStatus.Run;

            CategoryQueue = Variables.CategoryQueue;
            const string domainName = "https://www.verkkokauppa.com";
            const string pageSelector = ".page-selector__page-count-indicator";
            const string cellSelector = ".list-product";

            var config = Configuration.Default.WithDefaultLoader();

            while (CategoryQueue.Any())
            {
                Category currentCategory = CategoryQueue.Dequeue();
                var pageIndex = 1;

                var currentURL = currentCategory.URL.ToString() + pageIndex;
                var document = await BrowsingContext.New(config).OpenAsync(currentURL);

                // QuerySelector yields null when the indicator is missing; the null-conditional
                // chain then produces null, TryParse fails and the category is skipped.
                var pagesCountString = document.QuerySelector(pageSelector)
                                       ?.TextContent
                                       .Replace("/", string.Empty)
                                       .Trim();

                if (!int.TryParse(pagesCountString, out int pagesCountValue))
                {
                    continue;
                }

                for (pageIndex = 1; pageIndex <= pagesCountValue; pageIndex++)
                {
                    currentURL = currentCategory.URL.ToString() + pageIndex;
                    document = await BrowsingContext.New(config).OpenAsync(currentURL);

                    var cells = document.QuerySelectorAll(cellSelector);
                    var siteItems = new List<VerkkItem>();

                    foreach (var c in cells)
                    {
                        List<string> problems = new List<string>();

                        // Placeholder values; an item is only stored when no problem was recorded.
                        int vendorCode = -1;
                        string name = "error_value";
                        Uri url = new Uri("https://www.ERROR_URL.com");
                        decimal price = -1m;

                        // Vendor code.
                        var codeElement = c.QuerySelector(".image__product-id");
                        if (codeElement == null)
                        {
                            problems.Add("Артикул не найден.");
                        }
                        else
                        {
                            if (!int.TryParse(codeElement.TextContent, out int number))
                            {
                                problems.Add("Артикул не является числом.");
                            }
                            vendorCode = number;
                        }

                        // Product name.
                        var nameElement = c.QuerySelector(".list-product-link__name");
                        if (nameElement == null)
                        {
                            problems.Add("Наименование товара не найдено");
                        }
                        else
                        {
                            name = nameElement.TextContent;
                        }

                        // Product link; a link element without an href counts as "not found".
                        string partialUrl = c.QuerySelector(".data__list-product-link")?.GetAttribute("href");
                        if (partialUrl == null)
                        {
                            problems.Add("URL не найдено");
                        }
                        else
                        {
                            try
                            {
                                url = new Uri(domainName + partialUrl);
                            }
                            catch (UriFormatException)
                            {
                                problems.Add("Не удается преобразовать URL");
                            }
                        }

                        // Without an href this is the bare domain, as before.
                        string itemUrl = domainName + partialUrl;

                        // Codes are matched in the form "v<number>".
                        bool uploaded = vendorCode != -1 && codes.Contains("v" + vendorCode);

                        var goodsPage = await BrowsingContext.New(config).OpenAsync(itemUrl);

                        // The presence of the discount tag is what marks the item as discounted.
                        bool discount = goodsPage.QuerySelector(".price-tag-discount__amount") != null;

                        var priceVerkk = goodsPage
                                         .QuerySelector(".price-tag-content__price-tag-price--current .price-tag-price__euros")
                                         ?.GetAttribute("content");

                        if (priceVerkk == null)
                        {
                            // Missing element or attribute: report "not found" rather than "not a number".
                            problems.Add("Цена не найдена.");
                        }
                        else if (decimal.TryParse(
                                     priceVerkk,
                                     NumberStyles.AllowDecimalPoint,
                                     CultureInfo.InvariantCulture,
                                     out decimal priceVal))
                        {
                            price = priceVal;
                        }
                        else
                        {
                            problems.Add("Цена не является числом.");
                        }

                        if (problems.Any())
                        {
                            Repository.Logger.Log(ErrorFrom.Crawler, problems, new Uri(currentURL));
                        }
                        else
                        {
                            siteItems.Add(new VerkkItem
                            {
                                Discount   = discount,
                                Name       = name,
                                PriceEuro  = price,
                                ULR        = url,
                                VendorCode = vendorCode,
                                Cathegory  = currentCategory.RusName,
                                Uploaded   = uploaded
                            });
                        }
                    }

                    if (siteItems.Any())
                    {
                        Repository.VerkkItems.Insert(siteItems);
                    }
                }
            }

            status = CrawlerStatus.Done;
        }
 /// <summary>
 /// Adds <paramref name="portal"/> to the <c>_portais</c> collection.
 /// </summary>
 /// <param name="portal">Entry to add.</param>
 protected void AddModule(CrawlerStatus portal) => _portais.Add(portal);
Beispiel #12
0
        /// <summary>
        /// Crawls the server with every dictionary entry, in batches of background workers, and
        /// records the outcome in <c>status</c>.
        /// </summary>
        /// <remarks>
        /// Unless <c>force_crawl</c> is set, the server page is downloaded first and must contain
        /// "Xtream Codes". Each non-empty worker result is appended to
        /// <c>output/tv_channels_N.m3u</c>, where N is the running result count. Any exception
        /// is written to the console and reported as <c>CrawlerStatus.InvalidSite</c>.
        /// </remarks>
        private void Crawl_Manager()
        {
            bool found_result = false;
            progress = 0;
            num_results = 0;
            try
            {
                // With force_crawl the landing-page probe is skipped entirely.
                bool valid_site = force_crawl;
                if (!valid_site)
                {
                    string page = client.DownloadString(server.Trim());
                    valid_site = page.Contains("Xtream Codes");
                }

                if (!valid_site || dictionary.entries.Count <= 0)
                {
                    status = CrawlerStatus.InvalidSite;
                    return;
                }

                // A batch size below 1 would never advance 'progress' and loop forever.
                int batch_size = Math.Max(1, concurrent_tasks);

                while (progress < dictionary.entries.Count)
                {
                    // The last batch may be smaller than the configured batch size.
                    int increment = Math.Min(batch_size, dictionary.entries.Count - progress);

                    crawl_tasks = new CrawlTaskData[increment];

                    for (int j = 0; j < crawl_tasks.Length; j++)
                    {
                        string entry = dictionary.entries[progress + j];

                        // NOTE(review): when 'uppercase' is set the entry is used unchanged, otherwise
                        // its first character is upper-cased - confirm this is the intended meaning.
                        // Empty entries are passed through as-is; indexing [0] on them used to throw
                        // and abort the whole crawl as InvalidSite.
                        string search_string;
                        if (uppercase || string.IsNullOrEmpty(entry))
                        {
                            search_string = entry;
                        }
                        else
                        {
                            search_string = char.ToUpper(entry[0]) + entry.Substring(1);
                        }

                        crawl_tasks[j] = new CrawlTaskData();
                        crawl_tasks[j].task.WorkerSupportsCancellation = true;
                        crawl_tasks[j].task.WorkerReportsProgress = true;
                        crawl_tasks[j].task.DoWork += new DoWorkEventHandler(Crawl_Method);

                        // The worker receives its search string together with its slot index.
                        var arguments = Tuple.Create<string, int>(search_string, j);
                        crawl_tasks[j].task.RunWorkerAsync(arguments);
                    }

                    WaitWorkers();

                    foreach (CrawlTaskData crawl_task in crawl_tasks)
                    {
                        if (crawl_task.result == string.Empty)
                        {
                            continue;
                        }

                        found_result = true;
                        num_results++;

                        // CreateDirectory does nothing when the directory already exists.
                        Directory.CreateDirectory("output");
                        string output = "output/tv_channels_" + num_results + ".m3u";

                        // 'using' flushes and closes the file even if the write throws.
                        using (StreamWriter outputFile = new StreamWriter(output, true))
                        {
                            outputFile.WriteLine(crawl_task.result);
                        }
                    }

                    progress += increment;
                }

                status = found_result
                    ? CrawlerStatus.CompletedWithResults
                    : CrawlerStatus.CompletedWithoutResults;
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
                status = CrawlerStatus.InvalidSite;
            }
        }
Beispiel #13
0
 /// <summary>
 /// Marks the crawler as <c>CrawlerStatus.Working</c> and then runs <c>Crawl_Manager</c> on the calling thread.
 /// </summary>
 /// <param name="sender">Event source; ignored.</param>
 /// <param name="e">Event arguments; ignored.</param>
 public void DoCrawl(object sender, DoWorkEventArgs e)
 {
     // Working is only the interim status; Crawl_Manager sets the final one when it finishes.
     this.status = CrawlerStatus.Working;
     this.Crawl_Manager();
 }
Beispiel #14
0
 /// <summary>
 /// Creates a stream client that shares the tweet queue, the status object and the lock object
 /// with its creator.
 /// </summary>
 /// <param name="unprocessedTweets">Queue stored in <c>UnprocessedTweets</c>.</param>
 /// <param name="crawlerStatus">Status object stored in <c>CrawlerStatus</c>.</param>
 /// <param name="syncRoot">Lock object stored in <c>SyncRoot</c>; passed by reference but only read here.</param>
 public TwitterStreamClient(Queue<string> unprocessedTweets, CrawlerStatus crawlerStatus, ref Object syncRoot)
 {
     this.CrawlerStatus = crawlerStatus;
     this.SyncRoot = syncRoot;
     this.UnprocessedTweets = unprocessedTweets;
 }
        /// <summary>
        /// Starts the worker threads, waits for the user to press Enter, then signals shutdown
        /// and joins the threads one after another.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            CrawlerStatus = new CrawlerStatus();
            CrawlerStatus.KeepRunning = true;

            UnprocessedHashtags = new Queue<string>();
            UnprocessedRetweets = new Queue<string>();
            UnprocessedTweets = new Queue<string>();

            // The stream reader is started before the dictionary is loaded.
            StreamReaderThread = new Thread(ReadTweets);
            StreamReaderThread.Start();

            const string separator = "---------------------------------------------------";

            Console.WriteLine("");
            Console.WriteLine(separator);
            Console.WriteLine("Reading dictionary...");
            LoadDictionary();
            Console.WriteLine(Dictionary.Count + " dictionary entries read.");
            Console.WriteLine(separator);
            Console.WriteLine("");

            StreamProcessorThread = new Thread(ProcessTweets);
            StreamProcessorThread.Start();

            StatusMessageThread = new Thread(PrintStatusMessage);
            StatusMessageThread.Start();

            TweetClassifierThread = new Thread(TweetClassifier);
            TweetClassifierThread.Start();

            HashtagRetweetProcessorThread = new Thread(HashtagRetweetProcessor);
            HashtagRetweetProcessorThread.Start();

            // Block until the user presses Enter, then clear the keep-running flag.
            Console.ReadLine();
            CrawlerStatus.KeepRunning = false;

            lock (_syncRootOutput)
            {
                Console.WriteLine("");
                Console.WriteLine("Stopping Threads.....");
                Console.WriteLine("");
                Console.WriteLine("Attempting stop Tweet Classifier....");
            }

            TweetClassifierThread.Join();

            // Prints the message under the output lock, then waits for the thread to exit.
            Action<string, Thread> announceAndJoin = (message, worker) =>
            {
                lock (_syncRootOutput)
                {
                    Console.WriteLine(message);
                }

                worker.Join();
            };

            announceAndJoin("Attempting stop Stream Reader....", StreamReaderThread);
            announceAndJoin("Attempting stop Stream Processor....", StreamProcessorThread);
            announceAndJoin("Attempting stop Status Message....", StatusMessageThread);
            announceAndJoin("Attempting stop Hashtag Retweet Processor....", HashtagRetweetProcessorThread);
        }
Beispiel #16
0
        /// <summary>
        /// Looks up the landing gears and pistons by block name, validates that all of them exist
        /// and puts the crawler into its initial stopped state.
        /// </summary>
        /// <remarks>
        /// If any block is missing, each problem is echoed and the constructor returns without
        /// setting the update frequency or <c>_init</c>. Otherwise both crawler landing gears are
        /// locked with auto-lock disabled, auto-lock is disabled on both base landing gears, and
        /// the status is set to <c>CrawlerStatus.Stop</c>.
        /// </remarks>
        public Program()
        {
            var error = false;

            // Each field is bound to the block of the same name. The lookups were previously
            // crossed (upper <- "Lower ...", lower <- "Upper ..."), which contradicted the error
            // messages below. NOTE(review): confirm the in-game block names match.
            _upperLandingGear = GridTerminalSystem.GetBlockWithName("Upper Landing Gear") as IMyLandingGear;
            _lowerLandingGear = GridTerminalSystem.GetBlockWithName("Lower Landing Gear") as IMyLandingGear;

            _portBaseLandingGear      = GridTerminalSystem.GetBlockWithName("Port Base Landing Gear") as IMyLandingGear;
            _starboardBaseLandingGear = GridTerminalSystem.GetBlockWithName("Starboard Base Landing Gear") as IMyLandingGear;

            _piston1 = GridTerminalSystem.GetBlockWithName("Piston 1") as IMyExtendedPistonBase;
            _piston2 = GridTerminalSystem.GetBlockWithName("Piston 2") as IMyExtendedPistonBase;
            _piston3 = GridTerminalSystem.GetBlockWithName("Piston 3") as IMyExtendedPistonBase;

            // Report every missing block, not just the first, so the user can fix them in one go.
            if (_upperLandingGear == null)
            {
                Echo("No top landing gear");
                error = true;
            }

            if (_lowerLandingGear == null)
            {
                Echo("No bottom landing gear");
                error = true;
            }

            if (_portBaseLandingGear == null)
            {
                Echo("No port landing gear");
                error = true;
            }

            if (_starboardBaseLandingGear == null)
            {
                Echo("No starboard landing gear");
                error = true;
            }

            if (_piston1 == null)
            {
                Echo("No piston #1");
                error = true;
            }

            if (_piston2 == null)
            {
                Echo("No piston #2");
                error = true;
            }

            if (_piston3 == null)
            {
                Echo("No piston #3");
                error = true;
            }

            if (error)
            {
                Echo("Errors found. Please fix and recompile");
                return;
            }

            Runtime.UpdateFrequency = UpdateFrequency.Update10;

            // Start with both crawler gears locked and under manual control.
            _upperLandingGear.AutoLock = false;
            _upperLandingGear.Lock();

            _lowerLandingGear.AutoLock = false;
            _lowerLandingGear.Lock();

            _portBaseLandingGear.AutoLock      = false;
            _starboardBaseLandingGear.AutoLock = false;

            _crawlerStatus = CrawlerStatus.Stop;

            _init = true;
            Echo($"Crawler startup @ {DateTime.Now}");
        }