Beispiel #1
0
        /// <summary>
        /// Builds a job configuration from the supplied Twitter credentials and topic query
        /// and asks the job manager to start it under a fixed job id.
        /// </summary>
        /// <param name="jobManager">Manager that receives the new job configuration.</param>
        /// <param name="twitterCredentialsOptions">Credentials copied into the job attributes.</param>
        /// <param name="metadata">Supplies the value stored under the "TopicQuery" attribute.</param>
        /// <remarks>
        /// Starting the job is best effort: a failure is written to standard error and is not rethrown.
        /// </remarks>
        private static async Task StartJob(
            IJobManager jobManager,
            TwitterCredentialsOptions twitterCredentialsOptions,
            TwitterMetadata metadata
            )
        {
            // TODO: exclude unrelated meanings of the topic from the query, e.g.
            // "snake bite NOT cocktail NOT darts NOT piercing" (and also NOT music).

            var jobId     = Guid.Parse("a43e8bb4-9c15-48a8-a0a3-7479b75eb6d0");
            var jobConfig = new DataAcquirerJobConfig()
            {
                Attributes = new Dictionary <string, string>
                {
                    { "TopicQuery", metadata.Query },
                    { "AccessToken", twitterCredentialsOptions.AccessToken },
                    { "AccessTokenSecret", twitterCredentialsOptions.AccessTokenSecret },
                    { "ApiKey", twitterCredentialsOptions.ApiKey },
                    { "ApiSecretKey", twitterCredentialsOptions.ApiSecretKey },
                },
                JobId = jobId,
                OutputMessageBrokerChannels = new string[] { "job_management.component_data_input.DataAnalyser_sentiment" }
            };

            try
            {
                await jobManager.StartNewJobAsync(jobConfig);
            }
            catch (Exception ex)
            {
                // Still best effort (no rethrow), but the failure is no longer invisible.
                // No logger is available in this static helper, so report on stderr.
                Console.Error.WriteLine($"Could not start job {jobId}: {ex}");
            }
        }
Beispiel #2
0
        /// <summary>
        /// Persists the job configuration, requests posts for the configured topic query
        /// from the acquirer and hands the resulting stream to <c>ProcessBatch</c>.
        /// </summary>
        /// <param name="jobConfig">
        /// Job configuration. The "TopicQuery" attribute is required; "Language" and
        /// "Translate" are optional.
        /// </param>
        /// <param name="cancellationToken">Token passed to the acquirer when requesting posts.</param>
        /// <remarks>
        /// The method does not throw for job failures: cancellation ends the job silently,
        /// any other exception removes the job record and is logged.
        /// </remarks>
        private async Task RunJobAsync(DataAcquirerJobConfig jobConfig,
                                       CancellationToken cancellationToken)
        {
            try
            {
                // Translation is enabled by the mere presence of the "Translate" key;
                // its value is not inspected.
                var translate = jobConfig.Attributes.ContainsKey("Translate");

                // TODO validate job config
                if (!jobConfig.Attributes.TryGetValue("TopicQuery", out var topicQuery))
                {
                    _logger.TrackError(
                        "StartNewJob",
                        "TopicQuery attribute is not present. Job did not start",
                        new { jobId = jobConfig.JobId });
                    return;
                }

                string queryLanguage =
                    jobConfig.Attributes.TryGetValue("Language", out var desiredLanguage)
                        ? desiredLanguage
                        : null;

                await _dataAcquirerJobStorage.SaveAsync(jobConfig.JobId, jobConfig);

                var batchSize = 100;

                var dataAcquirerInputModel = DataAcquirerInputModel.FromValues(
                    jobConfig.JobId,
                    topicQuery,
                    queryLanguage,
                    new DataAcquirerAttributes(jobConfig.Attributes),
                    batchSize
                    );

                var batch = _acquirer.GetPostsAsync(
                    dataAcquirerInputModel,
                    cancellationToken);

                _logger.TrackInfo("MessageTracking", "Starting");

                await ProcessBatch(jobConfig, dataAcquirerInputModel, batch, translate);
            }
            catch (OperationCanceledException)
            {
                // Cancellation is an expected way for a job to end. Catching the base type
                // also covers TaskCanceledException, so a token-triggered
                // OperationCanceledException is no longer reported as a job error.
            }
            catch (Exception e)
            {
                _runningJobsRecords.Remove(jobConfig.JobId);
                _logger.TrackError(
                    "RunJob",
                    "Job encountered an error and stopped.",
                    new
                    {
                        jobId     = jobConfig.JobId,
                        exception = e
                    });
            }
        }
Beispiel #3
0
        /// <summary>
        /// Registers and launches a new acquisition job unless the component is stopping
        /// or a job with the same id is already registered.
        /// </summary>
        /// <param name="jobConfig">Configuration of the job to start.</param>
        /// <returns>
        /// An already completed task in every case; the job itself is started without being awaited.
        /// </returns>
        public Task StartNewJobAsync(DataAcquirerJobConfig jobConfig)
        {
            lock (_dictionaryLock)
            {
                var jobId = jobConfig.JobId;

                if (_isStopping)
                {
                    _logger.TrackWarning(
                        "StartNewJob",
                        "Could not start job, because the component is stopping",
                        new { jobId });

                    return Task.CompletedTask;
                }

                if (_runningJobsRecords.ContainsKey(jobId))
                {
                    _logger.TrackWarning(
                        "StartNewJob",
                        "Job is with this id already running",
                        new { jobId });

                    return Task.CompletedTask;
                }

                _logger.TrackInfo(
                    "StartNewJob",
                    "Config recieved",
                    new { config = jobConfig });

                var tokenSource = new CancellationTokenSource();
                var runTask     = RunJobAsync(jobConfig, tokenSource.Token);

                // Once the job finishes (for whatever reason) drop its record again.
                var trackedTask = runTask.ContinueWith(async finished =>
                {
                    try
                    {
                        await finished;
                    }
                    catch (TaskCanceledException)
                    {
                        // A cancelled job is cleaned up like any other finished job.
                    }

                    _runningJobsRecords.Remove(jobId, out _);
                    _logger.TrackInfo(
                        "StartNewJob",
                        "Job removed",
                        new { jobId = jobId.ToString() });
                });

                var record = new JobManagerJobRecord
                {
                    JobId                   = jobConfig.JobId,
                    JobTask                 = trackedTask,
                    CancellationTokenSource = tokenSource
                };

                _runningJobsRecords.TryAdd(record.JobId, record);

                return Task.CompletedTask;
            }
        }
Beispiel #4
0
 /// <summary>
 /// Writes a "MessageTracking" info entry carrying the current count, but only when
 /// <paramref name="count"/> is a multiple of 1000 (zero included).
 /// </summary>
 /// <param name="jobConfig">Job whose id is attached to the log entry.</param>
 /// <param name="count">Running counter supplied by the caller.</param>
 private void LogProgress(DataAcquirerJobConfig jobConfig, int count)
 {
     const int reportInterval = 1000;

     if (count % reportInterval != 0)
     {
         return;
     }

     var details = new { jobId = jobConfig.JobId };
     _logger.TrackInfo("MessageTracking", $"Downloaded: {count}", details);
 }
Beispiel #5
0
        /// <summary>
        /// Queues a stop command for the given job on the component's update channel.
        /// </summary>
        /// <param name="jobId">Id of the job that should be stopped.</param>
        private async Task StopJob(Guid jobId)
        {
            var stopCommand = new DataAcquirerJobConfig
            {
                Command = JobCommand.Stop,
                JobId   = jobId
            };

            var updateChannel = _componentOptions.UpdateChannelName;

            await _interactiveConsumer.AddMessageToBeConsumed(updateChannel, stopCommand);
        }
Beispiel #6
0
        /// <summary>
        /// Creates a queue holding a single start configuration with a fixed job id,
        /// the custom attributes plus a "TopicQuery" of "?", and the output channel "s1".
        /// </summary>
        /// <returns>A queue containing exactly one job configuration.</returns>
        public Queue <DataAcquirerJobConfig> GetFixed()
        {
            // Work on a copy so the shared custom attributes are not modified.
            var jobAttributes = _customAttributes.ToDictionary(pair => pair.Key, pair => pair.Value);
            jobAttributes.Add("TopicQuery", "?");

            var fixedConfig = new DataAcquirerJobConfig
            {
                JobId      = Guid.Parse("01c3ee17-c9f4-492f-ac9c-e9f6ecd1fa7e"),
                Attributes = jobAttributes,
                Command    = JobCommand.Start,
                OutputMessageBrokerChannels = new[] { "s1" }
            };

            var result = new Queue <DataAcquirerJobConfig>();
            result.Enqueue(fixedConfig);

            return result;
        }
Beispiel #7
0
        /// <summary>
        /// Derives a post identifier by packing four 32-bit hash codes into the 16 bytes of a
        /// <see cref="Guid"/>: text, original post id, user id, and date/time combined with the job id.
        /// </summary>
        /// <param name="jobConfig">Job whose id is mixed into the last hash segment.</param>
        /// <param name="dataPost">Post whose fields are hashed.</param>
        /// <returns>The identifier built from the four hash codes.</returns>
        /// <remarks>
        /// NOTE(review): the result is only as stable as the <c>GetHashCode</c> implementations
        /// involved; string hash codes are not guaranteed to be equal across processes - confirm
        /// whether ids have to be reproducible between runs.
        /// </remarks>
        private static Guid CalculatePostId(DataAcquirerJobConfig jobConfig, DataAcquirerPost dataPost)
        {
            const int hashSize = sizeof(int);
            var bytes = new byte[4 * hashSize];

            var textHash   = dataPost.Text.GetHashCode();
            var postIdHash = dataPost.OriginalPostId.GetHashCode();
            var userIdHash = dataPost.UserId.GetHashCode();
            var dateIdHash = dataPost.DateTime.GetHashCode();
            var jobId      = jobConfig.JobId.GetHashCode();

            // Combining two hash codes may exceed the int range; wrapping around is intended.
            unchecked
            {
                dateIdHash += jobId;
            }

            // Each hash occupies its own 4-byte slot. The previous offsets (0, 3, 7, 11)
            // overlapped, so every write clobbered the last byte of the preceding hash
            // and the final byte of the array was never filled.
            BitConverter.GetBytes(textHash).CopyTo(bytes, 0 * hashSize);
            BitConverter.GetBytes(postIdHash).CopyTo(bytes, 1 * hashSize);
            BitConverter.GetBytes(userIdHash).CopyTo(bytes, 2 * hashSize);
            BitConverter.GetBytes(dateIdHash).CopyTo(bytes, 3 * hashSize);

            return new Guid(bytes);
        }
Beispiel #8
0
        /// <summary>
        /// Creates one start configuration per topic, each with a freshly generated job id,
        /// its own copy of the custom attributes plus the topic as "TopicQuery",
        /// and the output channel "o_1".
        /// </summary>
        /// <param name="topics">Topic queries to create configurations for.</param>
        /// <returns>The configurations in the order of <paramref name="topics"/>.</returns>
        public Queue <DataAcquirerJobConfig> PrepareConfigQueue(IEnumerable <string> topics)
        {
            var result = new Queue <DataAcquirerJobConfig>();

            foreach (var topicQuery in topics)
            {
                // Every job gets its own attribute dictionary.
                var jobAttributes = _customAttributes.ToDictionary(pair => pair.Key, pair => pair.Value);
                jobAttributes.Add("TopicQuery", topicQuery);

                result.Enqueue(new DataAcquirerJobConfig
                {
                    JobId      = Guid.NewGuid(),
                    Attributes = jobAttributes,
                    Command    = JobCommand.Start,
                    OutputMessageBrokerChannels = new[] { "o_1" }
                });
            }

            return result;
        }
Beispiel #9
0
        /// <summary>
        /// Queues a start command on the component's update channel for a job that queries
        /// the topic "matfyz" with the stored Twitter credentials and outputs to channel "c1".
        /// </summary>
        /// <param name="jobId">Id assigned to the started job.</param>
        private async Task StartJob(Guid jobId)
        {
            var startCommand = new DataAcquirerJobConfig
            {
                Attributes = new Dictionary <string, string>
                {
                    ["TopicQuery"]        = "matfyz",
                    ["ApiKey"]            = _twitterCredentials.ApiKey,
                    ["ApiSecretKey"]      = _twitterCredentials.ApiSecretKey,
                    ["AccessToken"]       = _twitterCredentials.AccessToken,
                    ["AccessTokenSecret"] = _twitterCredentials.AccessTokenSecret
                },
                Command = JobCommand.Start,
                OutputMessageBrokerChannels = new string[] { "c1" },
                JobId   = jobId
            };

            var updateChannel = _componentOptions.UpdateChannelName;

            await _interactiveConsumer.AddMessageToBeConsumed(updateChannel, startCommand);
        }
Beispiel #10
0
        /// <summary>
        /// Consumes the stream of acquired posts: each post is cleaned, optionally translated
        /// to English, converted to a <c>UniPostModel</c>, serialized to JSON and sent to the
        /// job's output channels as an "acquired-data-post" message.
        /// </summary>
        /// <param name="jobConfig">Job configuration providing the id source and output channels.</param>
        /// <param name="dataAcquirerInputModel">Input model whose job id is attached to posts and log entries.</param>
        /// <param name="batch">Asynchronous stream of posts to process.</param>
        /// <param name="translate">
        /// When true, posts whose language is set and is not "en" are translated to English.
        /// </param>
        private async Task ProcessBatch(
            DataAcquirerJobConfig jobConfig,
            DataAcquirerInputModel dataAcquirerInputModel,
            IAsyncEnumerable <DataAcquirerPost> batch,
            bool translate)
        {
            var processed = 0;

            await foreach (var post in batch)
            {
                // Progress is reported with the count before the current post is added.
                LogProgress(jobConfig, processed++);

                var uniqueId = CalculatePostId(jobConfig, post);

                var    text         = ClearText(post.Text);
                string untranslated = null;

                var shouldTranslate = translate && post.Language != "en" && post.Language != null;
                if (shouldTranslate)
                {
                    try
                    {
                        var english = await _translationService
                                      .TranslateToEnglishAsync(post.Language, text);

                        // Keep the cleaned source text next to its translation.
                        untranslated = text;
                        text         = english;
                    }
                    catch (DataAcquirerException ex)
                    {
                        // A failed translation is not fatal: the post is sent untranslated.
                        _logger.TrackWarning(
                            "TranslationError",
                            "Could not translate",
                            new
                            {
                                jobId     = dataAcquirerInputModel.JobId,
                                exception = ex,
                                text
                            });
                    }
                }

                var unifiedPost = UniPostModel.FromValues(
                    uniqueId,
                    post.OriginalPostId,
                    text,
                    untranslated,
                    post.Language,
                    post.Source,
                    post.UserId,
                    post.DateTime,
                    dataAcquirerInputModel.JobId,
                    post.Query);

                var payload = JsonConvert.SerializeObject(unifiedPost);
                var message = new MessageBrokerMessage("acquired-data-post", payload);

                await SendRecordToOutputs(jobConfig.OutputMessageBrokerChannels, message);
            }
        }