/// <summary>
/// Builds a job configuration with a fixed job id, the query taken from
/// <paramref name="metadata"/> and the four Twitter credential attributes,
/// then asks <paramref name="jobManager"/> to start it.
/// </summary>
/// <param name="jobManager">Receives the job configuration via <c>StartNewJobAsync</c>.</param>
/// <param name="twitterCredentialsOptions">Provides AccessToken, AccessTokenSecret, ApiKey and ApiSecretKey.</param>
/// <param name="metadata">Provides the value stored under the "TopicQuery" attribute.</param>
private static async Task StartJob(
    IJobManager jobManager,
    TwitterCredentialsOptions twitterCredentialsOptions,
    TwitterMetadata metadata)
{
    // Previously hard-coded queries, kept for reference:
    // var query = "snakebite;snakebites;\"morsure de serpent\";\"morsures de serpents\";\"لدغات الأفاعي\";\"لدغة الأفعى\";\"لدغات أفاعي\";\"لدغة أفعى\"";
    // TODO add NOT cocktail NOT music
    // var query = "snake bite NOT cocktail NOT darts NOT piercing";
    var fixedJobId = Guid.Parse("a43e8bb4-9c15-48a8-a0a3-7479b75eb6d0");

    var config = new DataAcquirerJobConfig
    {
        Attributes = new Dictionary<string, string>
        {
            ["TopicQuery"] = metadata.Query,
            ["AccessToken"] = twitterCredentialsOptions.AccessToken,
            ["AccessTokenSecret"] = twitterCredentialsOptions.AccessTokenSecret,
            ["ApiKey"] = twitterCredentialsOptions.ApiKey,
            ["ApiSecretKey"] = twitterCredentialsOptions.ApiSecretKey
        },
        JobId = fixedJobId,
        OutputMessageBrokerChannels = new[]
        {
            "job_management.component_data_input.DataAnalyser_sentiment"
        }
    };

    try
    {
        await jobManager.StartNewJobAsync(config);
    }
    catch
    {
        // Every failure raised while starting the job is swallowed here.
        // NOTE(review): no logger is available in this static method, so
        // start-up errors are invisible; confirm this is intended.
    }
}
/// <summary>
/// Runs a single acquisition job: checks that the "TopicQuery" attribute is
/// present, stores the job configuration, requests posts from the acquirer
/// and forwards them to <c>ProcessBatch</c>.
/// </summary>
/// <param name="jobConfig">Configuration of the job; its attributes supply "TopicQuery" and optionally "Language" and "Translate".</param>
/// <param name="cancellationToken">Token handed to the acquirer when requesting posts.</param>
/// <remarks>
/// Cancellation is swallowed silently. Any other exception removes the job
/// from the running-jobs records and is logged; it is not rethrown.
/// </remarks>
private async Task RunJobAsync(DataAcquirerJobConfig jobConfig, CancellationToken cancellationToken)
{
    try
    {
        // Translation is switched on by the mere presence of the "Translate"
        // key; its value is not inspected (a value check used to be here but
        // was commented out).
        var translate = jobConfig.Attributes.ContainsKey("Translate");

        // TODO validate job config
        // Single lookup instead of ContainsKey followed by the indexer.
        if (!jobConfig.Attributes.TryGetValue("TopicQuery", out var topicQuery))
        {
            _logger.TrackError(
                "StartNewJob",
                "TopicQuery attribute is not present. Job did not start",
                new { jobId = jobConfig.JobId });
            return;
        }

        // Stays null when no "Language" attribute was supplied.
        string queryLanguage = null;
        if (jobConfig.Attributes.TryGetValue("Language", out var desiredLanguage))
        {
            queryLanguage = desiredLanguage;
        }

        await _dataAcquirerJobStorage.SaveAsync(jobConfig.JobId, jobConfig);

        var batchSize = 100;
        var dataAcquirerInputModel = DataAcquirerInputModel.FromValues(
            jobConfig.JobId,
            topicQuery,
            queryLanguage,
            new DataAcquirerAttributes(jobConfig.Attributes),
            batchSize);

        var batch = _acquirer.GetPostsAsync(
            dataAcquirerInputModel,
            cancellationToken);

        _logger.TrackInfo("MessageTracking", "Starting");

        await ProcessBatch(jobConfig, dataAcquirerInputModel, batch, translate);
    }
    catch (OperationCanceledException)
    {
        // Covers TaskCanceledException (a subclass) as well as the plain
        // OperationCanceledException thrown by token checks such as
        // ThrowIfCancellationRequested, so a cancelled job is not reported
        // as an error by the handler below.
    }
    catch (Exception e)
    {
        _runningJobsRecords.Remove(jobConfig.JobId);
        _logger.TrackError(
            "RunJob",
            "Job encountered an error and stopped.",
            new { jobId = jobConfig.JobId, exception = e });
    }
}
/// <summary>
/// Starts a new job in the background unless the component is stopping or a
/// job with the same id is already registered, and records it in the
/// running-jobs records together with its cancellation token source.
/// </summary>
/// <param name="jobConfig">Configuration of the job to start.</param>
/// <returns>
/// An already completed task; the job itself keeps running in the task
/// stored on the job record.
/// </returns>
public Task StartNewJobAsync(DataAcquirerJobConfig jobConfig)
{
    lock (_dictionaryLock)
    {
        var jobId = jobConfig.JobId;

        if (_isStopping)
        {
            _logger.TrackWarning(
                "StartNewJob",
                "Could not start job, because the component is stopping",
                new { jobId = jobId });

            return Task.CompletedTask;
        }

        if (_runningJobsRecords.ContainsKey(jobId))
        {
            _logger.TrackWarning(
                "StartNewJob",
                "Job is with this id already running",
                new { jobId = jobId });

            return Task.CompletedTask;
        }

        _logger.TrackInfo(
            "StartNewJob",
            "Config recieved",
            new { config = jobConfig });

        var cancellationTokenSource = new CancellationTokenSource();

        var downloadingTask = RunJobAsync(jobConfig, cancellationTokenSource.Token)
            .ContinueWith(async r =>
            {
                try
                {
                    await r;
                }
                catch (TaskCanceledException)
                {
                }

                // Remove under the same lock that guards registration.
                // RunJobAsync can finish synchronously (e.g. when it returns
                // before its first await); without the lock this continuation
                // could run before the TryAdd below and leave a stale record
                // that blocks this job id from ever being started again.
                lock (_dictionaryLock)
                {
                    _runningJobsRecords.Remove(jobId, out _);
                }

                _logger.TrackInfo(
                    "StartNewJob",
                    "Job removed",
                    new { jobId = jobId.ToString() });
            });

        var jobManagerJobRecord = new JobManagerJobRecord
        {
            JobId = jobConfig.JobId,
            JobTask = downloadingTask,
            CancellationTokenSource = cancellationTokenSource
        };

        _runningJobsRecords.TryAdd(jobManagerJobRecord.JobId, jobManagerJobRecord);

        return Task.CompletedTask;
    }
}
/// <summary>
/// Writes a "Downloaded: N" info entry whenever <paramref name="count"/> is
/// a multiple of 1000 (zero included); does nothing otherwise.
/// </summary>
/// <param name="jobConfig">Job whose id is attached to the log entry.</param>
/// <param name="count">Counter value to report.</param>
private void LogProgress(DataAcquirerJobConfig jobConfig, int count)
{
    var isReportingStep = count % 1000 == 0;
    if (!isReportingStep)
    {
        return;
    }

    _logger.TrackInfo(
        "MessageTracking",
        $"Downloaded: {count}",
        new { jobId = jobConfig.JobId });
}
/// <summary>
/// Queues a stop command for the given job on the component's update channel.
/// </summary>
/// <param name="jobId">Id of the job to stop.</param>
private async Task StopJob(Guid jobId)
{
    var stopCommand = new DataAcquirerJobConfig
    {
        Command = JobCommand.Stop,
        JobId = jobId
    };

    var updateChannel = _componentOptions.UpdateChannelName;
    await _interactiveConsumer.AddMessageToBeConsumed(updateChannel, stopCommand);
}
/// <summary>
/// Creates a queue holding one start command with a fixed job id. Its
/// attributes are a copy of the custom attributes plus "TopicQuery" = "?",
/// and its only output channel is "s1".
/// </summary>
/// <returns>A queue containing exactly that one configuration.</returns>
public Queue<DataAcquirerJobConfig> GetFixed()
{
    // Copy so that adding the query does not touch the shared custom attributes.
    var jobAttributes = _customAttributes.ToDictionary(pair => pair.Key, pair => pair.Value);
    jobAttributes.Add("TopicQuery", "?");

    var jobId = Guid.Parse("01c3ee17-c9f4-492f-ac9c-e9f6ecd1fa7e");

    var startCommand = new DataAcquirerJobConfig
    {
        JobId = jobId,
        Attributes = jobAttributes,
        Command = JobCommand.Start,
        OutputMessageBrokerChannels = new[] { "s1" }
    };

    var result = new Queue<DataAcquirerJobConfig>();
    result.Enqueue(startCommand);
    return result;
}
/// <summary>
/// Derives a post id by packing four 32-bit hash codes into the 16 bytes of
/// a <see cref="Guid"/>: text, original post id, user id, and the post's
/// date hash combined with the job id hash.
/// </summary>
/// <param name="jobConfig">Job whose id is mixed into the last four bytes.</param>
/// <param name="dataPost">Post whose text, original id, user id and date are hashed.</param>
/// <returns>A Guid built from the four hash codes.</returns>
/// <remarks>
/// NOTE(review): the id is only as stable as the <c>GetHashCode</c>
/// implementations involved. If any of the hashed members are strings, their
/// hash codes may differ between processes on runtimes that randomize string
/// hashing - confirm whether ids must be stable across restarts.
/// </remarks>
private static Guid CalculatePostId(DataAcquirerJobConfig jobConfig, DataAcquirerPost dataPost)
{
    var textHash = dataPost.Text.GetHashCode();
    var postIdHash = dataPost.OriginalPostId.GetHashCode();
    var userIdHash = dataPost.UserId.GetHashCode();
    var dateIdHash = dataPost.DateTime.GetHashCode();
    var jobIdHash = jobConfig.JobId.GetHashCode();

    // Wrap-around is intended; stay safe even if the project is compiled
    // with overflow checking enabled.
    dateIdHash = unchecked(dateIdHash + jobIdHash);

    // Each hash occupies its own 4-byte slot. The previous offsets
    // (0, 3, 7, 11) made the second hash overwrite the last byte of the
    // first one and left the final byte of the id always zero.
    var bytes = new byte[16];
    BitConverter.GetBytes(textHash).CopyTo(bytes, 0);
    BitConverter.GetBytes(postIdHash).CopyTo(bytes, 4);
    BitConverter.GetBytes(userIdHash).CopyTo(bytes, 8);
    BitConverter.GetBytes(dateIdHash).CopyTo(bytes, 12);

    return new Guid(bytes);
}
/// <summary>
/// Creates one start command per topic, each with a freshly generated job
/// id, a copy of the custom attributes plus the topic as "TopicQuery", and
/// the single output channel "o_1".
/// </summary>
/// <param name="topics">Topic queries, one job configuration is produced for each.</param>
/// <returns>The configurations in the order the topics were enumerated.</returns>
public Queue<DataAcquirerJobConfig> PrepareConfigQueue(IEnumerable<string> topics)
{
    // Builds the start command for a single topic.
    DataAcquirerJobConfig CreateStartCommand(string topicQuery)
    {
        var jobAttributes = _customAttributes.ToDictionary(pair => pair.Key, pair => pair.Value);
        jobAttributes.Add("TopicQuery", topicQuery);

        return new DataAcquirerJobConfig
        {
            JobId = Guid.NewGuid(),
            Attributes = jobAttributes,
            Command = JobCommand.Start,
            OutputMessageBrokerChannels = new[] { "o_1" }
        };
    }

    var queue = new Queue<DataAcquirerJobConfig>();
    foreach (var topic in topics)
    {
        queue.Enqueue(CreateStartCommand(topic));
    }

    return queue;
}
/// <summary>
/// Queues a start command for the given job id on the component's update
/// channel. The command carries the topic query "matfyz", the Twitter
/// credentials and the output channel "c1".
/// </summary>
/// <param name="jobId">Id assigned to the started job.</param>
private async Task StartJob(Guid jobId)
{
    var startCommand = new DataAcquirerJobConfig
    {
        Attributes = new Dictionary<string, string>
        {
            ["TopicQuery"] = "matfyz",
            ["ApiKey"] = _twitterCredentials.ApiKey,
            ["ApiSecretKey"] = _twitterCredentials.ApiSecretKey,
            ["AccessToken"] = _twitterCredentials.AccessToken,
            ["AccessTokenSecret"] = _twitterCredentials.AccessTokenSecret
        },
        Command = JobCommand.Start,
        OutputMessageBrokerChannels = new[] { "c1" },
        JobId = jobId
    };

    var updateChannel = _componentOptions.UpdateChannelName;
    await _interactiveConsumer.AddMessageToBeConsumed(updateChannel, startCommand);
}
/// <summary>
/// Consumes the stream of acquired posts: for each post it computes an id,
/// cleans the text, optionally translates it to English, wraps it in a
/// <c>UniPostModel</c> and sends the serialized model to every output channel
/// of the job.
/// </summary>
/// <param name="jobConfig">Job configuration; supplies the output channels and is used for id calculation and progress logging.</param>
/// <param name="dataAcquirerInputModel">Input model whose job id is attached to produced posts and warnings.</param>
/// <param name="batch">Asynchronous stream of acquired posts.</param>
/// <param name="translate">When true, posts whose language is set and is not "en" are translated.</param>
private async Task ProcessBatch(
    DataAcquirerJobConfig jobConfig,
    DataAcquirerInputModel dataAcquirerInputModel,
    IAsyncEnumerable<DataAcquirerPost> batch,
    bool translate)
{
    var processed = 0;

    await foreach (var post in batch)
    {
        // Progress is reported with the number of posts handled before this one.
        LogProgress(jobConfig, processed);
        processed++;

        var postId = CalculatePostId(jobConfig, post);
        var text = ClearText(post.Text);
        string originalText = null;

        var shouldTranslate = translate && post.Language != "en" && post.Language != null;
        if (shouldTranslate)
        {
            try
            {
                var english = await _translationService
                    .TranslateToEnglishAsync(post.Language, text);

                // Keep the untranslated text alongside the translation.
                (originalText, text) = (text, english);
            }
            catch (DataAcquirerException ex)
            {
                // A failed translation is not fatal: the post goes out untranslated.
                _logger.TrackWarning(
                    "TranslationError",
                    "Could not translate",
                    new
                    {
                        jobId = dataAcquirerInputModel.JobId,
                        exception = ex,
                        text
                    });
            }
        }

        var unifiedPost = UniPostModel.FromValues(
            postId,
            post.OriginalPostId,
            text,
            originalText,
            post.Language,
            post.Source,
            post.UserId,
            post.DateTime,
            dataAcquirerInputModel.JobId,
            post.Query);

        var payload = JsonConvert.SerializeObject(unifiedPost);
        var outgoingMessage = new MessageBrokerMessage("acquired-data-post", payload);

        await SendRecordToOutputs(jobConfig.OutputMessageBrokerChannels, outgoingMessage);
    }
}