/// <summary>
/// Analyzes the sentiment of the comments of a video with several parallel workers and reports
/// every analyzed batch through <paramref name="onCommentBatchResult"/>.
/// </summary>
/// <param name="videoId">Identifier of the video whose comments are processed.</param>
/// <param name="onCommentBatchResult">
/// Callback invoked once per analyzed batch. It is called from the worker tasks, so with more
/// than one worker it may be invoked concurrently.
/// </param>
/// <returns>A task that completes once every worker has run out of document batches.</returns>
public async Task ProcessCommentsAsync(string videoId, Action<CommentBatchResult> onCommentBatchResult)
{
    // The count arrives as text from the REST API; parse it culture-independently.
    var totalCommentCount = int.Parse(
        await _restApi.GetCommentCount(videoId).ConfigureAwait(false),
        System.Globalization.CultureInfo.InvariantCulture);

    // One worker per started block of 100 comments plus one extra, capped at 10.
    int parallelWorkers = (int)Math.Min(Math.Ceiling(totalCommentCount / 100.0d) + 1, 10);

    _commentThreadProvider.Init(videoId, "snippet,replies");

    var tasks = new List<Task>();
    int commentCount = 0; // shared by all workers; only touched through Interlocked/Volatile

    Console.WriteLine($"Analyzing {totalCommentCount} comments with {parallelWorkers} worker(s)");

    for (int i = 0; i < parallelWorkers; i++)
    {
        var newTask = Task.Run(async () =>
        {
            // Each worker gets its own iterator and batch provider, but all of them pull
            // from the same comment thread provider instance.
            var commentIterator = new CommentIterator(_restApi);
            var commentProvider2 = new CommentProvider2(_commentThreadProvider, commentIterator);
            // NOTE(review): 20 / 10000 are presumably the per-batch document count and total
            // text length limits - confirm against BatchedCommentsProviderConfig.
            var docBatchProvider2 = new DocumentBatchProvider2(
                new BatchedCommentsProviderConfig(20, 10000, document => !string.IsNullOrWhiteSpace(document.Text)),
                commentProvider2);

            DocumentBatch docBatch = docBatchProvider2.GetNextDocumentBatch();
            while (docBatch.Documents.Any())
            {
                Interlocked.Add(ref commentCount, docBatch.Documents.Count);

                // ToDo: add intermediate step to detect the Text Language, for now assume it is EN
                docBatch.Documents.ForEach(doc => doc.LanguageCode = "en");

                var analysisResult = await _documentBatchSentimentAnalyzer.AnalyzeDocumentBatchAsync(docBatch)
                    .ConfigureAwait(false);

                onCommentBatchResult.Invoke(new CommentBatchResult()
                {
                    TotalCommentCount = totalCommentCount,
                    // Other workers update the counter concurrently, so read it with a volatile read.
                    ProcessedCommentCount = Volatile.Read(ref commentCount),
                    DocumentBatchSentiment = analysisResult
                });

                docBatch = docBatchProvider2.GetNextDocumentBatch();
            }

            Console.WriteLine($"Task {Task.CurrentId} ended");
        });
        tasks.Add(newTask);
    }

    await Task.WhenAll(tasks).ConfigureAwait(false);
}
/// <summary>
/// Builds the sentiment analysis request for a document batch. The batch is converted to its
/// annotated plain-text form and wrapped into a single plain-text document.
/// </summary>
/// <param name="documentBatch">The batch whose text is sent for analysis.</param>
/// <returns>A request carrying the batch as one plain-text document with UTF-16 encoding.</returns>
private static AnalyzeSentimentRequest GetSentimentAnalysisRequest(DocumentBatch documentBatch)
{
    string annotatedText = documentBatch.ToAnnotatedPlainText();
    var plainTextDocument = Google.Cloud.Language.V1.Document.FromPlainText(annotatedText);

    return new AnalyzeSentimentRequest
    {
        Document = plainTextDocument,
        // UTF-16 is the default string encoding in C#
        EncodingType = EncodingType.Utf16
    };
}
/// <summary>
/// Requests the sentiment of a document batch from the Azure text sentiment endpoint of the REST API.
/// </summary>
/// <param name="documentBatch">The documents to analyze.</param>
/// <returns>
/// The sentiment returned by the REST API, or an empty <see cref="DocumentBatchSentiment"/>
/// when the request fails (the exception is written to the console).
/// </returns>
public async Task<DocumentBatchSentiment> AnalyzeDocumentBatchAsync(DocumentBatch documentBatch)
{
    try
    {
        // Await inside the try block: returning the task un-awaited would let failures of the
        // asynchronous call bypass the catch below and surface at the caller instead.
        return await _restApi.GetTextSentimentAzure(documentBatch).ConfigureAwait(false);
    }
    catch (Exception ex)
    {
        Console.WriteLine($"{this.GetType().FullName} threw exception: {ex}");
        return new DocumentBatchSentiment();
    }
}
/// <summary>
/// Returns a plain-text representation of a <see cref="DocumentBatch"/> in which the text of each
/// document is preceded by a separator line and a line holding the document ID.
/// </summary>
/// <param name="documentBatch">The batch whose documents are rendered.</param>
/// <returns>
/// Three lines per document: the separator followed by a dot, the document ID followed by a dot,
/// and the document text.
/// </returns>
public static string ToAnnotatedPlainText(this DocumentBatch documentBatch)
{
    var annotatedText = new StringBuilder();

    foreach (var doc in documentBatch.Documents)
    {
        annotatedText
            .AppendLine($"{DocumentSeparator}.")
            .AppendLine($"{doc.Id}.")
            .AppendLine(doc.Text);
    }

    return annotatedText.ToString();
}
/// <summary>
/// Requests the sentiment of a document batch from the Google text sentiment endpoint of the REST API.
/// </summary>
/// <param name="documentBatch">The documents to analyze.</param>
/// <returns>
/// The sentiment returned by the REST API, or an empty <see cref="DocumentBatchSentiment"/>
/// when the request throws (the exception is written to the console).
/// </returns>
public async Task<DocumentBatchSentiment> AnalyzeDocumentBatchAsync(DocumentBatch documentBatch)
{
    try
    {
        return await _restApi.GetTextSentimentGoogle(documentBatch).ConfigureAwait(false);
    }
    catch (Exception failure)
    {
        // Best effort: log the failure and hand back an empty result instead of propagating it.
        Console.WriteLine($"{GetType().FullName} threw exception: {failure}");
        return new DocumentBatchSentiment();
    }
}
/// <summary>
/// Posts the JSON-serialized document batch to the Azure text sentiment endpoint and
/// deserializes the JSON response.
/// </summary>
/// <param name="documentBatch">The documents to analyze.</param>
/// <returns>The <see cref="DocumentBatchSentiment"/> deserialized from the response body.</returns>
/// <exception cref="HttpRequestException">The endpoint answered with a status other than 200 OK.</exception>
public async Task<DocumentBatchSentiment> GetTextSentimentAzure(DocumentBatch documentBatch)
{
    var requestUrl = _baseUrl + _textsentimentAzure;

    // Both the request content and the response own resources, so dispose them deterministically.
    using (var requestContent = new StringContent(JsonConvert.SerializeObject(documentBatch)))
    using (var response = await _httpClient.PostAsync(requestUrl, requestContent).ConfigureAwait(false))
    {
        if (response.StatusCode != System.Net.HttpStatusCode.OK)
        {
            // ReasonPhrase may be null or empty, so include the numeric status code as well.
            throw new HttpRequestException(
                $"Request to {requestUrl} failed with status {(int)response.StatusCode}: {response.ReasonPhrase}");
        }

        var responseStr = await response.Content.ReadAsStringAsync().ConfigureAwait(false);
        return JsonConvert.DeserializeObject<DocumentBatchSentiment>(responseStr);
    }
}
/// <summary>
/// Produces a sentiment result without calling any service: every document of the batch
/// receives a random score.
/// </summary>
/// <param name="documentBatch">The documents to score.</param>
/// <returns>
/// An already completed task whose result holds one <see cref="DocumentSentiment"/> per input
/// document, carrying the document ID and a random score in the range [0.0, 1.0).
/// </returns>
public Task<DocumentBatchSentiment> AnalyzeDocumentBatchAsync(DocumentBatch documentBatch)
{
    var result = new DocumentBatchSentiment();
    // NOTE(review): a new Random is created on every call; calls made in quick succession
    // may produce identical sequences on runtimes that seed from the clock.
    var scoreSource = new Random();

    foreach (var document in documentBatch.Documents)
    {
        var sentiment = new DocumentSentiment
        {
            Id = document.Id,
            Score = scoreSource.NextDouble()
        };
        result.Documents.Add(sentiment);
    }

    return Task.FromResult(result);
}
/// <summary>
/// Assembles the next batch of documents from the comment provider.
/// </summary>
/// <returns>
/// A batch filled with the documents that pass the configured predicate. Filling stops when the
/// configured document count or total text length limit is reached, or when the comment provider
/// returns <c>null</c>; the batch is empty if no document was accepted.
/// </returns>
public DocumentBatch GetNextDocumentBatch()
{
    var batch = new DocumentBatch();

    for (;;)
    {
        // The limits are tested before the next comment is fetched, not after a document is added.
        if (batch.Documents.Count >= _providerConfig.MaxDocumentCountPerbatch
            || batch.TotalDocumentTextLenth >= _providerConfig.MaxTotalDocumentTextLengthPerBatch)
        {
            return batch;
        }

        var nextComment = _commentProvider2.GetNextComment();
        if (nextComment == null)
        {
            // The provider has no further comments.
            return batch;
        }

        var candidate = GetDocumentFromComment(nextComment);
        if (!_providerConfig.DocumentPredicate(candidate))
        {
            // Rejected documents are skipped and do not count towards the limits.
            continue;
        }

        batch.AddDocument(candidate);
    }
}