/// <summary>
        /// Analyzes the sentiment of the comments of a video using several parallel workers and
        /// reports every analyzed batch through <paramref name="onCommentBatchResult"/>.
        /// </summary>
        /// <param name="videoId">ID of the video whose comments are processed.</param>
        /// <param name="onCommentBatchResult">
        /// Callback invoked once per analyzed document batch. It is called from the worker tasks,
        /// so it may be invoked from several tasks at the same time.
        /// </param>
        /// <returns>A task that completes when all workers have run out of document batches.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="onCommentBatchResult"/> is null.</exception>
        public async Task ProcessCommentsAsync(string videoId, Action <CommentBatchResult> onCommentBatchResult)
        {
            // Fail fast instead of raising a NullReferenceException inside every worker.
            if (onCommentBatchResult == null)
            {
                throw new ArgumentNullException(nameof(onCommentBatchResult));
            }

            // The comment count is machine-readable data, so parse it culture-independently.
            var totalCommentCount = int.Parse(
                await _restApi.GetCommentCount(videoId).ConfigureAwait(false),
                System.Globalization.CultureInfo.InvariantCulture);

            // One worker per started block of 100 comments plus one extra, capped at 10 workers.
            int parallelWorkers = (int)Math.Min(Math.Ceiling(totalCommentCount / 100.0d) + 1, 10);

            _commentThreadProvider.Init(videoId, "snippet,replies");

            var tasks        = new List <Task>();
            int commentCount = 0;

            Console.WriteLine($"Analyzing {totalCommentCount} comments with {parallelWorkers} worker(s)");
            for (int i = 0; i < parallelWorkers; i++)
            {
                var newTask = Task.Run(async() =>
                {
                    // Every worker owns its iterator and providers but pulls from the shared thread provider.
                    // NOTE(review): this relies on _commentThreadProvider being safe for concurrent use - confirm.
                    var commentIterator  = new CommentIterator(_restApi);
                    var commentProvider2 = new CommentProvider2(_commentThreadProvider, commentIterator);

                    var docBatchProvider2 = new DocumentBatchProvider2(
                        new BatchedCommentsProviderConfig(20, 10000,
                                                          document => !string.IsNullOrWhiteSpace(document.Text)),
                        commentProvider2
                        );

                    DocumentBatch docBatch = docBatchProvider2.GetNextDocumentBatch();
                    while (docBatch.Documents.Any())
                    {
                        Interlocked.Add(ref commentCount, docBatch.Documents.Count);

                        // ToDo: add intermediate step to detect the Text Language, for now assume it is EN
                        docBatch.Documents.ForEach((doc) => doc.LanguageCode = "en");

                        var analysisResult = await _documentBatchSentimentAnalyzer.AnalyzeDocumentBatchAsync(docBatch)
                                             .ConfigureAwait(false);

                        onCommentBatchResult.Invoke(new CommentBatchResult()
                        {
                            TotalCommentCount      = totalCommentCount,
                            // The counter is updated by all workers, so read it with a volatile read.
                            ProcessedCommentCount  = Volatile.Read(ref commentCount),
                            DocumentBatchSentiment = analysisResult
                        });

                        docBatch = docBatchProvider2.GetNextDocumentBatch();
                    }

                    Console.WriteLine($"Task {Task.CurrentId} ended");
                });
                tasks.Add(newTask);
            }

            await Task.WhenAll(tasks).ConfigureAwait(false);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Builds the sentiment analysis request for a whole document batch. The batch is sent as a
        /// single plain-text document produced by <c>ToAnnotatedPlainText</c>.
        /// </summary>
        /// <param name="documentBatch">Batch whose documents are packed into the request.</param>
        /// <returns>The request carrying the plain-text document, with UTF-16 encoding.</returns>
        private static AnalyzeSentimentRequest GetSentimentAnalysisRequest(DocumentBatch documentBatch)
        {
            var annotatedText     = documentBatch.ToAnnotatedPlainText();
            var plainTextDocument = Google.Cloud.Language.V1.Document.FromPlainText(annotatedText);

            return new AnalyzeSentimentRequest
            {
                Document     = plainTextDocument,
                EncodingType = EncodingType.Utf16 // Default in C#
            };
        }
 /// <summary>
 /// Requests the sentiment of a document batch through the Azure endpoint of the REST API.
 /// </summary>
 /// <param name="documentBatch">Batch of documents to analyze.</param>
 /// <returns>
 /// The sentiment returned by the REST API, or an empty <see cref="DocumentBatchSentiment"/>
 /// when the request fails (the exception is written to the console).
 /// </returns>
 public async Task <DocumentBatchSentiment> AnalyzeDocumentBatchAsync(DocumentBatch documentBatch)
 {
     try
     {
         // The task must be awaited inside the try block: returning it un-awaited would let
         // asynchronous failures bypass the catch below and surface at the caller instead.
         return(await _restApi.GetTextSentimentAzure(documentBatch).ConfigureAwait(false));
     }
     catch (Exception ex)
     {
         Console.WriteLine($"{this.GetType().FullName} threw exception: {ex}");
         return(new DocumentBatchSentiment());
     }
 }
        /// <summary>
        /// Returns a plain-text representation of a <see cref="DocumentBatch"/>. Each document
        /// contributes three lines: the document separator followed by a period, the document ID
        /// followed by a period, and the document text.
        /// </summary>
        /// <param name="documentBatch">Batch whose documents are written out in order.</param>
        /// <returns>The concatenated text; an empty string when the batch has no documents.</returns>
        public static string ToAnnotatedPlainText(this DocumentBatch documentBatch)
        {
            var annotatedText = new StringBuilder();

            foreach (var doc in documentBatch.Documents)
            {
                annotatedText.AppendLine($"{DocumentSeparator}.")
                             .AppendLine($"{doc.Id}.")
                             .AppendLine(doc.Text);
            }

            return annotatedText.ToString();
        }
Exemplo n.º 5
0
        /// <summary>
        /// Requests the sentiment of a document batch through the Google endpoint of the REST API.
        /// </summary>
        /// <param name="documentBatch">Batch of documents to analyze.</param>
        /// <returns>
        /// The sentiment returned by the REST API, or an empty <see cref="DocumentBatchSentiment"/>
        /// when the request throws (the exception is written to the console).
        /// </returns>
        public async Task <DocumentBatchSentiment> AnalyzeDocumentBatchAsync(DocumentBatch documentBatch)
        {
            try
            {
                return await _restApi.GetTextSentimentGoogle(documentBatch).ConfigureAwait(false);
            }
            catch (Exception analysisError)
            {
                // Best effort: log the failure and hand back an empty result instead of failing the caller.
                var analyzerName = GetType().FullName;
                Console.WriteLine($"{analyzerName} threw exception: {analysisError}");

                return new DocumentBatchSentiment();
            }
        }
Exemplo n.º 6
0
        /// <summary>
        /// Posts a document batch as JSON to the Azure text sentiment endpoint and deserializes
        /// the response body into a <see cref="DocumentBatchSentiment"/>.
        /// </summary>
        /// <param name="documentBatch">Batch of documents that is serialized into the request body.</param>
        /// <returns>The sentiment result deserialized from the response body.</returns>
        /// <exception cref="HttpRequestException">The endpoint answered with a status other than 200 OK.</exception>
        public async Task <DocumentBatchSentiment> GetTextSentimentAzure(DocumentBatch documentBatch)
        {
            var requestUrl = _baseUrl + _textsentimentAzure;

            // NOTE(review): StringContent without a media type is sent as text/plain; confirm that
            // the endpoint does not require application/json.
            // Both the request content and the response are disposable and released here.
            using (var requestContent = new StringContent(JsonConvert.SerializeObject(documentBatch)))
            using (var response = await _httpClient.PostAsync(requestUrl, requestContent).ConfigureAwait(false))
            {
                if (response.StatusCode != System.Net.HttpStatusCode.OK)
                {
                    // ReasonPhrase may be null, so the status code is always part of the message.
                    throw new HttpRequestException(
                        $"Sentiment request failed with status {(int)response.StatusCode} ({response.ReasonPhrase})");
                }

                var responseStr = await response.Content.ReadAsStringAsync().ConfigureAwait(false);

                return JsonConvert.DeserializeObject <DocumentBatchSentiment>(responseStr);
            }
        }
Exemplo n.º 7
0
        /// <summary>
        /// Produces a sentiment result without calling any service: every document of the batch
        /// gets a score drawn from <see cref="Random.NextDouble"/>.
        /// </summary>
        /// <param name="documentBatch">Batch whose documents receive a random score.</param>
        /// <returns>An already completed task holding one sentiment entry per document, in batch order.</returns>
        public Task <DocumentBatchSentiment> AnalyzeDocumentBatchAsync(DocumentBatch documentBatch)
        {
            var batchSentiment = new DocumentBatchSentiment();
            var scoreSource    = new Random();

            foreach (var document in documentBatch.Documents)
            {
                var randomSentiment = new DocumentSentiment()
                {
                    Id    = document.Id,
                    Score = scoreSource.NextDouble()
                };

                batchSentiment.Documents.Add(randomSentiment);
            }

            return Task.FromResult(batchSentiment);
        }
        /// <summary>
        /// Pulls comments from the comment provider and collects those accepted by the configured
        /// document predicate into a new batch. Collection stops when the batch reaches the configured
        /// document count or total text length, or when the provider returns no further comment.
        /// </summary>
        /// <returns>The collected batch; it contains no documents when nothing was accepted.</returns>
        public DocumentBatch GetNextDocumentBatch()
        {
            var batch = new DocumentBatch();

            while (true)
            {
                // Both limits are checked before each fetch; the length is only read while the count fits.
                bool withinLimits = batch.Documents.Count < _providerConfig.MaxDocumentCountPerbatch &&
                                    batch.TotalDocumentTextLenth < _providerConfig.MaxTotalDocumentTextLengthPerBatch;
                if (!withinLimits)
                {
                    return batch;
                }

                var nextComment = _commentProvider2.GetNextComment();
                if (nextComment == null)
                {
                    // The provider has no more comments.
                    return batch;
                }

                var candidate = GetDocumentFromComment(nextComment);
                if (!_providerConfig.DocumentPredicate.Invoke(candidate))
                {
                    // Rejected documents are skipped and do not count towards the limits.
                    continue;
                }

                batch.AddDocument(candidate);
            }
        }