/// <summary>
/// Classifies the sentiment of <paramref name="text"/> and reports its profanity statistics as JSON.
/// </summary>
/// <remarks>
/// The model is obtained through <c>LoadData</c>, <c>BuildAndTrainModel</c> and <c>Evaluate</c> on every call.
/// NOTE(review): this looks expensive for a per-text call; consider caching the model if this is a hot path.
/// </remarks>
/// <param name="text">The text to analyse.</param>
/// <returns>
/// A JSON-serialized <c>TextAnalysisResult</c> whose <c>Context</c> is "Positive" or "Negative",
/// <c>CurseCount</c> is the value returned by <c>helperMethods.profanityCheck</c>, and <c>CurseRatio</c> is
/// that count divided by the number of non-empty words (0 when the text contains no words).
/// </returns>
private string textAnalysis(string text)
        {
            MLContext     mlContext     = new MLContext();
            TrainTestData splitDataView = LoadData(mlContext);
            ITransformer  model         = BuildAndTrainModel(mlContext, splitDataView.TrainSet);

            Evaluate(mlContext, model, splitDataView.TestSet);

            SentimentData sampleStatement = new SentimentData
            {
                SentimentText = text
            };

            // The prediction engine is disposable; release it as soon as the single prediction is made.
            bool isPositive;
            using (PredictionEngine<SentimentData, SentimentPrediction> predictionEngine =
                       mlContext.Model.CreatePredictionEngine<SentimentData, SentimentPrediction>(model))
            {
                isPositive = Convert.ToBoolean(predictionEngine.Predict(sampleStatement).Prediction);
            }

            var curseCount = helperMethods.profanityCheck(text);

            // A word is a whitespace-separated token that is non-empty after stripping
            // every punctuation character that occurs anywhere in the text.
            char[] punctuation = text.Where(Char.IsPunctuation).Distinct().ToArray();
            float  wordCount   = text.Split().Count(token => token.Trim(punctuation).Length > 0);

            // Guard against division by zero for empty or punctuation-only input, which would
            // otherwise put NaN/Infinity into the serialized result.
            float curseRatio = wordCount > 0 ? curseCount / wordCount : 0f;

            var result = new TextAnalysisResult
            {
                Context    = isPositive ? "Positive" : "Negative",
                CurseCount = curseCount,
                CurseRatio = curseRatio
            };

            return JsonConvert.SerializeObject(result);
        }
// Example #2
// 0
        /// <summary>
        /// Requests key phrases and a sentiment score for <paramref name="inputText"/> from the
        /// text-analytics endpoints under <c>https://api.datamarket.azure.com/</c>.
        /// </summary>
        /// <param name="inputText">Text to analyse; it is URL-encoded and sent as the <c>Text</c> query parameter.</param>
        /// <param name="accountkey">Account key used to build the HTTP Basic authorization header.</param>
        /// <returns>
        /// The result deserialized from the key-phrases response, with <c>SentimentIndex</c> set from the
        /// <c>Score</c> field of the sentiment response.
        /// </returns>
        /// <exception cref="HttpRequestException">Either call returned a non-success HTTP status code.</exception>
        static TextAnalysisResult AnalyzeText(string inputText, string accountkey)
        {
            // NOTE(review): a new HttpClient per call can exhaust sockets under load; consider sharing one instance.
            using (var httpClient = new HttpClient())
            {
                // Set http client parameters to call data market and encode text
                string inputTextEncoded = HttpUtility.UrlEncode(inputText);
                httpClient.BaseAddress = new Uri("https://api.datamarket.azure.com/");
                string creds = "AccountKey:" + accountkey;
                string authorizationHeader = "Basic " + Convert.ToBase64String(Encoding.ASCII.GetBytes(creds));
                httpClient.DefaultRequestHeaders.Add("Authorization", authorizationHeader);
                httpClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));

                // Get key phrases
                string keyPhrasesContent = GetContentOrThrow(
                    httpClient,
                    "data.ashx/amla/text-analytics/v1/GetKeyPhrases?Text=" + inputTextEncoded);

                // DeserializeObject returns null for an empty or literal "null" payload; fall back to an
                // empty result so that the sentiment assignment below cannot dereference null.
                TextAnalysisResult result =
                    JsonConvert.DeserializeObject<TextAnalysisResult>(keyPhrasesContent) ?? new TextAnalysisResult();

                // Get sentiment
                string sentimentContent = GetContentOrThrow(
                    httpClient,
                    "data.ashx/amla/text-analytics/v1/GetSentiment?Text=" + inputTextEncoded);
                dynamic sentimentResult = Newtonsoft.Json.Linq.JObject.Parse(sentimentContent);
                result.SentimentIndex = sentimentResult.Score;

                return result;
            }
        }

        /// <summary>
        /// Issues a blocking GET for <paramref name="requestUri"/> and returns the response body,
        /// throwing when the status code does not indicate success. The response is always disposed.
        /// </summary>
        private static string GetContentOrThrow(HttpClient httpClient, string requestUri)
        {
            using (HttpResponseMessage response = httpClient.GetAsync(requestUri).Result)
            {
                string content = response.Content.ReadAsStringAsync().Result;
                if (!response.IsSuccessStatusCode)
                {
                    throw new HttpRequestException("Call failed. HTTP status code: " + response.StatusCode + " and contents: " + content);
                }

                return content;
            }
        }
// Example #3
// 0
        /// <summary>
        /// Detects the language of <paramref name="text"/>, extracts its key phrases, scores its sentiment
        /// and estimates its reading time, logging progress to the console.
        /// </summary>
        /// <param name="text">
        /// The content to analyse. Only the first 5000 characters are sent to the service; the word count
        /// is computed from the full text.
        /// </param>
        /// <returns>
        /// The analysis result. When the text is null or empty, or a service step reports errors or returns
        /// no document, the result populated so far is returned and the remaining fields keep their defaults.
        /// </returns>
        public async Task <TextAnalysisResult> AnalyzeTextAsync(string text)
        {
            // See: https://docs.microsoft.com/en-us/azure/cognitive-services/text-analytics/quickstarts/csharp

            const int maxSupportedLength = 5000;

            var analysisResult = new TextAnalysisResult();

            if (string.IsNullOrEmpty(text))
            {
                Console.WriteLine("\t\t\tContentAnalyzer.AnalyzeTextAsync(): no text to analyze");
                return analysisResult;
            }

            string textToAnalyze = text;

            if (text.Length > maxSupportedLength)
            {
                Console.WriteLine("\t\t\tContentAnalyzer.AnalyzeTextAsync(): text longer than supported length. Trimming it...");

                // Never cut between the two halves of a surrogate pair: that would send an invalid character.
                int cutOff = char.IsHighSurrogate(text[maxSupportedLength - 1])
                    ? maxSupportedLength - 1
                    : maxSupportedLength;
                textToAnalyze = text.Substring(0, cutOff);
            }

            Console.WriteLine("\t\t\tContentAnalyzer.AnalyzeTextAsync(): initializing TextAnalyticsAPI");

            // The client is disposable; scope it to this call so it is released on every return path.
            using (ITextAnalyticsAPI textAnalyticsClient = new TextAnalyticsAPI
            {
                AzureRegion     = AzureRegions.Westeurope,
                SubscriptionKey = m_TextAnalyticsAPISubscriptionKey
            })
            {
                Console.WriteLine("\t\t\tContentAnalyzer.AnalyzeTextAsync(): detecting content language");

                var batchLanguageResult = await textAnalyticsClient.DetectLanguageAsync(new BatchInput(new List <Input>()
                {
                    new Input("1", textToAnalyze)
                })).ConfigureAwait(false);

                if (batchLanguageResult.Errors.Count > 0)
                {
                    Console.WriteLine("\t\t\tContentAnalyzer.AnalyzeTextAsync(): error while detecting language");
                    foreach (var error in batchLanguageResult.Errors)
                    {
                        Console.WriteLine($"\t{error.Message}");
                    }
                    return analysisResult;
                }

                // Guard the indexers below: an empty response would otherwise throw ArgumentOutOfRangeException.
                if (batchLanguageResult.Documents.Count == 0 || batchLanguageResult.Documents[0].DetectedLanguages.Count == 0)
                {
                    Console.WriteLine("\t\t\tContentAnalyzer.AnalyzeTextAsync(): no language was detected");
                    return analysisResult;
                }

                var detectedLanguage = batchLanguageResult.Documents[0].DetectedLanguages[0];

                analysisResult.DetectedLanguage      = detectedLanguage.Name;
                analysisResult.DetectedLanguageScore = detectedLanguage.Score.GetValueOrDefault();

                Console.WriteLine($"\t\t\tContentAnalyzer.AnalyzeTextAsync(): detected language is '{analysisResult.DetectedLanguage}' ({(analysisResult.DetectedLanguageScore * 100):0.00}%)");

                Console.WriteLine("\t\t\tContentAnalyzer.AnalyzeTextAsync(): performing key-phrase extraction");

                // The same input (tagged with the detected language) is reused for key phrases and sentiment.
                var multiLanguageInput = new MultiLanguageBatchInput(new List <MultiLanguageInput>()
                {
                    new MultiLanguageInput(detectedLanguage.Iso6391Name, "1", textToAnalyze)
                });
                var batchKeyphraseResult = await textAnalyticsClient.KeyPhrasesAsync(multiLanguageInput).ConfigureAwait(false);

                if (batchKeyphraseResult.Errors.Count > 0)
                {
                    Console.WriteLine("\t\t\tContentAnalyzer.AnalyzeTextAsync(): error while extracting key-phrases");
                    foreach (var error in batchKeyphraseResult.Errors)
                    {
                        Console.WriteLine($"\t\t\t\t{error.Message}");
                    }
                    return analysisResult;
                }

                if (batchKeyphraseResult.Documents.Count == 0)
                {
                    Console.WriteLine("\t\t\tContentAnalyzer.AnalyzeTextAsync(): no key-phrase document was returned");
                    return analysisResult;
                }

                var keyPhrases = batchKeyphraseResult.Documents[0].KeyPhrases;

                Console.WriteLine($"\t\t\tContentAnalyzer.AnalyzeTextAsync(): retrieved {keyPhrases.Count} key-phrases:");
                foreach (var keyphrase in keyPhrases)
                {
                    analysisResult.KeyPhrases.Add(keyphrase);
                    Console.WriteLine($"\t\t\t\t{keyphrase}");
                }

                Console.WriteLine("\t\t\tContentAnalyzer.AnalyzeTextAsync(): performing sentiment analysis");

                var batchSentimentResult = await textAnalyticsClient.SentimentAsync(multiLanguageInput).ConfigureAwait(false);

                if (batchSentimentResult.Errors.Count > 0)
                {
                    Console.WriteLine("\t\t\tContentAnalyzer.AnalyzeTextAsync(): error while detecting sentiment");
                    foreach (var error in batchSentimentResult.Errors)
                    {
                        Console.WriteLine($"\t\t\t\t{error.Message}");
                    }
                    return analysisResult;
                }

                if (batchSentimentResult.Documents.Count == 0)
                {
                    Console.WriteLine("\t\t\tContentAnalyzer.AnalyzeTextAsync(): no sentiment document was returned");
                    return analysisResult;
                }

                analysisResult.SentimentScore = batchSentimentResult.Documents[0].Score.GetValueOrDefault();
                analysisResult.Sentiment      = GetSentiment(analysisResult.SentimentScore);

                Console.WriteLine($"\t\t\tContentAnalyzer.AnalyzeTextAsync(): sentiment is '{analysisResult.Sentiment}' ({(analysisResult.SentimentScore * 100):0.00}%)");
            }

            // Extend analysis by estimating reading time for content.
            // The full (untrimmed) text is used here on purpose: no service call is involved.
            analysisResult.WordCount            = TextTokenizer.GetWordCount(text);
            analysisResult.ReadingTimeInMinutes = ReadingTimeEstimator.GetEstimatedReadingTime(analysisResult.WordCount, analysisResult.DetectedLanguage);

            return analysisResult;
        }
// Example #4
// 0
    /// <summary>
    /// Builds typing statistics for a user from their typing results, restricted to one language and
    /// one text generation type, merges them with any previously stored statistics and saves the outcome.
    /// </summary>
    /// <param name="userId">User whose sessions are analysed.</param>
    /// <param name="language">Only texts whose entity has this language contribute to the result.</param>
    /// <param name="textGenerationType">Only texts whose entity has this generation type contribute to the result.</param>
    /// <returns>
    /// The merged and saved statistics. When nothing new was analysed, the stored statistics are returned
    /// unchanged, or an empty statistics object if none were stored; nothing is saved in that case.
    /// </returns>
    /// <exception cref="InvalidOperationException">
    /// A typing session, a text within a typing session, or a text entity could not be found.
    /// </exception>
    public async ValueTask <UserTypingStatistics> GenerateUserStatisticsAsync(string userId, string language, TextGenerationType textGenerationType)
    {
        var storedStatistics = await _userTypingStatisticsStore
            .GetUserTypingStatisticsAsync(userId, language, textGenerationType)
            .ConfigureAwait(false);

        // With stored statistics only sessions from the last handled result onwards are requested;
        // otherwise every session of the user is loaded.
        IEnumerable<UserSession> sessions;
        if (storedStatistics != null)
        {
            sessions = await _userSessionRepository
                .FindAllForUserFromTypingResultsAsync(userId, storedStatistics.LastHandledResultUtc)
                .ConfigureAwait(false);
        }
        else
        {
            sessions = await _userSessionRepository
                .FindAllForUserAsync(userId)
                .ConfigureAwait(false);
        }

        var analysedTexts       = new List<TextAnalysisResult>();
        int typedTextCount      = 0;
        var latestSubmissionUtc = default(DateTime);

        foreach (var session in sessions)
        {
            var typingSession = await _typingSessionRepository
                .FindAsync(session.TypingSessionId)
                .ConfigureAwait(false);

            if (typingSession == null)
            {
                throw new InvalidOperationException("Typing session is not found.");
            }

            foreach (var typingResult in session.GetTextTypingResults())
            {
                // NOTE(review): the counter and the timestamp advance before the language/type filter
                // below, so filtered-out results are included in both — confirm this is intended.
                typedTextCount++;
                if (typingResult.SubmittedResultsUtc > latestSubmissionUtc)
                {
                    latestSubmissionUtc = typingResult.SubmittedResultsUtc;
                }

                var sessionText = typingSession.GetTypingSessionTextAtIndexOrDefault(typingResult.TypingSessionTextIndex);
                if (sessionText == null)
                {
                    throw new InvalidOperationException("Text is not found in typing session.");
                }

                var textEntity = await _textRepository
                    .FindAsync(sessionText.TextId)
                    .ConfigureAwait(false);

                if (textEntity == null)
                {
                    throw new InvalidOperationException("Text is not found.");
                }

                // Generate statistics only for the requested language and text generation type.
                if (textEntity.Language != language || textEntity.TextGenerationType != textGenerationType)
                {
                    continue;
                }

                analysedTexts.Add(
                    await _textTypingResultValidator
                        .ValidateAsync(sessionText.Value, typingResult)
                        .ConfigureAwait(false));
            }
        }

        if (analysedTexts.Count == 0)
        {
            // No new data yet: hand back what is stored, or an empty statistics object.
            return storedStatistics
                ?? new UserTypingStatistics(0, 0, Enumerable.Empty <KeyPairAggregatedData>(), DateTime.UtcNow);
        }

        // Speed is the mean over all analysed texts; key pairs are concatenated.
        var meanSpeed = analysedTexts.Sum(analysed => analysed.SpeedCpm) / analysedTexts.Count;
        var combined  = new TextAnalysisResult(meanSpeed, analysedTexts.SelectMany(analysed => analysed.KeyPairs));

        // One aggregate per (FromKey, ShouldBeKey) combination. Delay figures consider correct
        // key pairs only and are 0 when the combination has no correct key pair.
        var perKeyPairData = combined.KeyPairs
            .GroupBy(pair => new { pair.FromKey, pair.ShouldBeKey })
            .Select(pairGroup =>
            {
                var correctDelays = pairGroup
                    .Where(pair => pair.Type == KeyPairType.Correct)
                    .Select(pair => pair.Delay)
                    .ToList();
                bool hasCorrect = correctDelays.Count > 0;

                var averageDelay = hasCorrect ? correctDelays.Average() : 0;
                var minDelay     = hasCorrect ? correctDelays.Min() : 0;
                var maxDelay     = hasCorrect ? correctDelays.Max() : 0;

                return new KeyPairAggregatedData(
                    pairGroup.Key.FromKey,
                    pairGroup.Key.ShouldBeKey,
                    averageDelay,
                    minDelay,
                    maxDelay,
                    correctDelays.Count,
                    pairGroup.Count(pair => pair.Type == KeyPairType.Mistake));
            });

        var mergedStatistics = MergeAndReturnNew(
            storedStatistics,
            new TypingReport(combined, perKeyPairData),
            typedTextCount,
            latestSubmissionUtc);

        await _userTypingStatisticsStore
            .SaveAsync(userId, mergedStatistics, language, textGenerationType)
            .ConfigureAwait(false);

        return mergedStatistics;
    }
// Example #5
// 0
    /// <summary>
    /// Builds a typing report for a single user session, considering only texts of the requested
    /// text generation type.
    /// </summary>
    /// <param name="userSessionId">Identifier of the user session to report on.</param>
    /// <param name="textGenerationType">Only texts whose entity has this generation type are analysed.</param>
    /// <returns>
    /// A report with the mean speed, all key pairs and per-key-pair aggregates of the analysed texts;
    /// an empty report (speed 0, no key pairs) when no text qualified.
    /// </returns>
    /// <exception cref="InvalidOperationException">
    /// The user session, its typing session, a text within the typing session, or a text entity could not be found.
    /// </exception>
    public async ValueTask <TypingReport> GenerateReportForUserSessionAsync(string userSessionId, TextGenerationType textGenerationType)
    {
        var session = await _userSessionRepository
            .FindAsync(userSessionId)
            .ConfigureAwait(false);

        if (session == null)
        {
            throw new InvalidOperationException("Could not find user session.");
        }

        var typingSession = await _typingSessionRepository
            .FindAsync(session.TypingSessionId)
            .ConfigureAwait(false);

        if (typingSession == null)
        {
            throw new InvalidOperationException("Typing session is not found.");
        }

        var analysedTexts = new List<TextAnalysisResult>();

        foreach (var typingResult in session.GetTextTypingResults())
        {
            var sessionText = typingSession.GetTypingSessionTextAtIndexOrDefault(typingResult.TypingSessionTextIndex);
            if (sessionText == null)
            {
                throw new InvalidOperationException("Text is not found in typing session.");
            }

            var textEntity = await _textRepository
                .FindAsync(sessionText.TextId)
                .ConfigureAwait(false);

            if (textEntity == null)
            {
                throw new InvalidOperationException("Text is not found.");
            }

            // Only texts of the requested generation type are validated and added.
            if (textGenerationType == textEntity.TextGenerationType)
            {
                analysedTexts.Add(
                    await _textTypingResultValidator
                        .ValidateAsync(sessionText.Value, typingResult)
                        .ConfigureAwait(false));
            }
        }

        if (analysedTexts.Count == 0)
        {
            // No data yet.
            var emptyResult = new TextAnalysisResult(0, Enumerable.Empty <KeyPair>());
            return new TypingReport(emptyResult, Enumerable.Empty <KeyPairAggregatedData>());
        }

        // Speed is the mean over all analysed texts; key pairs are concatenated.
        var meanSpeed = analysedTexts.Sum(analysed => analysed.SpeedCpm) / analysedTexts.Count;
        var combined  = new TextAnalysisResult(meanSpeed, analysedTexts.SelectMany(analysed => analysed.KeyPairs));

        // One aggregate per (FromKey, ShouldBeKey) combination. Delay figures consider correct
        // key pairs only and are 0 when the combination has no correct key pair.
        var perKeyPairData = combined.KeyPairs
            .GroupBy(pair => new { pair.FromKey, pair.ShouldBeKey })
            .Select(pairGroup =>
            {
                var correctDelays = pairGroup
                    .Where(pair => pair.Type == KeyPairType.Correct)
                    .Select(pair => pair.Delay)
                    .ToList();
                bool hasCorrect = correctDelays.Count > 0;

                var averageDelay = hasCorrect ? correctDelays.Average() : 0;
                var minDelay     = hasCorrect ? correctDelays.Min() : 0;
                var maxDelay     = hasCorrect ? correctDelays.Max() : 0;

                return new KeyPairAggregatedData(
                    pairGroup.Key.FromKey,
                    pairGroup.Key.ShouldBeKey,
                    averageDelay,
                    minDelay,
                    maxDelay,
                    correctDelays.Count,
                    pairGroup.Count(pair => pair.Type == KeyPairType.Mistake));
            });

        return new TypingReport(combined, perKeyPairData);
    }