/// <summary>
/// Fills the analysis fields that are only produced when
/// <c>configuration.InformationLevel</c> is at least <c>InformationLevel.NORMAL</c>
/// (marked/cleaned input and the cleaned-input ratio), plus <c>RelevantInput</c>
/// when the level is at least <c>InformationLevel.FULL</c>.
/// Does nothing below <c>NORMAL</c>.
/// </summary>
/// <param name="configuration">Supplies <c>InformationLevel</c> and <c>UnicodeNormalization</c>.</param>
/// <param name="analysis">Analysis to update in place; its <c>Input</c> must already be set.</param>
/// <param name="parsedInput">Input text as left by the earlier curse-word / small-talk replacement steps.</param>
private async Task FillHigherInformationLevel(SmallTalksPreProcessingConfiguration configuration, Analysis analysis, string parsedInput)
{
    if (configuration.InformationLevel >= InformationLevel.NORMAL)
    {
        var parsedInputProcess = InputProcess.FromString(parsedInput);
        if (configuration.UnicodeNormalization)
        {
            parsedInputProcess = parsedInputProcess.RemoveAccentuation();
        }

        analysis.MarkedInput = parsedInputProcess.Output;
        analysis.CleanedInput = parsedInputProcess
            .RemovePunctuation()
            .RemoveRepeteadChars()
            .RemovePlaceholder()
            .Output;

        // Guard the division: for an empty input 0 / 0f evaluates to NaN, which would
        // be stored in the analysis (and can break serialisation). Report 0 instead;
        // UseCleanedInput is false in that case either way.
        var inputLength = analysis.Input.Length;
        analysis.CleanedInputRatio = inputLength > 0
            ? analysis.CleanedInput.Length / (float)inputLength
            : 0f;

        // The cleaned input is only considered usable when it keeps at least half
        // of the original length.
        analysis.UseCleanedInput = analysis.CleanedInputRatio >= 0.5f;

        if (configuration.InformationLevel >= InformationLevel.FULL)
        {
            var withoutStopWords = await _stopWordsDetector.RemoveWordsAsync(analysis.CleanedInput);
            analysis.RelevantInput = InputProcess
                .FromString(withoutStopWords)
                .RemoveRepeteadChars()
                .Output;
        }
    }
}
/// <summary>
/// Wraps the raw input in an <see cref="InputProcess"/>, applies
/// <c>RemoveRepeteadChars</c> and, when <c>configuration.ToLower</c> is set,
/// <c>ToLower</c>.
/// </summary>
/// <param name="input">Raw text to pre-process.</param>
/// <param name="configuration">Pre-processing options; only <c>ToLower</c> is read here.</param>
/// <returns>The resulting <see cref="InputProcess"/>.</returns>
private static InputProcess PreProcessInput(string input, SmallTalksPreProcessingConfiguration configuration)
{
    var withoutRepeats = new InputProcess { Input = input }.RemoveRepeteadChars();

    return configuration.ToLower
        ? withoutRepeats.ToLower()
        : withoutRepeats;
}
/// <summary>
/// Runs the small-talk and curse-word analysis for <paramref name="input"/> and logs
/// the outcome. The elapsed time is logged whether the analysis succeeds or fails.
/// </summary>
/// <param name="input">Text to analyse.</param>
/// <param name="configuration">Pre-processing configuration forwarded to the analysis.</param>
/// <returns>The analysis produced for the input.</returns>
/// <exception cref="Exception">
/// Any exception raised by the analysis is logged and then rethrown unchanged.
/// </exception>
public async Task<Analysis> DetectAsync(string input, SmallTalksPreProcessingConfiguration configuration)
{
    var sw = Stopwatch.StartNew();
    try
    {
        var analysis = await AnalyseForSmallTalksAndCurseWords(input, configuration);
        _logger.Information("For '{@Input}' and {@SmallTalkConfiguration} response was: {@Analysis}", input, configuration, analysis);
        return analysis;
    }
    catch (Exception ex)
    {
        _logger.Error(ex, "For '{@Input}' and {@SmallTalkConfiguration} response was: {@ErrorMessage}", input, configuration, ex.Message);

        // Rethrow with `throw;` rather than `throw ex;` so the original stack trace
        // is preserved for callers and for diagnostics.
        throw;
    }
    finally
    {
        sw.Stop();
        _logger.Information("DetectAsync finished after {@Time} ms", sw.ElapsedMilliseconds);
    }
}
/// <summary>
/// Runs every intent in <c>DectectorData.SmallTalksIntents</c> against the input, in
/// list order. Each match is replaced in the text via the <c>Replace</c> overload taking
/// index, length and <c>InputProcess.Placeholder</c>, and recorded in
/// <c>analysis.Matches</c>.
/// </summary>
/// <param name="configuration">
/// Its <c>InformationLevel</c> decides how much detail each recorded match carries:
/// <c>Value</c> is set from <c>NORMAL</c> upwards, <c>Index</c> and <c>Lenght</c> from
/// <c>FULL</c> upwards; otherwise they are left null.
/// </param>
/// <param name="analysis">Analysis whose <c>Matches</c> list receives one entry per match.</param>
/// <param name="parsedInput">Text to scan.</param>
/// <returns>The text after all matches have been replaced.</returns>
private string ParseInputSearchingForSmallTalks(SmallTalksPreProcessingConfiguration configuration, Analysis analysis, string parsedInput)
{
    foreach (var smallTalk in DectectorData.SmallTalksIntents)
    {
        // Matches are computed against the text as it stands when this intent starts.
        var found = smallTalk.Regex.Matches(parsedInput);
        if (found.Count == 0)
        {
            continue;
        }

        foreach (Match hit in found)
        {
            parsedInput = parsedInput.Replace(index: hit.Index, length: hit.Length, replacement: InputProcess.Placeholder);

            var level = configuration.InformationLevel;
            var includeValue = level >= InformationLevel.NORMAL;
            var includePosition = level >= InformationLevel.FULL;

            var entry = new MatchData
            {
                SmallTalk = smallTalk.Name,
                Value = includeValue ? hit.Value : null,
                Index = includePosition ? hit.Index : (int?)null,
                Lenght = includePosition ? hit.Length : (int?)null,
            };
            analysis.Matches.Add(entry);
        }
    }

    return parsedInput;
}
/// <summary>
/// Builds the full <see cref="Analysis"/> for an input: pre-processes it, awaits
/// <c>Init</c>, replaces curse words with <c>InputProcess.Placeholder</c>, searches for
/// small talks and finally fills the fields that depend on the information level.
/// </summary>
/// <param name="input">
/// Raw text. The curse-word and small-talk steps run on this raw text; the
/// pre-processed output is only stored as <c>Analysis.Input</c>.
/// </param>
/// <param name="configuration">Pre-processing configuration passed to every step.</param>
/// <returns>The populated analysis.</returns>
private async Task<Analysis> AnalyseForSmallTalksAndCurseWords(string input, SmallTalksPreProcessingConfiguration configuration)
{
    var preProcessed = PreProcessInput(input, configuration);
    await Init();

    var result = new Analysis
    {
        Input = preProcessed.Output,
        Matches = new List<MatchData>(),
    };

    // NOTE(review): the intent list is re-sorted by priority and reassigned on every
    // call — confirm this is intended if DectectorData is shared between callers.
    DectectorData.SmallTalksIntents = DectectorData.SmallTalksIntents
        .OrderBy(intent => intent.Priority)
        .ToList();

    (string maskedInput, bool foundCurseWords) =
        await _curseWordsDetector.ReplaceWordsAsync(input, InputProcess.Placeholder);
    result.HaveCursedWords = foundCurseWords;

    maskedInput = ParseInputSearchingForSmallTalks(configuration, result, maskedInput);
    await FillHigherInformationLevel(configuration, result, maskedInput);

    return result;
}