/// <summary>
/// Populates the fields of <paramref name="analysis"/> that are only produced when the
/// configured information level is <c>NORMAL</c> or higher: <c>MarkedInput</c>,
/// <c>CleanedInput</c>, <c>CleanedInputRatio</c> and <c>UseCleanedInput</c>. When the level
/// is <c>FULL</c> or higher, <c>RelevantInput</c> is filled as well.
/// Below <c>NORMAL</c> the method leaves <paramref name="analysis"/> untouched.
/// </summary>
/// <param name="configuration">Supplies <c>InformationLevel</c> and the <c>UnicodeNormalization</c> switch.</param>
/// <param name="analysis">Analysis to fill in; its <c>Input</c> must already be set because the ratio is computed against it.</param>
/// <param name="parsedInput">Text the marked/cleaned variants are derived from.</param>
private async Task FillHigherInformationLevel(SmallTalksPreProcessingConfiguration configuration, Analysis analysis, string parsedInput)
{
    if (configuration.InformationLevel < InformationLevel.NORMAL)
    {
        return;
    }

    var parsedInputProcess = InputProcess.FromString(parsedInput);
    if (configuration.UnicodeNormalization)
    {
        parsedInputProcess = parsedInputProcess.RemoveAccentuation();
    }

    // MarkedInput is captured before the punctuation/repeated-char/placeholder clean-up below.
    analysis.MarkedInput = parsedInputProcess.Output;
    analysis.CleanedInput = parsedInputProcess
        .RemovePunctuation()
        .RemoveRepeteadChars()
        .RemovePlaceholder()
        .Output;

    // Floating-point division by zero does not throw; it yields NaN or Infinity.
    // Guard the empty-input case so the ratio is always a finite number
    // (an empty original input is reported as ratio 0, hence UseCleanedInput == false).
    var cleanedLength = analysis.CleanedInput.Length;
    var originalLength = analysis.Input.Length;
    analysis.CleanedInputRatio = originalLength == 0
        ? 0f
        : cleanedLength / (float)originalLength;

    // The cleaned text is used only when it is at least half as long as the original input.
    analysis.UseCleanedInput = analysis.CleanedInputRatio >= 0.5f;

    if (configuration.InformationLevel >= InformationLevel.FULL)
    {
        var withoutStopWords = await _stopWordsDetector.RemoveWordsAsync(analysis.CleanedInput);

        // Repeated-char removal is applied again to the detector's output.
        analysis.RelevantInput = InputProcess
            .FromString(withoutStopWords)
            .RemoveRepeteadChars()
            .Output;
    }
}
/// <summary>
/// Runs every small-talk intent regex over <paramref name="input"/>, records each match,
/// and returns the collected analysis.
/// </summary>
/// <param name="input">Text to analyse.</param>
/// <returns>
/// An <see cref="Analysis"/> whose <c>Input</c> is the output of
/// <c>RemoveRepeteadChars().ToLower()</c> applied to <paramref name="input"/>, whose
/// <c>Matches</c> lists every regex hit, and whose <c>AnalysedInput</c>, <c>CleanedInput</c>
/// and <c>RelevantInput</c> are derived from the input after the matched spans were replaced.
/// </returns>
public Analysis Detect(string input)
{
    // Built before Init(), keeping the original call order.
    var normalized = new InputProcess { Input = input }
        .RemoveRepeteadChars()
        .ToLower();

    Init();

    var result = new Analysis
    {
        Input = normalized.Output,
        Matches = new List<MatchData>()
    };

    // The intents are re-sorted by ascending Priority on every call and the sorted
    // list is written back to DectectorData.
    DectectorData.SmallTalksIntents = DectectorData.SmallTalksIntents
        .OrderBy(intent => intent.Priority)
        .ToList();

    // Matching runs against the raw input, not the normalized text stored in result.Input.
    var remaining = input;
    foreach (var intent in DectectorData.SmallTalksIntents)
    {
        // The match collection is bound to the string as it was when Matches() was called;
        // reassigning 'remaining' inside the loop does not affect it.
        foreach (Match hit in intent.Regex.Matches(remaining))
        {
            // NOTE(review): later hits reuse indices computed before this replacement, so this
            // presumably relies on Replace(index, length, replacement) keeping the string
            // length unchanged - confirm against the helper.
            remaining = remaining.Replace(index: hit.Index, length: hit.Length, replacement: InputProcess.Placeholder);

            result.Matches.Add(new MatchData
            {
                SmallTalk = intent.Name,
                Value = hit.Value,
                Index = hit.Index,
                Lenght = hit.Length,
            });
        }
    }

    result.AnalysedInput = remaining;

    var cleaned = InputProcess.FromString(remaining)
        .RemovePlaceholder()
        .RemovePunctuation();
    result.CleanedInput = cleaned.Output;

    result.RelevantInput = _stopWordsDetector.RemoveStopWords(result.CleanedInput);

    return result;
}