private static string[] GetMissingSeparators(PhraseSeparator phraseSeparator) { if (phraseSeparator == PhraseSeparator.None || phraseSeparator == PhraseSeparator.All) { return(allSeparators); } List <string> separators = new List <string>(); if (!phraseSeparator.HasFlag(PhraseSeparator.Space)) { separators.Add(" "); } if (!phraseSeparator.HasFlag(PhraseSeparator.Comma)) { separators.Add(","); } if (!phraseSeparator.HasFlag(PhraseSeparator.Semicolon)) { separators.Add(";"); } if (!phraseSeparator.HasFlag(PhraseSeparator.Semicolon)) { separators.Add("·"); } if (!phraseSeparator.HasFlag(PhraseSeparator.Colon)) { separators.Add(":"); } if (!phraseSeparator.HasFlag(PhraseSeparator.FullStop)) { separators.Add("."); } if (!phraseSeparator.HasFlag(PhraseSeparator.Tab)) { separators.Add("\t"); } if (!phraseSeparator.HasFlag(PhraseSeparator.NewLine)) { separators.Add(Environment.NewLine); } return(separators.ToArray()); }
/// <summary> /// Will scan an array of strings and look for phrases that match the given criteria /// </summary> /// <param name="splittedPhrases">The text you want to search</param> /// <param name="minCharsPerPhrase">Set the minimum number of characters for each phrase</param> /// <param name="maximumWordsPerPhrase">Set the maximum amount of words for each phrase</param> /// <returns>A list of phrases that match our criteria</returns> public static async Task <List <Phrase> > ScanAsync(string[] splittedPhrases, PhraseSeparator phraseSeparator, int minCharsPerPhrase = 3, int minimumWordsPerPhrase = 1, int maximumWordsPerPhrase = 1, CancellationToken cts = default) { HashSet <Phrase> matchedPhrases = new HashSet <Phrase>(); if (splittedPhrases != null && splittedPhrases.Count() > 0) { await Task.Run(() => { int totalSplitted = splittedPhrases.Count(); if (phraseSeparator.HasFlag(PhraseSeparator.Space)) { for (int i = 0; i < totalSplitted; i++) { if (cts.IsCancellationRequested) { matchedPhrases.Clear(); break; } int counter = 0; bool isLimitReached = false; while (!isLimitReached) { string currentPhrase = ""; for (int p = i; p <= i + counter && p < totalSplitted; p++) { currentPhrase = string.Concat(currentPhrase, splittedPhrases[p], " "); if (p == totalSplitted - 1 || currentPhrase.Split(new string[] { " " }, StringSplitOptions.RemoveEmptyEntries).Count() > maximumWordsPerPhrase) { isLimitReached = true; } } counter++; Phrase phrase = new Phrase(currentPhrase); int countPhraseWords = phrase.NormalizedText.Split(new string[] { " " }, StringSplitOptions.RemoveEmptyEntries).Count(); if (phrase.NormalizedText.Length >= minCharsPerPhrase && countPhraseWords >= minimumWordsPerPhrase && countPhraseWords <= maximumWordsPerPhrase) { matchedPhrases.Add(phrase); } } } } else { if (phraseSeparator != PhraseSeparator.AllExceptSpace) { string[] separators = GetMissingSeparators(phraseSeparator); for (int i = 0; i < splittedPhrases.Count(); i++) { foreach (string s in separators) { splittedPhrases[i] = splittedPhrases[i].Replace(s, " "); } } } for (int i = 0; i < totalSplitted; i++) { if (cts.IsCancellationRequested) { matchedPhrases.Clear(); break; } Phrase phrase = new Phrase(splittedPhrases[i]); int countWords = phrase.NormalizedText.Split(new string[] { " " }, StringSplitOptions.RemoveEmptyEntries).Count(); if (phrase.NormalizedText.Length >= minCharsPerPhrase && countWords >= minimumWordsPerPhrase && countWords <= maximumWordsPerPhrase) { matchedPhrases.Add(phrase); } } } }); } return(matchedPhrases.ToList()); }
/// <summary> /// Will scan an array of strings and look for phrases that match the given criteria /// </summary> /// <param name="splittedPhrases">The text you want to search</param> /// <param name="values">The values you want to search for</param> /// <param name="calculationMethod">The Calculation Methods you want to use</param> /// <param name="minCharsPerPhrase">Set the minimum number of characters for each phrase</param> /// <param name="maximumWordsPerPhrase">Set the maximum amount of words for each phrase</param> /// <returns>A list of phrases that match our criteria</returns> public static async Task <List <Phrase> > ScanAsync(string[] splittedPhrases, PhraseSeparator phraseSeparator, int[] values, CalculationMethod calculationMethod, int minCharsPerPhrase = 3, int minimumWordsPerPhrase = 1, int maximumWordsPerPhrase = 1, CancellationToken cts = default) { HashSet <Phrase> matchedPhrases = new HashSet <Phrase>(); if (splittedPhrases != null && values != null && splittedPhrases.Count() > 0 && values.Count() > 0) { await Task.Run(() => { int maxValue = values.Max(); int totalSplitted = splittedPhrases.Count(); if (phraseSeparator.HasFlag(PhraseSeparator.Space)) { for (int i = 0; i < totalSplitted; i++) { if (cts.IsCancellationRequested) { matchedPhrases.Clear(); break; } int counter = 0; bool isMaxPassed = false; bool isLimitReached = false; while (!isMaxPassed && !isLimitReached) { string currentPhrase = ""; for (int p = i; p <= i + counter && p < totalSplitted; p++) { currentPhrase = string.Concat(currentPhrase, splittedPhrases[p], " "); if (p == totalSplitted - 1 || currentPhrase.Split(new string[] { " " }, StringSplitOptions.RemoveEmptyEntries).Count() > maximumWordsPerPhrase) { isLimitReached = true; } } Phrase phrase = new Phrase(currentPhrase); int countPhraseWords = phrase.NormalizedText.Split(new string[] { " " }, StringSplitOptions.RemoveEmptyEntries).Count(); if (phrase.NormalizedText.Length >= minCharsPerPhrase && countPhraseWords >= minimumWordsPerPhrase && countPhraseWords <= maximumWordsPerPhrase) { if (phrase.ContainsAnyValue(values, calculationMethod, out int containedValue)) { matchedPhrases.Add(phrase); } //Here we check to see whether the values of a phrase are larger than the maximum value we are looking for. //If they are, then there is no need to continue adding more words to this phrase. foreach (CalculationMethod c in Enum.GetValues(typeof(CalculationMethod))) { if ((calculationMethod.HasFlag(c) && c != CalculationMethod.None && c != CalculationMethod.All) && (!(phrase.Values[c] < maxValue && phrase.Values[c] != 0))) { isMaxPassed = true; } } } counter++; } } } else { if (phraseSeparator != PhraseSeparator.AllExceptSpace) { string[] separators = GetMissingSeparators(phraseSeparator); for (int i = 0; i < splittedPhrases.Count(); i++) { foreach (string s in separators) { splittedPhrases[i] = splittedPhrases[i].Replace(s, " "); } } } for (int i = 0; i < totalSplitted; i++) { if (cts.IsCancellationRequested) { matchedPhrases.Clear(); break; } Phrase phrase = new Phrase(splittedPhrases[i]); int countWords = phrase.NormalizedText.Split(new string[] { " " }, StringSplitOptions.RemoveEmptyEntries).Count(); if (phrase.NormalizedText.Length >= minCharsPerPhrase && countWords >= minimumWordsPerPhrase && countWords <= maximumWordsPerPhrase) { if (phrase.ContainsAnyValue(values, calculationMethod, out int containedValue)) { matchedPhrases.Add(phrase); } } } } }); } return(matchedPhrases.ToList()); }