/// <summary>
/// Reports whether the bound <c>PhraseSeparator</c> value shares at least one flag with the converter parameter.
/// The bound value is also stored in the <c>separator</c> field of this converter.
/// </summary>
/// <param name="value">The bound value; cast to <c>PhraseSeparator</c>.</param>
/// <param name="targetType">Not used.</param>
/// <param name="parameter">The flag(s) to test for; cast to <c>PhraseSeparator</c>.</param>
/// <param name="culture">Not used.</param>
/// <returns>A boxed <c>bool</c>: true when the bitwise AND of parameter and value is non-zero.</returns>
public object Convert(object value, Type targetType, object parameter, CultureInfo culture)
        {
            // The parameter is unboxed first, so a bad parameter leaves the stored field untouched.
            PhraseSeparator flagsToTest = (PhraseSeparator)parameter;
            PhraseSeparator boundValue  = (PhraseSeparator)value;

            this.separator = boundValue;

            bool sharesAnyFlag = (flagsToTest & this.separator) != 0;
            return sharesAnyFlag;
        }
Exemple #2
0
        /// <summary>
        /// Splits a text on the separators selected by <paramref name="phraseSeparator"/> and scans the
        /// resulting pieces for phrases that match the given criteria.
        /// </summary>
        /// <param name="text">The text to search. A null or empty text produces an empty list.</param>
        /// <param name="phraseSeparator">The separator flags; used to split the text and forwarded to the scan</param>
        /// <param name="minCharsPerPhrase">The minimum number of characters for each phrase</param>
        /// <param name="minimumWordsPerPhrase">The minimum amount of words for each phrase</param>
        /// <param name="maximumWordsPerPhrase">The maximum amount of words for each phrase</param>
        /// <param name="cts">Cancellation token forwarded to the scan</param>
        /// <returns>A list of phrases that match our criteria (empty when there is nothing to scan)</returns>
        public static async Task <List <Phrase> > ScanTextAsync(string text, PhraseSeparator phraseSeparator = PhraseSeparator.All, int minCharsPerPhrase = 3, int minimumWordsPerPhrase = 1, int maximumWordsPerPhrase = 1, CancellationToken cts = default)
        {
            // Nothing to split: hand back an empty result.
            if (string.IsNullOrEmpty(text))
            {
                return new List <Phrase>();
            }

            string[] pieces = text.Split(GetSeparators(phraseSeparator), StringSplitOptions.RemoveEmptyEntries);

            // The text consisted of separators only.
            if (pieces == null || pieces.Length == 0)
            {
                return new List <Phrase>();
            }

            return await ScanAsync(pieces, phraseSeparator, minCharsPerPhrase, minimumWordsPerPhrase, maximumWordsPerPhrase, cts);
        }
Exemple #3
0
        /// <summary>
        /// Reads a whole file as text and scans it for phrases that match the given criteria
        /// </summary>
        /// <param name="filePath">The full path to the file</param>
        /// <param name="phraseSeparator">The separator flags forwarded to the text scan</param>
        /// <param name="minCharsPerPhrase">Set the minimum number of characters for each phrase</param>
        /// <param name="minimumWordsPerPhrase">Set the minimum amount of words for each phrase</param>
        /// <param name="maximumWordsPerPhrase">Set the maximum amount of words for each phrase</param>
        /// <param name="cts">Cancellation token forwarded to the text scan</param>
        /// <returns>A list of phrases that match our criteria</returns>
        /// <remarks>
        /// The file is decoded as UTF-8 (a byte order mark, if present, is honoured by the reader).
        /// Exceptions raised while opening or reading the file are not caught here.
        /// </remarks>
        public static async Task <List <Phrase> > ScanFileAsync(string filePath, PhraseSeparator phraseSeparator = PhraseSeparator.All, int minCharsPerPhrase = 3, int minimumWordsPerPhrase = 1, int maximumWordsPerPhrase = 1, CancellationToken cts = default)
        {
            string text;

            // A plain reader held in a local is used instead of a memory-mapped view:
            //  - a view stream is rounded up to the system page size, so reading it to the end
            //    would append NUL padding to the text;
            //  - mapping a file requires it to be non-empty and opens it for read/write;
            //  - a local keeps concurrent calls from sharing any state.
            using (StreamReader reader = new StreamReader(filePath, Encoding.UTF8))
            {
                text = await reader.ReadToEndAsync();
            }

            return await ScanTextAsync(text, phraseSeparator, minCharsPerPhrase, minimumWordsPerPhrase, maximumWordsPerPhrase, cts);
        }
Exemple #4
0
        /// <summary>
        /// Returns the separator strings whose flags are NOT set in <paramref name="phraseSeparator"/>.
        /// </summary>
        /// <param name="phraseSeparator">The separator flags that are already in use</param>
        /// <returns>
        /// The shared <c>allSeparators</c> array when the argument is exactly <c>None</c> or <c>All</c>;
        /// otherwise a new array holding the separators of every unset flag.
        /// </returns>
        private static string[] GetMissingSeparators(PhraseSeparator phraseSeparator)
        {
            bool isNoneOrAll = phraseSeparator == PhraseSeparator.None || phraseSeparator == PhraseSeparator.All;
            if (isNoneOrAll)
            {
                return(allSeparators);
            }

            // Flag -> separator text, in the order the separators are emitted.
            // NOTE(review): "·" is tied to the Semicolon flag, same as ";" - confirm this is intended
            // and not a copy/paste of the previous entry.
            KeyValuePair <PhraseSeparator, string>[] candidates =
            {
                new KeyValuePair <PhraseSeparator, string>(PhraseSeparator.Space, " "),
                new KeyValuePair <PhraseSeparator, string>(PhraseSeparator.Comma, ","),
                new KeyValuePair <PhraseSeparator, string>(PhraseSeparator.Semicolon, ";"),
                new KeyValuePair <PhraseSeparator, string>(PhraseSeparator.Semicolon, "·"),
                new KeyValuePair <PhraseSeparator, string>(PhraseSeparator.Colon, ":"),
                new KeyValuePair <PhraseSeparator, string>(PhraseSeparator.FullStop, "."),
                new KeyValuePair <PhraseSeparator, string>(PhraseSeparator.Tab, "\t"),
                new KeyValuePair <PhraseSeparator, string>(PhraseSeparator.NewLine, Environment.NewLine),
            };

            List <string> missing = new List <string>();
            foreach (KeyValuePair <PhraseSeparator, string> candidate in candidates)
            {
                if (!phraseSeparator.HasFlag(candidate.Key))
                {
                    missing.Add(candidate.Value);
                }
            }

            return(missing.ToArray());
        }
Exemple #5
0
        /// <summary>
        /// Scans English sample texts (the US state names and a short sentence) with different
        /// separator settings, with and without a value filter, and checks the number and
        /// content of the phrases returned by <c>Scanner.ScanTextAsync</c>.
        /// </summary>
        public async Task EnglishScanTextTestAsync()
        {
            // --- State names, one per line ---
            string statesText = @"Alabama
Alaska
Arizona
Arkansas
California
Colorado
Connecticut
Delaware
Florida
Georgia
Hawaii
Idaho
Illinois
Indiana
Iowa
Kansas
Kentucky
Louisiana
Maine
Maryland
Massachusetts
Michigan
Minnesota
Mississippi
Missouri
Montana
Nebraska
Nevada
New Hampshire
New Jersey
New Mexico
New York
North Carolina
North Dakota
Ohio
Oklahoma
Oregon
Pennsylvania
Rhode Island
South Carolina
South Dakota
Tennessee
Texas
Utah
Vermont
Virginia
Washington
West Virginia
Wisconsin
Wyoming";

            HashSet <Phrase> found = new HashSet <Phrase>(await Scanner.ScanTextAsync(statesText, PhraseSeparator.NewLine, 1, 1, 3));
            Assert.IsTrue(found.Count == 50);

            // Same text, filtered by value 666 with the Sumerian method.
            found = new HashSet <Phrase>(await Scanner.ScanTextAsync(statesText, new int[] { 666 }, CalculationMethod.Sumerian, PhraseSeparator.NewLine, 1, 1, 3));
            Assert.IsTrue(found.Count == 2);
            Assert.IsTrue(found.Contains(new Phrase("NEW MEXICO")));
            Assert.IsTrue(found.Contains(new Phrase("NEW YORK")));

            // --- State names, comma separated ---
            statesText = "Alabama,Alaska,Arizona,Arkansas,California,Colorado,Connecticut,Delaware,Florida,Georgia,Hawaii,Idaho,Illinois,Indiana,Iowa,Kansas,Kentucky,Louisiana,Maine,Maryland,Massachusetts,Michigan,Minnesota,Mississippi,Missouri,Montana,Nebraska,Nevada,New Hampshire,New Jersey,New Mexico,New York,North Carolina,North Dakota,Ohio,Oklahoma,Oregon,Pennsylvania,Rhode Island,South Carolina,South Dakota,Tennessee,Texas,Utah,Vermont,Virginia,Washington,West Virginia,Wisconsin,Wyoming";

            found = new HashSet <Phrase>(await Scanner.ScanTextAsync(statesText, PhraseSeparator.Comma, 1, 1, 3));
            Assert.IsTrue(found.Count == 50);

            found = new HashSet <Phrase>(await Scanner.ScanTextAsync(statesText, new int[] { 666 }, CalculationMethod.Sumerian, PhraseSeparator.Comma, 1, 1, 3));
            Assert.IsTrue(found.Count == 2);
            Assert.IsTrue(found.Contains(new Phrase("NEW MEXICO")));
            Assert.IsTrue(found.Contains(new Phrase("NEW YORK")));

            // --- A single sentence, up to five words per phrase ---
            string sentence = "THE DIE HAS BEEN CAST";

            // Comma separator, value 888.
            found = new HashSet <Phrase>(await Scanner.ScanTextAsync(sentence, new int[] { 888 }, CalculationMethod.Sumerian, PhraseSeparator.Comma, 1, 1, 5));
            Assert.IsTrue(found.Count == 1);
            Assert.IsTrue(found.Contains(new Phrase("THE DIE HAS BEEN CAST")));

            // Every separator except space, value 888.
            found = new HashSet <Phrase>(await Scanner.ScanTextAsync(sentence, new int[] { 888 }, CalculationMethod.Sumerian, PhraseSeparator.AllExceptSpace, 1, 1, 5));
            Assert.IsTrue(found.Count == 1);
            Assert.IsTrue(found.Contains(new Phrase("THE DIE HAS BEEN CAST")));

            // Every separator except space, no value filter.
            found = new HashSet <Phrase>(await Scanner.ScanTextAsync(sentence, PhraseSeparator.AllExceptSpace, 1, 1, 5));
            Assert.IsTrue(found.Count == 1);
            Assert.IsTrue(found.Contains(new Phrase("THE DIE HAS BEEN CAST")));

            // All separators, no value filter: 15 distinct phrases are expected.
            found = new HashSet <Phrase>(await Scanner.ScanTextAsync(sentence, PhraseSeparator.All, 1, 1, 5));
            Assert.IsTrue(found.Count == 15);
            Assert.IsTrue(found.Contains(new Phrase("THE DIE HAS BEEN CAST")));

            // All separators, value 888.
            found = new HashSet <Phrase>(await Scanner.ScanTextAsync(sentence, new int[] { 888 }, CalculationMethod.Sumerian, PhraseSeparator.All, 1, 1, 5));
            Assert.IsTrue(found.Count == 1);
            Assert.IsTrue(found.Contains(new Phrase("THE DIE HAS BEEN CAST")));
        }
Exemple #6
0
        /// <summary>
        /// Will scan an array of strings and look for phrases that match the given criteria
        /// </summary>
        /// <param name="splittedPhrases">The already split pieces of text to search. The array is only read, never modified.</param>
        /// <param name="phraseSeparator">The separator flags. When the Space flag is set, consecutive pieces are combined into multi-word phrases; otherwise every piece is treated as one phrase.</param>
        /// <param name="minCharsPerPhrase">Set the minimum number of characters for each phrase</param>
        /// <param name="minimumWordsPerPhrase">Set the minimum amount of words for each phrase</param>
        /// <param name="maximumWordsPerPhrase">Set the maximum amount of words for each phrase</param>
        /// <param name="cts">Cancellation token. When cancellation is requested the partial results are discarded and an empty list is returned; no exception is thrown.</param>
        /// <returns>A list of distinct phrases that match our criteria</returns>
        public static async Task <List <Phrase> > ScanAsync(string[] splittedPhrases, PhraseSeparator phraseSeparator, int minCharsPerPhrase = 3, int minimumWordsPerPhrase = 1, int maximumWordsPerPhrase = 1, CancellationToken cts = default)
        {
            HashSet <Phrase> matchedPhrases = new HashSet <Phrase>();

            if (splittedPhrases != null && splittedPhrases.Length > 0)
            {
                await Task.Run(() =>
                {
                    // Reused for every word count; Split does not modify it.
                    string[] wordSeparator = new string[] { " " };
                    int totalSplitted      = splittedPhrases.Length;
                    if (phraseSeparator.HasFlag(PhraseSeparator.Space))
                    {
                        // For every start index, grow a phrase one piece at a time.
                        for (int i = 0; i < totalSplitted; i++)
                        {
                            if (cts.IsCancellationRequested)
                            {
                                matchedPhrases.Clear();
                                break;
                            }
                            int counter         = 0;
                            bool isLimitReached = false;
                            while (!isLimitReached)
                            {
                                string currentPhrase = "";
                                for (int p = i; p <= i + counter && p < totalSplitted; p++)
                                {
                                    currentPhrase = string.Concat(currentPhrase, splittedPhrases[p], " ");
                                    // Stop growing once the last piece is included or the word limit is exceeded.
                                    if (p == totalSplitted - 1 || currentPhrase.Split(wordSeparator, StringSplitOptions.RemoveEmptyEntries).Length > maximumWordsPerPhrase)
                                    {
                                        isLimitReached = true;
                                    }
                                }
                                counter++;
                                Phrase phrase        = new Phrase(currentPhrase);
                                int countPhraseWords = phrase.NormalizedText.Split(wordSeparator, StringSplitOptions.RemoveEmptyEntries).Length;
                                if (phrase.NormalizedText.Length >= minCharsPerPhrase && countPhraseWords >= minimumWordsPerPhrase && countPhraseWords <= maximumWordsPerPhrase)
                                {
                                    matchedPhrases.Add(phrase);
                                }
                            }
                        }
                    }
                    else
                    {
                        // Separators that were not used for splitting are turned into spaces.
                        // No replacement is done for AllExceptSpace.
                        string[] separators = phraseSeparator != PhraseSeparator.AllExceptSpace
                            ? GetMissingSeparators(phraseSeparator)
                            : null;

                        for (int i = 0; i < totalSplitted; i++)
                        {
                            if (cts.IsCancellationRequested)
                            {
                                matchedPhrases.Clear();
                                break;
                            }

                            // Work on a local string so the caller's array is left untouched.
                            string phraseText = splittedPhrases[i];
                            if (separators != null)
                            {
                                foreach (string s in separators)
                                {
                                    phraseText = phraseText.Replace(s, " ");
                                }
                            }

                            Phrase phrase  = new Phrase(phraseText);
                            int countWords = phrase.NormalizedText.Split(wordSeparator, StringSplitOptions.RemoveEmptyEntries).Length;
                            if (phrase.NormalizedText.Length >= minCharsPerPhrase && countWords >= minimumWordsPerPhrase && countWords <= maximumWordsPerPhrase)
                            {
                                matchedPhrases.Add(phrase);
                            }
                        }
                    }
                });
            }

            return(matchedPhrases.ToList());
        }
Exemple #7
0
        /// <summary>
        /// Will scan an array of strings and look for phrases that match the given criteria
        /// and contain at least one of the requested values
        /// </summary>
        /// <param name="splittedPhrases">The already split pieces of text to search. The array is only read, never modified.</param>
        /// <param name="phraseSeparator">The separator flags. When the Space flag is set, consecutive pieces are combined into multi-word phrases; otherwise every piece is treated as one phrase.</param>
        /// <param name="values">The values you want to search for</param>
        /// <param name="calculationMethod">The Calculation Methods you want to use</param>
        /// <param name="minCharsPerPhrase">Set the minimum number of characters for each phrase</param>
        /// <param name="minimumWordsPerPhrase">Set the minimum amount of words for each phrase</param>
        /// <param name="maximumWordsPerPhrase">Set the maximum amount of words for each phrase</param>
        /// <param name="cts">Cancellation token. When cancellation is requested the partial results are discarded and an empty list is returned; no exception is thrown.</param>
        /// <returns>A list of distinct phrases that match our criteria (empty when either input array is null or empty)</returns>
        public static async Task <List <Phrase> > ScanAsync(string[] splittedPhrases, PhraseSeparator phraseSeparator, int[] values, CalculationMethod calculationMethod, int minCharsPerPhrase = 3, int minimumWordsPerPhrase = 1, int maximumWordsPerPhrase = 1, CancellationToken cts = default)
        {
            HashSet <Phrase> matchedPhrases = new HashSet <Phrase>();

            if (splittedPhrases != null && values != null && splittedPhrases.Length > 0 && values.Length > 0)
            {
                await Task.Run(() =>
                {
                    // Reused for every word count; Split does not modify it.
                    string[] wordSeparator = new string[] { " " };
                    int maxValue           = values.Max();
                    int totalSplitted      = splittedPhrases.Length;
                    if (phraseSeparator.HasFlag(PhraseSeparator.Space))
                    {
                        // The set of enum members does not change between phrases.
                        Array allMethods = Enum.GetValues(typeof(CalculationMethod));

                        // For every start index, grow a phrase one piece at a time.
                        for (int i = 0; i < totalSplitted; i++)
                        {
                            if (cts.IsCancellationRequested)
                            {
                                matchedPhrases.Clear();
                                break;
                            }
                            int counter         = 0;
                            bool isMaxPassed    = false;
                            bool isLimitReached = false;
                            while (!isMaxPassed && !isLimitReached)
                            {
                                string currentPhrase = "";
                                for (int p = i; p <= i + counter && p < totalSplitted; p++)
                                {
                                    currentPhrase = string.Concat(currentPhrase, splittedPhrases[p], " ");
                                    // Stop growing once the last piece is included or the word limit is exceeded.
                                    if (p == totalSplitted - 1 || currentPhrase.Split(wordSeparator, StringSplitOptions.RemoveEmptyEntries).Length > maximumWordsPerPhrase)
                                    {
                                        isLimitReached = true;
                                    }
                                }
                                Phrase phrase        = new Phrase(currentPhrase);
                                int countPhraseWords = phrase.NormalizedText.Split(wordSeparator, StringSplitOptions.RemoveEmptyEntries).Length;
                                if (phrase.NormalizedText.Length >= minCharsPerPhrase && countPhraseWords >= minimumWordsPerPhrase && countPhraseWords <= maximumWordsPerPhrase)
                                {
                                    if (phrase.ContainsAnyValue(values, calculationMethod, out int containedValue))
                                    {
                                        matchedPhrases.Add(phrase);
                                    }
                                    //Here we check to see whether the values of a phrase are larger than the maximum value we are looking for.
                                    //If they are, then there is no need to continue adding more words to this phrase.
                                    // NOTE(review): growth stops as soon as ONE selected method is 0 or has reached
                                    // maxValue, even if other selected methods are still below it - confirm intended.
                                    foreach (CalculationMethod c in allMethods)
                                    {
                                        if ((calculationMethod.HasFlag(c) && c != CalculationMethod.None && c != CalculationMethod.All) && (!(phrase.Values[c] < maxValue && phrase.Values[c] != 0)))
                                        {
                                            isMaxPassed = true;
                                        }
                                    }
                                }
                                counter++;
                            }
                        }
                    }
                    else
                    {
                        // Separators that were not used for splitting are turned into spaces.
                        // No replacement is done for AllExceptSpace.
                        string[] separators = phraseSeparator != PhraseSeparator.AllExceptSpace
                            ? GetMissingSeparators(phraseSeparator)
                            : null;

                        for (int i = 0; i < totalSplitted; i++)
                        {
                            if (cts.IsCancellationRequested)
                            {
                                matchedPhrases.Clear();
                                break;
                            }

                            // Work on a local string so the caller's array is left untouched.
                            string phraseText = splittedPhrases[i];
                            if (separators != null)
                            {
                                foreach (string s in separators)
                                {
                                    phraseText = phraseText.Replace(s, " ");
                                }
                            }

                            Phrase phrase  = new Phrase(phraseText);
                            int countWords = phrase.NormalizedText.Split(wordSeparator, StringSplitOptions.RemoveEmptyEntries).Length;
                            if (phrase.NormalizedText.Length >= minCharsPerPhrase && countWords >= minimumWordsPerPhrase && countWords <= maximumWordsPerPhrase)
                            {
                                if (phrase.ContainsAnyValue(values, calculationMethod, out int containedValue))
                                {
                                    matchedPhrases.Add(phrase);
                                }
                            }
                        }
                    }
                });
            }

            return(matchedPhrases.ToList());
        }
 /// <summary>
 /// Toggles the flag(s) given as converter parameter on the stored <c>separator</c> field
 /// and returns the updated value. The incoming <paramref name="value"/> is not read.
 /// </summary>
 /// <param name="value">Not used.</param>
 /// <param name="targetType">Not used.</param>
 /// <param name="parameter">The flag(s) to toggle; cast to <c>PhraseSeparator</c>.</param>
 /// <param name="culture">Not used.</param>
 /// <returns>The stored separator value after the XOR, boxed.</returns>
 public object ConvertBack(object value, Type targetType, object parameter, CultureInfo culture)
 {
     PhraseSeparator flagsToToggle = (PhraseSeparator)parameter;

     // XOR flips exactly the bits named by the parameter.
     this.separator = this.separator ^ flagsToToggle;
     return this.separator;
 }