Exemplo n.º 1
0
        /// <summary>
        /// Simplifies a file name for easier processing.
        /// </summary>
        /// <param name="fileName">File name to simplify</param>
        /// <param name="removeYear">Whether to request year removal (selects <see cref="OptionalSimplifyRemoves.Year"/>)</param>
        /// <param name="removeWhitespace">Forwarded unchanged as the removeWhitespace argument of BuildSimplifyResults</param>
        /// <param name="removeCountry">Whether to request country removal (adds <see cref="OptionalSimplifyRemoves.Country"/>)</param>
        /// <returns>Simplified file name</returns>
        public static string SimplifyFileName(string fileName, bool removeYear, bool removeWhitespace, bool removeCountry)
        {
            // Start with no optional removals, then switch on the requested ones
            OptionalSimplifyRemoves options = OptionalSimplifyRemoves.None;
            if (removeYear)
            {
                options = OptionalSimplifyRemoves.Year;
            }

            if (removeCountry)
            {
                options |= OptionalSimplifyRemoves.Country;
            }

            // First/last word removal, word splitting and bracket-content removal are off;
            // removal of words that follow a removed word is disabled (disableRemAfter = true)
            var results = BuildSimplifyResults(fileName, false, false, options, true, false, removeWhitespace, false);
            return results.SimplifiedString;
        }
Exemplo n.º 2
0
 /// <summary>
 /// Simplifies a file name using the given set of optional word removals.
 /// </summary>
 /// <param name="fileName">File name to simplify</param>
 /// <param name="options">Selection of optional remove words, passed through to BuildSimplifyResults</param>
 /// <returns>Simplified file name</returns>
 public static string SimplifyFileName(string fileName, OptionalSimplifyRemoves options)
 {
     // No first/last word removal, remove-after left enabled, no word splitting;
     // separator replacement and bracket-content removal are both switched on
     var results = BuildSimplifyResults(fileName, false, false, options, false, false, true, true);
     return results.SimplifiedString;
 }
Exemplo n.º 3
0
        /// <summary>
        /// Simplifies an input string with various options.
        /// </summary>
        /// <remarks>
        /// Steps are applied in this order: lower-casing and replacing the ampersand with "and",
        /// optional removal of parenthesised content, removal of apostrophes and punctuation,
        /// optional replacement of separators with spaces, removal of the selected optional words
        /// and of the always-removed words, optional removal of the first and/or last word,
        /// optional word splitting, and finally trimming and collapsing of repeated spaces.
        /// </remarks>
        /// <param name="input">String to be simplified</param>
        /// <param name="removeFirst">Whether to always remove first word during simplification</param>
        /// <param name="removeLast">Whether to always remove last word during simplification</param>
        /// <param name="options">Selection of optional remove words</param>
        /// <param name="disableRemAfter">Disable removing of words that follow defined words to remove</param>
        /// <param name="wordSplitEn">Enables splitting words in the string using dictionary (e.g. "howimetyourmother" split to "how i met your mother")</param>
        /// <param name="removeWhitespace">Whether to replace runs of non-word characters and underscores with a single space</param>
        /// <param name="removeBrackContents">Whether to remove parenthesised text, including the parentheses</param>
        /// <returns>Simplified string results</returns>
        public static SimplifyStringResults BuildSimplifyResults(string input, bool removeFirst, bool removeLast, OptionalSimplifyRemoves options, bool disableRemAfter, bool wordSplitEn, bool removeWhitespace, bool removeBrackContents)
        {
            // All lowercase; invariant culture keeps the result independent of the
            // current thread culture (e.g. Turkish dotted/dotless i)
            string simplifiedName = input.ToLowerInvariant().Replace("&", "and");

            // Initialize string modifications
            ContentSearchMod mods = ContentSearchMod.None;

            // Remove contents inside any parentheses
            if (removeBrackContents)
            {
                simplifiedName = Regex.Replace(simplifiedName, @"\([^\)]*\)", " ");
                mods |= ContentSearchMod.BrackRemoval;
            }

            // Drop apostrophes entirely; turn ! ? ( ) : [ ] into spaces
            simplifiedName = Regex.Replace(simplifiedName, @"[']+", "");
            simplifiedName = Regex.Replace(simplifiedName, @"[!\?\u0028\u0029\:\]\[]+", " ");

            // Replace separators with spaces
            if (removeWhitespace)
            {
                simplifiedName = Regex.Replace(simplifiedName, @"\W+|_", " ");
            }

            // Initialize removed words dictionary
            Dictionary<FileWordType, List<string>> removeFileWords = new Dictionary<FileWordType, List<string>>();

            // Process each optional remove word: bit j of options selects OptionalRemoveWords[j]
            for (int j = 0; j < OptionalRemoveWords.Length; j++)
            {
                OptionalSimplifyRemoves currentOption = (OptionalSimplifyRemoves)(1 << j);
                if ((options & currentOption) == 0)
                {
                    continue;
                }

                bool removed;
                simplifiedName = RemoveWord(disableRemAfter, simplifiedName, removeFileWords, OptionalRemoveWords[j], out removed);
                if (!removed)
                {
                    continue;
                }

                // Compare the flag value (1 << j), not the raw index j, against the year flags.
                // NOTE(review): assumes OptionalSimplifyRemoves members are single-bit flags - confirm.
                if (currentOption == OptionalSimplifyRemoves.Year || currentOption == OptionalSimplifyRemoves.YearAndFollowing)
                {
                    mods |= ContentSearchMod.YearRemoved;
                }
                else
                {
                    mods |= ContentSearchMod.WordsRemoved;
                }
            }

            // Process always remove words
            foreach (RemoveFileWord remWord in AlwaysRemoveWords)
            {
                simplifiedName = RemoveWord(disableRemAfter, simplifiedName, removeFileWords, remWord);
            }

            // Remove first word (the modification flag is set even when nothing matched)
            if (removeFirst)
            {
                Match firstWordMatch = Regex.Match(simplifiedName, @"^\W*\w+");
                if (firstWordMatch.Success)
                {
                    simplifiedName = simplifiedName.Remove(firstWordMatch.Index, firstWordMatch.Length);
                }
                mods |= ContentSearchMod.WordsRemoved;
            }

            // Remove last word; this may be combined with first-word removal
            if (removeLast)
            {
                Match lastWordMatch = Regex.Match(simplifiedName, @"(\w+\W*)$");
                if (lastWordMatch.Success)
                {
                    simplifiedName = simplifiedName.Remove(lastWordMatch.Index, lastWordMatch.Length);
                }
                mods |= ContentSearchMod.WordsRemoved;
            }

            // Word splitting
            if (wordSplitEn)
            {
                // Separate input by spaces and replace each piece with its split form
                string[] words = simplifiedName.Split(' ');
                bool split = false;
                for (int w = 0; w < words.Length; w++)
                {
                    string newWord;
                    if (WordHelper.TrySplitWords(words[w], out newWord))
                    {
                        split = true;
                    }
                    words[w] = newWord;
                }

                // Join once instead of concatenating inside the loop
                simplifiedName = string.Join(" ", words);

                if (split)
                {
                    mods |= ContentSearchMod.WordSlit;
                }
            }

            // Trim, then collapse every run of spaces to a single space
            // (a single Replace("  ", " ") pass leaves doubles behind for runs of 3+)
            simplifiedName = Regex.Replace(simplifiedName.Trim(), " {2,}", " ");

            return new SimplifyStringResults(simplifiedName, removeFileWords, mods);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Creates a list of simplified strings from an input string (multiple results created from enabling various optional word removals).
        /// </summary>
        /// <remarks>
        /// Every combination of word splitting, first/last word removal, optional remove words and
        /// bracket-content removal is tried. Results are kept in the order they are first produced;
        /// empty results and results whose simplified string was already produced are skipped.
        /// </remarks>
        /// <param name="input">String to be simplified</param>
        /// <returns>List of simplified string results</returns>
        public static List<SimplifyStringResults> SimplifyString(string input)
        {
            // Results in first-seen order, plus a set of their strings for O(1) duplicate checks
            List<SimplifyStringResults> simplifiedStrings = new List<SimplifyStringResults>();
            HashSet<string> seenStrings = new HashSet<string>();

            // Number of combinations: the two low bits select first/last word removal,
            // the remaining bits select the optional remove words
            int optionCombinations = 1 << (OptionalRemoveWords.Length + 2);

            // Loop twice: without and with word splitting
            for (int i = 0; i < 2; i++)
            {
                bool wordSplit = i == 1;

                // Go through all combinations of optional removes
                for (int j = 0; j < optionCombinations; j++)
                {
                    // Decode this combination once; none of it depends on the inner loop
                    OptionalSimplifyRemoves options = (OptionalSimplifyRemoves)(j >> 2);

                    // Don't do both year removes
                    if ((options & OptionalSimplifyRemoves.Year) != 0 && (options & OptionalSimplifyRemoves.YearAndFollowing) != 0)
                    {
                        continue;
                    }

                    bool removeFirst = (j & 1) != 0;
                    bool removeLast = (j & 2) != 0;

                    // Without and with bracket content removal
                    for (int k = 0; k < 2; k++)
                    {
                        SimplifyStringResults simpleRes = BuildSimplifyResults(input, removeFirst, removeLast, options, false, wordSplit, true, k == 1);
                        string simplified = simpleRes.SimplifiedString;

                        // Don't allow result that is only the year
                        if (Regex.IsMatch(simplified, @"^(19|20)\d{2}$") && !simpleRes.RemovedWords.ContainsKey(FileWordType.Year))
                        {
                            continue;
                        }

                        // Don't let single words shorter than three characters through
                        if (!simplified.Contains(' ') && simplified.Length < 3 && WordHelper.IsWord(simplified))
                        {
                            continue;
                        }

                        // Keep only non-empty simplifications that have not been produced yet
                        if (!string.IsNullOrEmpty(simplified) && seenStrings.Add(simplified))
                        {
                            simplifiedStrings.Add(simpleRes);
                        }
                    }
                }
            }

            return simplifiedStrings;
        }
Exemplo n.º 5
0
 /// <summary>
 /// Simplifies a file name using the given set of optional word removals.
 /// </summary>
 /// <param name="fileName">File name to simplify</param>
 /// <param name="options">Selection of optional remove words, passed through to BuildSimplifyResults</param>
 /// <returns>Simplified file name</returns>
 public static string SimplifyFileName(string fileName, OptionalSimplifyRemoves options)
 {
     // Fixed settings: keep first and last words, leave remove-after enabled, skip word
     // splitting, replace separators with spaces and strip bracketed content
     var simplifyResults = BuildSimplifyResults(fileName, false, false, options, false, false, true, true);
     return simplifyResults.SimplifiedString;
 }
Exemplo n.º 6
0
        /// <summary>
        /// Simplifies an input string with various options.
        /// </summary>
        /// <remarks>
        /// Steps are applied in this order: lower-casing and replacing the ampersand with "and",
        /// optional removal of parenthesised content, removal of apostrophes and punctuation,
        /// optional replacement of separators with spaces, removal of the selected optional words
        /// and of the always-removed words, optional removal of the first and/or last word,
        /// optional word splitting, and finally trimming and collapsing of repeated spaces.
        /// </remarks>
        /// <param name="input">String to be simplified</param>
        /// <param name="removeFirst">Whether to always remove first word during simplification</param>
        /// <param name="removeLast">Whether to always remove last word during simplification</param>
        /// <param name="options">Selection of optional remove words</param>
        /// <param name="disableRemAfter">Disable removing of words that follow defined words to remove</param>
        /// <param name="wordSplitEn">Enables splitting words in the string using dictionary (e.g. "howimetyourmother" split to "how i met your mother")</param>
        /// <param name="removeWhitespace">Whether to replace runs of non-word characters and underscores with a single space</param>
        /// <param name="removeBrackContents">Whether to remove parenthesised text, including the parentheses</param>
        /// <returns>Simplified string results</returns>
        public static SimplifyStringResults BuildSimplifyResults(string input, bool removeFirst, bool removeLast, OptionalSimplifyRemoves options, bool disableRemAfter, bool wordSplitEn, bool removeWhitespace, bool removeBrackContents)
        {
            // All lowercase; invariant culture keeps the result independent of the
            // current thread culture (e.g. Turkish dotted/dotless i)
            string simplifiedName = input.ToLowerInvariant().Replace("&", "and");

            // Initialize string modifications
            ContentSearchMod mods = ContentSearchMod.None;

            // Remove contents inside any parentheses
            if (removeBrackContents)
            {
                simplifiedName = Regex.Replace(simplifiedName, @"\([^\)]*\)", " ");
                mods |= ContentSearchMod.BrackRemoval;
            }

            // Drop apostrophes entirely; turn ! ? ( ) : [ ] into spaces
            simplifiedName = Regex.Replace(simplifiedName, @"[']+", "");
            simplifiedName = Regex.Replace(simplifiedName, @"[!\?\u0028\u0029\:\]\[]+", " ");

            // Replace separators with spaces
            if (removeWhitespace)
            {
                simplifiedName = Regex.Replace(simplifiedName, @"\W+|_", " ");
            }

            // Initialize removed words dictionary
            Dictionary<FileWordType, List<string>> removeFileWords = new Dictionary<FileWordType, List<string>>();

            // Process each optional remove word: bit j of options selects OptionalRemoveWords[j]
            for (int j = 0; j < OptionalRemoveWords.Length; j++)
            {
                OptionalSimplifyRemoves currentOption = (OptionalSimplifyRemoves)(1 << j);
                if ((options & currentOption) == 0)
                {
                    continue;
                }

                bool removed;
                simplifiedName = RemoveWord(disableRemAfter, simplifiedName, removeFileWords, OptionalRemoveWords[j], out removed);
                if (!removed)
                {
                    continue;
                }

                // Compare the flag value (1 << j), not the raw index j, against the year flags.
                // NOTE(review): assumes OptionalSimplifyRemoves members are single-bit flags - confirm.
                if (currentOption == OptionalSimplifyRemoves.Year || currentOption == OptionalSimplifyRemoves.YearAndFollowing)
                {
                    mods |= ContentSearchMod.YearRemoved;
                }
                else
                {
                    mods |= ContentSearchMod.WordsRemoved;
                }
            }

            // Process always remove words
            foreach (RemoveFileWord remWord in AlwaysRemoveWords)
            {
                simplifiedName = RemoveWord(disableRemAfter, simplifiedName, removeFileWords, remWord);
            }

            // Remove first word (the modification flag is set even when nothing matched)
            if (removeFirst)
            {
                Match firstWordMatch = Regex.Match(simplifiedName, @"^\W*\w+");
                if (firstWordMatch.Success)
                {
                    simplifiedName = simplifiedName.Remove(firstWordMatch.Index, firstWordMatch.Length);
                }
                mods |= ContentSearchMod.WordsRemoved;
            }

            // Remove last word; this may be combined with first-word removal
            if (removeLast)
            {
                Match lastWordMatch = Regex.Match(simplifiedName, @"(\w+\W*)$");
                if (lastWordMatch.Success)
                {
                    simplifiedName = simplifiedName.Remove(lastWordMatch.Index, lastWordMatch.Length);
                }
                mods |= ContentSearchMod.WordsRemoved;
            }

            // Word splitting
            if (wordSplitEn)
            {
                // Separate input by spaces and replace each piece with its split form
                string[] words = simplifiedName.Split(' ');
                bool split = false;
                for (int w = 0; w < words.Length; w++)
                {
                    string newWord;
                    if (WordHelper.TrySplitWords(words[w], out newWord))
                    {
                        split = true;
                    }
                    words[w] = newWord;
                }

                // Join once instead of concatenating inside the loop
                simplifiedName = string.Join(" ", words);

                if (split)
                {
                    mods |= ContentSearchMod.WordSlit;
                }
            }

            // Trim, then collapse every run of spaces to a single space
            // (a single Replace("  ", " ") pass leaves doubles behind for runs of 3+)
            simplifiedName = Regex.Replace(simplifiedName.Trim(), " {2,}", " ");

            return new SimplifyStringResults(simplifiedName, removeFileWords, mods);
        }