/// <summary>
///     Enumerates phrases (as arrays of <see cref="CharacterDistribution" />s) that start with
///     <paramref name="firstWordDistribution" /> and whose remaining words exactly consume
///     <paramref name="filter" />.
/// </summary>
/// <param name="filteredDistributions">
///     Candidate <see cref="CharacterDistribution" />s for the remaining words; expected to be
///     sanitized and filtered by the caller already.
/// </param>
/// <param name="distributionsHashTable">
///     Lookup table used to test directly whether a single distribution equal to the remaining
///     filter exists.
/// </param>
/// <param name="firstWordDistribution">The distribution placed in the first slot of every phrase.</param>
/// <param name="maxPhraseLength">The maximum number of words in a phrase; also the length of every yielded array.</param>
/// <param name="filter">The characters still to be covered once the first word has been taken.</param>
/// <returns>
///     Arrays of length <paramref name="maxPhraseLength" />; slots that are not used keep their default value.
/// </returns>
// ReSharper disable once TooManyArguments
private static IEnumerable<CharacterDistribution[]> GetDistributionCombinations(
    CharacterDistribution[] filteredDistributions,
    GenericHashTable<CharacterDistribution> distributionsHashTable,
    CharacterDistribution firstWordDistribution,
    int maxPhraseLength,
    CharacterDistribution filter
)
{
    // Allocate the phrase at its maximum size up front so it never has to grow
    var phrase = new CharacterDistribution[maxPhraseLength];
    phrase[0] = firstWordDistribution;

    if (filter.IsEmpty())
    {
        // Nothing is left to cover: the first word alone is a complete phrase
        yield return phrase;
    }
    else if (maxPhraseLength != 1 && filter.IsValid())
    {
        // There is room for more words and the remaining filter is still usable
        if (maxPhraseLength == 2)
        {
            // Exactly one slot left: the missing word must equal the remaining filter
            if (distributionsHashTable.Contains(filter))
            {
                phrase[1] = filter;
                yield return phrase;
            }
        }
        else
        {
            // More than one slot left: search recursively among the candidates that still fit
            var remainingCandidates = filteredDistributions.Where(filter.CanContain).ToArray();
            var completions = GetDistributionSubCombinations(
                remainingCandidates,
                distributionsHashTable,
                phrase,
                1,
                maxPhraseLength,
                filter
            );

            foreach (var completion in completions)
            {
                yield return completion;
            }
        }
    }
}
/// <summary>
///     Parses and validates positional command line arguments into an <see cref="Options" /> instance.
/// </summary>
/// <param name="arguments">
///     Positional arguments, in order: [0] maximum number of words (positive integer),
///     [1] number of tasks (positive integer), [2] path to an existing dictionary file,
///     [3] anagram string made of lowercase letters 'a'-'z' and spaces,
///     [4..] one or more MD5 hashes written as 32 hexadecimal characters,
///     optionally followed by a final "Silence" flag (case-insensitive).
/// </param>
/// <returns>The options described by <paramref name="arguments" />.</returns>
/// <exception cref="ArgumentNullException"><paramref name="arguments" /> is <c>null</c>.</exception>
/// <exception cref="ArgumentException">An argument is missing or fails validation.</exception>
public static Options FromArguments(string[] arguments)
{
    if (arguments == null)
    {
        throw new ArgumentNullException(nameof(arguments));
    }

    if (arguments.Length < 1 || !int.TryParse(arguments[0], out var maxNumberOfWords) || maxNumberOfWords <= 0)
    {
        throw new ArgumentException("Invalid maximum number of words.");
    }

    if (arguments.Length < 2 || !int.TryParse(arguments[1], out var numberOfTasks) || numberOfTasks <= 0)
    {
        throw new ArgumentException("Invalid number of tasks.");
    }

    if (arguments.Length < 3 || string.IsNullOrWhiteSpace(arguments[2]) || !File.Exists(arguments[2].Trim()))
    {
        throw new ArgumentException("Missing dictionary file.");
    }

    var wordDictionaryPath = Path.GetFullPath(arguments[2].Trim());

    // Each character must be a lowercase letter OR a space. The range test needs '&&':
    // with '||' the condition is true for every character and nothing is ever rejected.
    if (arguments.Length < 4 ||
        string.IsNullOrWhiteSpace(arguments[3]) ||
        !arguments[3].Trim().All(c => (c >= 'a' && c <= 'z') || c == ' '))
    {
        throw new ArgumentException("Invalid anagram string.");
    }

    // Spaces are only separators in the input; the filter is built from the letters alone
    var anagramFilter = CharacterDistribution.FromString(new string(
        arguments[3]
            .Trim()
            .Where(c => c >= 'a' && c <= 'z')
            .ToArray()
    ));

    var silence = arguments.LastOrDefault()?.Equals("Silence", StringComparison.InvariantCultureIgnoreCase) == true;

    // When the trailing flag is present it must not be treated as a hash
    var hashStrings = (silence ? arguments.Skip(4).Take(arguments.Length - 5) : arguments.Skip(4))
        .Select(s => s.ToLowerInvariant().Trim())
        .ToArray();

    // Require at least one hash, each exactly 32 genuine hexadecimal digits. A plain
    // '0'..'f' range test would also accept punctuation that sits between '9' and 'a'.
    if (arguments.Length < 5 ||
        hashStrings.Length == 0 ||
        hashStrings.Any(s => s.Length != 32 || s.Any(c => !((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f')))))
    {
        throw new ArgumentException("Invalid or missing MD5 hash.");
    }

    return new Options(
        maxNumberOfWords,
        numberOfTasks,
        wordDictionaryPath,
        anagramFilter,
        hashStrings.ToDictionary(GetMD5Bytes, hash => hash),
        silence || Console.IsOutputRedirected
    );
}
/// <summary>
///     Initializes a new <see cref="Options" /> instance from already validated values.
/// </summary>
/// <param name="maximumNumberOfWords">Stored in <c>MaximumNumberOfWords</c>.</param>
/// <param name="numberOfTasks">Stored in <c>NumberOfTasks</c>.</param>
/// <param name="wordDictionaryPath">Stored in <c>WordDictionaryPath</c>.</param>
/// <param name="anagramFilter">Stored in <c>AnagramFilter</c>.</param>
/// <param name="hashes">Stored in <c>Hashes</c>.</param>
/// <param name="silence">Stored in <c>Silence</c>.</param>
// ReSharper disable once TooManyDependencies
private Options(
    int maximumNumberOfWords,
    int numberOfTasks,
    string wordDictionaryPath,
    CharacterDistribution anagramFilter,
    Dictionary<uint[], string> hashes,
    bool silence)
{
    this.MaximumNumberOfWords = maximumNumberOfWords;
    this.NumberOfTasks = numberOfTasks;
    this.WordDictionaryPath = wordDictionaryPath;
    this.AnagramFilter = anagramFilter;
    this.Hashes = hashes;
    this.Silence = silence;
}
/// <summary>
///     Builds a parallel query producing the binary form of every phrase whose words
///     together satisfy <paramref name="anagramFilter" />.
/// </summary>
/// <param name="sanitizedAnagramWordPairs">
///     Maps each <see cref="CharacterDistribution" /> to the words (as byte arrays) that share it.
/// </param>
/// <param name="anagramFilter">A <see cref="CharacterDistribution" /> holding the characters to filter with.</param>
/// <param name="maximumNumberOfWords">Maximum number of words in a phrase.</param>
/// <param name="numberOfTasks">Degree of parallelism requested from PLINQ.</param>
/// <returns>
///     A query of byte arrays, each being the words of one phrase joined with
///     <c>WordSeparatorBytes</c>.
/// </returns>
// ReSharper disable once TooManyArguments
// ReSharper disable once ExcessiveIndentation
// ReSharper disable once TooManyDeclarations
public static ParallelQuery<byte[]> GetMatchedPhrases(
    this Dictionary<CharacterDistribution, byte[][]> sanitizedAnagramWordPairs,
    CharacterDistribution anagramFilter,
    int maximumNumberOfWords,
    int numberOfTasks
)
{
    // Only distributions that fit inside the anagram filter can ever be part of a phrase
    var candidateDistributions = sanitizedAnagramWordPairs.Keys.Where(anagramFilter.CanContain).ToArray();
    var candidateLookup = new GenericHashTable<CharacterDistribution>(candidateDistributions, null);

    return candidateDistributions
        .AsParallel()
        .WithDegreeOfParallelism(numberOfTasks)
        // Every candidate is tried as the first word of a phrase
        .SelectMany(firstWord => GetDistributionCombinations(
            candidateDistributions,
            candidateLookup,
            firstWord,
            maximumNumberOfWords,
            anagramFilter - firstWord
        ))
        // Expand each distribution phrase into all concrete word choices, skipping unused slots
        .SelectMany(phraseDistributions => phraseDistributions
            .Where(slot => !slot.IsEmpty())
            .Select(slot => sanitizedAnagramWordPairs[slot])
            .GetCartesianProduct()
        )
        // Join the words of each phrase into one byte array
        .Select(phraseWords =>
        {
            var phraseBytes = new List<byte>();

            foreach (var word in phraseWords)
            {
                // A separator is only inserted once something has already been written
                if (phraseBytes.Count != 0)
                {
                    phraseBytes.AddRange(WordSeparatorBytes);
                }

                phraseBytes.AddRange(word);
            }

            return phraseBytes.ToArray();
        });
}
/// <summary>
///     Enumerates completions of a partially built phrase so that the added words exactly
///     consume <paramref name="filter" />. Unlike <see cref="GetDistributionCombinations" />,
///     this method does not check whether the words already in the phrase meet the filter.
/// </summary>
/// <param name="filteredDistributions">
///     Candidate <see cref="CharacterDistribution" />s for the next word; expected to be
///     filtered by the caller already.
/// </param>
/// <param name="distributionsHashTable">
///     Lookup table used to test directly whether a single distribution equal to the remaining
///     filter exists.
/// </param>
/// <param name="phraseDistributions">The distributions chosen so far.</param>
/// <param name="currentPhraseLength">
///     The number of leading entries of <paramref name="phraseDistributions" /> that are in use.
/// </param>
/// <param name="maxPhraseLength">The maximum number of words in a phrase; also the length of every yielded array.</param>
/// <param name="filter">The characters still to be covered by the remaining words.</param>
/// <returns>
///     Freshly allocated arrays of length <paramref name="maxPhraseLength" />, one per completed phrase.
/// </returns>
// ReSharper disable once TooManyArguments
private static IEnumerable<CharacterDistribution[]> GetDistributionSubCombinations(
    CharacterDistribution[] filteredDistributions,
    GenericHashTable<CharacterDistribution> distributionsHashTable,
    CharacterDistribution[] phraseDistributions,
    int currentPhraseLength,
    int maxPhraseLength,
    CharacterDistribution filter
)
{
    // These depend only on the arguments, so they are computed once for the whole loop
    var fillsLastSlot = currentPhraseLength == maxPhraseLength - 1;
    var leavesOneSlot = currentPhraseLength == maxPhraseLength - 2;
    var nextLength = currentPhraseLength + 1;

    foreach (var candidate in filteredDistributions)
    {
        // Each candidate gets its own copy of the phrase, since yielded arrays are handed to the caller
        var extended = new CharacterDistribution[maxPhraseLength];
        Array.Copy(phraseDistributions, extended, currentPhraseLength);
        extended[currentPhraseLength] = candidate;

        if (candidate.Rank == filter.Rank)
        {
            // Same rank as the filter: this word is treated as completing the phrase
            yield return extended;
        }
        else if (!fillsLastSlot)
        {
            // Characters that would still be required after taking this word
            var remaining = filter - candidate;

            // Skip the word when what is left over cannot be satisfied
            if (remaining.IsValid())
            {
                if (leavesOneSlot)
                {
                    // Exactly one slot left: the missing word must equal the remaining filter
                    if (distributionsHashTable.Contains(remaining))
                    {
                        extended[nextLength] = remaining;
                        yield return extended;
                    }
                }
                else
                {
                    // Recurse with only the candidates that still fit the remaining filter
                    var narrowed = filteredDistributions.Where(remaining.CanContain).ToArray();
                    var completions = GetDistributionSubCombinations(
                        narrowed,
                        distributionsHashTable,
                        extended,
                        nextLength,
                        maxPhraseLength,
                        remaining
                    );

                    foreach (var completion in completions)
                    {
                        yield return completion;
                    }
                }
            }
        }
    }
}