/// <summary>
/// Expands a list of tags, some of which may be wildcard patterns (e.g. <c>*php*</c>), into the set of
/// concrete tags they refer to, using an n-gram index to narrow down the candidates for each wildcard.
/// </summary>
/// <param name="allTags">Lookup of every known tag; only its keys (the tag names) are used here.</param>
/// <param name="tagsToExpand">The tags and/or wildcard patterns to expand.</param>
/// <param name="nGrams">
/// N-gram index. Each entry yields positions into the list of <paramref name="allTags"/> keys
/// (presumably the index was built from the same key order - verify against the code that builds it).
/// </param>
/// <param name="printLoggingMessages">When <c>true</c>, logs the counters gathered while expanding.</param>
/// <returns>
/// The non-wildcard tags that exist in <paramref name="allTags"/>, plus every tag that was confirmed
/// as a match for one of the wildcard patterns.
/// </returns>
public static HashSet ExpandTagsNGrams(TagLookup allTags, List <string> tagsToExpand, NGrams nGrams, bool printLoggingMessages = false)
        {
            // Background: for a regex query such as /Google.*Search/ we can build a query of ANDs and ORs that gives
            // the trigrams that must be present in any text matching the regular expression.
            // In that case, the query is
            //      Goo AND oog AND ogl AND gle AND Sea AND ear AND arc AND rch
            // Applied to tag wildcards:
            //      '*php*'       -> php
            //      '*.net*'      -> .ne AND net
            //      '*.net'       -> .ne AND net   (open question: how do we distinguish this from '*.net*'?)
            //      '*hibernate*' -> hib AND ibe AND ber AND ern AND rna AND nat AND ate

            var expandedTags = new HashSet();
            var results      = new NGramResults();

            // The n-gram index refers to tags by position, so we need a positional copy of the dictionary keys.
            // That copy is only needed for wildcard patterns, so it is built on first use rather than up front.
            // TODO is there a better way of doing this, as we are creating a temporary list just for indexing the dictionary?
            List<string> allTagsList = null;

            foreach (var tagPattern in tagsToExpand)
            {
                if (!IsWildCard(tagPattern))
                {
                    // Not a wildcard: keep it as-is, but only if it is a tag we actually know about.
                    if (allTags.ContainsKey(tagPattern))
                    {
                        expandedTags.Add(tagPattern);
                    }
                    continue;
                }

                if (allTagsList == null)
                {
                    allTagsList = allTags.Keys.ToList();
                }

                results.ActualWildcards++;
                var searches = CreateSearches(tagPattern);
                // The return value (whether this pattern added any tag) is not needed here;
                // the matches are written directly into expandedTags and the counters into results.
                CollectPossibleNGramMatches(allTagsList, nGrams, searches, tagPattern, expandedTags, results);
            }

            if (printLoggingMessages)
            {
                Logger.Log("{0:N0} wildcards, {1:N0} searches processed, {2:N0} tag Ids collected, \n" +
                           "{3:N0} possible matches, {4:N0} IsActualMatch checks, {5:N0} tags added, {6:N0} false positives",
                           results.ActualWildcards, results.SearchesProcessed, results.TagIdsCollected, results.PossibleMatches,
                           results.ActualMatchChecks, results.TagsAdded, results.FalsePositives);
            }

            return expandedTags;
        }
        /// <summary>
        /// Finds the tags that match a single wildcard pattern. The n-gram index is used to collect candidate
        /// tag positions (the intersection of the positions listed for every search term), and each candidate
        /// is then verified with <c>IsActualMatch</c> before being added to <paramref name="expandedTags"/>.
        /// </summary>
        /// <param name="allTagsList">Tag names, indexed by the positions stored in <paramref name="nGrams"/>.</param>
        /// <param name="nGrams">N-gram index mapping a search term to the positions of the tags listed for it.</param>
        /// <param name="searches">The n-gram search terms derived from <paramref name="tagPattern"/>.</param>
        /// <param name="tagPattern">The wildcard pattern being expanded.</param>
        /// <param name="expandedTags">Receives every tag confirmed as a match.</param>
        /// <param name="results">Counters that are updated as the search progresses.</param>
        /// <returns>
        /// <c>true</c> if at least one candidate passed the <c>IsActualMatch</c> check;
        /// <c>false</c> otherwise, including when none of the searches is present in the index.
        /// </returns>
        private static bool CollectPossibleNGramMatches(List <string> allTagsList, NGrams nGrams, IEnumerable <string> searches,
                                                        string tagPattern, HashSet expandedTags, NGramResults results)
        {
            // Sanity check: skip any search the index has no entry for (e.g. a tag in the exclusion list that is
            // no longer a real tag). Each remaining search is resolved to its tag positions once, and the smallest
            // collections are visited first, which makes the hash set intersections slightly faster.
            var locationsBySize = searches.Where(search => nGrams.ContainsKey(search))
                                          .Select(search => nGrams[search])
                                          .OrderBy(locations => locations.Count);

            HashSet <int> expandedTagIds = null;

            foreach (var tagLocations in locationsBySize)
            {
                results.SearchesProcessed++;
                results.TagIdsCollected += tagLocations.Count;

                if (expandedTagIds == null)
                {
                    // First (smallest) collection seeds the candidate set.
                    expandedTagIds = new HashSet <int>(tagLocations);
                }
                else
                {
                    // Intersect directly with the raw collection. Wrapping it in a new HashSet first was measured
                    // to be slower (probably the cost of building the extra set), even though intersecting two
                    // HashSets should be able to take a fast path.
                    expandedTagIds.IntersectWith(tagLocations);
                }
            }

            if (expandedTagIds == null)
            {
                // None of the searches exists in the index, so there is nothing to verify.
                Logger.Log("TagPattern={0} (Searches: {1}), produces NO Tag Ids to test", tagPattern, String.Join(", ", searches));
                return false;
            }

            // expandedTagIds is guaranteed non-null from here on.
            results.PossibleMatches += expandedTagIds.Count;

            // N-Grams can give false positives, so we have to sanity check each match!
            // For example TagPattern: *php*, Searches: ph, hp, Tag: phonegap-pushplugin
            var rawTagPattern = tagPattern.Replace("*", "");
            bool tagWasAdded  = false;

            foreach (var expandedTagId in expandedTagIds)
            {
                var tagMatch = allTagsList[expandedTagId];

                results.ActualMatchChecks++;
                if (IsActualMatch(tagMatch, tagPattern, rawTagPattern))
                {
                    expandedTags.Add(tagMatch);
                    results.TagsAdded++;
                    tagWasAdded = true;
                }
                else
                {
                    results.FalsePositives++;
                }
            }

            return tagWasAdded;
        }