/// <summary>
/// Selects the token subsets whose occurrence count equals <paramref name="maxOccurance"/>, uses them to
/// strip disjuncts from <paramref name="disjuncts"/>, and appends every subset that actually shrank the
/// disjunct list to <paramref name="finalQueryList"/>.
/// </summary>
/// <remarks>
/// When <paramref name="maxOccurance"/> is greater than 1, each candidate is profiled through
/// <c>TwitterProfiler</c> first and is applied only if its frequency is strictly below
/// <paramref name="twitterProfilerFrequencyThreshold"/>. If the last candidate is not below the threshold,
/// the method calls itself with the highest occurrence value in <paramref name="subsetsGroupedOccurance"/>
/// that is lower than the current one (and stops when that value is below 1).
/// Otherwise candidates are applied without profiling.
/// Whenever disjuncts remain after the last candidate was applied,
/// <c>QueryConverterUtils.ExtractUniqueQueryTokens</c> is invoked to continue the search.
/// The process exits with code 1 when the rounded profiler result equals
/// <c>TwitterProfiler.InvalidReturns.Exception</c>.
/// </remarks>
/// <param name="disjuncts">Disjunct list that is reduced in place while subsets are applied.</param>
/// <param name="finalQueryList">Output list receiving every subset that removed at least one disjunct.</param>
/// <param name="twitterProfilerFrequencyThreshold">Exclusive upper bound for the profiled frequency of an applied subset.</param>
/// <param name="subsetsGrouped">Candidate subsets; read by position together with <paramref name="subsetsGroupedOccurance"/>.</param>
/// <param name="subsetsGroupedOccurance">Occurrence value for the subset at the same index in <paramref name="subsetsGrouped"/>.</param>
/// <param name="maxOccurance">Occurrence value of the subsets to process in this call.</param>
private static void ExtractUniqueQueryTokensBasedOnOccurance(ArrayList<ArrayList<QueryToken>> disjuncts, ArrayList<ArrayList<QueryToken>> finalQueryList, double twitterProfilerFrequencyThreshold, ArrayList<ArrayList<QueryToken>> subsetsGrouped, ArrayList<int> subsetsGroupedOccurance, int maxOccurance)
{
    // Candidates are the subsets with the requested occurrence, ordered from fewest to most tokens.
    var candidates = subsetsGrouped
        .Where((subset, position) => subsetsGroupedOccurance[position] == maxOccurance)
        .OrderBy(subset => subset.Count)
        .ToList();

    // Note the index of each candidate that ContainsTokenSubset matches against any later (not smaller) candidate.
    // NOTE(review): presumably this means "candidate is contained in the later one" - confirm against ContainsTokenSubset.
    var redundantIndices = new ArrayList<int>();
    for (int current = 0; current < candidates.Count; ++current)
    {
        for (int later = current + 1; later < candidates.Count; ++later)
        {
            if (ContainsTokenSubset(candidates[current], candidates[later]))
            {
                redundantIndices.Add(current);
                break;
            }
        }
    }

    // The indices were gathered in ascending order, one per candidate at most,
    // so walking them back-to-front removes from the highest position down and keeps the rest valid.
    for (int k = redundantIndices.Count - 1; k >= 0; --k)
    {
        candidates.RemoveAt(redundantIndices[k]);
    }

    if (!(maxOccurance > 1))
    {
        // Occurrence of 1: profiling is not needed, every candidate is applied directly.
        foreach (var subset in candidates)
        {
            if (RemoveDisjunctsAndRecordSubset(disjuncts, finalQueryList, subset))
            {
                return;
            }
        }

        // Disjuncts are left over: look for new possible subsets.
        QueryConverterUtils.ExtractUniqueQueryTokens(disjuncts, finalQueryList, twitterProfilerFrequencyThreshold);
        return;
    }

    // Occurrence above 1: profile each candidate before applying it.
    int lastIndex = candidates.Count - 1;
    for (int i = 0; i <= lastIndex; ++i)
    {
        var subset = candidates[i];
        bool isLastCandidate = i == lastIndex;

        string searchQuery = TwitterProfiler.QueryTokensToStringConverter(subset);
        double frequency = TwitterProfiler.ProfileFrequency(searchQuery);

        // The profiler signals failure through a sentinel return value; abort the whole process in that case.
        if (Math.Round(frequency) == Convert.ToInt32(TwitterProfiler.InvalidReturns.Exception))
        {
            Console.WriteLine("\nError during twitter profiling: {0}!", TwitterProfiler.GetExceptionError());
            System.Environment.Exit(1);
        }

        if (frequency < twitterProfilerFrequencyThreshold)
        {
            if (RemoveDisjunctsAndRecordSubset(disjuncts, finalQueryList, subset))
            {
                return;
            }

            if (isLastCandidate)
            {
                // All candidates handled but disjuncts remain: look for new possible subsets.
                QueryConverterUtils.ExtractUniqueQueryTokens(disjuncts, finalQueryList, twitterProfilerFrequencyThreshold);
                return;
            }

            continue;
        }

        // Candidate is not below the threshold; only the last one triggers the fallback.
        if (!isLastCandidate)
        {
            continue;
        }

        // Fall back to the closest lower occurrence value, if there is one that is at least 1.
        ArrayList<int> lowerOccurrences = subsetsGroupedOccurance.FindAll(occurrence => occurrence < maxOccurance);
        int nextOccurrence = lowerOccurrences.Count > 0 ? lowerOccurrences.Max() : 0;
        if (nextOccurrence < 1)
        {
            return;
        }

        QueryConverterUtils.ExtractUniqueQueryTokensBasedOnOccurance(disjuncts, finalQueryList, twitterProfilerFrequencyThreshold, subsetsGrouped, subsetsGroupedOccurance, nextOccurrence);
        return;
    }
}

/// <summary>
/// Passes <paramref name="subset"/> to <c>QueryConverterUtils.RemoveDisjunctsContainingSubsets</c> and, if the
/// number of disjuncts dropped as a result, appends the subset to <paramref name="finalQueryList"/>.
/// </summary>
/// <param name="disjuncts">Disjunct list handed to the removal routine.</param>
/// <param name="finalQueryList">List that receives the subset when at least one disjunct was removed.</param>
/// <param name="subset">Subset to apply.</param>
/// <returns><c>true</c> when no disjuncts are left after the call; otherwise <c>false</c>.</returns>
private static bool RemoveDisjunctsAndRecordSubset(ArrayList<ArrayList<QueryToken>> disjuncts, ArrayList<ArrayList<QueryToken>> finalQueryList, ArrayList<QueryToken> subset)
{
    int countBeforeRemoval = disjuncts.Count;

    QueryConverterUtils.RemoveDisjunctsContainingSubsets(disjuncts, subset);

    if (disjuncts.Count < countBeforeRemoval)
    {
        finalQueryList.Add(subset);
    }

    return disjuncts.Count == 0;
}