コード例 #1
0
        /// <summary>
        /// Collects the term subsets of every disjunct, groups identical subsets, counts how often
        /// each one occurs, and passes the distinct subsets, their counts and the highest count on to
        /// <c>QueryConverterUtils.ExtractUniqueQueryTokensBasedOnOccurance</c>.
        /// </summary>
        /// <param name="disjuncts">Token lists whose subsets are enumerated via <c>QueryConverterUtils.GetSubsets</c>.</param>
        /// <param name="finalQueryList">
        /// Forwarded unchanged to <c>ExtractUniqueQueryTokensBasedOnOccurance</c>
        /// (presumably the list that receives the result - confirm against that method).
        /// </param>
        /// <param name="twitterProfilerFrequencyThreshold">Forwarded unchanged to <c>ExtractUniqueQueryTokensBasedOnOccurance</c>.</param>
        /// <remarks>
        /// When no subsets are produced (for example <paramref name="disjuncts"/> is empty) the method
        /// returns without calling the extraction step; previously this case failed inside <c>Max()</c>
        /// with an <c>InvalidOperationException</c>.
        /// </remarks>
        public static void ExtractUniqueQueryTokens(ArrayList<ArrayList<QueryToken>> disjuncts, ArrayList<ArrayList<QueryToken>> finalQueryList,
                                                    double twitterProfilerFrequencyThreshold)
        {
            // Extract all term subsets of all disjuncts into one flat list.
            ArrayList<ArrayList<QueryToken>> subsetsCombined = new ArrayList<ArrayList<QueryToken>>();
            foreach (var disjunct in disjuncts)
            {
                subsetsCombined.AddRange(QueryConverterUtils.GetSubsets(disjunct));
            }

            // Without any subset there is no maximum occurrence to compute: Max() over an empty
            // int sequence throws, so stop here instead.
            if (!subsetsCombined.Any())
            {
                return;
            }

            // Group equal subsets (equality is decided by their string representation) and count
            // the occurrences; the first subset of each group serves as its representative.
            var groupedSubsets =
                from subset in subsetsCombined
                group subset by TwitterProfiler.QueryTokensToStringConverter(subset) into sameSubsets
                select new
                {
                    Occurrences = sameSubsets.Count(),
                    Subset = sameSubsets.First()
                };

            // Two parallel lists: index i of one list belongs to index i of the other.
            ArrayList<int> subsetsGroupedOccurrence = new ArrayList<int>();
            ArrayList<ArrayList<QueryToken>> subsetsGrouped = new ArrayList<ArrayList<QueryToken>>();
            foreach (var group in groupedSubsets)
            {
                subsetsGroupedOccurrence.Add(group.Occurrences);
                subsetsGrouped.Add(group.Subset);
            }

            int maxOccurrence = subsetsGroupedOccurrence.Max();

            // Hand the grouped subsets, their counts and the maximum count to the extraction step.
            QueryConverterUtils.ExtractUniqueQueryTokensBasedOnOccurance(disjuncts, finalQueryList, twitterProfilerFrequencyThreshold, subsetsGrouped, subsetsGroupedOccurrence, maxOccurrence);
        }