예제 #1
0
            // Checks that filtering a mixed list yields exactly the three entries at positions 0, 2 and 4,
            // in that order (the empty string and the null entry are expected to be dropped).
            public void Should_find_all_matches_in_a_populated_list()
            {
                // Arrange
                var source = new ArrayList { "Food", "", "Bar", null, "Base" };

                // Act: copy the filtered sequence into a list so it can be counted and indexed.
                var filtered = source.Where(MatchNotNullOrEmptyDelegate());
                var kept = new List<string>(filtered);

                // Assert
                Assert.AreEqual(3, kept.Count, "count is wrong");
                Assert.AreEqual(source[0], kept[0], "first item");
                Assert.AreEqual(source[2], kept[1], "second item");
                Assert.AreEqual(source[4], kept[2], "third item");
            }
예제 #2
0
 // Checks that filtering an empty list produces no results at all.
 public void Should_not_find_any_matches_in_an_empty_list()
 {
     // Arrange
     var emptySource = new ArrayList();

     // Act
     var anythingMatched = emptySource.Where(MatchNotNullOrEmptyDelegate()).Any();

     // Assert
     Assert.IsFalse(anythingMatched, "should not have returned any items");
 }
        /// <summary>
        /// Selects the grouped subsets whose occurrence count equals <paramref name="maxOccurance"/>, discards the
        /// candidates that <c>ContainsTokenSubset</c> matches against a later candidate, and uses the survivors
        /// to prune <paramref name="disjuncts"/>.
        /// </summary>
        /// <remarks>
        /// When <paramref name="maxOccurance"/> is greater than 1, each candidate is profiled through
        /// <c>TwitterProfiler</c> first and is only applied when its frequency is below
        /// <paramref name="twitterProfilerFrequencyThreshold"/>. If the last candidate is rejected by that check,
        /// the method recurses with the next lower occurrence count found in
        /// <paramref name="subsetsGroupedOccurance"/>. A profiling error is written to the console and the
        /// process is terminated with exit code 1.
        /// </remarks>
        /// <param name="disjuncts">Remaining disjuncts; modified through <c>RemoveDisjunctsContainingSubsets</c>. Processing stops once it is empty.</param>
        /// <param name="finalQueryList">Receives every subset whose application made <paramref name="disjuncts"/> shorter.</param>
        /// <param name="twitterProfilerFrequencyThreshold">A candidate is applied only when its profiled frequency is below this value.</param>
        /// <param name="subsetsGrouped">Grouped subsets; read index-aligned with <paramref name="subsetsGroupedOccurance"/>.</param>
        /// <param name="subsetsGroupedOccurance">Occurrence count for the subset at the same index of <paramref name="subsetsGrouped"/>.</param>
        /// <param name="maxOccurance">Occurrence count that selects the candidates for this pass.</param>
        private static void ExtractUniqueQueryTokensBasedOnOccurance(ArrayList<ArrayList<QueryToken>> disjuncts, 
                                                                    ArrayList<ArrayList<QueryToken>> finalQueryList,
                                                                    double twitterProfilerFrequencyThreshold, 
                                                                    ArrayList<ArrayList<QueryToken>> subsetsGrouped, 
                                                                    ArrayList<int> subsetsGroupedOccurance, 
                                                                    int maxOccurance)
        {
            // Candidates for this pass: subsets at the requested occurrence level, shortest first.
            var candidates = subsetsGrouped
                .Where((subset, position) => subsetsGroupedOccurance[position] == maxOccurance)
                .OrderBy(subset => subset.Count)
                .ToList();

            // Note every candidate for which ContainsTokenSubset matches one of the candidates after it
            // (intent per the original note: drop subsets which are contained in other ones).
            var redundantPositions = new ArrayList<int>();
            for (int current = 0; current < candidates.Count; ++current)
            {
                int later = current + 1;
                while (later < candidates.Count && !ContainsTokenSubset(candidates[current], candidates[later]))
                {
                    ++later;
                }

                if (later < candidates.Count)
                {
                    redundantPositions.Add(current);
                }
            }

            // Positions were recorded in ascending order; removing from the back keeps the earlier ones valid.
            for (int k = redundantPositions.Count - 1; k >= 0; --k)
            {
                candidates.RemoveAt(redundantPositions[k]);
            }

            if (maxOccurance <= 1)
            {
                // Occurrence is not above 1 -> profiling not needed, apply every candidate directly.
                foreach (var candidate in candidates)
                {
                    int sizeBefore = disjuncts.Count;

                    // Drop the disjuncts that contain the selected subset.
                    QueryConverterUtils.RemoveDisjunctsContainingSubsets(disjuncts, candidate);

                    if (disjuncts.Count < sizeBefore)
                    {
                        finalQueryList.Add(candidate);
                    }

                    if (disjuncts.Count == 0)
                    {
                        return;
                    }
                }

                // Disjuncts are left over: look for new possible subsets.
                QueryConverterUtils.ExtractUniqueQueryTokens(disjuncts, finalQueryList, twitterProfilerFrequencyThreshold);
                return;
            }

            // Occurrence above 1 -> every candidate goes through twitter profiling before it is applied.
            int lastPosition = candidates.Count - 1;
            for (int position = 0; position <= lastPosition; ++position)
            {
                var candidate = candidates[position];
                bool isLastCandidate = position >= lastPosition;

                string searchQuery = TwitterProfiler.QueryTokensToStringConverter(candidate);
                double frequency = TwitterProfiler.ProfileFrequency(searchQuery);

                // The profiler signals failure through a sentinel value; report it and terminate the process.
                if (Math.Round(frequency) == Convert.ToInt32(TwitterProfiler.InvalidReturns.Exception))
                {
                    Console.WriteLine("\nError during twitter profiling: {0}!", TwitterProfiler.GetExceptionError());
                    System.Environment.Exit(1);
                }

                if (frequency < twitterProfilerFrequencyThreshold)
                {
                    int sizeBefore = disjuncts.Count;

                    // Drop the disjuncts that contain the selected subset.
                    QueryConverterUtils.RemoveDisjunctsContainingSubsets(disjuncts, candidate);

                    if (disjuncts.Count < sizeBefore)
                    {
                        finalQueryList.Add(candidate);
                    }

                    if (disjuncts.Count == 0)
                    {
                        return;
                    }

                    if (!isLastCandidate)
                    {
                        continue;
                    }

                    // Candidates exhausted with disjuncts left over: look for new possible subsets.
                    QueryConverterUtils.ExtractUniqueQueryTokens(disjuncts, finalQueryList, twitterProfilerFrequencyThreshold);
                    return;
                }

                if (!isLastCandidate)
                {
                    continue;
                }

                // The last candidate was rejected: fall back to the highest occurrence count below the current one.
                var lowerOccurances = subsetsGroupedOccurance.FindAll(x => x < maxOccurance);
                maxOccurance = lowerOccurances.Count > 0 ? lowerOccurances.Max() : 0;

                if (maxOccurance < 1)
                {
                    return;
                }

                QueryConverterUtils.ExtractUniqueQueryTokensBasedOnOccurance(disjuncts, finalQueryList, twitterProfilerFrequencyThreshold, subsetsGrouped, subsetsGroupedOccurance, maxOccurance);

                // This was the last candidate, so nothing is left to do whether or not disjuncts is now empty.
                return;
            }
        }