Esempio n. 1
0
        /**<summary>Adds keyword groups to <paramref name="currentGroups"/>, at most one per pass. Each pass walks <paramref name="topKeywords"/> in order, builds a
         * group for every keyword, and adds the first one that has more than <paramref name="minimumMembers"/> members, is not reported by
         * IsGroupInContainedGroups, and is not already in the set. After every pass the ungrouped examples, the keyword list and the average group size are
         * recomputed from <paramref name="data"/>. The loop ends when the ungrouped example count no longer exceeds the average group size, or when a complete
         * pass over the keywords adds nothing.</summary>
         * <param name="data">Examples handed to each candidate group's UpdateMembers and to the refresh helpers.</param>
         * <param name="topKeywords">Keywords to try on the first pass, in order. May be empty; later passes use the list returned by GetKKeywordsFromData.</param>
         * <param name="minimumMembers">A candidate is only added when its Count is strictly greater than this value.</param>
         * <param name="ungroupedExamples">Initial list whose Count drives the outer loop; replaced by GetDefaultExamples(data) after every pass.</param>
         * <param name="currentGroups">Set that receives the accepted groups.</param>*/
        private void AddGroupsFromData(List <ClaimableKeywordExample> data, List <string> topKeywords, double minimumMembers, List <ClaimableKeywordExample> ungroupedExamples, HashSet <KeywordGroup> currentGroups)
        {
            double averageGroupSize = 0;

            while (ungroupedExamples.Count > averageGroupSize)
            {
                // Tracked explicitly: once topKeywords is replaced below, its Count says nothing about
                // whether THIS pass ran out of candidates.
                bool groupAdded = false;

                // The bound in the loop header also covers an empty keyword list, which would otherwise
                // throw on the first index access.
                for (int keywordIndex = 0; keywordIndex < topKeywords.Count && !groupAdded; keywordIndex++)
                {
                    KeywordGroup tempGroup = new KeywordGroup(topKeywords[keywordIndex]);
                    tempGroup.UpdateMembers(data);
                    if (tempGroup.Count > minimumMembers && !IsGroupInContainedGroups(tempGroup) && !currentGroups.Contains(tempGroup))
                    {
                        if (currentGroups.Add(tempGroup))
                        {
                            groupAdded = true;
                        }
                        else
                        {
                            // Defensive path kept from the original: the set rejected the group even though
                            // the Contains check passed, so DeleteClaims is called on the rejected candidate.
                            tempGroup.DeleteClaims();
                        }
                    }
                }

                // Refresh order is unchanged from the original; the helpers are opaque from here and may
                // depend on being called in this sequence.
                ungroupedExamples = GetDefaultExamples(data);
                topKeywords       = GetKKeywordsFromData(data, DEFAULT_NUMBER_OF_GROUPS);
                averageGroupSize  = CalculateAverageGroupSize(currentGroups);

                // No keyword produced an acceptable group in this pass, so stop.
                if (!groupAdded)
                {
                    break;
                }
            }
        }
        /**<summary>Builds the sub groups of this group. For each contained keyword that is not already selected and whose count, divided by the number of
         * contained members, is at least one quarter, a new group is created from this group's selected keywords plus that keyword. A candidate with fewer
         * than <paramref name="minimumMembers"/> members is dropped; every other candidate is returned together with the sub groups it generates recursively.
         * The parent group is not added to the returned list and continues to exist alongside its sub groups.</summary>
         * <param name="maxGroupSize">Only forwarded to the recursive call; this method does not otherwise read it.</param>
         * <param name="minimumMembers">A candidate whose ContainedMembers.Count is below this value has DeleteClaims called on it and is skipped.</param>
         * <returns>A new list holding the de-duplicated sub groups.</returns>*/
        public List <KeywordGroup> GenerateSubGroups(int maxGroupSize, double minimumMembers)
        {
            const double requiredShare = .25;
            HashSet <KeywordGroup> subGroups = new HashSet <KeywordGroup>();

            foreach (string candidateKeyword in ContainedKeywords.Keys)
            {
                // A keyword that already defines this group cannot extend it.
                if (SelectedKeywords.Contains(candidateKeyword))
                {
                    continue;
                }

                // Written as a negated ">=" so a NaN ratio is skipped, exactly like a ratio below the threshold.
                if (!(ContainedKeywords[candidateKeyword] / (double)ContainedMembers.Count >= requiredShare))
                {
                    continue;
                }

                // Seed the candidate with the first selected keyword, then copy over the remaining ones.
                var selected = SelectedKeywords.ContainedKeywords;
                selected.MoveNext();
                KeywordGroup candidate = new KeywordGroup(selected.Current);
                while (selected.MoveNext())
                {
                    candidate.SelectedKeywords.AddKeyword(selected.Current);
                }

                candidate.SelectedKeywords.AddKeyword(candidateKeyword);
                candidate.UpdateMembers(ContainedMembers);

                if (candidate.ContainedMembers.Count < minimumMembers)
                {
                    candidate.DeleteClaims();
                    continue;
                }

                // Collect the candidate's own sub groups first; DeleteClaims is called on any the set already holds.
                List <KeywordGroup> nestedGroups = candidate.GenerateSubGroups(maxGroupSize, minimumMembers);
                foreach (KeywordGroup nestedGroup in nestedGroups)
                {
                    bool isNew = subGroups.Add(nestedGroup);
                    if (!isNew)
                    {
                        nestedGroup.DeleteClaims();
                    }
                }

                subGroups.Add(candidate);
            }

            return new List <KeywordGroup>(subGroups);
        }