/**
 * <summary>
 * Grows <paramref name="currentGroups"/> by at most one group per pass. Each pass walks
 * <paramref name="topKeywords"/> in order, builds a single-keyword <c>KeywordGroup</c> for each
 * entry, populates it from <paramref name="data"/> via <c>UpdateMembers</c>, and keeps the first
 * group that has more than <paramref name="minimumMembers"/> members, is not reported by
 * <c>IsGroupInContainedGroups</c>, and is not already in <paramref name="currentGroups"/>.
 * After every pass the ungrouped examples, the keyword list and the average group size are
 * recomputed. The method stops when the number of ungrouped examples no longer exceeds the
 * average group size, or when a pass adds no group.
 * </summary>
 * <param name="data">Examples handed to <c>UpdateMembers</c> and used to refresh the keyword list.</param>
 * <param name="topKeywords">Keywords to try on the first pass; later passes use a freshly computed list.</param>
 * <param name="minimumMembers">A candidate group must have strictly more members than this to be kept.</param>
 * <param name="ungroupedExamples">Ungrouped examples for the first loop test; recomputed after each pass.</param>
 * <param name="currentGroups">Set that receives the accepted groups.</param>
 */
private void AddGroupsFromData(List<ClaimableKeywordExample> data, List<string> topKeywords, double minimumMembers, List<ClaimableKeywordExample> ungroupedExamples, HashSet<KeywordGroup> currentGroups)
{
    double averageGroupSize = 0;

    while (ungroupedExamples.Count > averageGroupSize)
    {
        // Progress is tracked with a flag instead of comparing the loop index with
        // topKeywords.Count afterwards: topKeywords is replaced below, and the new list
        // may have a different length than the one that was just scanned.
        bool groupAdded = false;

        // Bounding the loop by Count means an empty keyword list simply adds nothing
        // rather than indexing out of range.
        for (int keywordIndex = 0; keywordIndex < topKeywords.Count; keywordIndex++)
        {
            KeywordGroup candidate = new KeywordGroup(topKeywords[keywordIndex]);
            candidate.UpdateMembers(data);

            bool qualifies = candidate.Count > minimumMembers && !IsGroupInContainedGroups(candidate);

            // HashSet.Add returns false when an equal element is already present, so it
            // serves as both the membership test and the insertion.
            if (qualifies && currentGroups.Add(candidate))
            {
                groupAdded = true;
                break;
            }

            // NOTE(review): rejected candidates are not passed to DeleteClaims here, whereas
            // GenerateSubGroups calls DeleteClaims on groups it discards. Confirm whether
            // UpdateMembers leaves claims behind that should be released.
        }

        // Refreshed before the exit check so the helper calls happen in the same order as before.
        ungroupedExamples = GetDefaultExamples(data);
        topKeywords = GetKKeywordsFromData(data, DEFAULT_NUMBER_OF_GROUPS);
        averageGroupSize = CalculateAverageGroupSize(currentGroups);

        // No keyword produced a new group, so another pass cannot be shown to make progress.
        if (!groupAdded)
        {
            break;
        }
    }
}
/**
 * <summary>
 * Builds the sub groups that can be derived from this group. For every keyword in
 * <c>ContainedKeywords</c> that is not already one of this group's selected keywords, and whose
 * count divided by the number of contained members is at least 0.25, a new group is created
 * from this group's selected keywords plus that keyword and populated from this group's
 * members. Candidates with fewer than <paramref name="minimumMembers"/> members are discarded;
 * the rest are returned together with their own sub groups, found recursively. The parent
 * group continues to exist alongside the groups returned here.
 * </summary>
 * <param name="maxGroupSize">Passed through unchanged to the recursive calls; not otherwise read here.</param>
 * <param name="minimumMembers">Smallest member count a candidate may have and still be kept.</param>
 * <returns>The distinct sub groups that were kept, including nested ones.</returns>
 */
public List<KeywordGroup> GenerateSubGroups(int maxGroupSize, double minimumMembers)
{
    const double requiredShare = .25;
    HashSet<KeywordGroup> found = new HashSet<KeywordGroup>();

    foreach (string extraKeyword in ContainedKeywords.Keys)
    {
        // Keywords that already define this group cannot extend it.
        if (SelectedKeywords.Contains(extraKeyword))
        {
            continue;
        }

        double share = ContainedKeywords[extraKeyword] / (double)ContainedMembers.Count;

        // Written as a negated >= so that a NaN ratio is skipped as well.
        if (!(share >= requiredShare))
        {
            continue;
        }

        // Seed the candidate with the first selected keyword, then copy over the remaining ones.
        var selected = SelectedKeywords.ContainedKeywords;
        selected.MoveNext();
        KeywordGroup candidate = new KeywordGroup(selected.Current);
        while (selected.MoveNext())
        {
            candidate.SelectedKeywords.AddKeyword(selected.Current);
        }

        candidate.SelectedKeywords.AddKeyword(extraKeyword);
        candidate.UpdateMembers(ContainedMembers);

        // Too small to stand as its own group: release whatever it claimed and move on.
        if (candidate.ContainedMembers.Count < minimumMembers)
        {
            candidate.DeleteClaims();
            continue;
        }

        // Collect the candidate's own sub groups first; a group equal to one already
        // collected is dropped and its claims released.
        foreach (KeywordGroup nested in candidate.GenerateSubGroups(maxGroupSize, minimumMembers))
        {
            bool isNew = found.Add(nested);
            if (!isNew)
            {
                nested.DeleteClaims();
            }
        }

        found.Add(candidate);
    }

    List<KeywordGroup> result = new List<KeywordGroup>(found);
    return result;
}