/// <summary>
/// Picks up to <paramref name="N"/> capitalized, uncommon words and maps each one to the
/// comments whose IDs are recorded for that word.
/// </summary>
/// <param name="N">Maximum number of words to keep (passed to <c>Take</c>).</param>
/// <returns>
/// A dictionary from the selected word to a list of <see cref="CommentObj"/> (Id and Text of
/// each node returned by <c>GetNodeById</c>), inserted in descending order of list size.
/// </returns>
/// <remarks>
/// Side effects: calls <c>GetChildIDHashSetList</c> on every top-level node and replaces
/// <c>NodeList</c> with a new list whose first entry is a synthetic root node.
/// </remarks>
public Dictionary<string, List<CommentObj>> GetNamedObjects(int N)
{
    // Join the subtree text of every top-level node into one space-separated string.
    StringBuilder sbAllWords = new StringBuilder();
    foreach (children child in children)
    {
        sbAllWords.Append(child.SubtreeText);
        sbAllWords.Append(" ");
    }

    string[] allWords = GetAllWords(sbAllWords.ToString());
    Dictionary<string, string> stemParentDictionary = GetStemParentDictionary(allWords);

    // Synthetic root: one ID set per top-level node, holding that node's own id plus every id
    // found in its ChildIDList (presumably filled in by GetChildIDHashSetList -- confirm).
    children rootNode = new children();
    List<HashSet<int>> rootChildIDs = new List<HashSet<int>>();
    foreach (children child in children)
    {
        GetChildIDHashSetList(child);

        HashSet<int> currChildIDs = new HashSet<int>();
        currChildIDs.Add(child.id);
        foreach (var item in child.ChildIDList)
        {
            currChildIDs.UnionWith(item);
        }
        rootChildIDs.Add(currChildIDs);
    }
    rootNode.ChildIDList = rootChildIDs;

    NodeList = new List<children>();
    NodeList.Add(rootNode);
    foreach (children child in children)
    {
        PopulateNodeList(child);
    }

    Dictionary<string, HashSet<int>> wordIDMapping = GetWordIDMapping();

    // For every word key, collect the nodes under which the word shows up in more than one
    // branch, or in the only branch of a single-branch node.
    Dictionary<string, List<children>> WordLCAList = new Dictionary<string, List<children>>();
    // Remembers which wordIDMapping key produced each display name (last writer wins, exactly
    // like WordLCAList) so the IDs can be recovered even if re-stemming the name fails.
    Dictionary<string, string> keyByDisplayName = new Dictionary<string, string>();
    foreach (var kvp in wordIDMapping)
    {
        List<children> currLCAList = new List<children>();
        foreach (children node in NodeList)
        {
            int numBranchesWithWord = 0;
            foreach (var childIDBranch in node.ChildIDList)
            {
                // Overlaps stops at the first shared id instead of counting the full intersection.
                if (childIDBranch.Overlaps(kvp.Value))
                {
                    numBranchesWithWord += 1;
                }
            }

            if ((numBranchesWithWord == 1 && node.ChildIDList.Count == 1) || numBranchesWithWord > 1)
            {
                currLCAList.Add(node);
            }
        }

        // Prefer the parent word registered for this key; fall back to the key itself.
        string displayName;
        if (!stemParentDictionary.TryGetValue(kvp.Key, out displayName))
        {
            displayName = kvp.Key;
        }
        WordLCAList[displayName] = currLCAList;
        keyByDisplayName[displayName] = kvp.Key;
    }

    string[] contractionSuffixes = { "n't", "'m", "'ll", "'d", "'ve", "'re", "'s" };

    // Filter first (length check before indexing [0], so an empty key cannot throw), then sort.
    // OrderByDescending is stable, so filtering before sorting yields the same sequence.
    List<string> namedObjects = WordLCAList
        .Where(x => x.Key.Length > 1
                    && CommonWords.GetFrequency(x.Key) < 1
                    && char.IsUpper(x.Key[0])
                    && !contractionSuffixes.Any(s => x.Key.EndsWith(s, System.StringComparison.Ordinal)))
        .OrderByDescending(x => x.Value.Count)
        .Select(x => x.Key)
        .Take(N)
        .ToList();

    Dictionary<string, List<CommentObj>> namedObjectDictionary = new Dictionary<string, List<CommentObj>>();
    foreach (string namedObject in namedObjects)
    {
        // Look the word up by its stem as before; if that stem is not a key of wordIDMapping,
        // use the key that originally produced this name instead of throwing.
        HashSet<int> idsWithWord;
        if (!wordIDMapping.TryGetValue(Stemmer.GetStem(namedObject), out idsWithWord))
        {
            idsWithWord = wordIDMapping[keyByDisplayName[namedObject]];
        }

        List<CommentObj> commentObjsForWord = new List<CommentObj>();
        foreach (int id in idsWithWord)
        {
            children child = GetNodeById(id);
            commentObjsForWord.Add(new CommentObj() { Id = id, Text = child.text });
        }
        namedObjectDictionary[namedObject] = commentObjsForWord;
    }

    // Re-insert by descending comment count. NOTE(review): Dictionary enumeration order is not
    // a documented guarantee; callers that need ordering should sort on their side.
    return namedObjectDictionary
        .OrderByDescending(x => x.Value.Count)
        .ToDictionary(x => x.Key, x => x.Value);
}
/// <summary>
/// Returns at most <paramref name="N"/> capitalized words that <c>CommonWords</c> reports with a
/// frequency below 1, each paired with the comments whose IDs are stored for that word.
/// </summary>
/// <param name="N">Upper bound on the number of words selected.</param>
/// <returns>
/// Word-to-comments dictionary; entries are inserted from the largest comment list to the
/// smallest. Each <see cref="CommentObj"/> carries the node id and the node's <c>text</c>.
/// </returns>
/// <remarks>
/// Mutates state: invokes <c>GetChildIDHashSetList</c> for each top-level node and rebuilds
/// <c>NodeList</c> (synthetic root first, then whatever <c>PopulateNodeList</c> adds).
/// </remarks>
public Dictionary<string, List<CommentObj>> GetNamedObjects(int N)
{
    // Gather all subtree text, one trailing space after each top-level node.
    StringBuilder corpus = new StringBuilder();
    foreach (children topLevel in children)
    {
        corpus.Append(topLevel.SubtreeText);
        corpus.Append(" ");
    }

    Dictionary<string, string> stemParentDictionary =
        GetStemParentDictionary(GetAllWords(corpus.ToString()));

    // Build a synthetic root whose branches are the id sets of the top-level subtrees:
    // the node's own id merged with every set in its ChildIDList.
    List<HashSet<int>> rootBranches = new List<HashSet<int>>();
    foreach (children topLevel in children)
    {
        GetChildIDHashSetList(topLevel);

        HashSet<int> subtreeIds = new HashSet<int> { topLevel.id };
        foreach (var branchIds in topLevel.ChildIDList)
        {
            subtreeIds.UnionWith(branchIds);
        }
        rootBranches.Add(subtreeIds);
    }

    children rootNode = new children();
    rootNode.ChildIDList = rootBranches;

    NodeList = new List<children>();
    NodeList.Add(rootNode);
    foreach (children topLevel in children)
    {
        PopulateNodeList(topLevel);
    }

    Dictionary<string, HashSet<int>> wordIDMapping = GetWordIDMapping();

    // nodesByWord: display word -> nodes where the word occurs in several branches (or in the
    // single branch of a one-branch node). sourceKeyByWord: display word -> the wordIDMapping
    // key it came from, kept so the id set can always be found again later.
    Dictionary<string, List<children>> nodesByWord = new Dictionary<string, List<children>>();
    Dictionary<string, string> sourceKeyByWord = new Dictionary<string, string>();
    foreach (var entry in wordIDMapping)
    {
        HashSet<int> idsForWord = entry.Value;
        List<children> matchingNodes = new List<children>();

        foreach (children node in NodeList)
        {
            // Any() short-circuits on the first shared id; Count() > 0 walked the whole intersection.
            int branchesWithWord = node.ChildIDList.Count(branch => branch.Intersect(idsForWord).Any());
            bool soleBranchHit = branchesWithWord == 1 && node.ChildIDList.Count == 1;
            if (soleBranchHit || branchesWithWord > 1)
            {
                matchingNodes.Add(node);
            }
        }

        // Single lookup instead of ContainsKey followed by the indexer.
        string word;
        if (!stemParentDictionary.TryGetValue(entry.Key, out word))
        {
            word = entry.Key;
        }
        nodesByWord[word] = matchingNodes;
        sourceKeyByWord[word] = entry.Key;
    }

    // The length test runs first so that indexing [0] can never hit an empty string.
    // Sorting after filtering gives the same order because OrderByDescending is stable.
    List<string> namedObjects = nodesByWord
        .Where(p => p.Key.Length > 1)
        .Where(p => CommonWords.GetFrequency(p.Key) < 1)
        .Where(p => char.IsUpper(p.Key[0]))
        .Where(p => !(p.Key.EndsWith("n't", System.StringComparison.Ordinal)
                      || p.Key.EndsWith("'m", System.StringComparison.Ordinal)
                      || p.Key.EndsWith("'ll", System.StringComparison.Ordinal)
                      || p.Key.EndsWith("'d", System.StringComparison.Ordinal)
                      || p.Key.EndsWith("'ve", System.StringComparison.Ordinal)
                      || p.Key.EndsWith("'re", System.StringComparison.Ordinal)
                      || p.Key.EndsWith("'s", System.StringComparison.Ordinal)))
        .OrderByDescending(p => p.Value.Count)
        .Select(p => p.Key)
        .Take(N)
        .ToList();

    Dictionary<string, List<CommentObj>> commentsByWord = new Dictionary<string, List<CommentObj>>();
    foreach (string word in namedObjects)
    {
        // First try the stem of the display word (original behavior). When that stem is not a
        // key of wordIDMapping, fall back to the recorded source key rather than throwing
        // KeyNotFoundException.
        HashSet<int> ids;
        string stem = Stemmer.GetStem(word);
        if (!wordIDMapping.TryGetValue(stem, out ids))
        {
            ids = wordIDMapping[sourceKeyByWord[word]];
        }

        List<CommentObj> comments = new List<CommentObj>();
        foreach (int id in ids)
        {
            children node = GetNodeById(id);
            CommentObj comment = new CommentObj() { Id = id, Text = node.text };
            comments.Add(comment);
        }
        commentsByWord[word] = comments;
    }

    // Rebuild with the largest comment lists inserted first. NOTE(review): Dictionary does not
    // document an enumeration order, so this ordering is best-effort for consumers.
    var ordered = commentsByWord
        .OrderByDescending(p => p.Value.Count)
        .ToDictionary(p => p.Key, p => p.Value);
    return ordered;
}