Ejemplo n.º 1
0
        /// <summary>
        /// Selects up to <paramref name="N"/> capitalized, uncommon words from the comment tree and
        /// returns, for each selected word, the comments whose ids are mapped to it.
        /// </summary>
        /// <remarks>
        /// Side effect: <c>NodeList</c> is replaced on every call. It starts with a synthetic root
        /// node; the remaining entries are presumably appended by <c>PopulateNodeList</c>.
        /// Candidate words are ranked by how many nodes qualify as an "LCA" for the word
        /// (naming taken from the code): a node qualifies when the word occurs in more than one of
        /// its child branches, or when it has exactly one branch and that branch contains the word.
        /// </remarks>
        /// <param name="N">Maximum number of words to select.</param>
        /// <returns>
        /// A dictionary from selected word to its comments, built in descending order of comment
        /// count. Words whose ids cannot be resolved in the word/id mapping are omitted.
        /// </returns>
        public Dictionary<string, List<CommentObj>> GetNamedObjects(int N)
        {
            // Concatenate the text of every top-level subtree; it feeds the stem -> display-word lookup.
            StringBuilder sbAllWords = new StringBuilder();
            foreach (children child in children)
            {
                sbAllWords.Append(child.SubtreeText);
                sbAllWords.Append(" ");
            }
            string[] allWords = GetAllWords(sbAllWords.ToString());
            Dictionary<string, string> stemParentDictionary = GetStemParentDictionary(allWords);

            // Synthetic root: one id set per top-level child (its own id plus every id in its ChildIDList).
            children rootNode = new children();
            List<HashSet<int>> rootChildIDs = new List<HashSet<int>>();
            foreach (children child in children)
            {
                // NOTE(review): presumably fills child.ChildIDList, which is read right below - confirm.
                GetChildIDHashSetList(child);
                HashSet<int> currChildIDs = new HashSet<int>();
                currChildIDs.Add(child.id);
                foreach (var item in child.ChildIDList)
                {
                    currChildIDs.UnionWith(item);
                }
                rootChildIDs.Add(currChildIDs);
            }
            rootNode.ChildIDList = rootChildIDs;

            NodeList = new List<children>();
            NodeList.Add(rootNode);
            foreach (children child in children)
            {
                PopulateNodeList(child);
            }

            Dictionary<string, HashSet<int>> wordIDMapping = GetWordIDMapping();
            Dictionary<string, List<children>> WordLCAList = new Dictionary<string, List<children>>();

            foreach (var kvp in wordIDMapping)
            {
                List<children> currLCAList = new List<children>();
                foreach (children node in NodeList)
                {
                    int numBranchesWithWord = 0;
                    foreach (var childIDBranch in node.ChildIDList)
                    {
                        // Overlaps stops at the first shared id instead of materializing the intersection.
                        if (childIDBranch.Overlaps(kvp.Value))
                        {
                            numBranchesWithWord += 1;
                        }
                    }
                    if ((numBranchesWithWord == 1 && node.ChildIDList.Count == 1) || numBranchesWithWord > 1)
                    {
                        currLCAList.Add(node);
                    }
                }

                // Key the result by the display word when one is known, otherwise by the mapping key itself.
                // If several mapping keys share a display word, the last one processed wins.
                string displayWord;
                if (!stemParentDictionary.TryGetValue(kvp.Key, out displayWord))
                {
                    displayWord = kvp.Key;
                }
                WordLCAList[displayWord] = currLCAList;
            }

            string[] contractionSuffixes = { "n't", "'m", "'ll", "'d", "'ve", "'re", "'s" };

            // Filter before sorting (the sort is stable, so the surviving order is unchanged) and test the
            // length before indexing so an empty key cannot throw on word[0].
            List<string> namedObjects = WordLCAList
                .Where(entry => entry.Key.Length > 1)
                .Where(entry => char.IsUpper(entry.Key[0]))
                .Where(entry => !contractionSuffixes.Any(suffix => entry.Key.EndsWith(suffix, System.StringComparison.Ordinal)))
                .Where(entry => CommonWords.GetFrequency(entry.Key) < 1)
                .OrderByDescending(entry => entry.Value.Count)
                .Select(entry => entry.Key)
                .Take(N)
                .ToList();

            Dictionary<string, List<CommentObj>> namedObjectDictionary = new Dictionary<string, List<CommentObj>>();

            foreach (string namedObject in namedObjects)
            {
                // The mapping is looked up by the stem of the display word. When no display word was known,
                // the display word IS the mapping key, so fall back to it before giving up on the word.
                string stem = Stemmer.GetStem(namedObject);
                HashSet<int> idsWithWord;
                if (!wordIDMapping.TryGetValue(stem, out idsWithWord) &&
                    !wordIDMapping.TryGetValue(namedObject, out idsWithWord))
                {
                    continue;
                }

                List<CommentObj> commentObjsForWord = new List<CommentObj>(idsWithWord.Count);
                foreach (int id in idsWithWord)
                {
                    children child = GetNodeById(id);
                    commentObjsForWord.Add(new CommentObj() { Id = id, Text = child.text });
                }
                namedObjectDictionary[namedObject] = commentObjsForWord;
            }

            // Dictionary<TKey, TValue> does not document an enumeration order; like the original, this
            // relies on entries being enumerated in insertion order in practice.
            var ordered = namedObjectDictionary
                .OrderByDescending(entry => entry.Value.Count)
                .ToDictionary(entry => entry.Key, entry => entry.Value);

            return ordered;
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Selects up to <paramref name="N"/> capitalized, uncommon words from the comment tree and
 /// returns, for each selected word, the comments whose ids are mapped to it.
 /// </summary>
 /// <remarks>
 /// Side effect: <c>NodeList</c> is replaced on every call. It starts with a synthetic root
 /// node; the remaining entries are presumably appended by <c>PopulateNodeList</c>.
 /// Candidate words are ranked by how many nodes qualify as an "LCA" for the word
 /// (naming taken from the code): a node qualifies when the word occurs in more than one of
 /// its child branches, or when it has exactly one branch and that branch contains the word.
 /// </remarks>
 /// <param name="N">Maximum number of words to select.</param>
 /// <returns>
 /// A dictionary from selected word to its comments, built in descending order of comment
 /// count. Words whose ids cannot be resolved in the word/id mapping are omitted.
 /// </returns>
 public Dictionary<string, List<CommentObj>> GetNamedObjects(int N)
 {
     // Concatenate the text of every top-level subtree; it feeds the stem -> display-word lookup.
     StringBuilder sbAllWords = new StringBuilder();
     foreach (children child in children)
     {
         sbAllWords.Append(child.SubtreeText);
         sbAllWords.Append(" ");
     }
     string[] allWords = GetAllWords(sbAllWords.ToString());
     Dictionary<string, string> stemParentDictionary = GetStemParentDictionary(allWords);

     // Synthetic root: one id set per top-level child (its own id plus every id in its ChildIDList).
     children rootNode = new children();
     List<HashSet<int>> rootChildIDs = new List<HashSet<int>>();
     foreach (children child in children)
     {
         // NOTE(review): presumably fills child.ChildIDList, which is read right below - confirm.
         GetChildIDHashSetList(child);
         HashSet<int> currChildIDs = new HashSet<int>();
         currChildIDs.Add(child.id);
         foreach (var item in child.ChildIDList)
         {
             currChildIDs.UnionWith(item);
         }
         rootChildIDs.Add(currChildIDs);
     }
     rootNode.ChildIDList = rootChildIDs;

     NodeList = new List<children>();
     NodeList.Add(rootNode);
     foreach (children child in children)
     {
         PopulateNodeList(child);
     }

     Dictionary<string, HashSet<int>> wordIDMapping = GetWordIDMapping();
     Dictionary<string, List<children>> WordLCAList = new Dictionary<string, List<children>>();

     foreach (var kvp in wordIDMapping)
     {
         List<children> currLCAList = new List<children>();
         foreach (children node in NodeList)
         {
             int numBranchesWithWord = 0;
             foreach (var childIDBranch in node.ChildIDList)
             {
                 // Overlaps stops at the first shared id instead of materializing the intersection.
                 if (childIDBranch.Overlaps(kvp.Value))
                 {
                     numBranchesWithWord += 1;
                 }
             }
             if ((numBranchesWithWord == 1 && node.ChildIDList.Count == 1) || numBranchesWithWord > 1)
             {
                 currLCAList.Add(node);
             }
         }

         // Key the result by the display word when one is known, otherwise by the mapping key itself.
         // If several mapping keys share a display word, the last one processed wins.
         string displayWord;
         if (!stemParentDictionary.TryGetValue(kvp.Key, out displayWord))
         {
             displayWord = kvp.Key;
         }
         WordLCAList[displayWord] = currLCAList;
     }

     string[] contractionSuffixes = { "n't", "'m", "'ll", "'d", "'ve", "'re", "'s" };

     // Filter before sorting (the sort is stable, so the surviving order is unchanged) and test the
     // length before indexing so an empty key cannot throw on word[0].
     List<string> namedObjects = WordLCAList
         .Where(entry => entry.Key.Length > 1)
         .Where(entry => char.IsUpper(entry.Key[0]))
         .Where(entry => !contractionSuffixes.Any(suffix => entry.Key.EndsWith(suffix, System.StringComparison.Ordinal)))
         .Where(entry => CommonWords.GetFrequency(entry.Key) < 1)
         .OrderByDescending(entry => entry.Value.Count)
         .Select(entry => entry.Key)
         .Take(N)
         .ToList();

     Dictionary<string, List<CommentObj>> namedObjectDictionary = new Dictionary<string, List<CommentObj>>();

     foreach (string namedObject in namedObjects)
     {
         // The mapping is looked up by the stem of the display word. When no display word was known,
         // the display word IS the mapping key, so fall back to it before giving up on the word.
         string stem = Stemmer.GetStem(namedObject);
         HashSet<int> idsWithWord;
         if (!wordIDMapping.TryGetValue(stem, out idsWithWord) &&
             !wordIDMapping.TryGetValue(namedObject, out idsWithWord))
         {
             continue;
         }

         List<CommentObj> commentObjsForWord = new List<CommentObj>(idsWithWord.Count);
         foreach (int id in idsWithWord)
         {
             children child = GetNodeById(id);
             commentObjsForWord.Add(new CommentObj() { Id = id, Text = child.text });
         }
         namedObjectDictionary[namedObject] = commentObjsForWord;
     }

     // Dictionary<TKey, TValue> does not document an enumeration order; like the original, this
     // relies on entries being enumerated in insertion order in practice.
     var ordered = namedObjectDictionary
         .OrderByDescending(entry => entry.Value.Count)
         .ToDictionary(entry => entry.Key, entry => entry.Value);

     return ordered;
 }