Splits an identifier on non-alphabetic characters and easy camelcase transitions (lowercase to uppercase).
Inheritance: IdSplitter
Ejemplo n.º 1
0
        public static void ClassSetup()
        {
            //construct the necessary srcML wrapper unit tags
            XmlNamespaceManager xnm = ABB.SrcML.SrcML.NamespaceManager;
            StringBuilder namespaceDecls = new StringBuilder();
            foreach (string prefix in xnm)
            {
                if (prefix != string.Empty && !prefix.StartsWith("xml", StringComparison.InvariantCultureIgnoreCase))
                {
                    if (prefix.Equals("src", StringComparison.InvariantCultureIgnoreCase))
                    {
                        namespaceDecls.AppendFormat("xmlns=\"{0}\" ", xnm.LookupNamespace(prefix));
                    }
                    else
                    {
                        namespaceDecls.AppendFormat("xmlns:{0}=\"{1}\" ", prefix, xnm.LookupNamespace(prefix));
                    }
                }
            }
            srcMLFormat = string.Format("<unit {0}>{{0}}</unit>", namespaceDecls.ToString());

            //initialize swum stuff
            splitter = new ConservativeIdSplitter();
            tagger = new UnigramTagger();
            posData = new PCKimmoPartOfSpeechData();
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Reads the necessary data files and initializes the member variables.
        /// </summary>
        /// <param name="programWordCount">A dictionary containing the word counts from the local program.</param>
        private void Initialize(Dictionary<string, int> programWordCount)
        {
            this.CamelSplitter = new ConservativeIdSplitter();

            //set ProgramWordCount and calculate log of total
            this.ProgramWordCount = programWordCount;
            ulong ProgramTotalWordCount = 0;
            foreach (int value in this.ProgramWordCount.Values)
            {
                ProgramTotalWordCount = ProgramTotalWordCount + (ulong)value;
            }
            this.LogProgramTotalWordCount = Math.Log10(ProgramTotalWordCount);
            
            //load globalWordCount from default location
            var rawGlobalWordCount = LibFileLoader.ReadWordCount(SwumConfiguration.GetFileSetting("SamuraiIdSplitter.GlobalWordCountFile"), false, IncludeIdentifier);
            this.GlobalWordCount = new Dictionary<string, double>();
            //add weighting to word counts
            foreach (var kvp in rawGlobalWordCount)
            {
                this.GlobalWordCount[kvp.Key] = kvp.Value * Math.Pow((double)kvp.Key.Length - 1, 1.5);
            }

            //read prefix and suffix lists from default locations
            //TODO: the words must be in lowercase. Should we lowercase them on loading, or just assume/require that they're in lowercase in the file?
            this.Prefixes = LibFileLoader.ReadWordList(SwumConfiguration.GetFileSetting("SamuraiIdSplitter.Prefixesfile"));
            this.Suffixes = LibFileLoader.ReadWordList(SwumConfiguration.GetFileSetting("SamuraiIdSplitter.Suffixesfile"));
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Reads the necessary data files and initializes the member variables.
        /// </summary>
        /// <param name="programWordCount">A dictionary containing the word counts from the local program.</param>
        private void Initialize(Dictionary <string, int> programWordCount)
        {
            this.CamelSplitter = new ConservativeIdSplitter();

            //set ProgramWordCount and calculate log of total
            this.ProgramWordCount = programWordCount;
            ulong ProgramTotalWordCount = 0;

            foreach (int value in this.ProgramWordCount.Values)
            {
                ProgramTotalWordCount = ProgramTotalWordCount + (ulong)value;
            }
            this.LogProgramTotalWordCount = Math.Log10(ProgramTotalWordCount);

            //load globalWordCount from default location
            var rawGlobalWordCount = LibFileLoader.ReadWordCount(SwumConfiguration.GetFileSetting("SamuraiIdSplitter.GlobalWordCountFile"), false, IncludeIdentifier);

            this.GlobalWordCount = new Dictionary <string, double>();
            //add weighting to word counts
            foreach (var kvp in rawGlobalWordCount)
            {
                this.GlobalWordCount[kvp.Key] = kvp.Value * Math.Pow((double)kvp.Key.Length - 1, 1.5);
            }

            //read prefix and suffix lists from default locations
            //TODO: the words must be in lowercase. Should we lowercase them on loading, or just assume/require that they're in lowercase in the file?
            this.Prefixes = LibFileLoader.ReadWordList(SwumConfiguration.GetFileSetting("SamuraiIdSplitter.Prefixesfile"));
            this.Suffixes = LibFileLoader.ReadWordList(SwumConfiguration.GetFileSetting("SamuraiIdSplitter.Suffixesfile"));
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Counts the number of occurrences of words within the identifiers in the given srcml files.
        /// </summary>
        /// <param name="archive">An archive containing the srcml files to analyze.</param>
        /// <returns>A dictionary mapping words to the number of occurrences within identifiers.</returns>
        public static Dictionary <string, int> CountProgramWords(ISrcMLArchive archive)
        {
            if (archive == null)
            {
                throw new ArgumentNullException("archive");
            }

            var splitter     = new ConservativeIdSplitter();
            var observations = new Dictionary <string, int>();

            foreach (var fileUnit in archive.FileUnits)
            {
                //query for all the identifiers
                var identifiers = from id in fileUnit.Descendants(SRC.Name)
                                  where !id.Elements().Any()
                                  select id.Value;

                foreach (var id in identifiers)
                {
                    string[] words = splitter.Split(id);
                    foreach (string word in words)
                    {
                        int    obs;
                        string lowWord = word.ToLower();
                        observations.TryGetValue(lowWord, out obs); //gets the number of observations for the word. If it is new, obs is set to 0
                        observations[lowWord] = obs + 1;
                    }
                }
            }

            return(observations);
        }
Ejemplo n.º 5
0
 public void initalizeSwum()
 {
     //initialize swum stuff
     splitter = new ConservativeIdSplitter();
     tagger = new UnigramTagger();
     posData = new PCKimmoPartOfSpeechData();
 }
Ejemplo n.º 6
0
        public void GenerateEndingSUnit()
        {
            /*var dataProject = new DataProject<CompleteWorkingSet>("npp_6.2.3",
                Path.GetFullPath("..//..//..//projects//npp_6.2.3"),
                "..//..//..//SrcML");*/

            var dataProject = new DataProject<CompleteWorkingSet>(folderName,
                Path.GetFullPath(fullFilePath),
                "..//..//..//SrcML");

            dataProject.UpdateAsync().Wait();

            //get srcml stuff in order
            NamespaceDefinition globalNamespace;
            Assert.That(dataProject.WorkingSet.TryObtainReadLock(5000, out globalNamespace));

            //initialize swum stuff
            splitter = new ConservativeIdSplitter();
            tagger = new UnigramTagger();
            posData = new PCKimmoPartOfSpeechData();

            //find an example method, uses global methodName variable
            var testMethod = globalNamespace.GetDescendants<MethodDefinition>().Where(m => m.Name == methodName).First();
            var testMethodXElement = DataHelpers.GetElement(dataProject.SourceArchive, testMethod.PrimaryLocation);

            //generate swum for method declaration
            MethodContext mc = ContextBuilder.BuildMethodContext(testMethodXElement);
            MethodDeclarationNode mdn = new MethodDeclarationNode(methodName, mc);

            //Console.WriteLine(mdn.ToString()); //returns nothing since it hasn't been written

            var exp = testMethod.GetDescendants();
            //var verb = mdn.Action.ToString();

            var expResult = exp.ElementAt(exp.Count() - 1);

            Console.WriteLine(expResult);
            MethodDeclarationNode expMDN = null;
            if (expResult is ReturnStatement)
            {
                Console.WriteLine("return");
            }
            else
            {
                var mCall = expResult.FindExpressions<MethodCall>().First();

                expMDN = new MethodDeclarationNode(mCall.Name, mc);
            }
            //MethodDeclarationNode mdn2 = new MethodDeclarationNode(expResult.ToString(), mc);

            //BaseVerbRule rule = new BaseVerbRule(posData, tagger, splitter);
            //Console.WriteLine("InClass = " + rule.InClass(mdn)); //REQUIRED in order for the ConstructSwum method to work
            //rule.ConstructSwum(mdn); //rewrites mdn.ToString to a SWUM breakdown
            //Console.WriteLine(mdn.Action.ToString());

            BaseVerbRule rule2 = new BaseVerbRule(posData, tagger, splitter);
            Console.WriteLine("InClass = " + rule2.InClass(expMDN)); //REQUIRED in order for the ConstructSwum method to work
            rule2.ConstructSwum(expMDN); //rewrites mdn.ToString to a SWUM breakdown
            Console.WriteLine(expMDN.Action.ToString());
            //Console.WriteLine(mdn.Action.ToString());
        }
Ejemplo n.º 7
0
        public void GenerateVoidReturnSUnit()
        {
            var dataProject = new DataProject<CompleteWorkingSet>(folderName,
                Path.GetFullPath(fullFilePath),
                "..//..//..//SrcML");

            dataProject.UpdateAsync().Wait();

            //get srcml stuff in order
            NamespaceDefinition globalNamespace;
            Assert.That(dataProject.WorkingSet.TryObtainReadLock(5000, out globalNamespace));

            //initialize swum stuff
            splitter = new ConservativeIdSplitter();
            tagger = new UnigramTagger();
            posData = new PCKimmoPartOfSpeechData();

            //find an example method
            var guiMethod = globalNamespace.GetDescendants<MethodDefinition>().Where(m => m.Name == methodName).First();
            var guiMethodXElement = DataHelpers.GetElement(dataProject.SourceArchive, guiMethod.PrimaryLocation);

            // forget that, find ALL the methods
            var methods = globalNamespace.GetDescendants<MethodDefinition>().Where(m => m.Name == methodName);

            foreach (MethodDefinition method in methods)
            {
                //Console.WriteLine(method.ToString());
                var statements = method.ChildStatements;
                foreach (Statement statement in statements)
                {
                    var expressions = statement.GetExpressions();
                    foreach (Expression expression in expressions)
                    {
                        // Skip any expression that contains an assignment
                       if (expression.ToString().Contains(" =") || expression.ToString().Contains(" ->")) { continue; }

                        // Print whatever's left. It should be a void return.
                        Console.WriteLine(expression.ToString());

                        // *** PoS tag it ***
                        // convert the string to 'PhraseNode' objects so we can feed them to SWUM
                        var pn = PhraseNode.Parse(new WordNode( expression.ToString() ).ToString() );

                        Console.WriteLine(pn.ToString());

                        // construct the "rule" to break up method names into sentences
                        BaseVerbRule thisrule = new BaseVerbRule(posData, tagger, splitter);

                        var methodNode = new MethodDeclarationNode(expression.ToString());

                        thisrule.ConstructSwum(methodNode);

                        Console.WriteLine(methodNode.ToString());
                    }
                }
            }
        }
Ejemplo n.º 8
0
        public void GenerateSimpleSwum()
        {
            var dataProject = new DataProject<CompleteWorkingSet>("npp_6.2.3",
                Path.GetFullPath("..//..//..//projects//npp_6.2.3"),
                "..//..//..//SrcML");

            dataProject.UpdateAsync().Wait();

            //get srcml stuff in order
            NamespaceDefinition globalNamespace;
            Assert.That(dataProject.WorkingSet.TryObtainReadLock(5000, out globalNamespace));

            //initialize swum stuff
            splitter = new ConservativeIdSplitter();
            tagger = new UnigramTagger();
            posData = new PCKimmoPartOfSpeechData();

            //find an example method
            var guiMethod = globalNamespace.GetDescendants<MethodDefinition>().Where(m => m.Name == "saveGUIParams").First();
            var guiMethodXElement = DataHelpers.GetElement(dataProject.SourceArchive, guiMethod.PrimaryLocation);

            //generate swum for method declaration
            MethodContext mc = ContextBuilder.BuildMethodContext(guiMethodXElement);
            MethodDeclarationNode mdn = new MethodDeclarationNode("saveGUIParams", mc);
            BaseVerbRule rule = new BaseVerbRule(posData, tagger, splitter);
            rule.ConstructSwum(mdn);
            Console.WriteLine(mdn.ToString());
        }
Ejemplo n.º 9
0
        public void GenerateSameActionSUnit()
        {
            //   var dataProject = new DataProject<CompleteWorkingSet>("npp_6.2.3",
            //     Path.GetFullPath("..//..//..//projects//npp_6.2.3"),
            //   "..//..//..//SrcML");

            //var dataProject = new DataProject<CompleteWorkingSet>("CodeAnalysisToolkit",
            //    Path.GetFullPath("..//..//..//samples"),
            //    "..//..//..//SrcML");

            var dataProject = new DataProject<CompleteWorkingSet>(folderName,
                Path.GetFullPath(fullFilePath),
                "..//..//..//SrcML");

            List<String> success = new List<String>();
            Dictionary<SwumRule, bool> inClasses = null;

            string methodName = arg;
            bool debug = false; ///////////////// DEBUGGING

            // Get SrcML stuff in order
            dataProject.UpdateAsync().Wait();
            NamespaceDefinition globalNamespace;
            Assert.That(dataProject.WorkingSet.TryObtainReadLock(1000, out globalNamespace));

            // Initialize Swum
            splitter = new ConservativeIdSplitter();
            tagger = new UnigramTagger();
            posData = new PCKimmoPartOfSpeechData();

            var methodList = globalNamespace.GetDescendants<MethodDefinition>().Where(m => m.Name == methodName);
            MethodDefinition topMethod = null;

            // Check if the method was found
            try
            {
                topMethod = methodList.First();
            }
            catch (System.InvalidOperationException)
            {
                Console.WriteLine("--ERROR: Method '" + methodName + "' Not Found--");
                Assert.Fail("Method '" + methodName + "' Not Found");
            }

            var guiMethodXElement = DataHelpers.GetElement(dataProject.SourceArchive, topMethod.PrimaryLocation);

            //generate swum for method declaration
            MethodContext mc = ContextBuilder.BuildMethodContext(guiMethodXElement);
            MethodDeclarationNode mdn = new MethodDeclarationNode(methodName, mc);
            BaseVerbRule rule = new BaseVerbRule(posData, tagger, splitter);
            Console.WriteLine("Method = \t" + methodName);
            Console.WriteLine("InClass = \t" + rule.InClass(mdn));

            // Get the action verb from the SWUM
            String methodVerb = GetMethodVerb(methodName, mc);
            Console.WriteLine("Verb = \t\t" + methodVerb);
            Console.WriteLine("============================");

            // Get all of the lines of code that contains the verb in any form
            var expr = topMethod.GetDescendants().Where(t => t.ToString().Contains(methodVerb)).ToArray();

            // Iterate each line
            foreach (Statement t in expr)
            {
                if (debug) Console.WriteLine("Line: " + t.ToString());
                // This finds any method calls in this line and finds the verb in that method - from GetCallsTo()
                var methods = t.FindExpressions<MethodCall>(true).Where(c => c.ToString().ToLower().Contains(methodVerb));//c.Name.Contains(methodVerb));

                // Iterate through a list of the method calls in a line and find ones that contain the verb
                foreach (MethodCall i in methods)
                {

                    if (debug) Console.WriteLine("===\n" + i.Name);

                    MethodDeclarationNode mdnNEW = new MethodDeclarationNode(i.Name, mc);

                    inClasses = InClassChecker(i.Name, mc);

                    String foundVerb = GetMethodVerb(i.Name, mc);
                    if (foundVerb.Equals("!NONE!"))
                    {
                        if (debug) Console.WriteLine("   Method does not contain verb");
                    }
                    else
                    {
                        if (debug) Console.WriteLine("GetMethodVerb= " + GetMethodVerb(i.Name, mc));

                        success.Add(i.Name);
                    }

                    if (debug) Console.WriteLine("CompareSwums= " + CompareSwums(i.Name, mc));

                    // Debugging
                    if (debug)
                    {
                        int numbtrue = 0;
                        foreach (KeyValuePair<SwumRule, bool> entry in inClasses)
                        {
                            if (entry.Value)
                            {
                                numbtrue++;
                                mdnNEW = new MethodDeclarationNode(i.Name, mc);
                                entry.Key.InClass(mdnNEW);
                                entry.Key.ConstructSwum(mdnNEW);

                                Console.WriteLine("\t" + entry.Key.GetType().ToString() + new String(' ', 30 - entry.Key.GetType().ToString().Length) + " = " + mdnNEW.ToString());

                                if (mdnNEW.Action.ToPlainString().Equals(methodVerb, StringComparison.InvariantCultureIgnoreCase))
                                {
                                    Console.WriteLine("\tMethod contains the verb");
                                }
                                else
                                {
                                    Console.WriteLine("\tMethod does not contain the verb" + "\n");
                                }

                            }

                        }
                        Console.WriteLine(" Inclasses  = " + numbtrue);

                    } //end if debug

                } // End Method Iteration
            } // End Line Iteration

            if (success.Count == 0)
            {
                Console.WriteLine("===== No Same-Action Methods Found =====");
                //Assert.Fail("No Same-Action Methods found");
            }
            else
            {
                Console.WriteLine("\n============= SUCCESSES ===============");
                foreach (String i in success)
                {
                    Console.WriteLine(i);
                }
                //Assert.Pass("Same-Action methods found, check Output");

            }

            dataProject.WorkingSet.ReleaseReadLock();
        }
Ejemplo n.º 10
0
    private static FieldRule SetupFieldRule()
    {
        var splitter = new ConservativeIdSplitter();
        var tagger = new UnigramTagger();
        var posData = new PCKimmoPartOfSpeechData();

        return new FieldRule(posData, tagger, splitter);
    }
Ejemplo n.º 11
0
        /// <summary>
        /// Counts the number of occurrences of words within the identifiers in the given srcml files.
        /// </summary>
        /// <param name="archive">An archive containing the srcml files to analyze.</param>
        /// <returns>A dictionary mapping words to the number of occurrences within identifiers.</returns>
        public static Dictionary<string,int> CountProgramWords(ISrcMLArchive archive)
        {
            if(archive == null) {
                throw new ArgumentNullException("archive");
            }
            
            var splitter = new ConservativeIdSplitter();
            var observations = new Dictionary<string, int>();
            foreach (var fileUnit in archive.FileUnits)
            {
                //query for all the identifiers
                var identifiers = from id in fileUnit.Descendants(SRC.Name)
                                  where !id.Elements().Any()
                                  select id.Value;

                foreach (var id in identifiers)
                {
                    string[] words = splitter.Split(id);
                    foreach (string word in words)
                    {
                        int obs;
                        string lowWord = word.ToLower();
                        observations.TryGetValue(lowWord, out obs); //gets the number of observations for the word. If it is new, obs is set to 0
                        observations[lowWord] = obs + 1;
                    }
                }
            }

            return observations;
        }
Ejemplo n.º 12
0
 public ConservativeIdSplitterTests() {
     splitter = new ConservativeIdSplitter();
 }