Example #1
0
        public static void Rewrite(string basePath, string githubBaseLink, Solution solution,
                                   string serializedClustersFile, string targetTypeName)
        {
            Func <string, int, string> pathProcessor = (fullPath, lineNumber) =>
            {
                var relativePath = fullPath.Substring(basePath.Length);
                return(githubBaseLink + relativePath.Replace('\\', '/') + "#L" + (lineNumber + 1));
            };

            Console.WriteLine("Collecting type constraint graph...");
            var typeRelations = new TypeConstraints(pathProcessor);

            var     projectGraph = solution.GetProjectDependencyGraph();
            UDCTree UDCHierarchy = new UDCTree();
            var     compilations = new List <CSharpCompilation>();

            foreach (var projectId in projectGraph.GetTopologicallySortedProjects())
            {
                Compilation compilation;
                try
                {
                    var project = solution.GetProject(projectId);
                    if (project.FilePath.ToLower().Contains("test") ||
                        !(project.FilePath.ToLower().EndsWith(".csproj")))
                    {
                        Console.WriteLine($"Excluding {project.FilePath} since it seems to be test-related");
                        continue;
                    }
                    compilation = project.GetCompilationAsync().Result;
                }
                catch (Exception ex)
                {
                    Console.WriteLine("Exception while compiling project {0}: {1}", projectId, ex);
                    continue;
                }
                if (compilation == null)
                {
                    continue;
                }
                foreach (var error in compilation.GetDiagnostics().Where(d => d.Severity == DiagnosticSeverity.Error))
                {
                    Console.WriteLine(error.GetMessage());
                }
                if (compilation is CSharpCompilation cSharpCompilation)
                {
                    UDCHierarchy.ParseTypesInCompilation(cSharpCompilation);
                    typeRelations.AddFromCompilation(cSharpCompilation);
                    compilations.Add(cSharpCompilation);
                }
            }

            Console.WriteLine("Starting rewriting of builtin type uses after reading in serialized clusterings...");
            RewriterFromJson rJson = new RewriterFromJson(serializedClustersFile, basePath, typeRelations);
            var r = new Rewriter(compilations, rJson.Clusters, rJson.AncestorMap, targetTypeName);

            r.RewriteTypes();
            string errors = JsonConvert.SerializeObject(r.ErrorHistogram, Formatting.Indented);

            Console.WriteLine(errors);
        }
Example #2
0
        public RewriterFromJson(string filename, string repositoryPath, TypeConstraints recollectedConstraints)
        {
            ClusteringSerializerUtil csu = new ClusteringSerializerUtil();

            (Clusters, Parents) = csu.Deserialize(filename, repositoryPath, recollectedConstraints);
            AncestorMap         = new Dictionary <int, List <int> >();
            SetAncestorMap();
        }
Example #3
0
        public static void ExtractFromSolution(string repositoryPath, string githubPath, Solution solution,
                                               string saveDir, string typeToCluster = "string")
        {
            Func <string, int, string> pathProcessor = (fullPath, lineNumber) =>
            {
                var basePath     = repositoryPath;
                var relativePath = fullPath.Substring(basePath.Length);
                var githubLink   = githubPath + relativePath.Replace('\\', '/') + "#L" + (lineNumber + 1);
                return(githubLink);
            };

            Console.WriteLine("Collecting type constraint graph...");
            var typeRelations = new TypeConstraints(pathProcessor);

            var projectGraph = solution.GetProjectDependencyGraph();
            var compilations = new List <CSharpCompilation>();

            foreach (var projectId in projectGraph.GetTopologicallySortedProjects())
            {
                Compilation compilation;
                try
                {
                    var project = solution.GetProject(projectId);

                    if (project.FilePath.ToLower().Contains("test"))
                    {
                        Console.WriteLine($"Excluding {project.FilePath} since it seems to be test-related");
                        continue;
                    }
                    compilation = project.GetCompilationAsync().Result;
                }
                catch (Exception ex)
                {
                    Console.WriteLine("Exception while compiling project {0}: {1}", projectId, ex);
                    continue;
                }
                foreach (var error in compilation.GetDiagnostics().Where(d => d.Severity == DiagnosticSeverity.Error))
                {
                    Console.WriteLine(error.GetMessage());
                }
                if (compilation is CSharpCompilation cSharpCompilation)
                {
                    typeRelations.AddFromCompilation(cSharpCompilation);
                    compilations.Add(cSharpCompilation);
                }
            }

            var extractor = new ClusteringExtractor(new HashSet <string> {
                typeToCluster
            }, typeRelations);

            var(clusters, clusterParents) = extractor.InferColors();
            ClusteringSerializerUtil.SerializeClustering(saveDir, repositoryPath, clusters, clusterParents);
        }
Example #4
0
        public ClusteringExtractor(HashSet <string> types, TypeConstraints collectedConstraints)
        {
            collectedConstraints.RemoveSelfLinks();

            _ciComputer = new SubtokenVariationOfInformationComputer(dirichletAlpha: 10);
            _lattice    = new SuperGreedySplitingVIColoredLattice <NodeName>(_ciComputer);

            // TODO: Allow external choice of splitting method
            Func <AbstractNode, string[]> subtokenSplitting = n => SubtokenSplitter.SplitSubtokens(n.Name).ToArray();
            Func <AbstractNode, string[]> charSplitting     = n => n.Name.ToLower().Select(ch => ch.ToString()).ToArray();
            Func <AbstractNode, string[]> bigramSplitting   = n =>
            {
                if (n.Name.Length == 0)
                {
                    return new string[] { "" }
                }
                ;
                var name = n.Name.ToLower();
                return(Enumerable.Range(0, name.Length - 1).Select(i => name.Substring(i, 2)).ToArray());
            };
            Func <AbstractNode, string[]> subtokenBigramSplitting = n =>
            {
                return(SubtokenSplitter.SplitSubtokens(n.Name).SelectMany(sub =>
                {
                    return Enumerable.Range(0, sub.Length - 1).Select(i => sub.Substring(i, 2));
                }).ToArray());
            };
            Func <AbstractNode, string[]> subtokenTrigramSplitting = n =>
            {
                return(SubtokenSplitter.SplitSubtokens(n.Name).SelectMany(sub =>
                {
                    if (sub.Length < 3)
                    {
                        return new string[] { sub }
                    }
                    ;
                    return Enumerable.Range(0, sub.Length - 2).Select(i => sub.Substring(i, 3));
                }).ToArray());
            };

            Func <AbstractNode, string[]> trigramAndSubtokenSplitting = n => subtokenSplitting(n).Concat(subtokenTrigramSplitting(n)).ToArray();

            _nodeMap = _lattice.Add(collectedConstraints.AllRelationships.Where(kv => types.Contains(kv.Key.Type)).
                                    ToDictionary(kv => kv.Key, kv => new HashSet <AbstractNode>(kv.Value.Where(n => types.Contains(n.Type)))),
                                    s => new NodeName(subtokenSplitting(s)));
            NumRelationships = _lattice.NumRelationships;
        }
Example #5
0
        public static void EvaluateOnBuiltIns(
            List <CSharpCompilation> compilations, Func <string, int, string> pathProcessor,
            TypeConstraints typeRelations, string results_path, bool deserializeClusters = false,
            string slnName = null)
        {
            Dictionary <string, Tuple <string, string> > serialisedClusters = new Dictionary <string, Tuple <string, string> >();

            void bootstrapSerialisedClusters()
            {
                serialisedClusters["BEPUphysics.sln"]           = new Tuple <string, string>(@"E:\bepuphysics1", @"F:\clusterings\bepuphysics-float.json");
                serialisedClusters["Microsoft.Bot.Builder.sln"] = new Tuple <string, string>(@"E:\BotBuilder\CSharp\", @"F:\clusterings\botbuilder-str.json");
                serialisedClusters["CommandLine.sln"]           = new Tuple <string, string>(@"E:\commandline\", @"F:\clusterings\commandline-str.json");
                serialisedClusters["CommonMark.sln"]            = new Tuple <string, string>(@"E:\CommonMark.NET\", @"F:\clusterings\commonmark-str.json");
                serialisedClusters["Hangfire.sln"]              = new Tuple <string, string>(@"E:\Hangfire\", @"F:\clusterings\hangfire-str.json");
                serialisedClusters["Humanizer.sln"]             = new Tuple <string, string>(@"E:\Humanizer\", @"F:\clusterings\humanizer-str.json");
                serialisedClusters["QuantConnect.Lean.sln"]     = new Tuple <string, string>(@"E:\Lean\", @"F:\clusterings\lean-str.json");
                serialisedClusters["Nancy.sln"]                 = new Tuple <string, string>(@"E:\Nancy\", @"F:\clusterings\nancy-str.json");
                serialisedClusters["Newtonsoft.Json.Net40.sln"] = new Tuple <string, string>(@"E:\Newtonsoft.Json\", @"F:\clusterings\newtonsoft-str.json");
                serialisedClusters["Ninject.sln"]               = new Tuple <string, string>(@"E:\Ninject\", @"F:\clusterings\ninject-str.json");
                serialisedClusters["NLog.sln"]      = new Tuple <string, string>(@"E:\NLog\", @"F:\clusterings\nlog-str.json");
                serialisedClusters["Quartz.sln"]    = new Tuple <string, string>(@"E:\quartznet\", @"F:\clusterings\quartznet-str.json");
                serialisedClusters["RavenDB.sln"]   = new Tuple <string, string>(@"E:\ravendb\", @"F:\clusterings\ravendb-str.json");
                serialisedClusters["RestSharp.sln"] = new Tuple <string, string>(@"E:\RestSharp\", @"F:\clusterings\restsharp-str.json");
                serialisedClusters["Wox.sln"]       = new Tuple <string, string>(@"E:\Wox\", @"F:\clusterings\wox-str.json");
            }

            bootstrapSerialisedClusters();

            string  typename  = "string";
            UDCTree hierarchy = null;

            foreach (var compilation in compilations)
            {
                ITypeSymbol t = compilation.GetTypeByMetadataName(typename);
                if (t != null)
                {
                    hierarchy = new UDCTree(t);
                    break;
                }
            }

            Rewriter r;

            if (deserializeClusters)
            {
                var serializedClustersFile = serialisedClusters[slnName].Item2;
                var repoPath           = serialisedClusters[slnName].Item1;
                RewriterFromJson rJson = new RewriterFromJson(serializedClustersFile, repoPath, typeRelations);
                r = new Rewriter(compilations, rJson.Clusters, rJson.AncestorMap, typename);
            }
            else
            {
                GodClassResults results = new GodClassResults(pathProcessor, typeRelations, results_path);
                results.startInference(hierarchy);
                r = new Rewriter(compilations, results.MiningResults.First().clusteringResult, results.MiningResults.First().ancestorMap, typename);
            }

            r.RewriteTypes();
            string errors = JsonConvert.SerializeObject(r.ErrorHistogram, Formatting.Indented);

            try
            {
                if (!Directory.Exists(results_path))
                {
                    Directory.CreateDirectory(results_path);
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.ToString());
                System.Environment.Exit(-1);
            }

            System.IO.DirectoryInfo di = new DirectoryInfo(results_path);

            foreach (FileInfo f in di.GetFiles())
            {
                f.Delete();
            }
            foreach (DirectoryInfo dir in di.GetDirectories())
            {
                dir.Delete(true);
            }
            System.IO.StreamWriter file = new System.IO.StreamWriter(results_path + "/" + typename + "_rewriting_errors.txt");
            file.WriteLine(errors);
            file.Close();
        }
Example #6
0
        public SubtypeMiner(HashSet <string> types, TypeConstraints collectedConstraints, int maxNumTypes, bool UDTSpecificAnalysis = false, ITypeSymbol t = null)
        {
            collectedConstraints.RemoveSelfLinks();

            _ciComputer = new SubtokenVariationOfInformationComputer(dirichletAlpha: 2);
            _lattice    = new SuperGreedySplitingVIColoredLattice <NodeName>(_ciComputer);

            // TODO: Allow external choice of splitting type
            Func <AbstractNode, string[]> subtokenSplitting = n => SubtokenSplitter.SplitSubtokens(n.Name).ToArray();
            Func <AbstractNode, string[]> charSplitting     = n => n.Name.ToLower().Select(ch => ch.ToString()).ToArray();
            Func <AbstractNode, string[]> bigramSplitting   = n =>
            {
                if (n.Name.Length == 0)
                {
                    return new string[] { "" }
                }
                ;
                var name = n.Name.ToLower();
                return(Enumerable.Range(0, name.Length - 1).Select(i => name.Substring(i, 2)).ToArray());
            };
            Func <AbstractNode, string[]> subtokenBigramSplitting = n =>
            {
                return(SubtokenSplitter.SplitSubtokens(n.Name).SelectMany(sub =>
                {
                    return Enumerable.Range(0, sub.Length - 1).Select(i => sub.Substring(i, 2));
                }).ToArray());
            };
            Func <AbstractNode, string[]> subtokenTrigramSplitting = n =>
            {
                return(SubtokenSplitter.SplitSubtokens(n.Name).SelectMany(sub =>
                {
                    if (sub.Length < 3)
                    {
                        return new string[] { sub }
                    }
                    ;
                    return Enumerable.Range(0, sub.Length - 2).Select(i => sub.Substring(i, 3));
                }).ToArray());
            };

            Func <AbstractNode, string[]> trigramAndSubtokenSplitting = n => subtokenSplitting(n).Concat(subtokenTrigramSplitting(n)).ToArray();

            if (UDTSpecificAnalysis)
            {
                /*IEnumerable<KeyValuePair<AbstractNode, HashSet<AbstractNode>>> nodes = collectedConstraints.AllRelationships.Where(kv => kv.Key.IsSymbol);
                 * var symbols = nodes.Where(kv => (kv.Key as VariableSymbol) !=null);*/

                var symbols = GetTypeSpecificRelations(collectedConstraints.AllRelationships, t);

                _nodeMap = _lattice.Add(
                    symbols.ToDictionary(
                        kv => kv.Key,
                        kv => new HashSet <AbstractNode>(kv.Value)
                        ),
                    s => new NodeName(subtokenSplitting(s))
                    );
            }
            else
            {
                var selKeys = collectedConstraints.AllRelationships.Where(kv => types.Contains(kv.Key.Type));
                _nodeMap = _lattice.Add(selKeys.
                                        ToDictionary(kv => kv.Key, kv => new HashSet <AbstractNode>(kv.Value.Where(n => types.Contains(n.Type)))),
                                        s => new NodeName(subtokenSplitting(s)));
            }
            NumRelationships = _lattice.NumRelationships;
        }
Example #7
0
        public static void ExtractFromSolution(string basePath, string githubBaseLink, Solution solution)
        {
            Func <string, int, string> pathProcessor = (fullPath, lineNumber) =>
            {
                var relativePath = fullPath.Substring(basePath.Length);
                return(githubBaseLink + relativePath.Replace('\\', '/') + "#L" + (lineNumber + 1));
            };


            Console.WriteLine("Collecting type constraint graph...");
            var typeRelations = new TypeConstraints(pathProcessor);

            var     projectGraph = solution.GetProjectDependencyGraph();
            UDCTree UDCHierarchy = new UDCTree();
            var     compilations = new List <CSharpCompilation>();

            foreach (var projectId in projectGraph.GetTopologicallySortedProjects())
            {
                Compilation compilation;
                try
                {
                    var project = solution.GetProject(projectId);
                    if (project.FilePath.ToLower().Contains("test") ||
                        !(project.FilePath.ToLower().EndsWith(".csproj")))
                    {
                        Console.WriteLine($"Excluding {project.FilePath} since it seems to be test-related");
                        continue;
                    }
                    compilation = project.GetCompilationAsync().Result;
                }
                catch (Exception ex)
                {
                    Console.WriteLine("Exception while compiling project {0}: {1}", projectId, ex);
                    continue;
                }
                if (compilation == null)
                {
                    continue;
                }
                foreach (var error in compilation.GetDiagnostics().Where(d => d.Severity == DiagnosticSeverity.Error))
                {
                    Console.WriteLine(error.GetMessage());
                }
                if (compilation is CSharpCompilation cSharpCompilation)
                {
                    UDCHierarchy.ParseTypesInCompilation(cSharpCompilation);
                    typeRelations.AddFromCompilation(cSharpCompilation);
                    compilations.Add(cSharpCompilation);
                }
            }

            //AnalysisType t = AnalysisType.TypeSpecificFlowClustering;

            var analysisTriggers = new List <AnalysisType> {
                //AnalysisType.UDCSanityCheck,
                //AnalysisType.TypeSpecificFlowClustering,
                AnalysisType.RewritingFromSerialization,
            };

            foreach (var i in analysisTriggers)
            {
                switch (i)
                {
                case AnalysisType.TypeSpecificFlowClustering:
                    var TSFresults_path = "results/" + "TypeSpecificClustering/" + basePath.Substring(basePath.LastIndexOf('\\') + 1);
                    Console.WriteLine("Starting coloring for name flows in individual user defined types...");
                    GodClassResults typeSpecificClustering = new GodClassResults(pathProcessor, typeRelations, TSFresults_path);
                    typeSpecificClustering.startTypeSpecificNameFlowInference(UDCHierarchy, 0);
                    break;

                case AnalysisType.UDCSanityCheck:
                    var sanityResults_path = "results/" + "UDCSanityCheck/" + basePath.Substring(basePath.LastIndexOf('\\') + 1);
                    Console.WriteLine("Starting coloring for sanity checking on user defined types...");
                    GodClassResults sanityCheck = new GodClassResults(pathProcessor, typeRelations, sanityResults_path);
                    sanityCheck.startConsolidatedInference(UDCHierarchy, 0);
                    break;

                case AnalysisType.Rewriting:
                    var rewritingResults_path = "results/" + "rewriting/" + basePath.Substring(basePath.LastIndexOf('\\') + 1);
                    Console.WriteLine("Starting rewriting of builtin type uses...");
                    EvaluateOnBuiltIns(compilations, pathProcessor, typeRelations, rewritingResults_path, false);
                    break;

                case AnalysisType.RewritingFromSerialization:
                    var rewritingFromSerializationResults_path = "results/" + "rewritingFromSerialised/" + basePath.Substring(basePath.LastIndexOf('\\') + 1);
                    Console.WriteLine("Starting rewriting of builtin type uses after reading in serilised clusterings...");
                    EvaluateOnBuiltIns(compilations, pathProcessor, typeRelations, rewritingFromSerializationResults_path, true,
                                       basePath.Substring(basePath.LastIndexOf('\\') + 1));
                    break;

                default:
                    break;
                }
            }
            Console.WriteLine("Done!");
        }
        public (List <HashSet <AbstractNode> > Clusters, List <HashSet <int> > Parents) Deserialize(string filename, string repositoryPath, TypeConstraints recollectedConstraints)
        {
            var allNodes = recollectedConstraints.AllRelationships.Select(kv => kv.Key).Concat(recollectedConstraints.AllRelationships.SelectMany(kv => kv.Value));

            var allNodesFiltered     = allNodes.GroupBy(x => LocationToString(repositoryPath, x)).Select(grp => grp.First());
            var nodeLocationToString = allNodesFiltered.ToDictionary(n => LocationToString(repositoryPath, n), n => n);
            // Load file
            var deserialized = JsonConvert.DeserializeObject <SerializableCluster>(File.ReadAllText(filename));
            // re-map
            var clusters = deserialized.Clusters.Select(c => new HashSet <AbstractNode>(c.Select(l => getNodeLocation(nodeLocationToString, l) /*nodeLocationToString[l]*/))).ToList();

            return(clusters, deserialized.Parents);
        }