/// <summary>
/// Demo entry point: parses a hard-coded C# snippet, canonicalizes its syntax tree
/// (extracting all variables first) and prints the canonical source text to the console.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
public static void Main(string[] args)
        {
            // Sample input. It contains constructs that would not compile (e.g. two locals
            // named foo1, a "readonly void" method); it is only parsed here, never compiled.
            var code1 = @"using System;

                        class Hello
                                {
                                    public static readonly void Main(){
                                        foreach(var h in asdf){
                                             h.Write(test);
                                        }
    
                                        var foo = new Bar(1,2,3,4,5,SomeProperty,""MyString"");
                                        var foo1 = sdf(x => x.sdf);
                                        var foo1 = sdf((x,y,z) => x.sdf);
                                        var foo2 = sdf();
                                        Assert.Equal(SomeInt, new int[] {1,2,3,4});

                                        Main();
                                    }

                                    public static int SomeInt = 2;
                                    public static string SomeProperty {get; set;};

                                    
                                }

                     class Hello2 {
                        public static int sdf = 3;
                     }
";

            var syntaxTree = CSharpSyntaxTree.ParseText(code1);

            // Fetch the root once and reuse it for canonicalization.
            var root = syntaxTree.GetRoot();

            var canonicalized = Canonicalization.CanonicalizeSyntaxNode(root, extractAllVariablesFirst: true);

            Console.WriteLine(canonicalized.CanonicalSyntaxNode.GetText());

            // Block until a line is read from stdin (presumably to keep the console window open).
            Console.ReadLine();
        }
コード例 #2
0
        /// <summary>
        /// Extracts code-change examples from a single revision record.
        /// </summary>
        /// <remarks>
        /// The record is a JSON object from which the keys <c>prev_file</c>, <c>updated_file</c>,
        /// <c>id</c> and <c>message</c> are read. Both file versions are parsed and canonicalized
        /// (the updated file re-uses the variable name map produced for the previous file), and the
        /// changes between the two canonical trees are enumerated. A change is emitted only when:
        /// <list type="bullet">
        /// <item>it spans the same number of lines before and after;</item>
        /// <item>every node on those lines has a kind contained in <c>allowedSytaxKinds</c>;</item>
        /// <item>both token sequences are non-empty, pass <c>IsValidCodeChunkTokens</c>, and differ.</item>
        /// </list>
        /// This method is an iterator, so nothing (including JSON parsing) runs until the result
        /// is enumerated.
        /// </remarks>
        /// <param name="jsonLine">One JSON object, as text, describing the revision.</param>
        /// <param name="jsonSyntaxTreeHelper">
        /// Helper whose <c>GetJObjectForSyntaxNode</c> produces the AST payload for each code chunk.
        /// </param>
        /// <returns>
        /// A lazily evaluated sequence of anonymous objects with the properties <c>Id</c>,
        /// <c>PrevCodeChunk</c>, <c>UpdatedCodeChunk</c>, <c>PrevCodeChunkTokens</c>,
        /// <c>UpdatedCodeChunkTokens</c>, <c>PrevCodeAST</c>, <c>UpdatedCodeAST</c>,
        /// <c>PrecedingContext</c>, <c>SucceedingContext</c> and <c>CommitMessage</c>.
        /// </returns>
        public static IEnumerable <object> ProcessSingleRevision(string jsonLine, JsonSyntaxTreeHelper jsonSyntaxTreeHelper)
        {
            var entry = JObject.Parse(jsonLine);

            var previousFile = entry["prev_file"].ToString();
            var updatedFile  = entry["updated_file"].ToString();

            // Debugging aids:
            // Console.WriteLine($"Processing {entry["id"]}");
            // File.WriteAllText("a.original.cs", previousFile);
            // File.WriteAllText("b.original.cs", updatedFile);

            var previousFileAst = CSharpSyntaxTree.ParseText(previousFile);
            var updatedFileAst  = CSharpSyntaxTree.ParseText(updatedFile);

            // The updated file is canonicalized with the name map of the previous file so that
            // the same variable gets the same canonical name in both versions.
            (SyntaxNode canonicalPrevFileAst, Dictionary <string, string> prevFileVariableNameMap)       = Canonicalization.CanonicalizeSyntaxNode(previousFileAst.GetRoot(), extractAllVariablesFirst: true);
            (SyntaxNode canonicalUpdatedFileAst, Dictionary <string, string> updatedFileVariableNameMap) = Canonicalization.CanonicalizeSyntaxNode(updatedFileAst.GetRoot(), prevFileVariableNameMap);

            var prevCodeFile    = canonicalPrevFileAst.GetText();
            var updatedCodeFile = canonicalUpdatedFileAst.GetText();

            var prevFileTokens    = canonicalPrevFileAst.DescendantTokens().ToList();
            var updatedFileTokens = canonicalUpdatedFileAst.DescendantTokens().ToList();

            var changesInRevision = GetChangesBetweenAsts(canonicalPrevFileAst.SyntaxTree, canonicalUpdatedFileAst.SyntaxTree);

            // Debugging aids:
            // File.WriteAllText("a.canonical.cs", canonicalPrevFileAst.GetText().ToString());
            // File.WriteAllText("b.canonical.cs", canonicalUpdatedFileAst.GetText().ToString());

            var prevTokenIndex    = new TokenIndex(prevFileTokens);
            var updatedTokenIndex = new TokenIndex(updatedFileTokens);

            // Counts emitted changes only; skipped changes do not consume an id.
            var changeId = 0;

            foreach (var change in changesInRevision)
            {
                var prevCodeChunkLineSpan    = canonicalPrevFileAst.SyntaxTree.GetLineSpan(change.BeforeSpan.ChangeSpan);
                var updatedCodeChunkLineSpan = canonicalUpdatedFileAst.SyntaxTree.GetLineSpan(change.AfterSpan.ChangeSpan);

                var prevStartLine    = prevCodeChunkLineSpan.StartLinePosition.Line;
                var prevEndLine      = prevCodeChunkLineSpan.EndLinePosition.Line;
                var updatedStartLine = updatedCodeChunkLineSpan.StartLinePosition.Line;
                var updatedEndLine   = updatedCodeChunkLineSpan.EndLinePosition.Line;

                // only consider changes of equal number of lines
                if (prevEndLine - prevStartLine != updatedEndLine - updatedStartLine)
                {
                    continue;
                }

                // TODO: remove trivial change

                // only consider SyntaxKind in allowedSytaxKinds
                var prevCodeChunkNodes = GetNodesByLineSpan(canonicalPrevFileAst, prevCodeFile, prevStartLine, prevEndLine);
                if (prevCodeChunkNodes.Any(node => !allowedSytaxKinds.Contains(node.Kind())))
                {
                    continue;
                }

                var updatedCodeChunkNodes = GetNodesByLineSpan(canonicalUpdatedFileAst, updatedCodeFile, updatedStartLine, updatedEndLine);
                if (updatedCodeChunkNodes.Any(node => !allowedSytaxKinds.Contains(node.Kind())))
                {
                    continue;
                }

                // Widen each change to whole lines: from the start of its first line to the
                // end of its last line, expressed as character offsets into the canonical text.
                var prevCodeChunkSpanStart = prevCodeFile.Lines[prevStartLine].Span.Start;
                var prevCodeChunkSpanEnd   = prevCodeFile.Lines[prevEndLine].Span.End;

                var updatedCodeChunkSpanStart = updatedCodeFile.Lines[updatedStartLine].Span.Start;
                var updatedCodeChunkSpanEnd   = updatedCodeFile.Lines[updatedEndLine].Span.End;

                // IsNullOrWhiteSpace already covers null and empty strings.
                var previousCodeChunkTokens = prevTokenIndex
                                              .GetTokensInSpan(prevCodeChunkSpanStart, prevCodeChunkSpanEnd)
                                              .Select(token => token.ValueText)
                                              .Where(text => !string.IsNullOrWhiteSpace(text))
                                              .ToArray();

                var updatedCodeChunkTokens = updatedTokenIndex
                                             .GetTokensInSpan(updatedCodeChunkSpanStart, updatedCodeChunkSpanEnd)
                                             .Select(token => token.ValueText)
                                             .Where(text => !string.IsNullOrWhiteSpace(text))
                                             .ToArray();

                var isUsableChange =
                    previousCodeChunkTokens.Length > 0 && updatedCodeChunkTokens.Length > 0 &&
                    IsValidCodeChunkTokens(previousCodeChunkTokens) && IsValidCodeChunkTokens(updatedCodeChunkTokens) &&
                    !previousCodeChunkTokens.SequenceEqual(updatedCodeChunkTokens);

                if (!isUsableChange)
                {
                    continue;
                }

                var changeSha = entry["id"] + "_" + changeId;

                // Wrap the nodes of each side in a synthetic block statement. The cast throws
                // InvalidCastException if a node is not a statement.
                var prevCodeChunkBlockStmt    = SyntaxFactory.Block(prevCodeChunkNodes.Select(node => (StatementSyntax)node));
                var updatedCodeChunkBlockStmt = SyntaxFactory.Block(updatedCodeChunkNodes.Select(node => (StatementSyntax)node));

                IDictionary <string, string> zeroIndexedVariableNameMap;
                (prevCodeChunkBlockStmt, updatedCodeChunkBlockStmt, zeroIndexedVariableNameMap) =
                    zeroIndexVariableNames(prevCodeChunkBlockStmt, updatedCodeChunkBlockStmt);

                // Skip(1)/SkipLast(1) drop the first and last token of each synthetic block
                // (its own braces), leaving only the tokens of the wrapped statements.
                var prevCodeChunkBlockStmtTokens      = prevCodeChunkBlockStmt.DescendantTokens().Skip(1).SkipLast(1).ToArray();
                var prevCodeChunkBlockStmtTokensIndex = new TokenIndex(prevCodeChunkBlockStmtTokens).InitInvertedIndex();

                var updatedCodeChunkBlockStmtTokens      = updatedCodeChunkBlockStmt.DescendantTokens().Skip(1).SkipLast(1).ToArray();
                var updatedCodeChunkBlockStmtTokensIndex = new TokenIndex(updatedCodeChunkBlockStmtTokens).InitInvertedIndex();

                var prevCodeBlockJObject    = jsonSyntaxTreeHelper.GetJObjectForSyntaxNode(prevCodeChunkBlockStmt, prevCodeChunkBlockStmtTokensIndex);
                var updatedCodeBlockJObject = jsonSyntaxTreeHelper.GetJObjectForSyntaxNode(updatedCodeChunkBlockStmt, updatedCodeChunkBlockStmtTokensIndex);

                // Both context spans come from change.BeforeSpan, i.e. they are positions in the
                // previous file's canonical tree, so both are resolved against the previous
                // file's token index. Looking them up in the updated file's index would select
                // the wrong tokens whenever the change alters the text length.
                var precedingContextTokens  = prevTokenIndex.GetTokensInSpan(change.BeforeSpan.SpanOfPrecedingContext);
                var succeedingContextTokens = prevTokenIndex.GetTokensInSpan(change.BeforeSpan.SpanOfSucceedingContext);

                precedingContextTokens  = zeroIndexVariableNames(precedingContextTokens, zeroIndexedVariableNameMap);
                succeedingContextTokens = zeroIndexVariableNames(succeedingContextTokens, zeroIndexedVariableNameMap);

                var prevCodeChunkBlockStmtTextTokens =
                    prevCodeChunkBlockStmtTokens.Select(token => token.ValueText).ToArray();
                var updatedCodeChunkBlockStmtTextTokens =
                    updatedCodeChunkBlockStmtTokens.Select(token => token.ValueText).ToArray();

                var prevCodeTextChunk = Utils.ExtractCodeTextFromBraces(prevCodeChunkBlockStmt.GetText().ToString());
                prevCodeTextChunk = Utils.RemoveLeadingWhiteSpace(prevCodeTextChunk, naive: true);

                var updatedCodeTextChunk = Utils.ExtractCodeTextFromBraces(updatedCodeChunkBlockStmt.GetText().ToString());
                updatedCodeTextChunk = Utils.RemoveLeadingWhiteSpace(updatedCodeTextChunk, naive: true);

                var precedingContextTextTokens  = precedingContextTokens.Select(token => token.ValueText).ToArray();
                var succeedingContextTextTokens = succeedingContextTokens.Select(token => token.ValueText).ToArray();

                var result = new
                {
                    Id                     = changeSha,
                    PrevCodeChunk          = prevCodeTextChunk,
                    UpdatedCodeChunk       = updatedCodeTextChunk,
                    PrevCodeChunkTokens    = prevCodeChunkBlockStmtTextTokens,
                    UpdatedCodeChunkTokens = updatedCodeChunkBlockStmtTextTokens,
                    PrevCodeAST            = prevCodeBlockJObject,
                    UpdatedCodeAST         = updatedCodeBlockJObject,
                    PrecedingContext       = precedingContextTextTokens,
                    SucceedingContext      = succeedingContextTextTokens,
                    CommitMessage          = entry["message"]
                };

                changeId += 1;

                yield return(result);
            }
        }