/// <summary>
/// Builds the union of the surrounding-path keys of every node in <paramref name="nodes"/>.
/// </summary>
/// <remarks>
/// For each node, the first fragment whose <c>Node</c> equals
/// <c>node.AncestorWithSingleChild()</c> is looked up. When one exists, its
/// <c>SurroundingRange</c> supplies both the surrounding nodes and the outermost node;
/// otherwise the node's own descendants are used and the outermost node stays <c>null</c>.
/// The number of nodes that used a fragment range is written to the console.
/// </remarks>
/// <param name="nodes">Nodes whose keys are collected; the tree root is derived from the first one.</param>
/// <param name="fragments">Candidate fragments matched against each node.</param>
/// <param name="extractor">Passed through to <c>GetSurroundingPaths</c>.</param>
/// <returns>A new set holding every key produced for any node; empty when there are no nodes.</returns>
public static HashSet <string> GetUnionKeys(
            this ICollection <CstNode> nodes, ICollection <SelectedFragment> fragments,
            FeatureExtractor extractor)
        {
            var unionKeys  = new HashSet <string>();
            var rangesUsed = 0;

            if (nodes.Count > 0)
            {
                // The root is taken from the first node only; presumably all nodes share
                // one tree — TODO confirm against callers.
                var treeRoot = nodes.First().AncestorsAndSelf().Last();
                foreach (var current in nodes)
                {
                    var matched = fragments.FirstOrDefault(
                        f => current.AncestorWithSingleChild() == f.Node);

                    CstNode boundary = null;
                    IEnumerable <CstNode> context;
                    if (matched == null)
                    {
                        // No fragment: restrict the context to the node's own subtree.
                        context = current.DescendantsAndSelf();
                    }
                    else
                    {
                        context  = matched.SurroundingRange.FindOverlappedNodes(treeRoot);
                        boundary = matched.SurroundingRange.FindInnermostNode(treeRoot);
                        rangesUsed++;
                    }

                    unionKeys.UnionWith(
                        current.GetSurroundingPaths(context.ToHashSet(), extractor, boundary));
                }
            }
            Console.WriteLine("#Used Ranges: " + rangesUsed);
            return(unionKeys);
        }
 /// <summary>
 /// Collects, into one set, the surrounding-path keys of all given nodes.
 /// </summary>
 /// <remarks>
 /// A node is paired with the first fragment whose <c>Node</c> equals
 /// <c>node.AncestorWithSingleChild()</c>. With a fragment, the fragment's
 /// <c>SurroundingRange</c> decides the surrounding nodes and the outermost node; without
 /// one, the node's descendants (and itself) are used and no outermost node is passed.
 /// Writes the count of fragment-backed nodes to the console as a side effect.
 /// </remarks>
 /// <param name="nodes">Nodes to process; the first one is used to find the tree root.</param>
 /// <param name="fragments">Fragments that may provide a surrounding range for a node.</param>
 /// <param name="extractor">Forwarded to <c>GetSurroundingPaths</c>.</param>
 /// <returns>A freshly created set with the union of all keys (empty for no nodes).</returns>
 public static HashSet<string> GetUnionKeys(
         this ICollection<CstNode> nodes, ICollection<SelectedFragment> fragments,
         FeatureExtractor extractor) {
     var unionKeys = new HashSet<string>();
     var rangesUsed = 0;
     if (nodes.Count > 0) {
         // Topmost ancestor of the first node; presumably shared by all nodes — TODO confirm.
         var treeRoot = nodes.First().AncestorsAndSelf().Last();
         foreach (var current in nodes) {
             var matched = fragments.FirstOrDefault(
                     f => current.AncestorWithSingleChild() == f.Node);

             CstNode boundary = null;
             IEnumerable<CstNode> context;
             if (matched == null) {
                 // No fragment applies: only the node's own subtree counts as context.
                 context = current.DescendantsAndSelf();
             } else {
                 context = matched.SurroundingRange.FindOverlappedNodes(treeRoot);
                 boundary = matched.SurroundingRange.FindInnermostNode(treeRoot);
                 rangesUsed++;
             }

             unionKeys.UnionWith(
                     current.GetSurroundingPaths(context.ToHashSet(), extractor, boundary));
         }
     }
     Console.WriteLine("#Used Ranges: " + rangesUsed);
     return unionKeys;
 }
Beispiel #3
0
 /// <summary>
 /// Stores the selected node names and the extractor, then builds the two lookup tables
 /// from <paramref name="featureSet"/>.
 /// </summary>
 /// <param name="selectedNodeNames">Kept as-is in <c>_selectedNodeNames</c>.</param>
 /// <param name="extractor">Kept as-is in <c>_extractor</c>.</param>
 /// <param name="featureSet">Input to <c>CreateFeatureString2Bit</c>.</param>
 public FeatureEncoder(
         ISet<string> selectedNodeNames, FeatureExtractor extractor, FeatuerSet featureSet) {
     this._selectedNodeNames = selectedNodeNames;
     this._extractor = extractor;

     // The second table is derived from the first, so the order of these two lines matters.
     // Presumably they are a feature-string -> bit map and its inverse — confirm in the helpers.
     this._featureString2Bit = CreateFeatureString2Bit(featureSet);
     this._bit2FeatureString = CreateBit2FeatureString(this._featureString2Bit);
 }
Beispiel #4
0
 /// <summary>
 /// Remembers the selected node names and the extractor, and prepares both lookup tables
 /// from <paramref name="featureSet"/>.
 /// </summary>
 /// <param name="selectedNodeNames">Assigned to <c>_selectedNodeNames</c> unchanged.</param>
 /// <param name="extractor">Assigned to <c>_extractor</c> unchanged.</param>
 /// <param name="featureSet">Handed to <c>CreateFeatureString2Bit</c>.</param>
 public FeatureEncoder(
     ISet <string> selectedNodeNames, FeatureExtractor extractor, FeatuerSet featureSet)
 {
     this._selectedNodeNames = selectedNodeNames;
     this._extractor         = extractor;

     // _bit2FeatureString is computed from _featureString2Bit, so it must come second.
     // Presumably a feature-string -> bit map and its inverse — confirm in the helpers.
     this._featureString2Bit = CreateFeatureString2Bit(featureSet);
     this._bit2FeatureString = CreateBit2FeatureString(this._featureString2Bit);
 }
        /// <summary>
        /// Collects the feature path strings that describe <paramref name="node"/> and its
        /// surroundings.
        /// </summary>
        /// <remarks>
        /// The node's name is always recorded. If the nearest ancestor with more than one child
        /// belongs to <paramref name="surroundingNodes"/>, the method additionally records the
        /// upward path (joined with <c>&lt;</c>) up to <paramref name="outermostNode"/> and the
        /// neighbouring tokens (prefixed <c>'-</c> / <c>'+</c>), then continues downwards from the
        /// outermost node; otherwise it descends from <paramref name="node"/> itself.
        /// Side effects on <paramref name="extractor"/>: <c>IsInner</c> is set to <c>false</c>
        /// and <c>TokenLeft</c> / <c>TokenRight</c> are raised to the number of neighbouring
        /// tokens recorded, when the upward branch is taken.
        /// </remarks>
        /// <param name="node">Node the paths are built for.</param>
        /// <param name="surroundingNodes">Nodes that count as context.</param>
        /// <param name="extractor">Supplies token text; also updated as described above.</param>
        /// <param name="outermostNode">Ancestor at which the upward walk stops.</param>
        /// <returns>A new set with all recorded path strings.</returns>
        public static HashSet <string> GetSurroundingPaths(
            this CstNode node, HashSet <CstNode> surroundingNodes, FeatureExtractor extractor,
            CstNode outermostNode)
        {
            var path      = node.Name;
            var paths     = new HashSet <string> { node.Name };
            var ancestors = new Dictionary <CstNode, string>();

            ancestors.Add(node, node.Name);

            var branching = node.Ancestors().FirstOrDefault(a => a.Children().Count() > 1);

            if (surroundingNodes.Contains(branching))
            {
                extractor.IsInner = false;

                var start = node;
                path = node.Name + node.RuleId;

                // NOTE(review): this walk only terminates cleanly when outermostNode is an
                // ancestor of the start node; otherwise node presumably becomes null at the
                // root and is dereferenced — confirm callers guarantee this.
                for (node = node.Parent; node != outermostNode; node = node.Parent)
                {
                    ancestors.Add(node, path + "<" + node.Name);
                    path = path + "<" + node.Name + node.RuleId;
                    paths.Add(path);
                }
                path = path + "<" + node.Name; // the outermost entry must not carry a RuleId
                paths.Add(path);

                // Tokens before the start node, up to the first one outside the context.
                var leftCount  = 0;
                var leftTokens = start.PreviousTokenNodes(node)
                                 .Where(IsMeaningfulIdentifier)
                                 .TakeWhile(t => surroundingNodes.Contains(t));
                foreach (var tokenNode in leftTokens)
                {
                    paths.Add("'-" + extractor.GetToken(tokenNode));
                    leftCount++;
                }
                extractor.TokenLeft = Math.Max(extractor.TokenLeft, leftCount);

                // Tokens after the start node, up to the first one outside the context.
                var rightCount  = 0;
                var rightTokens = start.NextTokenNodes(node)
                                  .Where(IsMeaningfulIdentifier)
                                  .TakeWhile(t => surroundingNodes.Contains(t));
                foreach (var tokenNode in rightTokens)
                {
                    paths.Add("'+" + extractor.GetToken(tokenNode));
                    rightCount++;
                }
                extractor.TokenRight = Math.Max(extractor.TokenRight, rightCount);
            }
            GetSurroundingPaths(node, path, surroundingNodes, extractor, paths, ancestors);
            return(paths);
        }
Beispiel #6
0
 /// <summary>
 /// Returns the union keys of <paramref name="acceptedNodes"/>, ordered by string length
 /// (shortest first).
 /// </summary>
 /// <param name="acceptedNodes">Nodes whose keys are collected via <c>GetUnionKeys</c>.</param>
 /// <param name="extractor">Forwarded to <c>GetUnionKeys</c>.</param>
 /// <param name="fragments">Forwarded to <c>GetUnionKeys</c>.</param>
 /// <returns>A list of unique feature strings sorted by ascending length.</returns>
 private static IEnumerable<string> CreateAcceptingFeatures(
         ICollection<CstNode> acceptedNodes, FeatureExtractor extractor, ICollection<SelectedFragment> fragments) {
     // GetUnionKeys returns a HashSet<string>, so its elements are already unique and no
     // Distinct() pass is needed before materializing the list.
     var acceptingFeatures = acceptedNodes
             .GetUnionKeys(fragments, extractor)
             .ToList();
     // List.Sort is not a stable sort: the relative order of equal-length keys is unspecified.
     acceptingFeatures.Sort((s1, s2) => s1.Length.CompareTo(s2.Length));
     return acceptingFeatures;
 }
Beispiel #7
0
 /// <summary>
 /// Computes the accepting and rejecting feature lists from the seed nodes.
 /// </summary>
 /// <param name="seedNodeSet">Provides <c>AcceptedNodes</c> and <c>RejectedNodes</c>.</param>
 /// <param name="extractor">Forwarded to both feature builders.</param>
 /// <param name="acceptingFragments">Fragments used for the accepted nodes.</param>
 /// <param name="rejectingFragments">Fragments used for the rejected nodes.</param>
 public FeatuerSet(
         SeedNodeSet seedNodeSet, FeatureExtractor extractor,
         ICollection<SelectedFragment> acceptingFragments, ICollection<SelectedFragment> rejectingFragments) {
     var accepting = CreateAcceptingFeatures(
             seedNodeSet.AcceptedNodes, extractor, acceptingFragments);
     AcceptingFeatures = accepting.ToImmutableList();

     // Must run after AcceptingFeatures is assigned: CreateRejectingFeatures removes
     // the accepting features from its result.
     var rejecting = CreateRejectingFeatures(
             seedNodeSet.RejectedNodes, extractor, rejectingFragments);
     RejectingFeatures = rejecting.ToImmutableList();
 }
Beispiel #8
0
 /// <summary>
 /// Returns the union keys of <paramref name="rejectedNodes"/> that are not accepting
 /// features, ordered by string length (shortest first).
 /// </summary>
 /// <remarks>
 /// Reads <c>AcceptingFeatures</c>, so that property must already be assigned when this
 /// method is called.
 /// </remarks>
 /// <param name="rejectedNodes">Nodes whose keys are collected via <c>GetUnionKeys</c>.</param>
 /// <param name="extractor">Forwarded to <c>GetUnionKeys</c>.</param>
 /// <param name="fragments">Forwarded to <c>GetUnionKeys</c>.</param>
 /// <returns>A list of feature strings sorted by ascending length.</returns>
 private IEnumerable<string> CreateRejectingFeatures(
         ICollection<CstNode> rejectedNodes, FeatureExtractor extractor, ICollection<SelectedFragment> fragments) {
     var candidates = rejectedNodes.GetUnionKeys(fragments, extractor).ToHashSet();

     // Anything that is also an accepting feature is dropped.
     candidates.ExceptWith(AcceptingFeatures);

     var ordered = candidates.ToList();
     ordered.Sort((left, right) => left.Length.CompareTo(right.Length));
     return ordered;
 }
Beispiel #9
0
        /// <summary>
        /// Returns the union keys of <paramref name="acceptedNodes"/>, ordered by string length
        /// (shortest first).
        /// </summary>
        /// <param name="acceptedNodes">Nodes whose keys are collected via <c>GetUnionKeys</c>.</param>
        /// <param name="extractor">Forwarded to <c>GetUnionKeys</c>.</param>
        /// <param name="fragments">Forwarded to <c>GetUnionKeys</c>.</param>
        /// <returns>A list of unique feature strings sorted by ascending length.</returns>
        private static IEnumerable <string> CreateAcceptingFeatures(
            ICollection <CstNode> acceptedNodes, FeatureExtractor extractor, ICollection <SelectedFragment> fragments)
        {
            // GetUnionKeys returns a HashSet<string>, so its elements are already unique and
            // no Distinct() pass is needed before materializing the list.
            var acceptingFeatures = acceptedNodes
                                    .GetUnionKeys(fragments, extractor)
                                    .ToList();

            // List.Sort is not a stable sort: the relative order of equal-length keys is
            // unspecified.
            acceptingFeatures.Sort((s1, s2) => s1.Length.CompareTo(s2.Length));
            return(acceptingFeatures);
        }
Beispiel #10
0
 /// <summary>
 /// Derives the accepting and the rejecting feature lists from the seed nodes.
 /// </summary>
 /// <param name="seedNodeSet">Source of <c>AcceptedNodes</c> and <c>RejectedNodes</c>.</param>
 /// <param name="extractor">Forwarded to both feature builders.</param>
 /// <param name="acceptingFragments">Fragments used for the accepted nodes.</param>
 /// <param name="rejectingFragments">Fragments used for the rejected nodes.</param>
 public FeatuerSet(
     SeedNodeSet seedNodeSet, FeatureExtractor extractor,
     ICollection <SelectedFragment> acceptingFragments, ICollection <SelectedFragment> rejectingFragments)
 {
     var accepting = CreateAcceptingFeatures(
         seedNodeSet.AcceptedNodes, extractor, acceptingFragments);
     AcceptingFeatures = accepting.ToImmutableList();

     // Order matters: CreateRejectingFeatures subtracts AcceptingFeatures from its result,
     // so that property has to be assigned first.
     var rejecting = CreateRejectingFeatures(
         seedNodeSet.RejectedNodes, extractor, rejectingFragments);
     RejectingFeatures = rejecting.ToImmutableList();
 }
Beispiel #11
0
        /// <summary>
        /// Returns the union keys of <paramref name="rejectedNodes"/> that are not accepting
        /// features, ordered by string length (shortest first).
        /// </summary>
        /// <remarks>
        /// Reads <c>AcceptingFeatures</c>, so that property must already be assigned when this
        /// method is called.
        /// </remarks>
        /// <param name="rejectedNodes">Nodes whose keys are collected via <c>GetUnionKeys</c>.</param>
        /// <param name="extractor">Forwarded to <c>GetUnionKeys</c>.</param>
        /// <param name="fragments">Forwarded to <c>GetUnionKeys</c>.</param>
        /// <returns>A list of feature strings sorted by ascending length.</returns>
        private IEnumerable <string> CreateRejectingFeatures(
            ICollection <CstNode> rejectedNodes, FeatureExtractor extractor, ICollection <SelectedFragment> fragments)
        {
            var candidates = rejectedNodes.GetUnionKeys(fragments, extractor).ToHashSet();

            // Anything that is also an accepting feature is dropped.
            candidates.ExceptWith(AcceptingFeatures);

            var ordered = candidates.ToList();
            ordered.Sort((left, right) => left.Length.CompareTo(right.Length));
            return(ordered);
        }
        /// <summary>
        /// Recursively records the path of <paramref name="node"/>, and of those descendants
        /// that belong to <paramref name="surroundingNodes"/>, into <paramref name="paths"/>.
        /// </summary>
        /// <param name="node">Node currently being visited.</param>
        /// <param name="path">Path string already built for <paramref name="node"/>.</param>
        /// <param name="surroundingNodes">Only children contained here are descended into.</param>
        /// <param name="extractor">Supplies the token text of token nodes.</param>
        /// <param name="paths">Receives the collected path strings.</param>
        /// <param name="ancestors">Precomputed paths per node; a child with an entry uses that
        /// path instead of one joined with <c>&gt;</c>.</param>
        /// <returns>
        /// Item1 is the token text of the subtree (concatenated over all children), or
        /// <c>null</c> when some child was outside <paramref name="surroundingNodes"/> or had no
        /// token text of its own. Item2 is <c>true</c> only when the node was skipped because it
        /// is a temporal variable, or has at most one child and that child was skipped for this
        /// reason; nothing is recorded for the node in that case.
        /// </returns>
        private static Tuple <string, bool> GetSurroundingPaths(
            CstNode node, string path, ICollection <CstNode> surroundingNodes,
            FeatureExtractor extractor, ISet <string> paths,
            IDictionary <CstNode, string> ancestors)
        {
            var token    = "";
            var temporal = false;
            var count    = 0;

            if (node.HasToken)
            {
                token    = extractor.GetToken(node);
                temporal = IsTemporalVariable(token);
            }
            else
            {
                foreach (var child in node.Children())
                {
                    count++;
                    if (surroundingNodes.Contains(child))
                    {
                        // Single lookup instead of ContainsKey followed by the indexer.
                        string newPath;
                        if (!ancestors.TryGetValue(child, out newPath))
                        {
                            newPath = path + ">" + child.Name + child.RuleId;
                        }
                        var ret = GetSurroundingPaths(
                            child, newPath, surroundingNodes, extractor, paths, ancestors);
                        temporal = ret.Item2;
                        if (token != null && ret.Item1 != null)
                        {
                            token += ret.Item1;
                            continue;
                        }
                    }
                    // One incomplete child makes the whole subtree's token text unknown.
                    token = null;
                }
            }
            if (temporal && count <= 1)
            {
                return(Tuple.Create <string, bool>(null, true));
            }
            paths.Add(path);
            if (token != null)
            {
                paths.Add(path + "'" + token); // need node.Name + node.RuleId ?
            }
            return(Tuple.Create(token, false));
        }
        /// <summary>
        /// Recursively ORs into <paramref name="vector"/> the bits of every known feature
        /// found at or below <paramref name="node"/>.
        /// </summary>
        /// <param name="node">Node currently being visited.</param>
        /// <param name="path">Path string already built for <paramref name="node"/>.</param>
        /// <param name="featureString2Bit">Maps a feature string to its bit; a child is only
        /// descended into when its path is a key of this map.</param>
        /// <param name="extractor">Supplies the token text of token nodes.</param>
        /// <param name="ancestors">Precomputed paths per node; a child with an entry uses that
        /// path instead of one joined with <c>&gt;</c>.</param>
        /// <param name="vector">Accumulator that receives the matching bits.</param>
        /// <returns>
        /// The token text of the subtree (concatenated over all children), or <c>null</c> when
        /// some child's path was unknown or the child had no token text of its own.
        /// </returns>
        private static string GetFeatureVector(
            CstNode node, string path, IDictionary <string, BigInteger> featureString2Bit,
            FeatureExtractor extractor, IDictionary <CstNode, string> ancestors,
            ref BigInteger vector)
        {
            BigInteger bit;
            var        token = "";

            if (node.HasToken)
            {
                token = extractor.GetToken(node);
            }
            else
            {
                foreach (var child in node.Children())
                {
                    // Single lookup instead of ContainsKey followed by the indexer.
                    string newPath;
                    if (!ancestors.TryGetValue(child, out newPath))
                    {
                        newPath = path + ">" + child.Name + child.RuleId;
                    }
                    if (featureString2Bit.TryGetValue(newPath, out bit))
                    {
                        vector |= bit;
                        var ret = GetFeatureVector(
                            child, newPath, featureString2Bit, extractor, ancestors, ref vector);
                        if (token != null && ret != null)
                        {
                            token += ret;
                            continue;
                        }
                    }
                    // One unknown or incomplete child makes the subtree's token text unknown.
                    token = null;
                }
            }
            if (token != null)
            {
                if (featureString2Bit.TryGetValue(path + "'" + token, out bit))
                {
                    vector |= bit;
                }
            }
            return(token);
        }
        /// <summary>
        /// Encodes <paramref name="node"/> as a bit vector of the known features it exhibits.
        /// </summary>
        /// <remarks>
        /// The bit for the node's name is set when known. Unless <c>extractor.IsInner</c> is
        /// set, the method climbs the ancestors while the <c>&lt;</c>-joined path (with RuleId)
        /// is a known feature, requires the final path without RuleId to be known as well
        /// (returning early otherwise), and sets the bits of up to <c>TokenLeft</c> /
        /// <c>TokenRight</c> neighbouring tokens. Finally the subtree below the reached node is
        /// encoded by the private overload.
        /// </remarks>
        /// <param name="node">Node to encode.</param>
        /// <param name="featureString2Bit">Maps a feature string to its bit.</param>
        /// <param name="extractor">Supplies token text and the IsInner/TokenLeft/TokenRight settings.</param>
        /// <returns>The OR of all matching feature bits.</returns>
        public static BigInteger GetFeatureVector(
                this CstNode node, IDictionary<string, BigInteger> featureString2Bit,
                FeatureExtractor extractor) {
            BigInteger bit;
            var vector = BigInteger.Zero;
            var path = node.Name;
            var ancestors = new Dictionary<CstNode, string>();
            ancestors.Add(node, node.Name);

            if (featureString2Bit.TryGetValue(node.Name, out bit)) {
                vector |= bit;
            }

            if (!extractor.IsInner) {
                var start = node;
                path = node.Name + node.RuleId;

                // Climb while the extended path is a known feature. The bits of these upward
                // path entries are deliberately not added to the vector.
                // NOTE(review): nothing stops the climb at the root; if every ancestor path is
                // known, node presumably becomes null and is dereferenced — confirm.
                node = node.Parent;
                var extended = path + "<" + node.Name + node.RuleId;
                while (featureString2Bit.TryGetValue(extended, out bit)) {
                    ancestors.Add(node, path + "<" + node.Name);
                    path = extended;
                    node = node.Parent;
                    extended = path + "<" + node.Name + node.RuleId;
                }
                path = path + "<" + node.Name; // the outermost entry must not carry a RuleId
                if (!featureString2Bit.TryGetValue(path, out bit)) {
                    return vector;
                }

                var before = start.PreviousTokenNodes(node)
                        .Where(IsMeaningfulIdentifier)
                        .Take(extractor.TokenLeft);
                before.ForEach(
                        tokenNode => {
                            BigInteger tokenBit;
                            if (featureString2Bit.TryGetValue(
                                    "'-" + extractor.GetToken(tokenNode), out tokenBit)) {
                                vector |= tokenBit;
                            }
                        });

                var after = start.NextTokenNodes(node)
                        .Where(IsMeaningfulIdentifier)
                        .Take(extractor.TokenRight);
                after.ForEach(
                        tokenNode => {
                            BigInteger tokenBit;
                            if (featureString2Bit.TryGetValue(
                                    "'+" + extractor.GetToken(tokenNode), out tokenBit)) {
                                vector |= tokenBit;
                            }
                        });
            }
            GetFeatureVector(node, path, featureString2Bit, extractor, ancestors, ref vector);
            return vector;
        }
        /// <summary>
        /// Encodes <paramref name="node"/> as a bit vector of the known features it exhibits.
        /// </summary>
        /// <remarks>
        /// The bit for the node's name is set when known. Unless <c>extractor.IsInner</c> is
        /// set, the method climbs the ancestors while the <c>&lt;</c>-joined path (with RuleId)
        /// is a known feature, requires the final path without RuleId to be known as well
        /// (returning early otherwise), and sets the bits of up to <c>TokenLeft</c> /
        /// <c>TokenRight</c> neighbouring tokens. Finally the subtree below the reached node is
        /// encoded by the private overload.
        /// </remarks>
        /// <param name="node">Node to encode.</param>
        /// <param name="featureString2Bit">Maps a feature string to its bit.</param>
        /// <param name="extractor">Supplies token text and the IsInner/TokenLeft/TokenRight settings.</param>
        /// <returns>The OR of all matching feature bits.</returns>
        public static BigInteger GetFeatureVector(
            this CstNode node, IDictionary <string, BigInteger> featureString2Bit,
            FeatureExtractor extractor)
        {
            BigInteger bit;
            var        vector    = BigInteger.Zero;
            var        path      = node.Name;
            var        ancestors = new Dictionary <CstNode, string>();

            ancestors.Add(node, node.Name);

            if (featureString2Bit.TryGetValue(node.Name, out bit))
            {
                vector |= bit;
            }

            if (!extractor.IsInner)
            {
                var start = node;
                path = node.Name + node.RuleId;

                // Climb while the extended path is a known feature. The bits of these upward
                // path entries are deliberately not added to the vector.
                // NOTE(review): nothing stops the climb at the root; if every ancestor path is
                // known, node presumably becomes null and is dereferenced — confirm.
                node = node.Parent;
                var extended = path + "<" + node.Name + node.RuleId;
                while (featureString2Bit.TryGetValue(extended, out bit))
                {
                    ancestors.Add(node, path + "<" + node.Name);
                    path     = extended;
                    node     = node.Parent;
                    extended = path + "<" + node.Name + node.RuleId;
                }
                path = path + "<" + node.Name; // the outermost entry must not carry a RuleId
                if (!featureString2Bit.TryGetValue(path, out bit))
                {
                    return(vector);
                }

                var before = start.PreviousTokenNodes(node)
                             .Where(IsMeaningfulIdentifier)
                             .Take(extractor.TokenLeft);
                before.ForEach(
                    tokenNode => {
                    BigInteger tokenBit;
                    if (featureString2Bit.TryGetValue(
                            "'-" + extractor.GetToken(tokenNode), out tokenBit))
                    {
                        vector |= tokenBit;
                    }
                });

                var after = start.NextTokenNodes(node)
                            .Where(IsMeaningfulIdentifier)
                            .Take(extractor.TokenRight);
                after.ForEach(
                    tokenNode => {
                    BigInteger tokenBit;
                    if (featureString2Bit.TryGetValue(
                            "'+" + extractor.GetToken(tokenNode), out tokenBit))
                    {
                        vector |= tokenBit;
                    }
                });
            }
            GetFeatureVector(node, path, featureString2Bit, extractor, ancestors, ref vector);
            return(vector);
        }
 /// <summary>
 /// Recursively records the path of <paramref name="node"/>, and of those descendants that
 /// belong to <paramref name="surroundingNodes"/>, into <paramref name="paths"/>.
 /// </summary>
 /// <param name="node">Node currently being visited.</param>
 /// <param name="path">Path string already built for <paramref name="node"/>.</param>
 /// <param name="surroundingNodes">Only children contained here are descended into.</param>
 /// <param name="extractor">Supplies the token text of token nodes.</param>
 /// <param name="paths">Receives the collected path strings.</param>
 /// <param name="ancestors">Precomputed paths per node; a child with an entry uses that
 /// path instead of one joined with <c>&gt;</c>.</param>
 /// <returns>
 /// Item1 is the token text of the subtree (concatenated over all children), or <c>null</c>
 /// when some child was outside <paramref name="surroundingNodes"/> or had no token text of
 /// its own. Item2 is <c>true</c> only when the node was skipped because it is a temporal
 /// variable, or has at most one child and that child was skipped for this reason; nothing
 /// is recorded for the node in that case.
 /// </returns>
 private static Tuple<string, bool> GetSurroundingPaths(
         CstNode node, string path, ICollection<CstNode> surroundingNodes,
         FeatureExtractor extractor, ISet<string> paths,
         IDictionary<CstNode, string> ancestors) {
     var token = "";
     var temporal = false;
     var count = 0;
     if (node.HasToken) {
         token = extractor.GetToken(node);
         temporal = IsTemporalVariable(token);
     } else {
         foreach (var child in node.Children()) {
             count++;
             if (surroundingNodes.Contains(child)) {
                 // Single lookup instead of ContainsKey followed by the indexer.
                 string newPath;
                 if (!ancestors.TryGetValue(child, out newPath)) {
                     newPath = path + ">" + child.Name + child.RuleId;
                 }
                 var ret = GetSurroundingPaths(
                         child, newPath, surroundingNodes, extractor, paths, ancestors);
                 temporal = ret.Item2;
                 if (token != null && ret.Item1 != null) {
                     token += ret.Item1;
                     continue;
                 }
             }
             // One incomplete child makes the whole subtree's token text unknown.
             token = null;
         }
     }
     if (temporal && count <= 1) {
         return Tuple.Create<string, bool>(null, true);
     }
     paths.Add(path);
     if (token != null) {
         paths.Add(path + "'" + token); // need node.Name + node.RuleId ?
     }
     return Tuple.Create(token, false);
 }
 /// <summary>
 /// Recursively ORs into <paramref name="vector"/> the bits of every known feature found at
 /// or below <paramref name="node"/>.
 /// </summary>
 /// <param name="node">Node currently being visited.</param>
 /// <param name="path">Path string already built for <paramref name="node"/>.</param>
 /// <param name="featureString2Bit">Maps a feature string to its bit; a child is only
 /// descended into when its path is a key of this map.</param>
 /// <param name="extractor">Supplies the token text of token nodes.</param>
 /// <param name="ancestors">Precomputed paths per node; a child with an entry uses that
 /// path instead of one joined with <c>&gt;</c>.</param>
 /// <param name="vector">Accumulator that receives the matching bits.</param>
 /// <returns>
 /// The token text of the subtree (concatenated over all children), or <c>null</c> when some
 /// child's path was unknown or the child had no token text of its own.
 /// </returns>
 private static string GetFeatureVector(
         CstNode node, string path, IDictionary<string, BigInteger> featureString2Bit,
         FeatureExtractor extractor, IDictionary<CstNode, string> ancestors,
         ref BigInteger vector) {
     BigInteger bit;
     var token = "";
     if (node.HasToken) {
         token = extractor.GetToken(node);
     } else {
         foreach (var child in node.Children()) {
             // Single lookup instead of ContainsKey followed by the indexer.
             string newPath;
             if (!ancestors.TryGetValue(child, out newPath)) {
                 newPath = path + ">" + child.Name + child.RuleId;
             }
             if (featureString2Bit.TryGetValue(newPath, out bit)) {
                 vector |= bit;
                 var ret = GetFeatureVector(
                         child, newPath, featureString2Bit, extractor, ancestors, ref vector);
                 if (token != null && ret != null) {
                     token += ret;
                     continue;
                 }
             }
             // One unknown or incomplete child makes the subtree's token text unknown.
             token = null;
         }
     }
     if (token != null) {
         if (featureString2Bit.TryGetValue(path + "'" + token, out bit)) {
             vector |= bit;
         }
     }
     return token;
 }
        /// <summary>
        /// Collects the feature path strings that describe <paramref name="node"/> and its
        /// surroundings.
        /// </summary>
        /// <remarks>
        /// The node's name is always recorded. If the nearest ancestor with more than one child
        /// belongs to <paramref name="surroundingNodes"/>, the method additionally records the
        /// upward path (joined with <c>&lt;</c>) up to <paramref name="outermostNode"/> and the
        /// neighbouring tokens (prefixed <c>'-</c> / <c>'+</c>), then continues downwards from the
        /// outermost node; otherwise it descends from <paramref name="node"/> itself.
        /// Side effects on <paramref name="extractor"/>: <c>IsInner</c> is set to <c>false</c>
        /// and <c>TokenLeft</c> / <c>TokenRight</c> are raised to the number of neighbouring
        /// tokens recorded, when the upward branch is taken.
        /// </remarks>
        /// <param name="node">Node the paths are built for.</param>
        /// <param name="surroundingNodes">Nodes that count as context.</param>
        /// <param name="extractor">Supplies token text; also updated as described above.</param>
        /// <param name="outermostNode">Ancestor at which the upward walk stops.</param>
        /// <returns>A new set with all recorded path strings.</returns>
        public static HashSet<string> GetSurroundingPaths(
                this CstNode node, HashSet<CstNode> surroundingNodes, FeatureExtractor extractor,
                CstNode outermostNode) {
            var path = node.Name;
            var paths = new HashSet<string> { node.Name };
            var ancestors = new Dictionary<CstNode, string>();
            ancestors.Add(node, node.Name);

            var branching = node.Ancestors().FirstOrDefault(a => a.Children().Count() > 1);
            if (surroundingNodes.Contains(branching)) {
                extractor.IsInner = false;

                var start = node;
                path = node.Name + node.RuleId;

                // NOTE(review): this walk only terminates cleanly when outermostNode is an
                // ancestor of the start node; otherwise node presumably becomes null at the
                // root and is dereferenced — confirm callers guarantee this.
                for (node = node.Parent; node != outermostNode; node = node.Parent) {
                    ancestors.Add(node, path + "<" + node.Name);
                    path = path + "<" + node.Name + node.RuleId;
                    paths.Add(path);
                }
                path = path + "<" + node.Name; // the outermost entry must not carry a RuleId
                paths.Add(path);

                // Tokens before the start node, up to the first one outside the context.
                var leftCount = 0;
                var leftTokens = start.PreviousTokenNodes(node)
                        .Where(IsMeaningfulIdentifier)
                        .TakeWhile(t => surroundingNodes.Contains(t));
                foreach (var tokenNode in leftTokens) {
                    paths.Add("'-" + extractor.GetToken(tokenNode));
                    leftCount++;
                }
                extractor.TokenLeft = Math.Max(extractor.TokenLeft, leftCount);

                // Tokens after the start node, up to the first one outside the context.
                var rightCount = 0;
                var rightTokens = start.NextTokenNodes(node)
                        .Where(IsMeaningfulIdentifier)
                        .TakeWhile(t => surroundingNodes.Contains(t));
                foreach (var tokenNode in rightTokens) {
                    paths.Add("'+" + extractor.GetToken(tokenNode));
                    rightCount++;
                }
                extractor.TokenRight = Math.Max(extractor.TokenRight, rightCount);
            }
            GetSurroundingPaths(node, path, surroundingNodes, extractor, paths, ancestors);
            return paths;
        }