/// <summary>
/// Builds the union of the surrounding-path keys of every node in <paramref name="nodes"/>.
/// </summary>
/// <remarks>
/// For each node, the first fragment whose <c>Node</c> equals
/// <c>node.AncestorWithSingleChild()</c> is looked up. When one exists, its
/// <c>SurroundingRange</c> supplies both the surrounding nodes and the outermost node;
/// otherwise the node's own <c>DescendantsAndSelf()</c> are used and no outermost node is
/// passed. The number of nodes that matched a fragment is written to the console.
/// </remarks>
/// <param name="nodes">The nodes whose keys are collected.</param>
/// <param name="fragments">The fragments that may be matched against each node.</param>
/// <param name="extractor">Forwarded to <c>GetSurroundingPaths</c> for every node.</param>
/// <returns>
/// A new set holding every key produced for any node; empty when
/// <paramref name="nodes"/> is empty.
/// </returns>
public static HashSet<string> GetUnionKeys(
        this ICollection<CstNode> nodes, ICollection<SelectedFragment> fragments,
        FeatureExtractor extractor) {
    var commonKeys = new HashSet<string>();
    var usedRangeCount = 0;
    if (nodes.Count > 0) {
        // Last element of the first node's ancestor chain (presumably the tree root).
        var root = nodes.First().AncestorsAndSelf().Last();
        foreach (var node in nodes) {
            // Evaluated once per node instead of once per fragment inside the lambda.
            var fragmentAnchor = node.AncestorWithSingleChild();
            var fragment = fragments.FirstOrDefault(f => fragmentAnchor == f.Node);

            IEnumerable<CstNode> surroundingNodes;
            CstNode outermostNode = null;
            if (fragment != null) {
                surroundingNodes = fragment.SurroundingRange.FindOverlappedNodes(root);
                outermostNode = fragment.SurroundingRange.FindInnermostNode(root);
                usedRangeCount++;
            } else {
                surroundingNodes = node.DescendantsAndSelf();
            }

            var keys = node.GetSurroundingPaths(
                    surroundingNodes.ToHashSet(), extractor, outermostNode);
            commonKeys.UnionWith(keys);
        }
    }
    // Printed even when no node was processed.
    Console.WriteLine("#Used Ranges: " + usedRangeCount);
    return commonKeys;
}
/// <summary>
/// Builds the union of the surrounding-path keys of every node in <paramref name="nodes"/>.
/// </summary>
/// <remarks>
/// For each node, the first fragment whose <c>Node</c> equals
/// <c>node.AncestorWithSingleChild()</c> is looked up. When one exists, its
/// <c>SurroundingRange</c> supplies both the surrounding nodes and the outermost node;
/// otherwise the node's own <c>DescendantsAndSelf()</c> are used and no outermost node is
/// passed. The number of nodes that matched a fragment is written to the console.
/// </remarks>
/// <param name="nodes">The nodes whose keys are collected.</param>
/// <param name="fragments">The fragments that may be matched against each node.</param>
/// <param name="extractor">Forwarded to <c>GetSurroundingPaths</c> for every node.</param>
/// <returns>
/// A new set holding every key produced for any node; empty when
/// <paramref name="nodes"/> is empty.
/// </returns>
public static HashSet<string> GetUnionKeys(
        this ICollection<CstNode> nodes, ICollection<SelectedFragment> fragments,
        FeatureExtractor extractor) {
    var commonKeys = new HashSet<string>();
    var usedRangeCount = 0;
    if (nodes.Count > 0) {
        // Last element of the first node's ancestor chain (presumably the tree root).
        var root = nodes.First().AncestorsAndSelf().Last();
        foreach (var node in nodes) {
            // Evaluated once per node instead of once per fragment inside the lambda.
            var fragmentAnchor = node.AncestorWithSingleChild();
            var fragment = fragments.FirstOrDefault(f => fragmentAnchor == f.Node);

            IEnumerable<CstNode> surroundingNodes;
            CstNode outermostNode = null;
            if (fragment != null) {
                surroundingNodes = fragment.SurroundingRange.FindOverlappedNodes(root);
                outermostNode = fragment.SurroundingRange.FindInnermostNode(root);
                usedRangeCount++;
            } else {
                surroundingNodes = node.DescendantsAndSelf();
            }

            var keys = node.GetSurroundingPaths(
                    surroundingNodes.ToHashSet(), extractor, outermostNode);
            commonKeys.UnionWith(keys);
        }
    }
    // Printed even when no node was processed.
    Console.WriteLine("#Used Ranges: " + usedRangeCount);
    return commonKeys;
}
/// <summary>
/// Creates an encoder that stores the given node names and extractor and builds its two
/// feature lookup tables from <paramref name="featureSet"/>.
/// </summary>
/// <param name="selectedNodeNames">Stored as-is in <c>_selectedNodeNames</c>.</param>
/// <param name="extractor">Stored as-is in <c>_extractor</c>.</param>
/// <param name="featureSet">Passed to <c>CreateFeatureString2Bit</c>.</param>
public FeatureEncoder(
        ISet<string> selectedNodeNames, FeatureExtractor extractor,
        FeatuerSet featureSet) {
    this._selectedNodeNames = selectedNodeNames;
    this._extractor = extractor;

    // The second table is built from the first, so the first must be assigned beforehand.
    this._featureString2Bit = CreateFeatureString2Bit(featureSet);
    this._bit2FeatureString = CreateBit2FeatureString(this._featureString2Bit);
}
/// <summary>
/// Creates an encoder that stores the given node names and extractor and builds its two
/// feature lookup tables from <paramref name="featureSet"/>.
/// </summary>
/// <param name="selectedNodeNames">Stored as-is in <c>_selectedNodeNames</c>.</param>
/// <param name="extractor">Stored as-is in <c>_extractor</c>.</param>
/// <param name="featureSet">Passed to <c>CreateFeatureString2Bit</c>.</param>
public FeatureEncoder(
        ISet<string> selectedNodeNames, FeatureExtractor extractor,
        FeatuerSet featureSet) {
    this._selectedNodeNames = selectedNodeNames;
    this._extractor = extractor;

    // The second table is built from the first, so the first must be assigned beforehand.
    this._featureString2Bit = CreateFeatureString2Bit(featureSet);
    this._bit2FeatureString = CreateBit2FeatureString(this._featureString2Bit);
}
/// <summary>
/// Collects the path strings that describe <paramref name="node"/> and its surroundings.
/// </summary>
/// <remarks>
/// The node's own name is always recorded. When the nearest ancestor having more than one
/// child belongs to <paramref name="surroundingNodes"/>, the method additionally:
/// sets <c>extractor.IsInner</c> to <c>false</c>; records one "&lt;"-joined path per ancestor
/// up to <paramref name="outermostNode"/>; records the leading run of preceding and following
/// meaningful-identifier tokens that lie inside <paramref name="surroundingNodes"/>
/// (prefixed "'-" and "'+"); and raises <c>extractor.TokenLeft</c> / <c>TokenRight</c> to the
/// lengths of those runs. Finally the recursive overload walks downwards from the node the
/// climb ended on (or from the original node when no climb happened).
/// </remarks>
/// <param name="node">The node to describe.</param>
/// <param name="surroundingNodes">The nodes regarded as surrounding context.</param>
/// <param name="extractor">Supplies token text; its state is updated as described above.</param>
/// <param name="outermostNode">The ancestor at which the upward climb stops.</param>
/// <returns>A new set with every collected path string.</returns>
public static HashSet<string> GetSurroundingPaths(
        this CstNode node, HashSet<CstNode> surroundingNodes, FeatureExtractor extractor,
        CstNode outermostNode) {
    var currentPath = node.Name;
    var collected = new HashSet<string>();
    var ancestorPaths = new Dictionary<CstNode, string>();
    ancestorPaths.Add(node, node.Name);
    collected.Add(node.Name);

    var branchingAncestor = node.Ancestors()
            .FirstOrDefault(a => a.Children().Count() > 1);
    if (surroundingNodes.Contains(branchingAncestor)) {
        extractor.IsInner = false;
        var startNode = node;
        currentPath = node.Name + node.RuleId;

        // Climb towards outermostNode, recording one path per intermediate ancestor.
        for (node = node.Parent; node != outermostNode; node = node.Parent) {
            ancestorPaths.Add(node, currentPath + "<" + node.Name);
            currentPath = currentPath + "<" + node.Name + node.RuleId;
            collected.Add(currentPath);
        }
        currentPath = currentPath + "<" + node.Name; // must not have RuleId
        collected.Add(currentPath);

        // Preceding tokens: stop at the first one outside the surrounding nodes.
        var leftCount = 0;
        var leftTokens = startNode.PreviousTokenNodes(node)
                .Where(IsMeaningfulIdentifier)
                .TakeWhile(t => surroundingNodes.Contains(t));
        foreach (var tokenNode in leftTokens) {
            collected.Add("'-" + extractor.GetToken(tokenNode));
            leftCount++;
        }
        extractor.TokenLeft = Math.Max(extractor.TokenLeft, leftCount);

        // Following tokens: same rule in the other direction.
        var rightCount = 0;
        var rightTokens = startNode.NextTokenNodes(node)
                .Where(IsMeaningfulIdentifier)
                .TakeWhile(t => surroundingNodes.Contains(t));
        foreach (var tokenNode in rightTokens) {
            collected.Add("'+" + extractor.GetToken(tokenNode));
            rightCount++;
        }
        extractor.TokenRight = Math.Max(extractor.TokenRight, rightCount);
    }

    GetSurroundingPaths(
            node, currentPath, surroundingNodes, extractor, collected, ancestorPaths);
    return collected;
}
/// <summary>
/// Returns the union keys of <paramref name="acceptedNodes"/>, sorted by ascending string
/// length.
/// </summary>
/// <param name="acceptedNodes">The nodes whose keys become accepting features.</param>
/// <param name="extractor">Forwarded to <c>GetUnionKeys</c>.</param>
/// <param name="fragments">Forwarded to <c>GetUnionKeys</c>.</param>
/// <returns>A list of unique feature strings ordered by length.</returns>
private static IEnumerable<string> CreateAcceptingFeatures(
        ICollection<CstNode> acceptedNodes, FeatureExtractor extractor,
        ICollection<SelectedFragment> fragments) {
    // GetUnionKeys already returns a HashSet<string>, so a Distinct() pass is redundant.
    var acceptingFeatures = acceptedNodes
            .GetUnionKeys(fragments, extractor)
            .ToList();
    acceptingFeatures.Sort((s1, s2) => s1.Length.CompareTo(s2.Length));
    return acceptingFeatures;
}
/// <summary>
/// Builds the accepting and rejecting feature lists from the seed nodes.
/// </summary>
/// <param name="seedNodeSet">Provides <c>AcceptedNodes</c> and <c>RejectedNodes</c>.</param>
/// <param name="extractor">Forwarded to both feature-creation helpers.</param>
/// <param name="acceptingFragments">Fragments used for the accepted nodes.</param>
/// <param name="rejectingFragments">Fragments used for the rejected nodes.</param>
public FeatuerSet(
        SeedNodeSet seedNodeSet, FeatureExtractor extractor,
        ICollection<SelectedFragment> acceptingFragments,
        ICollection<SelectedFragment> rejectingFragments) {
    var accepting = CreateAcceptingFeatures(
            seedNodeSet.AcceptedNodes, extractor, acceptingFragments);
    AcceptingFeatures = accepting.ToImmutableList();

    // CreateRejectingFeatures reads AcceptingFeatures, so it has to run second.
    var rejecting = CreateRejectingFeatures(
            seedNodeSet.RejectedNodes, extractor, rejectingFragments);
    RejectingFeatures = rejecting.ToImmutableList();
}
/// <summary>
/// Returns the union keys of <paramref name="rejectedNodes"/> that are not already in
/// <c>AcceptingFeatures</c>, sorted by ascending string length.
/// </summary>
/// <param name="rejectedNodes">The nodes whose keys become rejecting features.</param>
/// <param name="extractor">Forwarded to <c>GetUnionKeys</c>.</param>
/// <param name="fragments">Forwarded to <c>GetUnionKeys</c>.</param>
/// <returns>A list of feature strings ordered by length.</returns>
private IEnumerable<string> CreateRejectingFeatures(
        ICollection<CstNode> rejectedNodes, FeatureExtractor extractor,
        ICollection<SelectedFragment> fragments) {
    var candidates = rejectedNodes.GetUnionKeys(fragments, extractor).ToHashSet();

    // A feature that is already accepting must not also count as rejecting.
    candidates.ExceptWith(AcceptingFeatures);

    var ordered = candidates.ToList();
    ordered.Sort((left, right) => left.Length.CompareTo(right.Length));
    return ordered;
}
/// <summary>
/// Returns the union keys of <paramref name="acceptedNodes"/>, sorted by ascending string
/// length.
/// </summary>
/// <param name="acceptedNodes">The nodes whose keys become accepting features.</param>
/// <param name="extractor">Forwarded to <c>GetUnionKeys</c>.</param>
/// <param name="fragments">Forwarded to <c>GetUnionKeys</c>.</param>
/// <returns>A list of unique feature strings ordered by length.</returns>
private static IEnumerable<string> CreateAcceptingFeatures(
        ICollection<CstNode> acceptedNodes, FeatureExtractor extractor,
        ICollection<SelectedFragment> fragments) {
    // GetUnionKeys already returns a HashSet<string>, so a Distinct() pass is redundant.
    var acceptingFeatures = acceptedNodes
            .GetUnionKeys(fragments, extractor)
            .ToList();
    acceptingFeatures.Sort((s1, s2) => s1.Length.CompareTo(s2.Length));
    return acceptingFeatures;
}
/// <summary>
/// Builds the accepting and rejecting feature lists from the seed nodes.
/// </summary>
/// <param name="seedNodeSet">Provides <c>AcceptedNodes</c> and <c>RejectedNodes</c>.</param>
/// <param name="extractor">Forwarded to both feature-creation helpers.</param>
/// <param name="acceptingFragments">Fragments used for the accepted nodes.</param>
/// <param name="rejectingFragments">Fragments used for the rejected nodes.</param>
public FeatuerSet(
        SeedNodeSet seedNodeSet, FeatureExtractor extractor,
        ICollection<SelectedFragment> acceptingFragments,
        ICollection<SelectedFragment> rejectingFragments) {
    var accepting = CreateAcceptingFeatures(
            seedNodeSet.AcceptedNodes, extractor, acceptingFragments);
    AcceptingFeatures = accepting.ToImmutableList();

    // CreateRejectingFeatures reads AcceptingFeatures, so it has to run second.
    var rejecting = CreateRejectingFeatures(
            seedNodeSet.RejectedNodes, extractor, rejectingFragments);
    RejectingFeatures = rejecting.ToImmutableList();
}
/// <summary>
/// Returns the union keys of <paramref name="rejectedNodes"/> that are not already in
/// <c>AcceptingFeatures</c>, sorted by ascending string length.
/// </summary>
/// <param name="rejectedNodes">The nodes whose keys become rejecting features.</param>
/// <param name="extractor">Forwarded to <c>GetUnionKeys</c>.</param>
/// <param name="fragments">Forwarded to <c>GetUnionKeys</c>.</param>
/// <returns>A list of feature strings ordered by length.</returns>
private IEnumerable<string> CreateRejectingFeatures(
        ICollection<CstNode> rejectedNodes, FeatureExtractor extractor,
        ICollection<SelectedFragment> fragments) {
    var candidates = rejectedNodes.GetUnionKeys(fragments, extractor).ToHashSet();

    // A feature that is already accepting must not also count as rejecting.
    candidates.ExceptWith(AcceptingFeatures);

    var ordered = candidates.ToList();
    ordered.Sort((left, right) => left.Length.CompareTo(right.Length));
    return ordered;
}
/// <summary>
/// Recursively records the path of <paramref name="node"/> and of every child contained in
/// <paramref name="surroundingNodes"/>, adding token-qualified paths where a complete token
/// string is available.
/// </summary>
/// <param name="node">The node currently being visited.</param>
/// <param name="path">The path string already built for <paramref name="node"/>.</param>
/// <param name="surroundingNodes">Only children in this collection are descended into.</param>
/// <param name="extractor">Supplies the token text of token-bearing nodes.</param>
/// <param name="paths">The set that receives the path strings.</param>
/// <param name="ancestors">
/// Pre-computed paths for specific nodes; when a child has an entry, that path is used
/// instead of extending <paramref name="path"/>.
/// </param>
/// <returns>
/// Item1 is the concatenated token text of the subtree, or <c>null</c> when any child was
/// skipped or itself yielded <c>null</c>. Item2 is <c>true</c> only when this node was
/// suppressed as a temporal variable, in which case nothing is added for it.
/// </returns>
private static Tuple<string, bool> GetSurroundingPaths(
        CstNode node, string path, ICollection<CstNode> surroundingNodes,
        FeatureExtractor extractor, ISet<string> paths,
        IDictionary<CstNode, string> ancestors) {
    var token = "";
    var temporal = false;
    var count = 0;
    if (node.HasToken) {
        token = extractor.GetToken(node);
        temporal = IsTemporalVariable(token);
    } else {
        foreach (var child in node.Children()) {
            count++;
            if (surroundingNodes.Contains(child)) {
                // Single lookup instead of ContainsKey followed by the indexer.
                string newPath;
                if (!ancestors.TryGetValue(child, out newPath)) {
                    newPath = path + ">" + child.Name + child.RuleId;
                }
                var ret = GetSurroundingPaths(
                        child, newPath, surroundingNodes, extractor, paths, ancestors);
                // Only the most recently visited child's flag is kept.
                temporal = ret.Item2;
                if (token != null && ret.Item1 != null) {
                    token += ret.Item1;
                    continue;
                }
            }
            // A skipped child or an incomplete subtree invalidates the combined token.
            token = null;
        }
    }
    // A temporal variable reached through at most one child is not recorded at all.
    if (temporal && count <= 1) {
        return Tuple.Create<string, bool>(null, true);
    }
    paths.Add(path);
    if (token != null) {
        paths.Add(path + "'" + token); // need node.Name + node.RuleId ?
    }
    return Tuple.Create(token, false);
}
/// <summary>
/// Recursively ORs into <paramref name="vector"/> the bit of every known child path below
/// <paramref name="node"/>, plus the bit of the token-qualified path when a complete token
/// string is available.
/// </summary>
/// <param name="node">The node currently being visited.</param>
/// <param name="path">The path string already built for <paramref name="node"/>.</param>
/// <param name="featureString2Bit">Maps feature strings to their bits.</param>
/// <param name="extractor">Supplies the token text of token-bearing nodes.</param>
/// <param name="ancestors">
/// Pre-computed paths for specific nodes; when a child has an entry, that path is used
/// instead of extending <paramref name="path"/>.
/// </param>
/// <param name="vector">The accumulated feature vector, updated in place.</param>
/// <returns>
/// The concatenated token text of the subtree, or <c>null</c> when any child's path was
/// unknown or the child itself yielded <c>null</c>.
/// </returns>
private static string GetFeatureVector(
        CstNode node, string path, IDictionary<string, BigInteger> featureString2Bit,
        FeatureExtractor extractor, IDictionary<CstNode, string> ancestors,
        ref BigInteger vector) {
    BigInteger bit;
    var token = "";
    if (node.HasToken) {
        token = extractor.GetToken(node);
    } else {
        foreach (var child in node.Children()) {
            // Single lookup instead of ContainsKey followed by the indexer.
            string newPath;
            if (!ancestors.TryGetValue(child, out newPath)) {
                newPath = path + ">" + child.Name + child.RuleId;
            }
            if (featureString2Bit.TryGetValue(newPath, out bit)) {
                vector |= bit;
                var ret = GetFeatureVector(
                        child, newPath, featureString2Bit, extractor, ancestors,
                        ref vector);
                if (token != null && ret != null) {
                    token += ret;
                    continue;
                }
            }
            // An unknown child path or an incomplete subtree invalidates the token.
            token = null;
        }
    }
    if (token != null && featureString2Bit.TryGetValue(path + "'" + token, out bit)) {
        vector |= bit;
    }
    return token;
}
/// <summary>
/// Encodes <paramref name="node"/> as a bit vector by OR-ing the bits of every feature
/// string in <paramref name="featureString2Bit"/> that applies to it.
/// </summary>
/// <remarks>
/// The node's own name is looked up first. When <c>extractor.IsInner</c> is <c>false</c>,
/// the method climbs the ancestors for as long as the "&lt;"-joined path (with rule ids) is a
/// known feature, then requires the path ending in the stopping ancestor's bare name to be
/// known too; otherwise the vector built so far is returned. Up to
/// <c>extractor.TokenLeft</c> preceding and <c>extractor.TokenRight</c> following
/// meaningful-identifier tokens contribute their "'-" / "'+" features. Finally the
/// recursive overload adds the features found below the node the climb ended on.
/// </remarks>
/// <param name="node">The node to encode.</param>
/// <param name="featureString2Bit">Maps feature strings to their bits.</param>
/// <param name="extractor">Supplies token text, <c>IsInner</c> and the token limits.</param>
/// <returns>The accumulated feature vector.</returns>
public static BigInteger GetFeatureVector(
        this CstNode node, IDictionary<string, BigInteger> featureString2Bit,
        FeatureExtractor extractor) {
    BigInteger bit;
    var path = node.Name;
    var vector = BigInteger.Zero;
    var ancestors = new Dictionary<CstNode, string> { { node, node.Name } };
    if (featureString2Bit.TryGetValue(node.Name, out bit)) {
        vector |= bit;
    }
    if (!extractor.IsInner) {
        var originalNode = node;
        path = node.Name + node.RuleId;
        while (true) {
            node = node.Parent;
            if (node == null) {
                // Every ancestor path matched and there is nothing left to climb to.
                // The code used to dereference the missing parent here; treat it like
                // the "outermost path not found" case below instead.
                return vector;
            }
            var newPath = path + "<" + node.Name + node.RuleId;
            if (!featureString2Bit.TryGetValue(newPath, out bit)) {
                break;
            }
            ancestors.Add(node, path + "<" + node.Name);
            path = newPath;
            // The bit of this ancestor path is deliberately not added to the vector.
        }
        path = path + "<" + node.Name; // must not have RuleId
        if (!featureString2Bit.TryGetValue(path, out bit)) {
            return vector;
        }
        // The bit of the outermost path is deliberately not added either.

        var leftTokens = originalNode.PreviousTokenNodes(node)
                .Where(IsMeaningfulIdentifier)
                .Take(extractor.TokenLeft);
        foreach (var tokenNode in leftTokens) {
            if (featureString2Bit.TryGetValue(
                    "'-" + extractor.GetToken(tokenNode), out bit)) {
                vector |= bit;
            }
        }

        var rightTokens = originalNode.NextTokenNodes(node)
                .Where(IsMeaningfulIdentifier)
                .Take(extractor.TokenRight);
        foreach (var tokenNode in rightTokens) {
            if (featureString2Bit.TryGetValue(
                    "'+" + extractor.GetToken(tokenNode), out bit)) {
                vector |= bit;
            }
        }
    }
    GetFeatureVector(node, path, featureString2Bit, extractor, ancestors, ref vector);
    return vector;
}
/// <summary>
/// Encodes <paramref name="node"/> as a bit vector by OR-ing the bits of every feature
/// string in <paramref name="featureString2Bit"/> that applies to it.
/// </summary>
/// <remarks>
/// The node's own name is looked up first. When <c>extractor.IsInner</c> is <c>false</c>,
/// the method climbs the ancestors for as long as the "&lt;"-joined path (with rule ids) is a
/// known feature, then requires the path ending in the stopping ancestor's bare name to be
/// known too; otherwise the vector built so far is returned. Up to
/// <c>extractor.TokenLeft</c> preceding and <c>extractor.TokenRight</c> following
/// meaningful-identifier tokens contribute their "'-" / "'+" features. Finally the
/// recursive overload adds the features found below the node the climb ended on.
/// </remarks>
/// <param name="node">The node to encode.</param>
/// <param name="featureString2Bit">Maps feature strings to their bits.</param>
/// <param name="extractor">Supplies token text, <c>IsInner</c> and the token limits.</param>
/// <returns>The accumulated feature vector.</returns>
public static BigInteger GetFeatureVector(
        this CstNode node, IDictionary<string, BigInteger> featureString2Bit,
        FeatureExtractor extractor) {
    BigInteger bit;
    var path = node.Name;
    var vector = BigInteger.Zero;
    var ancestors = new Dictionary<CstNode, string> { { node, node.Name } };
    if (featureString2Bit.TryGetValue(node.Name, out bit)) {
        vector |= bit;
    }
    if (!extractor.IsInner) {
        var originalNode = node;
        path = node.Name + node.RuleId;
        while (true) {
            node = node.Parent;
            if (node == null) {
                // Every ancestor path matched and there is nothing left to climb to.
                // The code used to dereference the missing parent here; treat it like
                // the "outermost path not found" case below instead.
                return vector;
            }
            var newPath = path + "<" + node.Name + node.RuleId;
            if (!featureString2Bit.TryGetValue(newPath, out bit)) {
                break;
            }
            ancestors.Add(node, path + "<" + node.Name);
            path = newPath;
            // The bit of this ancestor path is deliberately not added to the vector.
        }
        path = path + "<" + node.Name; // must not have RuleId
        if (!featureString2Bit.TryGetValue(path, out bit)) {
            return vector;
        }
        // The bit of the outermost path is deliberately not added either.

        var leftTokens = originalNode.PreviousTokenNodes(node)
                .Where(IsMeaningfulIdentifier)
                .Take(extractor.TokenLeft);
        foreach (var tokenNode in leftTokens) {
            if (featureString2Bit.TryGetValue(
                    "'-" + extractor.GetToken(tokenNode), out bit)) {
                vector |= bit;
            }
        }

        var rightTokens = originalNode.NextTokenNodes(node)
                .Where(IsMeaningfulIdentifier)
                .Take(extractor.TokenRight);
        foreach (var tokenNode in rightTokens) {
            if (featureString2Bit.TryGetValue(
                    "'+" + extractor.GetToken(tokenNode), out bit)) {
                vector |= bit;
            }
        }
    }
    GetFeatureVector(node, path, featureString2Bit, extractor, ancestors, ref vector);
    return vector;
}
/// <summary>
/// Recursively records the path of <paramref name="node"/> and of every child contained in
/// <paramref name="surroundingNodes"/>, adding token-qualified paths where a complete token
/// string is available.
/// </summary>
/// <param name="node">The node currently being visited.</param>
/// <param name="path">The path string already built for <paramref name="node"/>.</param>
/// <param name="surroundingNodes">Only children in this collection are descended into.</param>
/// <param name="extractor">Supplies the token text of token-bearing nodes.</param>
/// <param name="paths">The set that receives the path strings.</param>
/// <param name="ancestors">
/// Pre-computed paths for specific nodes; when a child has an entry, that path is used
/// instead of extending <paramref name="path"/>.
/// </param>
/// <returns>
/// Item1 is the concatenated token text of the subtree, or <c>null</c> when any child was
/// skipped or itself yielded <c>null</c>. Item2 is <c>true</c> only when this node was
/// suppressed as a temporal variable, in which case nothing is added for it.
/// </returns>
private static Tuple<string, bool> GetSurroundingPaths(
        CstNode node, string path, ICollection<CstNode> surroundingNodes,
        FeatureExtractor extractor, ISet<string> paths,
        IDictionary<CstNode, string> ancestors) {
    var token = "";
    var temporal = false;
    var count = 0;
    if (node.HasToken) {
        token = extractor.GetToken(node);
        temporal = IsTemporalVariable(token);
    } else {
        foreach (var child in node.Children()) {
            count++;
            if (surroundingNodes.Contains(child)) {
                // Single lookup instead of ContainsKey followed by the indexer.
                string newPath;
                if (!ancestors.TryGetValue(child, out newPath)) {
                    newPath = path + ">" + child.Name + child.RuleId;
                }
                var ret = GetSurroundingPaths(
                        child, newPath, surroundingNodes, extractor, paths, ancestors);
                // Only the most recently visited child's flag is kept.
                temporal = ret.Item2;
                if (token != null && ret.Item1 != null) {
                    token += ret.Item1;
                    continue;
                }
            }
            // A skipped child or an incomplete subtree invalidates the combined token.
            token = null;
        }
    }
    // A temporal variable reached through at most one child is not recorded at all.
    if (temporal && count <= 1) {
        return Tuple.Create<string, bool>(null, true);
    }
    paths.Add(path);
    if (token != null) {
        paths.Add(path + "'" + token); // need node.Name + node.RuleId ?
    }
    return Tuple.Create(token, false);
}
/// <summary>
/// Recursively ORs into <paramref name="vector"/> the bit of every known child path below
/// <paramref name="node"/>, plus the bit of the token-qualified path when a complete token
/// string is available.
/// </summary>
/// <param name="node">The node currently being visited.</param>
/// <param name="path">The path string already built for <paramref name="node"/>.</param>
/// <param name="featureString2Bit">Maps feature strings to their bits.</param>
/// <param name="extractor">Supplies the token text of token-bearing nodes.</param>
/// <param name="ancestors">
/// Pre-computed paths for specific nodes; when a child has an entry, that path is used
/// instead of extending <paramref name="path"/>.
/// </param>
/// <param name="vector">The accumulated feature vector, updated in place.</param>
/// <returns>
/// The concatenated token text of the subtree, or <c>null</c> when any child's path was
/// unknown or the child itself yielded <c>null</c>.
/// </returns>
private static string GetFeatureVector(
        CstNode node, string path, IDictionary<string, BigInteger> featureString2Bit,
        FeatureExtractor extractor, IDictionary<CstNode, string> ancestors,
        ref BigInteger vector) {
    BigInteger bit;
    var token = "";
    if (node.HasToken) {
        token = extractor.GetToken(node);
    } else {
        foreach (var child in node.Children()) {
            // Single lookup instead of ContainsKey followed by the indexer.
            string newPath;
            if (!ancestors.TryGetValue(child, out newPath)) {
                newPath = path + ">" + child.Name + child.RuleId;
            }
            if (featureString2Bit.TryGetValue(newPath, out bit)) {
                vector |= bit;
                var ret = GetFeatureVector(
                        child, newPath, featureString2Bit, extractor, ancestors,
                        ref vector);
                if (token != null && ret != null) {
                    token += ret;
                    continue;
                }
            }
            // An unknown child path or an incomplete subtree invalidates the token.
            token = null;
        }
    }
    if (token != null && featureString2Bit.TryGetValue(path + "'" + token, out bit)) {
        vector |= bit;
    }
    return token;
}
/// <summary>
/// Collects the path strings that describe <paramref name="node"/> and its surroundings.
/// </summary>
/// <remarks>
/// The node's own name is always recorded. When the nearest ancestor having more than one
/// child belongs to <paramref name="surroundingNodes"/>, the method additionally:
/// sets <c>extractor.IsInner</c> to <c>false</c>; records one "&lt;"-joined path per ancestor
/// up to <paramref name="outermostNode"/>; records the leading run of preceding and following
/// meaningful-identifier tokens that lie inside <paramref name="surroundingNodes"/>
/// (prefixed "'-" and "'+"); and raises <c>extractor.TokenLeft</c> / <c>TokenRight</c> to the
/// lengths of those runs. Finally the recursive overload walks downwards from the node the
/// climb ended on (or from the original node when no climb happened).
/// </remarks>
/// <param name="node">The node to describe.</param>
/// <param name="surroundingNodes">The nodes regarded as surrounding context.</param>
/// <param name="extractor">Supplies token text; its state is updated as described above.</param>
/// <param name="outermostNode">The ancestor at which the upward climb stops.</param>
/// <returns>A new set with every collected path string.</returns>
public static HashSet<string> GetSurroundingPaths(
        this CstNode node, HashSet<CstNode> surroundingNodes, FeatureExtractor extractor,
        CstNode outermostNode) {
    var currentPath = node.Name;
    var collected = new HashSet<string>();
    var ancestorPaths = new Dictionary<CstNode, string>();
    ancestorPaths.Add(node, node.Name);
    collected.Add(node.Name);

    var branchingAncestor = node.Ancestors()
            .FirstOrDefault(a => a.Children().Count() > 1);
    if (surroundingNodes.Contains(branchingAncestor)) {
        extractor.IsInner = false;
        var startNode = node;
        currentPath = node.Name + node.RuleId;

        // Climb towards outermostNode, recording one path per intermediate ancestor.
        for (node = node.Parent; node != outermostNode; node = node.Parent) {
            ancestorPaths.Add(node, currentPath + "<" + node.Name);
            currentPath = currentPath + "<" + node.Name + node.RuleId;
            collected.Add(currentPath);
        }
        currentPath = currentPath + "<" + node.Name; // must not have RuleId
        collected.Add(currentPath);

        // Preceding tokens: stop at the first one outside the surrounding nodes.
        var leftCount = 0;
        var leftTokens = startNode.PreviousTokenNodes(node)
                .Where(IsMeaningfulIdentifier)
                .TakeWhile(t => surroundingNodes.Contains(t));
        foreach (var tokenNode in leftTokens) {
            collected.Add("'-" + extractor.GetToken(tokenNode));
            leftCount++;
        }
        extractor.TokenLeft = Math.Max(extractor.TokenLeft, leftCount);

        // Following tokens: same rule in the other direction.
        var rightCount = 0;
        var rightTokens = startNode.NextTokenNodes(node)
                .Where(IsMeaningfulIdentifier)
                .TakeWhile(t => surroundingNodes.Contains(t));
        foreach (var tokenNode in rightTokens) {
            collected.Add("'+" + extractor.GetToken(tokenNode));
            rightCount++;
        }
        extractor.TokenRight = Math.Max(extractor.TokenRight, rightCount);
    }

    GetSurroundingPaths(
            node, currentPath, surroundingNodes, extractor, collected, ancestorPaths);
    return collected;
}