/// <summary>
/// Collects the feature path strings that describe <paramref name="node"/> with respect to
/// the given surrounding nodes.
/// </summary>
/// <param name="node">The node the paths are built from.</param>
/// <param name="surroundingNodes">The nodes regarded as surroundings; only these are followed.</param>
/// <param name="extractor">
/// Supplies token text. When the outward walk is taken, its <c>IsInner</c> is set to false and
/// its <c>TokenLeft</c> / <c>TokenRight</c> are raised to the number of neighbouring tokens recorded.
/// </param>
/// <param name="outermostNode">The ancestor at which the upward walk stops.</param>
/// <returns>The set of collected path strings; it always contains the node's name.</returns>
public static HashSet<string> GetSurroundingPaths(
        this CstNode node, HashSet<CstNode> surroundingNodes,
        FeatureExtractor extractor, CstNode outermostNode) {
    var currentPath = node.Name;
    var collected = new HashSet<string> { node.Name };
    var knownPaths = new Dictionary<CstNode, string> { { node, node.Name } };

    // First ancestor (in Ancestors() order) that has more than one child.
    var branching = node.Ancestors().FirstOrDefault(a => a.Children().Count() > 1);
    if (surroundingNodes.Contains(branching)) {
        extractor.IsInner = false;
        var start = node;
        currentPath = node.Name + node.RuleId;

        // Climb towards outermostNode, recording one "<"-joined path per ancestor.
        for (node = node.Parent; node != outermostNode; node = node.Parent) {
            var nameOnly = currentPath + "<" + node.Name;
            knownPaths.Add(node, nameOnly);
            currentPath = nameOnly + node.RuleId;
            collected.Add(currentPath);
        }
        // The outermost node contributes its name only; it must not have RuleId.
        currentPath = currentPath + "<" + node.Name;
        collected.Add(currentPath);

        // Tokens before the node, up to the first one outside the surroundings.
        var leftCount = 0;
        var leftTokens = start.PreviousTokenNodes(node)
                .Where(IsMeaningfulIdentifier)
                .TakeWhile(t => surroundingNodes.Contains(t));
        foreach (var leftToken in leftTokens) {
            collected.Add("'-" + extractor.GetToken(leftToken));
            leftCount++;
        }
        extractor.TokenLeft = Math.Max(extractor.TokenLeft, leftCount);

        // Tokens after the node, up to the first one outside the surroundings.
        var rightCount = 0;
        var rightTokens = start.NextTokenNodes(node)
                .Where(IsMeaningfulIdentifier)
                .TakeWhile(t => surroundingNodes.Contains(t));
        foreach (var rightToken in rightTokens) {
            collected.Add("'+" + extractor.GetToken(rightToken));
            rightCount++;
        }
        extractor.TokenRight = Math.Max(extractor.TokenRight, rightCount);
    }

    // Descend from the current node (the outermost one if the walk above ran).
    GetSurroundingPaths(node, currentPath, surroundingNodes, extractor, collected, knownPaths);
    return collected;
}
/// <summary>
/// Get a group key from the specified node.
/// </summary>
/// <param name="node">The node the key is derived from.</param>
/// <returns>
/// A string that starts with the node name, followed by up to <c>GroupKeyLength</c> entries
/// (rule id, plus "-" and the token text for nodes that have a token), all delimited and
/// terminated by the "greater than" character.
/// </returns>
public string GetGroupPathFromNode(CstNode node) {
    IEnumerable<CstNode> keyNodes;
    if (_extractor.IsInner) {
        // TODO: possibly normalize node via AncestorsOfOnlyChildAndSelf().Last() first
        // TODO: descendants may be empty list
        keyNodes = node.DescendantsOfFirstChild();
    } else {
        // TODO: possibly normalize node via DescendantsOfOnlyChildAndSelf().Last() first
        keyNodes = node.AncestorsAndSelf();
    }

    // Both branches describe their nodes the same way, so project once.
    var distinctivePath = keyNodes
            .Take(GroupKeyLength)
            .Select(e => e.HasToken
                    ? e.RuleId + "-" + _extractor.GetToken(e)
                    : e.RuleId);

    return ">" + node.Name + ">" + string.Join(">", distinctivePath) + ">";
}
/// <summary>
/// Recursively walks the children of <paramref name="node"/> that belong to
/// <paramref name="surroundingNodes"/> and records their path strings in <paramref name="paths"/>.
/// </summary>
/// <param name="node">The node currently being visited.</param>
/// <param name="path">The path string already built for <paramref name="node"/>.</param>
/// <param name="surroundingNodes">Only children contained in this collection are visited.</param>
/// <param name="extractor">Supplies the token text of token nodes.</param>
/// <param name="paths">Receives the collected path strings.</param>
/// <param name="ancestors">Pre-computed paths that take precedence over the path derived for a child.</param>
/// <returns>
/// Item1 is the concatenated token text below the node, or null when some child was skipped or
/// yielded null. Item2 is true when the node was dropped as a temporal variable, in which case
/// nothing is recorded for it.
/// </returns>
private static Tuple<string, bool> GetSurroundingPaths(
        CstNode node, string path, ICollection<CstNode> surroundingNodes,
        FeatureExtractor extractor, ISet<string> paths,
        IDictionary<CstNode, string> ancestors) {
    string joined = "";
    bool isTemporal = false;
    int childCount = 0;

    if (!node.HasToken) {
        foreach (var child in node.Children()) {
            childCount++;
            string piece = null;
            if (surroundingNodes.Contains(child)) {
                string childPath;
                if (!ancestors.TryGetValue(child, out childPath)) {
                    childPath = path + ">" + child.Name + child.RuleId;
                }
                var result = GetSurroundingPaths(
                        child, childPath, surroundingNodes, extractor, paths, ancestors);
                isTemporal = result.Item2;
                piece = result.Item1;
            }
            // A single missing piece invalidates the token text of the whole node.
            joined = (joined != null && piece != null) ? joined + piece : null;
        }
    } else {
        joined = extractor.GetToken(node);
        isTemporal = IsTemporalVariable(joined);
    }

    // A temporal token, alone or behind at most one child, is reported upwards and not recorded.
    if (isTemporal && childCount <= 1) {
        return Tuple.Create<string, bool>(null, true);
    }

    paths.Add(path);
    if (joined != null) {
        paths.Add(path + "'" + joined); // need node.Name + node.RuleId ?
    }
    return Tuple.Create(joined, false);
}
/// <summary>
/// Recursively descends from <paramref name="node"/>, OR-ing into <paramref name="vector"/> the
/// bit of every child path and token feature that is present in <paramref name="featureString2Bit"/>.
/// </summary>
/// <param name="node">The node currently being visited.</param>
/// <param name="path">The path string already built for <paramref name="node"/>.</param>
/// <param name="featureString2Bit">Maps a feature string to its bit.</param>
/// <param name="extractor">Supplies the token text of token nodes.</param>
/// <param name="ancestors">Pre-computed paths that take precedence over the path derived for a child.</param>
/// <param name="vector">The feature vector being accumulated.</param>
/// <returns>
/// The concatenated token text below the node, or null when some child path is not a known
/// feature or some child yielded null.
/// </returns>
private static string GetFeatureVector(
        CstNode node, string path, IDictionary<string, BigInteger> featureString2Bit,
        FeatureExtractor extractor, IDictionary<CstNode, string> ancestors,
        ref BigInteger vector) {
    BigInteger mask;

    // Token node: its own token text is the only candidate feature.
    if (node.HasToken) {
        var leafToken = extractor.GetToken(node);
        if (leafToken != null
            && featureString2Bit.TryGetValue(path + "'" + leafToken, out mask)) {
            vector |= mask;
        }
        return leafToken;
    }

    string joined = "";
    foreach (var child in node.Children()) {
        string childPath;
        if (!ancestors.TryGetValue(child, out childPath)) {
            childPath = path + ">" + child.Name + child.RuleId;
        }

        string piece = null;
        if (featureString2Bit.TryGetValue(childPath, out mask)) {
            vector |= mask;
            piece = GetFeatureVector(
                    child, childPath, featureString2Bit, extractor, ancestors, ref vector);
        }
        // A single missing piece invalidates the token text of the whole node.
        joined = (joined != null && piece != null) ? joined + piece : null;
    }

    if (joined != null && featureString2Bit.TryGetValue(path + "'" + joined, out mask)) {
        vector |= mask;
    }
    return joined;
}
/// <summary>
/// Builds the feature vector of <paramref name="node"/> by OR-ing together the bits of all
/// feature strings in <paramref name="featureString2Bit"/> that apply to the node.
/// </summary>
/// <param name="node">The node to compute the vector for.</param>
/// <param name="featureString2Bit">Maps a feature string to its bit.</param>
/// <param name="extractor">
/// Supplies token text. Its <c>IsInner</c> decides whether ancestors and neighbouring tokens are
/// examined; <c>TokenLeft</c> / <c>TokenRight</c> limit how many neighbouring tokens are considered.
/// </param>
/// <returns>
/// The accumulated vector. When the outward walk finds no known outermost path, or runs out of
/// ancestors, only the bit of the node's own name (if known) is set.
/// </returns>
public static BigInteger GetFeatureVector(
        this CstNode node, IDictionary<string, BigInteger> featureString2Bit,
        FeatureExtractor extractor) {
    BigInteger bit;
    var path = node.Name;
    var vector = BigInteger.Zero;
    var ancestors = new Dictionary<CstNode, string> { { node, node.Name } };
    if (featureString2Bit.TryGetValue(node.Name, out bit)) {
        vector |= bit;
    }

    if (!extractor.IsInner) {
        var originalNode = node;
        path = node.Name + node.RuleId;

        // Climb while the path including the parent's RuleId is a known feature.
        while (true) {
            var parent = node.Parent;
            if (parent == null) {
                // Ran out of ancestors (assumes Parent is null at the root - TODO confirm).
                // No outermost path can match, so treat it like an unknown outermost path
                // instead of dereferencing null.
                return vector;
            }
            node = parent;
            var nameOnlyPath = path + "<" + node.Name;
            var newPath = nameOnlyPath + node.RuleId;
            if (!featureString2Bit.TryGetValue(newPath, out bit)) {
                break;
            }
            ancestors.Add(node, nameOnlyPath);
            path = newPath;
            // vector |= bit; is unnecessary
        }

        path = path + "<" + node.Name; // must not have RuleId
        if (!featureString2Bit.TryGetValue(path, out bit)) {
            return vector;
        }
        // vector |= bit; is unnecessary

        // Neighbouring tokens before the original node.
        var leftTokens = originalNode.PreviousTokenNodes(node)
                .Where(IsMeaningfulIdentifier)
                .Take(extractor.TokenLeft);
        foreach (var tokenNode in leftTokens) {
            if (featureString2Bit.TryGetValue("'-" + extractor.GetToken(tokenNode), out bit)) {
                vector |= bit;
            }
        }

        // Neighbouring tokens after the original node.
        var rightTokens = originalNode.NextTokenNodes(node)
                .Where(IsMeaningfulIdentifier)
                .Take(extractor.TokenRight);
        foreach (var tokenNode in rightTokens) {
            if (featureString2Bit.TryGetValue("'+" + extractor.GetToken(tokenNode), out bit)) {
                vector |= bit;
            }
        }
    }

    // Descend from the current node (the outermost one if the walk above ran).
    GetFeatureVector(node, path, featureString2Bit, extractor, ancestors, ref vector);
    return vector;
}
/// <summary>
/// Gathers the path strings that characterise <paramref name="node"/> relative to the given
/// surrounding nodes.
/// </summary>
/// <param name="node">The node the paths start from.</param>
/// <param name="surroundingNodes">The nodes treated as surroundings; only these are followed.</param>
/// <param name="extractor">
/// Supplies token text. If the outward walk is taken, <c>IsInner</c> is set to false and
/// <c>TokenLeft</c> / <c>TokenRight</c> are raised to the number of neighbouring tokens recorded.
/// </param>
/// <param name="outermostNode">The ancestor where the upward walk ends.</param>
/// <returns>The set of gathered path strings; the node's name is always included.</returns>
public static HashSet<string> GetSurroundingPaths(
        this CstNode node, HashSet<CstNode> surroundingNodes,
        FeatureExtractor extractor, CstNode outermostNode) {
    var result = new HashSet<string>();
    var pathByNode = new Dictionary<CstNode, string> { { node, node.Name } };
    result.Add(node.Name);

    // First ancestor (in Ancestors() order) that has more than one child.
    var firstBranching = node.Ancestors().FirstOrDefault(a => a.Children().Count() > 1);
    if (!surroundingNodes.Contains(firstBranching)) {
        // Inner case: only descend from the node itself.
        GetSurroundingPaths(node, node.Name, surroundingNodes, extractor, result, pathByNode);
        return result;
    }

    extractor.IsInner = false;
    var origin = node;
    var trail = node.Name + node.RuleId;

    // Walk up to outermostNode, adding one "<"-joined path per intermediate ancestor.
    node = node.Parent;
    while (node != outermostNode) {
        var nameOnly = trail + "<" + node.Name;
        pathByNode.Add(node, nameOnly);
        trail = nameOnly + node.RuleId;
        result.Add(trail);
        node = node.Parent;
    }
    trail = trail + "<" + node.Name; // must not have RuleId
    result.Add(trail);

    // Preceding tokens, stopping at the first one outside the surroundings.
    var before = 0;
    foreach (var tokenNode in origin.PreviousTokenNodes(node)
            .Where(IsMeaningfulIdentifier)
            .TakeWhile(t => surroundingNodes.Contains(t))) {
        result.Add("'-" + extractor.GetToken(tokenNode));
        before++;
    }
    extractor.TokenLeft = Math.Max(extractor.TokenLeft, before);

    // Following tokens, stopping at the first one outside the surroundings.
    var after = 0;
    foreach (var tokenNode in origin.NextTokenNodes(node)
            .Where(IsMeaningfulIdentifier)
            .TakeWhile(t => surroundingNodes.Contains(t))) {
        result.Add("'+" + extractor.GetToken(tokenNode));
        after++;
    }
    extractor.TokenRight = Math.Max(extractor.TokenRight, after);

    // Descend from the outermost node.
    GetSurroundingPaths(node, trail, surroundingNodes, extractor, result, pathByNode);
    return result;
}
/// <summary>
/// Visits <paramref name="node"/> and its children recursively and sets in
/// <paramref name="vector"/> the bit of each child path and token feature found in
/// <paramref name="featureString2Bit"/>.
/// </summary>
/// <param name="node">The node being visited.</param>
/// <param name="path">The path string built so far for <paramref name="node"/>.</param>
/// <param name="featureString2Bit">Maps a feature string to its bit.</param>
/// <param name="extractor">Supplies the token text of token nodes.</param>
/// <param name="ancestors">Paths computed in advance; used instead of the derived child path when present.</param>
/// <param name="vector">The feature vector that is updated in place.</param>
/// <returns>
/// The token text of the node (concatenated over its children), or null if any child path is
/// not a known feature or any child yielded null.
/// </returns>
private static string GetFeatureVector(
        CstNode node, string path, IDictionary<string, BigInteger> featureString2Bit,
        FeatureExtractor extractor, IDictionary<CstNode, string> ancestors,
        ref BigInteger vector) {
    BigInteger found;
    string text = "";

    if (!node.HasToken) {
        foreach (var child in node.Children()) {
            string nextPath;
            if (!ancestors.TryGetValue(child, out nextPath)) {
                nextPath = path + ">" + child.Name + child.RuleId;
            }

            if (!featureString2Bit.TryGetValue(nextPath, out found)) {
                // Unknown child path: do not descend, and the node's token text is unusable.
                text = null;
                continue;
            }

            vector |= found;
            var childText = GetFeatureVector(
                    child, nextPath, featureString2Bit, extractor, ancestors, ref vector);
            text = (text == null || childText == null) ? null : text + childText;
        }
    } else {
        text = extractor.GetToken(node);
    }

    if (text != null && featureString2Bit.TryGetValue(path + "'" + text, out found)) {
        vector |= found;
    }
    return text;
}
/// <summary>
/// Visits <paramref name="node"/> and, recursively, those of its children that are contained in
/// <paramref name="surroundingNodes"/>, adding the resulting path strings to <paramref name="paths"/>.
/// </summary>
/// <param name="node">The node being visited.</param>
/// <param name="path">The path string built so far for <paramref name="node"/>.</param>
/// <param name="surroundingNodes">Children outside this collection are not visited.</param>
/// <param name="extractor">Supplies the token text of token nodes.</param>
/// <param name="paths">The set the path strings are added to.</param>
/// <param name="ancestors">Paths computed in advance; used instead of the derived child path when present.</param>
/// <returns>
/// Item1: the token text of the node (concatenated over its children), or null if any child was
/// skipped or yielded null. Item2: true if the node was dropped as a temporal variable, in
/// which case no path is added for it.
/// </returns>
private static Tuple<string, bool> GetSurroundingPaths(
        CstNode node, string path, ICollection<CstNode> surroundingNodes,
        FeatureExtractor extractor, ISet<string> paths,
        IDictionary<CstNode, string> ancestors) {
    string text = "";
    var temporalSeen = false;
    var visited = 0;

    if (node.HasToken) {
        text = extractor.GetToken(node);
        temporalSeen = IsTemporalVariable(text);
    } else {
        foreach (var child in node.Children()) {
            visited++;
            if (!surroundingNodes.Contains(child)) {
                // Child outside the surroundings: the node's token text is unusable.
                text = null;
                continue;
            }

            string nextPath;
            if (!ancestors.TryGetValue(child, out nextPath)) {
                nextPath = path + ">" + child.Name + child.RuleId;
            }
            var childResult = GetSurroundingPaths(
                    child, nextPath, surroundingNodes, extractor, paths, ancestors);
            temporalSeen = childResult.Item2;
            text = (text == null || childResult.Item1 == null)
                    ? null
                    : text + childResult.Item1;
        }
    }

    // A temporal token, alone or behind at most one child, is reported upwards and not recorded.
    if (temporalSeen && visited <= 1) {
        return Tuple.Create<string, bool>(null, true);
    }

    paths.Add(path);
    if (text != null) {
        paths.Add(path + "'" + text); // need node.Name + node.RuleId ?
    }
    return Tuple.Create(text, false);
}
/// <summary>
/// Computes the feature vector of <paramref name="node"/>: the bitwise OR of the bits of all
/// feature strings in <paramref name="featureString2Bit"/> that apply to the node.
/// </summary>
/// <param name="node">The node to compute the vector for.</param>
/// <param name="featureString2Bit">Maps a feature string to its bit.</param>
/// <param name="extractor">
/// Supplies token text. When its <c>IsInner</c> is false, ancestors and neighbouring tokens are
/// examined as well, with <c>TokenLeft</c> / <c>TokenRight</c> limiting how many tokens are considered.
/// </param>
/// <returns>
/// The accumulated vector. If the outward walk finds no known outermost path, or there are no
/// more ancestors to climb, only the bit of the node's own name (if known) is set.
/// </returns>
public static BigInteger GetFeatureVector(
        this CstNode node, IDictionary<string, BigInteger> featureString2Bit,
        FeatureExtractor extractor) {
    BigInteger bit;
    var path = node.Name;
    var vector = BigInteger.Zero;
    var ancestors = new Dictionary<CstNode, string> { { node, node.Name } };
    if (featureString2Bit.TryGetValue(node.Name, out bit)) {
        vector |= bit;
    }

    if (extractor.IsInner) {
        // Inner case: only descend from the node itself.
        GetFeatureVector(node, path, featureString2Bit, extractor, ancestors, ref vector);
        return vector;
    }

    var originalNode = node;
    path = node.Name + node.RuleId;

    // Climb for as long as the path including the parent's RuleId is a known feature.
    for (;;) {
        if (node.Parent == null) {
            // No more ancestors (assumes Parent is null at the root - TODO confirm).
            // An outermost path cannot match any more, so return what has been collected
            // rather than dereferencing null below.
            return vector;
        }
        node = node.Parent;
        var withoutRuleId = path + "<" + node.Name;
        var withRuleId = withoutRuleId + node.RuleId;
        if (!featureString2Bit.TryGetValue(withRuleId, out bit)) {
            break;
        }
        ancestors.Add(node, withoutRuleId);
        path = withRuleId;
        // vector |= bit; is unnecessary
    }

    path = path + "<" + node.Name; // must not have RuleId
    if (!featureString2Bit.TryGetValue(path, out bit)) {
        return vector;
    }
    // vector |= bit; is unnecessary

    // Neighbouring tokens before the original node.
    foreach (var tokenNode in originalNode.PreviousTokenNodes(node)
            .Where(IsMeaningfulIdentifier)
            .Take(extractor.TokenLeft)) {
        if (featureString2Bit.TryGetValue("'-" + extractor.GetToken(tokenNode), out bit)) {
            vector |= bit;
        }
    }

    // Neighbouring tokens after the original node.
    foreach (var tokenNode in originalNode.NextTokenNodes(node)
            .Where(IsMeaningfulIdentifier)
            .Take(extractor.TokenRight)) {
        if (featureString2Bit.TryGetValue("'+" + extractor.GetToken(tokenNode), out bit)) {
            vector |= bit;
        }
    }

    // Descend from the outermost node reached above.
    GetFeatureVector(node, path, featureString2Bit, extractor, ancestors, ref vector);
    return vector;
}