/// <summary>
/// Converts this range into a pair of indices by delegating to the
/// out-parameter overload of <c>ConvertToIndicies</c>.
/// </summary>
/// <param name="code">The structured code forwarded to the out-parameter overload.</param>
/// <returns>
/// A tuple whose first item is the inclusive start index and whose second item
/// is the exclusive end index.
/// </returns>
public Tuple<int, int> ConvertToIndicies(StructuredCode code) {
    int start;
    int end;
    ConvertToIndicies(code, out start, out end);
    return new Tuple<int, int>(start, end);
}
/// <summary>
/// Creates the seed nodes for the given fragments, replaces the node of each seed
/// with a node whose name is one of the selected node names, and stores in each seed
/// the path built from the names of the nodes between that node and the node found
/// for the seed's surrounding range.
/// </summary>
/// <param name="structuredCode">The structured code processed</param>
/// <param name="cst">The concrete syntax tree</param>
/// <param name="fragments">The fragments from which the seed nodes are created</param>
/// <returns>The seed nodes with their Node and Path properties updated</returns>
public static List<SeedNode> ConstructAcceptingFragments(
        StructuredCode structuredCode, CstNode cst, IList<SelectedFragment> fragments) {
    var seeds = CreateSeedNodes(structuredCode, cst, fragments);

    // A single fragment can correspond to several candidate nodes, so there are
    // several candidate node names to filter by. FindGoodNodeNames picks the names
    // that are used below to choose the node of each seed.
    var uppermostNodes = seeds
            .Select(seed => seed.Node.AncestorWithSingleChild())
            .ToImmutableHashSet();
    var goodNames = LearningExperimentUtil.FindGoodNodeNames(uppermostNodes)
            .ToImmutableHashSet();

    foreach (var seed in seeds) {
        // Switch to the first node (among the node itself and its single-child
        // descendants) whose name was selected.
        seed.Node = seed.Node.DescendantsOfSingleAndSelf()
                .First(candidate => goodNames.Contains(candidate.Name));

        var stopNode = seed.SurroundingRange.FindInnermostNode(cst);

        // Walk upwards from the chosen node, appending "<" + name + rule id for every
        // ancestor visited before the stop node is reached.
        var current = seed.Node;
        var path = current.Name;
        current = current.Parent;
        while (current != stopNode) {
            path = path + "<" + current.Name + current.RuleId;
            current = current.Parent;
        }
        seed.Path = path;
    }

    return seeds;
}
/// <summary>
/// Creates a code range from a pair of indices into the given code.
/// </summary>
/// <param name="code">The structured code the indices refer to.</param>
/// <param name="inclusiveStart">The index converted into the start location.</param>
/// <param name="exclusiveEnd">The index converted into the end location.</param>
/// <returns>A new range made of the two converted locations.</returns>
public static CodeRange ConvertFromIndicies(
        StructuredCode code, int inclusiveStart, int exclusiveEnd) {
    var begin = CodeLocation.ConvertFromIndex(code, inclusiveStart);
    // The already converted start location is handed to the end conversion.
    var finish = CodeLocation.ConvertFromIndex(code, exclusiveEnd, begin);
    var range = new CodeRange(begin, finish);
    return range;
}
/// <summary>
/// Shrinks the index pair so that it no longer starts or ends with whitespace
/// characters and converts the shrunk pair into a code range.
/// </summary>
/// <param name="code">The structured code the indices refer to.</param>
/// <param name="inclusiveStart">
/// The inclusive start index; advanced past leading whitespace on return.
/// </param>
/// <param name="exclusiveEnd">
/// The exclusive end index; moved back before trailing whitespace on return.
/// </param>
/// <returns>The range built from the adjusted indices.</returns>
/// <remarks>
/// Each loop is bounded by the other index. A range that is empty or contains only
/// whitespace therefore collapses to an empty range at <paramref name="exclusiveEnd"/>
/// instead of producing an inverted range or reading outside of the code.
/// </remarks>
public static CodeRange ConvertFromIndiciesSkippingWhitespaces(
        StructuredCode code, ref int inclusiveStart, ref int exclusiveEnd) {
    // Skip leading whitespace, but never walk past the end of the range.
    while (inclusiveStart < exclusiveEnd && char.IsWhiteSpace(code[inclusiveStart])) {
        inclusiveStart++;
    }
    // Skip trailing whitespace, but never walk back past the (adjusted) start.
    while (exclusiveEnd > inclusiveStart && char.IsWhiteSpace(code[exclusiveEnd - 1])) {
        exclusiveEnd--;
    }
    return ConvertFromIndicies(code, inclusiveStart, exclusiveEnd);
}
/// <summary>
/// Creates one seed node per fragment by locating the fragment's surrounding text and
/// target text in the code and looking up the outermost node for the target range.
/// </summary>
/// <param name="structuredCode">The structured code that is searched.</param>
/// <param name="cst">The concrete syntax tree in which the node is looked up.</param>
/// <param name="fragments">
/// The fragments to locate. The search for each fragment starts after the position
/// where the previous fragment's surrounding text was found (and not before the
/// fragment's start line).
/// </param>
/// <returns>The seed nodes, in the order of <paramref name="fragments"/>.</returns>
/// <exception cref="Exception">
/// Thrown when the surrounding text or the target text of a fragment cannot be found.
/// </exception>
private static List<SeedNode> CreateSeedNodes(
        StructuredCode structuredCode, CstNode cst, IEnumerable<SelectedFragment> fragments) {
    const string invalidFragmentMessage = "The selected code fragment is invalid.";

    var code = structuredCode.Code;
    var seedNodes = new List<SeedNode>();
    var lastIndex = -1;

    foreach (var fragment in fragments) {
        var searchStart = Math.Max(
                lastIndex + 1, structuredCode.GetIndex(fragment.StartLine, 0));
        // IndexOf throws ArgumentOutOfRangeException for a start index beyond the end
        // of the string; report that case as an invalid fragment instead.
        if (searchStart > code.Length) {
            throw new Exception(invalidFragmentMessage);
        }

        // Ordinal search: the match must be exact so that adding the text length
        // below yields the correct end index.
        var surroundingIndex = code.IndexOf(
                fragment.SurroundingText, searchStart, StringComparison.Ordinal);
        // Validate before using the index as the start of the next search; passing
        // -1 to IndexOf would throw ArgumentOutOfRangeException.
        if (surroundingIndex < 0) {
            throw new Exception(invalidFragmentMessage);
        }

        var targetIndex = code.IndexOf(
                fragment.TargetText, surroundingIndex, StringComparison.Ordinal);
        if (targetIndex < 0) {
            throw new Exception(invalidFragmentMessage);
        }

        var surroundingRange = structuredCode.GetRange(
                surroundingIndex, surroundingIndex + fragment.SurroundingText.Length);
        var targetRange = structuredCode.GetRange(
                targetIndex, targetIndex + fragment.TargetText.Length);
        var node = targetRange.FindOutermostNode(cst);

        // The next fragment is searched for after this fragment's surrounding text start.
        lastIndex = surroundingIndex;
        seedNodes.Add(new SeedNode(node, targetRange, surroundingRange));
    }

    return seedNodes;
}
/// <summary>
/// Test helper asserting that index pairs and code ranges convert into each other
/// consistently, both for plain string code and for <c>StructuredCode</c>.
/// </summary>
/// <param name="node">The node in which the innermost node for the range is looked up.</param>
/// <param name="code">The code text the indices refer to.</param>
/// <param name="inclusiveStart">The expected inclusive start index.</param>
/// <param name="exclusiveEnd">The expected exclusive end index.</param>
/// <param name="range">The expected range for the index pair.</param>
private static void ConvertIndiciesToRange(
        CstNode node, string code, int inclusiveStart, int exclusiveEnd, CodeRange range) {
    var newRange = CodeRange.ConvertFromIndicies(code, inclusiveStart, exclusiveEnd);

    // Widen the range by one adjacent whitespace character on each side, when there is
    // one. The bounds checks keep ranges that touch the very beginning or end of the
    // code from indexing outside of the string.
    var newInclusiveStart =
            inclusiveStart > 0 && char.IsWhiteSpace(code[inclusiveStart - 1])
                    ? inclusiveStart - 1
                    : inclusiveStart;
    var newExclusiveEnd =
            exclusiveEnd < code.Length && char.IsWhiteSpace(code[exclusiveEnd])
                    ? exclusiveEnd + 1
                    : exclusiveEnd;

    // Skipping the whitespace again must lead to a node located at the original range.
    var elem = CodeRange.ConvertFromIndiciesSkippingWhitespaces(
            code, ref newInclusiveStart, ref newExclusiveEnd)
            .FindInnermostNode(node);
    Assert.That(CodeRange.Locate(elem), Is.EqualTo(newRange));

    // Round trip using the plain string code.
    newRange.ConvertToIndicies(code, out newInclusiveStart, out newExclusiveEnd);
    Assert.That(newRange, Is.EqualTo(range));
    Assert.That(newInclusiveStart, Is.EqualTo(inclusiveStart));
    Assert.That(newExclusiveEnd, Is.EqualTo(exclusiveEnd));

    // Round trip using the structured code.
    var scode = new StructuredCode(code);
    Assert.That(
            CodeRange.ConvertFromIndicies(scode, inclusiveStart, exclusiveEnd),
            Is.EqualTo(range));
    newRange.ConvertToIndicies(scode, out newInclusiveStart, out newExclusiveEnd);
    Assert.That(newInclusiveStart, Is.EqualTo(inclusiveStart));
    Assert.That(newExclusiveEnd, Is.EqualTo(exclusiveEnd));
}
/// <summary>
/// Converts the start and end locations of this range into indices of the given code.
/// </summary>
/// <param name="code">The structured code used to look up the indices.</param>
/// <param name="inclusiveStart">Receives the index of <c>StartLocation</c>.</param>
/// <param name="exclusiveEnd">Receives the index of <c>EndLocation</c>.</param>
public void ConvertToIndicies(
        StructuredCode code, out int inclusiveStart, out int exclusiveEnd) {
    // The start is resolved before the end, as in every other conversion of this range.
    inclusiveStart = code.GetIndex(this.StartLocation);
    exclusiveEnd = code.GetIndex(this.EndLocation);
}
/// <summary>
/// Gets the fragment of the given code for this instance.
/// </summary>
/// <param name="code">The structured code asked for the fragment.</param>
/// <returns>The value returned by <c>code.GetFragment</c> for this instance.</returns>
public string GetCodeFragment(StructuredCode code) {
    var fragment = code.GetFragment(this);
    return fragment;
}
/// <summary>
/// Wraps the given code in a <c>StructuredCode</c> and requests the given line from it.
/// Any value returned by the request is discarded.
/// </summary>
/// <param name="code">The code text to wrap.</param>
/// <param name="lineNumber">The line number passed to <c>StructuredCode.GetLine</c>.</param>
public void GetLine(string code, int lineNumber) {
    new StructuredCode(code).GetLine(lineNumber);
}
/// <summary>
/// Builds a classifier from the seed files and the code files, refines it in a loop
/// until the classification reports no suspicious nodes, and returns the results.
/// Progress and timing information is written to the console along the way.
/// </summary>
/// <param name="seedPaths">Paths passed to GenerateValidCsts to obtain the seed syntax trees.</param>
/// <param name="codePaths">
/// Paths passed to GenerateValidCsts to obtain all syntax trees; also passed to the
/// feature encoder.
/// </param>
/// <param name="searchPattern">Not referenced anywhere in this method body.</param>
/// <param name="writer">
/// Optional writer; when it is not null the encoding result is written to it together
/// with the training set.
/// </param>
/// <returns>
/// A LearningResult holding the last classification result, the classifier, the
/// encoding result and the feature encoder.
/// </returns>
/// <exception cref="Exception">
/// Thrown when a vector appears both among the ideal accepted and the ideal rejected vectors.
/// </exception>
public LearningResult Learn(
        ICollection<string> seedPaths, ICollection<string> codePaths, string searchPattern,
        StreamWriter writer = null) {
    var allCsts = GenerateValidCsts(codePaths);
    var seedCsts = GenerateValidCsts(seedPaths).ToList();
    // Seed nodes: the uppermost nodes with one of the oracle names, filtered by the oracle.
    var seedNodes = seedCsts
            .SelectMany(
                    cst => LearningExperimentUtil.GetUppermostNodesByNames(cst, OracleNames))
            .Where(ProtectedIsAcceptedUsingOracle)
            .ToList();

    // Only the first seed tree provides the code used to construct the fragments.
    var seedCst = seedCsts.First();
    var seedCode = seedCst.Code;
    var structuredCode = new StructuredCode(seedCode);

    var acceptingFragments = ConstructAcceptingFragments(structuredCode, seedCst, seedNodes);
    var rejectingFragments = ConstructRejectingFragments(structuredCode, seedCst);
    // NOTE(review): any return value is ignored here; presumably called for its side
    // effects - confirm.
    SeedNodeSet.Create(acceptingFragments, this);

    // Start of the span reported below as "Preparing time".
    var preparingTime = Environment.TickCount;
    var extractor = CreateExtractor();
    var seedNodeSet = new SeedNodeSet(acceptingFragments.Select(f => f.Node), seedCsts, this);
    Console.WriteLine("#Accepted seed nodes: " + seedNodeSet.AcceptedNodes.Count
                      + " (" + acceptingFragments.Count + ")");
    Console.WriteLine("#Rejected seed nodes: " + seedNodeSet.RejectedNodes.Count
                      + " (" + rejectingFragments.Count + ")");

    var featureSet = new FeatuerSet(seedNodeSet, extractor, acceptingFragments, rejectingFragments);
    // Each selected node name is wrapped in ">" on both sides to form a group path.
    var groupPaths = seedNodeSet.SelectedNodeNames.Select(n => ">" + n + ">");
    var classifier = new Classifier(groupPaths, featureSet);
    Console.WriteLine(
            "#Features: " + featureSet.AcceptingFeatureCount + ", "
            + featureSet.RejectingFeatureCount);
    Console.WriteLine("Inner: " + extractor.IsInner);

    var featureEncoder = new FeatureEncoder(seedNodeSet.SelectedNodeNames, extractor, featureSet);
    var encodingResult = featureEncoder.Encode(codePaths, allCsts, this, seedNodeSet);
    Console.WriteLine("#Unique Elements: " + encodingResult.VectorCount);

    // A vector that is both ideally accepted and ideally rejected is reported as an
    // error: print the feature strings of the first such vector, then throw.
    if (encodingResult.IdealAcceptedVector2GroupPath.Keys.ToHashSet()
            .Overlaps(encodingResult.IdealRejectedVector2GroupPath.Keys.ToHashSet())) {
        var others = encodingResult.IdealRejectedVector2GroupPath;
        var vector =
                encodingResult.IdealAcceptedVector2GroupPath.Keys.First(others.ContainsKey);
        foreach (var featureString in featureEncoder.GetFeatureStringsByVector(vector)) {
            Console.WriteLine(Experiment.Beautify(featureString));
        }
        throw new Exception("Master predicates can't classify elements!");
    }

    var groupCache = new GroupCache(encodingResult, classifier);
    var trainingSet = encodingResult.CreateTrainingVectorSet();
    classifier.Create(trainingSet, groupCache);
    Experiment.WriteFeatureStrings(Console.Out, classifier, featureEncoder);

    Console.WriteLine("Preparing time: " + (Environment.TickCount - preparingTime));

    // count is passed to Classify; it is reset to 0 whenever classifier.Update returns
    // true and incremented otherwise.
    var count = 0;
    var sumTime = Environment.TickCount;
    ClassificationResult classificationResult;
    while (true) {
        var time = Environment.TickCount;
        classificationResult = Classify(count, classifier, groupCache, encodingResult, trainingSet);
        // The loop ends once a classification reports no suspicious nodes.
        if (classificationResult.SuspiciousNodes == null) {
            break;
        }

        var additionalAcceptedSet = RevealSuspiciousElements(
                encodingResult.IdealAcceptedVector2GroupPath.Keys,
                classificationResult.SuspiciousNodes, encodingResult, trainingSet);
        if (!classifier.Update(additionalAcceptedSet, trainingSet, groupCache)) {
            count++;
        } else {
            count = 0;
        }
        Console.WriteLine("Time: " + (Environment.TickCount - time));
    }
    classifier.MakeImmutable();

    Console.WriteLine();
    Console.WriteLine("Sum time: " + (Environment.TickCount - sumTime));
    var trainingVectorCount = trainingSet.Count;
    var idealVectorCount = encodingResult.IdealVectorSet.Count;
    Console.WriteLine("#Required vectors: " + trainingVectorCount + " / " + idealVectorCount);
    if (writer != null) {
        encodingResult.WriteResult(writer, trainingSet);
    }

    foreach (var groupPath in classifier.GroupPaths) {
        Console.WriteLine(groupPath);
    }

    classifier.Optimize(encodingResult.IdealRejectedVector2GroupPath.Keys, groupCache);

    return new LearningResult {
        ClassificationResult = classificationResult,
        Classifier = classifier,
        EncodingResult = encodingResult,
        FeatureEncoder = featureEncoder,
    };
}
/// <summary>
/// Updates every accepting fragment against the seed code and verifies that the node of
/// each fragment is the single-child ancestor of the seed node at the same position.
/// </summary>
/// <param name="structuredCode">The structured code passed to each fragment update.</param>
/// <param name="seedCst">The seed syntax tree passed to each fragment update.</param>
/// <param name="seedNodes">The seed nodes, compared position by position with the fragments.</param>
/// <returns>A list copy of <c>AcceptingFragments</c> after the updates.</returns>
/// <exception cref="Exception">
/// Thrown when the node of a fragment differs from the expected seed node ancestor.
/// </exception>
private List<SelectedFragment> ConstructAcceptingFragments(
        StructuredCode structuredCode, CstNode seedCst, List<CstNode> seedNodes) {
    var fragments = AcceptingFragments.ToList();
    // Each update receives the index returned by the previous update (-1 at first).
    var processedIndex = -1;
    var position = 0;
    foreach (var fragment in fragments) {
        processedIndex = fragment.Update(structuredCode, seedCst, processedIndex);

        var actual = fragment.Node;
        var expected = seedNodes[position].AncestorWithSingleChild();
        if (actual != expected) {
            throw new Exception("The selected node should be the node selected by the oracle.");
        }
        position++;
    }
    return fragments;
}
/// <summary>
/// Updates every rejecting fragment against the seed code, in order.
/// </summary>
/// <param name="structuredCode">The structured code passed to each fragment update.</param>
/// <param name="seedCst">The seed syntax tree passed to each fragment update.</param>
/// <returns>A list copy of <c>RejectingFragments</c> after the updates.</returns>
private List<SelectedFragment> ConstructRejectingFragments(
        StructuredCode structuredCode, CstNode seedCst) {
    var fragments = RejectingFragments.ToList();
    // Each update receives the index returned by the previous update (-1 at first).
    var processedIndex = -1;
    for (var i = 0; i < fragments.Count; i++) {
        processedIndex = fragments[i].Update(structuredCode, seedCst, processedIndex);
    }
    return fragments;
}
/// <summary>
/// Converts an index of the given code into a code location by delegating to
/// <c>StructuredCode.GetLocation</c>.
/// </summary>
/// <param name="code">The structured code the index refers to.</param>
/// <param name="index">The index to convert.</param>
/// <param name="startLocation">The location forwarded as the second argument of the lookup.</param>
/// <returns>The location returned by the lookup.</returns>
public static CodeLocation ConvertFromIndex(
        StructuredCode code, int index, CodeLocation startLocation) {
    var location = code.GetLocation(index, startLocation);
    return location;
}